survey/generate/convert_questions_with_excel_fix.py
朱潮 99796408cf Initial commit: Add survey system with enhanced features
- Complete survey management system with web interface
- Question generation tools and prompts
- Report generation and analysis capabilities
- Docker configuration for deployment
- Database initialization scripts

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-28 20:28:57 +08:00

279 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import shutil
from openpyxl import load_workbook
import subprocess
import sys
def parse_question_block(lines, question_type, question_num, score):
    """Parse the text lines of one question into a row dict.

    Args:
        lines: Stripped, non-empty text lines that belong to one question.
        question_type: Section letter. 'A', 'B' and 'C' map to the base tags
            '基础题', '进阶题' and '竞赛题'; any other value leaves the base
            tag empty.
        question_num: Value stored under '序号'.
        score: Value stored under '分数'.

    Returns:
        A dict with 15 keys in column order: '序号', '题干', '选项 A' to
        '选项 H', '解析', '分数', '答案', '标签' and a trailing '' key.
        Fields that no line provides stay as empty strings.
    """
    option_letters = 'ABCDEFGH'
    tag_labels = ('知识点标签', '能力标签')
    type_tags = {'A': '基础题', 'B': '进阶题', 'C': '竞赛题'}
    # Lines starting with one of these can never be used as the stem.
    reserved_prefixes = tuple(f'{letter}.' for letter in option_letters) + (
        '正确答案', '能力标签', '知识点标签')

    question_data = {'序号': question_num, '题干': ''}
    question_data.update({f'选项 {letter}': '' for letter in option_letters})
    question_data.update({
        '解析': '',
        '分数': score,
        '答案': '',
        '标签': type_tags.get(question_type, ''),
        '': '',  # the 15th column is intentionally empty
    })

    for line in lines:
        # Stem: only a line that *starts* with the label counts, and only the
        # leading label is removed, so "题目" inside the stem text survives and
        # later lines that merely mention "题目" cannot overwrite the stem.
        if line.startswith('题目'):
            if not question_data['题干']:
                question_data['题干'] = re.sub(
                    r'^题目\s*[::]?\s*', '', line).strip()
            continue

        # Options "A." to "H." (the row has slots for all eight letters).
        option = re.match(r'([A-H])\.', line)
        if option:
            question_data[f'选项 {option.group(1)}'] = line[2:].strip()
            continue

        # Answer, accepting an ASCII or a full-width colon.
        answer = re.match(r'正确答案[::](.*)', line)
        if answer:
            question_data['答案'] = answer.group(1).strip()
            continue

        # Tag lines are appended to the base tag, separated by one space.
        tag_label = next((label for label in tag_labels if label in line), None)
        if tag_label:
            extra_tag = re.sub(f'{tag_label}[::]', '', line).strip()
            if extra_tag:
                question_data['标签'] = (
                    f"{question_data['标签']} {extra_tag}".strip())

    # No explicit stem line: fall back to the first line unless it is clearly
    # an option, answer or tag line.
    if not question_data['题干'] and lines:
        first_line = lines[0]
        if not first_line.startswith(reserved_prefixes):
            question_data['题干'] = first_line

    return question_data
def parse_question_file(file_path):
    """Read a question text file and group its questions by difficulty.

    Question headers look like "A1.", "B2." or "C3.". The text between one
    header and the next header of any section is handed to
    parse_question_block. Score values are taken, in order of appearance,
    from "每题N分" declarations and are kept as the captured digit strings.

    Args:
        file_path: Path of the text file to parse.

    Returns:
        A dict with the keys '基础题', '进阶题' and '竞赛题', each holding the
        list of row dicts for the A, B and C questions respectively. Blocks
        with fewer than five non-empty lines, or without a stem, are skipped.
    """
    questions = {
        '基础题': [],
        '进阶题': [],
        '竞赛题': [],
    }

    # Strict UTF-8 goes first: GBK can decode UTF-8 bytes without raising and
    # silently yield mojibake. 'utf-8-sig' also drops a leading BOM. latin-1
    # stays last because it accepts any byte sequence.
    content = None
    for encoding in ('utf-8-sig', 'gbk', 'gb2312', 'latin-1'):
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                content = f.read()
            break
        except UnicodeDecodeError:
            continue
    if content is None:
        print(f"无法读取文件: {file_path}")
        return questions

    # Score declarations in order of appearance.
    scores = re.findall(r'每题(\d+)分', content)

    # section letter -> (result key, index into scores, fallback score)
    sections = {
        'A': ('基础题', 0, '5'),
        'B': ('进阶题', 1, '10'),
        'C': ('竞赛题', 2, '15'),
    }

    # Locate every header of every section in one pass so that a block ends
    # at the NEXT header, whatever its section. Splitting per section would
    # let the last A block swallow all B and C questions.
    headers = list(re.finditer(r'([ABC])\d+\.', content))
    counters = dict.fromkeys(sections, 0)

    for position, header in enumerate(headers):
        letter = header.group(1)
        category, score_index, default_score = sections[letter]

        # B (C) questions are only considered when a second (third) score
        # declaration exists; A questions are always considered.
        if score_index > 0 and len(scores) <= score_index:
            continue

        # Per-section running number; it also counts blocks skipped below.
        counters[letter] += 1

        if position + 1 < len(headers):
            block_end = headers[position + 1].start()
        else:
            block_end = len(content)
        block = content[header.end():block_end]

        lines = [line.strip() for line in block.strip().split('\n') if line.strip()]
        if len(lines) < 5:  # too short to be a complete question
            continue

        score = scores[score_index] if len(scores) > score_index else default_score
        question_data = parse_question_block(lines, letter, counters[letter], score)
        if question_data['题干']:
            questions[category].append(question_data)

    return questions
def excel_resave(file_path):
    """Open a workbook in Microsoft Excel and save it again.

    Only macOS is automated, by running an AppleScript through ``osascript``.
    On Windows a notice is printed; on every other platform nothing is done.

    Args:
        file_path: Path of the workbook to re-save.

    Returns:
        True when the AppleScript exits with status 0, otherwise False.
    """
    if sys.platform == 'win32':
        print(" Windows系统暂不支持自动Excel重新保存")
        return False
    if sys.platform != 'darwin':
        return False

    # The path is passed as an argument to the run handler instead of being
    # pasted into the script source, so quotes or backslashes in the path can
    # neither break nor inject AppleScript. The POSIX file value is built
    # before entering the tell block.
    script = '''
on run argv
    set workbookFile to POSIX file (item 1 of argv)
    tell application "Microsoft Excel"
        open workbookFile
        activate
        delay 2
        save active workbook
        quit saving yes
    end tell
end run
'''
    try:
        result = subprocess.run(
            # An absolute path is used so the result does not depend on the
            # working directory.
            ['osascript', '-e', script, os.path.abspath(file_path)],
            capture_output=True, text=True, timeout=30)
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Covers a missing osascript binary, an invalid path and the timeout.
        print(f" Excel重新保存出错: {e}")
        return False

    if result.returncode == 0:
        print(f" Excel重新保存成功: {os.path.basename(file_path)}")
        return True
    print(f" Excel重新保存失败: {result.stderr}")
    return False
def create_excel_with_format_fix(questions, template_path, output_file):
    """Write the parsed questions into a copy of the Excel template.

    The template is copied to ``output_file``, the data rows of its '单选题'
    sheet are replaced by the questions (numbered consecutively from 1), the
    workbook is saved, and excel_resave is then attempted on the result.

    Args:
        questions: Dict mapping '基础题' / '进阶题' / '竞赛题' to lists of
            question row dicts. A missing category is treated as empty.
        template_path: Path of the template workbook.
        output_file: Path of the workbook to create.
    """
    # Work on a copy so the template itself is never modified.
    shutil.copy2(template_path, output_file)
    wb = load_workbook(output_file)
    ws = wb['单选题']

    # Column order of the sheet; the 15th column has an empty header.
    single_choice_fields = (
        '序号', '题干', '选项 A', '选项 B', '选项 C', '选项 D',
        '选项 E', '选项 F', '选项 G', '选项 H', '解析', '分数', '答案', '标签', '',
    )
    column_count = len(single_choice_fields)

    all_questions = [
        question
        for category in ('基础题', '进阶题', '竞赛题')
        for question in questions.get(category, [])
    ]

    if all_questions:
        # Clear existing data from row 2 down while keeping cell formatting.
        # The value must be assigned on the cell object: ws.cell(...,
        # value=None) leaves the old value in place, so leftover template
        # rows would survive below the new data.
        for row in range(2, ws.max_row + 1):
            for col in range(1, column_count + 1):
                ws.cell(row=row, column=col).value = None

        for sequence, question in enumerate(all_questions, 1):
            for col, field_name in enumerate(single_choice_fields, 1):
                if field_name == '序号':
                    # Renumber consecutively without mutating the caller's dict.
                    value = sequence
                else:
                    value = question.get(field_name, '')
                    if value is None:
                        value = ''  # write an empty string rather than None
                ws.cell(row=sequence + 1, column=col, value=value)

    wb.save(output_file)

    # Best effort: let Excel itself re-save the file to fix format issues.
    print(" 尝试用Excel重新保存...")
    excel_resave(output_file)
def main(questions_dir='/Users/moshui/Documents/survey/questions',
         output_dir='/Users/moshui/Documents/survey/output',
         template_path='/Users/moshui/Documents/survey/1.xlsx'):
    """Convert every .txt question file in a directory to an .xlsx file.

    Args:
        questions_dir: Directory that holds the .txt question files.
        output_dir: Directory for the generated workbooks; created on demand.
        template_path: Workbook used as the template for every output file.

    The defaults are the paths the script was originally hard-coded to use.
    """
    if not os.path.exists(template_path):
        print(f"模板文件不存在: {template_path}")
        return
    # Report a missing input directory instead of crashing in os.listdir.
    if not os.path.isdir(questions_dir):
        print(f"题目目录不存在: {questions_dir}")
        return

    os.makedirs(output_dir, exist_ok=True)

    print("开始转换文件...")
    print("注意此脚本会尝试自动用Excel重新保存文件以修复格式问题")
    print("如果自动重新保存失败请手动用Excel打开并保存文件")
    print()

    # Sorted so the processing order is the same on every run.
    for txt_file in sorted(os.listdir(questions_dir)):
        if not txt_file.endswith('.txt'):
            continue

        txt_path = os.path.join(questions_dir, txt_file)
        print(f"正在处理: {txt_file}")

        questions = parse_question_file(txt_path)
        total_questions = sum(len(q) for q in questions.values())
        print(f" 找到 {total_questions} 道题目")
        for qtype, qlist in questions.items():
            if qlist:
                print(f" {qtype}: {len(qlist)}")

        if total_questions > 0:
            # Swap only the extension; str.replace would also rewrite a
            # ".txt" that appears in the middle of the file name.
            base_name = os.path.splitext(txt_file)[0] + '.xlsx'
            output_file = os.path.join(output_dir, base_name)
            create_excel_with_format_fix(questions, template_path, output_file)
            print(f" 已保存到: {output_file}")
        else:
            print(" 未找到有效题目")
        print()
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()