import re import json from html import unescape def parse_exam_html(html_content): print(f"HTML内容长度: {len(html_content)}") # 尝试提取所有题目信息 questions = [] # 处理document.write内容 doc_write_re = re.compile( r'document\.write\(\s*([\'"])(.*?)\1\s*\);', re.DOTALL | re.IGNORECASE ) # 提取所有document.write内容并合并 all_contents = [] for match in doc_write_re.finditer(html_content): content = match.group(2) content = unescape(content)


en(questions) + 1}", "question": question, "options": options, "ansopt": ansopt, "answer": answer_text, "score": score }) except Exception as e: print(f"解析题目时出错: {str(e)}") return questions # 示例用法 if __name__ == "__main__": with open("exam.html", "r", encoding="utf-8") as f: html_content = f.read() parsed_questions = parse_exam_html(html_content) print(json.dumps(parsed_questions, ensure_ascii=False, indent=4)) # 额外需要注意的逻辑点 # 1. 正则表达式可能无法覆盖所有HTML结构,需根据实际页面调整。 # 2. 如果HTML内容过大,建议分块处理以提高性能。 # 3. 确保输入的HTML编码正确,避免解析过程中出现乱码。 [2025-05-13 09:39:52 | AI写代码神器 | 203点数解答]
相关提问
- import re import json from html import unescape def parse_exam_html(html_content): print(f"HTML内容长度: {len(html_content)}") # 尝试提取所有题目信息 questions = [] # 处理document.write内容 doc_write_re = re.compile( r'document\.write\(\s*([\'"])(.*?)\1\s*\);', re.DOTALL | re.IGNORECASE ) # 提取所有document.write内容并合并 all_contents = [] for match in doc_write_re.finditer(html_content): content = match.group(2) content = unescape(content) (203点数解答 | 2025-05-13 09:39:52)114
- import re import json from html import unescape def parse_exam_html(html_content): print(f"HTML内容长度: {len(html_content)}") # 尝试提取所有题目信息 questions = [] # 处理document.write内容 doc_write_re = re.compile( r'document\.write\(\s*([\'"])(.*?)\1\s*\);', re.DOTALL | re.IGNORECASE ) # 提取所有document.write内容并合并 all_contents = [] for match in doc_write_re.finditer(html_content): content = match.group(2) content = unescape(content) (384点数解答 | 2025-05-13 09:40:43)105
- import openpyxl import smtplib import imaplib import email import json import os import re from bs4 import BeautifulSoup from fastapi import FastAPI, Form from openpyxl.styles import Alignment from email.mime.multipart import MIMEMultipart from email.mime.base import MIMEBase from email.header import decode_header from email import encoders import pandas as pd app = FastAPI() RECEIVER_EMAILS = { 0: "yundongshijie001@protonmail.com", 1: "xiaobudian001@protonmail.com" } email_address = "(182点数解答 | 2025-04-12 00:49:09)163
- import javax.swing.*; import java.awt.*; import java.awt.datatransfer.clipboard; import java.awt.datatransfer.stringselection; import java.awt.datatransfer.transferable; import java.awt.event.*; import java.io.*; import java.nio.file.files; import java.nio.file.path; import java.nio.file.paths; import java.time.localdatetime; import java.util.hashmap; import java.util.list; import java.util.map; import java.util.random; public class copy { static private final jtextarea textarea = new jtext(1497点数解答 | 2024-08-25 09:40:33)321
- import os import datetime from flask import Flask, request, jsonify import requests from flask_cors import CORS import re import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import numpy as np import base64 from io import BytesIO import pandas as pd import traceback # 添加traceback以获取详细错误信息 plt.rcParams["font.sans-serif"] = ["SimHei"] app = Flask(__name__) CORS(app) FASTGPT_API_URL = 'http://localhost:3000/api/v1/chat/completions' FASTGPT_API_KEY = 'fastgpt-gWzitHpBa8XRr0q(713点数解答 | 2025-06-18 16:00:34)117
- from kivy.app import app from kivy.uix.button import button from kivy.uix.boxlayout import boxlayout from kivy.uix.filechooser import filechooserlistview from kivy.uix.popup import popup from kivy.uix.label import label from kivy.uix.screenmanager import screenmanager, screen from kivy.core.window import window from kivy.uix.treeview import treeview, treeviewlabel from unitypy import assetsmanager from unitypy.exceptions import unitypyerror import os from pil import image import time class file(262点数解答 | 2024-12-01 17:07:07)217
- package com.example.weather; import android.content.context; import android.media.mediaplayer; import android.net.uri; import android.os.bundle; import android.util.log; import android.view.layoutinflater; import android.view.view; import android.view.viewgroup; import android.webkit.websettings; import android.webkit.webview; import android.webkit.webviewclient; import android.widget.videoview; import androidx.fragment.app.fragment; public class realtimemonitoringfragment extends fragment {(30点数解答 | 2024-10-12 10:31:18)226
- package com.example.weather; import android.content.context; import android.media.mediaplayer; import android.net.uri; import android.os.bundle; import android.util.log; import android.view.layoutinflater; import android.view.view; import android.view.viewgroup; import android.webkit.websettings; import android.webkit.webview; import android.webkit.webviewclient; import android.widget.videoview; import androidx.fragment.app.fragment; public class realtimemonitoringfragment extends fragment {(531点数解答 | 2024-10-12 10:32:25)206
- #include "videoplayer.h" #include "log.h" #pragma warning(disable: 4996) //buffer: static int audio_len = 0; static uint8* audio_pos; void videoplayer::audiocallback(void* userdata, uint8_t* stream, int len) { sdl_memset(stream, 0, len); if (audio_len <= 0) return; len = (len > audio_len ? audio_len : len); /* mix as much data as possible */ sdl_mixaudio(stream, audio_pos, len, sdl_mix_maxvolume); audio_pos += len; audio_len -= len; } videoplayer::videop(549点数解答 | 2024-08-08 15:59:25)221
- python代码 获取到 &nbsp; 怎么样变成 \xa0 怎么用让他的 \ 不是转义输出,为什么 import html from bs4 import BeautifulSoup html_content = "压缩天然气储运,减压,燃烧都在严格的密封状态下进行,不易发生泄露。&nbsp;另外其储气瓶经过各种特殊的破坏性试验,安全可靠。" soup = BeautifulSoup(html_content, "html.parser") text = soup.get_text() print(text) 里面的文本 是 &nbsp; 为什么 输出的不是\xa0 而是空格 ,可是我要输出\xa0,要的是直接输出 不是替换(371点数解答 | 2025-04-16 17:09:02)147
- python代码 获取到 &nbsp; 怎么样变成 \xa0 怎么用让他的 \ 不是转义输出,为什么 import html from bs4 import BeautifulSoup html_content = "压缩天然气储运,减压,燃烧都在严格的密封状态下进行,不易发生泄露。&nbsp;另外其储气瓶经过各种特殊的破坏性试验,安全可靠。" soup = BeautifulSoup(html_content, "html.parser") text = soup.get_text() print(text) 里面的文本 是 &nbsp; 为什么 输出的不是\xa0 而是空格 ,可是我要输出\xa0(210点数解答 | 2025-04-16 17:09:53)149
- package com.bjrl.matrix.controller; import com.bjrl.common.annotation.Log; import com.bjrl.common.core.controller.BaseController; import com.bjrl.common.core.domain.AjaxResult; import com.bjrl.common.core.domain.entity.AccountNumberInfo; import com.bjrl.common.core.domain.entity.MatrixAncestors; import com.bjrl.common.core.page.TableDataInfo; import com.bjrl.common.enums.BusinessType; import com.bjrl.common.utils.poi.ExcelUtil; import com.bjrl.matrix.domain.PrintConfig; import com.bjrl.matrix.d(698点数解答 | 2025-09-15 10:13:12)36