Spaces:

caoyanyi
/

ai

Running

ai / app.py

Caoyanyi

* Merge ocr results.

022bc99 2 days ago

32 kB


	import os

	# Set all environment variables up front.
	os.environ["TOKENIZERS_PARALLELISM"] = "false"
	os.environ["DISABLE_MODEL_SOURCE_CHECK"] = "True"
	os.environ["PADDLEOCR_HOME"] = "./paddleocr_models" # set the model cache directory

	from fastapi import FastAPI, UploadFile, File
	from fastapi.responses import JSONResponse
	from fastapi.staticfiles import StaticFiles
	import tempfile

	# OCR dependencies are imported lazily, only when they are actually used.
	# These two globals are filled in by get_paddleocr().
	paddleocr_available = False
	PaddleOCR = None

	# pytesseract is the fallback OCR backend.
	# These globals are filled in by get_pytesseract() / get_opencv().
	pytesseract_available = False
	pytesseract = None
	cv2_available = False
	cv2 = None

	# Lazy loader for OpenCV.
	def get_opencv():
	    """Import OpenCV on first use and cache the module in the ``cv2`` global.

	    ``cv2_available`` is updated whenever an import is attempted.

	    Returns:
	        The cached ``cv2`` module, or ``None`` if the import failed.
	    """
	    global cv2, cv2_available
	    if cv2 is not None:
	        return cv2
	    try:
	        import cv2 as opencv_module
	    except ImportError as exc:
	        print(f"❌ OpenCV动态导入失败: {exc}")
	        cv2_available = False
	    else:
	        cv2 = opencv_module
	        cv2_available = True
	        print("✅ OpenCV动态导入成功")
	    return cv2

	# Lazy loader for pytesseract.
	def get_pytesseract():
	    """Import pytesseract (the fallback OCR backend) on first use.

	    On a successful first import OpenCV is loaded too.  Unless the import
	    fails, the tesseract executable is re-checked on every call.

	    Returns:
	        The cached ``pytesseract`` module, or ``None`` if the import failed.
	    """
	    global pytesseract, pytesseract_available
	    if pytesseract is None:
	        try:
	            import pytesseract as tess_module
	        except ImportError as exc:
	            print(f"❌ pytesseract动态导入失败: {exc}")
	            pytesseract_available = False
	            return pytesseract
	        pytesseract = tess_module
	        # Make sure OpenCV gets imported as well.
	        get_opencv()

	    # Check on every call whether the tesseract executable is usable.
	    check_tesseract_availability()
	    return pytesseract

	def check_tesseract_availability():
	    """Probe the tesseract executable and refresh ``pytesseract_available``.

	    Runs ``<tesseract_cmd> --version`` with a 5 second timeout.  The flag is
	    set to ``True`` only when the command exits with return code 0; a missing
	    module, a missing ``tesseract_cmd`` attribute, a missing executable, a
	    non-zero exit code or any other error (including a timeout) leaves it
	    ``False``.  Nothing is returned and no exception is propagated from the
	    probe itself.
	    """
	    global pytesseract, pytesseract_available
	    if pytesseract is None:
	        pytesseract_available = False
	        return

	    from subprocess import run

	    available = False
	    try:
	        # 1. The pytesseract wrapper must expose the command it will run.
	        if not hasattr(pytesseract.pytesseract, 'tesseract_cmd'):
	            print("⚠️ tesseract_cmd未配置")
	        else:
	            # 2. Run the command directly to see whether it is usable.
	            tesseract_cmd = pytesseract.pytesseract.tesseract_cmd
	            result = run([tesseract_cmd, '--version'],
	                         capture_output=True, text=True, timeout=5)
	            if result.returncode == 0:
	                # The first line of stdout carries the version banner.
	                version = result.stdout.strip().split('\n')[0] if result.stdout else "unknown"
	                print(f"✅ tesseract可执行文件可用，版本: {version}")
	                available = True
	            else:
	                print(f"⚠️ tesseract命令执行失败，返回码: {result.returncode}")
	                print(f"错误输出: {result.stderr.strip()}")
	    except FileNotFoundError:
	        # The executable does not exist at the configured location.
	        print(f"⚠️ tesseract可执行文件未找到: {pytesseract.pytesseract.tesseract_cmd}")
	    except Exception as exc:
	        # run() is called without check=True, so a failing exit status is
	        # handled above; this branch covers timeouts and anything unexpected.
	        print(f"⚠️ 测试tesseract可执行文件时出错: {exc}")
	    pytesseract_available = available

	# Lazy loader for the PaddleOCR class.
	def get_paddleocr():
	    """Import the ``PaddleOCR`` class on first use and cache it globally.

	    ``paddleocr_available`` is updated whenever an import is attempted.  If
	    the import fails, the pytesseract fallback is loaded instead.

	    Returns:
	        The cached ``PaddleOCR`` class, or ``None`` if the import failed.
	    """
	    global PaddleOCR, paddleocr_available
	    if PaddleOCR is not None:
	        return PaddleOCR
	    try:
	        from paddleocr import PaddleOCR as paddle_cls
	    except ImportError as exc:
	        print(f"❌ PaddleOCR动态导入失败: {exc}")
	        paddleocr_available = False
	        # Try the fallback backend.
	        get_pytesseract()
	    else:
	        PaddleOCR = paddle_cls
	        paddleocr_available = True
	        print("✅ PaddleOCR动态导入成功")
	    return PaddleOCR

	# Optional dependency: PyMuPDF (imported as ``fitz``).
	try:
	    import fitz # PyMuPDF
	except ImportError:
	    fitz_available = False
	else:
	    fitz_available = True

	# Optional dependencies: OpenCV and NumPy; one flag covers both imports.
	try:
	    import cv2
	    import numpy as np
	except ImportError:
	    cv2_available = False
	else:
	    cv2_available = True

	# Optional dependency: ONNX Runtime.
	try:
	    from onnxruntime import InferenceSession
	except ImportError:
	    onnx_available = False
	else:
	    onnx_available = True

	app = FastAPI(title="智能音频转录与摘要服务")

	# Mount the static file directory; the front end is reached under /front.
	app.mount("/front", StaticFiles(directory="static", html=True), name="static")

	# Set the environment variable that silences the tokenizers parallelism warning.
	os.environ["TOKENIZERS_PARALLELISM"] = "false"
	# Bypass the model source check to speed up startup.
	os.environ["DISABLE_MODEL_SOURCE_CHECK"] = "True"

	# Model handles; they stay None until the matching load_* function succeeds.
	whisper_model = None
	summarizer = None
	ocr_model = None

	# Per-model load status, also reported by the /health endpoint.
	models_loaded = {
	"whisper": False,
	"summarizer": False,
	"ocr": False
	}

	# Error message from the most recent OCR model load attempt (None when there is none).
	ocr_load_error = None

	# Try to import faster_whisper; the flag lets the endpoints report a clear
	# error when the dependency is missing.
	try:
	    from faster_whisper import WhisperModel
	except ImportError:
	    whisper_available = False
	else:
	    whisper_available = True

	# Try to import transformers; the flag lets the endpoints report a clear
	# error when the dependency is missing.
	try:
	    from transformers import pipeline
	except ImportError:
	    transformers_available = False
	else:
	    transformers_available = True

	# Models are loaded lazily on first use, so the application can still
	# start even when a model fails to load.
	def load_whisper_model():
	    """Load the Whisper model on first use and cache it globally.

	    Loading is attempted only while no model is cached and faster_whisper
	    was importable.  ``models_loaded["whisper"]`` records the outcome.

	    Returns:
	        The cached model, or ``None`` if it is unavailable.
	    """
	    global whisper_model, models_loaded
	    should_load = whisper_model is None and whisper_available
	    if not should_load:
	        return whisper_model
	    try:
	        whisper_model = WhisperModel("medium", device="cpu", compute_type="int8")
	        models_loaded["whisper"] = True
	        print("Successfully loaded Whisper model")
	    except Exception as exc:
	        print(f"Error loading Whisper model: {exc}")
	        models_loaded["whisper"] = False
	    return whisper_model

	def load_summarizer_model():
	    """Load the summarization pipeline on first use and cache it globally.

	    Loading is attempted only while no pipeline is cached and transformers
	    was importable.  ``models_loaded["summarizer"]`` records the outcome.

	    Returns:
	        The cached pipeline, or ``None`` if it is unavailable or loading failed.
	    """
	    global summarizer, models_loaded
	    if summarizer is not None or not transformers_available:
	        return summarizer

	    try:
	        from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	        # Multilingual mT5 checkpoint used for Chinese summarization.
	        checkpoint = "csebuetnlp/mT5_multilingual_XLSum"

	        # Original author's notes: mT5 needs sentencepiece, and legacy=False
	        # avoids many warnings about path differences between versions.
	        tok = AutoTokenizer.from_pretrained(checkpoint, legacy=False)
	        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

	        # Build the pipeline (device=-1 as in the original call).
	        summarizer = pipeline("summarization", model=seq2seq, tokenizer=tok, device=-1)

	        models_loaded["summarizer"] = True
	        print(f"✅ 成功加载模型: {checkpoint}")
	        return summarizer
	    except ImportError:
	        print("❌ 错误: 缺少依赖库。请运行: pip install protobuf sentencepiece")
	    except Exception as exc:
	        # Print the exception type as well to make debugging easier.
	        print(f"❌ 加载模型出错 (类型: {type(exc).__name__}): {exc}")
	        print("建议尝试清理 HuggingFace 缓存目录: rm -rf ~/.cache/huggingface/hub")

	    # Reached only when loading failed.
	    print("Failed to load any Chinese summarization model. Summarization functionality will be unavailable.")
	    models_loaded["summarizer"] = False
	    return None

	# OCR model loader.
	def load_ocr_model():
	    """Lazily set up the OCR backend: PaddleOCR first, pytesseract as fallback.

	    The result is cached in the ``ocr_model`` global and
	    ``models_loaded["ocr"]`` records the outcome.  ``ocr_load_error`` is
	    reset on every call and, on failure, holds a description that now also
	    includes any PaddleOCR initialisation errors.

	    Returns:
	        A PaddleOCR instance, or the dict
	        ``{'type': 'pytesseract', 'engine': <pytesseract module>}``, or
	        ``None`` when no backend could be set up.
	    """
	    global ocr_model, models_loaded, ocr_load_error

	    # Reset the error message from any previous attempt.
	    ocr_load_error = None

	    if ocr_model is not None:
	        return ocr_model

	    # Prefer PaddleOCR.
	    paddle_errors = []
	    _PaddleOCR = get_paddleocr()
	    if _PaddleOCR:
	        print("Starting to load PaddleOCR model...")
	        # Candidate constructor configurations, tried in order (original
	        # note: parameters supported by PaddleOCR 3.3.2).
	        configs = [
	            {
	                'lang': 'ch',
	                'device': 'cpu',
	            }
	        ]
	        for i, config in enumerate(configs, start=1):
	            try:
	                print(f"Trying PaddleOCR config {i}: {config}")
	                ocr_model = _PaddleOCR(**config)
	                models_loaded["ocr"] = True
	                print(f"Successfully loaded PaddleOCR model with config {i}")
	                return ocr_model
	            except Exception as exc:
	                error_msg = f"Config {i} failed: {str(exc)}"
	                print(error_msg)
	                paddle_errors.append(error_msg)

	        # Every PaddleOCR configuration failed; fall through to pytesseract.
	        print("All PaddleOCR configurations failed. Trying pytesseract as fallback...")

	    # Previously collected but never reported: append the PaddleOCR failures
	    # to whatever error is surfaced below.
	    paddle_suffix = ""
	    if paddle_errors:
	        paddle_suffix = "（PaddleOCR初始化失败: " + "; ".join(paddle_errors) + "）"

	    # Fallback backend.
	    _pytesseract = get_pytesseract()

	    # Refresh the tesseract executable status.
	    check_tesseract_availability()

	    if _pytesseract and pytesseract_available:
	        print("Using pytesseract as OCR solution...")
	        # pytesseract needs no preloaded model; wrap the module in a tagged dict.
	        ocr_model = {
	            'type': 'pytesseract',
	            'engine': _pytesseract
	        }
	        models_loaded["ocr"] = True
	        print("Successfully configured pytesseract OCR")
	        return ocr_model

	    if _pytesseract:
	        # The Python package is importable but the tesseract executable is not usable.
	        error_details = "pytesseract库已安装，但tesseract可执行文件不可用，OCR功能无法使用。请安装tesseract可执行文件并确保其在系统PATH中。"
	    else:
	        # No OCR backend is usable at all.
	        error_details = "所有OCR方案均不可用（PaddleOCR和pytesseract均未安装或导入失败）"
	    error_details += paddle_suffix
	    print(f"❌ {error_details}")
	    models_loaded["ocr"] = False
	    ocr_load_error = error_details
	    return None

	# PDF-to-image conversion.
	def pdf_to_images(pdf_path, dpi=300):
	    """Render every page of a PDF into an image array.

	    Args:
	        pdf_path: Path handed to ``fitz.open``.
	        dpi: Resolution passed to ``page.get_pixmap`` (higher is sharper).

	    Returns:
	        A list with one array per page, in page order.  3- and 4-channel
	        pixmaps are converted to BGR for OpenCV; other channel counts are
	        kept as rendered.  The list is empty when PyMuPDF is unavailable.
	    """
	    images = []
	    if not fitz_available:
	        return images
	    doc = fitz.open(pdf_path)
	    try:
	        for page_num in range(doc.page_count):
	            page = doc[page_num]
	            pix = page.get_pixmap(dpi=dpi)
	            # View the pixmap bytes as a (height, width, channels) uint8 array.
	            img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
	            # OpenCV works with BGR channel order.
	            if pix.n == 3:
	                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
	            elif pix.n == 4:
	                img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
	            images.append(img)
	    finally:
	        # Close the document even when rendering a page fails.
	        doc.close()
	    return images

	# Image preprocessing.
	def preprocess_image(image):
	    """Preprocess an image to improve OCR accuracy.

	    When OpenCV is available the image is converted from BGR to grayscale,
	    binarized with a Gaussian adaptive threshold and passed through a
	    Gaussian blur.  Otherwise the input is returned unchanged.
	    """
	    if not cv2_available:
	        return image
	    # Grayscale conversion.
	    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    # Adaptive thresholding.
	    binarized = cv2.adaptiveThreshold(
	        grayscale, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
	    )
	    # Gaussian blur for denoising.
	    return cv2.GaussianBlur(binarized, (1, 1), 0)

	@app.get("/health")
	def health_check():
	    """Health-check endpoint reporting dependency flags and model load status."""
	    ocr_backends = {
	        "paddleocr": paddleocr_available,
	        "pytesseract": pytesseract_available,
	        "pymupdf": fitz_available,
	        "opencv": cv2_available,
	        "onnxruntime": onnx_available,
	    }
	    services = {
	        "whisper_available": whisper_available,
	        "transformers_available": transformers_available,
	        "ocr_available": ocr_backends,
	        "models_loaded": models_loaded,
	    }
	    return {"status": "healthy", "services": services, "message": "服务正常运行"}

	def _ocr_page_with_pytesseract(engine, original_img, preprocessed_img, file_ext, filename):
	    """Run pytesseract on one page; return its non-empty lines with a shared confidence."""
	    ocr_input = preprocessed_img
	    if isinstance(preprocessed_img, np.ndarray):
	        from PIL import Image
	        # Screenshots (png/jpg/jpeg with "screenshot" in the file name) are fed
	        # as plain grayscale instead of the binarized image.
	        if file_ext in ['png', 'jpg', 'jpeg'] and 'screenshot' in filename.lower():
	            gray_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY) if len(original_img.shape) == 3 else original_img
	            ocr_input = Image.fromarray(gray_img)
	        else:
	            ocr_input = Image.fromarray(preprocessed_img)

	    # image_to_string supplies the text (original note: it avoids
	    # character-splitting problems); image_to_data supplies confidences.
	    full_text = engine.image_to_string(
	        ocr_input,
	        lang='chi_sim+eng', # Simplified Chinese plus English language packs
	        config='--psm 6' # original note: assume a single block of text
	    )
	    result_data = engine.image_to_data(
	        ocr_input,
	        output_type=engine.Output.DICT,
	        lang='chi_sim+eng',
	        config='--psm 6'
	    )

	    valid_confidences = [value for value in map(float, result_data['conf']) if value > 0]
	    if valid_confidences:
	        # Presumably on a 0-100 scale (hence the division) - TODO confirm.
	        confidence = sum(valid_confidences) / len(valid_confidences) / 100.0
	    else:
	        # Fallback is already a fraction; it used to be divided by 100 as well.
	        confidence = 0.5

	    stripped_lines = (line.strip() for line in full_text.strip().split('\n'))
	    return [{"text": line, "confidence": confidence} for line in stripped_lines if line]


	def _ocr_page_with_paddleocr(ocr_engine, image):
	    """Run PaddleOCR on one page; return a list of text/confidence dicts."""
	    try:
	        result = ocr_engine.ocr(image, cls=True)
	    except Exception:
	        # Retry with angle classification disabled; a second failure
	        # propagates to the caller, which reports it as an OCR error.
	        result = ocr_engine.ocr(image, cls=False)
	        print("OCR with cls=False succeeded after cls=True failed")

	    # NOTE(review): assumes result[0] is a list of [box, (text, score)]
	    # entries; confirm this matches the installed PaddleOCR version.
	    # An empty or None first entry is treated as a page without text.
	    detections = result[0] if result else None
	    return [
	        {"text": line[1][0], "confidence": float(line[1][1])}
	        for line in (detections or [])
	    ]


	def _ocr_page_with_easyocr(ocr_engine, image):
	    """Run an EasyOCR-style ``readtext`` engine on one page."""
	    return [
	        {"text": detection[1], "confidence": float(detection[2])}
	        for detection in ocr_engine.readtext(image)
	    ]


	@app.post("/ocr")
	async def ocr_document(file: UploadFile = File(...)):
	    """OCR endpoint for PDF and image uploads.

	    The upload is saved to a temporary file, turned into one image per page
	    (PDF) or a single image, preprocessed and recognised with the backend
	    returned by ``load_ocr_model``.  The temporary file is always removed.

	    Returns:
	        A JSONResponse.  On success it carries ``success``, ``filename``,
	        ``page_count``, ``pages``, ``full_text`` and ``ocr_engine``.
	        Otherwise it carries an ``error`` payload with status 400
	        (unsupported format), 503 (dependency or model unavailable) or 500.
	    """
	    temp_file_path = None
	    try:
	        # Import the OCR backends first so the availability flags are current.
	        get_paddleocr()
	        get_pytesseract()

	        # Bail out when no OCR backend is usable.
	        if not paddleocr_available and not pytesseract_available:
	            return JSONResponse(content={
	                "error": "所有OCR方案均不可用",
	                "details": {
	                    "paddleocr": "PaddleOCR模块未安装或不兼容Python 3.13",
	                    "pytesseract": "pytesseract库已安装，但tesseract可执行文件不可用"
	                },
	                "suggestions": [
	                    "对于Python 3.13用户：安装tesseract可执行文件后重试",
	                    "对于Python 3.10-3.12用户：安装PaddleOCR: pip install paddleocr",
	                    "tesseract可执行文件下载地址：https://github.com/tesseract-ocr/tesseract/wiki/Downloads"
	                ]
	            }, status_code=503)

	        # An upload without a file name is handled as an unsupported format.
	        filename = file.filename or ""
	        file_ext = filename.lower().split('.')[-1] if '.' in filename else ''

	        # Save the upload; the path is recorded first so the finally clause
	        # can remove the file even if reading or writing fails.
	        with tempfile.NamedTemporaryFile(suffix=".tmp", delete=False) as temp_file:
	            temp_file_path = temp_file.name
	            temp_file.write(await file.read())

	        images = []
	        if file_ext == 'pdf':
	            if not fitz_available:
	                return JSONResponse(content={"error": "PyMuPDF模块未安装，PDF处理功能不可用"}, status_code=503)
	            images = pdf_to_images(temp_file_path)
	            if not images:
	                return JSONResponse(content={"error": "PDF转换图片失败"}, status_code=500)
	        elif file_ext in ['jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif']:
	            if not cv2_available:
	                return JSONResponse(content={"error": "OpenCV模块未安装，图片处理功能不可用"}, status_code=503)
	            img = cv2.imread(temp_file_path)
	            if img is None:
	                return JSONResponse(content={"error": "图片读取失败"}, status_code=500)
	            images.append(img)
	        else:
	            return JSONResponse(content={"error": "不支持的文件格式，仅支持PDF和图片"}, status_code=400)

	        # Make sure an OCR backend is loaded.
	        current_ocr_model = load_ocr_model()
	        if current_ocr_model is None:
	            error_msg = ocr_load_error or 'OCR模型加载失败'
	            return JSONResponse(content={
	                "error": "OCR模型加载失败",
	                "details": error_msg,
	                "suggestions": [
	                    "检查Python版本是否兼容（推荐3.10-3.12用于PaddleOCR）",
	                    "如果使用Python 3.13，确保tesseract可执行文件已正确安装",
	                    "查看服务器日志获取更多详细信息"
	                ]
	            }, status_code=503)

	        # Recognise each page with whichever backend was loaded.
	        all_results = []
	        for page_num, img in enumerate(images, start=1):
	            preprocessed_img = preprocess_image(img)
	            try:
	                if isinstance(current_ocr_model, dict) and current_ocr_model['type'] == 'pytesseract':
	                    page_text = _ocr_page_with_pytesseract(
	                        current_ocr_model['engine'], img, preprocessed_img, file_ext, filename
	                    )
	                elif hasattr(current_ocr_model, 'ocr'): # PaddleOCR
	                    page_text = _ocr_page_with_paddleocr(current_ocr_model, preprocessed_img)
	                elif hasattr(current_ocr_model, 'readtext'): # EasyOCR
	                    page_text = _ocr_page_with_easyocr(current_ocr_model, preprocessed_img)
	                else:
	                    return JSONResponse(content={"error": "未知的OCR模型类型"}, status_code=500)
	            except Exception as ocr_err:
	                return JSONResponse(content={"error": f"OCR识别失败: {str(ocr_err)}"}, status_code=500)

	            all_results.append({
	                "page": page_num,
	                "content": page_text,
	                "full_text": "\n".join([item["text"] for item in page_text])
	            })

	        # Join the text of all pages.
	        full_document_text = "\n\n".join([page["full_text"] for page in all_results])

	        return JSONResponse(content={
	            "success": True,
	            "filename": file.filename,
	            "page_count": len(all_results),
	            "pages": all_results,
	            "full_text": full_document_text,
	            "ocr_engine": "paddleocr" if hasattr(current_ocr_model, 'ocr') else "pytesseract"
	        })
	    except Exception as e:
	        error_details = {
	            "error": "OCR处理失败",
	            "details": str(e),
	            "services": {
	                "paddleocr_available": paddleocr_available,
	                "pytesseract_available": pytesseract_available,
	                "fitz_available": fitz_available,
	                "cv2_available": cv2_available,
	                "models_loaded": models_loaded.get("ocr", False)
	            },
	            "suggestion": "查看服务器日志获取更多详细信息，或尝试使用兼容的Python版本"
	        }
	        return JSONResponse(content=error_details, status_code=500)
	    finally:
	        # Remove the temporary file on every exit path.
	        if temp_file_path and os.path.exists(temp_file_path):
	            os.unlink(temp_file_path)

	@app.get("/")
	def root():
	    """Landing endpoint returning a short service banner."""
	    banner = "智能文档处理服务已启动，支持音频转录和OCR文档解析"
	    return {"message": banner}

	# Script entry point.
	if __name__ == "__main__":
	    import uvicorn
	    from pathlib import Path

	    # uvicorn.run() blocks, and the /transcribe, /summarize and /process
	    # routes are registered further down in this file.  Passing the app
	    # object from here would therefore serve an app without them.  An
	    # import string makes uvicorn import this module in full before serving.
	    uvicorn.run(f"{Path(__file__).stem}:app", host="0.0.0.0", port=8000)

	@app.post("/transcribe")
	async def transcribe_audio(file: UploadFile = File(...)):
	    """Transcribe an uploaded audio file.

	    The upload is saved to a temporary file, converted to WAV with pydub
	    and transcribed with the Whisper model.  Both temporary files are
	    removed on every exit path.

	    Returns:
	        A JSONResponse with ``transcription`` on success, or an ``error``
	        payload with status 503 (dependency or model unavailable) or 500.
	    """
	    temp_file_path = None
	    wav_path = None
	    try:
	        # faster_whisper must be importable.
	        if not whisper_available:
	            return JSONResponse(content={"error": "faster_whisper模块未安装，转录功能不可用"}, status_code=503)

	        # pydub must be importable; report the Python 3.13+ audioop removal separately.
	        try:
	            from pydub import AudioSegment
	        except ImportError as e:
	            error_msg = str(e)
	            if "audioop" in error_msg or "pyaudioop" in error_msg:
	                return JSONResponse(content={"error": "Python 3.13+环境下pydub依赖的audioop模块已被移除，音频处理功能不可用"}, status_code=503)
	            else:
	                return JSONResponse(content={"error": "pydub模块未安装，音频处理功能不可用"}, status_code=503)

	        # Save the upload; the path is recorded first so the finally clause
	        # can remove the file even if reading or writing fails.
	        with tempfile.NamedTemporaryFile(suffix=".tmp", delete=False) as temp_file:
	            temp_file_path = temp_file.name
	            temp_file.write(await file.read())

	        # Convert to WAV.
	        audio = AudioSegment.from_file(temp_file_path)
	        wav_path = temp_file_path + ".wav"
	        audio.export(wav_path, format="wav")

	        # Load the model and transcribe (original note: a lower beam_size
	        # is used for speed).
	        current_whisper_model = load_whisper_model()
	        if current_whisper_model is None:
	            return JSONResponse(content={"error": "Whisper模型加载失败，转录功能不可用"}, status_code=503)

	        segments, _ = current_whisper_model.transcribe(wav_path, beam_size=3, language="zh", vad_filter=True)
	        transcription = "".join(segment.text for segment in segments)

	        return JSONResponse(content={"transcription": transcription})
	    except Exception as e:
	        return JSONResponse(content={"error": str(e)}, status_code=500)
	    finally:
	        # Single cleanup point for both temporary files.
	        for path in (temp_file_path, wav_path):
	            if path and os.path.exists(path):
	                os.unlink(path)

	@app.post("/summarize")
	async def summarize_text(text: dict):
	    """Summarize the text found under the ``"text"`` key of the request body.

	    Returns:
	        A JSONResponse with ``summary`` on success, or an ``error`` payload
	        with status 400 (no text), 503 (model unavailable) or 500.
	    """
	    try:
	        source_text = text.get("text", "")
	        if not source_text:
	            return JSONResponse(content={"error": "没有提供文本"}, status_code=400)

	        # Load the summarization model.
	        current_summarizer = load_summarizer_model()
	        if current_summarizer is None:
	            return JSONResponse(content={"error": "摘要模型加载失败，摘要功能不可用"}, status_code=503)

	        # Generation settings (original notes: tuned for Chinese and speed).
	        generation_kwargs = {
	            "max_length": 150, # longer maximum length suits Chinese
	            "min_length": 30, # longer minimum length suits Chinese
	            "do_sample": False,
	            "num_beams": 2, # fewer beams for speed
	            "length_penalty": 0.8, # length penalty adjustment
	            "no_repeat_ngram_size": 3, # avoid repetition
	        }
	        try:
	            outputs = current_summarizer(source_text, **generation_kwargs)
	            cleaned = outputs[0]["summary_text"]
	            # Strip surrounding whitespace and flatten newlines and tabs.
	            cleaned = cleaned.strip().replace('\n', ' ').replace('\t', ' ')
	            # Replace very short results with a notice.
	            if len(cleaned) < 10:
	                cleaned = "摘要生成过短，请检查原始文本"
	            return JSONResponse(content={"summary": cleaned})
	        except Exception as exc:
	            return JSONResponse(content={"error": f"摘要生成失败: {str(exc)}"}, status_code=500)
	    except Exception as exc:
	        return JSONResponse(content={"error": str(exc)}, status_code=500)

	@app.post("/process")
	async def process_audio(file: UploadFile = File(...)):
	    """Transcribe an uploaded audio file and summarize the transcription.

	    The upload is saved to a temporary file, converted to WAV with pydub,
	    transcribed with the Whisper model and then summarized.  Both temporary
	    files are removed on every exit path, including the early 503 returns.

	    Returns:
	        A JSONResponse with ``transcription`` and ``summary``.  When
	        summarization is unavailable or fails, ``summary`` is null and a
	        ``warning`` is included.  Errors carry an ``error`` payload with
	        status 503 (dependency or model unavailable) or 500.
	    """
	    temp_file_path = None
	    wav_path = None
	    try:
	        # faster_whisper must be importable.
	        if not whisper_available:
	            return JSONResponse(content={"error": "faster_whisper模块未安装，转录功能不可用"}, status_code=503)

	        # pydub must be importable; report the Python 3.13+ audioop removal separately.
	        try:
	            from pydub import AudioSegment
	        except ImportError as e:
	            error_msg = str(e)
	            if "audioop" in error_msg or "pyaudioop" in error_msg:
	                return JSONResponse(content={"error": "Python 3.13+环境下pydub依赖的audioop模块已被移除，音频处理功能不可用"}, status_code=503)
	            else:
	                return JSONResponse(content={"error": "pydub模块未安装，音频处理功能不可用"}, status_code=503)

	        # Save the upload; the path is recorded first so the finally clause
	        # can remove the file even if reading or writing fails.
	        with tempfile.NamedTemporaryFile(suffix=".tmp", delete=False) as temp_file:
	            temp_file_path = temp_file.name
	            temp_file.write(await file.read())

	        # Convert to WAV.
	        audio = AudioSegment.from_file(temp_file_path)
	        wav_path = temp_file_path + ".wav"
	        audio.export(wav_path, format="wav")

	        # Make sure the Whisper model is loaded.
	        current_whisper_model = load_whisper_model()
	        if current_whisper_model is None:
	            return JSONResponse(content={"error": "Whisper模型加载失败，转录功能不可用"}, status_code=503)

	        # Transcribe.
	        segments, _ = current_whisper_model.transcribe(wav_path, beam_size=3, language="zh", vad_filter=True)
	        transcription = "".join(segment.text for segment in segments)

	        # Summarize; a failure here only downgrades the response to a warning.
	        summary = None
	        warning = None
	        current_summarizer = load_summarizer_model()
	        if current_summarizer is None:
	            warning = "摘要模型加载失败，仅返回转录结果"
	        else:
	            try:
	                outputs = current_summarizer(
	                    transcription,
	                    max_length=150, # longer maximum length suits Chinese
	                    min_length=30, # longer minimum length suits Chinese
	                    do_sample=False,
	                    num_beams=2, # fewer beams for speed
	                    length_penalty=0.8, # length penalty adjustment
	                    no_repeat_ngram_size=3 # avoid repetition
	                )
	                summary = outputs[0]["summary_text"]
	                # Strip surrounding whitespace and flatten newlines and tabs.
	                summary = summary.strip().replace('\n', ' ').replace('\t', ' ')
	                # Replace very short results with a notice.
	                if len(summary) < 10:
	                    summary = "摘要生成过短，请检查原始文本"
	            except Exception as e:
	                print(f"Error during summarization: {e}")
	                summary = None
	                warning = "摘要生成失败，仅返回转录结果"

	        if warning:
	            return JSONResponse(content={
	                "transcription": transcription,
	                "summary": None,
	                "warning": warning
	            }, status_code=200)
	        return JSONResponse(content={
	            "transcription": transcription,
	            "summary": summary
	        })
	    except Exception as e:
	        return JSONResponse(content={"error": str(e)}, status_code=500)
	    finally:
	        # Single cleanup point for both temporary files.
	        for path in (temp_file_path, wav_path):
	            if path and os.path.exists(path):
	                os.unlink(path)