Caoyanyi
committed on
Commit
·
8dae0e0
1
Parent(s):
9363b11
* Adjust OCR model logic.
Browse files
app.py
CHANGED
|
@@ -447,26 +447,37 @@ async def ocr_document(file: UploadFile = File(...)):
|
|
| 447 |
if isinstance(preprocessed_img, np.ndarray):
|
| 448 |
# 转换为PIL Image
|
| 449 |
from PIL import Image
|
| 450 |
-
|
| 451 |
-
#
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
else:
|
| 462 |
# 直接使用图像
|
| 463 |
-
result = pytesseract_engine.image_to_data(
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
elif hasattr(current_ocr_model, 'ocr'): # PaddleOCR
|
| 471 |
try:
|
| 472 |
result = current_ocr_model.ocr(preprocessed_img, cls=True)
|
|
|
|
| 447 |
if isinstance(preprocessed_img, np.ndarray):
|
| 448 |
# 转换为PIL Image
|
| 449 |
from PIL import Image
|
| 450 |
+
# 对于pytesseract,直接使用灰度图可能效果更好,尤其是对于截图
|
| 451 |
+
# 检查是否是截图
|
| 452 |
+
if file_ext in ['png', 'jpg', 'jpeg'] and 'screenshot' in file.filename.lower():
|
| 453 |
+
# 对于截图,直接使用灰度图而不是二值化
|
| 454 |
+
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img
|
| 455 |
+
img_pil = Image.fromarray(gray_img)
|
| 456 |
+
else:
|
| 457 |
+
img_pil = Image.fromarray(preprocessed_img)
|
| 458 |
+
# 执行OCR,指定中文语言包
|
| 459 |
+
result = pytesseract_engine.image_to_data(
|
| 460 |
+
img_pil,
|
| 461 |
+
output_type=pytesseract_engine.Output.DICT,
|
| 462 |
+
lang='chi_sim+eng', # 添加中文简体和英文语言包
|
| 463 |
+
config='--psm 6' # 假设单一文本块
|
| 464 |
+
)
|
| 465 |
else:
|
| 466 |
# 直接使用图像
|
| 467 |
+
result = pytesseract_engine.image_to_data(
|
| 468 |
+
preprocessed_img,
|
| 469 |
+
output_type=pytesseract_engine.Output.DICT,
|
| 470 |
+
lang='chi_sim+eng', # 添加中文简体和英文语言包
|
| 471 |
+
config='--psm 6' # 假设单一文本块
|
| 472 |
+
)
|
| 473 |
+
|
| 474 |
+
# 提取文本结果 - pytesseract格式
|
| 475 |
+
page_text = []
|
| 476 |
+
for i in range(len(result['text'])):
|
| 477 |
+
text = result['text'][i].strip()
|
| 478 |
+
if text:
|
| 479 |
+
confidence = float(result['conf'][i]) / 100.0 # 转换为0-1范围
|
| 480 |
+
page_text.append({"text": text, "confidence": confidence})
|
| 481 |
elif hasattr(current_ocr_model, 'ocr'): # PaddleOCR
|
| 482 |
try:
|
| 483 |
result = current_ocr_model.ocr(preprocessed_img, cls=True)
|