1.申请百度ocr api_key,secret_key
https://console.bce.baidu.com/ai-engine/old/#/ai/ocr/app/list
目前一个月免费调用1000次,个人使用应该足够


2.申请deepseek apikey
https://platform.deepseek.com/api_keys
充个10元,可以用很久很久。
3.程序代码如下:
(1)程序主逻辑
certificate_processor.py
import os
import pytesseract
from PIL import Image
import pandas as pd
import re
import cv2
import numpy as np
import requests
import base64
import json
import time
import logging
# Configure logging: runs at import time and sets the root logger to INFO.
logging.basicConfig(level=logging.INFO)
# Module-level logger; CertificateProcessor.default_log writes to it when the
# caller supplies no log callback.
logger = logging.getLogger(__name__)
class CertificateProcessor:
    """OCR certificate images, extract fields with DeepSeek, rename and tabulate.

    For every image in ``input_dir`` the processor runs OCR (Baidu OCR when
    credentials validate, otherwise local Tesseract), sends the recognised
    text to the DeepSeek chat API with a type-specific prompt, renames the
    image from the extracted fields and finally writes one Excel row per
    image.

    Args:
        input_dir: Directory that contains the certificate images.
        cert_type: Key of ``CONFIGS`` ("student" or "teacher"); an unknown
            key raises ``KeyError``.
        log_callback: Callable taking one message string. Defaults to
            ``default_log``, which writes to the module logger.
        baidu_api_key: Baidu OCR API key (optional).
        baidu_secret_key: Baidu OCR secret key (optional).
        deepseek_api_key: DeepSeek API key used for field extraction.
    """

    CONFIGS = {
        "student": {
            "prompt": "你是一个证书信息提取专家。请从文本中提取以下信息,并以json格式返回:获奖时间,比赛名称,获奖者姓名,奖项,指导教师,组织机构。不要markdown格式,获奖时间若有则按2010.01这样的格式返回,如果某项信息不存在,返回空字符串",
            "renaming_pattern": "{date}{name}同学在{event}荣获{award}{ext}",
            "excel_fields": ["获奖时间", "比赛名称", "获奖者姓名", "奖项", "指导教师", "组织机构"]
        },
        "teacher": {
            "prompt": "你是一个证书信息提取专家。请从文本中提取以下信息,并以json格式返回:活动时间,活动名称,获奖者姓名,奖项,组织机构。不要markdown格式,若活动名称中有时间则提取活动内容中的时间,没有的话按落款时间并且按2010.01这样的格式返回,特别注意获得好评也算奖项,如果某项信息不存在,返回空字符串",
            "renaming_pattern": "{date}{name}在{event}荣获{award}{ext}",
            "excel_fields": ["活动时间", "活动名称", "获奖者姓名", "奖项", "组织机构"]
        }
    }

    # Default Windows install location; only applied when it actually exists
    # so that other platforms keep pytesseract's own PATH lookup.
    TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    # NOTE(review): the pasted source lost this argument (a bare "," was left
    # behind). 'chi_sim+eng' mirrors the Baidu "CHN_ENG" setting and needs the
    # chi_sim traineddata installed - confirm against the original script.
    TESSERACT_LANG = 'chi_sim+eng'
    # Seconds before any HTTP call gives up instead of hanging forever.
    REQUEST_TIMEOUT = 60
    BAIDU_TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token"
    BAIDU_OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
    IMAGE_EXTS = ('.jpg', '.jpeg', '.png', '.bmp')
    # Characters that are illegal in Windows file names, plus control chars.
    _ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]+')

    def __init__(self, input_dir, cert_type="student", log_callback=None,
                 baidu_api_key=None, baidu_secret_key=None, deepseek_api_key=None):
        self.input_dir = input_dir
        self.cert_type = cert_type
        self.config = self.CONFIGS[cert_type]
        self.log_callback = log_callback if log_callback else self.default_log
        self.results = []
        # Baidu OCR is preferred; local pytesseract recognition is weaker.
        self.baidu_ocr_enabled = True
        self.deepseek_api_enabled = True
        if os.path.exists(self.TESSERACT_CMD):
            pytesseract.pytesseract.tesseract_cmd = self.TESSERACT_CMD
        # API credentials
        self.baidu_api_key = baidu_api_key
        self.baidu_secret_key = baidu_secret_key
        self.deepseek_api_key = deepseek_api_key
        self.deepseek_api_url = "https://api.deepseek.com/chat/completions"
        # Token obtained during validation and reused for every OCR request.
        self._baidu_access_token = None
        # Validate credentials if provided
        if self.baidu_ocr_enabled and self.baidu_api_key and self.baidu_secret_key:
            self.validate_baidu_credentials()
        else:
            self.baidu_ocr_enabled = False
            self.log_callback("警告: 百度OCR凭据缺失,将使用本地OCR")

    def default_log(self, message):
        """Write ``message`` to the module logger (used without a callback)."""
        logger.info(message)

    def _fetch_baidu_token(self):
        """Request a Baidu access token.

        Returns:
            ``(token, None)`` on success, ``(None, error_message)`` when the
            service answers without a usable token. Network and JSON errors
            propagate to the caller.
        """
        response = requests.get(
            self.BAIDU_TOKEN_URL,
            params={
                "grant_type": "client_credentials",
                "client_id": self.baidu_api_key,
                "client_secret": self.baidu_secret_key,
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        token_data = response.json()
        if "error" in token_data:
            return None, token_data.get("error_description", "未知错误")
        access_token = token_data.get("access_token")
        if not access_token:
            return None, "响应中缺少access_token"
        return access_token, None

    def validate_baidu_credentials(self):
        """Check the Baidu credentials; disable Baidu OCR when they fail.

        On success the access token is cached for later OCR requests.
        """
        try:
            access_token, error_msg = self._fetch_baidu_token()
        except Exception as e:
            self.log_callback(f"百度OCR认证请求失败: {str(e)}")
            self.baidu_ocr_enabled = False
            return
        if access_token is None:
            self.log_callback(f"百度OCR认证失败: {error_msg}")
            self.baidu_ocr_enabled = False
            return
        self._baidu_access_token = access_token

    def preprocess_image(self, image):
        """Binarise, denoise and sharpen a PIL image for local OCR.

        The image is converted to RGB first so that RGBA / palette /
        grayscale inputs do not break the RGB-to-gray conversion.

        Returns:
            A new PIL image holding the processed single-channel result.
        """
        img = np.array(image.convert("RGB"))
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        equalized = cv2.equalizeHist(gray)
        thresh = cv2.adaptiveThreshold(
            equalized, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )
        denoised = cv2.medianBlur(thresh, 3)
        kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
        sharpened = cv2.filter2D(denoised, -1, kernel)
        return Image.fromarray(sharpened)

    def baidu_ocr(self, image_path):
        """Recognise text in ``image_path`` with Baidu OCR.

        Returns:
            ``(text, "百度OCR")`` on success, ``("", "百度OCR失败")`` on any
            failure (the reason is sent to the log callback).
        """
        self.log_callback(f"[百度OCR] 开始处理: {os.path.basename(image_path)}")
        try:
            # Reuse the cached token; fetch one only when none is available.
            if not self._baidu_access_token:
                access_token, _error = self._fetch_baidu_token()
                if access_token is None:
                    self.log_callback("百度OCR访问令牌获取失败")
                    return "", "百度OCR失败"
                self._baidu_access_token = access_token
            # Read and encode image
            with open(image_path, "rb") as f:
                img_data = base64.b64encode(f.read()).decode('utf-8')
            # Send OCR request
            headers = {"Content-Type": "application/x-www-form-urlencoded"}
            params = {
                "access_token": self._baidu_access_token,
                "image": img_data,
                "language_type": "CHN_ENG",
                "detect_direction": "true"
            }
            response = requests.post(
                self.BAIDU_OCR_URL, headers=headers, data=params,
                timeout=self.REQUEST_TIMEOUT
            )
            result = response.json()
            if "error_code" in result:
                # 110 / 111: token invalid or expired - drop it so the next
                # image requests a fresh one.
                if result["error_code"] in (110, 111):
                    self._baidu_access_token = None
                error_msg = result.get("error_msg", "未知错误")
                self.log_callback(f"百度OCR识别错误: {error_msg}")
                return "", "百度OCR失败"
            if "words_result" not in result:
                self.log_callback("百度OCR识别失败: 无结果")
                return "", "百度OCR失败"
            text = "\n".join(item["words"] for item in result["words_result"])
            self.log_callback(f"[百度OCR] 识别成功, 字符数: {len(text)}")
            return text, "百度OCR"
        except Exception as e:
            self.log_callback(f"百度OCR请求失败: {str(e)}")
            return "", "百度OCR失败"

    def local_ocr(self, image_path):
        """Recognise text in ``image_path`` with local Tesseract.

        Returns:
            ``(text, "本地OCR")`` on success, ``("", "本地OCR失败")`` otherwise.
        """
        try:
            # The context manager releases the file handle; an open handle
            # would block the later rename on Windows.
            with Image.open(image_path) as orig_img:
                processed_img = self.preprocess_image(orig_img)
            text = pytesseract.image_to_string(
                processed_img,
                lang=self.TESSERACT_LANG,
                config='--psm 6 --oem 1'
            ).strip()
            self.log_callback(f"[本地OCR] 识别完成, 字符数: {len(text)}")
            return text, "本地OCR"
        except Exception as e:
            self.log_callback(f"本地OCR处理失败: {str(e)}")
            return "", "本地OCR失败"

    def hybrid_ocr(self, image_path):
        """Try Baidu OCR first (when enabled) and fall back to local OCR."""
        if self.baidu_ocr_enabled:
            baidu_text, source = self.baidu_ocr(image_path)
            if baidu_text.strip():
                return baidu_text, source
        return self.local_ocr(image_path)

    @staticmethod
    def _parse_json_object(raw_string):
        """Return the JSON object embedded in ``raw_string``.

        Takes the span from the first ``{`` to the last ``}`` so that
        markdown fences or chatter around the JSON are ignored.

        Raises:
            ValueError: if no ``{...}`` span exists or it is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``).
        """
        start_index = raw_string.find('{')
        end_index = raw_string.rfind('}')
        if start_index == -1 or end_index < start_index:
            raise ValueError("响应中未找到JSON对象")
        return json.loads(raw_string[start_index:end_index + 1])

    def deepseek_extract(self, text):
        """Extract the configured fields from OCR ``text`` via DeepSeek.

        Returns:
            The parsed JSON object from the model. On empty input or any
            API / parsing failure, a dict mapping every configured Excel
            field to ``''``.
        """
        empty_result = {field: '' for field in self.config["excel_fields"]}
        # No OCR text: skip the paid API call, the model could only guess.
        if not text or not text.strip():
            self.log_callback("OCR文本为空,跳过DeepSeek提取")
            return empty_result
        headers = {
            "Authorization": f"Bearer {self.deepseek_api_key}",
            "Content-Type": "application/json"
        }
        messages = [
            {"role": "system", "content": self.config["prompt"]},
            {"role": "user", "content": text}
        ]
        payload = {
            "model": "deepseek-chat",
            "messages": messages,
            "temperature": 0.1,
            "max_tokens": 200
        }
        try:
            response = requests.post(
                self.deepseek_api_url, headers=headers, json=payload,
                timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            result = response.json()
            raw_string = result['choices'][0]['message']['content']
            return self._parse_json_object(raw_string)
        except Exception as e:
            self.log_callback(f"DeepSeek API错误: {str(e)}")
            return empty_result

    @staticmethod
    def _as_text(value):
        """Coerce a model-returned value (may be None/number/list) to text."""
        if value is None:
            return ''
        if isinstance(value, str):
            return value.strip()
        return str(value)

    @classmethod
    def _safe_name_part(cls, value):
        """Make ``value`` usable inside a file name.

        Runs of path separators, reserved characters and control characters
        (including newlines) are replaced with a single ``_``.
        """
        return cls._ILLEGAL_FILENAME_CHARS.sub('_', cls._as_text(value).strip())

    @staticmethod
    def _same_path(first, second):
        """Return True when both paths name the same location."""
        if first is None or second is None:
            return False
        first_key = os.path.normcase(os.path.abspath(first))
        second_key = os.path.normcase(os.path.abspath(second))
        return first_key == second_key

    @staticmethod
    def _unique_path(directory, file_name, current_path=None):
        """Return a path in ``directory`` that does not clash with other files.

        ``file_name`` is used as is when free; otherwise ``_1``, ``_2``, ...
        is inserted before the extension. A candidate equal to
        ``current_path`` (the file being renamed) is accepted immediately,
        because a file cannot conflict with itself.
        """
        stem, ext = os.path.splitext(file_name)
        candidate = os.path.join(directory, file_name)
        counter = 1
        while (os.path.exists(candidate)
               and not CertificateProcessor._same_path(candidate, current_path)):
            candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
            counter += 1
        return candidate

    def _rename_image(self, image_path, filename, renaming_fields):
        """Rename one image from its extracted fields.

        Returns:
            ``(final_file_name, log_message)``. The original name is kept
            when a field is missing, the name already matches, or the OS
            refuses the rename.
        """
        if not all(renaming_fields.values()):
            return filename, "关键信息缺失,未重命名文件"
        wanted_name = self.config["renaming_pattern"].format(**renaming_fields)
        new_path = self._unique_path(self.input_dir, wanted_name,
                                     current_path=image_path)
        if self._same_path(new_path, image_path):
            return filename, f"文件名已符合规则,无需重命名: {filename}"
        try:
            os.rename(image_path, new_path)
        except OSError as e:
            # e.g. name too long or file locked: keep going with the old name.
            return filename, f"重命名失败,保留原文件名: {str(e)}"
        new_name = os.path.basename(new_path)
        return new_name, f"文件已重命名为: {new_name}"

    def _process_one(self, filename, index, total_files):
        """OCR, extract, rename and record a single image."""
        image_path = os.path.join(self.input_dir, filename)
        self.log_callback(f"\n=== 正在处理: {filename} ({index}/{total_files}) ===")
        start_time = time.time()
        text_content, ocr_source = self.hybrid_ocr(image_path)
        ocr_time = time.time() - start_time
        # Extract structured info
        parsed_info = self.deepseek_extract(text_content)
        # Prepare result entry
        result = {
            '图片路径': image_path,
            '图片名称': filename,
            'OCR来源': ocr_source,
            'OCR耗时(秒)': f"{ocr_time:.2f}"
        }
        for field in self.config["excel_fields"]:
            result[field] = self._as_text(parsed_info.get(field, ''))
        is_student = self.cert_type == 'student'
        renaming_fields = {
            'date': self._safe_name_part(
                parsed_info.get('获奖时间' if is_student else '活动时间', '')),
            'name': self._safe_name_part(parsed_info.get('获奖者姓名', '')),
            'event': self._safe_name_part(
                parsed_info.get('比赛名称' if is_student else '活动名称', '')),
            'award': self._safe_name_part(parsed_info.get('奖项', '')),
            'ext': os.path.splitext(filename)[1]
        }
        new_name, rename_message = self._rename_image(
            image_path, filename, renaming_fields)
        result['新文件名'] = new_name
        # Record before logging: a log callback that raises must not lose the
        # row of a file that has already been renamed.
        self.results.append(result)
        self.log_callback(rename_message)
        self.log_callback(f"=== 处理完成 ({ocr_source}), 耗时: {ocr_time:.2f}秒 ===")

    def process_certificates(self, output_excel):
        """Process every image in ``input_dir`` and write ``output_excel``.

        Images are handled in sorted name order. If processing is
        interrupted by an exception, the rows gathered so far are still
        written (best effort) before the exception is re-raised, because
        files may already have been renamed.

        Returns:
            The ``pandas.DataFrame`` that was written to Excel.
        """
        image_files = sorted(
            f for f in os.listdir(self.input_dir)
            if f.lower().endswith(self.IMAGE_EXTS)
        )
        total_files = len(image_files)
        self.log_callback(f"开始处理 {total_files} 个证书文件...")
        try:
            for index, filename in enumerate(image_files, start=1):
                self._process_one(filename, index, total_files)
        except BaseException:
            if self.results:
                try:
                    pd.DataFrame(self.results).to_excel(output_excel, index=False)
                except Exception:
                    # Do not mask the original error with a save failure.
                    logger.exception("保存部分结果失败")
            raise
        # Save to Excel
        df = pd.DataFrame(self.results)
        df.to_excel(output_excel, index=False)
        self.log_callback(f"\n处理完成! 结果已保存到 {output_excel}")
        return df


# (2)gui界面
certificate_gui.py
import tkinter as tk
from tkinter import ttk, filedialog, scrolledtext
import threading
import os
from certificate_processor import CertificateProcessor
import queue
class CertificateApp:
    """Tkinter front end for ``CertificateProcessor``.

    The processor runs in a daemon worker thread. The worker never touches
    widgets: it posts ``(message, tag)`` tuples to ``message_queue`` and the
    UI thread drains that queue every 100 ms in ``process_queue``.

    Stopping is cooperative. The processor's only hook back into the GUI is
    its log callback, so once Stop is pressed the next log call made by the
    worker raises ``_Cancelled`` and unwinds the run. Controls are re-enabled
    only when the worker has really finished, which prevents two runs from
    renaming files in the same folder at once.
    """

    # Queue sentinel asking the UI thread to re-enable the input controls.
    _ENABLE_CONTROLS = "UI:ENABLE_CONTROLS"

    class _Cancelled(BaseException):
        """Raised in the worker after Stop was pressed.

        Derives from ``BaseException`` so that broad ``except Exception``
        handlers inside the processor do not swallow the cancellation.
        """

    def __init__(self):
        self.window = tk.Tk()
        self.window.title("证书处理系统")
        self.window.geometry("800x600")
        self.window.resizable(True, True)
        # Worker state; created before any widget callback can fire.
        self.message_queue = queue.Queue()
        self._stop_event = threading.Event()
        self._job = None
        self.processing = False
        self.processor = None
        self.thread = None
        # Configure styles
        self.style = ttk.Style()
        self.style.configure("TFrame", padding=5)
        self.style.configure("TButton", padding=5)
        self.style.configure("TLabel", padding=5)
        # Create frames
        self.create_file_selection_frame()
        self.create_api_settings_frame()
        self.create_type_selection_frame()
        self.create_control_frame()
        self.create_log_frame()
        self.create_progress_frame()
        # Start periodic queue check
        self.window.after(100, self.process_queue)

    def create_file_selection_frame(self):
        """Create the image-directory entry and its browse button."""
        self.file_frame = ttk.LabelFrame(self.window, text="图片目录选择")
        self.file_frame.pack(fill="x", padx=10, pady=5)
        # Directory entry
        self.dir_label = ttk.Label(self.file_frame, text="图片目录:")
        self.dir_label.grid(row=0, column=0, padx=5, pady=5, sticky="w")
        self.dir_entry = ttk.Entry(self.file_frame, width=60)
        self.dir_entry.grid(row=0, column=1, padx=5, pady=5, sticky="we")
        # Browse button
        self.browse_btn = ttk.Button(
            self.file_frame,
            text="浏览...",
            command=self.browse_directory
        )
        self.browse_btn.grid(row=0, column=2, padx=5, pady=5)
        # Let the entry column absorb extra width.
        self.file_frame.columnconfigure(1, weight=1)

    def _add_secret_row(self, row, label_text):
        """Add one masked "label + entry" row to the API frame."""
        label = ttk.Label(self.api_frame, text=label_text)
        label.grid(row=row, column=0, padx=5, pady=5, sticky="w")
        entry = ttk.Entry(self.api_frame, width=50, show="*")
        entry.grid(row=row, column=1, padx=5, pady=5, sticky="we")
        return label, entry

    def create_api_settings_frame(self):
        """Create the masked inputs for the Baidu and DeepSeek keys."""
        self.api_frame = ttk.LabelFrame(self.window, text="API密钥设置")
        self.api_frame.pack(fill="x", padx=10, pady=5)
        self.baidu_key_label, self.baidu_key_entry = self._add_secret_row(
            0, "百度API Key:")
        self.baidu_secret_label, self.baidu_secret_entry = self._add_secret_row(
            1, "百度Secret Key:")
        self.deepseek_label, self.deepseek_entry = self._add_secret_row(
            2, "DeepSeek API Key:")
        # Let the entry column absorb extra width.
        self.api_frame.columnconfigure(1, weight=1)

    def create_type_selection_frame(self):
        """Create the student/teacher certificate radio buttons."""
        self.type_frame = ttk.LabelFrame(self.window, text="证书类型")
        self.type_frame.pack(fill="x", padx=10, pady=5)
        self.type_var = tk.StringVar(value="student")
        self.student_rb = ttk.Radiobutton(
            self.type_frame,
            text="学生证书",
            variable=self.type_var,
            value="student"
        )
        self.student_rb.pack(side="left", padx=10, pady=5)
        self.teacher_rb = ttk.Radiobutton(
            self.type_frame,
            text="教师证书",
            variable=self.type_var,
            value="teacher"
        )
        self.teacher_rb.pack(side="left", padx=10, pady=5)

    def create_control_frame(self):
        """Create the start / stop / open-folder buttons."""
        self.ctrl_frame = ttk.Frame(self.window)
        self.ctrl_frame.pack(fill="x", padx=10, pady=5)
        self.start_btn = ttk.Button(
            self.ctrl_frame,
            text="开始处理",
            command=self.start_processing
        )
        self.start_btn.pack(side="left", padx=5, pady=5)
        self.stop_btn = ttk.Button(
            self.ctrl_frame,
            text="停止",
            state="disabled",
            command=self.stop_processing
        )
        self.stop_btn.pack(side="left", padx=5, pady=5)
        self.open_folder_btn = ttk.Button(
            self.ctrl_frame,
            text="打开结果文件夹",
            command=self.open_output_folder
        )
        self.open_folder_btn.pack(side="right", padx=5, pady=5)

    def create_log_frame(self):
        """Create the scrolling log area and its colour tags."""
        self.log_frame = ttk.LabelFrame(self.window, text="处理日志")
        self.log_frame.pack(fill="both", expand=True, padx=10, pady=5)
        self.log_text = scrolledtext.ScrolledText(
            self.log_frame,
            wrap="word",
            state="normal"
        )
        self.log_text.pack(fill="both", expand=True, padx=5, pady=5)
        # One tag per log level; the tag name is what callers pass to log().
        self.log_text.tag_config("info", foreground="black")
        self.log_text.tag_config("success", foreground="green")
        self.log_text.tag_config("warning", foreground="orange")
        self.log_text.tag_config("error", foreground="red")

    def create_progress_frame(self):
        """Create the progress bar and the status label below it."""
        self.progress_frame = ttk.Frame(self.window)
        self.progress_frame.pack(fill="x", padx=10, pady=5)
        self.progress = ttk.Progressbar(
            self.progress_frame,
            orient="horizontal",
            mode="determinate",
            length=400
        )
        self.progress.pack(fill="x", padx=5, pady=5)
        self.status_label = ttk.Label(
            self.progress_frame,
            text="就绪"
        )
        self.status_label.pack(pady=5)

    def browse_directory(self):
        """Ask for a directory and put the choice into the entry."""
        directory = filedialog.askdirectory()
        if directory:
            self.dir_entry.delete(0, tk.END)
            self.dir_entry.insert(0, directory)

    def _set_controls_enabled(self, enabled):
        """Enable the input controls (Stop gets the opposite state)."""
        input_state = "normal" if enabled else "disabled"
        self.start_btn.config(state=input_state)
        self.stop_btn.config(state="disabled" if enabled else "normal")
        for widget in (self.browse_btn, self.student_rb, self.teacher_rb,
                       self.baidu_key_entry, self.baidu_secret_entry,
                       self.deepseek_entry):
            widget.config(state=input_state)

    def start_processing(self):
        """Validate the inputs and start the worker thread.

        All widget values are read here, on the UI thread, and handed to
        the worker through ``self._job``. The processor itself is built in
        the worker because its constructor may perform a network request.
        """
        if self.thread is not None and self.thread.is_alive():
            self.log("上一次处理尚未结束,请稍候", "warning")
            return
        directory = self.dir_entry.get()
        if not directory or not os.path.isdir(directory):
            self.log("错误: 请选择有效的图片目录", "error")
            return
        # Get API keys and certificate type
        baidu_api_key = self.baidu_key_entry.get().strip()
        baidu_secret_key = self.baidu_secret_entry.get().strip()
        deepseek_api_key = self.deepseek_entry.get().strip()
        cert_type = self.type_var.get()
        # Clear the log first. log() leaves the widget disabled, and a
        # disabled Text widget ignores delete(), so re-enable it around it.
        self.log_text.config(state="normal")
        self.log_text.delete(1.0, tk.END)
        self.log_text.config(state="disabled")
        # Log API key usage status
        if baidu_api_key and baidu_secret_key:
            self.log("百度OCR凭据已提供", "info")
        else:
            self.log("警告: 百度OCR凭据缺失,将使用本地OCR", "warning")
        if not deepseek_api_key:
            self.log("错误: DeepSeek API凭据缺失,处理将失败", "error")
            return
        self.log("DeepSeek API凭据已提供", "info")
        # Inputs are valid: lock the controls for the duration of the run.
        self._set_controls_enabled(False)
        self.log(f"开始处理{cert_type}证书...", "info")
        self.progress["value"] = 0
        self.status_label.config(text="处理中...")
        self._job = {
            "directory": directory,
            "cert_type": cert_type,
            "baidu_api_key": baidu_api_key,
            "baidu_secret_key": baidu_secret_key,
            "deepseek_api_key": deepseek_api_key,
        }
        self._stop_event.clear()
        self.processing = True
        self.thread = threading.Thread(
            target=self.run_processing,
            daemon=True
        )
        self.thread.start()

    def _worker_log(self, message, tag="info"):
        """Log callback handed to the processor (runs in the worker thread).

        Raises:
            _Cancelled: when Stop has been requested.
        """
        if self._stop_event.is_set():
            raise self._Cancelled()
        self.queue_message(message, tag)

    def run_processing(self):
        """Worker-thread body: build the processor and run it.

        Always posts the enable-controls sentinel when it ends, whether it
        finished, failed or was cancelled.
        """
        job = self._job
        try:
            self.processor = CertificateProcessor(
                job["directory"],
                job["cert_type"],
                self._worker_log,
                baidu_api_key=job["baidu_api_key"],
                baidu_secret_key=job["baidu_secret_key"],
                deepseek_api_key=job["deepseek_api_key"]
            )
            output_file = os.path.join(job["directory"], "证书统计表.xlsx")
            self.processor.process_certificates(output_file)
            self.queue_message("处理完成!", "success")
        except self._Cancelled:
            self.queue_message("处理已停止", "warning")
        except Exception as e:
            self.queue_message(f"处理错误: {str(e)}", "error")
        finally:
            self.queue_message(self._ENABLE_CONTROLS)

    def stop_processing(self):
        """Request a stop; the worker ends at its next log call."""
        self.processing = False
        self._stop_event.set()
        # Avoid repeated clicks; the other controls stay locked until the
        # worker confirms it has ended.
        self.stop_btn.config(state="disabled")
        self.status_label.config(text="正在停止...")
        self.log("已请求停止,当前步骤结束后将停止", "warning")

    def open_output_folder(self):
        """Open the selected directory in the platform's file manager."""
        import subprocess
        import sys

        directory = self.dir_entry.get()
        if not (directory and os.path.isdir(directory)):
            return
        try:
            if hasattr(os, "startfile"):
                # Windows only
                os.startfile(directory)
            elif sys.platform == "darwin":
                subprocess.run(["open", directory], check=False)
            else:
                subprocess.run(["xdg-open", directory], check=False)
        except OSError as e:
            self.log(f"无法打开文件夹: {str(e)}", "error")

    def queue_message(self, message, tag="info"):
        """Post ``(message, tag)`` to the queue; safe from any thread."""
        self.message_queue.put((message, tag))

    @staticmethod
    def _parse_progress(message):
        """Return the percentage encoded in a "正在处理 ... (i/n) ===" message.

        The counter is taken from the last "(" so that parentheses inside
        the file name do not confuse the parser. Returns ``None`` for any
        message that is not a well-formed progress line.
        """
        if "正在处理" not in message or "==" not in message:
            return None
        _head, separator, tail = message.rpartition("(")
        if not separator:
            return None
        try:
            current_text, total_text = tail.split(")", 1)[0].split("/", 1)
            return int(current_text) * 100 / int(total_text)
        except (ValueError, ZeroDivisionError):
            return None

    def process_queue(self):
        """Drain the message queue on the UI thread and reschedule itself."""
        try:
            while True:
                try:
                    message, tag = self.message_queue.get_nowait()
                except queue.Empty:
                    break
                if message == self._ENABLE_CONTROLS:
                    # The worker has ended: unlock the inputs.
                    self.processing = False
                    self._set_controls_enabled(True)
                    self.status_label.config(text="就绪")
                    continue
                self.log(message, tag)
                percent = self._parse_progress(message)
                if percent is not None:
                    self.progress["value"] = percent
        finally:
            # Schedule next queue check
            self.window.after(100, self.process_queue)

    def log(self, message, tag="info"):
        """Append ``message`` to the log view; UI thread only."""
        self.log_text.config(state="normal")
        self.log_text.insert(tk.END, message + "\n", tag)
        self.log_text.see(tk.END)
        self.log_text.config(state="disabled")

    def run(self):
        """Enter the Tk main loop; returns when the window is closed."""
        self.window.mainloop()
if __name__ == "__main__":
    # Build the window and block in the Tk event loop until it is closed.
    app = CertificateApp()
    app.run()
0 评论 最近
没有评论!