关于提高PaddleOCR识别准确率的一些优化(三)
PaddleOCR
Awesome multilingual OCR toolkits based on PaddlePaddle (practical ultra lightweight OCR system, support 80+ languages recognition, provide data annotation and synthesis tools, support training and deployment among server, mobile, embedded and IoT devices)
项目地址:https://gitcode.com/gh_mirrors/pa/PaddleOCR

·
团队博客: CSDN AI小组
前言
1、本文基于上一篇文章:关于提高OCR识别准确率的一些优化(二)进行了一些优化,将图片方向识别准确率提升至96%。
2、在阅读这篇文章之前,建议先阅读上一篇,以便更好地理解本文的优化思路。
一、优化思路
1、在上一篇文章中,我们使用paddleocr的方向分类器直接判别图片方向,发现效果并不怎么好,而且效率也很低,识别一张图片平均耗时2s。
2、鉴于以上存在的问题,于是想出了一个新的优化方案:
- 使用paddleocr的文本矩形框检测得到所有文本矩形框坐标
- 取出长宽比(宽/高)在 5 - 25 之间(扁长的矩形)或 0.04 - 0.2 之间(竖长的矩形)的文本矩形框坐标
- 从中随机取出一个矩形或按长宽比大小排序,取出长宽比居中的矩形(这里为了简便,直接取出第0个矩形)
- 用取出来的矩形,从原图中抠图
- 将抠出来的图片,作为paddleocr方向分类器的输入
二、完整代码
import cv2
import os
import time
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
class GetImageRotation(object):
    """Estimate the rotation of a text image (0, 90, 180 or 270) with PaddleOCR.

    Text boxes are detected first. Their width/height ratios tell whether the
    boxes are predominantly wide or tall, and PaddleOCR's angle classifier
    (labels ``'0'`` / ``'180'``), applied to one cropped text box, resolves
    the remaining ambiguity.
    """

    def __init__(self):
        # Two instances are created on purpose: the accompanying article
        # reports that with a single instance PaddleOCR warns that "the angle
        # classifier is not initialized" and the direction cannot be detected.
        self.ocr = PaddleOCR(use_angle_cls=True)
        self.ocr_angle = PaddleOCR(use_angle_cls=True)

    @staticmethod
    def _aspect_ratio(rect):
        """Return width / height of a 4-point box, or None if its height is 0.

        The width is taken from points 0 -> 1 (x difference) and the height
        from points 0 -> 3 (y difference).
        """
        p0, p1, p3 = rect[0], rect[1], rect[3]
        width = abs(p1[0] - p0[0])
        height = abs(p3[1] - p0[1])
        if height == 0:
            # Degenerate box: skip it instead of raising ZeroDivisionError.
            return None
        return width / height

    def get_real_rotation_when_null_rect(self, rect_list):
        """Fallback orientation flag used when no box passes the strict filter.

        Near-square boxes (ratio within 0.5 of 1.0) and zero-height boxes are
        ignored; the remaining ratios are averaged.

        Args:
            rect_list: Iterable of 4-point boxes ``[[x, y], ...]``.

        Returns:
            1 if the mean width/height ratio of the remaining boxes is at
            least 1.5, otherwise 0 (also 0 when no box remains).
        """
        ratios = []
        for rect in rect_list:
            ratio = self._aspect_ratio(rect)
            if ratio is None or abs(ratio - 1.0) < 0.5:
                continue
            ratios.append(ratio)
        if not ratios:
            return 0
        return 1 if sum(ratios) / len(ratios) >= 1.5 else 0

    def get_real_rotation_flag(self, rect_list):
        """Select clearly elongated boxes and derive the orientation flag.

        A box is kept when its width/height ratio lies in [5, 25] (wide) or
        in [0.04, 0.2] (tall).

        Args:
            rect_list: Iterable of 4-point boxes ``[[x, y], ...]``.

        Returns:
            Tuple ``(flag, boxes)``: ``flag`` is 1 when the mean ratio of the
            kept boxes is at least 1.5, else 0; ``boxes`` is the list of kept
            boxes. ``(0, [])`` is returned when no box qualifies.
        """
        ret_rect = []
        ratio_sum = 0
        for rect in rect_list:
            ratio = self._aspect_ratio(rect)
            if ratio is None:
                continue
            if 5 <= ratio <= 25 or 0.04 <= ratio <= 0.2:
                ret_rect.append(rect)
                ratio_sum += ratio
        if not ret_rect:
            return 0, ret_rect
        flag = 1 if ratio_sum / len(ret_rect) >= 1.5 else 0
        return flag, ret_rect

    def crop_image(self, rect, image):
        """Cut the axis-aligned bounding box of ``rect`` out of ``image``.

        Args:
            rect: Box given as a sequence of ``[x, y]`` points.
            image: Array indexed as ``image[y, x]``.

        Returns:
            The cropped view of ``image`` (may be empty for degenerate boxes).
        """
        xs = [point[0] for point in rect]
        ys = [point[1] for point in rect]
        # Clamp at 0 so that negative coordinates do not wrap around when
        # used as slice bounds; bounds past the image edge are already
        # handled by slicing itself.
        left = max(int(min(xs)), 0)
        right = max(int(max(xs)), 0)
        top = max(int(min(ys)), 0)
        bottom = max(int(max(ys)), 0)
        return image[top:bottom, left:right]

    def get_img_real_angle(self, img_path):
        """Return the estimated rotation of the image stored at ``img_path``.

        Args:
            img_path: Path of the image file to analyse.

        Returns:
            One of 0, 90, 180, 270. 0 is also returned when the detector
            reports no text boxes.

        Raises:
            ValueError: If the image cannot be read.
        """
        image = cv2.imread(img_path)
        if image is None:
            raise ValueError('cannot read image: {}'.format(img_path))

        rect_list = self.ocr.ocr(image, rec=False)
        if rect_list is None or rect_list == [[]]:
            return 0

        real_angle_flag, rect_good = self.get_real_rotation_flag(rect_list)
        angle_cls = None
        if rect_good:
            try:
                # For simplicity the first qualifying box is used.
                image_crop = self.crop_image(rect_good[0], image)
                if image_crop.size:
                    angle_cls = self.ocr_angle.ocr(
                        image_crop, det=False, rec=False, cls=True)
            except Exception:
                # Best effort: any failure on the cropped box falls back to
                # classifying the whole image below.
                angle_cls = None
        if angle_cls is None:
            real_angle_flag = self.get_real_rotation_when_null_rect(rect_list)
            angle_cls = self.ocr_angle.ocr(
                image, det=False, rec=False, cls=True)
        print(angle_cls)
        print('real_angle_flag: {}'.format(real_angle_flag))

        # NOTE(review): assumes the classifier result is indexable as
        # result[0][0] == label string; confirm for the PaddleOCR version used.
        label = angle_cls[0][0]
        if label == '0':
            return 0 if real_angle_flag else 270
        if label == '180':
            return 180 if real_angle_flag else 90
        return 0
def get_files_path_2(file_dir, suffixes=None):
    """Collect the paths of all files below ``file_dir``, recursively.

    Args:
        file_dir: Root directory to walk. A missing directory yields ``[]``.
        suffixes: Optional file-name suffix, or iterable of suffixes (for
            example ``('.jpg', '.png')``). When given, only files whose name
            ends with one of them (case-insensitive) are returned. ``None``
            (the default) returns every file.

    Returns:
        List of paths built as ``os.path.join(root, name)``; they are
        absolute only when ``file_dir`` itself is absolute.
    """
    if suffixes is None:
        wanted = None
    elif isinstance(suffixes, str):
        wanted = (suffixes.lower(),)
    else:
        wanted = tuple(suffix.lower() for suffix in suffixes)

    files_path = []
    for root, _dirs, files in os.walk(file_dir):
        for name in files:
            if wanted is None or name.lower().endswith(wanted):
                files_path.append(os.path.join(root, name))
    return files_path
问:为什么要实例化两个PaddleOCR?
答:仅实例化一个PaddleOCR时,会出现如下警告,导致不能检测方向
[2021/07/03 12:51:32] root WARNING: Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process
应该是PaddleOCR内部的问题,有时间可以深究一下
三、测试
# Benchmark driver: runs the orientation detector over every file of the test
# folder, prints the detected angle and the time spent per image, and finally
# the average time.
# perf_counter is used for timing; importing it (rather than
# ``from time import time``) also avoids rebinding the ``time`` module name.
from time import perf_counter

get_image_rotation = GetImageRotation()
image_path = get_files_path_2('/Users/zhangzc/Desktop/workplace/ocrtest/test')
count = 0  # number of images whose detected angle is not 0
time_list = []
for path in image_path:
    # Skip macOS folder metadata wherever it sits in the tree; comparing
    # against one hard-coded absolute path never matched the walked folder.
    if os.path.basename(path) == '.DS_Store':
        continue
    t1 = perf_counter()
    angle = get_image_rotation.get_img_real_angle(path)
    t2 = perf_counter()
    print('----'*10)
    print(angle)
    print('cost time: {} s'.format(t2-t1))
    time_list.append(t2-t1)
    print('----'*10)
    if angle != 0:
        print('****'*10)
        print(path)
        print('****'*10)
        count += 1
# Only report an average when at least one image was processed; the mean of
# an empty list would be nan.
if time_list:
    print('print average cost time : {} s'.format(np.mean(time_list)))
测试结果:
- 200张0度图片,96%准确率
- 200张90度图片,仅有13%准确率
- 200张180度图片,88%准确率
- 200张270度图片,85%准确率
平均耗时:1.25s
四、分析
1、从测试结果发现,90度的图片准确率太低
2、90度的图片,绝大多数都被检测为270度
3、于是拿出所有方向抠出来的图片比较:
PaddleOCR
Awesome multilingual OCR toolkits based on PaddlePaddle (practical ultra lightweight OCR system, support 80+ languages recognition, provide data annotation and synthesis tools, support training and deployment among server, mobile, embedded and IoT devices)
项目地址:https://gitcode.com/gh_mirrors/pa/PaddleOCR
4、仔细观察就会发现:90度的图片旋转90度后,成了180度,270度的图片,旋转90度后,变成了0度,而paddleocr的方向分类器,在这两个方向上的识别准确率也比较高。因此,在图片被识别为270度时,我们可以将图片顺时针旋转90度后再输入到方向分类器中识别,或许会有个更好的效果。于是,我们开始优化。
五、优化
直接看代码吧
def rotate_bound_white_bg(self, image, angle):
    """Rotate ``image`` by ``angle`` degrees on a canvas enlarged to fit it.

    Areas of the output not covered by the rotated image are filled with
    white (255, 255, 255).

    Args:
        self: Unused; the signature is written in method form.
        image: Array whose first two dimensions are (height, width).
        angle: Rotation in degrees. The value is negated before being handed
            to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image as returned by ``cv2.warpAffine``.
    """
    height, width = image.shape[:2]
    center_x = width // 2
    center_y = height // 2

    matrix = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Size of the bounding box that contains the whole rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Shift the transform so the rotated content is centred on the new canvas.
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(
        image, matrix, (new_width, new_height), borderValue=(255, 255, 255))
class GetImageRotation(object):
    """Estimate the rotation of a text image (0, 90, 180 or 270) with PaddleOCR.

    Text boxes are detected first. The majority between clearly wide and
    clearly tall boxes decides the orientation flag, and PaddleOCR's angle
    classifier (labels ``'0'`` / ``'180'``), applied to one randomly chosen
    cropped box, resolves the remaining ambiguity. When the first guess is
    270, the crop is rotated by 90 degrees and classified once more.
    """

    def __init__(self):
        # Two instances are created on purpose: the accompanying article
        # reports that with a single instance PaddleOCR warns that "the angle
        # classifier is not initialized" and the direction cannot be detected.
        self.ocr = PaddleOCR(use_angle_cls=True)
        self.ocr_angle = PaddleOCR(use_angle_cls=True)

    @staticmethod
    def _aspect_ratio(rect):
        """Return width / height of a 4-point box, or None if its height is 0.

        The width is taken from points 0 -> 1 (x difference) and the height
        from points 0 -> 3 (y difference).
        """
        p0, p1, p3 = rect[0], rect[1], rect[3]
        width = abs(p1[0] - p0[0])
        height = abs(p3[1] - p0[1])
        if height == 0:
            # Degenerate box: skip it instead of raising ZeroDivisionError.
            return None
        return width / height

    def _classify_angle(self, image):
        """Run only the angle classifier on ``image`` and return its label."""
        result = self.ocr_angle.ocr(image, det=False, rec=False, cls=True)
        # NOTE(review): assumes the result is indexable as result[0][0] ==
        # label string ('0' or '180'); confirm for the PaddleOCR version used.
        return result[0][0]

    def rotate_bound_white_bg(self, image, angle):
        """Rotate ``image`` by ``angle`` degrees, padding with white.

        The output canvas is enlarged so the whole rotated image fits; the
        angle is negated before being passed to ``cv2.getRotationMatrix2D``.
        """
        (h, w) = image.shape[:2]
        (cX, cY) = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
        cos = np.abs(M[0, 0])
        sin = np.abs(M[0, 1])
        nW = int((h * sin) + (w * cos))
        nH = int((h * cos) + (w * sin))
        # Shift the transform so the content is centred on the new canvas.
        M[0, 2] += (nW / 2) - cX
        M[1, 2] += (nH / 2) - cY
        return cv2.warpAffine(image, M, (nW, nH), borderValue=(255, 255, 255))

    def get_real_rotation_when_null_rect(self, rect_list):
        """Fallback orientation flag used when no box passes the strict filter.

        Near-square boxes (ratio within 0.5 of 1.0) and zero-height boxes are
        ignored; the remaining ratios are averaged.

        Args:
            rect_list: Iterable of 4-point boxes ``[[x, y], ...]``.

        Returns:
            1 if the mean width/height ratio of the remaining boxes is at
            least 1.5, otherwise 0 (also 0 when no box remains).
        """
        ratios = []
        for rect in rect_list:
            ratio = self._aspect_ratio(rect)
            if ratio is None or abs(ratio - 1.0) < 0.5:
                continue
            ratios.append(ratio)
        if not ratios:
            return 0
        return 1 if sum(ratios) / len(ratios) >= 1.5 else 0

    def get_real_rotation_flag(self, rect_list):
        """Pick the dominant group of elongated boxes and its orientation flag.

        Boxes with a width/height ratio in [5, 25] count as wide, boxes with
        a ratio in [0.04, 0.2] count as tall; all other boxes are ignored.

        Args:
            rect_list: Iterable of 4-point boxes ``[[x, y], ...]``.

        Returns:
            ``(1, wide_boxes)`` when wide boxes strictly outnumber tall ones,
            otherwise ``(0, tall_boxes)`` (the list may be empty).
        """
        rect_big_list = []
        rect_small_list = []
        for rect in rect_list:
            ratio = self._aspect_ratio(rect)
            if ratio is None:
                continue
            if 5 <= ratio <= 25:
                rect_big_list.append(rect)
            elif 0.04 <= ratio <= 0.2:
                rect_small_list.append(rect)
        # Every wide ratio is >= 5 and every tall ratio is <= 0.2, so the
        # "mean ratio >= 1.5" test reduces to which group was selected; this
        # also avoids taking the mean of an empty list.
        if len(rect_big_list) > len(rect_small_list):
            return 1, rect_big_list
        return 0, rect_small_list

    def crop_image(self, rect, image):
        """Cut the axis-aligned bounding box of ``rect`` out of ``image``.

        Args:
            rect: Box given as a sequence of ``[x, y]`` points.
            image: Array indexed as ``image[y, x]``.

        Returns:
            The cropped view of ``image`` (may be empty for degenerate boxes).
        """
        xs = [point[0] for point in rect]
        ys = [point[1] for point in rect]
        # Clamp at 0 so that negative coordinates do not wrap around when
        # used as slice bounds.
        left = max(int(min(xs)), 0)
        right = max(int(max(xs)), 0)
        top = max(int(min(ys)), 0)
        bottom = max(int(max(ys)), 0)
        return image[top:bottom, left:right]

    def get_img_real_angle(self, img_path):
        """Return the estimated rotation of the image stored at ``img_path``.

        Args:
            img_path: Path of the image file to analyse.

        Returns:
            One of 0, 90, 180, 270. 0 is also returned when the detector
            reports no text boxes.

        Raises:
            ValueError: If the image cannot be read.
        """
        # Imported here because the file's import block does not provide it.
        from random import choice

        image = cv2.imread(img_path)
        if image is None:
            raise ValueError('cannot read image: {}'.format(img_path))

        rect_list = self.ocr.ocr(image, rec=False)
        if rect_list is None or rect_list == [[]]:
            return 0

        real_angle_flag, rect_good = self.get_real_rotation_flag(rect_list)
        image_crop = None
        label = None
        if rect_good:
            try:
                candidate = self.crop_image(choice(rect_good), image)
                if candidate.size:
                    label = self._classify_angle(candidate)
                    image_crop = candidate
            except Exception:
                # Best effort: any failure on the cropped box falls back to
                # classifying the whole image below.
                label = None
        if label is None:
            image_crop = None
            real_angle_flag = self.get_real_rotation_when_null_rect(rect_list)
            label = self._classify_angle(image)

        if label == '0':
            if real_angle_flag:
                return 0
            # First guess is 270. Rotate the crop by 90 degrees and classify
            # again; a '180' answer means the image was actually at 90.
            if image_crop is not None:
                rotated_crop = self.rotate_bound_white_bg(image_crop, 90)
                if self._classify_angle(rotated_crop) == '180':
                    return 90
            return 270
        if label == '180':
            return 180 if real_angle_flag else 90
        return 0
与前面的代码相比,主要有两处改动:一是当初步判断结果为270度(ret_angle=270)时,将抠出的图片顺时针旋转90度后再判断一次方向;二是在get_real_rotation_flag函数中分别统计长宽比在 5 - 25 之间和在 0.04 - 0.2 之间的矩形框数量,并以数量占优的一类为准。这样做是为了确保如下这种情况也能正确识别:
总结
1、在90度方向上,准确率相比之前的13%提升至86.5%
2、在270度方向上,准确率为94%,较之前的85%提升了9个百分点
3、在0度和180度方向上,准确率分别为93.5%、89%
4、在所有方向上的平均准确率为90.75%,效果还不错
5、如果你有更好的优化方案,欢迎随时私信,感激不尽
相关文章:
关于提高PaddleOCR识别准确率的一些优化(一)
关于提高PaddleOCR识别准确率的一些优化(二)
推荐内容
阅读全文
AI总结




Awesome multilingual OCR toolkits based on PaddlePaddle (practical ultra lightweight OCR system, support 80+ languages recognition, provide data annotation and synthesis tools, support training and deployment among server, mobile, embedded and IoT devices)
最近提交(Master分支:8 个月前 )
a80d2c89
5 天前
5d120f8f
7 天前
更多推荐
相关推荐
查看更多
PaddleOCR

Awesome multilingual OCR toolkits based on PaddlePaddle (practical ultra lightweight OCR system, support 80+ languages recognition, provide data annotation and synthesis tools, support training and deployment among server, mobile, embedded and IoT devices)
PaddleOCR

拉取来自百度的官方仓库
PaddleOCR

Awesome multilingual OCR toolkits based on PaddlePaddle (practical ultra lightweight OCR system, support 80+ languages recognition, provide data annotation and synthesis tools, support training and deployment among server, mobile, embedded and IoT devices)
热门开源项目
活动日历
查看更多
直播时间 2025-04-25 15:00:00


直播时间 2025-04-23 19:00:00

GitTalk:国内首个微服务编排框架Juggle实战解析
直播时间 2025-04-22 18:31:56

字节AI 黑科技!从 Manus Agent 入门 Eino
直播时间 2025-04-09 14:34:18

樱花限定季|G-Star校园行&华中师范大学专场
直播时间 2025-04-07 14:51:20

樱花限定季|G-Star校园行&华中农业大学专场
目录
所有评论(0)