补充:将OCR识别的结果与数据集正确结果合在一起进行比对
链接
补充内容:利用文字定位模块与文字识别模块的接口批量处理图片,生成OCR结果,并加上标记结果进行比对。原文链接:https://blog.****.net/weixin_42343812/article/details/85100044
生成后的结果如下所示。
代码片
下面展示一些内联代码片。
// A code block
var foo = 'bar';
# Export OCR results next to the ground-truth labels into Excel workbooks so
# the two can be compared visually. One workbook is written per 1000 images:
# ./img10000_ocr/pict_0.xlsx, pict_1.xlsx, ...
#
# Relies on names defined elsewhere in the file:
#   b           -- sequence of comma-separated result strings "ocr,chi,eng"
#   b_img_name  -- sequence of image base names, parallel to ``b``; each name
#                  is converted with int() and used as an index into the labels
#   os, xlsxwriter -- imported at file level
#
# NOTE(review): the original snippet had lost all indentation; the nesting
# below is reconstructed from the statements themselves -- confirm it matches
# the intended behavior.

# Not used in this section; kept in case later code reads them.
s = ''
j = 2000


def _start_workbook(index):
    """Create ./img10000_ocr/pict_<index>.xlsx, replacing any stale copy.

    Returns the new workbook and its single worksheet named 'demo'.
    """
    path = './img10000_ocr/pict_' + str(index) + '.xlsx'
    if os.path.exists(path):
        os.remove(path)
    workbook = xlsxwriter.Workbook(path)
    return workbook, workbook.add_worksheet('demo')


# Load the label data: fields 1 and 2 of every line are stored as a
# two-element list, in file order.
json_engchi = []
with open('/home/xudong/ocr/dataset.txt', 'r') as file_json:
    for line in file_json:
        parts = line.split(',')
        json_engchi.append([parts[1], parts[2]])

book, sheet = _start_workbook(0)
k = 3  # 1-based spreadsheet row of the next record; reset for every workbook
i = 1  # progress counter: index of the next record to process

# Index 0 is skipped, as in the original loop.
# NOTE(review): presumably a header/placeholder entry -- confirm.
for idx in range(1, len(b)):
    shenqing = b_img_name[idx]
    fields = b[idx].split(',')
    ocr, chi, eng = fields[0], fields[1], fields[2]

    if idx % 1000 == 0:
        # Rotate to a fresh workbook. XlsxWriter only writes the file when
        # close() is called, so the finished workbook must be closed before
        # ``book`` is rebound. Integer division keeps the name of the file
        # that is checked/removed identical to the one that is created.
        book.close()
        book, sheet = _start_workbook(idx // 1000)
        k = 3

    # Column A: source image, column B: the '_ctpn' image for the same name.
    sheet.insert_image('A' + str(k), '/home/xudong/ocr/img10000/' + shenqing + '.jpg')
    sheet.insert_image('B' + str(k), '/home/xudong/ocr/img10000/' + shenqing + '_ctpn.jpg')

    # sheet.write() takes 0-based row/column indexes, hence k - 1.
    row = k - 1
    sheet.write(row, 2, ocr)
    sheet.write(row, 4, chi)
    sheet.write(row, 7, eng)
    # Label values are placed in the column right after the matching result.
    label = json_engchi[int(shenqing)]
    sheet.write(row, 8, label[0])
    sheet.write(row, 5, label[1])

    i = idx + 1
    print(i)
    k += 1

print(i)
book.close()