paddleDetection-视频OCR

2023-08-14 PV:

PPOCR_V4

安装百度最新的 PP-OCRv4 库（paddleocr），使用的虚拟环境为 py39_vio。注意：该虚拟环境与人脸识别所用的虚拟环境（py38_arcface）不兼容（两者依赖的 OpenCV 版本冲突），不能混用。

pip install paddleocr --user -i https://mirror.baidu.com/pypi/simple

代码

在 cfg_utils.py 中新增命令行参数 --ocr：设置为 True 时开启 OCR，默认为 False（关闭）

# NOTE: argparse's ``type=bool`` calls bool() on the raw string, so every
# non-empty value (including "False") would switch OCR on. Parse the text
# explicitly so that "--ocr=False" really disables it.
parser.add_argument(
    "--ocr",
    type=lambda value: str(value).strip().lower() in (
        "true", "1", "yes", "on"),
    default=False,
    help="use paddlepaddle-ocr")

pipeline.py

from python.visualize import visualize_box_mask, visualize_attr, visualize_pose, visualize_action, visualize_vehicleplate, visualize_vehiclepress, visualize_lane, visualize_vehicle_retrograde, visualize_ocr

1
2
3

class PipePredictor(object):
    """Pipeline predictor; this excerpt only shows the OCR switch."""

    def __init__(self, args, cfg, is_video=True, multi_camera=False):
        """Store whether the OCR overlay is enabled.

        Args:
            args: Parsed command-line namespace; must expose ``ocr``.
            cfg: Not used in this excerpt.
            is_video: Not used in this excerpt.
            multi_camera: Not used in this excerpt.
        """
        # Value of the --ocr command-line switch.
        self.ocr = args.ocr

def visualize_video(self,
                    image_rgb,
                    result,
                    collector,
                    frame_id,
                    fps,
                    entrance=None,
                    records=None,
                    center_traj=None,
                    do_illegal_parking_recognition=False,
                    illegal_parking_dict=None):
    """Convert a frame to BGR and, when OCR is enabled, overlay OCR results.

    Only the OCR-related part of the method is shown in this excerpt; the
    remaining parameters are not used by the code visible here.

    Args:
        image_rgb: Frame passed to ``cv2.cvtColor`` with ``COLOR_RGB2BGR``.
        result: Object whose ``get('mot')`` value is deep-copied.
    """
    image = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
    mot_res = copy.deepcopy(result.get('mot'))

    if self.ocr:
        # PaddleOCR is not thread-safe, so inference is serialized behind the
        # shared lock. The with-block releases the lock even if ocr.ocr()
        # raises, which a bare acquire()/release() pair would not.
        with lock:
            ocr_pages = ocr.ocr(image, cls=True)

        # The per-image entry can be None when no text is detected in the
        # frame; treat that as "no results" instead of iterating over None.
        ocr_result = (ocr_pages[0] if ocr_pages else None) or []

        ocr_boxes = [line[0] for line in ocr_result]
        ocr_txts = [line[1][0] for line in ocr_result]
        ocr_scores = [line[1][1] for line in ocr_result]

        image = visualize_ocr(image, ocr_boxes, ocr_txts, ocr_scores)

visualize.py

def visualize_ocr(im, boxes, texts, score):
    """Overlay recognized text and bounding rectangles on an image.

    Args:
        im: Either an image file path (``str``) or an array-like image. A
            path is loaded and passed through ``cv2.COLOR_RGB2BGR``; an
            array is copied so the caller's data is not modified.
        boxes: Sequence of boxes indexed as ``box[point][axis]`` (points 0,
            1 and 2 are read), aligned with ``texts``. ``None`` draws nothing.
        texts: Recognized strings; empty strings are skipped.
        score: Accepted for interface compatibility; not used for drawing.

    Returns:
        A contiguous numpy array of the 3-channel image with the overlay
        composited onto it.
    """
    if isinstance(im, str):
        # Close the file handle as soon as the pixel data has been copied.
        with Image.open(im) as handle:
            im = np.ascontiguousarray(np.copy(handle))
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    else:
        im = np.ascontiguousarray(np.copy(im))

    # Draw on a transparent layer so the text acts as a translucent watermark.
    im = Image.fromarray(im).convert('RGBA')
    im_canvas = Image.new('RGBA', im.size, (255, 255, 255, 0))

    if boxes is not None:
        draw = ImageDraw.Draw(im_canvas)
        fonts = {}  # font size -> loaded font, so each size is loaded once

        for box, text in zip(boxes, texts):
            if text == "":
                continue

            # Font size follows the vertical extent between points 1 and 2,
            # clamped to at least 1.
            font_size = max(1, int(box[2][1] - box[1][1]))
            font = fonts.get(font_size)
            if font is None:
                font = ImageFont.truetype(font_file, size=font_size)
                fonts[font_size] = font

            draw.text(
                (box[0][0], box[0][1]),
                text,
                font=font,
                fill=(255, 255, 0, 85))  # fourth component is the alpha

            # ImageDraw.rectangle raises ValueError when the second corner
            # lies above/left of the first; order the coordinates so such
            # boxes are still outlined instead of being silently dropped.
            left, right = sorted((box[0][0], box[2][0]))
            top, bottom = sorted((box[0][1], box[2][1]))
            draw.rectangle(
                ((left, top), (right, bottom)),
                fill=None,
                outline=(255, 255, 0),
                width=1)

    # Composite the overlay onto the frame and drop the alpha channel.
    im = Image.alpha_composite(im, im_canvas).convert('RGB')
    # Hand back a contiguous array.
    return np.ascontiguousarray(np.copy(im))

启动

在 deploy/pipeline/config 目录下创建视频 OCR 的配置文件 infer_cfg_ppocr.yml，并写入以下基本参数：

1
2
3

crop_thresh: 0.5
visual: True
warmup_frame: 50

python deploy/pipeline/pipeline.py --config deploy/pipeline/config/infer_cfg_ppocr.yml --device=gpu --video_file=demo_input/car_t1.mp4 --output_dir=demo_output --ocr=True