Python本地识别验证码

whitesev·2021-04-25·352 次阅读

一.识别文件

import os
from ctypes import c_void_p, cdll, create_string_buffer, string_at

# Set the GLOG_minloglevel environment variable before any DLL is loaded.
# NOTE(review): presumably read by the glog-based native OCR library - confirm.
os.environ["GLOG_minloglevel"] = "3"
# 0 - debug
# 1 - info (still produces a lot of output)
# 2 - warning
# 3 - error
class ocr:
    """Recognise captcha images through the native OCR DLLs stored beside this file.

    Two engines are available: a normal-precision one (``ocr.dll``) and a
    high-precision one (``OCRS.dll`` plus its character library).

    Args:
        image_absoult_path: Absolute path of a single captcha image; required
            by :meth:`ocr_single_file`.
        folder_path: Folder that is walked recursively for images; required
            by :meth:`ocr_multiple_files`.
        image_type: File-name suffix (for example ``"png"``) selecting which
            files in ``folder_path`` are images; required by
            :meth:`ocr_multiple_files`.
    """

    def __init__(self, image_absoult_path: str = None, folder_path: str = None, image_type: str = None):
        self.__image_absoult_path = image_absoult_path  # absolute path of one image
        self.__folder_path = folder_path  # folder holding several images
        self.__image_type = image_type  # file-name suffix of the images
        # Directory containing this file; the DLLs are resolved relative to it.
        self.__current_path = os.path.abspath(os.path.split(__file__)[0])

    @staticmethod
    def __image_sort_key(path: str) -> tuple:
        """Return a sort key ordering numeric file names numerically.

        Only the file name without its extension is inspected, so dots or
        digits in parent directories do not matter.  Names that are not
        integers are placed after the numeric ones, in lexicographic order,
        instead of raising ``ValueError``.
        """
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            return (0, int(stem), "")
        except ValueError:
            return (1, 0, stem)

    @staticmethod
    def __decode_buffer(raw: bytes) -> str:
        """Decode a NUL-terminated UTF-8 result buffer into a stripped string."""
        # Everything after the first NUL is padding and must not be decoded.
        return raw.split(b"\x00", 1)[0].decode("utf-8").strip()

    def __normal_ocr(self, imagePath: str) -> str:
        """Recognise ``imagePath`` with the normal-precision engine."""
        self.__set_dll(dll=r"\ocr\ocr.dll")
        # Initialise the recognition library.
        # NOTE(review): the DLL is reloaded and re-initialised for every image;
        # confirm whether a single init per process would be sufficient.
        self.__dll.init()
        # The function returns a pointer; without an explicit restype ctypes
        # assumes a C int and truncates the address on 64-bit interpreters.
        self.__dll.ocr.restype = c_void_p
        with open(file=imagePath, mode="rb") as f:
            image = f.read()
        result_ptr = self.__dll.ocr(image, len(image))
        if not result_ptr:
            # A NULL pointer cannot be dereferenced; report "nothing recognised".
            return ""
        return string_at(result_ptr).decode("utf-8")

    def __high_ocr(self, imagePath: str) -> str:
        """Recognise ``imagePath`` with the high-precision engine."""
        self.__set_dll(dll=r"\ocr\OCRS.dll")
        # Load the character library, then let the DLL build its index from it.
        with open(file=self.__current_path + r"\ocr\通杀英文数字库.cnn", mode="rb") as f:
            word_bank = f.read()
        work_index = self.__dll.INIT(self.__current_path, word_bank, len(word_bank), -1, 1)
        with open(file=imagePath, mode="rb") as f:
            image = f.read()
        image_str = create_string_buffer(100)  # output buffer filled by the DLL
        self.__dll.OCR(work_index, image, len(image), image_str)
        return self.__decode_buffer(image_str.raw)

    def __recognize(self, imagePath: str, model: bool) -> str:
        """Dispatch to the high-precision engine when ``model`` is truthy."""
        if model:
            return self.__high_ocr(imagePath=imagePath)
        return self.__normal_ocr(imagePath=imagePath)

    def __check_is_file(self):
        """Raise ``Exception`` unless the configured single image path is an existing file."""
        if self.__image_absoult_path is None:
            raise Exception("请传入本地图片地址")
        if not os.path.isfile(self.__image_absoult_path):
            raise Exception("请传入本地图片的绝对地址")

    def __set_dll(self, dll: str = None):
        """Load the DLL at ``dll`` (a path relative to this file's directory)."""
        self.__dll = cdll.LoadLibrary(self.__current_path + dll)

    def __get_list_image_path(self) -> list:
        """Return every file under the folder whose name ends with the image type.

        Raises:
            Exception: If ``folder_path`` or ``image_type`` was not supplied.
        """
        # Fail with a clear message instead of an opaque TypeError from
        # os.walk(None) or str.endswith(None).
        if self.__folder_path is None:
            raise Exception("请传入文件夹路径")
        if self.__image_type is None:
            raise Exception("请传入图片类型")
        return [
            os.path.join(root, file)
            for root, _dirs, files in os.walk(self.__folder_path)
            for file in files
            if file.endswith(self.__image_type)
        ]

    def ocr_single_file(self, model: bool = False) -> str:
        """Recognise the single image given at construction time.

        Args:
            model: ``True`` selects the high-precision engine, ``False``
                (default) the normal-precision one.

        Returns:
            The recognised verification code.

        Raises:
            Exception: If no image path was given or it is not an existing file.
        """
        self.__check_is_file()
        return self.__recognize(self.__image_absoult_path, model)

    def ocr_multiple_files(self, model: bool = False) -> list:
        """Recognise every matching image found under the configured folder.

        Args:
            model: ``True`` selects the high-precision engine, ``False``
                (default) the normal-precision one.

        Returns:
            A list of ``{"image_path": ..., "verification_code": ...}`` dicts,
            numerically named files first in numeric order; ``None`` when no
            matching image exists.

        Raises:
            Exception: If ``folder_path`` or ``image_type`` was not supplied.
        """
        image_list = self.__get_list_image_path()
        if not image_list:
            return None
        image_list.sort(key=self.__image_sort_key)
        return [
            {
                "image_path": image_absolute_path,
                "verification_code": self.__recognize(image_absolute_path, model),
            }
            for image_absolute_path in image_list
        ]

二.验证码图片

三.调用

1.单文件调用

参数	解释
image_absoult_path	单个验证码图片的绝对路径
model	True/False True是高精度，默认为False普通精度

# Recognise one captcha stored next to this script with the high-precision engine.
image_path = os.path.abspath(os.path.dirname(__file__)) + r"\captcha\1.png"
a = ocr(image_absoult_path=image_path)
single_result = a.ocr_single_file(model=True)
print(single_result)

2.文件夹调用

参数	解释
folder_path	文件夹的绝对路径
image_type	文件夹下的图片的类型
model	True/False True是高精度，默认为False普通精度

# Recognise every png under the captcha folder with the normal-precision engine.
folder_path = os.path.abspath(os.path.dirname(__file__)) + r"\captcha"
b = ocr(image_type="png", folder_path=folder_path)
normal_results = b.ocr_multiple_files(model=False)
print(normal_results)

3.文件夹调用(需要指定文件夹下的图片类型)

参数	解释
folder_path	文件夹的绝对路径
image_type	文件夹下的图片的类型
model	True/False True是高精度，默认为False普通精度

# Recognise every png under the captcha folder with the high-precision engine.
folder_path_ = os.path.abspath(os.path.dirname(__file__)) + r"\captcha"
c = ocr(image_type="png", folder_path=folder_path_)
high_results = c.ocr_multiple_files(model=True)
print(high_results)

四.识别结果

类型	模式	文件名称	识别结果	成功率
单验证码识别	高精度	1.png		100%
多验证码识别	普通精度	1.png~10.png		60%
多验证码识别	高精度	1.png~10.png		80%

五.结尾

感谢网友提供dll

网盘	文件简介	链接	密码
天翼云	必须的dll	189.cn	fp0n
天翼云	需要识别的验证码图片	189.cn	9rhm

win32pdh获取进程网络速率

第三方网易云音乐修改

Author

whitesev

WhiteSevの博客

切换主题 | SCHEME TOOL