ddddocr 可以识别文字验证码：手把手教你用 Python 实现自动识别并点选验证码文字的程序。【建议收藏】

ddddocr 可以识别文字验证码：手把手教你用 Python 实现自动识别并点选验证码文字的程序。

peihuwang

3310人浏览 · 2023-06-06 09:07:37

peihuwang · 2023-06-06 09:07:37 发布

Python 代码如下：

# coding:utf-8

import datetime
import math
import os
import random
import re
import sys
import time
from io import BytesIO

import ddddocr
from PIL import Image, ImageDraw
from selenium.webdriver import ActionChains

from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from api import get_tiiktok_anti2
import execjs
import requests
from colorama import Fore, init
import api
import cooke_path_8
import json
from selenium import webdriver
from selenium.webdriver.common.by import By


# Directory that contains this file; appended to the import search path.
cur_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(cur_path)

# Locations supplied by the project-local ``cooke_path_8`` helper module.
cookie_path = cooke_path_8.get_cooke_path_8()
current_path = cooke_path_8.get_current_path()

get_chromedriver_path = cooke_path_8.get_chromedriver_path()

import logging

# Root logger: INFO level, "time - logger name - level - message" layout.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Disabled scratch snippet: the triple-quoted text below is a bare string
# expression, so none of the statements inside it are executed.
'''
nid =1
ss1= "https://mobile.pinduoduo.com/goods.html?goods_id={0}&force_use_web_bundle=1".format(str(nid))
ss2= "https://mobile.pinduoduo.com/goods.html?goods_id={}&force_use_web_bundle=1".format(str(nid))
print(ss1)
print(ss2)

sys.exit(1)
'''


class UpData:
    """Drive Chrome to solve a click-the-characters captcha with ddddocr.

    Constructing an instance launches Chrome and immediately runs
    :meth:`getVerificationCode`, so the constructor blocks until that
    method returns.
    """

    # Extra pixels kept around every detected character box before OCR.
    _BOX_PADDING = 3
    # Seconds allowed for each captcha image download.
    _HTTP_TIMEOUT = 10

    def __init__(self, page_max, which):
        """Start the browser, store defaults and run the captcha flow.

        Args:
            page_max: Page limit; stored on the instance as ``page_max + 1``
                (presumably an exclusive upper bound -- confirm with callers).
            which: Accepted for interface compatibility; not used by the
                code visible in this class.
        """
        chrome_options = webdriver.ChromeOptions()
        # Turn off the Blink "AutomationControlled" feature flag.
        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
        # Drop the "enable-automation" switch from the Chrome command line.
        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
        # NOTE(review): ``chrome_options=`` and ``executable_path=`` are legacy
        # Selenium keyword arguments -- confirm the installed Selenium version
        # still accepts them before upgrading.
        self.driver = webdriver.Chrome(chrome_options=chrome_options,
                                       executable_path=get_chromedriver_path)

        self.page_max = page_max + 1
        self.page_no = 1
        self.page_size = 200
        self.detail_sleep_time = 1
        self.debug = 0
        # Both dates are "yesterday" relative to the local clock.
        yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y/%m/%d')
        self.begin_date = yesterday
        self.end_date = yesterday
        self.headers = {
            'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
        }
        self.getVerificationCode()

    @staticmethod
    def _pad_box(box, padding, image_size):
        """Grow ``box`` (x1, y1, x2, y2) by ``padding``, clamped to the image."""
        width, height = image_size
        x1, y1, x2, y2 = box
        return (
            max(x1 - padding, 0),
            max(y1 - padding, 0),
            min(x2 + padding, width),
            min(y2 + padding, height),
        )

    @staticmethod
    def _box_center(box):
        """Return the integer centre point of ``box`` (x1, y1, x2, y2)."""
        x1, y1, x2, y2 = box
        return (int((x1 + x2) / 2), int((y1 + y2) / 2))

    @staticmethod
    def _scale_point(point, natural_size, rendered_size):
        """Map ``point`` from image pixels to rendered-element pixels."""
        natural_w, natural_h = natural_size
        rendered_w, rendered_h = rendered_size
        return (
            round(point[0] * rendered_w / natural_w),
            round(point[1] * rendered_h / natural_h),
        )

    @staticmethod
    def _plan_clicks(target_words, located_words):
        """Pair each prompt character with a detected position, in order.

        Args:
            target_words: Iterable of characters to click, in click order.
            located_words: ``(word, center)`` pairs in detection order.

        Returns:
            ``(clicks, missing)`` where ``clicks`` is a list of
            ``(word, center)`` in prompt order and ``missing`` lists the
            prompt characters that had no unused detection left. A character
            that occurs several times consumes one detection per occurrence.
        """
        unused = {}
        for word, center in located_words:
            unused.setdefault(word, []).append(center)

        clicks = []
        missing = []
        for word in target_words:
            candidates = unused.get(word)
            if candidates:
                clicks.append((word, candidates.pop(0)))
            else:
                missing.append(word)
        return clicks, missing

    def _fetch_image(self, url):
        """Download ``url`` and return the response body as bytes."""
        response = requests.get(url, timeout=self._HTTP_TIMEOUT)
        response.raise_for_status()
        return response.content

    def getVerificationCode(self):
        """Recognise the captcha on the test page and click its characters.

        The prompt image (``#verify-bar-code``) is OCR'd to obtain the
        characters to click. Character boxes are then detected in the
        captcha image (``#captcha-verify-image``), each box is OCR'd, and
        the centre of every requested character is clicked after scaling
        from the image's pixel size to the element's rendered size.

        Raises:
            requests.RequestException: If an image download fails, times
                out or returns an HTTP error status.
        """
        # An earlier assignment pointed at
        # https://www.kgcaptcha.com/demo/content?t=4&cindex=2#code_lang and was
        # immediately overwritten; only the local page is ever loaded.
        base_url = "http://127.0.0.1/test.html"
        self.driver.get(base_url)
        time.sleep(5)
        wait = WebDriverWait(self.driver, 10)

        # Prompt image: the characters the user is asked to click, in order.
        image2 = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="verify-bar-code"]')))
        target_image_content = self._fetch_image(image2.get_attribute('src'))

        docr = ddddocr.DdddOcr(show_ad=False)
        target_words = docr.classification(target_image_content)

        # Captcha image: the picture that contains the clickable characters.
        image1 = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="captcha-verify-image"]')))
        background_image_content = self._fetch_image(image1.get_attribute('src'))

        ddet = ddddocr.DdddOcr(det=True, show_ad=False)
        poses = ddet.detection(background_image_content)

        img = Image.open(BytesIO(background_image_content))
        # Draw the debug rectangles on a copy so the red outlines never end up
        # inside the crops that are handed to the OCR model.
        annotated = img.copy()
        draw = ImageDraw.Draw(annotated)
        located_words = []
        for raw_box in poses:
            box = self._pad_box(raw_box, self._BOX_PADDING, img.size)
            x1, y1, x2, y2 = box
            draw.line([(x1, y1), (x1, y2), (x2, y2), (x2, y1), (x1, y1)], width=1, fill='red')
            # Crop one character from the untouched image and OCR it.
            img_byte = BytesIO()
            img.crop(box).save(img_byte, 'png')
            word = docr.classification(img_byte.getvalue())
            if word:
                # The centre of the unpadded box is the character's centre.
                located_words.append((word, self._box_center(raw_box)))
        annotated.show()
        logger.info("recognised characters: %s", located_words)

        clicks, missing = self._plan_clicks(target_words, located_words)
        if missing:
            logger.warning("no detected position for prompt characters: %s", missing)
        logger.info("click plan: %s", clicks)

        # Convert image-pixel coordinates to on-page coordinates.
        image1_x = image1.location.get('x')
        image1_y = image1.location.get('y')
        rendered = image1.size
        rendered_size = (rendered['width'], rendered['height'])
        for _word, center in clicks:
            x, y = self._scale_point(center, img.size, rendered_size)
            # reset_actions() is issued before every move, as in the original
            # flow; presumably so the relative offset is not measured from the
            # previous click position -- TODO confirm with the installed
            # Selenium version.
            ActionChains(self.driver).reset_actions()
            ActionChains(self.driver).move_by_offset(image1_x + x, image1_y + y).click().perform()

        # Block for 6000 seconds; the driver is not quit here, so the browser
        # window stays open during that time.
        time.sleep(6000)


if __name__ == "__main__":
    # Constructing UpData launches the browser and runs the captcha flow.
    updata = UpData(page_max=1, which=2)

HTML 代码如下（即上面 Python 代码中访问的本地测试页 http://127.0.0.1/test.html）：

<!DOCTYPE html>
<!-- Local test page: serves the two captcha images that the script looks up
     by id ("verify-bar-code" and "captcha-verify-image"). -->
<html lang="zh-CN">
<head>
  <meta charset="utf-8">
  <title>站点创建成功-phpstudy for windows</title>
  <meta name="keywords" content="">
  <meta name="description" content="">
  <meta name="renderer" content="webkit">
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
  <meta name="apple-mobile-web-app-status-bar-style" content="black">
  <meta name="apple-mobile-web-app-capable" content="yes">
  <meta name="format-detection" content="telephone=no">
  <!-- Cache-busting hints so refreshed captcha images are always re-fetched. -->
  <meta http-equiv="pragma" content="no-cache">
  <meta http-equiv="Cache-Control" content="no-store, must-revalidate">
  <meta http-equiv="expires" content="0">
  <style>
    body {
      font: 16px arial, 'Microsoft Yahei', 'Hiragino Sans GB', sans-serif;
    }
    h1 {
      margin: 0;
      color: #3a87ad;
      font-size: 26px;
    }
    .content {
      width: 45%;
      margin: 0 auto;
    }
    .content > div {
      margin-top: 5px;
      padding: 20px;
      background: #d9edf7;
      border-radius: 12px;
    }
    .content dl {
      color: #2d6a88;
      line-height: 40px;
    }
    .content div div {
      padding-bottom: 20px;
      text-align: center;
    }
  </style>
</head>
<body>
  <div class="content">
    <!-- Prompt image: the characters to click, in order. -->
    <div>
      <img id="verify-bar-code" src="http://localhost/1.png" alt="captcha prompt">
    </div>
    <!-- Captcha image: the picture containing the clickable characters. -->
    <div>
      <img id="captcha-verify-image" src="http://localhost/2.png" alt="captcha image">
    </div>
  </div>
</body>
</html>