准备工作

  • 安装好selenium、requests和Pillow库(后面的代码会用到),使用的浏览器为Chrome
  • 这次使用的打码平台为超级鹰,提前准备好账户
  • 练习地址为:https://captcha3.scrape.center/
    • 提供者:崔庆才

获取打码平台API

  • https://www.chaojiying.com/api-14.html
  • 示例代码如下所示(保存为chaojiying.py,后文会通过 from chaojiying import Chaojiying_Client 导入)
#!/usr/bin/env python
# coding:utf-8

import requests
from hashlib import md5

class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition API.

    The account name, the MD5 hex digest of the password and the software ID
    are sent as form fields with every request.
    """

    # NOTE(review): both endpoints are plain HTTP, unchanged from the original.
    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the credentials and the request defaults.

        Args:
            username: Chaojiying account name.
            password: Plain-text account password; only its MD5 hex digest is kept.
            soft_id: Software ID sent as the ``softid`` form field.
            timeout: Seconds passed to ``requests`` as the request timeout.
                ``None`` restores the old wait-forever behaviour.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload an image for recognition and return the decoded JSON reply.

        Args:
            im: Raw image bytes.
            codetype: Captcha type code, see http://www.chaojiying.com/price.html

        Returns:
            The response body parsed by ``Response.json()``.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        # Without a timeout a stalled server would block this call forever.
        r = requests.post(self.UPLOAD_URL, data=params, files=files,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised image and return the decoded JSON reply.

        Args:
            im_id: ID of the image whose recognition result was wrong.

        Returns:
            The response body parsed by ``Response.json()``.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(self.REPORT_URL, data=params,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

初始化

  • 首先导入必要的包,并初始化变量,代码如下所示
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from chaojiying import Chaojiying_Client

USERNAME = 'admin'  # account used to log in to the target site
PASSWORD = 'admin'  # password used to log in to the target site
CHAOJIYING_USERNAME = 'username'  # Chaojiying account name
CHAOJIYING_PASSWORD = 'password'  # Chaojiying account password
CHAOJIYING_SOFT_ID = 913617  # software ID generated by Chaojiying
CHAOJIYING_KIND = 9004  # captcha type code; the list is on the Chaojiying website


class CrackCaptcha:
    """Automates the login page at ``self.url`` with Chrome and the Chaojiying client."""

    def __init__(self):
        """Start Chrome and set up the wait helper, site credentials and solver client."""
        self.url = 'https://captcha3.scrape.center/'
        self.browser = webdriver.Chrome()
        # One shared explicit wait with a 20 second limit, used by the element lookups.
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        self.password = PASSWORD
        self.chaojiying = Chaojiying_Client(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def __del__(self):
        """Shut the browser down when the instance is garbage-collected."""
        # ``browser`` is missing when webdriver.Chrome() failed inside __init__.
        browser = getattr(self, 'browser', None)
        if browser is None:
            return
        # Drop the reference first so a second finalisation is a no-op.
        self.browser = None
        # quit() ends the whole driver session; close() only closes the current window.
        browser.quit()

获取验证码

    def open(self):
        """Load the login page and type the credentials into the form."""
        self.browser.get(self.url)
        user_locator = (By.CSS_SELECTOR, 'input[type="text"]')
        pass_locator = (By.CSS_SELECTOR, 'input[type="password"]')
        # Both inputs must be present in the DOM before anything is typed.
        user_field = self.wait.until(EC.presence_of_element_located(user_locator))
        pass_field = self.wait.until(EC.presence_of_element_located(pass_locator))
        user_field.send_keys(self.username)
        time.sleep(2)
        pass_field.send_keys(self.password)
        
    def get_captcha_button(self):
        """Return the ``button[type="button"]`` element once it is present in the DOM."""
        locator = (By.CSS_SELECTOR, 'button[type="button"]')
        return self.wait.until(EC.presence_of_element_located(locator))
       
    def get_captcha_element(self):
        """Return the ``geetest_widget`` node that holds the complete captcha."""
        image_present = EC.presence_of_element_located((By.CSS_SELECTOR, 'img.geetest_item_img'))
        widget_present = EC.presence_of_element_located((By.CLASS_NAME, 'geetest_widget'))
        # Wait for the captcha picture first; only the enclosing widget is returned.
        self.wait.until(image_present)
        return self.wait.until(widget_present)

    def get_captcha_position(self):
        """Return the captcha widget's edges as ``[top, bottom, left, right]``."""
        widget = self.get_captcha_element()
        time.sleep(2)
        origin = widget.location
        extent = widget.size
        left, top = origin['x'], origin['y']
        return [top, top + extent['height'], left, left + extent['width']]

    def get_screenshot(self):
        """Take a browser screenshot, save it as ``screenshot.png`` and return the image."""
        png_bytes = self.browser.get_screenshot_as_png()
        page_image = Image.open(BytesIO(png_bytes))
        page_image.save('screenshot.png')
        return page_image

    def get_captcha_image(self, name='captcha.png'):
        """Crop the captcha out of a page screenshot, save it and return it.

        The widget position is expressed in CSS pixels. When
        ``window.devicePixelRatio`` is not 1 (HiDPI display, OS scaling, page
        zoom) the screenshot contains that many image pixels per CSS pixel, so
        the crop box is scaled to match. The crop is then resized back to the
        widget's CSS size so that coordinates measured on the returned image
        can be used directly as offsets inside the element.

        Args:
            name: File name the cropped captcha is saved under.

        Returns:
            The cropped captcha image.
        """
        top, bottom, left, right = self.get_captcha_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # Fall back to 1 if the script yields nothing usable.
        ratio = self.browser.execute_script('return window.devicePixelRatio') or 1
        box = tuple(int(round(edge * ratio)) for edge in (left, top, right, bottom))
        captcha = screenshot.crop(box)
        if ratio != 1:
            captcha = captcha.resize((int(round(right - left)), int(round(bottom - top))))
        captcha.save(name)
        return captcha

解析结果并提交

    def get_points(self, captcha_result):
        """Parse the recognition reply into a list of ``[x, y]`` click points.

        Args:
            captcha_result: Mapping returned by the solver. Its ``pic_str``
                entry is parsed as ``"x1,y1|x2,y2|..."``.

        Returns:
            One list of integers per ``|``-separated group, in order.

        Raises:
            ValueError: If ``pic_str`` is missing or empty (for example when
                the solver replied with an error), or a coordinate is not an
                integer.
        """
        pic_str = captcha_result.get('pic_str')
        if not pic_str:
            # Fail here with the whole reply instead of an opaque int('') error.
            raise ValueError('recognition result has no coordinates: %r' % (captcha_result,))
        return [[int(number) for number in group.split(',')] for group in pic_str.split('|')]

    def touch_click_words(self, locations):
        """Click each recognised point inside the captcha widget.

        Args:
            locations: Sequence of points; ``point[0]`` and ``point[1]`` are
                used as x and y. They are assumed to be measured from the
                widget's top-left corner (TODO confirm against the solver).
        """
        for location in locations:
            element = self.get_captcha_element()
            size = element.size
            # move_to_element_with_offset measures from the top-left corner in
            # older Selenium releases and from the element centre in current
            # ones. Moving to the centre and then by an offset relative to it
            # gives the same target on both.
            offset_x = int(round(location[0] - size['width'] / 2))
            offset_y = int(round(location[1] - size['height'] / 2))
            actions = ActionChains(self.browser)
            actions.move_to_element(element).move_by_offset(offset_x, offset_y).click().perform()
            time.sleep(1)

    def get_verifi_button(self):
        """Click the ``.geetest_commit_tip`` confirm control once it is present."""
        locator = (By.CSS_SELECTOR, '.geetest_commit_tip')
        self.wait.until(EC.presence_of_element_located(locator)).click()

运行

    def crack_login(self):
        """Run the whole flow: fill the form, solve the captcha, submit, report.

        Prints ``登录成功`` when an ``h2`` containing that text appears, and
        ``登录失败`` when it has not appeared by the time the wait gives up.
        """
        # Function-scope import: the file's import block does not provide it.
        from selenium.common.exceptions import TimeoutException

        self.open()
        time.sleep(1)
        button = self.get_captcha_button()
        button.click()
        image = self.get_captcha_image()
        bytes_array = BytesIO()
        image.save(bytes_array, format='PNG')
        # PostPic expects the picture as a byte string.
        result = self.chaojiying.PostPic(bytes_array.getvalue(), CHAOJIYING_KIND)
        print(result)  # raw reply from the solver
        locations = self.get_points(result)
        self.touch_click_words(locations)
        time.sleep(3)
        self.get_verifi_button()
        time.sleep(5)
        try:
            # until() raises on timeout rather than returning a falsy value,
            # so the failure case has to be handled as an exception.
            self.wait.until(EC.text_to_be_present_in_element((By.TAG_NAME, 'h2'), '登录成功'))
        except TimeoutException:
            print('登录失败')
        else:
            print('登录成功')


if __name__ == '__main__':
    # Script entry point: build the automation object, then run the login flow.
    cracker = CrackCaptcha()
    cracker.crack_login()

结语

  • 这样就完成了借助打码平台解决验证码的问题,借助打码平台几乎任意的验证码都可以识别,是一种通用的方法。
Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐