Python3 语音识别谷歌验证码

Python3 语音识别谷歌验证码

FOAF-lambda

1106人浏览 · 2022-05-24 17:39:05

FOAF-lambda · 2022-05-24 17:39:05 发布

一、 使用reCaptchaBypasser库
1 - 安装 ffmpeg
2 - pip install pydub
3 - pip install selenium
4 - pip install SpeechRecognition
5 - pip install reCaptchaBypasser
ffmpeg 下载地址：https://www.gyan.dev/ffmpeg/builds/ ，选择 ffmpeg-5.0.1-essentials_build.zip 下载并解压。
将其中的 ffmpeg.exe、ffplay.exe、ffprobe.exe 三个文件放到项目所在的同一文件夹下。

def audio_captcha(self):
    """Try to solve the reCAPTCHA on ``self.driver`` with reCaptchaBypasser.

    Runs ``reCaptchaScraper(...).reCaptchaGoogleV2()`` and reports the outcome
    on stdout.

    Returns:
        True when the result's ``"Response"`` entry equals ``True``;
        otherwise False (the ``"reCaptchaTextKey"`` entry is printed first).
    """
    # Imported lazily so the dependency is only needed when a captcha appears.
    from reCaptchaBypasser import reCaptchaScraper

    outcome = reCaptchaScraper(self.driver, SleepTime=3).reCaptchaGoogleV2()
    # The equality test is kept as-is: the exact type of "Response" is not
    # visible here, so truthiness might not be equivalent.
    if not (outcome["Response"] == True):  # noqa: E712
        print("Try Again !!!")
        print("reCaptchaTextKey ==> ", outcome["reCaptchaTextKey"])
        return False

    print("ByPassed !!!")
    return True

def verify_google_captcha(self):
    """Detect a Google reCAPTCHA on the current page and try to solve it.

    A captcha is considered present when the page has an iframe titled
    "reCAPTCHA" with a non-empty ``src``, or when the page source contains the
    reCAPTCHA anchor URL.

    Returns:
        True when no captcha is detected; otherwise whatever
        ``self.audio_captcha()`` returns.
    """
    probe_js = '''var src_data=document.querySelector('iframe[title="reCAPTCHA"]');if(src_data){return src_data.getAttribute('src')}'''
    iframe_src = self.driver.execute_script(probe_js)

    # The page source is only inspected when the iframe probe found nothing.
    captcha_present = bool(iframe_src) or (
        'https://www.google.com/recaptcha/api2/anchor' in self.driver.page_source
    )
    if not captcha_present:
        return True

    print('出现谷歌验证码')
    return self.audio_captcha()

案例：

import time

from selenium import webdriver
from reCaptchaBypasser import reCaptchaScraper

# Demo: open Google's reCAPTCHA demo page and let reCaptchaBypasser solve it.

# Browser path; the chromedriver_win32 folder is expected underneath it.
chrome_path = r'C:\Users\Administrator\AppData\Local\Google\Chrome\Application'
# Raw f-string: a plain f-string would treat "\c" as an (invalid) escape
# sequence, which newer Python versions warn about. The resulting path is
# unchanged.
executable_path = rf'{chrome_path}\chromedriver_win32\chromedriver.exe'
driver = webdriver.Chrome(executable_path=executable_path)

url = 'https://www.google.com/recaptcha/api2/demo'
driver.get(url)
# Fixed pause before interacting with the page.
time.sleep(5)

reCaptcha = reCaptchaScraper(driver, SleepTime=3)
res = reCaptcha.reCaptchaGoogleV2()
if res["Response"] == True:
    print("ByPassed !!!")
else:
    print("Try Again !!!")
    print("reCaptchaTextKey ==> ", res["reCaptchaTextKey"])

二、

以下代码改自 https://github.com/QIN2DIM/armour-recaptcha ，分为 core.py 和 run.py 两个文件。

core.py 文件

import os
import time
from random import randint
from time import sleep
import pydub
import requests
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.support.wait import WebDriverWait
from speech_recognition import Recognizer, AudioFile


def activate_recaptcha(api: Chrome) -> str:
    """Activate the reCAPTCHA widget, open its audio challenge and return the audio URL.

    :param api: browser driver used for every page interaction. To reduce
        driver fingerprinting in high-concurrency scenarios,
        undetected_chromedriver.v2 can be used in place of selenium.
    :return: the ``src`` attribute of the ``audio-source`` element, or an empty
        string when the ``aria-labelledby`` button cannot be found.
    """
    # Wait (up to 10 s) for the reCAPTCHA anchor iframe, then enter it.
    anchor_locator = (By.XPATH, "//iframe[@title='reCAPTCHA']")
    anchor_frame = WebDriverWait(api, 10).until(
        presence_of_element_located(anchor_locator)
    )
    api.switch_to.frame(anchor_frame)

    # Tick the checkbox to activate the captcha.
    checkbox = api.find_element(By.CLASS_NAME, "recaptcha-checkbox-border")
    checkbox.click()

    # Return to the main frame, then enter the second (challenge) iframe.
    api.switch_to.default_content()
    challenge_frame = api.find_element(
        By.XPATH, "//iframe[@title='reCAPTCHA 验证将于 2 分钟后过期']"
    )
    api.switch_to.frame(challenge_frame)
    sleep(randint(2, 4))

    # Switch the challenge to its audio mode.
    audio_button = api.find_element(By.ID, "recaptcha-audio-button")
    audio_button.click()
    sleep(randint(2, 4))

    # Click the play button; if it is missing, signal that with "".
    try:
        play_button = api.find_element(By.XPATH, "//button[@aria-labelledby]")
        play_button.click()
    except NoSuchElementException:
        return ""

    # Read the URL of the audio source file.
    return api.find_element(By.ID, "audio-source").get_attribute("src")


def handle_audio(audio_url: str) -> str:
    """Download the reCAPTCHA audio clip and convert it from MP3 to WAV.

    The clip is saved as ``audio.mp3`` and the converted copy as ``audio.wav``,
    both relative to the current working directory; existing files with those
    names are overwritten.

    :param audio_url: download URL of the reCAPTCHA audio clip
    :return: path of the WAV file (WAV is used to improve recognition accuracy)
    :raises requests.RequestException: when the download fails, times out or
        returns an HTTP error status
    """
    path_audio_mp3 = "audio.mp3"
    path_audio_wav = "audio.wav"

    # Download the audio file.
    print(audio_url)
    # A timeout keeps a stalled connection from blocking forever.
    res = requests.get(audio_url, timeout=30)
    # Fail here on an HTTP error instead of handing an error page to the decoder.
    res.raise_for_status()

    # "wb" truncates any previous download, so no explicit removal is needed.
    with open(path_audio_mp3, "wb") as audio_file:
        audio_file.write(res.content)

    # Convert mp3 --> wav.
    pydub.AudioSegment.from_mp3(path_audio_mp3).export(path_audio_wav, format="wav")
    return path_audio_wav

def parse_audio(path_audio_wav: str, language: str = None) -> str:
    """Transcribe a WAV audio file to text with ``Recognizer.recognize_google``.

    :param path_audio_wav: local path of the reCAPTCHA audio file (WAV format)
    :param language: language tag of the audio; ``None`` selects ``"en-US"``.
        Optional, but may improve recognition accuracy.
    :return: the recognised text; for en-US this is a handful of words rather
        than a full sentence
    """
    if language is None:
        language = "en-US"

    # Load the whole file into an audio-data object.
    engine = Recognizer()
    wav_source = AudioFile(path_audio_wav)
    with wav_source as handle:
        clip = engine.record(handle)

    # Run the recognition and hand back the text.
    transcript: str = engine.recognize_google(clip, language=language)
    return transcript

def submit_recaptcha(api: Chrome, answer: str) -> bool:
    """Type the recognised answer into the audio challenge and submit it.

    The driver must already be positioned on the screen where the answer can
    be submitted.

    :param api: browser driver. To reduce driver fingerprinting in
        high-concurrency scenarios, undetected_chromedriver.v2 can be used in
        place of selenium.
    :param answer: text produced by the speech recognition step
    :return: True once the answer was typed and ENTER sent; False when a
        NameError or NoSuchElementException occurs
    """
    try:
        # Locate the answer box.
        answer_box = api.find_element(By.ID, "audio-response")
        answer_box.clear()
        lowered = answer.lower()
        answer_box.send_keys(lowered)
        # clear + ENTER is used to make the input look less automated.
        answer_box.send_keys(Keys.ENTER)
    except (NameError, NoSuchElementException):
        return False
    else:
        return True

run.py 文件

import time
from selenium.common.exceptions import WebDriverException,TimeoutException
from core import activate_recaptcha, submit_recaptcha, handle_audio, parse_audio
from selenium import webdriver

"""
将ffmpeg.exe,ffplay.exe,ffprobe.exe 三个文件放到项目同文件夹下
"""

class CatWalk:
    """Drive a Chrome session through the reCAPTCHA audio challenge."""

    def __init__(self):
        """Start Chrome with certificate/SSL errors ignored and extensions enabled."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--ignore-ssl-errors')
        chrome_options.add_argument('--enable-extensions')
        executable_path = r'C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver_win32\chromedriver.exe'
        # NOTE(review): the executable_path / chrome_options keyword arguments
        # appear to be deprecated in newer Selenium releases - confirm against
        # the installed version before upgrading.
        self.driver = webdriver.Chrome(executable_path=executable_path, chrome_options=chrome_options)

    def get_html_handle(self, url, wait_seconds: int = 15):
        """Load ``url`` in the browser with a page-load timeout.

        This is a regular instance method because it reads ``self.driver``.

        :param url: address to open
        :param wait_seconds: page-load timeout in seconds
        """
        self.driver.set_page_load_timeout(time_to_wait=wait_seconds)
        self.driver.get(url)

    def utils_recaptcha(self):
        """Solve the Google reCAPTCHA v2 checkbox challenge used by SSPanel.

        Uses the audio challenge with speech recognition instead of the image
        challenge.

        > Whether or not the site itself is directly reachable, the traffic of
          this step must go through a proxy; otherwise downloading and
          transcoding the audio file will fail.
        > Possible exceptions include:
         - speech_recognition.RequestError
         - http.client.IncompleteRead

        :raises WebDriverException: when no audio URL could be obtained
            (reCAPTCHA risk control)
        :raises TimeoutException: when the answer could not be submitted
        """
        time.sleep(2)
        # Activate reCAPTCHA and fetch the audio download link.
        audio_url: str = activate_recaptcha(self.driver)
        # Google reCAPTCHA risk control: no audio challenge was offered.
        if not audio_url:
            raise WebDriverException
        # Transcode the audio (MP3 --> WAV) to improve recognition accuracy.
        path_audio_wav: str = handle_audio(audio_url=audio_url)

        answer: str = parse_audio(path_audio_wav)
        print('answer:', answer)
        # Locate the input box and type the recognised text.
        response = submit_recaptcha(self.driver, answer=answer)
        if not response:
            raise TimeoutException

    def sign_up(self, url="https://www.google.com/recaptcha/api2/demo"):
        """Open ``url`` and attempt the reCAPTCHA audio challenge once.

        :param url: page that hosts the reCAPTCHA widget
        :raises SystemExit: when a WebDriverException other than
            TimeoutException occurs (reCAPTCHA risk control)
        """
        self.driver.get(url)
        try:
            self.utils_recaptcha()
            # Return to the main frame, otherwise later DOM operations fail.
            self.driver.switch_to.default_content()
        except TimeoutException:
            time.sleep(0.5)
        # Google reCAPTCHA risk control.
        except WebDriverException:
            # exit() is an interactive helper injected by the site module;
            # raising SystemExit directly is the equivalent for programs.
            raise SystemExit
            


if __name__ == '__main__':
    # Script entry point: build the browser session and run sign_up() with
    # its default URL.
    CatWalk().sign_up()

CSDN学习社区

CSDN联合极客时间，共同打造面向开发者的精品内容学习社区，助力成长！

更多推荐

cover

用 OpenAI Assistants 做大模型应用开发

CSDN学习社区

cover

1 小时解读鸿蒙 10 大热点问题

CSDN学习社区

cover

1 小时解读鸿蒙 10 大热点问题

CSDN学习社区

所有评论(0)

查看更多评论

FOAF-lambda

已为社区贡献2条内容