1.安装selenium:
pip install selenium
2.安装chrome最新版本:
yum install epel-release;
yum install https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm

直接安装chrome可能会失败,此时需要先在shell中执行以下命令(添加Google Chrome的yum源),然后再重新执行安装:

cat << EOF > /etc/yum.repos.d/google-chrome.repo
[google-chrome]
name=google-chrome
baseurl=http://dl.google.com/linux/chrome/rpm/stable/x86_64
enabled=1
gpgcheck=1
gpgkey=https://dl.google.com/linux/linux_signing_key.pub
EOF

一开始安装的驱动是77版本,与79版本的浏览器不匹配,导致执行脚本时报错:该驱动最高只支持77版本的chrome
下载驱动可在:https://npm.taobao.org/mirrors
这是淘宝提供的镜像以及各类安装包,还是很赞的!!!

3.安装ChromeDriver驱动(这里要注意,安装的驱动版本一定要跟chrome版本匹配得上才行,否则会报错)

查看chrome版本

[root@localhost ~]# google-chrome-stable --version
Google Chrome 79.0.3945.79
# 根据谷歌的版本对应驱动的版本号: https://npm.taobao.org/mirrors/chromedriver/
# 下载对应的驱动,由于官网上79系列驱动的最新版本是79.0.3945.36,直接安装该版本:
[root@localhost ~]# wget https://npm.taobao.org/mirrors/chromedriver/79.0.3945.36/chromedriver_linux64.zip
[root@localhost ~]# unzip chromedriver_linux64.zip
执行命令
[root@localhost ~]# chromedriver
-bash: /usr/bin/chromedriver: 没有那个文件或目录
# 移动文件
[root@localhost ~]# mv chromedriver /usr/bin/
# 设置权限
[root@localhost ~]# chmod 777 /usr/bin/chromedriver
#再执行
[root@localhost ~]# chromedriver
#如果还报错
-bash: /usr/local/bin/chromedriver: 没有那个文件或目录
#那么说明这个位置也需要有chromedriver: 需要复制一份文件过去
[root@localhost ~]# cp /usr/bin/chromedriver /usr/local/bin/
[root@localhost ~]# chromedriver
Starting ChromeDriver 79.0.3945.36 (3582db32b33893869b8c1339e8f4d9ed1816f143-refs/branch-heads/3945@{#614}) on port 9515
Only local connections are allowed.
Please protect ports used by ChromeDriver and related test frameworks to prevent access by malicious code.

# 查看驱动
[root@localhost ~]# chromedriver -v
ChromeDriver 79.0.3945.36 (3582db32b33893869b8c1339e8f4d9ed1816f143-refs/branch-heads/3945@{#614})
[root@localhost ~]# unzip chromedriver_linux64.zip
Archive:  chromedriver_linux64.zip
replace chromedriver? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: chromedriver
# 明明下载的是最新版本的驱动,解压也正常,提示是否覆盖时选择了y,没想到查看版本时仍然是旧版(解压只会覆盖当前目录下的文件,/usr/bin 和 /usr/local/bin 中的仍是旧文件)。干脆暴力一点:先直接删除旧驱动,再重新安装。删除:
[root@localhost ~]# rm -rf /usr/bin/chromedriver
[root@localhost ~]# rm -rf /usr/local/bin/chromedriver
[root@localhost ~]# chromedriver -v
-bash: /usr/local/bin/chromedriver: 没有那个文件或目录
[root@localhost ~]# rpm -qa | grep chromedriver  
[root@localhost ~]# chromedriver -v
-bash: /usr/local/bin/chromedriver: 没有那个文件或目录
#卸载成功
#重新下载驱动压缩包:
[root@localhost ~]# wget https://npm.taobao.org/mirrors/chromedriver/81.0.4044.69/chromedriver_linux64.zip
#下载后按前面的步骤解压、移动到/usr/bin/并设置权限,安装完就好了

测试程序如下:

from selenium import webdriver as wdr
dr = wdr.Chrome()
dr.get('http://www.baidu.com/')
print(dr.title)
dr.quit()

执行发现会报错:

(unknown error: DevToolsActivePort file doesn't exist)
(The process started from chrome location /usr/bin/google-chrome is no longer running, so ChromeDriver is assuming that Chrome has crashed.)

那么我们就得换种方式:

from selenium import webdriver as wdr
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--headless')
dr = wdr.Chrome(chrome_options=chrome_options)
dr.get('http://www.baidu.com/')
print(dr.title)
dr.quit()

执行的结果如下:
百度一下,你就知道

Ok,这就表示我们搭建的环境已经成功,接下来就可以根据自己的需求进行数据采集啦!

小笔记–Linux常用命令:
查询是否已安装某软件(卸载前先确认包名):
rpm -qa | grep chromedriver
移除文件:
rm -rf chromedriver_linux64.zip
查询已安装的包:
rpm -qa
卸载RPM包:
rpm -e chromedriver-77.0.3865.120-1.el7.x86_64
升级RPM包:
rpm -Uvh zip-3.1-11.el7.x86_64.rpm

配置代理
chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
# 设置默认打开是英文
chrome_options.add_experimental_option('prefs', {'intl.accept_languages': 'en'})
chrome_options.add_argument('--proxy-server=http://%s' % proxy)
chrome_options.add_argument('--proxy-server=%s' % proxy)
Logo

更多推荐