爬虫爬取图片(女生切勿点开)
#!usr/bin/python# -*- coding: utf-8 -*-import bs4import requestsfrom bs4 import BeautifulSoupimport webbrowserimport reimport easyguifrom lxml import htmlfrom lxml import etreenum = 0...
·
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import webbrowser

import bs4
import easygui
import requests
from bs4 import BeautifulSoup
from lxml import etree
from lxml import html
# Running index used to name saved image files; get_img() declares it
# ``global`` and increments it once per image written.
num = 0
def Header(referer, host='i.meizitu.net'):
    """Build the HTTP request headers used when fetching an image.

    Args:
        referer: Value sent in the ``Referer`` header.  It is rendered with
            ``str.format``, so non-string values are accepted.
        host: Value sent in the ``Host`` header.  Defaults to the image host
            this script was written against.

    Returns:
        A newly created ``dict`` mapping header names to header values.
    """
    headers = {
        'Host': host,
        'Pragma': 'no-cache',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/59.0.3071.115 Safari/537.36',
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Referer': '{}'.format(referer),
    }
    return headers
def get_img(url, save_dir='F:/代码/picture1', timeout=10):
    """Download every image of one gallery and save each as ``<num>.jpg``.

    The gallery page at ``url`` is fetched first to read the page count from
    the second-to-last link of the ``pagenavi`` block.  Each sub-page
    ``url/1`` .. ``url/<count>`` is then fetched, the ``src`` of its main
    image is extracted, and the image bytes are written into ``save_dir``.
    File names come from the module-level counter ``num``, which is
    incremented after every successful write.

    Args:
        url: Address of the gallery's first page.
        save_dir: Directory that receives the image files; it is created
            when it does not exist yet.
        timeout: Seconds passed as ``timeout`` to every ``requests.get``
            call so a stalled connection cannot block forever.

    Returns:
        None.  A gallery without a readable page count is skipped, as is any
        sub-page without a main image; both cases print a short notice.
    """
    global num
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
    }

    res = requests.get(url, headers=header, timeout=timeout)
    tree = etree.HTML(res.text)
    # Determine how many images (one per sub-page) the gallery has.
    page_labels = tree.xpath('//div[@class="pagenavi"]/a[last()-1]/span/text()')
    if not page_labels:
        # Indexing an empty result would raise IndexError; skip instead.
        print('no page count found, skipping {}'.format(url))
        return
    count = int(page_labels[0])

    # open() does not create missing directories.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    for j in range(count):
        print(j)
        page_url = url + '/{}'.format(j + 1)
        print(page_url)
        page_res = requests.get(page_url, headers=header, timeout=timeout)
        page_tree = etree.HTML(page_res.text)
        img_srcs = page_tree.xpath('//div[@class="main-image"]/p/a/img/@src')
        if not img_srcs:
            print('no image found on {}'.format(page_url))
            continue
        src = img_srcs[0]
        # The image URL itself is sent as the Referer of the image request.
        img_res = requests.get(src, headers=Header(src), timeout=timeout)
        with open(os.path.join(save_dir, '{}.jpg'.format(num)), 'wb') as f:
            f.write(img_res.content)
        # Advance the counter only once the file has been written.
        num = num + 1
# Browser-like User-Agent sent with the listing-page request.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
}
url = 'http://www.mzitu.com/page/1'
# A timeout keeps a stalled connection from hanging the script forever.
res = requests.get(url, headers=header, timeout=10)
# NOTE: this rebinds the module-level name ``html`` that was imported from
# lxml at the top of the file; the name is kept for compatibility.
html = etree.HTML(res.text)
# Every gallery link found in the post list of the listing page.
srcs = html.xpath('//div[@class="postlist"]//li/a/@href')
for i in srcs:
    get_img(i)
更多推荐
已为社区贡献2条内容
所有评论(0)