表情包--爬虫
#!/user/bin/python# -*- coding:utf-8 -*-# __author__:yanglong# __data__:2019.4.23import requestsfrom lxml import etreeclass phto():def __init__(self):self.url="https://www.soogif...
·
#!/usr/bin/python
# -*- coding:utf-8 -*-
# __author__:yanglong
# __date__:2019.4.23
import os

import requests
from lxml import etree
class phto(object):
    """Crawler that downloads the video clips listed on soogif.com.

    The crawl starts from the ``/sort/147`` index page, collects the category
    links found there, walks the first nine listing pages of every category
    and stores each ``<video>`` source under ``./img/`` as an ``.mp4`` file
    named after the element's ``alt`` text.
    """

    def __init__(self):
        # Site root; category links and listing paths are appended to it.
        self.url = "https://www.soogif.com"
        # The header name must be spelled exactly "User-Agent" (no spaces),
        # otherwise the server receives an unrelated header and the default
        # python-requests agent string is used instead.
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/73.0.3683.103 Safari/537.36"
        }

    def send_requests(self, url, timeout=30):
        """Fetch *url* with the crawler's headers and return the raw body.

        :param url: absolute URL to download.
        :param timeout: seconds to wait for the server before giving up, so
            that one stalled connection cannot hang the whole crawl.
        :return: the response body as bytes.
        :raises requests.RequestException: on connection errors or timeout.
        """
        response = requests.get(url, headers=self.header, timeout=timeout)
        return response.content

    def save_data(self, img_data, img_name):
        """Download the clip at URL *img_data* and save it as ./img/<name>.mp4.

        :param img_data: URL of the media file to download.
        :param img_name: display name of the clip, used as the file name.
        """
        data = self.send_requests(img_data)
        # The name comes from scraped page content: neutralise path
        # separators so it cannot point outside the output directory.
        safe_name = img_name.replace("/", "_").replace("\\", "_")
        # open() does not create missing directories.
        if not os.path.isdir("./img"):
            os.makedirs("./img")
        name = "./img/" + safe_name + ".mp4"
        print(u"正在保存图片:%s" % img_name)
        with open(name, "wb") as f:
            f.write(data)

    def get_data(self, data, rel):
        """Parse *data* as HTML and return the result of XPath query *rel*.

        :param data: HTML document (bytes or text).
        :param rel: XPath expression to evaluate against the document.
        :return: list of XPath matches; empty when *data* is empty.
        """
        if not data:
            # Nothing to parse; avoid handing an empty document to lxml.
            return []
        elment = etree.HTML(data)
        return elment.xpath(rel)

    def get_all_url(self):
        """Return the absolute URLs of the categories linked from /sort/147."""
        all_path = "/html/body/div[10]/div/div[1]/div[2]/a/@href"
        response = self.send_requests(self.url + '/sort/147')
        # The page yields site-relative hrefs; prefix them with the site root.
        return [self.url + href for href in self.get_data(response, all_path)]

    def run(self):
        """Crawl every category and download each clip found.

        For each category, listing pages 1 through 9 are requested with
        ``pageSize=28``. Every ``<video>`` element on a page is saved using
        its ``src`` as the download URL and its ``alt`` as the file name.
        """
        # Select the elements themselves and read both attributes from the
        # same node: two independent attribute queries zipped together would
        # pair names with the wrong clips as soon as one attribute is missing.
        video_path = "//div[@class='up clearfix']/a/figure/div/video"
        for each_page in self.get_all_url():
            for page_number in range(1, 10):
                main_url = each_page + "?pageSize=28&pageNumber={}".format(page_number)
                response_data = self.send_requests(main_url)
                for video in self.get_data(response_data, video_path):
                    src = video.get("src")
                    name = video.get("alt")
                    # Without a source there is nothing to fetch, and without
                    # a name there is no file name to save it under.
                    if src and name:
                        self.save_data(src, name)
if __name__ == '__main__':
    # Script entry point: build the crawler and start the download run.
    crawler = phto()
    crawler.run()
更多推荐
已为社区贡献1条内容
所有评论(0)