# -*- coding: utf-8 -*-

import requests

import os

import random

from PIL import Image

from bs4 import BeautifulSoup

import numpy

# Pool of browser User-Agent header values; GetBaiduImg.getPages picks one at
# random for its search requests.
# NOTE(review): every entry appears to have lost the spaces a real browser
# User-Agent contains (e.g. "Windows NT 6.1") -- confirm the server accepts them.
# Entries written as two adjacent literals are ONE string (implicit
# concatenation); they are parenthesised so they do not read as a missing comma.
user_agent = [
    'Mozilla/5.0(Windows;U;WindowsNT6.1;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50',
    # Closing parenthesis restored; the original literal ended at "Trident/5.0".
    'Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0)',
    'Mozilla/4.0(compatible;MSIE8.0;WindowsNT6.0;Trident/4.0)',
    'Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1',
    (
        'Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,'
        'likeGecko)Chrome/17.0.963.56Safari/535.11'
    ),
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Maxthon2.0)',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TencentTraveler4.0)',
    (
        'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Trident/4.0;SE2.XMetaSr1.0;SE2.XMetaSr1.0;.NETCLR2.0.50727'
        ';SE2.XMetaSr1.0)'
    ),
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)',
    (
        'Mozilla/5.0(Macintosh;U;IntelMacOSX10_6_8;en-us)AppleWebKit/534.50(KHTML,'
        'likeGecko)Version/5.1Safari/534.50'
    ),
]

class GetBaiduImg(object):
    """Download Baidu image-search thumbnails and write relief-shaded copies.

    Creating an instance runs the whole pipeline: it queries the ``acjson``
    search endpoint for the requested number of result pages, downloads every
    matching image URL into ``./baidu`` and finally writes a shaded grayscale
    copy of each image in that directory.
    """

    # Base request headers; a random user-agent is added for each search.
    kw = {'Host': 'image.baidu.com'}

    # Every URL collected by any getPages() call so far (shared by all instances).
    urls = list()

    # Proxy mapping passed to requests. requests picks a proxy by URL scheme,
    # so this "http" entry is not consulted for the https:// URLs used below.
    proxies = {"http": "175.43.58.44:9999"}

    # Seconds to wait on the server; without a timeout requests can block forever.
    timeout = 10

    # File-name prefix given to the shaded images written by convertColor().
    gray_prefix = '[灰度照]'

    def __init__(self, result, n):
        """
        :param result: keyword to search for on Baidu Images
        :param n: number of result pages to crawl
        """
        self.downloadJpg(datalist=self.getPages(result, n))
        self.convertColor('./baidu')

    @staticmethod
    def extract_image_urls(text):
        """Return the image URLs found in a raw search response.

        The response is split on double quotes and a piece is kept only when
        it contains ALL of ``https://ss``, ``.jpg`` and ``bdstatic``.

        :param text: decoded response body
        :return: list of matching URL strings, in order of appearance
        """
        markers = ("https://ss", ".jpg", "bdstatic")
        # The original condition `"https://ss" and ".jpg" and "bdstatic" in a`
        # only ever tested the last marker; each one is checked explicitly here.
        return [piece for piece in text.split('"')
                if all(marker in piece for marker in markers)]

    @staticmethod
    def _query(keyword, offset):
        """Build the query-string parameters for one result page."""
        return {'tn': 'resultjson_com', 'ipn': 'rj',
                'ct': '201326592', 'is': '', 'fp': 'result', 'queryWord': keyword, 'cl': '2',
                'lm': '-1', 'ie': 'utf-8', 'oe': 'utf-8', 'adpicid': '', 'st': '-1',
                'z': '', 'ic': '0', 'hd': '', 'latest': '', 'copyright': '',
                'word': keyword, 's': '', 'se': '', 'tab': '', 'width': '', 'height': '',
                'face': '0', 'istype': '2', 'qc': '', 'nc': '1', 'fr': '', 'expermode': '',
                'force': '', 'pn': offset, 'rn': '30', 'gsm': '1e', '1597926642252': ''}

    def getPages(self, keyword, pages):
        """Query the search endpoint and collect image URLs.

        :param keyword: search keyword
        :param pages: number of result pages to request (30 results each)
        :return: set of image URLs found by THIS call. If a request fails, the
            error is printed and the URLs gathered before the failure are
            returned, so the result is always a set and never ``None``.
        """
        # Per-call copy so the shared class-level header dict is not mutated.
        headers = dict(GetBaiduImg.kw)
        headers['user-agent'] = random.choice(user_agent)
        start_url = 'https://image.baidu.com/search/acjson'
        found = []
        try:
            # "pn" is sent as 30, 60, ... up to pages * 30.
            for offset in range(30, pages * 30 + 30, 30):
                res = requests.request(method='get', url=start_url, headers=headers,
                                       params=self._query(keyword, offset),
                                       proxies=self.proxies, timeout=self.timeout)
                res.raise_for_status()
                # errors='replace' keeps one bad byte from aborting the crawl.
                response = res.content.decode('utf-8', errors='replace')
                found.extend(self.extract_image_urls(response))
        except requests.RequestException as e:
            print('mistake info==>', str(e))
        # Keep the class-level history, but return only this call's URLs so a
        # second search does not re-download the results of an earlier one.
        GetBaiduImg.urls.extend(found)
        return set(found)

    def downloadJpg(self, datalist, direct='./baidu'):
        """Download every URL in ``datalist`` to ``<direct>/<counter>.jpg``.

        :param datalist: iterable of image URLs; ``None`` or empty downloads nothing
        :param direct: target directory, created when missing
        """
        os.makedirs(direct, exist_ok=True)
        x = 1
        for data in datalist or ():
            if not data:
                continue
            print(f'downloading img {data}')
            try:
                resp = requests.request(method='get', url=data,
                                        proxies=self.proxies, timeout=self.timeout)
                # Do not store an HTTP error page under a .jpg name.
                resp.raise_for_status()
                with open(f'{direct}/{x}.jpg', 'wb') as fh:
                    fh.write(resp.content)
            except (requests.RequestException, OSError) as exp:
                print("mistake info==>", str(exp))
            else:
                # The counter only advances after a successful write.
                x += 1

    @staticmethod
    def relief_shade(gray, depth=10):
        """Shade a grayscale image from its intensity gradient.

        The gradients along both axes are scaled by ``depth / 100``, combined
        with a constant z component of 1 into unit vectors, and projected onto
        a fixed direction vector built from the angles ``pi / 2.2`` and
        ``pi / 4``.

        :param gray: 2-D array-like of intensities
        :param depth: gradient scale factor (the value 10 reproduces the
            original hard-coded setting)
        :return: ``uint8`` array with the same shape as ``gray``
        :raises ValueError: from ``numpy.gradient`` when an axis is too short
        """
        gray = numpy.asarray(gray, dtype='float')
        # numpy.gradient returns one array per axis: axis 0 first, then axis 1.
        grad_x, grad_y = numpy.gradient(gray)
        grad_x = grad_x * depth / 100
        grad_y = grad_y * depth / 100
        norm = numpy.sqrt(grad_x ** 2 + grad_y ** 2 + 1)
        uni_x = grad_x / norm
        uni_y = grad_y / norm
        uni_z = 1 / norm
        vec_el = numpy.pi / 2.2
        vec_az = numpy.pi / 4
        dx = numpy.cos(vec_el) * numpy.cos(vec_az)
        dy = numpy.cos(vec_el) * numpy.sin(vec_az)
        dz = numpy.sin(vec_el)
        shaded = 255 * (dx * uni_x + dy * uni_y + dz * uni_z)
        return shaded.clip(0, 255).astype('uint8')

    def convertColor(self, direct):
        """Write a relief-shaded grayscale copy of every image in ``direct``.

        Each output is saved beside its source with ``gray_prefix`` prepended
        to the file name. Files that already carry that prefix are skipped so
        a second run does not shade its own output, and entries that cannot be
        processed are reported and skipped.

        :param direct: directory containing the downloaded images
        """
        for i in os.listdir(direct):
            if i.startswith(self.gray_prefix):
                continue
            try:
                with Image.open(direct + '/' + f'{i}') as src:
                    im = numpy.array(src.convert('L')).astype('float')
                print(f"converting img {i} with ", im.shape, im.dtype)
                out = Image.fromarray(self.relief_shade(im))
                out.save(direct + '/' + f'{self.gray_prefix}{i}')
            except (OSError, ValueError) as exp:
                # OSError: unreadable / non-image entry; ValueError: image too
                # small for numpy.gradient or an unsupported output extension.
                print('mistake info==>', str(exp))

if __name__ == '__main__':
    # Script entry point: crawl one result page for the sample keyword; the
    # constructor performs the search, the downloads and the conversion.
    baiduimg = GetBaiduImg(n=1, result="郁金香")

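# (Web-page footer captured together with the snippet; not part of the program.)
# Logo
# Provides developers with services for learning and growth, sharing and
# exchange, ecosystem practice, and resources and tools, helping developers
# grow quickly.
# More recommendations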