喜欢python的同学应该都知道python有很多库,使得我们写代码的时候相比Java、C语言要方便简单、高效很多。今天我们要用到的库是第三方库,需要我们自己下载安装。

 

点击Terminal进入命令行模式,输入 pip install pymysql 并按下回车即可下载。等待下载安装完成之后我们就可以开始了。(这里写的是爬虫的模板,抓取到数据后再写入数据库;需要先安装好MySQL数据库)

第一步:写代码之前肯定是导库了

导入我们可能用到的库

import requests
import jsonpath
import json
import time
# 导入与MySQL数据交互的pymysql
from pymysql import *

第二步:敲代码(这里必须要会一点MySQL的基本使用,不然可能看不懂)

import requests
import jsonpath
import json
import time
# 导入与MySQL数据交互的pymysql
from pymysql import *

if __name__ == '__main__':
    # Scrape course comments page by page and store each (nick_name, comment)
    # pair in the MySQL table `txkt001`.
    #
    # Connection parameters:
    #   host     - IP address of the MySQL server. For a local server on
    #              Windows, run `ipconfig` in a command prompt and use the
    #              IPv4 address it reports.
    #   user     - MySQL user name; password is that user's password.
    #   database - the database to operate on; it must already exist.
    #   port     - MySQL port number (3306 here).
    #   charset  - character encoding used for the connection.
    # NOTE(review): 257 is not a valid IPv4 octet, so this host is a
    # placeholder and must be replaced with a real address.
    mysql_obj = connect(host="192.168.257.223", user="root", password="mysql", database="spider2011", port=3306, charset="utf8mb4")
    print('数据库连接成功')

    # Statements are executed through a cursor created from the connection.
    obj_ = mysql_obj.cursor()

    # The request headers are identical for every page, so build them once.
    # NOTE(review): the Cookie below is a hard-coded login session; replace it
    # with your own and avoid publishing real session cookies.
    # The cookie is written as two adjacent literals (joined by Python at
    # compile time) because a single-quoted string cannot span physical lines.
    headers_ = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
        'Cookie': 'pgv_pvi=4242348032; RK=BBDdnAh9fO; ptcz=5e8dd82b2f82d2e067d1833301b0b83106abae37947aa4a9c09c147ba38caae4; pgv_pvid=2844100480; pgv_si=s8198533120; tvfe_boss_uuid=7ba2b9a7a32b459c; midas_openid=1965418605; midas_openkey=@v4N7CpoRm; ptisp=ctc; eas_sid=z155k9g0s359C0x4k0I1B1j8b1; lolqqcomrouteLine=a20191126pulsefire_index-tool_main_a20200109season_index-tool_index-page_main; ied_qq=o1919270709; uin_cookie=o1919270709; pgv_info=ssid=s4943067190&pgvReferrer=https://qt.qq.com/zhibo/index.html; mobileUV=1_173a90b93df_b69f0; iip=0; LW_sid=X1G620c6G1s3w5k8S9g5w4S8i4; LW_uid=S1n6z0l6i1P3x5Z8S9z5G4T8m8; pvpqqcomrouteLine=index_herolist_herodetail_herodetail; qqmusic_uin=; qqmusic_fromtag=; qz_gdt=xpo4cx64aaaiedlh3y5a; _tucao_userinfo=SSt4cjhQVVJFdkNQbEpTVEtqUXQ5eG9wNSs5K0xUd01oWUhwYUtteUtTYjFQdnM2L1c5R09LZzVKRDlrdmVWSHI5OENTUUNtMmlqKzZrR3luUENybWRoaUEyTFJKd1hvNlZwV3lWdC8zWkpsWnZKeFVjSThiejNWYVR2VXRsQkdMTkR4cXlPOXRVM2o4SHlOSlppNzNTTHJwSjBHa1FzcUI5a3IrZzNSVzF5Ym5ocEd0QXYrQnVCenJxdlFxN3FZ--tTW1vWHvvry5y7E1pKdXVA%3D%3D; o_cookie=1919270709; pac_uid=1_1919270709; ke_login_type=1; ts_refer=www.baidu.com/link; p_uin=o1919270709; localInterest=[2054]; tdw_auin_data=-; iswebp=1; ts_uid=4986155046; miniapp_qrcode_id=df3e8845d68f42c38a0f8540a83b04ab; pt4_token=dm*zCHaydGJaS4*PQCjlhLYWberNsLYQvK87guGmPuo_; p_skey=7fIl*jYYMZzCM0*3FhZcknCxz9Y6NybY4YGV-YCJwKs_; p_luin=o1919270709; p_lskey=00040000fb3d356130aba0ae71b4d7ff548a3051550d427abaafe3cfb86cf0dcbf2ee75c7ce73204d53e39cd; ptui_loginuin=513829540; rv2=80A7BB71A8D6861F5D9D9E63D0AD98D5D4B49E23B01F9D2C0D; property20=6EC5368B8C28F1C81FF3B64321764C9CD81AE8DAA2D8DF13F4C37A40E7ADC76188ED1B2D71700E50; sessionPath=16152909890504494172809; _pathcode=0.17238645944388464; tdw_first_visited=1; Hm_lvt_0c196c536f609d373a16d246a117fd44=1614762183,1615011268,1615986420,1615986575; tdw_data_sessionid=161598658528956023743757; '
                  'tdw_data={"ver4":"www.baidu.com","ver5":"","ver6":"","refer":"www.baidu.com","from_channel":"","path":"addddh-0.17238645944388464","auin":"-","uin":1919270709,"real_uin":"1919270709"}; ts_last=ke.qq.com/course/383855; tdw_data_testid=; tdw_data_flowid=; tdw_data_new_2={"auin":"-","sourcetype":"tuin","sourcefrom":"7265bf35","uin":1919270709,"visitor_id":"36808235051603","sessionPath":"16152909890504494172809","ver9":1919270709}; Hm_lpvt_0c196c536f609d373a16d246a117fd44=1615986784',
        'Referer': 'https://ke.qq.com/course/383855?tuin=7265bf35'
    }

    # Parameterized statement: the driver escapes and quotes each value, so
    # scraped text containing quotes cannot break or inject into the SQL.
    insert_sql = 'insert into txkt001(id,nick_name,comment) values(0,%s,%s)'

    # try/finally guarantees the cursor and connection are closed even when a
    # request, JSON decode, or insert fails part-way through.
    try:
        pages = int(input('请输入想要抓取的页数:'))
        for page in range(pages):  # page index starts at 0
            # 1. Build the URL for this page.
            url_ = f'https://ke.qq.com/cgi-bin/comment_new/course_comment_list?cid=383855&count=10&page={page}&filter_rating=0&bkn=1836436529&r=0.3440207316955324'

            # 2. Send the request; the timeout keeps a stalled connection
            #    from hanging the script forever.
            response_ = requests.get(url_, headers=headers_, timeout=10)
            data_ = response_.json()

            # 3. Extract every nick name and comment from the JSON payload.
            name_list = jsonpath.jsonpath(data_, '$..nick_name')
            comment_list = jsonpath.jsonpath(data_, '$..first_comment')

            # jsonpath.jsonpath returns False (not an empty list) when
            # nothing matches, so skip pages that yielded no data.
            if not name_list or not comment_list:
                continue

            # 4. Insert one row per (nick_name, comment) pair. zip pairs the
            #    lists in lockstep and stops at the shorter one.
            for nick_name, comment in zip(name_list, comment_list):
                obj_.execute(insert_sql, (nick_name, comment))

        # Commit once after all pages succeed so the inserts are persisted.
        mysql_obj.commit()
    finally:
        # Close the cursor first, then the connection.
        obj_.close()
        mysql_obj.close()

更多推荐