1.所需环境

Python环境:我使用的是Python 3.9.2、PyCharm以及MySQL 5.7。大家可以自行搜索下载安装教程。

使用的第三方库为pymysql和requests_html。按Win+R打开命令行(cmd)后,输入以下命令安装。

pip install pymysql

pip install requests_html


中国地震台网——历史查询:http://www.ceic.ac.cn/history

具体代码如下

from requests_html import HTMLSession
from lxml import etree
import pymysql
import json


class Myspider():
    """Fetch earthquake records from the CEIC history search and store them in MySQL.

    ``run`` is the entry point: it pages through the AJAX search endpoint,
    inserts one row per record into the ``test`` table and then closes the
    database handles.
    """

    # Keys read from each record, in the column order used for the INSERT.
    # NOTE(review): 'SAVE_TIME' is kept from the original code; confirm it is
    # the intended timestamp field for the target table.
    _FIELDS = ('M', 'SAVE_TIME', 'EPI_LAT', 'EPI_LON', 'EPI_DEPTH', 'LOCATION_C')

    def __init__(self):
        """Create the HTTP session, request headers and the MySQL connection."""
        # ``{}`` is replaced with the page number in ``parse``.
        self.url = 'http://www.ceic.ac.cn/ajax/search?page={}&&start=1950-01-01&&end=2022-02-28&&jingdu1=&&jingdu2=&&weidu1=&&weidu2=&&height1=&&height2=&&zhenji1=&&zhenji2=&'
        self.session = HTMLSession()
        # Header names/values must not contain the stray spaces the original
        # had ('User - Agent', 'http: // ...'), otherwise the server does not
        # see them as the intended headers.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
            'Cookie': 'PHPSESSID=c5c60a38a7bf98c06236f6bc55a0146c',
            'Host': 'www.ceic.ac.cn',
            'Referer': 'http://www.ceic.ac.cn/history',
            'X-Requested-With': 'XMLHttpRequest'
        }
        # Placeholder connection settings; replace with real values before running.
        host = "localhost"
        port = 3306
        db = '你自己的数据库名'
        user = '数据库账号'
        password = "数据库密码"

        self.conn = pymysql.connect(host=host, port=port, db=db, user=user, password=password)
        self.cursor = self.conn.cursor()

    @staticmethod
    def _decode_payload(text):
        """Decode a response body into Python data without executing it.

        Accepts plain JSON as well as JSON wrapped in one pair of parentheses
        (``({...})``).  The wrapped form is presumably why the original code
        used ``eval`` -- confirm against a live response.

        Raises:
            json.JSONDecodeError: if the body is not valid JSON.
        """
        payload = text.strip()
        if payload.startswith('(') and payload.endswith(')'):
            payload = payload[1:-1]
        return json.loads(payload)

    @classmethod
    def _to_row(cls, record):
        """Return the tuple of column values for one record dict.

        Raises:
            KeyError: if the record lacks one of the expected keys.
        """
        return tuple(record[key] for key in cls._FIELDS)

    def parse(self):
        """Download pages 1-20 of the search results and store every record."""
        for page in range(1, 21):
            # A timeout keeps a stalled connection from hanging the crawl forever.
            response = self.session.get(
                self.url.format(page), headers=self.headers, timeout=30)
            response.raise_for_status()
            response.encoding = 'utf-8'
            # Never eval() text that came from the network; parse it as JSON.
            data = self._decode_payload(response.text)
            for record in data['shuju']:
                self.saveMySql(self._to_row(record))

    def saveMySql(self, data1):
        """Insert one record into the ``test`` table and commit.

        Args:
            data1: sequence whose first six items are magnitude, time,
                latitude, longitude, depth and location.
        """
        # Let the driver bind the values: string-formatting them into the SQL
        # breaks on quotes in the data and allows SQL injection.
        self.cursor.execute(
            "INSERT INTO test VALUES (%s, %s, %s, %s, %s, %s)",
            tuple(data1[:6]))
        self.conn.commit()

    def run(self):
        """Run the crawl, always releasing the cursor and connection afterwards."""
        try:
            self.parse()
        finally:
            self.cursor.close()
            self.conn.close()


if __name__ == "__main__":
    # Script entry point: constructing the spider opens the MySQL connection,
    # and run() performs the crawl and then closes the database handles.
    spider = Myspider()
    spider.run()

点击阅读全文
Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐