Python pandas读取Excel 数据写入到数据库

Python pandas读取Excel 数据写入到数据库

高个子男孩

3678人浏览 · 2022-01-14 09:18:53

高个子男孩 · 2022-01-14 09:18:53 发布

需求：将从天眼查获取的法人信息数据导入到数据库中。经过多次试错，最终选择使用 Python 导入。

先上代码

import pandas as pd
import sqlalchemy as sqla
import os


# Read the Excel data
def getexcel(filename):
    """Load an Excel sheet and rename its headers to database field names.

    The first two rows of the sheet are skipped before the header row is
    read. Headers found in the mapping below are renamed to the matching
    field name; any other column keeps its original name.

    Args:
        filename: Path of the Excel file, passed straight to ``pd.read_excel``.

    Returns:
        The loaded ``DataFrame`` with the known headers renamed.
    """
    # Excel header -> database field name.
    # NOTE(review): two headers map to 'telephone' and two to 'other_phone';
    # a sheet containing both variants would end up with duplicate column
    # names. Presumably they come from different export layouts - confirm.
    header_to_field = {
        '公司名称': 'company_name',
        '经营状态': 'business_status',
        '法定代表人': 'legal_representative',
        '注册资本': 'registered_capital',
        '实缴资本': 'paid_up_capital',
        '成立日期': 'date_of_establishment',
        '核准日期': 'approval_date',
        '所属省份': 'province',
        '所属城市': 'city',
        '所属区县': 'districts_and_counties',
        '统一社会信用代码': 'unified_social_credit_code',
        '纳税人识别号': 'taxpayer_identification_number',
        '注册号': 'registration_number',
        '组织机构代码': 'organization_code',
        '参保人数': 'number_of_participants',
        '公司类型': 'type_of_company',
        '所属行业': 'industry',
        '曾用名': 'former_name',
        '注册地址': 'registered_address',
        '最新年报地址': 'the_latest_annual_report_address',
        '网址': 'url',
        '电话': 'telephone',
        '邮箱': 'mail',
        '其他邮箱': 'other_mailboxes',
        '经营范围': 'business_scope',
        '可用电话\n号码正常，可联系': 'telephone',
        '其他电话': 'other_phone',
        '其他号码\n未成功检测号码，或固话，或非大陆号码': 'other_phone',
        '不可用电话\n空号、停机、沉默号、风险号，或同企业电话数量过多，建议跳过': 'unavailable_telephone',
    }

    # Skip the two rows that precede the header row.
    frame = pd.read_excel(filename, skiprows=2)

    # Restrict the mapping to the headers this sheet actually contains.
    present = {
        header: header_to_field[header]
        for header in frame.columns
        if header in header_to_field
    }
    frame.rename(columns=present, inplace=True)
    return frame


# Get the database connection
def getCon():
    """Create and return a SQLAlchemy engine for the PostgreSQL database.

    NOTE(review): the URL below looks like a template - the user, password,
    host and database parts appear to be placeholders that must be replaced
    with real values before running.
    """
    connection_url = "postgresql+psycopg2://etl_user用户名:etl_user密码@172.16.xx.xxIP/xxxx数据库"
    engine = sqla.create_engine(connection_url)
    return engine


# Recursively read every file under a folder
def readFile(rootdir, db=None):
    """Walk ``rootdir`` recursively and append each file's rows to the database.

    Every regular file is printed, loaded with ``getexcel`` and appended to
    the ``md_corporate_information_all`` table. A file that fails to load or
    to be written is reported and skipped, so the remaining files are still
    processed. Entries that are neither a directory nor a regular file are
    only reported.

    Args:
        rootdir: Directory to scan.
        db: Connection or engine handed to ``DataFrame.to_sql``. When omitted,
            one is created with ``getCon()`` and then shared by the whole
            walk, instead of building a new engine for every sub-directory.
    """
    if db is None:
        db = getCon()

    for name in os.listdir(rootdir):
        path = os.path.join(rootdir, name)
        if os.path.isdir(path):
            # Reuse the same connection for nested folders.
            readFile(path, db)
        elif os.path.isfile(path):
            try:
                # Show which file is being processed
                print(path)
                # Load the data
                df = getexcel(path)
                # Write it to the database
                df.to_sql('md_corporate_information_all', db, index=False, if_exists='append')
            except Exception as e:
                # Best-effort import: report the file and move on. str(e) is
                # required - concatenating the exception object itself raises
                # TypeError, which used to abort the run on the first bad file.
                print('异常数据:' + path + ', 错误日志:' + str(e))
        else:
            print('其他:' + path)


if __name__ == '__main__':
    # Raw string: '\天' is not a valid escape sequence, so a plain literal
    # triggers an invalid-escape warning on recent Python versions. The path
    # value itself is unchanged.
    readFile(r'E:\天眼查')