excel里面有重复数据,需要对重复数据去重

from openpyxl import load_workbook
import requests
import numpy as np

class DoExcel():

    def __init__(self,filepath):
        self.wb = load_workbook(filepath)
        self.sh =self.wb.active

    #读取excel内容
    def do_excel(self):
        all_data = []
        for row in range(2,self.sh.max_row+1):
            all_data.append(self.sh.cell(row=row,column=2).value)
        return all_data

    #通过内容获取行政区域和经纬度
    def gaud_map(self,address):
        url ="https://restapi.amap.com/v3/geocode/geo"
        data={
            "key": "759c9d4904jeadceb66ce9aafdfb8wbf161",
            "address":address
        }
        r = requests.get(url,params=data,timeout =10).json()
        return r["geocodes"][0]["district"],r["geocodes"][0]["location"]

    # 将数据写进excel表格里面-注意,需要将excel关闭下
    def w_excel(self,row,administrative_division,Latitude_longitude):
        # 写入行政区域
        self.sh.cell(row=row,column=3,value=administrative_division)
        # 写入经纬度
        self.sh.cell(row=row,column=4,value=Latitude_longitude)

    # 保存excel表格
    def save_excel(self,filepath):
        self.wb.save(filepath)

file_route =r'D:\服务地点4.xlsx'
excel_data=DoExcel(file_route)
a = excel_data.do_excel()
#excel里面原始数据
# print(a)
# 对原始数据去重
b =np.array(a)
# print(b)
# # print(len(list(set(a))))
for k in list(set(a)):
    try:
        c =excel_data.gaud_map(k)
    except Exception as e:
        excel_data.save_excel(file_route)
        continue
    print("地点和行政区域和经纬度",k,c)

    # 获取重复元素的下标位置
    eq_letter = np.where(b == k)
    for j in eq_letter[0] + 2:
        if c[0] == []:
            excel_data.w_excel(j, "None", c[1])
        else:
            excel_data.w_excel(j, c[0], c[1])
        print(j)
    excel_data.save_excel(file_route)






from openpyxl import load_workbook
import requests

class DoExcel():
    
    def __init__(self,filepath):
        self.wb = load_workbook(filepath)
        self.sh =self.wb.active
        
    #读取excel内容
    def do_excel(self):
        all_data = []
        for row in range(12,self.sh.max_row+1):
            all_data.append(self.sh.cell(row=row,column=2).value)
        return all_data

    #通过内容获取行政区域和经纬度
    def gaud_map(self,address):
        url ="https://restapi.amap.com/v3/geocode/geo"
        data={
            "key": "",
            "address":address
        }
        r = requests.get(url,params=data).json()
        return r["geocodes"][0]["district"],r["geocodes"][0]["location"]

    # 将数据写进excel表格里面-注意,需要将excel关闭下
    def w_excel(self,row,administrative_division,Latitude_longitude):
        # 写入行政区域
        self.sh.cell(row=row,column=3,value=administrative_division)
        # 写入经纬度
        self.sh.cell(row=row,column=4,value=Latitude_longitude)

    # 保存excel表格
    def save_excel(self,filepath):
        self.wb.save(filepath)

file_route =r'D:\服务地点1.xlsx'
excel_data=DoExcel(file_route)
a = excel_data.do_excel()

d =12
for k in list(set(a)):
    try:
        c =excel_data.gaud_map(k)
    except Exception as e:
        excel_data.save_excel(file_route)
        continue
    print("地点和行政区域和经纬度",k,c)
    # 获取重复元素的下标位置
    if c[0] ==[]:
        excel_data.w_excel(d, "None", c[1])
    else:
        excel_data.w_excel(d, c[0], c[1])
    d+=1
excel_data.save_excel(file_route)
Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐