前言

通过地址查询邮编处理步骤:
1. 首先要对地址进行格式处理,去掉多余部分:"某某路"之后的内容(门牌号等)一律舍弃,因为地址库中通常没有这么细的记录,带上反而会查不到;
2. 对省市县进行处理,去掉省市县(民族)后缀;
3. 若提供的详细地址在库中查不到,则从地址尾部去掉一个字再查询,直到查询到数据为止;
4. 使用 MySQL 全文检索(需 5.7 及以上版本,中文分词依赖 ngram 解析器),提高查询精度;


一、数据准备

首先准备全国邮编数据库。这里我整理了一份邮编数据库,并提供下载地址供大家使用。

-- Detail table of domestic postcodes: one row per full address string, together with
-- its postcode and the administrative parts (province / city / district / street).
CREATE TABLE `postcode_detail` (
  -- The complete address is also the primary key, so every address is stored once.
  `detail_address` varchar(500) NOT NULL COMMENT '地址唯一标识(完整地址)',
  `postcode` varchar(10) NOT NULL COMMENT '邮政编码',
  `province_name` varchar(100) DEFAULT NULL COMMENT '省份名称',
  `city_name` varchar(100) DEFAULT NULL COMMENT '城市名称',
  `district_name` varchar(100) DEFAULT NULL COMMENT '区县名称',
  `street_name` varchar(100) DEFAULT NULL COMMENT '乡镇街道',
  `address` varchar(255) DEFAULT NULL COMMENT '地址',
  -- Audit columns maintained by the server: set on insert, refreshed on every update.
  `create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `update_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
  PRIMARY KEY (`detail_address`) USING BTREE,
  -- Secondary indexes: reverse lookup by postcode, and by administrative hierarchy.
  KEY `index_postcode` (`postcode`),
  KEY `index_name` (`province_name`,`city_name`,`district_name`,`street_name`),
  -- Full-text index used by the MATCH ... AGAINST lookup. The parser clause sits in a
  -- MySQL version comment (/*!50100 ... */), so only servers at or above that version
  -- execute it; the ngram parser is what allows tokenising Chinese text.
  FULLTEXT KEY `index_detail_address` (`detail_address`) /*!50100 WITH PARSER `ngram` */ 
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='国内邮编信息详情表';

数据效果图如下:
数据效果图如下


二、代码实现

1. 格式化地址对象


import cn.hutool.core.util.StrUtil;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;

import java.io.Serializable;

/**
 * An address normalised for postcode lookup.
 * <p>
 * {@link #parse(String, String, String, String, boolean)} splits a raw address into
 * province / city / district / remaining address, strips administrative and ethnic
 * suffixes from the names, truncates the remaining address at the first street-level
 * keyword and builds the boolean-mode full-text search expression.
 *
 * @author chqiu
 */
@Data
@ApiModel(value = "格式化地址对象")
public class FormattedAddressDTO implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * Suffixes that terminate a province-level name.
     */
    private static final String[] PROVINCE_POSTFIXS = new String[]{"省", "市", "自治区"};
    /**
     * Suffixes that terminate a city-level name.
     */
    private static final String[] CITY_POSTFIXS = new String[]{"市", "自治州", "地区", "自治县", "县", "盟"};
    /**
     * Suffixes that terminate a district/county-level name.
     */
    private static final String[] DISTRICT_POSTFIXS = new String[]{"市", "区", "自治县", "县", "自治旗", "旗"};

    /**
     * Suffixes removed by {@link #getSimplifiedName(String)}: administrative unit
     * suffixes first, then ethnic-group names. The array is scanned in this order.
     */
    private static final String[] EXCLUDES = new String[]{"省", "自治区", "自治州", "地区", "市", "自治县", "区", "县", "自治旗", "旗",
            "维吾尔", "哈萨克", "蒙古", "各族"
            , "傈僳族", "哈尼族", "羌族", "满族", "蒙古族", "回族", "藏族", "维吾尔族", "苗族", "彝族", "壮族", "布依族", "侗族", "瑶族", "白族", "土家族", "哈尼族", "哈萨克族", "傣族", "黎族", "傈僳族", "佤族", "畲族", "高山族", "拉祜族", "水族", "东乡族", "纳西族", "景颇族", "柯尔克孜族", "土族", "达斡尔族", "仫佬族", "羌族", "布朗族", "撒拉族", "毛南族", "仡佬族", "锡伯族", "阿昌族", "普米族", "朝鲜族", "塔吉克族", "怒族", "乌孜别克族", "俄罗斯族", "鄂温克族", "德昂族", "保安族", "裕固族", "京族", "塔塔尔族", "独龙族", "鄂伦春族", "赫哲族", "门巴族", "珞巴族", "基诺族"};

    /**
     * Street-level keywords, in the priority order used when truncating the address.
     */
    private static final String[] LEVELS = new String[]{"路", "村", "庄", "里", "街道", "街", "农场", "巷", "弄", "屯", "胡同", "小区", "大道", "乡", "镇"};

    /**
     * Province name with its suffix removed (column {@code province_name}).
     */
    @ApiModelProperty(value = "省份名称")
    private String provinceName;
    /**
     * City name with its suffix removed (column {@code city_name}).
     */
    @ApiModelProperty(value = "城市名称")
    private String cityName;
    /**
     * District/county name with its suffix removed (column {@code district_name}).
     */
    @ApiModelProperty(value = "区县名称")
    private String districtName;
    /**
     * Township/street name (column {@code street_name}); not populated by {@code parse}.
     */
    @ApiModelProperty(value = "乡镇街道")
    private String streetName;
    /**
     * Address remaining after province, city and district were removed (column {@code address}).
     */
    @ApiModelProperty(value = "地址")
    private String address;
    /**
     * Remaining address truncated at the first street-level keyword.
     */
    private String smailAddress;
    /**
     * Full address: un-simplified province + city + district followed by {@link #smailAddress}.
     */
    private String detailAddress;
    /**
     * Generated boolean-mode full-text search expression, e.g. {@code "+云南 +昆明 +呈贡 <石龙"}.
     */
    private String againstAddress;

    /**
     * Parses a free-form address, extracting province, city and district from the text.
     *
     * @param address raw address
     * @return the formatted address
     */
    public static FormattedAddressDTO parse(String address) {
        return parse(null, null, null, address, false);
    }

    /**
     * Parses an address whose province, city and district may already be known.
     *
     * @param provinceName province name, or empty to extract it from {@code address}
     * @param cityName     city name, or empty to extract it from {@code address}
     * @param districtName district name, or empty to extract it from {@code address}
     * @param address      raw address
     * @return the formatted address
     */
    public static FormattedAddressDTO parse(String provinceName, String cityName, String districtName, String address) {
        return parse(provinceName, cityName, districtName, address, false);
    }

    /**
     * Parses an address and optionally shortens it by one character.
     * <p>
     * When {@code isSubStr} is set, the last character is dropped from the first
     * non-empty part in the order address, district, city, province. A part shorter
     * than three characters is cleared entirely.
     *
     * @param provinceName province name, or empty to extract it from {@code address}
     * @param cityName     city name, or empty to extract it from {@code address}
     * @param districtName district name, or empty to extract it from {@code address}
     * @param address      raw address
     * @param isSubStr     whether to shorten the address by one character
     * @return the formatted address
     */
    public static FormattedAddressDTO parse(String provinceName, String cityName, String districtName, String address, boolean isSubStr) {
        if (StrUtil.isNotEmpty(address)) {
            // Any level the caller did not supply is taken from the head of the address
            // and then removed from it, so the next level is parsed from what is left.
            if (StrUtil.isEmpty(provinceName)) {
                provinceName = parseProvince(address);
                if (StrUtil.isNotEmpty(provinceName)) {
                    address = address.replace(provinceName, "");
                }
            }
            if (StrUtil.isEmpty(cityName)) {
                cityName = parseCity(address);
                if (StrUtil.isNotEmpty(cityName)) {
                    address = address.replace(cityName, "");
                }
            }
            if (StrUtil.isEmpty(districtName)) {
                districtName = parseDistrict(address);
                if (StrUtil.isNotEmpty(districtName)) {
                    address = address.replace(districtName, "");
                }
            }
        }

        if (isSubStr) {
            // Shorten the most specific non-empty part first.
            if (StrUtil.isNotEmpty(address)) {
                address = dropLastChar(address);
            } else if (StrUtil.isNotEmpty(districtName)) {
                districtName = dropLastChar(districtName);
            } else if (StrUtil.isNotEmpty(cityName)) {
                cityName = dropLastChar(cityName);
            } else if (StrUtil.isNotEmpty(provinceName)) {
                // Operates on provinceName; reading districtName here would fail because
                // it is known to be empty (or null) in this branch.
                provinceName = dropLastChar(provinceName);
            }
        }

        String smailAddress = parseSmailAddress(provinceName, cityName, districtName, address);
        FormattedAddressDTO dto = new FormattedAddressDTO();
        dto.setProvinceName(getSimplifiedName(provinceName));
        dto.setCityName(getSimplifiedName(cityName));
        dto.setDistrictName(getSimplifiedName(districtName));
        dto.setAddress(address);
        dto.setSmailAddress(smailAddress);
        // Null-safe concatenation: a missing level must not show up as the text "null".
        dto.setDetailAddress(emptyIfNull(provinceName) + emptyIfNull(cityName) + emptyIfNull(districtName) + smailAddress);
        dto.setAgainstAddress(parseAgainstAddress(dto.getProvinceName(), dto.getCityName(), dto.getDistrictName(), dto.getSmailAddress()));
        return dto;
    }

    /**
     * Returns the value itself, or an empty string when it is {@code null}.
     */
    private static String emptyIfNull(String value) {
        return null == value ? "" : value;
    }

    /**
     * Removes the last character; values shorter than three characters become empty.
     */
    private static String dropLastChar(String value) {
        return value.length() < 3 ? "" : value.substring(0, value.length() - 1);
    }

    /**
     * Extracts the province name from the head of an address.
     *
     * @param address address, not null
     * @return province name including its suffix, or {@code null} if none was found
     */
    private static String parseProvince(String address) {
        return parsePostfix(address, PROVINCE_POSTFIXS);
    }

    /**
     * Extracts the city name from the head of an address.
     *
     * @param address address, not null
     * @return city name including its suffix, or {@code null} if none was found
     */
    private static String parseCity(String address) {
        return parsePostfix(address, CITY_POSTFIXS);
    }

    /**
     * Extracts the district name from the head of an address.
     *
     * @param address address, not null
     * @return district name including its suffix, or {@code null} if none was found
     */
    private static String parseDistrict(String address) {
        return parsePostfix(address, DISTRICT_POSTFIXS);
    }

    /**
     * Returns the head of the address up to and including the earliest-ending suffix.
     * <p>
     * The earliest match is chosen rather than the first suffix in array order;
     * otherwise "广西壮族自治区南宁市…" would yield "广西壮族自治区南宁市" as the province,
     * because "市" precedes "自治区" in the suffix list. A suffix at index 0 is ignored,
     * since a name needs at least one character before its suffix.
     *
     * @param address  address, not null
     * @param postfixs candidate suffixes
     * @return the matched head of the address, or {@code null} if no suffix occurs
     */
    private static String parsePostfix(String address, String[] postfixs) {
        int bestEnd = -1;
        for (String postfix : postfixs) {
            int postfixIndex = address.indexOf(postfix);
            if (postfixIndex > 0) {
                int end = postfixIndex + postfix.length();
                if (bestEnd < 0 || end < bestEnd) {
                    bestEnd = end;
                }
            }
        }
        return bestEnd < 0 ? null : address.substring(0, bestEnd);
    }

    /**
     * Strips administrative and ethnic suffixes from a place name.
     * <p>
     * Names shorter than three characters are returned unchanged, and a suffix is only
     * removed when at least two characters remain.
     *
     * @param name place name, may be null
     * @return simplified name
     */
    private static String getSimplifiedName(String name) {
        if (null == name || name.length() < 3) {
            return name;
        }
        for (String exclude : EXCLUDES) {
            if (name.length() <= 2) {
                break;
            }
            if (!name.endsWith(exclude)) {
                continue;
            }
            if (name.length() - exclude.length() < 2) {
                return name;
            }
            name = name.substring(0, name.length() - exclude.length());
            if (name.endsWith("族")) {
                // Several ethnic names may be chained (e.g. "回族彝族"); restart the scan
                // from the beginning of the list to remove each of them.
                return getSimplifiedName(name);
            }
        }
        return name;
    }

    /**
     * Builds the short address used for searching.
     * <p>
     * All parts are concatenated, every occurrence of the district, city and province
     * names is removed, and the result is cut right after the first keyword from
     * {@link #LEVELS} that it contains (keywords are tried in array order).
     *
     * @param provinceName province name, may be empty
     * @param cityName     city name, may be empty
     * @param districtName district name, may be empty
     * @param address      remaining address, may be empty
     * @return the short address, never null
     */
    private static String parseSmailAddress(String provinceName, String cityName, String districtName, String address) {
        String smailAddress = emptyIfNull(provinceName) + emptyIfNull(cityName) + emptyIfNull(districtName) + emptyIfNull(address);
        if (StrUtil.isNotEmpty(districtName)) {
            smailAddress = smailAddress.replace(districtName, "");
        }
        if (StrUtil.isNotEmpty(cityName)) {
            smailAddress = smailAddress.replace(cityName, "");
        }
        if (StrUtil.isNotEmpty(provinceName)) {
            smailAddress = smailAddress.replace(provinceName, "");
        }
        for (String level : LEVELS) {
            int levelIndex = smailAddress.indexOf(level);
            if (levelIndex >= 0) {
                // Keep everything up to and including the first matching keyword.
                smailAddress = smailAddress.substring(0, levelIndex + level.length());
                break;
            }
        }
        return smailAddress;
    }

    /**
     * Builds the boolean-mode full-text expression: province, city and district are
     * prefixed with {@code +}, the short address with {@code <}; empty parts are
     * skipped and the terms are separated by single spaces.
     *
     * @param provinceName simplified province name, may be empty
     * @param cityName     simplified city name, may be empty
     * @param districtName simplified district name, may be empty
     * @param address      short address, may be empty
     * @return the search expression, empty when every part is empty
     */
    private static String parseAgainstAddress(String provinceName, String cityName, String districtName, String address) {
        StringBuilder builder = new StringBuilder();
        appendTerm(builder, "+", provinceName);
        appendTerm(builder, "+", cityName);
        appendTerm(builder, "+", districtName);
        appendTerm(builder, "<", address);
        return builder.toString();
    }

    /**
     * Appends {@code operator + value} to the expression, space-separated from any
     * previous term; does nothing when the value is empty.
     */
    private static void appendTerm(StringBuilder builder, String operator, String value) {
        if (StrUtil.isEmpty(value)) {
            return;
        }
        if (builder.length() > 0) {
            builder.append(" ");
        }
        builder.append(operator).append(value);
    }
}

2. mapper数据库查询代码

代码如下:


/**
 * MyBatis mapper for full-text postcode lookups on the {@code postcode_detail} table.
 */
@Repository
public interface PostcodeDetailMapper {
    /**
     * Finds postcode rows whose full address matches a boolean-mode full-text expression.
     * <p>
     * Boolean-mode searches are not sorted by relevance automatically, so the query
     * orders by the computed score explicitly; without that, {@code LIMIT 2} would
     * return arbitrary matches rather than the best ones.
     *
     * @param againstAddress boolean-mode search expression, e.g. {@code "+云南 +昆明 <石龙"}
     * @return at most two matches, best score first
     */
    @Select("SELECT detail_address,postcode, MATCH (detail_address) AGAINST (#{againstAddress}) as score from postcode_detail where MATCH (detail_address) AGAINST (#{againstAddress} IN BOOLEAN MODE) ORDER BY score DESC LIMIT 2")
    List<PostcodeDetailSearchDTO> selectListByAddress(@Param("againstAddress") String againstAddress);
}

3. 查询代码实现

代码如下:


import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.core.MybatisConfiguration;
import com.baomidou.mybatisplus.core.MybatisSqlSessionFactoryBuilder;
import com.baomidou.mybatisplus.core.MybatisXMLLanguageDriver;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.chqiuu.spider.common.connect.BaseConnect;
import com.chqiuu.spider.common.connect.HikariConnect;
import com.chqiuu.spider.common.enums.DriverClassEnum;
import com.chqiuu.spider.modules.spider.dto.FormattedAddressDTO;
import com.chqiuu.spider.modules.spider.dto.PostcodeDetailSearchDTO;
import com.chqiuu.spider.modules.spider.entity.ChinaCourtEntity;
import com.chqiuu.spider.modules.spider.mapper.ChinaCourtMapper;
import com.chqiuu.spider.modules.spider.mapper.PostcodeBaseMapper;
import com.chqiuu.spider.modules.spider.mapper.PostcodeDetailMapper;
import lombok.extern.slf4j.Slf4j;
import org.apache.ibatis.logging.stdout.StdOutImpl;
import org.apache.ibatis.mapping.Environment;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.transaction.TransactionFactory;
import org.apache.ibatis.transaction.jdbc.JdbcTransactionFactory;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import javax.sql.DataSource;
import java.util.ArrayList;
import java.util.List;

/**
 * Manual integration tests for the address-to-postcode lookup. They need a local MySQL
 * instance that holds the {@code postcode_detail} table.
 */
@Slf4j
public class PostcodeTest {
    /**
     * Shared by all tests; created on first use, see {@link #prepare()}.
     */
    private static SqlSessionFactory sqlSessionFactory;

    /**
     * Builds a MyBatis session factory for the local {@code spider} database.
     *
     * @return the session factory, or {@code null} when no data source is available
     */
    public static SqlSessionFactory initSqlSessionFactory() {
        BaseConnect connect = new HikariConnect(DriverClassEnum.MYSQL, "127.0.0.1", 3306, "spider", "root", "");
        DataSource dataSource = connect.getDataSource();
        if (null == dataSource) {
            return null;
        }
        TransactionFactory transactionFactory = new JdbcTransactionFactory();
        Environment environment = new Environment("PostcodeTests", transactionFactory, dataSource);
        MybatisConfiguration configuration = new MybatisConfiguration(environment);
        configuration.addMapper(PostcodeBaseMapper.class);
        configuration.addMapper(PostcodeDetailMapper.class);
        configuration.addMapper(ChinaCourtMapper.class);
        configuration.setDefaultScriptingLanguage(MybatisXMLLanguageDriver.class);
        configuration.setLogImpl(StdOutImpl.class);
        return new MybatisSqlSessionFactoryBuilder().build(configuration);
    }

    @BeforeEach
    public void prepare() {
        log.info(" before :所有的测试方法之前都先执行这个方法");
        // Create the factory (and its connection pool) once instead of before every
        // test; the previous instances were never released.
        if (null == sqlSessionFactory) {
            sqlSessionFactory = initSqlSessionFactory();
        }
    }

    @AfterEach
    public void destroy() {
        log.info("执行完毕!");
    }

    @Test
    void getPostcode() {
        String address = "云南省大理白族自治州漾濞彝族自治县苍山东路60号";
        log.info("{} {}", address, getPostcode(address));

        log.info(getPostcode("江苏省", "苏州市", "苏州工业园区", "星湖街288号"));
        log.info(getPostcode("福建省", "龙岩市", "连城县", "西环中路与北大西路交叉口北20米路西"));
        log.info(getPostcode("云南省", "昆明市", "五华区", "环城西路565号"));
        log.info(getPostcode("云南省", "昆明市", "西山区", "昆明市西山区日新中路393号"));
    }

    @Test
    void getFormattedAddress() {
        FormattedAddressDTO formattedAddressDTO = FormattedAddressDTO.parse("云南省昆明市寻甸回族彝族自治县凤梧路");
        log.info(formattedAddressDTO.toString());
        formattedAddressDTO = FormattedAddressDTO.parse("云南省大理白族自治州漾濞彝族自治县苍山东路60号");
        log.info(formattedAddressDTO.toString());
        formattedAddressDTO = FormattedAddressDTO.parse("云南省", "大理白族自治州", "漾濞彝族自治县", "云南省大理白族自治州漾濞彝族自治县苍山东路60号");
        log.info(formattedAddressDTO.toString());
        formattedAddressDTO = FormattedAddressDTO.parse("云南省", "昆明市", "寻甸回族彝族自治县", "云南省昆明市寻甸回族彝族自治县凤梧路");
        log.info(formattedAddressDTO.toString());
    }

    /**
     * Looks up the postcode for a free-form address.
     *
     * @param address full address
     * @return the postcode, or {@code null} if nothing matched
     */
    private String getPostcode(String address) {
        return getPostcode(null, null, null, address, false);
    }

    /**
     * Looks up the postcode for an address with known province, city and district.
     *
     * @param provinceName province, may be null
     * @param cityName     city, may be null
     * @param districtName district, may be null
     * @param address      detailed address
     * @return the postcode, or {@code null} if nothing matched
     */
    private String getPostcode(String provinceName, String cityName, String districtName, String address) {
        return getPostcode(provinceName, cityName, districtName, address, false);
    }

    /**
     * Looks up the postcode, retrying with a progressively shorter address until a
     * row matches or nothing is left to search for.
     *
     * @param provinceName province, may be null
     * @param cityName     city, may be null
     * @param districtName district, may be null
     * @param address      detailed address
     * @param isSubStr     whether to shorten the address by one character before searching
     * @return the postcode, or {@code null} if nothing matched
     */
    private String getPostcode(String provinceName, String cityName, String districtName, String address, boolean isSubStr) {
        FormattedAddressDTO formattedAddressDTO = FormattedAddressDTO.parse(provinceName, cityName, districtName, address, isSubStr);
        String againstAddress = formattedAddressDTO.getAgainstAddress();
        // Stop once the address has been shortened to nothing. The check uses the parsed
        // result, so null inputs (the single-argument overload) cannot cause an NPE.
        if (StrUtil.isEmpty(againstAddress)) {
            return null;
        }
        List<PostcodeDetailSearchDTO> postcodeDetails = selectByAddress(againstAddress);
        if (null != postcodeDetails && !postcodeDetails.isEmpty()) {
            return postcodeDetails.get(0).getPostcode();
        }
        // No match: every retry drops one more character, so the recursion terminates.
        log.info("缩短地址继续找 {}", againstAddress);
        return getPostcode(formattedAddressDTO.getProvinceName(), formattedAddressDTO.getCityName(), formattedAddressDTO.getDistrictName(), formattedAddressDTO.getSmailAddress(), true);
    }

    /**
     * Runs the full-text query in a short-lived auto-commit session.
     *
     * @param againstAddress boolean-mode search expression
     * @return matching rows as returned by the mapper
     * @throws IllegalStateException if the session factory could not be created
     */
    private List<PostcodeDetailSearchDTO> selectByAddress(String againstAddress) {
        if (null == sqlSessionFactory) {
            throw new IllegalStateException("SqlSessionFactory is not initialised; check the database connection");
        }
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            PostcodeDetailMapper postcodeDetailMapper = session.getMapper(PostcodeDetailMapper.class);
            return postcodeDetailMapper.selectListByAddress(againstAddress);
        }
    }
}

三、运行效果

运行效果如下:
在这里插入图片描述

Logo

更多推荐