pandas - DataFrame

1、数据格式化 - round(四舍五入)


def dataFormatRound():
    """
    Demonstrate rounding the values of a DataFrame with ``DataFrame.round``.

    A 5x3 frame of floats is built and two rounded variants are printed:
    first every column rounded to 2 decimals, then only 'col-1' (1 decimal)
    and 'col-2' (3 decimals) rounded while 'col-3' keeps its original values.

    :return: None; both rounded frames are written to stdout.
    """
    frame = pd.DataFrame(
        {
            'col-1': [
                100011.12345, 100021.12345, 100031.12345,
                100041.12345, 100051.12345,
            ],
            'col-2': [
                200012.34567, 200022.34567, 200032.34567,
                200042.34567, 200052.34567,
            ],
            'col-3': [
                300013.56789, 300023.56789, 300033.56789,
                300043.56789, 300053.56789,
            ],
        },
        index=[f'idx-{n}' for n in range(1, 6)],
    )

    # An integer argument rounds every column to that many decimals.
    # round() hands back a new frame, so the source frame is left untouched.
    print(frame.round(2))

    # A dict argument rounds only the listed columns, each to its own number
    # of decimals; columns that are not listed are passed through unchanged.
    print(frame.round({"col-1": 1, "col-2": 3}))

原始结果:

	              col-1         col-2         col-3
	idx-1  100011.12345  200012.34567  300013.56789
	idx-2  100021.12345  200022.34567  300023.56789
	idx-3  100031.12345  200032.34567  300033.56789
	idx-4  100041.12345  200042.34567  300043.56789
	idx-5  100051.12345  200052.34567  300053.56789

df_1 整体数据四舍五入取2位 返回结果:

	           col-1      col-2      col-3
	idx-1  100011.12  200012.35  300013.57
	idx-2  100021.12  200022.35  300023.57
	idx-3  100031.12  200032.35  300033.57
	idx-4  100041.12  200042.35  300043.57
	idx-5  100051.12  200052.35  300053.57

df_2 'col-1' 列数据四舍五入取1位, 'col-2' 列数据四舍五入取3位 返回结果:

	          col-1       col-2         col-3
	idx-1  100011.1  200012.346  300013.56789
	idx-2  100021.1  200022.346  300023.56789
	idx-3  100031.1  200032.346  300033.56789
	idx-4  100041.1  200042.346  300043.56789
	idx-5  100051.1  200052.346  300053.56789

2、数据格式化 - apply(百分比、千位分隔符)


def dataFormatApply():
    """
    Demonstrate formatting a numeric column as text with ``Series.apply``.

    Prints the source frame, then a second frame whose columns are 'col-1'
    rendered four ways: as a percentage with no decimals, as a percentage
    with three decimals, with a thousands separator, and truncated to int
    with a thousands separator.

    :return: None; the source and the formatted frame are written to stdout.
    """

    # Work around column headers that do not line up with their values when
    # the labels contain wide (CJK) characters. These are global pandas
    # options, so they stay switched on after this function returns.
    for option_name in (
        'display.unicode.ambiguous_as_wide',
        'display.unicode.east_asian_width',
    ):
        pd.set_option(option_name, True)

    df = pd.DataFrame(
        {
            'col-1': [
                100011.12345, 100021.12345, 100031.12345,
                100041.12345, 100051.12345,
            ],
            'col-2': [
                200012.34567, 200022.34567, 200032.34567,
                200042.34567, 200052.34567,
            ],
            'col-3': [
                300013.56789, 300023.56789, 300033.56789,
                300043.56789, 300053.56789,
            ],
            'col-4': [
                400014.789, 400024.789, 400034.789,
                400044.789, 400054.789,
            ],
        },
        index=[f'idx-{n}' for n in range(1, 6)],
    )
    print(df)

    # Output column label -> formatter applied to every value of 'col-1'.
    # Insertion order of this dict is the column order of the result.
    formatters = {
        "百分比- 整数": lambda value: f"{value:.0%}",
        "百分比- 三位小数": lambda value: f"{value:.3%}",
        "千位分隔符": lambda value: f"{value:,}",
        "转换int并千位分隔符": lambda value: f"{int(value):,}",
    }
    source = df["col-1"]
    res = pd.DataFrame(
        {label: source.apply(render) for label, render in formatters.items()}
    )
    print(res)

原始结果:

	              col-1         col-2         col-3       col-4
	idx-1  100011.12345  200012.34567  300013.56789  400014.789
	idx-2  100021.12345  200022.34567  300023.56789  400024.789
	idx-3  100031.12345  200032.34567  300033.56789  400034.789
	idx-4  100041.12345  200042.34567  300043.56789  400044.789
	idx-5  100051.12345  200052.34567  300053.56789  400054.789

res 返回结果:

	      百分比- 整数 百分比- 三位小数     千位分隔符 转换int并千位分隔符
	idx-1    10001112%    10001112.345%  100,011.12345             100,011
	idx-2    10002112%    10002112.345%  100,021.12345             100,021
	idx-3    10003112%    10003112.345%  100,031.12345             100,031
	idx-4    10004112%    10004112.345%  100,041.12345             100,041
	idx-5    10005112%    10005112.345%  100,051.12345             100,051

3、 数据格式化 - map(百分比)


def dataFormatMap():
    """
    Demonstrate formatting a numeric column as text with ``Series.map``.

    Builds a 5x4 frame of floats, appends a column holding 'col-1' rendered
    as a percentage with two decimals, and prints the resulting frame.

    :return: None; the extended frame is written to stdout.
    """
    frame = pd.DataFrame(
        {
            'col-1': [
                100011.12345, 100021.12345, 100031.12345,
                100041.12345, 100051.12345,
            ],
            'col-2': [
                200012.34567, 200022.34567, 200032.34567,
                200042.34567, 200052.34567,
            ],
            'col-3': [
                300013.56789, 300023.56789, 300033.56789,
                300043.56789, 300053.56789,
            ],
            'col-4': [
                400014.789, 400024.789, 400034.789,
                400044.789, 400054.789,
            ],
        },
        index=[f'idx-{n}' for n in range(1, 6)],
    )

    # The bound str.format method is itself a one-argument callable, so it
    # can be handed to map() directly to render each value as a percentage.
    as_percent = '{:.2%}'.format
    frame["百分比- 二位小数"] = frame["col-1"].map(as_percent)

    print(frame)

原始结果:

	              col-1         col-2         col-3       col-4
	idx-1  100011.12345  200012.34567  300013.56789  400014.789
	idx-2  100021.12345  200022.34567  300023.56789  400024.789
	idx-3  100031.12345  200032.34567  300033.56789  400034.789
	idx-4  100041.12345  200042.34567  300043.56789  400044.789
	idx-5  100051.12345  200052.34567  300053.56789  400054.789

df 百分比 返回结果:

	              col-1         col-2         col-3       col-4   百分比- 二位小数
	idx-1  100011.12345  200012.34567  300013.56789  400014.789     10001112.35%
	idx-2  100021.12345  200022.34567  300023.56789  400024.789     10002112.35%
	idx-3  100031.12345  200032.34567  300033.56789  400034.789     10003112.35%
	idx-4  100041.12345  200042.34567  300043.56789  400044.789     10004112.35%
	idx-5  100051.12345  200052.34567  300053.56789  400054.789     10005112.35%

4、 数据格式化 - split

def splitFun():
    """
    Demonstrate splitting a string column into separate columns.

    Builds a single-column frame whose values are all "a,b", splits each
    value on the comma, stores the two pieces in new 'part-1' and 'part-2'
    columns, and prints the resulting frame.

    :return: None; the extended frame is written to stdout.
    """
    frame = pd.DataFrame(
        {'col-1': ["a,b"] * 5},
        index=[f'idx-{n}' for n in range(1, 6)],
    )

    # expand=True returns the pieces as a DataFrame whose columns are labelled
    # by piece position (0, 1, ...) instead of a Series of lists.
    pieces = frame['col-1'].str.split(",", expand=True)
    for position, label in enumerate(("part-1", "part-2")):
        frame[label] = pieces[position]
    print(frame)

原始结果:

		  col-1   
	idx-1   a,b  
	idx-2   a,b 
	idx-3   a,b   
	idx-4   a,b 
	idx-5   a,b  

df 分割 返回结果:

	      col-1 part-1 part-2
	idx-1   a,b      a      b
	idx-2   a,b      a      b
	idx-3   a,b      a      b
	idx-4   a,b      a      b
	idx-5   a,b      a      b
Logo

为开发者提供学习成长、分享交流、生态实践、资源工具等服务,帮助开发者快速成长。

更多推荐