pandas --合并表格

因为我经常要做数据拉取并合并存储在表格里，所以这里对我遇到的几种类型的表格合并进行讲解。
第一种：各表格的列（columns）完全相同，直接把表格逐行追加合并，如图：
pandas --合并表格

import glob,os
from numpy import *


# --- Settings: adjust the values below to match your own environment ---

# Column titles written as the header row of the merged sheet.
biaotou = [
    '类目',
    '渠道',
    '时间',
    '品牌名',
    'TOP品牌',
    '销售金额(指数)',
    '销量(指数)',
    '平均价格(指数)',
    '购买人数(环比)',
    '购买人数(指数)',
    '购买频次',
    '单次购买量(指数)',
    '搜索人数(指数)',
    '转化率',
]

# Folder that is searched for the workbooks to merge.
filelocation = r"D:\code\cui\策略中心\市场概览\市场格局\data"

# File-name suffix of the workbooks to look for in that folder.
fileform = ".xlsx"

# Folder in which the merged workbook is meant to be stored.
filedestination = r"D:\code\cui\策略中心\市场概览\市场格局\data"

# Base name (without extension) of the merged workbook.
file = "兰蔻"

# Find out how many workbooks in ``filelocation`` need to be merged.
# The pattern is built from the ``fileform`` setting so that changing the
# suffix there actually takes effect.  glob returns paths in arbitrary order,
# so sort them to make the row order of the merged sheet reproducible.
filearray = sorted(glob.glob(os.path.join(filelocation, "*" + fileform)))
print("在默认文件夹下有%d个文档哦" % len(filearray))

# ``ge`` is the number of workbooks found; ``matrix`` gets one slot per
# workbook and is filled in when the files are read.
ge = len(filearray)
matrix = [None] * ge
# 实现读写数据

# Read every workbook into the three-dimensional list
# ``matrix[file][row][column]``.  The header row (row 0) of each sheet is
# skipped, so only data rows are collected.
# NOTE(review): xlrd 2.0+ reportedly reads only legacy .xls files -- confirm
# the installed xlrd version can open the .xlsx inputs.
import xlrd

for i in range(ge):
    fname = filearray[i]
    # Start with "no rows" so a workbook without the expected sheet simply
    # contributes nothing to the merged output.
    matrix[i] = []
    bk = xlrd.open_workbook(fname)
    try:
        sh = bk.sheet_by_name("Sheet1")
    except xlrd.XLRDError:
        print("在文件%s中没有找到sheet1，读取文件数据失败,要不你换换表格的名字？" % fname)
        # Skip this file; falling through would reuse the previous file's
        # sheet (or hit an undefined name on the very first file).
        continue

    # Copy every cell value of rows 1..nrows-1 (row 0 is the header).
    matrix[i] = [
        [sh.cell(j, k).value for k in range(sh.ncols)]
        for j in range(1, sh.nrows)
    ]
# Next: write the collected rows into a new .xls workbook (named after ``file``).
import xlwt

# Build the merged workbook in memory; nothing is written to disk until
# ``save`` is called at the end.
filename = xlwt.Workbook()
sheet = filename.add_sheet("hel")

# Row 0 holds the header titles.
for col, title in enumerate(biaotou):
    sheet.write(0, col, title)

# ``zh`` is the next free output row; it keeps counting across all input
# files so their data rows are appended one after another.
zh = 1
for rows in matrix:
    for row in rows:
        for col, value in enumerate(row):
            sheet.write(zh, col, value)
        zh = zh + 1

# Save into the configured destination folder.  Joining the parts with
# os.path.join supplies the path separator that plain string concatenation
# left out (which produced "...\data<name>.xls" in the parent folder).
filename.save(os.path.join(filedestination, file + ".xls"))
# Report success only after the workbook has actually been written.
print("我已经将%d个文件合并成1个文件，并命名为%s.xls.快打开看看正确不？" % (ge, file))

第二种：不同表格进行合并，以所有表格共有的键列（下面代码中的“标签”列）为基准。
pandas --合并表格

代码如下

import pandas as pd
import numpy as np
import glob,os

def merge_on_key(frames, on='标签', how='outer'):
    """Merge DataFrames left to right on a shared key column.

    Args:
        frames: Iterable of DataFrames to combine.
        on: Name of the key column the tables are joined on.
        how: Join type handed to ``DataFrame.merge``.  ``'outer'`` keeps a
            key that appears in any table; ``'inner'`` keeps only keys that
            appear in every table.

    Returns:
        The merged DataFrame.  If only one frame is given it is returned
        unchanged.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    frames = list(frames)
    if not frames:
        raise ValueError("no tables to merge")
    merged = frames[0]
    for frame in frames[1:]:
        merged = merged.merge(frame, on=on, how=how)
    return merged


# Folder that holds the workbooks to merge.
filelocation = r"D:\code\cui\数据银行\自定义分析\自定义人群\data"
filearray = glob.glob(os.path.join(filelocation, "*xlsx"))
print(filearray)

if not filearray:
    # Nothing to merge: say so instead of failing with an IndexError.
    print("在文件夹%s中没有找到任何xlsx文件，没有可合并的表格。" % filelocation)
else:
    # Outer join: a label missing from some tables is still kept.
    # Switch to how='inner' to keep only labels present in every table.
    datan = merge_on_key(
        (pd.read_excel(path) for path in filearray),
        on='标签',
        how='outer',
    )
    print(datan)

    downloadFile = datan
    # Same target as before ("...\data.xlsx", next to the data folder), but
    # derived from the raw-string setting so no backslash is treated as an
    # escape sequence.
    fileName = filelocation + '.xlsx'

    # ``encoding`` is no longer accepted by ``to_excel`` in current pandas,
    # so it is not passed.
    downloadFile.to_excel(fileName)

相关推荐