-
Notifications
You must be signed in to change notification settings - Fork 0
/
Read_excel_files_and_combine.py
207 lines (172 loc) · 6.88 KB
/
Read_excel_files_and_combine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#https://stackoverflow.com/questions/15793349/how-to-concatenate-three-excels-files-xlsx-using-python
#use openpyxl
import pandas as pd
# filenames
excel_names = ["xlsx1.xlsx", "xlsx2.xlsx", "xlsx3.xlsx"]
# read them in
excels = [pd.ExcelFile(name) for name in excel_names]
# turn them into dataframes
frames = [x.parse(x.sheet_names[0], header=None,index_col=None) for x in excels]
# delete the first row for all frames except the first
# i.e. remove the header row -- assumes it's the first
frames[1:] = [df[1:] for df in frames[1:]]
# concatenate them..
combined = pd.concat(frames)
# write it out
combined.to_excel("c.xlsx", header=False, index=False)
#use xlrd and xlwt
import xlwt
import xlrd
wkbk = xlwt.Workbook()
outsheet = wkbk.add_sheet('Sheet1')
xlsfiles = [r'C:\foo.xlsx', r'C:\bar.xlsx', r'C:\baz.xlsx']
outrow_idx = 0
for f in xlsfiles:
# This is all untested; essentially just pseudocode for concept!
insheet = xlrd.open_workbook(f).sheets()[0]
for row_idx in xrange(insheet.nrows):
for col_idx in xrange(insheet.ncols):
outsheet.write(outrow_idx, col_idx,
insheet.cell_value(row_idx, col_idx))
outrow_idx += 1
wkbk.save(r'C:\combined.xls')
'''
If your files all have a header line, you probably don't want to repeat that,
so you could modify the code above to look more like this:
'''
firstfile = True # Is this the first sheet?
for f in xlsfiles:
insheet = xlrd.open_workbook(f).sheets()[0]
for row_idx in xrange(0 if firstfile else 1, insheet.nrows):
pass # processing; etc
firstfile = False # We're done with the first sheet.
#https://zhuanlan.zhihu.com/p/31541902
import pandas as pd
import os
inputdir=r'C:\Users\数据\Desktop\新建文件夹'
df_empty=pd.DataFrame(columns=['名称','列1','列2'])
for parents, dirnames, filenames in os.walk(inputdir):
for filename in filenames:
df=pd.read_excel(os.path.join(parent,filename))
df_empty=df_empty.append(df,ignore_index=True)
#https://blog.csdn.net/d1240673769/article/details/74513206
# -*- coding: utf-8 -*-
#将多个Excel文件合并成一个
import xlrd
import xlsxwriter
#打开一个excel文件
def open_xls(file):
fh=xlrd.open_workbook(file)
return fh
#获取excel中所有的sheet表
def getsheet(fh):
return fh.sheets()
#获取sheet表的行数
def getnrows(fh,sheet):
table=fh.sheets()[sheet]
return table.nrows
#读取文件内容并返回行内容
def getFilect(file,shnum):
fh=open_xls(file)
table=fh.sheets()[shnum]
num=table.nrows
for row in range(num):
rdata=table.row_values(row)
datavalue.append(rdata)
return datavalue
#获取sheet表的个数
def getshnum(fh):
x=0
sh=getsheet(fh)
for sheet in sh:
x+=1
return x
if __name__=='__main__':
#定义要合并的excel文件列表
allxls=['F:/test/excel1.xlsx','F:/test/excel2.xlsx']
#存储所有读取的结果
datavalue=[]
for fl in allxls:
fh=open_xls(fl)
x=getshnum(fh)
for shnum in range(x):
print("正在读取文件:"+str(fl)+"的第"+str(shnum)+"个sheet表的内容...")
rvalue=getFilect(fl,shnum)
#定义最终合并后生成的新文件
endfile='F:/test/excel3.xlsx'
wb1=xlsxwriter.Workbook(endfile)
#创建一个sheet工作对象
ws=wb1.add_worksheet()
for a in range(len(rvalue)):
for b in range(len(rvalue[a])):
c=rvalue[a][b]
ws.write(a,b,c)
wb1.close()
print("文件合并完成")
#https://blog.csdn.net/u012209894/article/details/80097271
#!/usr/bin/env python
# coding=utf-8
import xlrd
import shutil
from xlutils.copy import copy
import datetime
class excel_cp:
def __init__(self, ):
day = datetime.date.today()
self.str_day = str(day).replace('-', '')
# 处理excel合并
def excel_merge(self, old_name, new_name):
if old_name == '降价通知.xls':
lead_source = 'r510'
elif old_name == 'BI5096.xls':
lead_source = 'r953'
elif old_name == '400溢出.xls':
lead_source = 'r880'
elif old_name == '潜客推荐.xls':
lead_source = 'r722'
else:
lead_source = 'r520'
old_dir = 'E:\\load_bi\\' + self.str_day + '\\' + old_name
new_dir = 'E:\\load_bi\\' + self.str_day + '\\' + new_name
# 打开要使用的excel,获取要需要写入的行数
bk = xlrd.open_workbook(old_dir)
sh = bk.sheet_by_name("Page 1")
nrows = sh.nrows
# 打开要插入的excel,获取sheet页面的行数,再获取输入的sheet
oldWb = xlrd.open_workbook(new_dir, formatting_info=True)
in_sheet = oldWb.sheet_by_name("Sheet1")
in_nrows = in_sheet.nrows
newWb = copy(oldWb)
sheet = newWb.get_sheet(0)
for i in range(1, nrows):
row_data = sh.row_values(i)
print(row_data)
for j in (in_nrows-1+i, i+in_nrows-1):
# ---------写出文件到excel--------
print("-----正在往j写入 " + str(j) + " 行")
sheet.write(j, 0, label=sh.cell_value(i, 4)) # 将old_dir的第i行第5列数据写入到new_dir第j行第1列
sheet.write(j, 1, label=sh.cell_value(i, 1)) # 将old_dir的第i行第2列数据写入到new_dir第j行第2列
sheet.write(j, 3, lead_source) # 将指定数据写入到new_dir第2行第4列
sheet.write(j, 14, label=sh.cell_value(i, 2)) # 将old_dir的第i行第3列数据写入到new_dir第j行第15列
sheet.write(j, 15, label=sh.cell_value(i, 3)) # 将old_dir的第i行第4列数据写入到new_dir第j行第16列
sheet.write(j, 28, label=sh.cell_value(i, 7)) # 将old_dir的第i行第8列数据写入到new_dir第j行第29列
sheet.write(j, 29, label=sh.cell_value(i, 8)) # 将old_dir的第i行第9列数据写入到new_dir第j行第30列
sheet.write(j, 30, label=sh.cell_value(i, 9)) # 将old_dir的第i行第10列数据写入到new_dir第j行第31列
newWb.save(new_dir)
# 复制文件到指定路径
def file_copy(self, rm_file):
rm_file_dir = 'E:\\load_bi\\' + self.str_day + '\\' + rm_file
new_file_dir = 'C:\\fakepath\\' + rm_file
shutil.copyfile(rm_file_dir, new_file_dir)
if __name__ == '__main__':
# @version : 3.4
# @Author : robot_lei
# @Software: PyCharm Community Edition
# 将这多个excel合并成一张
old_file_name_s = ('降价通知.xls', 'BI5096.xls', '车库线索.xls', '400溢出.xls', '潜客推荐.xls')
leads_source_str = ('r510', 'r953', 'r520', 'r880', 'r722')
new_file_name = '5096.xls'
for old_file_name in old_file_name_s:
excel_cp().excel_merge(old_file_name, new_file_name)
# 复制文件到指定路径
excel_cp().file_copy(new_file_name)