1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
| import pandas as pd import math
class OperateExcel(): def __init__(self, file_path): self.file_path = file_path self.df = pd.read_excel(file_path, sheet_name="日记帐", header=[0, 1])
def get_head(self): """ 头部处理,格式: :return: [year, headers]
year为年,如2021年 headers=[],把所有头部值存到[]中,日期 凭证编号 车牌号 拿货吨位 卖出吨位 摘要 收入 支出 余额 """ headers = [] head = self.df.head(0) year = "" for items in head: for i in items: if i.find("年") != -1: year = i break if items[1].replace(" ", "").find("Unnamed") != -1: headers.append(items[0].replace(" ", "")) if items[0].replace(" ", "").find("金额") != -1: headers.append(items[1].replace(" ", ""))
headers.insert(0, "日期") return year, headers
def get_data(self): """ 处理数据 :return: [file_name, data] file_name表示将要保持的名字 data 表示处理好的数据,给pd.DataFrame使用,格式 {"金额":[1,2,3],"收入":[]} """
g_head = self.get_head() headers = g_head[1] year = g_head[0] month = "" data = {} for i in headers: data[i] = [] for items in self.df.values: if items[0] == '合计' or math.isnan(items[0]): break month = str(items[0]) + "月" m_date = year + month + str(int(items[1])) + "日" data["日期"].append(m_date) num = 0 if str(items[2]) != "nan": num = items[2] data["凭证编号"].append(num)
car_num = 0 if str(items[3]) != "nan": car_num = items[3] data["车牌号"].append(car_num)
take_tonnage = 0 if str(items[4]) != "nan": take_tonnage = items[4] data["拿货吨位"].append(take_tonnage)
sell_tonnage = 0 if str(items[5]) != "nan": sell_tonnage = items[5] data["卖出吨位"].append(sell_tonnage)
summary = "" if str(items[6]) != "nan": summary = items[6] data["摘要"].append(summary)
revenue = 0 if str(items[7]) != "nan": revenue = items[7] data["收入"].append(revenue)
spend = 0 if str(items[8]) != "nan": spend = items[8] data["支出"].append(spend)
balance = 0 if str(items[9]) != "nan": balance = items[9] data["余额"].append(balance)
file_name = year + month + ".xlsx" return file_name, data
def sum_data(self, data): """ 新增统计处理的数据,最终给pd.DataFrame使用 :param data: list|表示处理好的数据,给pd.DataFrame使用,格式 {"金额":[1,2,3],"收入":[]} :return: list """ for i in data: data[i].append(0)
data["日期"].append("合计") data["拿货吨位"].append(sum(data["拿货吨位"])) data["卖出吨位"].append(sum(data["卖出吨位"])) data["收入"].append(sum(data["收入"])) data["支出"].append(sum(data["支出"])) data["余额"].append(sum(data["余额"])) for i in data: if i not in ["日期", "拿货吨位", "卖出吨位", "收入", "支出", "余额"]: data[i].append("")
for j in data: for k in range(len(data[j])): if data[j][k] == 0: data[j][k] = ""
return data
def generate_excel(self): """ 重新生成excel :return: """ g_data = self.get_data() file_name = g_data[0] l_data = g_data[1] s_data = self.sum_data(l_data) info_marks = pd.DataFrame(s_data) writer = pd.ExcelWriter(file_name) info_marks.to_excel(writer, index=False) writer.save() print('生成excel成功,文件名为:%s' % file_name)
if __name__ == "__main__": file_path = 'XXXX.xls' o_excel = OperateExcel(file_path) o_excel.generate_excel()
|