Requires xlsxwriter to save file - https://xlsxwriter.readthedocs.io/
src_file = "https://github.com/dm-fedorov/pandas_basic/blob/master/%D1%83%D1%80%D0%BE%D0%BA%D0%B8_pandas/04-data-input-output/data/raw/excel_complicated_document.xlsx?raw=True"
report_file = "TPS_report.xlsx"
df_sales = pd.read_excel(src_file,
sheet_name='sales data',
usecols='A:G')
| 0 |
ZN-870-29 |
Realcube |
2019-03-05 |
shirt |
19 |
17 |
323 |
| 1 |
JQ-501-63 |
Zooxo |
2019-07-09 |
book |
30 |
14 |
420 |
| 2 |
FI-165-58 |
Dabtype |
2019-08-12 |
poster |
7 |
23 |
161 |
| 3 |
XP-005-55 |
Skipfire |
2019-11-18 |
pen |
7 |
29 |
203 |
| 4 |
NB-917-18 |
Bluezoom |
2019-04-18 |
poster |
36 |
19 |
684 |
sales_summary = (df_sales
.groupby(['company', 'product'])
.agg({'extended amount': 'sum'})
)
| company |
product |
|
| Abatz |
book |
2063 |
| pen |
140 |
| poster |
1248 |
| Agivu |
book |
385 |
| shirt |
700 |
| ... |
... |
... |
| Zooxo |
book |
420 |
| shirt |
1378 |
| Zoozzy |
pen |
527 |
| poster |
620 |
| shirt |
437 |
731 rows × 1 columns
sales_avg = (df_sales
.agg({'extended amount': ['sum', 'mean'],
'price': ['mean']})
.fillna(0)
)
| sum |
510270.00 |
0.000 |
| mean |
510.27 |
22.816 |
product_summary = pd.pivot_table(data=df_sales,
index=['product'],
values=['extended amount'],
aggfunc=['sum', 'mean', 'count'])
|
extended amount |
extended amount |
extended amount |
| book |
118356 |
505.794872 |
234 |
| pen |
115017 |
508.924779 |
226 |
| poster |
139008 |
516.758364 |
269 |
| shirt |
137889 |
508.815498 |
271 |
sales_summary.to_excel(report_file)
Collecting xlsxwriter
Downloading xlsxwriter-3.2.5-py3-none-any.whl.metadata (2.7 kB)
Downloading xlsxwriter-3.2.5-py3-none-any.whl (172 kB)
Installing collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.5
writer = pd.ExcelWriter(report_file,
engine='xlsxwriter')
sales_summary.to_excel(writer,
sheet_name='Sales_Summary')
sales_avg.to_excel(writer,
sheet_name='Sales_Summary',
startcol=4)
product_summary.to_excel(writer,
sheet_name='Product_Summary')
workbook = writer.book
worksheet = writer.sheets['Sales_Summary']
num_format = workbook.add_format({'num_format': '#,##0.00'})
worksheet.set_column('A:B', 18)
worksheet.set_column('C:C', 18, num_format)
worksheet.set_column('E:G', 16, num_format)
worksheet_2 = writer.sheets['Product_Summary']
worksheet_2.set_column('A:D', 16, num_format)