Last active
December 6, 2025 19:10
-
-
Save ncalm/1b9cee36ae034c2eeeabfb54dbce39a9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| df = xl("A2:G31").astype(str) | |
| # set the column names from the first group | |
| df.columns = df.loc[1,:] | |
| # create initial region and quarter columns | |
| fill = lambda s, d: s.where(s.str.contains(d), np.nan).ffill() | |
| df['Region'] = fill(df['Category'], ':') | |
| df['Quarter'] = np.where(fill(df['Jan-Sales'], '-')=='Jan-Sales','Q1','Q2') | |
| # remove unwanted rows | |
| df = df.loc[ | |
| (df['Category']!=df['Region']) & | |
| (~df['Jan-Sales'].str.contains('-')) | |
| ] | |
| # convert columns to categorical to ensure custom ordering later | |
| ordered_cats = lambda s: pd.Categorical(s,categories=s.unique(), ordered=True) | |
| df['Region'] = ordered_cats(df['Region'].str.replace('Region: ','')) | |
| df['Category'] = ordered_cats(df['Category']) | |
| # to avoid repetition in the following code | |
| grp = ['Region','Quarter','Category'] | |
| val = ['Total_Sales','Total_Returns'] | |
| # unpivot the value columns | |
| df = df.melt(id_vars=grp, var_name='x', value_name='v') | |
| df['v'] = df['v'].astype(int) | |
| # Convert values like 'Jan-Sales' and 'Jan-Returns' to 'Sales' and 'Returns' | |
| df['x'] = np.where(~df['x'].str.contains('Sales'),*val) | |
| # sum the sales and returns by region, quarter, category | |
| df = df.groupby(grp+['x'], as_index=False).sum() | |
| # move the sales and the returns to the columns axis | |
| df = df.pivot(index=grp,columns='x').reset_index() | |
| # reset the column names to kill the multi-index | |
| df.columns = grp+val | |
| # remove the category from the grouping fields - no longer needed | |
| grp.remove('Category') | |
| # where Sales is max Sales and Returns is max Returns, place the | |
| # category text in these new columns | |
| df[['Max_Sold_Item','Max_Returned_Item']] = np.where( | |
| df[val].eq(df.groupby(grp)[val].transform('max')), | |
| df[['Category','Category']], | |
| None) | |
| # finally, sum the sales and returns, and join the max items | |
| joiner = lambda s: ', '.join(x for x in s if x) | |
| df.groupby(grp, as_index=False).agg({ | |
| "Total_Sales": "sum", | |
| "Total_Returns": "sum", | |
| "Max_Sold_Item": joiner, | |
| "Max_Returned_Item": joiner | |
| }) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| df = df.astype(str) | |
| # set the column names from the first group | |
| df.columns = df.loc[1,:] | |
| # create initial region and quarter columns | |
| fill = lambda s, d: s.where(s.str.contains(d), np.nan).ffill() | |
| df['Region'] = fill(df['Category'], ':') | |
| df['Quarter'] = np.where(fill(df['Jan-Sales'], '-')=='Jan-Sales','Q1','Q2') | |
| # remove unwanted rows | |
| df = df.loc[ | |
| (df['Category']!=df['Region']) & | |
| (~df['Jan-Sales'].str.contains('-')) | |
| ] | |
| # convert columns to categorical to ensure custom ordering later | |
| ordered_cats = lambda s: pd.Categorical(s,categories=s.unique(), ordered=True) | |
| df['Region'] = ordered_cats(df['Region'].str.replace('Region: ','')) | |
| df['Category'] = ordered_cats(df['Category']) | |
| # to avoid repetition in the following code | |
| grp = ['Region','Quarter','Category'] | |
| val = ['Total_Sales','Total_Returns'] | |
| # unpivot the value columns | |
| df = df.melt(id_vars=grp, var_name='x', value_name='v') | |
| df['v'] = df['v'].astype(int) | |
| # Convert values like 'Jan-Sales' and 'Jan-Returns' to 'Sales' and 'Returns' | |
| df['x'] = np.where(~df['x'].str.contains('Sales'),*val) | |
| # sum the sales and returns by region, quarter, category | |
| df = df.groupby(grp+['x'], as_index=False).sum() | |
| # move the sales and the returns to the columns axis | |
| df = df.pivot(index=grp,columns='x').reset_index() | |
| # reset the column names to kill the multi-index | |
| df.columns = grp+val | |
| # remove the category from the grouping fields - no longer needed | |
| grp.remove('Category') | |
| # where Sales is max Sales and Returns is max Returns, place the | |
| # category text in these new columns | |
| df[['Max_Sold_Item','Max_Returned_Item']] = np.where( | |
| df[val].eq(df.groupby(grp)[val].transform('max')), | |
| df[['Category','Category']], | |
| None) | |
| # finally, sum the sales and returns, and join the max items | |
| joiner = lambda s: ', '.join(x for x in s if x) | |
| df = df.groupby(grp, as_index=False).agg({ | |
| "Total_Sales": "sum", | |
| "Total_Returns": "sum", | |
| "Max_Sold_Item": joiner, | |
| "Max_Returned_Item": joiner | |
| }) | |
| df |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment