Skip to content

Instantly share code, notes, and snippets.

@ncalm
Last active December 6, 2025 19:10
Show Gist options
  • Select an option

  • Save ncalm/1b9cee36ae034c2eeeabfb54dbce39a9 to your computer and use it in GitHub Desktop.

Select an option

Save ncalm/1b9cee36ae034c2eeeabfb54dbce39a9 to your computer and use it in GitHub Desktop.
df = xl("A2:G31").astype(str)
# set the column names from the first group
df.columns = df.loc[1,:]
# create initial region and quarter columns
fill = lambda s, d: s.where(s.str.contains(d), np.nan).ffill()
df['Region'] = fill(df['Category'], ':')
df['Quarter'] = np.where(fill(df['Jan-Sales'], '-')=='Jan-Sales','Q1','Q2')
# remove unwanted rows
df = df.loc[
(df['Category']!=df['Region']) &
(~df['Jan-Sales'].str.contains('-'))
]
# convert columns to categorical to ensure custom ordering later
ordered_cats = lambda s: pd.Categorical(s,categories=s.unique(), ordered=True)
df['Region'] = ordered_cats(df['Region'].str.replace('Region: ',''))
df['Category'] = ordered_cats(df['Category'])
# to avoid repetition in the following code
grp = ['Region','Quarter','Category']
val = ['Total_Sales','Total_Returns']
# unpivot the value columns
df = df.melt(id_vars=grp, var_name='x', value_name='v')
df['v'] = df['v'].astype(int)
# Convert values like 'Jan-Sales' and 'Jan-Returns' to 'Sales' and 'Returns'
df['x'] = np.where(~df['x'].str.contains('Sales'),*val)
# sum the sales and returns by region, quarter, category
df = df.groupby(grp+['x'], as_index=False).sum()
# move the sales and the returns to the columns axis
df = df.pivot(index=grp,columns='x').reset_index()
# reset the column names to kill the multi-index
df.columns = grp+val
# remove the category from the grouping fields - no longer needed
grp.remove('Category')
# where Sales is max Sales and Returns is max Returns, place the
# category text in these new columns
df[['Max_Sold_Item','Max_Returned_Item']] = np.where(
df[val].eq(df.groupby(grp)[val].transform('max')),
df[['Category','Category']],
None)
# finally, sum the sales and returns, and join the max items
joiner = lambda s: ', '.join(x for x in s if x)
df.groupby(grp, as_index=False).agg({
"Total_Sales": "sum",
"Total_Returns": "sum",
"Max_Sold_Item": joiner,
"Max_Returned_Item": joiner
})
import pandas as pd
import numpy as np
df = df.astype(str)
# set the column names from the first group
df.columns = df.loc[1,:]
# create initial region and quarter columns
fill = lambda s, d: s.where(s.str.contains(d), np.nan).ffill()
df['Region'] = fill(df['Category'], ':')
df['Quarter'] = np.where(fill(df['Jan-Sales'], '-')=='Jan-Sales','Q1','Q2')
# remove unwanted rows
df = df.loc[
(df['Category']!=df['Region']) &
(~df['Jan-Sales'].str.contains('-'))
]
# convert columns to categorical to ensure custom ordering later
ordered_cats = lambda s: pd.Categorical(s,categories=s.unique(), ordered=True)
df['Region'] = ordered_cats(df['Region'].str.replace('Region: ',''))
df['Category'] = ordered_cats(df['Category'])
# to avoid repetition in the following code
grp = ['Region','Quarter','Category']
val = ['Total_Sales','Total_Returns']
# unpivot the value columns
df = df.melt(id_vars=grp, var_name='x', value_name='v')
df['v'] = df['v'].astype(int)
# Convert values like 'Jan-Sales' and 'Jan-Returns' to 'Sales' and 'Returns'
df['x'] = np.where(~df['x'].str.contains('Sales'),*val)
# sum the sales and returns by region, quarter, category
df = df.groupby(grp+['x'], as_index=False).sum()
# move the sales and the returns to the columns axis
df = df.pivot(index=grp,columns='x').reset_index()
# reset the column names to kill the multi-index
df.columns = grp+val
# remove the category from the grouping fields - no longer needed
grp.remove('Category')
# where Sales is max Sales and Returns is max Returns, place the
# category text in these new columns
df[['Max_Sold_Item','Max_Returned_Item']] = np.where(
df[val].eq(df.groupby(grp)[val].transform('max')),
df[['Category','Category']],
None)
# finally, sum the sales and returns, and join the max items
joiner = lambda s: ', '.join(x for x in s if x)
df = df.groupby(grp, as_index=False).agg({
"Total_Sales": "sum",
"Total_Returns": "sum",
"Max_Sold_Item": joiner,
"Max_Returned_Item": joiner
})
df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment