import pandas as pd

# Small example frame used by the snippets below: six rows and three integer
# columns named 'x', 'y' and 'n'.
df = pd.DataFrame({
    'x': [1, 1, 1, 2, 2, 3],
    'y': [1, 2, 3, 1, 2, 1],
    'n': [3, 2, 1, 1, 2, 1],
})
df

| | x | y | n |
|---|---|---|---|
| 0 | 1 | 1 | 3 |
| 1 | 1 | 2 | 2 |
| 2 | 1 | 3 | 1 |
| 3 | 2 | 1 | 1 |
| 4 | 2 | 2 | 2 |
| 5 | 3 | 1 | 1 |
# Wrap the aggregation in `pipe`, a handy way to chain functions.
# `pipe` hands the object it is called on (here the grouped `y` column) to the
# given callable and returns whatever that callable returns, so the result can
# be an arbitrary object -- useful for otherwise awkward pandas operations.
# Note: the lambda parameter is named `df`, but it receives the grouped `y`
# selection produced by `df.groupby('x').y`, not the outer DataFrame `df`.
# NOTE(review): pandas >= 2.0 documents groupby `nth` as a filter that keeps
# the original row index rather than the group key; the output shown below may
# then differ -- confirm against the installed pandas version.
df.groupby('x').y.pipe(lambda df: pd.DataFrame({'frequency' : df.sum(),
'second' : df.nth(1)}
)
)

| | frequency | second |
|---|---|---|
| x | | |
| 1 | 6 | 2.0 |
| 2 | 3 | 2.0 |
| 3 | 1 | NaN |