Skip to content

Instantly share code, notes, and snippets.

@l-althueser
Last active April 26, 2018 15:56
Show Gist options
  • Select an option

  • Save l-althueser/d2612b750f8a0864274614283c647068 to your computer and use it in GitHub Desktop.

Select an option

Save l-althueser/d2612b750f8a0864274614283c647068 to your computer and use it in GitHub Desktop.
Some examples of how to work with Monte Carlo (MC) ROOT files using uproot.
import uproot
# Open one ROOT file, pick the 'events/events' object out of it and inspect it.
root_file = uproot.open('optPhot_00001_g4mc_G4.root')
f = root_file['events/events']
# Read the complete 'eventid' branch in a single call.
eventid = f['eventid'].array()
# Bare expression: its value is only echoed in an interactive session/notebook.
f.keys()
# Step through three branches of a single file, one chunk at a time.
f = uproot.open('optPhot_00001_g4mc_G4.root')['events/events']
pri_position_chunks = f.iterate(
    ["xp_pri", "yp_pri", "zp_pri"],
    outputtype=tuple,
)
for x, y, z in pri_position_chunks:
    print(x, y, z)
    # Only the first chunk is shown; this is a demonstration.
    break
# Determine number of events in all files like TChain (in a very fast and efficient way!)
# `collections` must be imported here: this snippet runs before the
# `import collections` that appears further down in the file.
import collections

MC_files = ['optPhot_00001_g4mc_G4.root']
# With reportentries=True every yielded item is indexable and element [1] is
# taken as the entry stop of that chunk; the value from the last chunk is the
# total number of events.
# NOTE(review): the "?" branch pattern is presumably chosen so that (almost)
# no branch data has to be read -- confirm against the uproot documentation.
nevents = 0  # stays 0 if no chunk is produced at all
for _chunk in uproot.iterate(MC_files, "events/events", ["?"],
                             reportentries=True, entrysteps=100000,
                             outputtype=collections.namedtuple):
    # Keep only the most recent entry stop instead of building a throwaway list.
    nevents = _chunk[1]
# Treat several ROOT files like one TChain and walk selected branches,
# 10000 events per step.
MC_files = ['optPhot_*_g4mc_G4.root']
chained_chunks = uproot.iterate(
    MC_files,
    "events/events",
    ["xp_pri", "yp_pri", "zp_pri"],
    entrysteps=10000,
    outputtype=tuple,
)
for xp_pri, yp_pri, zp_pri in chained_chunks:
    print(xp_pri, yp_pri, zp_pri)
    # Stop after the first step; this is only an example.
    break
# TChain-like iteration over every branch ("*"), with each step delivered as
# one namedtuple so branches are reachable as attributes.
import collections

MC_files = ['optPhot_*_g4mc_G4.root']
all_branch_chunks = uproot.iterate(
    MC_files,
    "events/events",
    ["*"],
    entrysteps=10000,
    outputtype=collections.namedtuple,
)
for _chunk in all_branch_chunks:
    print(_chunk.xp_pri)
    # Only the first step is printed.
    break
# Thanks to Christian Wittweg
# Read chunks as pandas DataFrames (using a thread pool), keep only
# single-scatter events and flatten the per-event lists to scalars.
import concurrent.futures

import pandas as pd

branches = ['Ed', 'X', 'Y', 'Z', 'type_pri']
# The original snippet used `_files` without ever defining it (NameError).
# NOTE(review): the file pattern below mirrors the other examples in this
# file -- adjust it to the files that should actually be read.
_files = ['optPhot_*_g4mc_G4.root']

# The context manager shuts the worker threads down once iteration is done.
with concurrent.futures.ThreadPoolExecutor(8) as executor:
    for _chunk in uproot.iterate(_files, "events/events", branches,
                                 entrysteps=100000, outputtype=pd.DataFrame,
                                 executor=executor):
        # Relabel the columns with the plain branch names requested above.
        _chunk.columns = branches
        # Remove events with more than one scatter
        _chunk = _chunk[_chunk['Ed'].apply(lambda x: len(x) == 1)]
        # Convert lists in cells by taking only the 0th element
        _chunk = _chunk.applymap(lambda x: x[0])
        # Decode bytestrings for primary type
        _chunk['type_pri'] = _chunk['type_pri'].apply(lambda x: x.decode("utf-8"))
        print(_chunk.head(20))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment