l-althueser · April 26, 2018 15:56
diff --git a/MC_uproot.py b/MC_uproot.py
 # Usage examples for reading ROOT trees with uproot.
 # NOTE(review): the keyword arguments used below (`outputtype`, `entrysteps`,
 # `reportentries`) look like the uproot 3 API -- confirm the installed version.
 import collections  # needed by every `outputtype=collections.namedtuple` call below

 import uproot

 # Just load one file and look at it
 f = uproot.open('optPhot_00001_g4mc_G4.root')['events/events']
 eventid = f['eventid'].array()
 f.keys()  # result is discarded when run as a script; only useful interactively

 # Iterate over several branches of a single file
 f = uproot.open('optPhot_00001_g4mc_G4.root')['events/events']
 for x, y, z in f.iterate(["xp_pri", "yp_pri", "zp_pri"], outputtype=tuple):
     print(x, y, z)
     break  # only the first chunk is shown

 # Determine the number of events in all files, TChain-like.
 # `_chunk[1]` is the second element of each reported item (presumably the
 # entry stop -- confirm against the uproot docs); the value from the last
 # chunk is kept. The "?" branch pattern presumably matches few or no branches
 # so that almost no data has to be read -- TODO confirm.
 MC_files = ['optPhot_00001_g4mc_G4.root']
 nevents = [
     _chunk[1]
     for _chunk in uproot.iterate(
         MC_files, "events/events", ["?"],
         reportentries=True, entrysteps=100000,
         outputtype=collections.namedtuple,
     )
 ][-1]

 # Iterate, TChain-like, over branches of several ROOT files with 10000 events per step
 MC_files = ['optPhot_*_g4mc_G4.root']

 for xp_pri, yp_pri, zp_pri in uproot.iterate(
         MC_files, "events/events",
         ["xp_pri", "yp_pri", "zp_pri"],
         entrysteps=10000, outputtype=tuple):
     print(xp_pri, yp_pri, zp_pri)
     break  # only the first chunk is shown

 # Iterate, TChain-like, with all available branches in a single collection
 MC_files = ['optPhot_*_g4mc_G4.root']

 for _chunk in uproot.iterate(
         MC_files, "events/events", ["*"],
         entrysteps=10000, outputtype=collections.namedtuple):
     print(_chunk.xp_pri)
     break  # only the first chunk is shown
diff --git a/Read_as_pandas.py b/Read_as_pandas.py
 # Thanks to Christian Wittweg
 # Read selected branches of several ROOT files chunk-wise into pandas DataFrames.
 import concurrent.futures

 import pandas as pd
 import uproot  # was missing: `uproot.iterate` is called below

 branches = ['Ed', 'X', 'Y', 'Z', 'type_pri']

 # NOTE(review): `_files` is not defined anywhere in this snippet; it has to be
 # bound to the input file path(s) before this code runs.

 # The `with` block shuts the thread pool down once the iteration is finished.
 with concurrent.futures.ThreadPoolExecutor(8) as executor:
     for _chunk in uproot.iterate(_files, "events/events", branches,
                                  entrysteps=100000,
                                  outputtype=pd.DataFrame, executor=executor):
         # Rename the columns to the requested branch names (same order).
         _chunk.columns = branches
         # Keep only events whose 'Ed' entry has exactly one element
         # (removes events with more than one scatter).
         _chunk = _chunk[_chunk['Ed'].apply(lambda x: len(x) == 1)]
         # Convert lists in cells by taking only the 0th element
         _chunk = _chunk.applymap(lambda x: x[0])
         # Decode bytestrings for primary type
         _chunk.type_pri = _chunk.type_pri.apply(lambda x: x.decode("utf-8"))

 # `_chunk` is rebound on every iteration, so this shows the last chunk only.
 print(_chunk.head(20))
	# Usage examples for reading ROOT trees with uproot.
	# NOTE(review): the keyword arguments used below (`outputtype`, `entrysteps`,
	# `reportentries`) look like the uproot 3 API -- confirm the installed version.
	import collections  # needed by every `outputtype=collections.namedtuple` call below

	import uproot

	# Just load one file and look at it
	f = uproot.open('optPhot_00001_g4mc_G4.root')['events/events']
	eventid = f['eventid'].array()
	f.keys()  # result is discarded when run as a script; only useful interactively

	# Iterate over several branches of a single file
	f = uproot.open('optPhot_00001_g4mc_G4.root')['events/events']
	for x, y, z in f.iterate(["xp_pri", "yp_pri", "zp_pri"], outputtype=tuple):
	    print(x, y, z)
	    break  # only the first chunk is shown

	# Determine the number of events in all files, TChain-like.
	# `_chunk[1]` is the second element of each reported item (presumably the
	# entry stop -- confirm against the uproot docs); the value from the last
	# chunk is kept. The "?" branch pattern presumably matches few or no branches
	# so that almost no data has to be read -- TODO confirm.
	MC_files = ['optPhot_00001_g4mc_G4.root']
	nevents = [
	    _chunk[1]
	    for _chunk in uproot.iterate(
	        MC_files, "events/events", ["?"],
	        reportentries=True, entrysteps=100000,
	        outputtype=collections.namedtuple,
	    )
	][-1]

	# Iterate, TChain-like, over branches of several ROOT files with 10000 events per step
	MC_files = ['optPhot_*_g4mc_G4.root']

	for xp_pri, yp_pri, zp_pri in uproot.iterate(
	        MC_files, "events/events",
	        ["xp_pri", "yp_pri", "zp_pri"],
	        entrysteps=10000, outputtype=tuple):
	    print(xp_pri, yp_pri, zp_pri)
	    break  # only the first chunk is shown

	# Iterate, TChain-like, with all available branches in a single collection
	MC_files = ['optPhot_*_g4mc_G4.root']

	for _chunk in uproot.iterate(
	        MC_files, "events/events", ["*"],
	        entrysteps=10000, outputtype=collections.namedtuple):
	    print(_chunk.xp_pri)
	    break  # only the first chunk is shown
	# Thanks to Christian Wittweg
	# Read selected branches of several ROOT files chunk-wise into pandas DataFrames.
	import concurrent.futures

	import pandas as pd
	import uproot  # was missing: `uproot.iterate` is called below

	branches = ['Ed', 'X', 'Y', 'Z', 'type_pri']

	# NOTE(review): `_files` is not defined anywhere in this snippet; it has to be
	# bound to the input file path(s) before this code runs.

	# The `with` block shuts the thread pool down once the iteration is finished.
	with concurrent.futures.ThreadPoolExecutor(8) as executor:
	    for _chunk in uproot.iterate(_files, "events/events", branches,
	                                 entrysteps=100000,
	                                 outputtype=pd.DataFrame, executor=executor):
	        # Rename the columns to the requested branch names (same order).
	        _chunk.columns = branches
	        # Keep only events whose 'Ed' entry has exactly one element
	        # (removes events with more than one scatter).
	        _chunk = _chunk[_chunk['Ed'].apply(lambda x: len(x) == 1)]
	        # Convert lists in cells by taking only the 0th element
	        _chunk = _chunk.applymap(lambda x: x[0])
	        # Decode bytestrings for primary type
	        _chunk.type_pri = _chunk.type_pri.apply(lambda x: x.decode("utf-8"))

	# `_chunk` is rebound on every iteration, so this shows the last chunk only.
	print(_chunk.head(20))