Skip to content

Instantly share code, notes, and snippets.

@MiaAltieri
Created July 8, 2019 20:17
Show Gist options
  • Select an option

  • Save MiaAltieri/4b52d3f1715b91ed3dfd48f0af317dc7 to your computer and use it in GitHub Desktop.

Select an option

Save MiaAltieri/4b52d3f1715b91ed3dfd48f0af317dc7 to your computer and use it in GitHub Desktop.
PyUoIMiceSamples runs PyUoI on samples. Run with --help to see the input arguments. Requires that sparse_control.preprocessed.h5 be in the same directory.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix
from scipy.sparse import csc_matrix
import pickle
from pyuoi import UoI_Lasso
from pyuoi import UoI_L1Logistic
from pyuoi.utils import check_logger
import getopt
import os
import sys
from mpi4py import MPI
import logging
import h5py
# Handle input arguments/options.
#
# Usage: python PyUoIMiceSamples --predict=<weight | speed | memory> [test]
#
# Results left at module level for the rest of the script:
#   predict -- the target to regress/classify on (one of VALID_TARGETS)
#   test    -- 'test' when the positional `test` argument was given, else ''
#   mode    -- same value as `test`; used to pick the data subset and to
#              build the output model filename
#
# Any usage error prints a hint and exits with status 2; --help exits 0.
VALID_TARGETS = ('weight', 'speed', 'memory')

try:
    # No short options exist. `test` is a positional argument, so the
    # short-option spec must be empty (a spec of 'test' would declare the
    # flags -t, -e and -s).
    opts, args = getopt.getopt(sys.argv[1:], '', ['predict=', 'help'])
except getopt.GetoptError as err:
    print('unhandled option: ', err)
    print('try --help')
    sys.exit(2)

predict = None
test = ''

# getopt only returns the long options declared above, so these two cases
# are exhaustive. Compare with ==: `o in ("--help")` is a substring test on
# a plain string, not tuple membership.
for o, a in opts:
    if o == '--help':
        print('required argument: --predict=<weight | speed | memory>')
        print('optional arguments: test, --help')
        print('example: python PyUoIMiceSamples --predict=weight test')
        sys.exit()
    elif o == '--predict':
        predict = a

for a in args:
    if a == 'test':
        test = 'test'
    else:
        print('unhandled argument ', a)
        print('try --help')
        sys.exit(2)

if predict is None:
    print('missing required argument')
    print('required argument: --predict=<weight | speed | memory>')
    print('try --help')
    sys.exit(2)

# Reject unknown targets up front; otherwise any string would silently be
# handled like 'speed' further down.
if predict not in VALID_TARGETS:
    print('invalid value for --predict: ', predict)
    print('required argument: --predict=<weight | speed | memory>')
    print('try --help')
    sys.exit(2)

print('Now running PyUoI on ',predict)
# Every positional argument other than 'test' has been rejected above, so
# `test` already equals "'test' if 'test' in args else ''".
mode = test
# MPI bookkeeping: the world communicator, this process's rank in it, and
# the total number of processes.
comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

# Script-wide logger obtained from pyuoi's check_logger (given the
# communicator), reporting at INFO level.
logger = check_logger(None, name='uoi_main', comm=comm)
logger.setLevel(level=logging.INFO)
# Open the preprocessed table and partition it into features (X) and the
# single target column (Y).
# The 'memory' target is stored under its own HDF5 key; every other target
# is read from the 'rotarod' key.
hdf_key = 'memory' if predict == 'memory' else 'rotarod'
reread = pd.read_hdf('sparse_control.preprocessed.h5', key=hdf_key)

# Target column position: column 1 for 'weight', column 2 otherwise.
if predict == 'weight':
    start_index, end_index = 1, 2
else:
    start_index, end_index = 2, 3

if mode == "test":
    # Small run: row positions 1-4 and feature columns 4-24 only, with
    # half of the rows held out.
    X = reread.iloc[1:5, 4:25]
    Y = reread.iloc[1:5, start_index:end_index]
    holdout_fraction = 0.5
else:
    # Full run: every row, every column from position 4 on, 20% held out.
    X = reread.iloc[:, 4:]
    Y = reread.iloc[:, start_index:end_index]
    holdout_fraction = 0.20

# Fixed random_state keeps the train/test partition reproducible.
X, X_test, Y, Y_test = train_test_split(
    X, Y, test_size=holdout_fraction, random_state=42)

# Convert both feature matrices to CSR sparse form.
X = csr_matrix(X)
X_test = csr_matrix(X_test)

if rank == 0:
    # Fraction of explicitly stored entries in the training matrix.
    occupancy = X.nnz / (X.shape[0] * X.shape[1])
    logger.info('occupancy: %f' % occupancy)
    logger.info('regressing onto genetic data')

# Binarize Y for the memory target: values >= 600 become 0, all others 1.
if predict == 'memory':
    Y = np.where(Y >= 600, 0, 1)
# UoI for sparse matrices: UoI_L1Logistic for the binarized 'memory'
# target, UoI_Lasso for every other target.
logger.info('running Lasso')
if predict == 'memory':
    clf = UoI_L1Logistic(comm=comm, standardize=False, fit_intercept=True)
else:
    clf = UoI_Lasso(comm=comm, standardize=False, fit_intercept=True)
clf.fit(X, Y, verbose=True)
logger.info(clf.intercept_)

# Binarize the held-out targets with the same threshold as the training
# targets (>= 600 -> 0, otherwise 1). Only rank 0 performs this step.
if rank == 0 and predict == 'memory':
    print('binarizing Y for memory dataset')
    Y_test = np.where(Y_test.values >= 600, 0, 1)

# Dump the fitted parameters to stdout.
print(clf.intercept_)
print(clf.coef_)
# Save coefficients and intercept, plus the held-out split.
# The model file is named after the target and the run mode and is written
# to the current working directory.
model_name = 'model_' + predict + mode + '.h5'
output_path = os.path.join(os.getcwd(), model_name)

# All file output happens on rank 0 only: every rank writing the same path
# would race, and rank 0 is the only rank whose Y_test has been binarized
# for the 'memory' target.
if rank == 0:
    logger.info("writing model to %s" % output_path)
    # The context manager closes the HDF5 file even if a write raises.
    with h5py.File(output_path, 'w') as f:
        f.create_dataset('coef', data=clf.coef_)
        f.create_dataset('intercept', data=clf.intercept_)
        dset = f['coef']
        print(dset)

    # Now pickle X_test and Y_test so they can be reloaded later.
    # NOTE(review): unlike model_name, this filename does not include
    # `mode`, so a 'test' run overwrites the pickle of a full run for the
    # same target -- confirm whether that is intended.
    test_data = {'X': X_test, 'Y': Y_test}
    with open('test_data_' + predict + '.pickle', 'wb') as handle:
        pickle.dump(test_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment