Skip to content

Instantly share code, notes, and snippets.

@nchibana
Created October 17, 2019 23:14
Show Gist options
  • Select an option

  • Save nchibana/618c31bf413c9664add7d21caf02d562 to your computer and use it in GitHub Desktop.

Select an option

Save nchibana/618c31bf413c9664add7d21caf02d562 to your computer and use it in GitHub Desktop.
simple labMT process function
def process():
    """Write break-index, word-vector, and timeseries files for ``movie``.

    Operates entirely on names defined outside this function:
    ``raw_text_clean`` (the text to tokenize), ``movie``, ``labMT``,
    ``labMTvector``, ``chopper`` and ``precomputeTimeseries``.

    Steps, for each window size in ``windowSizes``:

    1. Tokenize every line of ``raw_text_clean`` that does not start with
       ``"<b>"``, remembering for each token the index of the line it came
       from.
    2. Record the line index of every ``window // 10``-th token and write
       those indices to ``word-vectors/<window>/<movie>-breaks.csv``.
    3. Call ``chopper`` with the tokens and the path
       ``word-vectors/<window>/<movie>.csv``, then read that file back as
       rows of integers (presumably ``chopper`` writes it -- confirm).
    4. If rows were read and the first row has more than 9 columns, call
       ``precomputeTimeseries``; if no rows were read, flag the movie as
       excluded.

    Returns:
        None. All results are written to disk or set on ``movie``.
    """
    # Compile once; the same pattern is applied to every line.
    token_pattern = re.compile(r"[\w\@\#\'\&\]\*\-\/\[\=\;]+", flags=re.UNICODE)
    windowSizes = [2000]

    kwords = []  # lower-cased tokens from all kept lines, in order
    klines = []  # klines[k] is the line index that kwords[k] came from
    for line_no, line in enumerate(raw_text_clean.split("\n")):
        # Lines beginning with "<b>" are skipped entirely.
        if line.startswith("<b>"):
            continue
        tokens = [tok.lower() for tok in token_pattern.findall(line)]
        kwords.extend(tokens)
        klines.extend([line_no] * len(tokens))

    for window in windowSizes:
        # Integer arithmetic throughout: a float index (``window / 10 * i``)
        # raises TypeError on Python 3.
        step = window // 10
        n_breaks = len(klines) * 10 // window
        breaks = [klines[step * k] for k in range(n_breaks)]
        # With fewer than ``step`` tokens there are no breaks at all; guard so
        # that case reaches the "blank" handling below instead of raising
        # IndexError here.
        if breaks:
            breaks[0] = 0

        # NOTE(review): ``movie`` is concatenated like a string here but has
        # ``.title`` / ``.exclude`` set on it below -- confirm its type.
        base = "word-vectors/" + str(window) + "/" + movie
        with open(base + "-breaks.csv", "w") as f:
            f.write(",".join(map(str, breaks)))

        chopper(kwords, labMT, labMTvector, base + ".csv", minSize=step)

        with open(base + ".csv", "r") as f:
            fullVec = [list(map(int, row.split(","))) for row in f]

        # some movies are blank
        # NOTE(review): the original indentation was lost; the ``else`` is
        # attached to the outer check (no rows at all). Confirm against the
        # upstream source.
        if fullVec:
            if len(fullVec[0]) > 9:
                precomputeTimeseries(
                    fullVec,
                    labMT,
                    labMTvector,
                    "timeseries/" + str(window) + "/" + movie + ".csv",
                )
        else:
            print("this movie is blank:")
            print(movie.title)
            movie.exclude = True
            movie.excludeReason = "movie blank"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment