Skip to content

Instantly share code, notes, and snippets.

@mircealungu
Last active November 6, 2015 19:04
Show Gist options
  • Select an option

  • Save mircealungu/13ebac8ec76b01689d3a to your computer and use it in GitHub Desktop.

Select an option

Save mircealungu/13ebac8ec76b01689d3a to your computer and use it in GitHub Desktop.
import threading
from goose import Goose
import time
# Sample article pages on derbund.ch that the benchmark fetches.
urls = [
    'http://www.derbund.ch/schweiz/standard/davon-profitieren-die-schlimmsten-gauner/story/25657350',
    'http://www.derbund.ch/kultur/kino/der-soll-mein-land-verlassen/story/29603044',
    'http://www.derbund.ch/kultur/kino/sie-will-fuehlen-nicht-verharren/story/29651519',
    'http://www.derbund.ch/kultur/kino/Ihre-Koepfe-sind-ausgehoehlt/story/24473164',
    'http://www.derbund.ch/kultur/kino/Ein-Schuss-der-alles-veraendert/story/16568277',
    'http://www.derbund.ch/kultur/kino/Sprayen-flirten-feiern-/story/19332269',
    'http://www.derbund.ch/kultur/kino/Hahn-tot-Leiche-lebt/story/23769116',
    'http://www.derbund.ch/kultur/kino/Torschlusspanik-im-Cafe/story/28486204',
    'http://www.derbund.ch/bern/kanton/schoenes-wetter-stellt-bergbahnen-vor-herausforderungen/story/25060261',
    'http://www.derbund.ch/wirtschaft/unternehmen-und-konjunktur/Die-meisten-Bahnen-wuerden-ohne-oeffentliche-Gelder-nicht-ueberleben/story/10387056',
]
def worker(url):
    """Run a Goose extraction on ``url`` and print ``done...`` afterwards.

    Used as a thread target by the benchmark below. A new ``Goose``
    instance is created for every call. The extraction result is discarded
    because only the time taken is of interest. Any exception raised by
    ``extract`` propagates, in which case ``done...`` is not printed.

    Args:
        url: Address of the article page to pass to ``Goose.extract``.
    """
    extractor = Goose()
    extractor.extract(url)
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print("done...")
# Benchmark driver: run the extractions on separate threads and print the
# wall-clock time, in seconds, from before the threads are created until
# every thread has been joined (or its join timeout has expired).
start = time.time()

# 39 requests (i = 1..39) cycle through the URL list. The index wraps on the
# list's actual length, so adding or removing URLs cannot cause an IndexError.
threads = [
    threading.Thread(target=worker, args=(urls[i % len(urls)],))
    for i in range(1, 40)
]

for t in threads:
    print("starting " + str(t))
    t.start()

for t in threads:
    print("will wait for... " + str(t))
    # Wait at most 7 seconds for this thread. join() returns on timeout even
    # if the thread is still running, so the time printed below is not
    # necessarily the time at which all extractions finished.
    t.join(7.0)

end = time.time()
print(end - start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment