Skip to content

Instantly share code, notes, and snippets.

@davisv7
Created January 27, 2021 07:04
Show Gist options
  • Select an option

  • Save davisv7/73e8970dea442aa3fd55d528ae511f7c to your computer and use it in GitHub Desktop.

Select an option

Save davisv7/73e8970dea442aa3fd55d528ae511f7c to your computer and use it in GitHub Desktop.
def get_merchants():
    """Return the merchant public keys listed in the 1ml.com directory.

    Results are cached in ``merchants.txt`` in the current working directory.
    When that file exists it is read and returned without touching the
    network; delete the file to force a fresh scrape.

    Otherwise the directory page is fetched, every category link found on it
    is downloaded, and the public keys on those pages are de-duplicated,
    written to ``merchants.txt`` (one per line) and returned. The cache file
    is only written when at least one key was found, so a failed scrape does
    not leave behind an empty cache that later calls would trust.

    Returns:
        list: Public keys as strings. Freshly scraped keys are sorted so the
        cache file is stable between runs; cached keys keep file order.

    Raises:
        Whatever ``requests.get`` raises for the directory request, including
        a timeout when the server does not answer within 30 seconds.
    """
    cache_path = "merchants.txt"

    if os.path.isfile(cache_path):
        print("Merchants list found, delete it to update.")
        with open(cache_path, "r", encoding="utf-8") as fileobj:
            # Drop blank lines so a hand-edited file never yields an empty
            # "key".
            return [line for line in fileobj.read().splitlines() if line.strip()]

    print("Merchant list updating...")
    base_url = "https://1ml.com"
    directory_link = join(base_url, "directory")
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(directory_link, timeout=30)
    directory_soup = bs(response.content, "html.parser")

    # The first list item is skipped, as in the original code.
    # NOTE(review): presumably it is not a real category - confirm against
    # the live page.
    categories = directory_soup.find_all("li", {"class": "list-group-item"})[1:]

    links = []
    for category in categories:
        for anchor in category.find_all("a", {"title": True}):
            # Plain concatenation rather than join().
            # NOTE(review): if `join` is os.path.join and the hrefs start
            # with "/", join would discard base_url - confirm which `join`
            # the file imports.
            links.append(base_url + anchor["href"])

    unique_keys = set()
    if links:
        responses = ManyRequests(n_workers=10, n_connections=10)(
            method="GET", url=links)
        for response in responses:
            # NOTE(review): assumes every returned item exposes `.content`,
            # including requests that failed - verify against ManyRequests.
            soup = bs(response.content, "html.parser")
            for tag in soup.find_all("strong", {"class": "small selectable"}):
                unique_keys.add(tag.text)

    pub_keys = sorted(unique_keys)
    if not pub_keys:
        # Do not persist an empty result: it would be served as a valid
        # cache on every later call until someone deletes the file.
        print("No merchants found, merchants.txt was not written.")
        return pub_keys

    with open(cache_path, "w", encoding="utf-8") as fileobj:
        fileobj.writelines([pub_key + "\n" for pub_key in pub_keys])
    return pub_keys
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment