Created January 27, 2021 07:04
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup as bs
from many_requests import ManyRequests


def get_merchants():
    # Go to the 1ML directory, grab all categories, visit each category page,
    # collect every merchant pub key found there, and return them as a list.
    if os.path.isfile("merchants.txt"):
        print("Merchants list found, delete it to update.")
        with open("merchants.txt", "r") as fileobj:
            pub_keys = fileobj.read().splitlines()
    else:
        print("Merchant list updating...")
        base_url = "https://1ml.com"
        directory_link = urljoin(base_url, "directory")
        response = requests.get(directory_link)
        directory_soup = bs(response.content, "html.parser")
        # Skip the first list item, which is the directory header rather than a category.
        categories = directory_soup.find_all("li", {"class": "list-group-item"})[1:]
        links = []
        for category in categories:
            links.extend(category.find_all("a", {"title": True}))
        # Category hrefs are absolute paths ("/directory/..."); urljoin handles the
        # leading slash correctly, whereas os.path.join would discard the base URL.
        links = [urljoin(base_url, link["href"]) for link in links]
        # Fetch all category pages concurrently.
        responses = ManyRequests(n_workers=10, n_connections=10)(
            method='GET', url=links)
        pub_keys = []
        for response in responses:
            soup = bs(response.content, "html.parser")
            pub_keys.extend(soup.find_all("strong", {"class": "small selectable"}))
        # Deduplicate and keep only the text of each node.
        pub_keys = list(set(pub_key.text for pub_key in pub_keys))
        with open("merchants.txt", "w") as fileobj:
            fileobj.writelines(pub_key + "\n" for pub_key in pub_keys)
    return pub_keys
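A minimal usage sketch, assuming the imports above are installed (requests, beautifulsoup4, many-requests) and 1ml.com is reachable; it simply calls get_merchants() and reports how many pub keys were collected:

if __name__ == "__main__":
    merchants = get_merchants()
    print(f"Collected {len(merchants)} merchant pub keys")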