Created
April 6, 2021 03:53
-
-
Save InJeCTrL/3804a846c1680a4d0d0c509fb8eb2835 to your computer and use it in GitHub Desktop.
APK提取URL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from concurrent.futures import ProcessPoolExecutor | |
| import requests | |
| import pandas as pd | |
| from androguard.misc import AnalyzeAPK | |
| import os | |
| import re | |
| import base64 | |
# Number of worker processes handed to ProcessPoolExecutor(max_workers=...)
# in the __main__ block, i.e. how many APKs are processed concurrently.
maxCurrent = 12
def getURLList(filename = "10000apk.csv"):
    """Read a file of base64-encoded paths and return full download URLs.

    Each line of *filename* is base64-decoded and appended to the fixed
    server prefix.  The returned list keeps one entry per input line, in
    file order, because the caller uses the list position as the sample
    index.

    Args:
        filename: Path of the text file holding one base64 string per line.

    Returns:
        A list of URL strings, one per line of the input file.

    Raises:
        OSError: If the file cannot be opened.
        binascii.Error: If a line is not valid base64 (e.g. bad padding).
    """
    prefix = "http://dfs.asec.buptnsrc.com/"
    list_URL = []
    with open(filename, "r") as f:
        for line in f:
            raw = base64.b64decode(line.strip())
            # Decode the bytes properly instead of slicing their repr():
            # the repr escapes backslashes and non-ASCII bytes, which
            # corrupted such paths.  Undecodable bytes are kept as \xNN
            # escapes so a bad line never raises here.
            list_URL.append(prefix + raw.decode("utf-8", errors="backslashreplace"))
    return list_URL
def download(idx, url, timeout = 60):
    """Download *url* into ``./download/<idx>`` and return that path.

    The body is streamed to a temporary ``.part`` file and renamed into
    place only after the transfer finished, so a file at the final path is
    always a complete download (the worker treats an existing file there
    as already downloaded).

    Args:
        idx: Integer sample index; used as the file name.
        url: URL to fetch.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block a worker forever.

    Returns:
        The path of the downloaded file, or ``None`` if anything went
        wrong (network error, HTTP error status, disk error).
    """
    prefix = "./download/"
    path = "%s%d" % (prefix, idx)
    tmp_path = path + ".part"
    try:
        os.makedirs(prefix, exist_ok = True)
        with requests.get(url, stream = True, timeout = timeout) as response:
            # Do not store an HTML error page as if it were an APK.
            response.raise_for_status()
            with open(tmp_path, "wb") as f:
                # Stream in chunks instead of holding the whole APK in memory.
                for chunk in response.iter_content(chunk_size = 1 << 20):
                    f.write(chunk)
        os.replace(tmp_path, path)
    except Exception:
        # Best effort: one failed sample must not abort the batch.  Drop
        # any partial file so it is never picked up later.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return None
    return path
# Unanchored dotted-quad pattern: it can also match inside longer dotted
# digit runs (e.g. version-like strings), exactly as the inline regex did.
_IP_PATTERN = re.compile(
    r'(([01]{0,1}\d{0,1}\d|2[0-4]\d|25[0-5])\.){3}'
    r'([01]{0,1}\d{0,1}\d|2[0-4]\d|25[0-5])')
# http(s) scheme plus host part only (word chars, '-', '.', %XX escapes).
_URL_PATTERN = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')


def _collect_endpoints(strings):
    """Return (ips, urls): the first IP match, else the first URL match, per string."""
    ips = []
    urls = []
    for text in strings:
        ip = _IP_PATTERN.search(text)
        if ip:
            # An IP match takes precedence; the URL pattern is not
            # recorded for the same string.
            ips.append(ip.group())
            continue
        host = _URL_PATTERN.search(text)
        if host:
            urls.append(host.group())
    return ips, urls


def analysis(filepath):
    """Extract app metadata plus IP/URL-looking strings from an APK.

    Args:
        filepath: Path of the APK file to pass to ``AnalyzeAPK``.

    Returns:
        ``[apkName, packageName, versionName, ipList, urlList]`` on
        success, or an empty list if the APK could not be analysed.
    """
    try:
        a, d, _ = AnalyzeAPK(filepath)
        packageName = a.get_package()
        versionName = a.get_androidversion_name()
        apkName = a.get_app_name()
        ipList = []
        urlList = []
        for ditem in d:
            ips, urls = _collect_endpoints(ditem.get_strings())
            ipList.extend(ips)
            urlList.extend(urls)
    except Exception:
        # Any parsing failure marks the sample as unusable.  Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return []
    return [apkName, packageName, versionName, ipList, urlList]
def work(idx, url_sample):
    """Download, analyse and record one APK sample.

    The result is written to ``./results/<idx>.csv``.  Samples that already
    have a result file are skipped, and an APK already present at
    ``./download/<idx>`` is reused instead of being fetched again.

    Args:
        idx: Integer sample index; names both the download and the result.
        url_sample: URL the APK is downloaded from.

    Returns:
        None.  Progress is reported on stdout.
    """
    result_path = "./results/%d.csv" % (idx)
    if os.path.exists(result_path):
        print("File_%d Skip!(Processed)\n" % (idx), end = '')
        return

    # Always bind ``path``: previously it was only assigned when the
    # download had to be performed, so a leftover file was never analysed.
    path = "./download/%d" % (idx)
    if not os.path.exists(path):
        path = download(idx, url_sample)
    if not path:
        print("FileDownload Failed!\nindex: %d\nURL: %s\n" % (idx, url_sample), end = '')
        return

    result_analysis = analysis(path)
    if not result_analysis:
        print("File_%d Skip(Error)!\n" % (idx), end = '')
        return

    columns = ["apkName", "packageName", "versionName", "ipList", "urlList"]
    # Build the one-row frame directly; DataFrame.append no longer exists
    # in pandas 2.x.
    result = pd.DataFrame([dict(zip(columns, result_analysis))], columns = columns)
    os.makedirs("./results", exist_ok = True)
    # Persist the result before deleting the APK so a failed write does
    # not force a re-download.
    result.to_csv(result_path, encoding = "utf-8", index = False)
    os.remove(path)
    print("File_%d OK!\n" % (idx), end = '')
if __name__ == "__main__":
    # The ``with`` block shuts the pool down (waiting for all tasks) even
    # if submitting or reporting raises.
    with ProcessPoolExecutor(max_workers = maxCurrent) as processPool:
        futures = [
            (idx, processPool.submit(work, idx, url))
            for idx, url in enumerate(getURLList())
        ]
        # Exceptions raised inside a worker are stored on its future and
        # are otherwise lost; surface them so failed samples are visible.
        for idx, future in futures:
            error = future.exception()
            if error is not None:
                print("File_%d Failed(%r)\n" % (idx, error), end = '')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment