Skip to content

Instantly share code, notes, and snippets.

@InJeCTrL
Created April 6, 2021 03:53
Show Gist options
  • Select an option

  • Save InJeCTrL/3804a846c1680a4d0d0c509fb8eb2835 to your computer and use it in GitHub Desktop.

Select an option

Save InJeCTrL/3804a846c1680a4d0d0c509fb8eb2835 to your computer and use it in GitHub Desktop.
APK提取URL
from concurrent.futures import ProcessPoolExecutor
import requests
import pandas as pd
from androguard.misc import AnalyzeAPK
import os
import re
import base64
# Number of worker processes; passed as max_workers to the
# ProcessPoolExecutor created in the __main__ section.
maxCurrent = 12
def getURLList(filename = "10000apk.csv"):
    """Build the list of download URLs from a file of base64-encoded paths.

    Every line of *filename* is base64-decoded and appended to a fixed URL
    prefix. Exactly one URL is produced per input line, in file order, so
    the position of a URL in the result equals the index of its line.

    Args:
        filename: Path of a text file holding one base64 string per line.

    Returns:
        A list of URL strings, one per line of the file.
    """
    prefix = "http://dfs.asec.buptnsrc.com/"
    list_URL = []
    with open(filename, "r") as f:
        for line in f:
            raw = base64.b64decode(line.strip())
            # Decode the bytes instead of slicing str(bytes): the repr-based
            # approach turned non-ASCII bytes and backslashes into escape
            # sequences, yielding a URL that differs from the encoded path.
            # errors="replace" keeps the function from raising on invalid
            # UTF-8, as the repr-based version never raised either.
            list_URL.append(prefix + raw.decode("utf-8", errors="replace"))
    return list_URL
def download(idx, url, timeout = (10, 60)):
    """Download *url* and store the response body as ``./download/<idx>``.

    Args:
        idx: Integer index; used as the local file name.
        url: URL to fetch.
        timeout: Timeout forwarded to ``requests.get`` (a number or a
            ``(connect, read)`` tuple) so a stalled server cannot block a
            worker forever.

    Returns:
        The path of the written file, or None when the request failed,
        the server answered with an HTTP error status, or the file could
        not be written.
    """
    prefix = "./download/"
    path = "%s%d" % (prefix, idx)
    try:
        # The target directory may not exist on a fresh checkout.
        os.makedirs(prefix, exist_ok=True)
        with requests.get(url, timeout=timeout) as response:
            # Without this check a 4xx/5xx error page would be saved and
            # later handed to the analyzer as if it were the APK.
            response.raise_for_status()
            with open(path, "wb") as f:
                f.write(response.content)
    except (requests.RequestException, OSError):
        # Do not leave a partial file behind: work() skips an index whose
        # download file already exists, so a leftover would block retries.
        try:
            os.remove(path)
        except OSError:
            pass
        return None
    return path
def analysis(filepath):
    """Extract app metadata plus IP and URL strings from an APK file.

    The file is loaded with androguard's ``AnalyzeAPK``. Every string
    returned by ``get_strings()`` of each item in the second element of
    its result is scanned: if the string contains something that looks
    like a dotted-quad IP, the first such match is recorded; otherwise,
    if it contains an ``http://`` or ``https://`` host, the first such
    match is recorded. A string therefore contributes at most one entry,
    and an IP match takes precedence over a URL match.

    Args:
        filepath: Path of the APK file to analyze.

    Returns:
        ``[apkName, packageName, versionName, ipList, urlList]`` on
        success, or an empty list if anything went wrong while loading or
        scanning the file.
    """
    # Compile once; the patterns are invariant across all scanned strings.
    # NOTE(review): the IP pattern is unanchored, so it can match inside a
    # longer digit run (e.g. version-like strings) -- confirm whether that
    # is acceptable before tightening it.
    ip_pattern = re.compile(r'(([01]{0,1}\d{0,1}\d|2[0-4]\d|25[0-5])\.){3}'
                            r'([01]{0,1}\d{0,1}\d|2[0-4]\d|25[0-5])')
    host_pattern = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
    try:
        a, d, _dx = AnalyzeAPK(filepath)
        packageName = a.get_package()
        versionName = a.get_androidversion_name()
        apkName = a.get_app_name()
        ipList = []
        urlList = []
        for ditem in d:
            for strItem in ditem.get_strings():
                ip = ip_pattern.search(strItem)
                if ip:
                    ipList.append(ip.group())
                    continue
                host = host_pattern.search(strItem)
                if host:
                    urlList.append(host.group())
    except Exception:
        # Best effort: a file that cannot be parsed is reported to the
        # caller as an empty result. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return []
    return [apkName, packageName, versionName, ipList, urlList]
def work(idx, url_sample):
    """Download, analyze and record one APK sample.

    The result is written to ``./results/<idx>.csv`` as a single-row CSV
    with the columns apkName, packageName, versionName, ipList, urlList.
    Progress is reported on stdout.

    Args:
        idx: Integer index of the sample; names the download and result
            files.
        url_sample: URL the sample is downloaded from.

    Returns:
        None.
    """
    result_path = "./results/%d.csv" % (idx)
    if os.path.exists(result_path):
        print("File_%d Skip!(Processed)\n" % (idx), end = '')
        return

    if os.path.exists("./download/%d" % (idx)):
        # A download file for this index is already present; it is left
        # untouched and nothing is reported.
        # NOTE(review): a file left over from a failed analysis keeps this
        # index from ever being retried -- confirm this is intended.
        return

    path = download(idx, url_sample)
    if not path:
        print("FileDownload Failed!\nindex: %d\nURL: %s\n" % (idx, url_sample), end = '')
        return

    result_analysis = analysis(path)
    if not result_analysis:
        print("File_%d Skip(Error)!\n" % (idx), end = '')
        return

    columns = ["apkName", "packageName", "versionName", "ipList", "urlList"]
    # Build the single row directly: DataFrame.append was removed in
    # pandas 2.0.
    result = pd.DataFrame([dict(zip(columns, result_analysis))], columns = columns)

    # Persist the result first so that "OK" is only reported, and the
    # download only deleted, once the CSV actually exists.
    os.makedirs("./results", exist_ok = True)
    result.to_csv(result_path, encoding = "utf-8", index = False)
    os.remove(path)
    print("File_%d OK!\n" % (idx), end = '')
if __name__ == "__main__":
    # Fan the samples out over a pool of worker processes. The "with"
    # block waits for all submitted tasks and shuts the pool down even if
    # building the URL list or submitting a task raises.
    with ProcessPoolExecutor(max_workers = maxCurrent) as processPool:
        futures = [
            (idx, processPool.submit(work, idx, url))
            for idx, url in enumerate(getURLList())
        ]
        # An exception raised inside a worker is stored on its future and
        # would otherwise disappear silently; report it per sample.
        for idx, future in futures:
            error = future.exception()
            if error is not None:
                print("File_%d Failed: %r\n" % (idx, error), end = '')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment