snacsnoc · October 9, 2024 08:46
diff --git a/download_dem_bc_files.py b/download_dem_bc_files.py
 import os
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin

 # Base URL for the dataset; main() fetches this page and follows its
 # subfolder links.
 # See https://catalogue.data.gov.bc.ca/dataset/digital-elevation-model-for-british-columbia-cded-1-250-000
 BASE_URL = "https://pub.data.gov.bc.ca/datasets/175624/"

 # Local folder to save the downloaded files (relative to the current
 # working directory).
 DOWNLOAD_FOLDER = "bc_dem_files"


 # Create the download folder up front; exist_ok=True makes re-runs harmless.
 os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

 def download_file(url, folder):
     """Stream the file at *url* into *folder* and return the local path.

     The local name is the last ``/``-separated segment of *url*.  Data is
     written to a ``.part`` file first and renamed only once the whole body
     has arrived, so an interrupted transfer never leaves a truncated
     archive under the final name.

     Args:
         url: Absolute URL of the file to fetch.
         folder: Existing directory the file is saved into.

     Returns:
         The path of the downloaded file inside *folder*.

     Raises:
         requests.HTTPError: If the server answers with a 4xx/5xx status.
         requests.Timeout: If the server does not respond in time.
     """
     local_filename = os.path.join(folder, url.split('/')[-1])
     partial_filename = local_filename + ".part"
     print(f"Downloading {local_filename}...")
     try:
         # (connect, read) timeouts: without them a stalled server would
         # block the whole crawl forever.
         with requests.get(url, stream=True, timeout=(10, 60)) as r:
             r.raise_for_status()
             with open(partial_filename, 'wb') as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     f.write(chunk)
         os.replace(partial_filename, local_filename)
     finally:
         # Still present only when the transfer failed part-way.
         if os.path.exists(partial_filename):
             os.remove(partial_filename)
     return local_filename

 def parse_and_download_sublocks(sub_url, base_url):
     """Download every ``.zip`` file linked from the listing page at *sub_url*.

     Args:
         sub_url: URL of the subfolder listing page to scan.
         base_url: URL that relative links on the page are resolved against.

     Raises:
         requests.HTTPError: If the listing page answers with a 4xx/5xx status.
         requests.Timeout: If the server does not respond in time.
     """
     response = requests.get(sub_url, timeout=30)
     # Fail loudly instead of parsing an error page and silently finding
     # no archives in it.
     response.raise_for_status()
     soup = BeautifulSoup(response.text, 'html.parser')

     # Iterate through all links in the subfolder
     for link in soup.find_all('a'):
         href = link.get('href')
         # Anchors without an href yield None; skip them instead of crashing.
         # We're only interested in .zip files
         if href and href.endswith('.zip'):
             full_url = urljoin(base_url, href)
             download_file(full_url, DOWNLOAD_FOLDER)

 def main():
     """Crawl the listing at ``BASE_URL`` and download each subfolder's archives.

     Raises:
         requests.HTTPError: If a listing page or file answers with a
             4xx/5xx status.
         requests.Timeout: If the server does not respond in time.
     """
     response = requests.get(BASE_URL, timeout=30)
     response.raise_for_status()
     soup = BeautifulSoup(response.text, 'html.parser')

     # Iterate through all subfolder links
     for link in soup.find_all('a'):
         subfolder = link.get('href')
         # Anchors without an href yield None; we're only interested in
         # subfolders, i.e. links ending in '/'.
         if not subfolder or not subfolder.endswith('/'):
             continue
         subfolder_url = urljoin(BASE_URL, subfolder)
         # Listing pages also link to the parent directory and to
         # themselves; stay strictly below the dataset root.
         if subfolder_url == BASE_URL or not subfolder_url.startswith(BASE_URL):
             continue
         print(f"Processing subfolder: {subfolder_url}")
         parse_and_download_sublocks(subfolder_url, subfolder_url)

 # Start the crawl only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	import os
	import requests
	from bs4 import BeautifulSoup
	from urllib.parse import urljoin

	# Base URL for the dataset; main() fetches this page and follows its
	# subfolder links.
	# See https://catalogue.data.gov.bc.ca/dataset/digital-elevation-model-for-british-columbia-cded-1-250-000
	BASE_URL = "https://pub.data.gov.bc.ca/datasets/175624/"

	# Local folder to save the downloaded files (relative to the current
	# working directory).
	DOWNLOAD_FOLDER = "bc_dem_files"


	# Create the download folder up front; exist_ok=True makes re-runs harmless.
	os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

	def download_file(url, folder):
	    """Stream the file at *url* into *folder* and return the local path.

	    The local name is the last ``/``-separated segment of *url*.  Data is
	    written to a ``.part`` file first and renamed only once the whole body
	    has arrived, so an interrupted transfer never leaves a truncated
	    archive under the final name.

	    Args:
	        url: Absolute URL of the file to fetch.
	        folder: Existing directory the file is saved into.

	    Returns:
	        The path of the downloaded file inside *folder*.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	        requests.Timeout: If the server does not respond in time.
	    """
	    local_filename = os.path.join(folder, url.split('/')[-1])
	    partial_filename = local_filename + ".part"
	    print(f"Downloading {local_filename}...")
	    try:
	        # (connect, read) timeouts: without them a stalled server would
	        # block the whole crawl forever.
	        with requests.get(url, stream=True, timeout=(10, 60)) as r:
	            r.raise_for_status()
	            with open(partial_filename, 'wb') as f:
	                for chunk in r.iter_content(chunk_size=8192):
	                    f.write(chunk)
	        os.replace(partial_filename, local_filename)
	    finally:
	        # Still present only when the transfer failed part-way.
	        if os.path.exists(partial_filename):
	            os.remove(partial_filename)
	    return local_filename

	def parse_and_download_sublocks(sub_url, base_url):
	    """Download every ``.zip`` file linked from the listing page at *sub_url*.

	    Args:
	        sub_url: URL of the subfolder listing page to scan.
	        base_url: URL that relative links on the page are resolved against.

	    Raises:
	        requests.HTTPError: If the listing page answers with a 4xx/5xx status.
	        requests.Timeout: If the server does not respond in time.
	    """
	    response = requests.get(sub_url, timeout=30)
	    # Fail loudly instead of parsing an error page and silently finding
	    # no archives in it.
	    response.raise_for_status()
	    soup = BeautifulSoup(response.text, 'html.parser')

	    # Iterate through all links in the subfolder
	    for link in soup.find_all('a'):
	        href = link.get('href')
	        # Anchors without an href yield None; skip them instead of crashing.
	        # We're only interested in .zip files
	        if href and href.endswith('.zip'):
	            full_url = urljoin(base_url, href)
	            download_file(full_url, DOWNLOAD_FOLDER)

	def main():
	    """Crawl the listing at ``BASE_URL`` and download each subfolder's archives.

	    Raises:
	        requests.HTTPError: If a listing page or file answers with a
	            4xx/5xx status.
	        requests.Timeout: If the server does not respond in time.
	    """
	    response = requests.get(BASE_URL, timeout=30)
	    response.raise_for_status()
	    soup = BeautifulSoup(response.text, 'html.parser')

	    # Iterate through all subfolder links
	    for link in soup.find_all('a'):
	        subfolder = link.get('href')
	        # Anchors without an href yield None; we're only interested in
	        # subfolders, i.e. links ending in '/'.
	        if not subfolder or not subfolder.endswith('/'):
	            continue
	        subfolder_url = urljoin(BASE_URL, subfolder)
	        # Listing pages also link to the parent directory and to
	        # themselves; stay strictly below the dataset root.
	        if subfolder_url == BASE_URL or not subfolder_url.startswith(BASE_URL):
	            continue
	        print(f"Processing subfolder: {subfolder_url}")
	        parse_and_download_sublocks(subfolder_url, subfolder_url)

	# Start the crawl only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()
No results found