Skip to content

Instantly share code, notes, and snippets.

@philerooski
Created February 13, 2026 18:54
Show Gist options
  • Select an option

  • Save philerooski/1812f6feed8bf20dd102514e41685640 to your computer and use it in GitHub Desktop.

Select an option

Save philerooski/1812f6feed8bf20dd102514e41685640 to your computer and use it in GitHub Desktop.
Download all form data from a specific form group to a local directory
#!/usr/bin/env python3
"""
Download all form data from a specific form group to a local directory.
"""
import argparse
import sys
import json
import tempfile
import shutil
import urllib.request
from pathlib import Path
from synapseclient import Synapse, login
from synapseclient.models import FormGroup, FormData
def parse_args(argv=None) -> argparse.Namespace:
    """
    Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads the process command line, so existing
            ``parse_args()`` calls behave exactly as before.

    Returns:
        Namespace with the attributes ``form_group_id``, ``output_dir``,
        ``as_reviewer``, ``filter_by_state``, ``profile``,
        ``synapse_auth_token`` and ``api_download``.
    """
    parser = argparse.ArgumentParser(
        description="Download all forms from a Synapse form group."
    )
    parser.add_argument(
        "--form-group-id",
        type=str,
        required=True,
        help="The form group ID",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="./forms",
        help="Directory to save downloaded forms (default: ./forms)",
    )
    # --as-reviewer / --no-as-reviewer share one destination; reviewer mode
    # is the default and the second flag switches it off.
    parser.add_argument(
        "--as-reviewer",
        action="store_true",
        default=True,
        help="List forms as a reviewer (default: True)",
    )
    parser.add_argument(
        "--no-as-reviewer",
        action="store_false",
        dest="as_reviewer",
        help="List forms as owner instead of reviewer",
    )
    parser.add_argument(
        "--filter-by-state",
        type=str,
        default=None,
        help="Comma-separated list of states to filter by (e.g., 'accepted,rejected')",
    )
    parser.add_argument(
        "--profile",
        type=str,
        default=None,
        help="The Synapse profile name to use for authentication (from ~/.synapseConfig)",
    )
    parser.add_argument(
        "--synapse-auth-token",
        type=str,
        default=None,
        help="Your Synapse access token (alternative to using profile)",
    )
    parser.add_argument(
        "--api-download",
        action="store_true",
        default=False,
        help="Use direct API calls for downloading instead of FormData.download() method",
    )
    return parser.parse_args(argv)
def download_form_data_via_api(form: FormData, syn: Synapse, output_dir: Path) -> Path:
    """
    Download a form's data file using direct API calls.

    This approach mirrors the R implementation in get-form-data.R:
    1. Request a presigned URL via /file/v1/fileHandle/batch
    2. Download the file from the presigned URL

    Args:
        form: FormData object to download
        syn: Authenticated Synapse client
        output_dir: Directory to save the downloaded file

    Returns:
        Path to the downloaded file (``output_dir / "form_<form_data_id>.json"``)

    Raises:
        RuntimeError: If the batch response contains no result for the file
            handle, reports a failure code, or omits the presigned URL.
    """
    # Build the request body for the batch file handle API
    request_body = {
        "requestedFiles": [
            {
                "fileHandleId": form.data_file_handle_id,
                "associateObjectId": form.form_data_id,
                "associateObjectType": "FormData",
            }
        ],
        "includePreSignedURLs": True,
        "includeFileHandles": False,
    }

    # Resolve the path against the client's configured file endpoint rather
    # than a hard-coded production URL, so a client pointed at another stack
    # is sent to the matching file service.
    response = syn.restPOST(
        uri="/fileHandle/batch",
        body=json.dumps(request_body),
        endpoint=syn.fileHandleEndpoint,
    )

    # Validate the result explicitly: a denied or missing file handle comes
    # back as an entry carrying "failureCode" and no "preSignedURL".
    results = response.get("requestedFiles") or []
    if not results:
        raise RuntimeError(
            f"No file result returned for form {form.form_data_id}"
        )
    result = results[0]
    failure_code = result.get("failureCode")
    if failure_code:
        raise RuntimeError(
            f"Could not get a download URL for form {form.form_data_id} "
            f"(file handle {form.data_file_handle_id}): {failure_code}"
        )
    presigned_url = result.get("preSignedURL")
    if not presigned_url:
        raise RuntimeError(
            f"No presigned URL returned for form {form.form_data_id}"
        )

    # Create output filename and path
    output_filename = f"form_{form.form_data_id}.json"
    output_path = output_dir / output_filename

    # Download to a sibling ".part" file and rename on success, so a failed
    # or interrupted transfer never leaves a truncated file under the final
    # name.
    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        urllib.request.urlretrieve(presigned_url, partial_path)
        partial_path.replace(output_path)
    finally:
        # After a successful replace the partial file no longer exists.
        if partial_path.exists():
            partial_path.unlink()
    return output_path
def download_form_data(
    form: FormData, syn: Synapse, output_dir: Path, use_api: bool = False
) -> Path:
    """
    Fetch the data file of a single form into ``output_dir``.

    Args:
        form: FormData object whose data file is wanted
        syn: Authenticated Synapse client
        output_dir: Directory that receives the downloaded file
        use_api: When True, delegate to download_form_data_via_api();
            otherwise go through FormData.download()

    Returns:
        Path to the saved file, named ``form_<form_data_id>.json``
    """
    if not use_api:
        # Native route: download into a scratch directory, then copy the
        # result to its final, predictable name.
        scratch_dir = tempfile.mkdtemp()
        try:
            fetched_file = form.download(
                synapse_id=form.form_data_id,
                download_location=scratch_dir,
                synapse_client=syn,
            )
            destination = output_dir / f"form_{form.form_data_id}.json"
            shutil.copy2(fetched_file, destination)
            return destination
        finally:
            # Best-effort removal of the scratch directory, success or not
            shutil.rmtree(scratch_dir, ignore_errors=True)

    # Direct REST route
    return download_form_data_via_api(form, syn, output_dir)
def _login(args) -> Synapse:
    """Authenticate with Synapse via auth token, named profile, or default credentials."""
    if args.synapse_auth_token:
        syn = Synapse()
        syn.login(authToken=args.synapse_auth_token)
        return syn
    if args.profile:
        print(f"Using profile: {args.profile}")
        return login(profile=args.profile)
    return login()


def _resolve_filter_states(args) -> list:
    """Return the form states to list, announcing the choice on stdout."""
    if args.filter_by_state:
        normalized = (
            state.strip().lower().replace(" ", "_")
            for state in args.filter_by_state.split(",")
        )
        # Drop blanks left by stray or trailing commas so an empty state is
        # never sent to the service.
        requested = [state for state in normalized if state]
        if requested:
            print(f"Filtering by states: {', '.join(requested)}")
            return requested
        # Nothing usable was supplied; fall through to the defaults, the
        # same as when the option is omitted.

    # Use default reviewer or owner states
    if args.as_reviewer:
        defaults = list(FormGroup.DEFAULT_REVIEWER_STATES)
        print(f"Using default reviewer states: {', '.join(defaults)}")
    else:
        defaults = list(FormGroup.DEFAULT_OWNER_STATES)
        print(f"Using default owner states: {', '.join(defaults)}")
    return defaults


def main() -> int:
    """
    Main entry point.

    Parses the command line, logs into Synapse, lists the forms of the
    requested form group and downloads each one into the output directory,
    printing progress and a summary.

    Returns:
        0 when every form was downloaded (or no form matched), 1 when at
        least one download failed or listing the forms raised an error.
    """
    args = parse_args()

    # Create output directory if it doesn't exist
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Login to Synapse
    print("Logging into Synapse...")
    syn = _login(args)
    user = syn.getUserProfile()
    print(f"Logged in as: {user.get('userName', 'Unknown')}")
    print()

    # Prepare filter states
    filter_states = _resolve_filter_states(args)
    print()

    # Create FormGroup instance
    form_group = FormGroup(group_id=args.form_group_id)
    print(f"Fetching forms from group ID: {form_group.group_id}")
    print(f"As reviewer: {args.as_reviewer}")
    print(
        f"Download method: {'Direct API' if args.api_download else 'FormData.download()'}"
    )
    print(f"Output directory: {output_dir.absolute()}")
    print()

    try:
        # List all forms using the native method; materialized because the
        # total count is needed for the progress display.
        print("Retrieving forms...")
        forms = list(
            form_group.list(
                filter_by_state=filter_states,
                synapse_client=syn,
                as_reviewer=args.as_reviewer,
            )
        )

        # Display results
        print(f"\n{'='*80}")
        print(f"Found {len(forms)} form(s) in group {args.form_group_id}")
        print(f"{'='*80}\n")
        if not forms:
            print("No forms found matching the criteria.")
            return 0

        # Download each form; one failure must not abort the remaining ones.
        successful_downloads = 0
        failed_downloads = 0
        for idx, form in enumerate(forms, 1):
            # flush=True: the line has no trailing newline, so without it a
            # line-buffered terminal shows nothing until the download ends.
            print(
                f"[{idx}/{len(forms)}] Downloading form {form.form_data_id}...",
                end=" ",
                flush=True,
            )
            try:
                output_path = download_form_data(
                    form, syn, output_dir, use_api=args.api_download
                )
            except Exception as e:
                print(f"✗ Failed: {e}")
                failed_downloads += 1
            else:
                print(f"✓ Saved to {output_path.name}")
                successful_downloads += 1

        # Summary
        print()
        print(f"{'='*80}")
        print("Download Summary:")
        print(f" Total forms: {len(forms)}")
        print(f" Successful: {successful_downloads}")
        print(f" Failed: {failed_downloads}")
        print(f" Output directory: {output_dir.absolute()}")
        print(f"{'='*80}")
        return 0 if failed_downloads == 0 else 1
    except Exception as e:
        # Top-level boundary: report the error with its traceback and turn
        # it into a non-zero exit status.
        print(f"\n❌ Error: {e}", file=sys.stderr)
        import traceback

        traceback.print_exc()
        return 1
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment