Created
February 13, 2026 18:54
-
-
Save philerooski/1812f6feed8bf20dd102514e41685640 to your computer and use it in GitHub Desktop.
Download all form data from a specific form group to a local directory.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Download all form data from a specific form group to local directory. | |
| """ | |
| import argparse | |
| import sys | |
| import json | |
| import tempfile | |
| import shutil | |
| import urllib.request | |
| from pathlib import Path | |
| from synapseclient import Synapse, login | |
| from synapseclient.models import FormGroup, FormData | |
def parse_args(argv=None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour the script relies on. Passing a list makes the parser
            usable from other code and from tests.

    Returns:
        Namespace with the attributes ``form_group_id``, ``output_dir``,
        ``as_reviewer``, ``filter_by_state``, ``profile``,
        ``synapse_auth_token`` and ``api_download``.
    """
    parser = argparse.ArgumentParser(
        description="Download all forms from a Synapse form group."
    )
    parser.add_argument(
        "--form-group-id",
        type=str,
        required=True,
        help="The form group ID",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="./forms",
        help="Directory to save downloaded forms (default: ./forms)",
    )
    # --as-reviewer / --no-as-reviewer share one destination; the later flag
    # on the command line wins, and the default is reviewer mode.
    parser.add_argument(
        "--as-reviewer",
        action="store_true",
        default=True,
        help="List forms as a reviewer (default: True)",
    )
    parser.add_argument(
        "--no-as-reviewer",
        action="store_false",
        dest="as_reviewer",
        help="List forms as owner instead of reviewer",
    )
    parser.add_argument(
        "--filter-by-state",
        type=str,
        default=None,
        help="Comma-separated list of states to filter by (e.g., 'accepted,rejected')",
    )
    parser.add_argument(
        "--profile",
        type=str,
        default=None,
        help="The Synapse profile name to use for authentication (from ~/.synapseConfig)",
    )
    parser.add_argument(
        "--synapse-auth-token",
        type=str,
        default=None,
        help="Your Synapse access token (alternative to using profile)",
    )
    parser.add_argument(
        "--api-download",
        action="store_true",
        default=False,
        help="Use direct API calls for downloading instead of FormData.download() method",
    )
    return parser.parse_args(argv)
def download_form_data_via_api(
    form: FormData, syn: Synapse, output_dir: Path, timeout: float = 60.0
) -> Path:
    """
    Download a form's data file using direct API calls.

    This approach mirrors the R implementation in get-form-data.R:
    1. Request a presigned URL via /file/v1/fileHandle/batch
    2. Download the file from the presigned URL

    Args:
        form: FormData object to download. Its ``data_file_handle_id`` and
            ``form_data_id`` attributes are read.
        syn: Authenticated Synapse client
        output_dir: Directory to save the downloaded file (created if missing)
        timeout: Socket timeout in seconds passed to
            ``urllib.request.urlopen`` for the presigned-URL download.

    Returns:
        Path to the downloaded file, ``output_dir / "form_<form_data_id>.json"``

    Raises:
        RuntimeError: If the batch response carries no usable presigned URL,
            or the download is shorter than its advertised Content-Length.
    """
    # Build the request body for the batch file handle API
    request_body = {
        "requestedFiles": [
            {
                "fileHandleId": form.data_file_handle_id,
                "associateObjectId": form.form_data_id,
                "associateObjectType": "FormData",
            }
        ],
        "includePreSignedURLs": True,
        "includeFileHandles": False,
    }

    # Request the presigned URL. Use the client's configured file-handle
    # endpoint rather than a hard-coded production URL so that a client
    # pointed at another stack keeps working.
    response = syn.restPOST(
        uri="/fileHandle/batch",
        body=json.dumps(request_body),
        endpoint=syn.fileHandleEndpoint,
    )

    # Extract the presigned URL, turning a failed/empty result into a
    # readable error instead of a bare KeyError/IndexError.
    # NOTE(review): "failureCode" is the per-file error field described in the
    # Synapse REST docs for this call -- confirm against the API reference.
    requested_files = response.get("requestedFiles") or []
    if not requested_files:
        raise RuntimeError(
            f"No file result returned for form {form.form_data_id}"
        )
    file_result = requested_files[0]
    failure_code = file_result.get("failureCode")
    if failure_code:
        raise RuntimeError(
            f"Could not get a download URL for form {form.form_data_id}: "
            f"{failure_code}"
        )
    presigned_url = file_result.get("preSignedURL")
    if not presigned_url:
        raise RuntimeError(
            f"No presigned URL returned for form {form.form_data_id}"
        )

    # Create output filename and path
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"form_{form.form_data_id}.json"

    # Download to a sibling ".part" file and rename on success, so a failed
    # or interrupted transfer never leaves a truncated form_<id>.json behind.
    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        with urllib.request.urlopen(presigned_url, timeout=timeout) as remote:
            declared_length = remote.headers.get("Content-Length")
            with open(partial_path, "wb") as local:
                shutil.copyfileobj(remote, local)
                bytes_written = local.tell()
        if declared_length is not None and bytes_written < int(declared_length):
            raise RuntimeError(
                f"Incomplete download for form {form.form_data_id}: "
                f"got {bytes_written} of {declared_length} bytes"
            )
        partial_path.replace(output_path)
    finally:
        # After a successful rename the partial file no longer exists.
        if partial_path.exists():
            partial_path.unlink()

    return output_path
def download_form_data(
    form: FormData, syn: Synapse, output_dir: Path, use_api: bool = False
) -> Path:
    """
    Download a form's data file.

    Args:
        form: FormData object to download
        syn: Authenticated Synapse client
        output_dir: Directory to save the downloaded file (created if missing)
        use_api: If True, use direct API calls; if False, use FormData.download()

    Returns:
        Path to the downloaded file, ``output_dir / "form_<form_data_id>.json"``
    """
    if use_api:
        # Use direct API approach
        return download_form_data_via_api(form, syn, output_dir)

    # Resolve the destination before downloading so a missing output
    # directory is fixed up front instead of failing after the download.
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"form_{form.form_data_id}.json"

    # Use the native FormData.download() method, staging the file in a
    # throwaway directory so only the renamed copy lands in output_dir.
    temp_dir = tempfile.mkdtemp()
    try:
        downloaded_path = form.download(
            synapse_id=form.form_data_id,
            download_location=temp_dir,
            synapse_client=syn,
        )
        # Copy to final destination
        shutil.copy2(downloaded_path, output_path)
    finally:
        # Best-effort cleanup: a leftover temp directory must not mask the
        # real outcome of the download.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return output_path
def main():
    """Main entry point.

    Parses the command line, logs into Synapse, lists the forms of the
    requested form group and downloads each one into the output directory.

    Returns:
        int: 0 when no form matched or every download succeeded, 1 when at
        least one download failed or listing/downloading raised an error.
    """
    args = parse_args()

    # Create output directory if it doesn't exist
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Login to Synapse: an explicit token takes precedence over a named
    # profile, which takes precedence over the default credentials.
    print("Logging into Synapse...")
    if args.synapse_auth_token:
        syn = Synapse()
        syn.login(authToken=args.synapse_auth_token)
    elif args.profile:
        print(f"Using profile: {args.profile}")
        syn = login(profile=args.profile)
    else:
        syn = login()
    user = syn.getUserProfile()
    print(f"Logged in as: {user.get('userName', 'Unknown')}")
    print()

    # Prepare filter states. Blank entries (from stray or trailing commas such
    # as "accepted,") are dropped so no empty state name is sent to the API;
    # if nothing usable remains, fall back to the defaults below.
    requested_states = [
        state.strip().lower().replace(" ", "_")
        for state in (args.filter_by_state or "").split(",")
        if state.strip()
    ]
    if requested_states:
        filter_states = requested_states
        print(f"Filtering by states: {', '.join(filter_states)}")
    elif args.as_reviewer:
        # Use default reviewer states
        filter_states = list(FormGroup.DEFAULT_REVIEWER_STATES)
        print(f"Using default reviewer states: {', '.join(filter_states)}")
    else:
        # Use default owner states
        filter_states = list(FormGroup.DEFAULT_OWNER_STATES)
        print(f"Using default owner states: {', '.join(filter_states)}")
    print()

    # Create FormGroup instance
    form_group = FormGroup(group_id=args.form_group_id)
    print(f"Fetching forms from group ID: {form_group.group_id}")
    print(f"As reviewer: {args.as_reviewer}")
    print(
        f"Download method: {'Direct API' if args.api_download else 'FormData.download()'}"
    )
    print(f"Output directory: {output_dir.absolute()}")
    print()

    try:
        # List all forms using the native method
        print("Retrieving forms...")
        forms = list(
            form_group.list(
                filter_by_state=filter_states,
                synapse_client=syn,
                as_reviewer=args.as_reviewer,
            )
        )

        # Display results
        print(f"\n{'='*80}")
        print(f"Found {len(forms)} form(s) in group {args.form_group_id}")
        print(f"{'='*80}\n")
        if not forms:
            print("No forms found matching the criteria.")
            return 0

        # Download each form; one failure is reported and counted but does
        # not stop the remaining downloads.
        successful_downloads = 0
        failed_downloads = 0
        for idx, form in enumerate(forms, 1):
            # flush so the progress text is visible while the download runs,
            # not only once the line is completed by the result message.
            print(
                f"[{idx}/{len(forms)}] Downloading form {form.form_data_id}...",
                end=" ",
                flush=True,
            )
            try:
                output_path = download_form_data(
                    form, syn, output_dir, use_api=args.api_download
                )
                print(f"✓ Saved to {output_path.name}")
                successful_downloads += 1
            except Exception as e:
                print(f"✗ Failed: {e}")
                failed_downloads += 1

        # Summary
        print()
        print(f"{'='*80}")
        print("Download Summary:")
        print(f" Total forms: {len(forms)}")
        print(f" Successful: {successful_downloads}")
        print(f" Failed: {failed_downloads}")
        print(f" Output directory: {output_dir.absolute()}")
        print(f"{'='*80}")
        return 0 if failed_downloads == 0 else 1
    except Exception as e:
        # Top-level boundary: report the error with its traceback and signal
        # failure through the exit code instead of crashing.
        print(f"\n❌ Error: {e}", file=sys.stderr)
        import traceback

        traceback.print_exc()
        return 1
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment