alecjacobson · December 11, 2025 02:51
diff --git a/papers_most_often_cited_in_specific_author_papers.py b/papers_most_often_cited_in_specific_author_papers.py
 from semanticscholar import SemanticScholar
 from collections import defaultdict
 from tqdm import tqdm
 import os

 # Semantic Scholar client created with timeout=10. No API key is passed:
 # the environment-variable lookup below is currently disabled.
 #s2_api_key = os.environ.get("SEMANTIC_SCHOLAR_API_KEY")
 sch = SemanticScholar(timeout=10)

 # Author IDs to aggregate over (labelled "alec")
 author_ids = [2242015445,145151177,2251097727,2241538537,2244829783,2199254215,2309006642,2275054179,2312381751,2242589355]
 # A bare int is accepted as shorthand for a one-element list
 author_ids = [author_ids] if isinstance(author_ids, int) else author_ids

 # One batched request for all author records; IDs are sent as strings
 authors = sch.get_authors(list(map(str, author_ids)))

 papers = []
 name_list = []

 for author in authors:
    fetched = author.papers
    name_list.append(author.name)
    # Warn when fewer papers came back than the author's reported paper count
    if author.paperCount > len(fetched):
        print(
            f"Warning: {author.name} has {author.paperCount} papers, "
            f"but only {len(author.papers)} were retrieved"
        )
    # Pool every author's papers into one flat list
    papers += fetched

 # Comma-separated string of all author names
 names = ", ".join(name_list)

 # Index the pooled papers by ID; a paper shared by several of the authors
 # collapses to a single entry because it maps to the same key.
 papers_by_id = {paper["paperId"]: paper for paper in papers}

 # Unique IDs to look up; entries without an ID cannot be queried, so skip them.
 paper_ids = [pid for pid in papers_by_id if pid]

 # referenced paperId -> number of the authors' papers that cite it
 paperId_counts = defaultdict(int)
 # referenced paperId -> short reference record used for printing title/year
 paperId_short_recs = {}

 # --- Batch fetch full paper records for these paper IDs ---
 # The paper batch endpoint accepts at most 500 IDs per call (the 1000-ID
 # limit applies to the author batch endpoint, not to papers).
 BATCH_SIZE = 500

 for i in range(0, len(paper_ids), BATCH_SIZE):
    batch_ids = paper_ids[i : i + BATCH_SIZE]

    # Ask specifically for references, title, year to minimize payload
    batch_papers = sch.get_papers(
        batch_ids,
        fields=["title", "year", "references.title", "references.year"],
    )

    for authors_paper_full_rec in batch_papers:
        # A missing or None "references" attribute is treated as no references
        references = getattr(authors_paper_full_rec, "references", []) or []

        for ref in references:
            # Handle both dict-style and object-style references
            if isinstance(ref, dict):
                ref_data = ref
            else:
                ref_data = getattr(ref, "raw_data", {}) or {}

            # References without a paperId cannot be tallied; skip them
            ref_id = ref_data.get("paperId")
            if not ref_id:
                continue

            paperId_counts[ref_id] += 1
            # Store the short record so we can print title/year later
            paperId_short_recs[ref_id] = ref_data

 # --- Sort and print results ---
 # Highest citation count first; the sort is stable, so references with equal
 # counts keep the order in which they were first tallied.
 paperId_counts_sorted = list(paperId_counts.items())
 paperId_counts_sorted.sort(key=lambda entry: entry[1], reverse=True)

 # One line per referenced paper: count, a tab, the quoted title, then the year
 for paperId, count in paperId_counts_sorted:
    rec = paperId_short_recs.get(paperId, {})
    # Placeholders apply only when the key is absent from the stored record
    year = rec.get("year", "UNKNOWN YEAR")
    title = rec.get("title", "UNKNOWN TITLE")
    print(f'{count}\t"{title}," {year}')
	from semanticscholar import SemanticScholar
	from collections import defaultdict
	from tqdm import tqdm
	import os

	# Semantic Scholar client created with timeout=10. No API key is passed:
	# the environment-variable lookup below is currently disabled.
	#s2_api_key = os.environ.get("SEMANTIC_SCHOLAR_API_KEY")
	sch = SemanticScholar(timeout=10)
	# Author IDs to aggregate over (labelled "alec")
	author_ids = [2242015445,145151177,2251097727,2241538537,2244829783,2199254215,2309006642,2275054179,2312381751,2242589355]
	# If author_ids is a single int, wrap it in a list
	if isinstance(author_ids, int):
	    author_ids = [author_ids]

	# Batch fetch all authors in one call; IDs are sent as strings
	authors = sch.get_authors([str(aid) for aid in author_ids])

	papers = []
	names = []

	for author in authors:
	    names.append(author.name)
	    # Warn when fewer papers came back than the author's reported paper count
	    if len(author.papers) < author.paperCount:
	        print(
	            f"Warning: {author.name} has {author.paperCount} papers, "
	            f"but only {len(author.papers)} were retrieved"
	        )
	    # Pool every author's papers into one flat list
	    papers.extend(author.papers)

	# Comma-separated string of all author names
	names = ", ".join(names)

	# Index the pooled papers by ID; a paper shared by several of the authors
	# collapses to a single entry because it maps to the same key.
	papers_by_id = {paper["paperId"]: paper for paper in papers}

	# Unique IDs to look up; entries without an ID cannot be queried, so skip them.
	paper_ids = [pid for pid in papers_by_id if pid]

	# referenced paperId -> number of the authors' papers that cite it
	paperId_counts = defaultdict(int)
	# referenced paperId -> short reference record used for printing title/year
	paperId_short_recs = {}

	# --- Batch fetch full paper records for these paper IDs ---
	# The paper batch endpoint accepts at most 500 IDs per call (the 1000-ID
	# limit applies to the author batch endpoint, not to papers).
	BATCH_SIZE = 500

	for i in range(0, len(paper_ids), BATCH_SIZE):
	    batch_ids = paper_ids[i : i + BATCH_SIZE]

	    # Ask specifically for references, title, year to minimize payload
	    batch_papers = sch.get_papers(
	        batch_ids,
	        fields=["title", "year", "references.title", "references.year"],
	    )

	    for authors_paper_full_rec in batch_papers:
	        # A missing or None "references" attribute is treated as no references
	        references = getattr(authors_paper_full_rec, "references", []) or []

	        for ref in references:
	            # Handle both dict-style and object-style references
	            if isinstance(ref, dict):
	                ref_data = ref
	            else:
	                ref_data = getattr(ref, "raw_data", {}) or {}

	            # References without a paperId cannot be tallied; skip them
	            ref_id = ref_data.get("paperId")
	            if not ref_id:
	                continue

	            paperId_counts[ref_id] += 1
	            # Store the short record so we can print title/year later
	            paperId_short_recs[ref_id] = ref_data

	# --- Sort and print results ---
	# Highest citation count first; the sort is stable, so references with equal
	# counts keep the order in which they were first tallied.
	paperId_counts_sorted = sorted(
	    paperId_counts.items(), key=lambda item: -item[1]
	)

	# One line per referenced paper: count, a tab, the quoted title, then the year
	for paperId, count in paperId_counts_sorted:
	    rec = paperId_short_recs.get(paperId, {})
	    # Placeholders apply only when the key is absent from the stored record
	    title = rec.get("title", "UNKNOWN TITLE")
	    year = rec.get("year", "UNKNOWN YEAR")
	    print(f'{count}\t"{title}," {year}')
No results found