Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save alecjacobson/b36c4dba356ed39581c91dc465cf90e5 to your computer and use it in GitHub Desktop.

Select an option

Save alecjacobson/b36c4dba356ed39581c91dc465cf90e5 to your computer and use it in GitHub Desktop.
Frequency Count of Papers Most Cited in a Specific Author's Publications
from semanticscholar import SemanticScholar
from collections import defaultdict
from tqdm import tqdm
import os
# Count how often each paper is cited across the publications of the given
# Semantic Scholar author profiles, then print the references from most to
# least frequently cited.

# Read key from env instead of hard-coding (currently disabled, so no key is
# passed to the client).
# s2_api_key = os.environ.get("SEMANTIC_SCHOLAR_API_KEY")
sch = SemanticScholar(timeout=10)

# alec
author_ids = [2242015445,145151177,2251097727,2241538537,2244829783,2199254215,2309006642,2275054179,2312381751,2242589355]

# If author_ids is a single int, wrap it in a list
if isinstance(author_ids, int):
    author_ids = [author_ids]

# Batch fetch all authors in one call
authors = sch.get_authors([str(aid) for aid in author_ids])

papers = []
names = []
for author in authors:
    names.append(author.name)
    # Warn when the author record carries fewer papers than it reports.
    if len(author.papers) < author.paperCount:
        print(
            f"Warning: {author.name} has {author.paperCount} papers, "
            f"but only {len(author.papers)} were retrieved"
        )
    papers.extend(author.papers)
names = ", ".join(names)

# Index by paperId so a paper listed under several author profiles is only
# fetched (and therefore counted) once.
papers_by_id = {paper["paperId"]: paper for paper in papers}
paper_ids = list(papers_by_id)

# ref paperId -> number of the authors' papers that cite it
paperId_counts = defaultdict(int)
# ref paperId -> short reference record (title/year) used when printing
paperId_short_recs = {}

# --- Batch fetch full paper records for these paper IDs ---
BATCH_SIZE = 500  # the paper batch endpoint accepts at most 500 IDs per call
for i in range(0, len(paper_ids), BATCH_SIZE):
    batch_ids = paper_ids[i : i + BATCH_SIZE]
    # Ask specifically for references, title, year to minimize payload
    batch_papers = sch.get_papers(
        batch_ids,
        fields=["title", "year", "references.title", "references.year"],
    )
    for authors_paper_full_rec in batch_papers:
        # Get its references (may be list of dicts or objects depending on
        # version); a missing or None value is treated as "no references".
        references = getattr(authors_paper_full_rec, "references", []) or []
        for ref in references:
            # Handle both dict-style and object-style references
            if isinstance(ref, dict):
                ref_data = ref
            else:
                ref_data = getattr(ref, "raw_data", {}) or {}
            ref_id = ref_data.get("paperId")
            if not ref_id:
                # References without an ID cannot be aggregated; skip them.
                continue
            paperId_counts[ref_id] += 1
            # Store the short record so we can print title/year later
            paperId_short_recs[ref_id] = ref_data

# --- Sort and print results ---
# Negated count gives descending order; the sort is stable, so ties keep
# first-seen order.
paperId_counts_sorted = sorted(
    paperId_counts.items(), key=lambda item: -item[1]
)
for paperId, count in paperId_counts_sorted:
    rec = paperId_short_recs.get(paperId, {})
    # `or` also covers keys that are present but None, which a .get()
    # default would not catch.
    title = rec.get("title") or "UNKNOWN TITLE"
    year = rec.get("year") or "UNKNOWN YEAR"
    print(f'{count}\t"{title}," {year}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment