Created
December 7, 2025 22:35
-
-
Save christopherlovell/cee6593f316117f27f7229dea2da415c to your computer and use it in GitHub Desktop.
Convert ADS library to publication list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
import os
import re
import sys
from collections import defaultdict

import ads  # pip install ads
# ADS library to export, and the author name that gets highlighted.
LIBRARY_ID = "x3_uMCyHTJ2-YisJxQxo_g"  # your ADS library id
ME = "Lovell, Christopher C."  # match your name as in ADS

# Replace troublesome unicode with LaTeX-safe equivalents.
# sanitize() applies this map after HTML_ENTITY_MAP.
UNICODE_MAP = {
    "≥": r"$\geq$",
    "≤": r"$\leq$",
    "Λ": r"$\Lambda$",
    "Ω": r"$\Omega$",
    "─": r"$-$",
    "<": r"$<$",
}

# Markup fragments rewritten to LaTeX. sanitize() walks this dict in insertion
# order, so the <SUB> tags MUST precede the bare angle brackets: once "<" has
# been turned into "$<$" a "<SUB>" tag can no longer be recognised.
# A bare "&" is not mapped here; sanitize() escapes the "A&A" journal
# abbreviation itself.
HTML_ENTITY_MAP = {
    "<SUB>": r"_{",
    "</SUB>": r"}",
    "<": r"$<$",
    ">": r"$>$",
}

# Fields to pull; adjust if you want more
FIELDS = [
    "id", "title", "author", "year", "bibcode",
    "pub", "volume", "page", "doi", "arxiv_eprint", "identifier",
    "comment", "comments", "pubnote",
]
def fetch_library():
    """Download the papers of the configured ADS library.

    The API token is read from the ``ADS_DEV_KEY`` environment variable and
    stored on ``ads.config.token`` before a single ``docs(library/...)``
    search is issued for the fields listed in ``FIELDS``.

    Returns:
        list: the records yielded by ``ads.SearchQuery``, requested with
        ``sort="date desc"``.

    Raises:
        SystemExit: if ``ADS_DEV_KEY`` is unset or empty.
    """
    api_key = os.getenv("ADS_DEV_KEY")
    if not api_key:
        raise SystemExit("Set ADS_DEV_KEY to your ADS API token.")
    ads.config.token = api_key

    # ADS limits rows per query; request enough to cover your library.
    # NOTE(review): a library with more entries than `rows` is presumably
    # truncated unless paging is enabled -- confirm against the ads client docs.
    query = ads.SearchQuery(
        q=f"docs(library/{LIBRARY_ID})",
        fl=FIELDS,
        rows=200,
        sort="date desc",
    )
    return list(query)
# Matches "$\Omega$_{sub}", i.e. a subscript stranded outside the math span
# that UNICODE_MAP produced for a bare omega. The subscript itself must not
# contain braces or dollars, so nested markup is left untouched.
_OMEGA_SUBSCRIPT = re.compile(r"\$\\Omega\$_\{([^{}$]*)\}")


def _wrap_outside_math(text: str, token: str) -> str:
    """Wrap *token* in ``$...$`` wherever it is not already inside inline math."""
    # Splitting on unescaped dollars yields alternating segments: even
    # indices are plain text, odd indices are the contents of $...$ spans.
    segments = re.split(r"(?<!\\)\$", text)
    wrapped = f"${token}$"
    segments[::2] = [seg.replace(token, wrapped) for seg in segments[::2]]
    return "$".join(segments)


def sanitize(text: str) -> str:
    """Convert text from ADS into a LaTeX-safe string.

    Steps, in order: HTML_ENTITY_MAP fragments, the ``A&A`` / ``M&NRAS``
    journal fixes, UNICODE_MAP characters, collapsing of doubled dollar
    signs, and finally math-mode wrapping of omega subscripts and
    molecular hydrogen. Running the function on its own output leaves
    already-wrapped ``H_{2}`` / omega-sub-m terms unchanged.

    Args:
        text: Raw title, author or publication text; may be empty or None.

    Returns:
        The sanitized text, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""

    # Longest fragments first: "<SUB>" has to be consumed before the bare
    # "<" / ">" entries get a chance to break the tag apart.
    for fragment in sorted(HTML_ENTITY_MAP, key=len, reverse=True):
        text = text.replace(fragment, HTML_ENTITY_MAP[fragment])

    # Common journal abbreviations
    text = text.replace("A&A", "A\\&A")
    text = text.replace("M&NRAS", "MNRAS")

    for bad, repl in UNICODE_MAP.items():
        text = text.replace(bad, repl)

    # Guard against double math dollars that can appear if ADS already
    # supplies $<$/$>$ (or when both maps rewrite the same character).
    text = text.replace("$$<$", "$<$").replace("$$>$", "$>$")
    # Collapse any remaining double-$ introduced by mixed sources
    while "$$" in text:
        text = text.replace("$$", "$")

    # Fix cases where math is split, e.g. $\Omega$_{m} -> $\Omega_{m}$
    text = _OMEGA_SUBSCRIPT.sub(
        lambda match: "$\\Omega_{" + match.group(1) + "}$", text
    )

    # Put omega-sub-m and molecular hydrogen in math mode exactly once;
    # occurrences that already sit inside $...$ are left alone.
    for token in ("\\Omega_{m}", "H_{2}"):
        text = _wrap_outside_math(text, token)
    return text
def slice_submitted(text: str) -> str:
    """Trim *text* so that it begins at the phrase 'submitted to'.

    The phrase is searched case-insensitively. When it is absent the whole
    text is returned. Surrounding whitespace is stripped in both cases, and
    a falsy *text* yields an empty string.
    """
    if not text:
        return ""
    start = text.lower().find("submitted to")
    if start == -1:
        return text.strip()
    return text[start:].strip()
# (phrase searched in the lower-cased comment, wording used in the output).
# Order matters: the first phrase found wins, so longer phrases come first.
_SUBMISSION_TRIGGERS = (
    ("submitted to", "Submitted to"),
    ("accepted for publication in", "Accepted for publication in"),
    ("accepted for publication", "Accepted for publication in"),
    ("accepted in", "Accepted in"),
    ("accepted", "Accepted in"),
    ("published as", "Published as"),
    ("published in", "Published in"),
    ("Paper submitted as part of the", "Paper submitted as part of the"),
)

# Venue phrase -> canonical wording. Checked in insertion order; the first
# venue found wins, so the most specific entries are listed first.
_SUBMISSION_VENUES = {
    "A\\&A Special Issue `Euclid Quick Data Release (Q1)'": "A\\&A Special Issue `Euclid Quick Data Release (Q1)'",
    "encyclopedia of astrophysics": "chapter for the Encyclopedia of Astrophysics",
    "a&a": "A\\&A",
    "a\\&a": "A\\&A",
    "mnras": "MNRAS",
    "apj": "ApJ",
    "apjl": "ApJL",
    "apjs": "ApJS",
    "jcap": "JCAP",
    "physical review letters": "Physical Review Letters",
    "nature": "Nature",
    "science": "Science",
    "a workshop paper at the ML4Astro Workshop at ICML 2025": "a workshop paper at the ML4Astro Workshop at ICML 2025",
    "the Open Journal of Astrophysics": "the Open Journal of Astrophysics",
}


def _as_str(val):
    """Return *val* as a string, joining list/tuple members with spaces."""
    if isinstance(val, (list, tuple)):
        return " ".join(str(v) for v in val)
    return str(val)


def _normalize_for_match(text):
    """Lower-case *text* and undo the LaTeX escaping of ampersands."""
    return text.lower().replace("\\&", "&")


def _contains_phrase(haystack, phrase):
    """Tell whether *phrase* occurs in *haystack* as a whole word or phrase."""
    pattern = r"(?<!\w)" + re.escape(phrase) + r"(?!\w)"
    return re.search(pattern, haystack) is not None


def _format_authors(names):
    """Join the leading author names, abbreviating the rest of a long list."""
    if not names:
        return ""
    surname = ME.split(",")[0]
    me_in_top3 = any(a.startswith(surname) and ME in a for a in names[:3])
    # If ME is in the top three keep three names, otherwise keep two.
    keep = 3 if me_in_top3 else 2
    shown = ", ".join(names[:keep])
    if len(names) <= keep:
        return shown
    tail = " \\& others" if me_in_top3 else f" \\& others including {ME}"
    return shown + tail


def _find_arxiv_id(p):
    """Return the arXiv id of record *p*, or None when none is available."""
    eprint = getattr(p, "arxiv_eprint", None)
    if eprint:
        return eprint
    for ident in getattr(p, "identifier", None) or []:
        if ident.startswith("arXiv:"):
            return ident.split(":", 1)[1]
    return None


def _first_comment(p):
    """Return the first non-empty comment-like field of *p* as a string."""
    for attr in ("comment", "comments", "pubnote"):
        val = getattr(p, attr, None)
        if val:
            return _as_str(val)
    return None


def _submission_note(comment_text):
    """Turn an e-print comment into 'trigger venue' wording, or None."""
    normalized = _normalize_for_match(comment_text)

    trigger = None
    for key, canon in _SUBMISSION_TRIGGERS:
        if key.lower() in normalized:
            trigger = canon
            break

    venue = None
    # Special-case: encyclopedia chapter without trigger wording
    if "encyclopedia of astrophysics" in normalized:
        venue = "chapter for the Encyclopedia of Astrophysics"
        trigger = ""
    for key, canon in _SUBMISSION_VENUES.items():
        # Keys are normalised exactly like the comment (otherwise keys that
        # contain an escaped ampersand could never match) and must match on
        # word boundaries so that "apj" does not swallow "apjl" / "apjs".
        if _contains_phrase(normalized, _normalize_for_match(key)):
            venue = canon
            break

    if venue is None or trigger is None:
        return None
    return f"{trigger} {venue}".strip()


def to_latex_item(p):
    """Format one ADS record as a LaTeX list item.

    The item holds the year, the sanitized title, an abbreviated author
    list with ``ME`` in bold, the publication details and, when an arXiv id
    can be recovered, a link to the e-print. For arXiv-only records the
    publication details are derived from the e-print comment; a warning is
    written to stderr when that comment names no known trigger/venue pair.

    Args:
        p: Record exposing ``year``, ``title`` and ``author``; the attributes
            ``pub``, ``volume``, ``page``, ``arxiv_eprint``, ``identifier``,
            ``comment``, ``comments`` and ``pubnote`` are read when present.

    Returns:
        str: the complete item line.
    """
    year = p.year or "????"
    title = sanitize(p.title[0] if p.title else "Untitled")
    authors = _format_authors(
        [sanitize(a) for a in p.author] if p.author else []
    )

    # try to recover an arXiv id
    arxiv_id = _find_arxiv_id(p)

    pub = getattr(p, "pub", None)
    is_arxiv_pub = bool(pub) and "arXiv" in pub

    pub_parts = []
    if is_arxiv_pub:
        # arXiv-only entry: try to use the e-print comment (often contains
        # "submitted to ...") instead of journal/volume/page.
        comment_text = _first_comment(p)
        if comment_text:
            comment_text = sanitize(comment_text)
            note = _submission_note(comment_text)
            if note:
                pub_parts.append(note)
            else:
                sys.stderr.write(f"[WARN] No trigger/venue match for '{title}' with comment '{comment_text}'\n")
    else:
        if pub:
            pub_parts.append(_as_str(pub))
        volume = getattr(p, "volume", None)
        if volume:
            pub_parts.append(_as_str(volume))
        page = getattr(p, "page", None)
        if page:
            pub_parts.append(_as_str(page[0]))
    pubinfo = sanitize(" ".join(pub_parts))

    # make the user name bold in author list
    authors = authors.replace(ME, f"\\textbf{{{ME}}}")

    entry = f"\\item \\textbf{{{year}:}} {title}. \\textit{{{authors}}}"
    entry += f", {pubinfo}." if pubinfo else "."

    # avoid duplicating arXiv if pubinfo already indicates arXiv-only submission
    if arxiv_id and not (is_arxiv_pub and "arxiv" in pubinfo.lower()):
        entry += f" \\href{{https://arxiv.org/abs/{arxiv_id}}}{{arXiv:{arxiv_id}}}"
    return entry
def split_first_author(papers, surname=None):
    """Partition *papers* into first-author and co-authored lists.

    A paper counts as first-author when the first entry of its ``author``
    list starts with *surname*, compared case-insensitively. Papers without
    authors are treated as co-authored.

    Args:
        papers: Iterable of records exposing an ``author`` sequence.
        surname: Family name to look for. Defaults to the part of ``ME``
            before the first comma, so the script follows the configured
            name instead of a hard-coded one.

    Returns:
        tuple[list, list]: ``(first_author, coauth)``, each in input order.
    """
    if surname is None:
        surname = ME.split(",")[0]
    prefix = surname.strip().lower()

    first_author, coauth = [], []
    for p in papers:
        if p.author and p.author[0].lower().startswith(prefix):
            first_author.append(p)
        else:
            coauth.append(p)
    return first_author, coauth
def render_section(title, items):
    """Render a titled LaTeX ``itemize`` list.

    Args:
        title: Heading text for the unnumbered subsubsection.
        items: Iterable of ready-made item lines.

    Returns:
        str: heading plus list, ending in a newline. When *items* is empty
        a single LaTeX comment line is returned instead, because an
        ``itemize`` environment without any item is a LaTeX error.
    """
    lines = list(items)
    if not lines:
        return f"% No entries for section: {title}\n"
    return "\n".join(
        [
            f"\\subsubsection*{{{title}}}",
            "\\begin{itemize}",
            *lines,
            "\\end{itemize}\n",
        ]
    )
def main():
    """Fetch the library and print the two publication sections to stdout.

    Papers are split into first-author and co-authored groups, each paper is
    formatted with to_latex_item(), and the rendered sections are printed
    followed by a trailing "Generated by" comment line.
    """
    first_authored, coauthored = split_first_author(fetch_library())
    first_items = [to_latex_item(paper) for paper in first_authored]
    co_items = [to_latex_item(paper) for paper in coauthored]

    # No "\section*{Publications}" heading is emitted here.
    sections = (
        render_section("First-Author", first_items),
        render_section("Co-Authored", co_items),
        "% Generated by build_publication_list.py",
    )
    print("\n".join(sections))


# Run the export only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment