#!/usr/bin/python3.14
import arxiv
import sh
from json import load, loads, dump, dumps, JSONDecodeError
from sys import argv, exit
from os import makedirs, listdir, getcwd
from os.path import join, isdir, isfile, realpath
from typing import List, Tuple
from pydantic import RootModel
from operator import itemgetter
from rich import print as rprint
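
# Third-party dependencies (assumed installed): arxiv, sh, rich, and
# pydantic v2 (for RootModel/model_validate). Everything else is stdlib.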

class DataTuple(RootModel[Tuple[str, str, str]]):
    """
    A Pydantic model for a tuple containing three strings.
    The root value is accessed via the `root` attribute.
    """
    pass
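
# Example (hypothetical values): DataTuple.model_validate(("2101.00001v1",
# "Some Title", "2021-01-01")).root returns the validated 3-tuple unchanged,
# so a header can be unpacked as: paper_id, title, published = header.root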


def search_papers(topic: str, max_results: int = 10, PAPER_DIR: str = ".") -> List[DataTuple]:
    """
    Search for papers on arXiv based on a topic and store their information.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 10)
        PAPER_DIR: Base directory under which per-topic results are stored

    Returns:
        List of DataTuple (paper_id, title, published) headers found in the search
    """
    # Use arxiv to find the papers
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance
    )
    papers = client.results(search)

    # Create directory for this topic
    path = join(PAPER_DIR, topic.lower().replace(" ", "_"))
    makedirs(path, exist_ok=True)
    file_path = join(path, "papers_info.json")

    # Try to load existing papers info
    try:
        with open(file_path, "r") as json_file:
            papers_info = load(json_file)
    except (FileNotFoundError, JSONDecodeError):
        papers_info = {}

    # Process each paper and add to papers_info
    paper_headers = []
    for paper in papers:
        paper_id = paper.get_short_id()
        published = str(paper.published.date())
        raw_data = (paper_id, paper.title, published)
        paper_headers.append(DataTuple.model_validate(raw_data))
        paper_info = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': published
        }
        papers_info[paper_id] = paper_info

    # Save updated papers_info to json file
    with open(file_path, "w") as json_file:
        dump(papers_info, json_file, indent=2)

    print(f"Results are saved in: {file_path}")
    return paper_headers
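
# papers_info.json maps short arXiv ids to metadata, e.g. (hypothetical entry):
#   {"2101.00001v1": {"title": "...", "authors": ["..."], "summary": "...",
#                     "pdf_url": "...", "published": "2021-01-01"}}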


def extract_info(paper_id: str, PAPER_DIR: str = ".") -> str:
    """
    Search for information about a specific paper across all topic directories.

    Args:
        paper_id: The ID of the paper to look for
        PAPER_DIR: Base directory containing the per-topic subdirectories

    Returns:
        JSON string with paper information if found, error message if not found
    """
    for item in listdir(PAPER_DIR):
        item_path = join(PAPER_DIR, item)
        if isdir(item_path):
            file_path = join(item_path, "papers_info.json")
            if isfile(file_path):
                try:
                    with open(file_path, "r") as json_file:
                        papers_info = load(json_file)
                    if paper_id in papers_info:
                        return dumps(papers_info[paper_id], indent=2)
                except (FileNotFoundError, JSONDecodeError) as e:
                    print(f"Error reading {file_path}: {str(e)}")
                    continue
    return f"There's no saved information related to paper {paper_id}."


if __name__ == "__main__":
    if len(argv) < 2:
        # Show the script itself (realpath of argv[0]) in the usage line,
        # not just its directory
        print("\nUsage: " + realpath(argv[0]) + " '<topic>' [<id>]\n")
        exit(0)
    PAPER_DIR = getcwd()
    paper_headers = search_papers(argv[1], 20, PAPER_DIR)
    # Unpack the validated tuples and print the ten most recently published papers
    header_list = [header.root for header in paper_headers]
    header_list.sort(key=itemgetter(2), reverse=True)
    for paper_id, paper_title, published in header_list[:10]:
        rprint(paper_id, paper_title, published)
    if len(argv) > 2:
        # extract_info returns a plain error message (not JSON) for unknown ids,
        # so guard the parse instead of crashing with JSONDecodeError
        info_json = extract_info(argv[2], PAPER_DIR)
        try:
            info = loads(info_json)
        except JSONDecodeError:
            print(info_json)
            exit(1)
        info_pretty = dumps(info, indent=4)
        print(info_pretty)
        pdf = argv[2] + ".pdf"
        sh.wget(info["pdf_url"], "-O", pdf)
        rprint(sh.ls('-ls', pdf).strip())
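
# Example invocation (hypothetical script name, topic, and paper id):
#   ./arxiv_search.py 'quantum error correction' 2301.01234v1
# The first argument runs the search and writes papers_info.json under a topic
# subdirectory of the current working directory; the optional second argument
# pretty-prints that paper's saved record and fetches its PDF via wget.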