Initial version
commit 42996b0f4e
31 changed files with 933 additions and 0 deletions
README.md (new file, 29 lines)
@@ -0,0 +1,29 @@
# Citer

Simple CLI that turns an arXiv ID/URL or DOI/URL into a single-line APA citation.

## Setup

```bash
pip install -e .
```

## Usage

```bash
# arXiv IDs or URLs
citer arxiv 2106.01342
citer arxiv https://arxiv.org/abs/2106.01342

# DOIs or DOI URLs
citer doi 10.1038/nphys1170
citer doi https://doi.org/10.1038/nphys1170
```

Errors are printed with a clear message if an ID cannot be parsed or a lookup fails.

## Tests

```bash
python -m pytest
```
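For example, an identifier that cannot be parsed produces a message on stderr and a nonzero exit. A minimal sketch driving the console entry point from Python (the input string is just an illustrative bad value; no network call happens on this path):

```python
# Error-path sketch: an unparseable DOI prints to stderr and main() returns 1.
from citer.cli import main

exit_code = main(["doi", "not-a-doi"])  # stderr: Error: Could not understand DOI: not-a-doi
assert exit_code == 1
```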
build/lib/citer/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
"""APA citation helper CLI."""

__all__ = ["cli", "fetchers", "formatter", "identifiers", "models"]
build/lib/citer/cli.py (new file, 48 lines)
@@ -0,0 +1,48 @@
import argparse
import sys

from .exceptions import CitationError
from .fetchers import fetch_arxiv, fetch_doi
from .formatter import format_apa
from .identifiers import normalize_arxiv_identifier, normalize_doi


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Create an APA style citation from an arXiv ID or DOI."
    )
    subparsers = parser.add_subparsers(dest="source", required=True)

    arxiv_parser = subparsers.add_parser("arxiv", help="Cite an arXiv identifier or URL")
    arxiv_parser.add_argument("identifier", help="arXiv ID or URL")

    doi_parser = subparsers.add_parser("doi", help="Cite a DOI or DOI URL")
    doi_parser.add_argument("identifier", help="DOI or DOI URL")

    return parser


def main(argv=None) -> int:
    parser = build_parser()
    args = parser.parse_args(argv)

    try:
        if args.source == "arxiv":
            arxiv_id = normalize_arxiv_identifier(args.identifier)
            metadata = fetch_arxiv(arxiv_id)
        elif args.source == "doi":
            doi = normalize_doi(args.identifier)
            metadata = fetch_doi(doi)
        else:
            parser.error("Unsupported source")
            return 1
    except CitationError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    print(format_apa(metadata))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
build/lib/citer/exceptions.py (new file, 2 lines)
@@ -0,0 +1,2 @@
class CitationError(Exception):
    """Raised when a citation cannot be created from the provided input."""
build/lib/citer/fetchers.py (new file, 158 lines)
@@ -0,0 +1,158 @@
import datetime as _dt
from typing import List, Optional
from urllib.parse import quote
from xml.etree import ElementTree

import requests

from .exceptions import CitationError
from .models import Author, WorkMetadata


ARXIV_API = "http://export.arxiv.org/api/query"
CROSSREF_WORKS = "https://api.crossref.org/works/"
USER_AGENT = "citer-cli/0.1 (mailto:unknown@example.com)"


def fetch_arxiv(arxiv_id: str) -> WorkMetadata:
    try:
        response = requests.get(
            ARXIV_API,
            params={"id_list": arxiv_id},
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        raise CitationError(f"Failed to reach arXiv API: {exc}") from exc

    try:
        root = ElementTree.fromstring(response.text)
    except ElementTree.ParseError as exc:
        raise CitationError("Received invalid XML from arXiv") from exc

    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "arxiv": "http://arxiv.org/schemas/atom",
    }
    entry = root.find("atom:entry", ns)
    if entry is None:
        raise CitationError(f"No entry found for arXiv ID {arxiv_id}")

    title = _clean(entry.findtext("atom:title", default="", namespaces=ns))
    authors = _parse_arxiv_authors(entry.findall("atom:author", ns))
    year = _parse_year(entry.findtext("atom:published", default="", namespaces=ns))
    doi = _clean(entry.findtext("arxiv:doi", default="", namespaces=ns)) or None

    url = f"https://arxiv.org/abs/{arxiv_id}"
    return WorkMetadata(
        title=title,
        authors=authors,
        year=year,
        container_title="arXiv preprint",
        doi=doi,
        url=url,
        source="arxiv",
        identifier=arxiv_id,
    )


def fetch_doi(doi: str) -> WorkMetadata:
    url = CROSSREF_WORKS + quote(doi)
    try:
        response = requests.get(
            url, headers={"Accept": "application/json", "User-Agent": USER_AGENT}, timeout=10
        )
        response.raise_for_status()
        data = response.json()
    except requests.RequestException as exc:
        raise CitationError(f"Failed to reach Crossref for DOI {doi}: {exc}") from exc
    except ValueError as exc:
        raise CitationError("Received invalid JSON from Crossref") from exc

    message = data.get("message", {})
    title = _clean(" ".join(message.get("title", [])).strip())
    authors = _parse_crossref_authors(message.get("author", []))
    year = _extract_year(message)
    container_title = _clean((message.get("container-title") or [""])[0])
    volume = _clean(message.get("volume", "")).strip() or None
    issue = _clean(message.get("issue", "")).strip() or None
    pages = _clean(message.get("page", "")).strip() or None
    url = message.get("URL") or f"https://doi.org/{doi}"

    return WorkMetadata(
        title=title,
        authors=authors,
        year=year,
        container_title=container_title or None,
        volume=volume,
        issue=issue,
        pages=pages,
        doi=doi,
        url=url,
        source="doi",
        identifier=doi,
    )


def _clean(value: str) -> str:
    return " ".join(value.split())


def _parse_arxiv_authors(author_elements) -> List[Author]:
    authors: List[Author] = []
    for element in author_elements:
        name = element.findtext("{http://www.w3.org/2005/Atom}name", default="")
        given, family = _split_author_name(name)
        authors.append(Author(given=given, family=family))
    return authors


def _parse_crossref_authors(author_data) -> List[Author]:
    authors: List[Author] = []
    for author in author_data:
        if "family" in author or "given" in author:
            given = author.get("given", "").strip()
            family = author.get("family", "").strip()
        elif "name" in author:
            given, family = _split_author_name(author.get("name", ""))
        else:
            continue
        authors.append(Author(given=given, family=family))
    return authors


def _split_author_name(name: str) -> tuple[str, str]:
    clean_name = _clean(name)
    if not clean_name:
        return "", ""
    if "," in clean_name:
        family, given = [part.strip() for part in clean_name.split(",", 1)]
    else:
        parts = clean_name.split()
        family = parts[-1]
        given = " ".join(parts[:-1])
    return given, family


def _parse_year(value: str) -> Optional[int]:
    value = value.strip()
    if not value:
        return None
    try:
        return _dt.datetime.fromisoformat(value).year
    except ValueError:
        if len(value) >= 4 and value[:4].isdigit():
            return int(value[:4])
        return None


def _extract_year(message: dict) -> Optional[int]:
    for key in ("published-print", "published-online", "issued"):
        data = message.get(key, {})
        parts = data.get("date-parts") if isinstance(data, dict) else None
        if parts and len(parts) and len(parts[0]):
            maybe_year = parts[0][0]
            if isinstance(maybe_year, int):
                return maybe_year
    return None
build/lib/citer/formatter.py (new file, 90 lines)
@@ -0,0 +1,90 @@
from typing import Iterable

from .models import Author, WorkMetadata


def format_apa(metadata: WorkMetadata) -> str:
    authors_text = format_authors(metadata.authors)
    year_text = f"({metadata.year})." if metadata.year else "(n.d.)."
    title_text = _sentence_case(metadata.title).rstrip(".") + "."
    container_text = _format_container(metadata)

    parts = [authors_text, year_text, title_text]
    if container_text:
        parts.append(container_text)

    link = None
    if metadata.doi:
        link = f"https://doi.org/{metadata.doi}"
    elif metadata.url:
        link = metadata.url
    if link:
        parts.append(link)

    return " ".join(part.strip() for part in parts if part).strip()


def format_authors(authors: Iterable[Author]) -> str:
    formatted = [format_author(author) for author in authors if format_author(author)]
    if not formatted:
        return ""
    if len(formatted) == 1:
        return formatted[0]
    if len(formatted) == 2:
        return f"{formatted[0]}, & {formatted[1]}"
    return ", ".join(formatted[:-1]) + f", & {formatted[-1]}"


def format_author(author: Author) -> str:
    given_initials = " ".join(_initial(part) for part in author.given.split() if part)
    family = author.family.strip()
    if family and given_initials:
        return f"{family}, {given_initials}"
    if family:
        return family
    return given_initials


def _initial(part: str) -> str:
    clean = part.strip()
    if not clean:
        return ""
    return f"{clean[0].upper()}."


def _sentence_case(text: str) -> str:
    stripped = text.strip()
    if not stripped:
        return stripped
    lower = stripped[0].upper() + stripped[1:]
    return lower


def _format_container(metadata: WorkMetadata) -> str:
    if not metadata.container_title and not metadata.volume and not metadata.pages:
        return ""

    pieces = []
    if metadata.container_title:
        pieces.append(metadata.container_title)
    volume_issue = ""
    if metadata.volume:
        volume_issue = metadata.volume
        if metadata.issue:
            volume_issue += f"({metadata.issue})"
    elif metadata.issue:
        volume_issue = f"({metadata.issue})"

    if volume_issue:
        pieces.append(volume_issue)

    if metadata.source == "arxiv" and metadata.identifier:
        pieces.append(f"arXiv:{metadata.identifier}")

    if metadata.pages:
        pieces.append(metadata.pages)

    container = ", ".join(pieces)
    if container and not container.endswith("."):
        container += "."
    return container
build/lib/citer/identifiers.py (new file, 50 lines)
@@ -0,0 +1,50 @@
import re
from urllib.parse import unquote

from .exceptions import CitationError


ARXIV_ID = re.compile(
    r"^(?P<id>(?:\d{4}\.\d{4,5}|[a-z-]+\/\d{7}))(?:v\d+)?$",
    flags=re.IGNORECASE,
)
DOI_PATTERN = re.compile(r"10\.\d{4,9}/\S+", flags=re.IGNORECASE)


def normalize_arxiv_identifier(raw: str) -> str:
    text = raw.strip()
    text = unquote(text)

    url_match = re.search(r"arxiv\.org/(?:abs|pdf)/(?P<id>[^?#/]+)", text, re.IGNORECASE)
    if url_match:
        text = url_match.group("id")

    text = re.sub(r"(?i)^arxiv:", "", text)
    text = re.sub(r"(?i)\.pdf$", "", text)
    text = text.split("?")[0]
    text = text.strip()

    match = ARXIV_ID.match(text)
    if not match:
        raise CitationError(f"Could not understand arXiv identifier: {raw}")

    base_id = match.group("id")
    base_id = re.sub(r"(?i)v\d+$", "", base_id)
    return base_id


def normalize_doi(raw: str) -> str:
    text = raw.strip()
    text = unquote(text)

    text = re.sub(r"(?i)^https?://(dx\.)?doi\.org/", "", text)
    text = re.sub(r"(?i)^doi:\s*", "", text)
    text = text.split("?")[0]
    text = text.strip()

    match = DOI_PATTERN.search(text)
    if not match:
        raise CitationError(f"Could not understand DOI: {raw}")

    doi = match.group(0)
    return doi.lower()
build/lib/citer/models.py (new file, 27 lines)
@@ -0,0 +1,27 @@
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Author:
    given: str
    family: str

    @property
    def full_name(self) -> str:
        return f"{self.given} {self.family}".strip()


@dataclass
class WorkMetadata:
    title: str
    authors: List[Author]
    year: Optional[int]
    container_title: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None
    doi: Optional[str] = None
    url: Optional[str] = None
    source: str = ""
    identifier: Optional[str] = None
citer.egg-info/PKG-INFO (new file, 37 lines)
@@ -0,0 +1,37 @@
Metadata-Version: 2.4
Name: citer
Version: 0.1.0
Summary: CLI to create APA style citations from arXiv IDs or DOIs
Requires-Python: >=3.9
Description-Content-Type: text/markdown
Requires-Dist: requests>=2.31.0

# Citer

Simple CLI that turns an arXiv ID/URL or DOI/URL into a single-line APA citation.

## Setup

```bash
pip install -e .
```

## Usage

```bash
# arXiv IDs or URLs
citer arxiv 2106.01342
citer arxiv https://arxiv.org/abs/2106.01342

# DOIs or DOI URLs
citer doi 10.1038/nphys1170
citer doi https://doi.org/10.1038/nphys1170
```

Errors are printed with a clear message if an ID cannot be parsed or a lookup fails.

## Tests

```bash
python -m pytest
```
citer.egg-info/SOURCES.txt (new file, 17 lines)
@@ -0,0 +1,17 @@
README.md
pyproject.toml
citer/__init__.py
citer/cli.py
citer/exceptions.py
citer/fetchers.py
citer/formatter.py
citer/identifiers.py
citer/models.py
citer.egg-info/PKG-INFO
citer.egg-info/SOURCES.txt
citer.egg-info/dependency_links.txt
citer.egg-info/entry_points.txt
citer.egg-info/requires.txt
citer.egg-info/top_level.txt
tests/test_formatter.py
tests/test_identifiers.py
citer.egg-info/dependency_links.txt (new file, 1 line)
@@ -0,0 +1 @@

citer.egg-info/entry_points.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
[console_scripts]
citer = citer.cli:main
citer.egg-info/requires.txt (new file, 1 line)
@@ -0,0 +1 @@
requests>=2.31.0
citer.egg-info/top_level.txt (new file, 1 line)
@@ -0,0 +1 @@
citer
citer/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
"""APA citation helper CLI."""

__all__ = ["cli", "fetchers", "formatter", "identifiers", "models"]
citer/__pycache__/__init__.cpython-313.pyc (new binary file, not shown)
citer/__pycache__/exceptions.cpython-313.pyc (new binary file, not shown)
citer/__pycache__/formatter.cpython-313.pyc (new binary file, not shown)
citer/__pycache__/identifiers.cpython-313.pyc (new binary file, not shown)
citer/__pycache__/models.cpython-313.pyc (new binary file, not shown)
citer/cli.py (new file, 48 lines)
@@ -0,0 +1,48 @@
import argparse
import sys

from .exceptions import CitationError
from .fetchers import fetch_arxiv, fetch_doi
from .formatter import format_apa
from .identifiers import normalize_arxiv_identifier, normalize_doi


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Create an APA style citation from an arXiv ID or DOI."
    )
    subparsers = parser.add_subparsers(dest="source", required=True)

    arxiv_parser = subparsers.add_parser("arxiv", help="Cite an arXiv identifier or URL")
    arxiv_parser.add_argument("identifier", help="arXiv ID or URL")

    doi_parser = subparsers.add_parser("doi", help="Cite a DOI or DOI URL")
    doi_parser.add_argument("identifier", help="DOI or DOI URL")

    return parser


def main(argv=None) -> int:
    parser = build_parser()
    args = parser.parse_args(argv)

    try:
        if args.source == "arxiv":
            arxiv_id = normalize_arxiv_identifier(args.identifier)
            metadata = fetch_arxiv(arxiv_id)
        elif args.source == "doi":
            doi = normalize_doi(args.identifier)
            metadata = fetch_doi(doi)
        else:
            parser.error("Unsupported source")
            return 1
    except CitationError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    print(format_apa(metadata))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
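A quick way to sanity-check the argument wiring without touching the network is to parse a known-good command line; the identifier below is one of the sample IDs used in the tests:

```python
# Parser-only sketch: build_parser() wires two subcommands, each taking one identifier.
from citer.cli import build_parser

args = build_parser().parse_args(["arxiv", "2101.00001"])
assert args.source == "arxiv"
assert args.identifier == "2101.00001"
```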
citer/exceptions.py (new file, 2 lines)
@@ -0,0 +1,2 @@
class CitationError(Exception):
    """Raised when a citation cannot be created from the provided input."""
citer/fetchers.py (new file, 158 lines)
@@ -0,0 +1,158 @@
import datetime as _dt
from typing import List, Optional
from urllib.parse import quote
from xml.etree import ElementTree

import requests

from .exceptions import CitationError
from .models import Author, WorkMetadata


ARXIV_API = "http://export.arxiv.org/api/query"
CROSSREF_WORKS = "https://api.crossref.org/works/"
USER_AGENT = "citer-cli/0.1 (mailto:unknown@example.com)"


def fetch_arxiv(arxiv_id: str) -> WorkMetadata:
    try:
        response = requests.get(
            ARXIV_API,
            params={"id_list": arxiv_id},
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        raise CitationError(f"Failed to reach arXiv API: {exc}") from exc

    try:
        root = ElementTree.fromstring(response.text)
    except ElementTree.ParseError as exc:
        raise CitationError("Received invalid XML from arXiv") from exc

    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "arxiv": "http://arxiv.org/schemas/atom",
    }
    entry = root.find("atom:entry", ns)
    if entry is None:
        raise CitationError(f"No entry found for arXiv ID {arxiv_id}")

    title = _clean(entry.findtext("atom:title", default="", namespaces=ns))
    authors = _parse_arxiv_authors(entry.findall("atom:author", ns))
    year = _parse_year(entry.findtext("atom:published", default="", namespaces=ns))
    doi = _clean(entry.findtext("arxiv:doi", default="", namespaces=ns)) or None

    url = f"https://arxiv.org/abs/{arxiv_id}"
    return WorkMetadata(
        title=title,
        authors=authors,
        year=year,
        container_title="arXiv preprint",
        doi=doi,
        url=url,
        source="arxiv",
        identifier=arxiv_id,
    )


def fetch_doi(doi: str) -> WorkMetadata:
    url = CROSSREF_WORKS + quote(doi)
    try:
        response = requests.get(
            url, headers={"Accept": "application/json", "User-Agent": USER_AGENT}, timeout=10
        )
        response.raise_for_status()
        data = response.json()
    except requests.RequestException as exc:
        raise CitationError(f"Failed to reach Crossref for DOI {doi}: {exc}") from exc
    except ValueError as exc:
        raise CitationError("Received invalid JSON from Crossref") from exc

    message = data.get("message", {})
    title = _clean(" ".join(message.get("title", [])).strip())
    authors = _parse_crossref_authors(message.get("author", []))
    year = _extract_year(message)
    container_title = _clean((message.get("container-title") or [""])[0])
    volume = _clean(message.get("volume", "")).strip() or None
    issue = _clean(message.get("issue", "")).strip() or None
    pages = _clean(message.get("page", "")).strip() or None
    url = message.get("URL") or f"https://doi.org/{doi}"

    return WorkMetadata(
        title=title,
        authors=authors,
        year=year,
        container_title=container_title or None,
        volume=volume,
        issue=issue,
        pages=pages,
        doi=doi,
        url=url,
        source="doi",
        identifier=doi,
    )


def _clean(value: str) -> str:
    return " ".join(value.split())


def _parse_arxiv_authors(author_elements) -> List[Author]:
    authors: List[Author] = []
    for element in author_elements:
        name = element.findtext("{http://www.w3.org/2005/Atom}name", default="")
        given, family = _split_author_name(name)
        authors.append(Author(given=given, family=family))
    return authors


def _parse_crossref_authors(author_data) -> List[Author]:
    authors: List[Author] = []
    for author in author_data:
        if "family" in author or "given" in author:
            given = author.get("given", "").strip()
            family = author.get("family", "").strip()
        elif "name" in author:
            given, family = _split_author_name(author.get("name", ""))
        else:
            continue
        authors.append(Author(given=given, family=family))
    return authors


def _split_author_name(name: str) -> tuple[str, str]:
    clean_name = _clean(name)
    if not clean_name:
        return "", ""
    if "," in clean_name:
        family, given = [part.strip() for part in clean_name.split(",", 1)]
    else:
        parts = clean_name.split()
        family = parts[-1]
        given = " ".join(parts[:-1])
    return given, family


def _parse_year(value: str) -> Optional[int]:
    value = value.strip()
    if not value:
        return None
    try:
        return _dt.datetime.fromisoformat(value).year
    except ValueError:
        if len(value) >= 4 and value[:4].isdigit():
            return int(value[:4])
        return None


def _extract_year(message: dict) -> Optional[int]:
    for key in ("published-print", "published-online", "issued"):
        data = message.get(key, {})
        parts = data.get("date-parts") if isinstance(data, dict) else None
        if parts and len(parts) and len(parts[0]):
            maybe_year = parts[0][0]
            if isinstance(maybe_year, int):
                return maybe_year
    return None
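The year and author helpers are pure functions, so they can be exercised without any HTTP traffic. A small sketch (the Crossref-style payload is made up, and these are module-private helpers called here only for illustration):

```python
# _extract_year falls back through published-print, published-online, then issued.
from citer.fetchers import _extract_year, _split_author_name

message = {"issued": {"date-parts": [[2008, 7]]}}
assert _extract_year(message) == 2008

# "Family, Given" input splits on the comma; bare names split on the last space.
assert _split_author_name("van der Berg, Anna") == ("Anna", "van der Berg")
assert _split_author_name("Grace Hopper") == ("Grace", "Hopper")
```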
citer/formatter.py (new file, 90 lines)
@@ -0,0 +1,90 @@
from typing import Iterable

from .models import Author, WorkMetadata


def format_apa(metadata: WorkMetadata) -> str:
    authors_text = format_authors(metadata.authors)
    year_text = f"({metadata.year})." if metadata.year else "(n.d.)."
    title_text = _sentence_case(metadata.title).rstrip(".") + "."
    container_text = _format_container(metadata)

    parts = [authors_text, year_text, title_text]
    if container_text:
        parts.append(container_text)

    link = None
    if metadata.doi:
        link = f"https://doi.org/{metadata.doi}"
    elif metadata.url:
        link = metadata.url
    if link:
        parts.append(link)

    return " ".join(part.strip() for part in parts if part).strip()


def format_authors(authors: Iterable[Author]) -> str:
    formatted = [format_author(author) for author in authors if format_author(author)]
    if not formatted:
        return ""
    if len(formatted) == 1:
        return formatted[0]
    if len(formatted) == 2:
        return f"{formatted[0]}, & {formatted[1]}"
    return ", ".join(formatted[:-1]) + f", & {formatted[-1]}"


def format_author(author: Author) -> str:
    given_initials = " ".join(_initial(part) for part in author.given.split() if part)
    family = author.family.strip()
    if family and given_initials:
        return f"{family}, {given_initials}"
    if family:
        return family
    return given_initials


def _initial(part: str) -> str:
    clean = part.strip()
    if not clean:
        return ""
    return f"{clean[0].upper()}."


def _sentence_case(text: str) -> str:
    stripped = text.strip()
    if not stripped:
        return stripped
    lower = stripped[0].upper() + stripped[1:]
    return lower


def _format_container(metadata: WorkMetadata) -> str:
    if not metadata.container_title and not metadata.volume and not metadata.pages:
        return ""

    pieces = []
    if metadata.container_title:
        pieces.append(metadata.container_title)
    volume_issue = ""
    if metadata.volume:
        volume_issue = metadata.volume
        if metadata.issue:
            volume_issue += f"({metadata.issue})"
    elif metadata.issue:
        volume_issue = f"({metadata.issue})"

    if volume_issue:
        pieces.append(volume_issue)

    if metadata.source == "arxiv" and metadata.identifier:
        pieces.append(f"arXiv:{metadata.identifier}")

    if metadata.pages:
        pieces.append(metadata.pages)

    container = ", ".join(pieces)
    if container and not container.endswith("."):
        container += "."
    return container
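A minimal sketch of the assembly rules, using made-up metadata: three or more authors get serial-comma joining, and a missing year falls back to "(n.d.)":

```python
from citer.formatter import format_apa, format_authors
from citer.models import Author, WorkMetadata

authors = [Author("Jane", "Doe"), Author("John", "Smith"), Author("Ann", "Lee")]
assert format_authors(authors) == "Doe, J., Smith, J., & Lee, A."

# No year, container, DOI, or URL: only author, (n.d.), and sentence-cased title remain.
work = WorkMetadata(title="an untitled note", authors=authors[:1], year=None)
assert format_apa(work) == "Doe, J. (n.d.). An untitled note."
```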
citer/identifiers.py (new file, 50 lines)
@@ -0,0 +1,50 @@
import re
from urllib.parse import unquote

from .exceptions import CitationError


ARXIV_ID = re.compile(
    r"^(?P<id>(?:\d{4}\.\d{4,5}|[a-z-]+\/\d{7}))(?:v\d+)?$",
    flags=re.IGNORECASE,
)
DOI_PATTERN = re.compile(r"10\.\d{4,9}/\S+", flags=re.IGNORECASE)


def normalize_arxiv_identifier(raw: str) -> str:
    text = raw.strip()
    text = unquote(text)

    url_match = re.search(r"arxiv\.org/(?:abs|pdf)/(?P<id>[^?#/]+)", text, re.IGNORECASE)
    if url_match:
        text = url_match.group("id")

    text = re.sub(r"(?i)^arxiv:", "", text)
    text = re.sub(r"(?i)\.pdf$", "", text)
    text = text.split("?")[0]
    text = text.strip()

    match = ARXIV_ID.match(text)
    if not match:
        raise CitationError(f"Could not understand arXiv identifier: {raw}")

    base_id = match.group("id")
    base_id = re.sub(r"(?i)v\d+$", "", base_id)
    return base_id


def normalize_doi(raw: str) -> str:
    text = raw.strip()
    text = unquote(text)

    text = re.sub(r"(?i)^https?://(dx\.)?doi\.org/", "", text)
    text = re.sub(r"(?i)^doi:\s*", "", text)
    text = text.split("?")[0]
    text = text.strip()

    match = DOI_PATTERN.search(text)
    if not match:
        raise CitationError(f"Could not understand DOI: {raw}")

    doi = match.group(0)
    return doi.lower()
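Both normalizers are deterministic string cleanups, so they are easy to probe directly; a short sketch built from the same kinds of inputs the tests use:

```python
from citer.identifiers import normalize_arxiv_identifier, normalize_doi

# URL forms, version suffixes, and .pdf extensions all reduce to the bare ID.
assert normalize_arxiv_identifier("https://arxiv.org/pdf/2106.01342v2.pdf") == "2106.01342"

# DOI prefixes are stripped case-insensitively and the result is lowercased.
assert normalize_doi("doi:10.1038/NPHYS1170") == "10.1038/nphys1170"
```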
citer/models.py (new file, 27 lines)
@@ -0,0 +1,27 @@
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Author:
    given: str
    family: str

    @property
    def full_name(self) -> str:
        return f"{self.given} {self.family}".strip()


@dataclass
class WorkMetadata:
    title: str
    authors: List[Author]
    year: Optional[int]
    container_title: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None
    doi: Optional[str] = None
    url: Optional[str] = None
    source: str = ""
    identifier: Optional[str] = None
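A small sketch of the two dataclasses (the values here are illustrative only); everything past the three required fields defaults to None or an empty string:

```python
from citer.models import Author, WorkMetadata

author = Author(given="Ada", family="Lovelace")
assert author.full_name == "Ada Lovelace"

work = WorkMetadata(title="Notes", authors=[author], year=1843)
assert work.container_title is None  # optional bibliographic fields default to None
assert work.source == ""
```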
pyproject.toml (new file, 20 lines)
@@ -0,0 +1,20 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "citer"
version = "0.1.0"
description = "CLI to create APA style citations from arXiv IDs or DOIs"
readme = "README.md"
requires-python = ">=3.9"
dependencies = [
    "requests>=2.31.0",
]

[project.scripts]
citer = "citer.cli:main"

[tool.pytest.ini_options]
addopts = "-q"
testpaths = ["tests"]
tests/__pycache__/test_formatter.cpython-313-pytest-8.3.5.pyc (new binary file, not shown)
tests/__pycache__/test_identifiers.cpython-313-pytest-8.3.5.pyc (new binary file, not shown)
tests/test_formatter.py (new file, 44 lines)
@@ -0,0 +1,44 @@
from citer.formatter import format_apa
from citer.models import Author, WorkMetadata


def test_format_doi_article():
    metadata = WorkMetadata(
        title="Sample study on testing",
        authors=[Author("Jane", "Doe"), Author("John", "Smith")],
        year=2020,
        container_title="Journal of Tests",
        volume="12",
        issue="3",
        pages="45-67",
        doi="10.1234/example.doi",
        url="https://doi.org/10.1234/example.doi",
        source="doi",
        identifier="10.1234/example.doi",
    )

    citation = format_apa(metadata)
    assert (
        citation
        == "Doe, J., & Smith, J. (2020). Sample study on testing. "
        "Journal of Tests, 12(3), 45-67. https://doi.org/10.1234/example.doi"
    )


def test_format_arxiv_preprint():
    metadata = WorkMetadata(
        title="Deep learning for cats",
        authors=[Author("Alice", "Nguyen"), Author("Bob", "Smith")],
        year=2021,
        container_title="arXiv preprint",
        url="https://arxiv.org/abs/2101.00001",
        source="arxiv",
        identifier="2101.00001",
    )

    citation = format_apa(metadata)
    assert (
        citation
        == "Nguyen, A., & Smith, B. (2021). Deep learning for cats. "
        "arXiv preprint, arXiv:2101.00001. https://arxiv.org/abs/2101.00001"
    )
tests/test_identifiers.py (new file, 25 lines)
@@ -0,0 +1,25 @@
import pytest

from citer.identifiers import normalize_arxiv_identifier, normalize_doi


def test_normalize_arxiv_variants():
    assert normalize_arxiv_identifier("2106.01342v2") == "2106.01342"
    assert normalize_arxiv_identifier("https://arxiv.org/abs/2106.01342") == "2106.01342"
    assert normalize_arxiv_identifier("arXiv:hep-th/9901001") == "hep-th/9901001"


def test_normalize_arxiv_invalid():
    with pytest.raises(Exception):
        normalize_arxiv_identifier("not-an-id")


def test_normalize_doi_variants():
    assert normalize_doi("https://doi.org/10.1038/nphys1170") == "10.1038/nphys1170"
    assert normalize_doi("DOI:10.5555/12345678") == "10.5555/12345678"
    assert normalize_doi("10.1000/182") == "10.1000/182"


def test_normalize_doi_invalid():
    with pytest.raises(Exception):
        normalize_doi("not-a-doi")