Initial version
commit 42996b0f4e
31 changed files with 933 additions and 0 deletions
README.md (new file, 29 lines)
@@ -0,0 +1,29 @@
# Citer

Simple CLI that turns an arXiv ID/URL or DOI/URL into a single-line APA citation.

## Setup

```bash
pip install -e .
```

## Usage

```bash
# arXiv IDs or URLs
citer arxiv 2106.01342
citer arxiv https://arxiv.org/abs/2106.01342

# DOIs or DOI URLs
citer doi 10.1038/nphys1170
citer doi https://doi.org/10.1038/nphys1170
```

Errors are printed with a clear message if an ID cannot be parsed or a lookup fails.

## Tests

```bash
python -m pytest
```
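For example, an identifier that cannot be parsed produces a message on stderr and a nonzero exit. A minimal sketch driving the console entry point from Python (the input string is just an illustrative bad value; no network call happens on this path):

```python
# Error-path sketch: an unparseable DOI prints to stderr and main() returns 1.
from citer.cli import main

exit_code = main(["doi", "not-a-doi"])  # stderr: Error: Could not understand DOI: not-a-doi
assert exit_code == 1
```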
build/lib/citer/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
"""APA citation helper CLI."""

__all__ = ["cli", "fetchers", "formatter", "identifiers", "models"]
build/lib/citer/cli.py (new file, 48 lines)
@@ -0,0 +1,48 @@
import argparse
import sys

from .exceptions import CitationError
from .fetchers import fetch_arxiv, fetch_doi
from .formatter import format_apa
from .identifiers import normalize_arxiv_identifier, normalize_doi


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Create an APA style citation from an arXiv ID or DOI."
    )
    subparsers = parser.add_subparsers(dest="source", required=True)

    arxiv_parser = subparsers.add_parser("arxiv", help="Cite an arXiv identifier or URL")
    arxiv_parser.add_argument("identifier", help="arXiv ID or URL")

    doi_parser = subparsers.add_parser("doi", help="Cite a DOI or DOI URL")
    doi_parser.add_argument("identifier", help="DOI or DOI URL")

    return parser


def main(argv=None) -> int:
    parser = build_parser()
    args = parser.parse_args(argv)

    try:
        if args.source == "arxiv":
            arxiv_id = normalize_arxiv_identifier(args.identifier)
            metadata = fetch_arxiv(arxiv_id)
        elif args.source == "doi":
            doi = normalize_doi(args.identifier)
            metadata = fetch_doi(doi)
        else:
            parser.error("Unsupported source")
            return 1
    except CitationError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    print(format_apa(metadata))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
build/lib/citer/exceptions.py (new file, 2 lines)
@@ -0,0 +1,2 @@
class CitationError(Exception):
    """Raised when a citation cannot be created from the provided input."""
build/lib/citer/fetchers.py (new file, 158 lines)
@@ -0,0 +1,158 @@
import datetime as _dt
from typing import List, Optional
from urllib.parse import quote
from xml.etree import ElementTree

import requests

from .exceptions import CitationError
from .models import Author, WorkMetadata


ARXIV_API = "http://export.arxiv.org/api/query"
CROSSREF_WORKS = "https://api.crossref.org/works/"
USER_AGENT = "citer-cli/0.1 (mailto:unknown@example.com)"


def fetch_arxiv(arxiv_id: str) -> WorkMetadata:
    try:
        response = requests.get(
            ARXIV_API,
            params={"id_list": arxiv_id},
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        raise CitationError(f"Failed to reach arXiv API: {exc}") from exc

    try:
        root = ElementTree.fromstring(response.text)
    except ElementTree.ParseError as exc:
        raise CitationError("Received invalid XML from arXiv") from exc

    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "arxiv": "http://arxiv.org/schemas/atom",
    }
    entry = root.find("atom:entry", ns)
    if entry is None:
        raise CitationError(f"No entry found for arXiv ID {arxiv_id}")

    title = _clean(entry.findtext("atom:title", default="", namespaces=ns))
    authors = _parse_arxiv_authors(entry.findall("atom:author", ns))
    year = _parse_year(entry.findtext("atom:published", default="", namespaces=ns))
    doi = _clean(entry.findtext("arxiv:doi", default="", namespaces=ns)) or None

    url = f"https://arxiv.org/abs/{arxiv_id}"
    return WorkMetadata(
        title=title,
        authors=authors,
        year=year,
        container_title="arXiv preprint",
        doi=doi,
        url=url,
        source="arxiv",
        identifier=arxiv_id,
    )


def fetch_doi(doi: str) -> WorkMetadata:
    url = CROSSREF_WORKS + quote(doi)
    try:
        response = requests.get(
            url, headers={"Accept": "application/json", "User-Agent": USER_AGENT}, timeout=10
        )
        response.raise_for_status()
        data = response.json()
    except requests.RequestException as exc:
        raise CitationError(f"Failed to reach Crossref for DOI {doi}: {exc}") from exc
    except ValueError as exc:
        raise CitationError("Received invalid JSON from Crossref") from exc

    message = data.get("message", {})
    title = _clean(" ".join(message.get("title", [])).strip())
    authors = _parse_crossref_authors(message.get("author", []))
    year = _extract_year(message)
    container_title = _clean((message.get("container-title") or [""])[0])
    volume = _clean(message.get("volume", "")).strip() or None
    issue = _clean(message.get("issue", "")).strip() or None
    pages = _clean(message.get("page", "")).strip() or None
    url = message.get("URL") or f"https://doi.org/{doi}"

    return WorkMetadata(
        title=title,
        authors=authors,
        year=year,
        container_title=container_title or None,
        volume=volume,
        issue=issue,
        pages=pages,
        doi=doi,
        url=url,
        source="doi",
        identifier=doi,
    )


def _clean(value: str) -> str:
    return " ".join(value.split())


def _parse_arxiv_authors(author_elements) -> List[Author]:
    authors: List[Author] = []
    for element in author_elements:
        name = element.findtext("{http://www.w3.org/2005/Atom}name", default="")
        given, family = _split_author_name(name)
        authors.append(Author(given=given, family=family))
    return authors


def _parse_crossref_authors(author_data) -> List[Author]:
    authors: List[Author] = []
    for author in author_data:
        if "family" in author or "given" in author:
            given = author.get("given", "").strip()
            family = author.get("family", "").strip()
        elif "name" in author:
            given, family = _split_author_name(author.get("name", ""))
        else:
            continue
        authors.append(Author(given=given, family=family))
    return authors


def _split_author_name(name: str) -> tuple[str, str]:
    clean_name = _clean(name)
    if not clean_name:
        return "", ""
    if "," in clean_name:
        family, given = [part.strip() for part in clean_name.split(",", 1)]
    else:
        parts = clean_name.split()
        family = parts[-1]
        given = " ".join(parts[:-1])
    return given, family


def _parse_year(value: str) -> Optional[int]:
    value = value.strip()
    if not value:
        return None
    try:
        return _dt.datetime.fromisoformat(value).year
    except ValueError:
        if len(value) >= 4 and value[:4].isdigit():
            return int(value[:4])
        return None


def _extract_year(message: dict) -> Optional[int]:
    for key in ("published-print", "published-online", "issued"):
        data = message.get(key, {})
        parts = data.get("date-parts") if isinstance(data, dict) else None
        if parts and len(parts) and len(parts[0]):
            maybe_year = parts[0][0]
            if isinstance(maybe_year, int):
                return maybe_year
    return None
build/lib/citer/formatter.py (new file, 90 lines)
@@ -0,0 +1,90 @@
from typing import Iterable

from .models import Author, WorkMetadata


def format_apa(metadata: WorkMetadata) -> str:
    authors_text = format_authors(metadata.authors)
    year_text = f"({metadata.year})." if metadata.year else "(n.d.)."
    title_text = _sentence_case(metadata.title).rstrip(".") + "."
    container_text = _format_container(metadata)

    parts = [authors_text, year_text, title_text]
    if container_text:
        parts.append(container_text)

    link = None
    if metadata.doi:
        link = f"https://doi.org/{metadata.doi}"
    elif metadata.url:
        link = metadata.url
    if link:
        parts.append(link)

    return " ".join(part.strip() for part in parts if part).strip()


def format_authors(authors: Iterable[Author]) -> str:
    formatted = [format_author(author) for author in authors if format_author(author)]
    if not formatted:
        return ""
    if len(formatted) == 1:
        return formatted[0]
    if len(formatted) == 2:
        return f"{formatted[0]}, & {formatted[1]}"
    return ", ".join(formatted[:-1]) + f", & {formatted[-1]}"


def format_author(author: Author) -> str:
    given_initials = " ".join(_initial(part) for part in author.given.split() if part)
    family = author.family.strip()
    if family and given_initials:
        return f"{family}, {given_initials}"
    if family:
        return family
    return given_initials


def _initial(part: str) -> str:
    clean = part.strip()
    if not clean:
        return ""
    return f"{clean[0].upper()}."


def _sentence_case(text: str) -> str:
    stripped = text.strip()
    if not stripped:
        return stripped
    lower = stripped[0].upper() + stripped[1:]
    return lower


def _format_container(metadata: WorkMetadata) -> str:
    if not metadata.container_title and not metadata.volume and not metadata.pages:
        return ""

    pieces = []
    if metadata.container_title:
        pieces.append(metadata.container_title)
    volume_issue = ""
    if metadata.volume:
        volume_issue = metadata.volume
        if metadata.issue:
            volume_issue += f"({metadata.issue})"
    elif metadata.issue:
        volume_issue = f"({metadata.issue})"

    if volume_issue:
        pieces.append(volume_issue)

    if metadata.source == "arxiv" and metadata.identifier:
        pieces.append(f"arXiv:{metadata.identifier}")

    if metadata.pages:
        pieces.append(metadata.pages)

    container = ", ".join(pieces)
    if container and not container.endswith("."):
        container += "."
    return container
build/lib/citer/identifiers.py (new file, 50 lines)
@@ -0,0 +1,50 @@
import re
from urllib.parse import unquote

from .exceptions import CitationError


ARXIV_ID = re.compile(
    r"^(?P<id>(?:\d{4}\.\d{4,5}|[a-z-]+\/\d{7}))(?:v\d+)?$",
    flags=re.IGNORECASE,
)
DOI_PATTERN = re.compile(r"10\.\d{4,9}/\S+", flags=re.IGNORECASE)


def normalize_arxiv_identifier(raw: str) -> str:
    text = raw.strip()
    text = unquote(text)

    url_match = re.search(r"arxiv\.org/(?:abs|pdf)/(?P<id>[^?#/]+)", text, re.IGNORECASE)
    if url_match:
        text = url_match.group("id")

    text = re.sub(r"(?i)^arxiv:", "", text)
    text = re.sub(r"(?i)\.pdf$", "", text)
    text = text.split("?")[0]
    text = text.strip()

    match = ARXIV_ID.match(text)
    if not match:
        raise CitationError(f"Could not understand arXiv identifier: {raw}")

    base_id = match.group("id")
    base_id = re.sub(r"(?i)v\d+$", "", base_id)
    return base_id


def normalize_doi(raw: str) -> str:
    text = raw.strip()
    text = unquote(text)

    text = re.sub(r"(?i)^https?://(dx\.)?doi\.org/", "", text)
    text = re.sub(r"(?i)^doi:\s*", "", text)
    text = text.split("?")[0]
    text = text.strip()

    match = DOI_PATTERN.search(text)
    if not match:
        raise CitationError(f"Could not understand DOI: {raw}")

    doi = match.group(0)
    return doi.lower()
build/lib/citer/models.py (new file, 27 lines)
@@ -0,0 +1,27 @@
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Author:
    given: str
    family: str

    @property
    def full_name(self) -> str:
        return f"{self.given} {self.family}".strip()


@dataclass
class WorkMetadata:
    title: str
    authors: List[Author]
    year: Optional[int]
    container_title: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None
    doi: Optional[str] = None
    url: Optional[str] = None
    source: str = ""
    identifier: Optional[str] = None
citer.egg-info/PKG-INFO (new file, 37 lines)
@@ -0,0 +1,37 @@
Metadata-Version: 2.4
Name: citer
Version: 0.1.0
Summary: CLI to create APA style citations from arXiv IDs or DOIs
Requires-Python: >=3.9
Description-Content-Type: text/markdown
Requires-Dist: requests>=2.31.0

# Citer

Simple CLI that turns an arXiv ID/URL or DOI/URL into a single-line APA citation.

## Setup

```bash
pip install -e .
```

## Usage

```bash
# arXiv IDs or URLs
citer arxiv 2106.01342
citer arxiv https://arxiv.org/abs/2106.01342

# DOIs or DOI URLs
citer doi 10.1038/nphys1170
citer doi https://doi.org/10.1038/nphys1170
```

Errors are printed with a clear message if an ID cannot be parsed or a lookup fails.

## Tests

```bash
python -m pytest
```
citer.egg-info/SOURCES.txt (new file, 17 lines)
@@ -0,0 +1,17 @@
README.md
pyproject.toml
citer/__init__.py
citer/cli.py
citer/exceptions.py
citer/fetchers.py
citer/formatter.py
citer/identifiers.py
citer/models.py
citer.egg-info/PKG-INFO
citer.egg-info/SOURCES.txt
citer.egg-info/dependency_links.txt
citer.egg-info/entry_points.txt
citer.egg-info/requires.txt
citer.egg-info/top_level.txt
tests/test_formatter.py
tests/test_identifiers.py
citer.egg-info/dependency_links.txt (new file, 1 line)
@@ -0,0 +1 @@

citer.egg-info/entry_points.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
[console_scripts]
citer = citer.cli:main
citer.egg-info/requires.txt (new file, 1 line)
@@ -0,0 +1 @@
requests>=2.31.0
citer.egg-info/top_level.txt (new file, 1 line)
@@ -0,0 +1 @@
citer
citer/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
"""APA citation helper CLI."""

__all__ = ["cli", "fetchers", "formatter", "identifiers", "models"]
citer/__pycache__/__init__.cpython-313.pyc (new binary file, not shown)
citer/__pycache__/exceptions.cpython-313.pyc (new binary file, not shown)
citer/__pycache__/formatter.cpython-313.pyc (new binary file, not shown)
citer/__pycache__/identifiers.cpython-313.pyc (new binary file, not shown)
citer/__pycache__/models.cpython-313.pyc (new binary file, not shown)
citer/cli.py (new file, 48 lines)
@@ -0,0 +1,48 @@
import argparse
import sys

from .exceptions import CitationError
from .fetchers import fetch_arxiv, fetch_doi
from .formatter import format_apa
from .identifiers import normalize_arxiv_identifier, normalize_doi


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Create an APA style citation from an arXiv ID or DOI."
    )
    subparsers = parser.add_subparsers(dest="source", required=True)

    arxiv_parser = subparsers.add_parser("arxiv", help="Cite an arXiv identifier or URL")
    arxiv_parser.add_argument("identifier", help="arXiv ID or URL")

    doi_parser = subparsers.add_parser("doi", help="Cite a DOI or DOI URL")
    doi_parser.add_argument("identifier", help="DOI or DOI URL")

    return parser


def main(argv=None) -> int:
    parser = build_parser()
    args = parser.parse_args(argv)

    try:
        if args.source == "arxiv":
            arxiv_id = normalize_arxiv_identifier(args.identifier)
            metadata = fetch_arxiv(arxiv_id)
        elif args.source == "doi":
            doi = normalize_doi(args.identifier)
            metadata = fetch_doi(doi)
        else:
            parser.error("Unsupported source")
            return 1
    except CitationError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    print(format_apa(metadata))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
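A quick way to sanity-check the argument wiring without touching the network is to parse a known-good command line; the identifier below is one of the sample IDs used in the tests:

```python
# Parser-only sketch: build_parser() wires two subcommands, each taking one identifier.
from citer.cli import build_parser

args = build_parser().parse_args(["arxiv", "2101.00001"])
assert args.source == "arxiv"
assert args.identifier == "2101.00001"
```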
citer/exceptions.py (new file, 2 lines)
@@ -0,0 +1,2 @@
class CitationError(Exception):
    """Raised when a citation cannot be created from the provided input."""
citer/fetchers.py (new file, 158 lines)
@@ -0,0 +1,158 @@
import datetime as _dt
from typing import List, Optional
from urllib.parse import quote
from xml.etree import ElementTree

import requests

from .exceptions import CitationError
from .models import Author, WorkMetadata


ARXIV_API = "http://export.arxiv.org/api/query"
CROSSREF_WORKS = "https://api.crossref.org/works/"
USER_AGENT = "citer-cli/0.1 (mailto:unknown@example.com)"


def fetch_arxiv(arxiv_id: str) -> WorkMetadata:
    try:
        response = requests.get(
            ARXIV_API,
            params={"id_list": arxiv_id},
            headers={"User-Agent": USER_AGENT},
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        raise CitationError(f"Failed to reach arXiv API: {exc}") from exc

    try:
        root = ElementTree.fromstring(response.text)
    except ElementTree.ParseError as exc:
        raise CitationError("Received invalid XML from arXiv") from exc

    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "arxiv": "http://arxiv.org/schemas/atom",
    }
    entry = root.find("atom:entry", ns)
    if entry is None:
        raise CitationError(f"No entry found for arXiv ID {arxiv_id}")

    title = _clean(entry.findtext("atom:title", default="", namespaces=ns))
    authors = _parse_arxiv_authors(entry.findall("atom:author", ns))
    year = _parse_year(entry.findtext("atom:published", default="", namespaces=ns))
    doi = _clean(entry.findtext("arxiv:doi", default="", namespaces=ns)) or None

    url = f"https://arxiv.org/abs/{arxiv_id}"
    return WorkMetadata(
        title=title,
        authors=authors,
        year=year,
        container_title="arXiv preprint",
        doi=doi,
        url=url,
        source="arxiv",
        identifier=arxiv_id,
    )


def fetch_doi(doi: str) -> WorkMetadata:
    url = CROSSREF_WORKS + quote(doi)
    try:
        response = requests.get(
            url, headers={"Accept": "application/json", "User-Agent": USER_AGENT}, timeout=10
        )
        response.raise_for_status()
        data = response.json()
    except requests.RequestException as exc:
        raise CitationError(f"Failed to reach Crossref for DOI {doi}: {exc}") from exc
    except ValueError as exc:
        raise CitationError("Received invalid JSON from Crossref") from exc

    message = data.get("message", {})
    title = _clean(" ".join(message.get("title", [])).strip())
    authors = _parse_crossref_authors(message.get("author", []))
    year = _extract_year(message)
    container_title = _clean((message.get("container-title") or [""])[0])
    volume = _clean(message.get("volume", "")).strip() or None
    issue = _clean(message.get("issue", "")).strip() or None
    pages = _clean(message.get("page", "")).strip() or None
    url = message.get("URL") or f"https://doi.org/{doi}"

    return WorkMetadata(
        title=title,
        authors=authors,
        year=year,
        container_title=container_title or None,
        volume=volume,
        issue=issue,
        pages=pages,
        doi=doi,
        url=url,
        source="doi",
        identifier=doi,
    )


def _clean(value: str) -> str:
    return " ".join(value.split())


def _parse_arxiv_authors(author_elements) -> List[Author]:
    authors: List[Author] = []
    for element in author_elements:
        name = element.findtext("{http://www.w3.org/2005/Atom}name", default="")
        given, family = _split_author_name(name)
        authors.append(Author(given=given, family=family))
    return authors


def _parse_crossref_authors(author_data) -> List[Author]:
    authors: List[Author] = []
    for author in author_data:
        if "family" in author or "given" in author:
            given = author.get("given", "").strip()
            family = author.get("family", "").strip()
        elif "name" in author:
            given, family = _split_author_name(author.get("name", ""))
        else:
            continue
        authors.append(Author(given=given, family=family))
    return authors


def _split_author_name(name: str) -> tuple[str, str]:
    clean_name = _clean(name)
    if not clean_name:
        return "", ""
    if "," in clean_name:
        family, given = [part.strip() for part in clean_name.split(",", 1)]
    else:
        parts = clean_name.split()
        family = parts[-1]
        given = " ".join(parts[:-1])
    return given, family


def _parse_year(value: str) -> Optional[int]:
    value = value.strip()
    if not value:
        return None
    try:
        return _dt.datetime.fromisoformat(value).year
    except ValueError:
        if len(value) >= 4 and value[:4].isdigit():
            return int(value[:4])
        return None


def _extract_year(message: dict) -> Optional[int]:
    for key in ("published-print", "published-online", "issued"):
        data = message.get(key, {})
        parts = data.get("date-parts") if isinstance(data, dict) else None
        if parts and len(parts) and len(parts[0]):
            maybe_year = parts[0][0]
            if isinstance(maybe_year, int):
                return maybe_year
    return None
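The year and author helpers are pure functions, so they can be exercised without any HTTP traffic. A small sketch (the Crossref-style payload is made up, and these are module-private helpers called here only for illustration):

```python
# _extract_year falls back through published-print, published-online, then issued.
from citer.fetchers import _extract_year, _split_author_name

message = {"issued": {"date-parts": [[2008, 7]]}}
assert _extract_year(message) == 2008

# "Family, Given" input splits on the comma; bare names split on the last space.
assert _split_author_name("van der Berg, Anna") == ("Anna", "van der Berg")
assert _split_author_name("Grace Hopper") == ("Grace", "Hopper")
```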
citer/formatter.py (new file, 90 lines)
@@ -0,0 +1,90 @@
from typing import Iterable

from .models import Author, WorkMetadata


def format_apa(metadata: WorkMetadata) -> str:
    authors_text = format_authors(metadata.authors)
    year_text = f"({metadata.year})." if metadata.year else "(n.d.)."
    title_text = _sentence_case(metadata.title).rstrip(".") + "."
    container_text = _format_container(metadata)

    parts = [authors_text, year_text, title_text]
    if container_text:
        parts.append(container_text)

    link = None
    if metadata.doi:
        link = f"https://doi.org/{metadata.doi}"
    elif metadata.url:
        link = metadata.url
    if link:
        parts.append(link)

    return " ".join(part.strip() for part in parts if part).strip()


def format_authors(authors: Iterable[Author]) -> str:
    formatted = [format_author(author) for author in authors if format_author(author)]
    if not formatted:
        return ""
    if len(formatted) == 1:
        return formatted[0]
    if len(formatted) == 2:
        return f"{formatted[0]}, & {formatted[1]}"
    return ", ".join(formatted[:-1]) + f", & {formatted[-1]}"


def format_author(author: Author) -> str:
    given_initials = " ".join(_initial(part) for part in author.given.split() if part)
    family = author.family.strip()
    if family and given_initials:
        return f"{family}, {given_initials}"
    if family:
        return family
    return given_initials


def _initial(part: str) -> str:
    clean = part.strip()
    if not clean:
        return ""
    return f"{clean[0].upper()}."


def _sentence_case(text: str) -> str:
    stripped = text.strip()
    if not stripped:
        return stripped
    lower = stripped[0].upper() + stripped[1:]
    return lower


def _format_container(metadata: WorkMetadata) -> str:
    if not metadata.container_title and not metadata.volume and not metadata.pages:
        return ""

    pieces = []
    if metadata.container_title:
        pieces.append(metadata.container_title)
    volume_issue = ""
    if metadata.volume:
        volume_issue = metadata.volume
        if metadata.issue:
            volume_issue += f"({metadata.issue})"
    elif metadata.issue:
        volume_issue = f"({metadata.issue})"

    if volume_issue:
        pieces.append(volume_issue)

    if metadata.source == "arxiv" and metadata.identifier:
        pieces.append(f"arXiv:{metadata.identifier}")

    if metadata.pages:
        pieces.append(metadata.pages)

    container = ", ".join(pieces)
    if container and not container.endswith("."):
        container += "."
    return container
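A minimal sketch of the assembly rules, using made-up metadata: three or more authors get serial-comma joining, and a missing year falls back to "(n.d.)":

```python
from citer.formatter import format_apa, format_authors
from citer.models import Author, WorkMetadata

authors = [Author("Jane", "Doe"), Author("John", "Smith"), Author("Ann", "Lee")]
assert format_authors(authors) == "Doe, J., Smith, J., & Lee, A."

# No year, container, DOI, or URL: only author, (n.d.), and sentence-cased title remain.
work = WorkMetadata(title="an untitled note", authors=authors[:1], year=None)
assert format_apa(work) == "Doe, J. (n.d.). An untitled note."
```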
citer/identifiers.py (new file, 50 lines)
@@ -0,0 +1,50 @@
import re
from urllib.parse import unquote

from .exceptions import CitationError


ARXIV_ID = re.compile(
    r"^(?P<id>(?:\d{4}\.\d{4,5}|[a-z-]+\/\d{7}))(?:v\d+)?$",
    flags=re.IGNORECASE,
)
DOI_PATTERN = re.compile(r"10\.\d{4,9}/\S+", flags=re.IGNORECASE)


def normalize_arxiv_identifier(raw: str) -> str:
    text = raw.strip()
    text = unquote(text)

    url_match = re.search(r"arxiv\.org/(?:abs|pdf)/(?P<id>[^?#/]+)", text, re.IGNORECASE)
    if url_match:
        text = url_match.group("id")

    text = re.sub(r"(?i)^arxiv:", "", text)
    text = re.sub(r"(?i)\.pdf$", "", text)
    text = text.split("?")[0]
    text = text.strip()

    match = ARXIV_ID.match(text)
    if not match:
        raise CitationError(f"Could not understand arXiv identifier: {raw}")

    base_id = match.group("id")
    base_id = re.sub(r"(?i)v\d+$", "", base_id)
    return base_id


def normalize_doi(raw: str) -> str:
    text = raw.strip()
    text = unquote(text)

    text = re.sub(r"(?i)^https?://(dx\.)?doi\.org/", "", text)
    text = re.sub(r"(?i)^doi:\s*", "", text)
    text = text.split("?")[0]
    text = text.strip()

    match = DOI_PATTERN.search(text)
    if not match:
        raise CitationError(f"Could not understand DOI: {raw}")

    doi = match.group(0)
    return doi.lower()
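Both normalizers are deterministic string cleanups, so they are easy to probe directly; a short sketch built from the same kinds of inputs the tests use:

```python
from citer.identifiers import normalize_arxiv_identifier, normalize_doi

# URL forms, version suffixes, and .pdf extensions all reduce to the bare ID.
assert normalize_arxiv_identifier("https://arxiv.org/pdf/2106.01342v2.pdf") == "2106.01342"

# DOI prefixes are stripped case-insensitively and the result is lowercased.
assert normalize_doi("doi:10.1038/NPHYS1170") == "10.1038/nphys1170"
```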
citer/models.py (new file, 27 lines)
@@ -0,0 +1,27 @@
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Author:
    given: str
    family: str

    @property
    def full_name(self) -> str:
        return f"{self.given} {self.family}".strip()


@dataclass
class WorkMetadata:
    title: str
    authors: List[Author]
    year: Optional[int]
    container_title: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None
    doi: Optional[str] = None
    url: Optional[str] = None
    source: str = ""
    identifier: Optional[str] = None
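A small sketch of the two dataclasses (the values here are illustrative only); everything past the three required fields defaults to None or an empty string:

```python
from citer.models import Author, WorkMetadata

author = Author(given="Ada", family="Lovelace")
assert author.full_name == "Ada Lovelace"

work = WorkMetadata(title="Notes", authors=[author], year=1843)
assert work.container_title is None  # optional bibliographic fields default to None
assert work.source == ""
```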
pyproject.toml (new file, 20 lines)
@@ -0,0 +1,20 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "citer"
version = "0.1.0"
description = "CLI to create APA style citations from arXiv IDs or DOIs"
readme = "README.md"
requires-python = ">=3.9"
dependencies = [
    "requests>=2.31.0",
]

[project.scripts]
citer = "citer.cli:main"

[tool.pytest.ini_options]
addopts = "-q"
testpaths = ["tests"]
tests/__pycache__/test_formatter.cpython-313-pytest-8.3.5.pyc (new binary file, not shown)
tests/__pycache__/test_identifiers.cpython-313-pytest-8.3.5.pyc (new binary file, not shown)
tests/test_formatter.py (new file, 44 lines)
@@ -0,0 +1,44 @@
from citer.formatter import format_apa
from citer.models import Author, WorkMetadata


def test_format_doi_article():
    metadata = WorkMetadata(
        title="Sample study on testing",
        authors=[Author("Jane", "Doe"), Author("John", "Smith")],
        year=2020,
        container_title="Journal of Tests",
        volume="12",
        issue="3",
        pages="45-67",
        doi="10.1234/example.doi",
        url="https://doi.org/10.1234/example.doi",
        source="doi",
        identifier="10.1234/example.doi",
    )

    citation = format_apa(metadata)
    assert (
        citation
        == "Doe, J., & Smith, J. (2020). Sample study on testing. "
        "Journal of Tests, 12(3), 45-67. https://doi.org/10.1234/example.doi"
    )


def test_format_arxiv_preprint():
    metadata = WorkMetadata(
        title="Deep learning for cats",
        authors=[Author("Alice", "Nguyen"), Author("Bob", "Smith")],
        year=2021,
        container_title="arXiv preprint",
        url="https://arxiv.org/abs/2101.00001",
        source="arxiv",
        identifier="2101.00001",
    )

    citation = format_apa(metadata)
    assert (
        citation
        == "Nguyen, A., & Smith, B. (2021). Deep learning for cats. "
        "arXiv preprint, arXiv:2101.00001. https://arxiv.org/abs/2101.00001"
    )
tests/test_identifiers.py (new file, 25 lines)
@@ -0,0 +1,25 @@
import pytest

from citer.identifiers import normalize_arxiv_identifier, normalize_doi


def test_normalize_arxiv_variants():
    assert normalize_arxiv_identifier("2106.01342v2") == "2106.01342"
    assert normalize_arxiv_identifier("https://arxiv.org/abs/2106.01342") == "2106.01342"
    assert normalize_arxiv_identifier("arXiv:hep-th/9901001") == "hep-th/9901001"


def test_normalize_arxiv_invalid():
    with pytest.raises(Exception):
        normalize_arxiv_identifier("not-an-id")


def test_normalize_doi_variants():
    assert normalize_doi("https://doi.org/10.1038/nphys1170") == "10.1038/nphys1170"
    assert normalize_doi("DOI:10.5555/12345678") == "10.5555/12345678"
    assert normalize_doi("10.1000/182") == "10.1000/182"


def test_normalize_doi_invalid():
    with pytest.raises(Exception):
        normalize_doi("not-a-doi")