Initial untested implementation

parent 24200b899a
commit 193a50dd3b

8 changed files with 917 additions and 11 deletions
@@ -1,2 +1,28 @@
-def main() -> None:
-    print("Hello from rss2newsletter!")
+"""
+rss2newsletter - A simple tool for generating HTML newsletters from RSS feeds.
+
+This package provides functionality to:
+- Fetch articles from RSS feeds
+- Filter articles published today
+- Generate AI summaries using Ollama
+- Create clean HTML newsletters
+"""
+
+__version__ = "0.1.0"
+__author__ = "Your Name"
+__email__ = "your.email@example.com"
+
+from .rss_fetcher import get_todays_articles
+from .ollama_client import create_ollama_client, summarize_articles
+from .html_generator import generate_newsletter_html, save_newsletter_html
+from .config import get_config_from_env, setup_logging
+
+__all__ = [
+    "get_todays_articles",
+    "create_ollama_client",
+    "summarize_articles",
+    "generate_newsletter_html",
+    "save_newsletter_html",
+    "get_config_from_env",
+    "setup_logging",
+]
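The package docstring above lists the four stages the package exposes. As an illustration only (not part of this commit), here is a minimal sketch of how the exported functions chain together; the feed URL is a placeholder:

from rss2newsletter import (
    get_config_from_env,
    setup_logging,
    get_todays_articles,
    create_ollama_client,
    summarize_articles,
    generate_newsletter_html,
    save_newsletter_html,
)

# Load defaults plus any environment overrides, then configure logging.
config = get_config_from_env()
setup_logging(config)

# Fetch today's entries, summarize them with Ollama, and render/save the HTML.
articles = get_todays_articles("https://feeds.example.com/rss")  # placeholder URL
client = create_ollama_client(config["ollama"]["base_url"])
summarized = summarize_articles(client, articles, config["ollama"]["model"])
html = generate_newsletter_html(summarized, config["output"]["feed_title"])
save_newsletter_html(html, config["output"]["filename"])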
src/rss2newsletter/__main__.py  (new file, 161 lines added)

@@ -0,0 +1,161 @@
"""Main entry point for rss2newsletter."""

import sys
import argparse
import logging
from typing import List, Dict

from .config import get_config_from_env, setup_logging
from .rss_fetcher import get_todays_articles
from .ollama_client import create_ollama_client, summarize_articles
from .html_generator import generate_newsletter_html, save_newsletter_html

logger = logging.getLogger(__name__)


def create_argument_parser() -> argparse.ArgumentParser:
    """Create command line argument parser."""
    parser = argparse.ArgumentParser(
        description="Generate HTML newsletter from RSS feed using Ollama AI summaries",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python -m rss2newsletter https://feeds.example.com/rss
  python -m rss2newsletter https://blog.example.com/feed.xml --output my_newsletter.html
  python -m rss2newsletter https://news.example.com/rss --model llama3.1
        """,
    )

    parser.add_argument("rss_url", help="RSS feed URL to process")

    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output HTML filename (default: newsletter.html)",
    )

    parser.add_argument(
        "--model",
        "-m",
        default=None,
        help="Ollama model to use for summaries (default: llama3.2)",
    )

    parser.add_argument(
        "--ollama-url",
        default=None,
        help="Ollama server URL (default: http://localhost:11434)",
    )

    parser.add_argument(
        "--title", "-t", default=None, help="Newsletter title (default: RSS Newsletter)"
    )

    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Enable verbose logging"
    )

    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Fetch articles but don't generate summaries or save HTML",
    )

    return parser


def merge_config_with_args(config: Dict, args: argparse.Namespace) -> Dict:
    """Merge configuration with command line arguments."""
    if args.output:
        config["output"]["filename"] = args.output

    if args.model:
        config["ollama"]["model"] = args.model

    if args.ollama_url:
        config["ollama"]["base_url"] = args.ollama_url

    if args.title:
        config["output"]["feed_title"] = args.title

    if args.verbose:
        config["logging"]["level"] = "DEBUG"

    return config


def process_rss_to_newsletter(rss_url: str, config: Dict, dry_run: bool = False) -> str:
    """Main processing pipeline for RSS to newsletter conversion."""
    logger.info(f"Starting RSS newsletter generation for: {rss_url}")

    # Step 1: Fetch today's articles
    logger.info("Fetching articles from RSS feed...")
    articles = get_todays_articles(rss_url)

    if not articles:
        logger.warning("No articles found for today")
        return generate_newsletter_html([], config["output"]["feed_title"])

    logger.info(f"Found {len(articles)} articles from today")

    if dry_run:
        logger.info("Dry run mode - stopping before summarization")
        for i, article in enumerate(articles, 1):
            logger.info(f"Article {i}: {article['title']}")
        return ""

    # Step 2: Create Ollama client and summarize articles
    logger.info("Generating AI summaries...")
    ollama_client = create_ollama_client(config["ollama"]["base_url"])

    summarized_articles = summarize_articles(
        ollama_client, articles, config["ollama"]["model"]
    )

    # Step 3: Generate HTML newsletter
    logger.info("Generating HTML newsletter...")
    html_content = generate_newsletter_html(
        summarized_articles, config["output"]["feed_title"]
    )

    return html_content


def main() -> int:
    """Main entry point."""
    parser = create_argument_parser()
    args = parser.parse_args()

    # Load and merge configuration
    config = get_config_from_env()
    config = merge_config_with_args(config, args)

    # Setup logging
    setup_logging(config)

    try:
        # Process RSS feed to newsletter
        html_content = process_rss_to_newsletter(args.rss_url, config, args.dry_run)

        if not args.dry_run and html_content:
            # Save HTML file
            output_filename = save_newsletter_html(
                html_content, config["output"]["filename"]
            )
            print(f"Newsletter saved to: {output_filename}")

        return 0

    except KeyboardInterrupt:
        logger.info("Process interrupted by user")
        return 1
    except Exception as e:
        logger.error(f"Error generating newsletter: {e}")
        if config["logging"]["level"] == "DEBUG":
            logger.exception("Full error details:")
        return 1


if __name__ == "__main__":
    sys.exit(main())
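For completeness, a hedged sketch of driving the pipeline from Python rather than the CLI, using the functions defined above; with dry_run=True the pipeline stops after fetching, so no Ollama calls are made and no file is written. The feed URL is a placeholder:

from rss2newsletter.__main__ import process_rss_to_newsletter
from rss2newsletter.config import get_config_from_env, setup_logging

config = get_config_from_env()
setup_logging(config)

# Logs the fetched article titles and returns an empty string in dry-run mode.
html = process_rss_to_newsletter("https://feeds.example.com/rss", config, dry_run=True)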
src/rss2newsletter/config.py  (new file, 75 lines added)

@@ -0,0 +1,75 @@
"""Configuration management for rss2newsletter."""

import os
from typing import Dict, Any
import logging

logger = logging.getLogger(__name__)


def get_default_config() -> Dict[str, Any]:
    """Get default configuration values."""
    return {
        "ollama": {
            "base_url": "http://localhost:11434",
            "model": "llama3.2",
            "timeout": 60,
            "max_summary_length": 150,
        },
        "output": {"filename": "newsletter.html", "feed_title": "RSS Newsletter"},
        "logging": {
            "level": "INFO",
            "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        },
    }


def get_config_from_env() -> Dict[str, Any]:
    """Get configuration from environment variables."""
    config = get_default_config()

    # Ollama configuration
    if os.getenv("OLLAMA_BASE_URL"):
        config["ollama"]["base_url"] = os.getenv("OLLAMA_BASE_URL")

    if os.getenv("OLLAMA_MODEL"):
        config["ollama"]["model"] = os.getenv("OLLAMA_MODEL")

    if os.getenv("OLLAMA_TIMEOUT"):
        try:
            config["ollama"]["timeout"] = int(os.getenv("OLLAMA_TIMEOUT"))
        except ValueError:
            logger.warning("Invalid OLLAMA_TIMEOUT value, using default")

    if os.getenv("MAX_SUMMARY_LENGTH"):
        try:
            config["ollama"]["max_summary_length"] = int(
                os.getenv("MAX_SUMMARY_LENGTH")
            )
        except ValueError:
            logger.warning("Invalid MAX_SUMMARY_LENGTH value, using default")

    # Output configuration
    if os.getenv("OUTPUT_FILENAME"):
        config["output"]["filename"] = os.getenv("OUTPUT_FILENAME")

    if os.getenv("FEED_TITLE"):
        config["output"]["feed_title"] = os.getenv("FEED_TITLE")

    # Logging configuration
    if os.getenv("LOG_LEVEL"):
        config["logging"]["level"] = os.getenv("LOG_LEVEL").upper()

    return config


def setup_logging(config: Dict[str, Any]) -> None:
    """Setup logging based on configuration."""
    log_level = getattr(logging, config["logging"]["level"], logging.INFO)
    log_format = config["logging"]["format"]

    logging.basicConfig(level=log_level, format=log_format, datefmt="%Y-%m-%d %H:%M:%S")

    # Reduce noise from external libraries
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("requests").setLevel(logging.WARNING)
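A small sketch, with illustrative values that are not part of the commit, of how the environment variables above override the defaults before setup_logging() is applied:

import os
from rss2newsletter.config import get_config_from_env, setup_logging

# Each variable is optional; invalid integers fall back to the defaults with a warning.
os.environ["OLLAMA_MODEL"] = "llama3.1"
os.environ["MAX_SUMMARY_LENGTH"] = "120"
os.environ["FEED_TITLE"] = "Engineering Digest"
os.environ["LOG_LEVEL"] = "debug"  # upper-cased by get_config_from_env()

config = get_config_from_env()
setup_logging(config)
print(config["ollama"]["model"], config["ollama"]["max_summary_length"])
# -> llama3.1 120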
src/rss2newsletter/html_generator.py  (new file, 227 lines added)

@@ -0,0 +1,227 @@
"""HTML newsletter generation functionality."""

from datetime import datetime
from typing import List, Dict
import html
import logging

logger = logging.getLogger(__name__)


def escape_html(text: str) -> str:
    """Safely escape HTML in text content."""
    return html.escape(str(text)) if text else ""


def create_article_html(article: Dict[str, str]) -> str:
    """Generate HTML for a single article."""
    title = escape_html(article.get("title", "No Title"))
    link = escape_html(article.get("link", ""))
    ai_summary = escape_html(article.get("ai_summary", "No summary available"))

    return f"""
    <div class="article">
        <h2 class="article-title">
            <a href="{link}" target="_blank">{title}</a>
        </h2>
        <div class="article-summary">
            <p>{ai_summary}</p>
        </div>
        <div class="article-link">
            <a href="{link}" target="_blank" class="read-more">Read full article →</a>
        </div>
    </div>
    """


def create_css_styles() -> str:
    """Generate CSS styles for the newsletter."""
    return """
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f9f9f9;
        }

        .newsletter {
            background: white;
            border-radius: 8px;
            padding: 30px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }

        .header {
            text-align: center;
            border-bottom: 2px solid #eee;
            padding-bottom: 20px;
            margin-bottom: 30px;
        }

        .header h1 {
            color: #2c3e50;
            margin: 0;
            font-size: 2.5em;
        }

        .header .date {
            color: #7f8c8d;
            font-size: 1.1em;
            margin-top: 10px;
        }

        .article {
            margin-bottom: 40px;
            padding-bottom: 30px;
            border-bottom: 1px solid #eee;
        }

        .article:last-child {
            border-bottom: none;
            margin-bottom: 0;
        }

        .article-title {
            margin: 0 0 15px 0;
            font-size: 1.4em;
        }

        .article-title a {
            color: #2980b9;
            text-decoration: none;
        }

        .article-title a:hover {
            color: #3498db;
            text-decoration: underline;
        }

        .article-summary {
            margin: 15px 0;
            background: #f8f9fa;
            padding: 15px;
            border-radius: 5px;
            border-left: 4px solid #3498db;
        }

        .article-summary p {
            margin: 0;
            color: #555;
        }

        .article-link {
            margin-top: 15px;
        }

        .read-more {
            color: #e74c3c;
            text-decoration: none;
            font-weight: 500;
        }

        .read-more:hover {
            text-decoration: underline;
        }

        .footer {
            margin-top: 40px;
            padding-top: 20px;
            border-top: 1px solid #eee;
            text-align: center;
            color: #7f8c8d;
            font-size: 0.9em;
        }

        .no-articles {
            text-align: center;
            color: #7f8c8d;
            font-style: italic;
            padding: 40px 20px;
        }
    </style>
    """


def create_header_html(feed_title: str = "RSS Newsletter") -> str:
    """Generate the newsletter header."""
    current_date = datetime.now().strftime("%B %d, %Y")

    return f"""
    <div class="header">
        <h1>📰 {escape_html(feed_title)}</h1>
        <div class="date">Daily Summary for {current_date}</div>
    </div>
    """


def create_footer_html() -> str:
    """Generate the newsletter footer."""
    return """
    <div class="footer">
        <p>Generated by rss2newsletter 🤖</p>
        <p>Powered by Ollama AI summaries</p>
    </div>
    """


def create_no_articles_html() -> str:
    """Generate HTML for when no articles are found."""
    return """
    <div class="no-articles">
        <h2>📭 No articles found for today</h2>
        <p>Check back tomorrow for fresh content!</p>
    </div>
    """


def generate_newsletter_html(
    articles: List[Dict[str, str]], feed_title: str = "RSS Newsletter"
) -> str:
    """Generate complete HTML newsletter from articles."""
    logger.info(f"Generating HTML newsletter with {len(articles)} articles")

    # Generate article HTML
    if articles:
        articles_html = "\n".join(create_article_html(article) for article in articles)
    else:
        articles_html = create_no_articles_html()

    # Combine all parts
    html_content = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{escape_html(feed_title)} - Daily Summary</title>
    {create_css_styles()}
</head>
<body>
    <div class="newsletter">
        {create_header_html(feed_title)}
        {articles_html}
        {create_footer_html()}
    </div>
</body>
</html>
    """

    return html_content


def save_newsletter_html(html_content: str, filename: str = "newsletter.html") -> str:
    """Save HTML content to file and return the filename."""
    try:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(html_content)

        logger.info(f"Newsletter saved to: {filename}")
        return filename

    except IOError as e:
        logger.error(f"Error saving newsletter: {e}")
        raise
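To exercise the renderer in isolation, a minimal sketch with a hand-built article dict (the article fields are made up for illustration); only "title", "link", and "ai_summary" are read by create_article_html():

from rss2newsletter.html_generator import generate_newsletter_html, save_newsletter_html

articles = [
    {
        "title": "Example article",
        "link": "https://blog.example.com/post",
        "ai_summary": "A short AI-generated summary would appear here.",
    }
]

# Renders the full standalone HTML document and writes it to disk.
html = generate_newsletter_html(articles, feed_title="RSS Newsletter")
save_newsletter_html(html, "newsletter.html")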
src/rss2newsletter/ollama_client.py  (new file, 112 lines added)

@@ -0,0 +1,112 @@
"""Ollama API client for generating summaries."""

import requests
import json
from typing import Dict, Optional
import logging

logger = logging.getLogger(__name__)


def create_ollama_client(base_url: str = "http://localhost:11434") -> Dict[str, str]:
    """Create an Ollama client configuration."""
    return {
        "base_url": base_url,
        "generate_endpoint": f"{base_url}/api/generate",
        "chat_endpoint": f"{base_url}/api/chat",
    }


def test_ollama_connection(client_config: Dict[str, str]) -> bool:
    """Test if Ollama server is accessible."""
    try:
        response = requests.get(f"{client_config['base_url']}/api/tags", timeout=5)
        return response.status_code == 200
    except requests.RequestException as e:
        logger.error(f"Failed to connect to Ollama server: {e}")
        return False


def generate_summary(
    client_config: Dict[str, str],
    content: str,
    model: str = "llama3.2",
    max_length: int = 150,
) -> Optional[str]:
    """Generate a summary of the given content using Ollama."""
    if not content.strip():
        return "No content available for summarization."

    prompt = f"""Please provide a concise summary of the following article in approximately {max_length} words. Focus on the key points and main ideas:

{content}

Summary:"""

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.7,
            "top_p": 0.9,
            "max_tokens": max_length * 2,  # Allow some buffer
        },
    }

    try:
        logger.info(f"Generating summary using model: {model}")
        response = requests.post(
            client_config["generate_endpoint"], json=payload, timeout=60
        )
        response.raise_for_status()

        result = response.json()
        summary = result.get("response", "").strip()

        if not summary:
            logger.warning("Received empty summary from Ollama")
            return "Summary could not be generated."

        return summary

    except requests.RequestException as e:
        logger.error(f"Error communicating with Ollama: {e}")
        return f"Error generating summary: {str(e)}"
    except json.JSONDecodeError as e:
        logger.error(f"Error parsing Ollama response: {e}")
        return "Error: Invalid response from Ollama server."


def summarize_article(
    client_config: Dict[str, str], article: Dict[str, str], model: str = "llama3.2"
) -> Dict[str, str]:
    """Summarize a single article and return enriched article data."""
    # Use content if available, otherwise fall back to summary
    content_to_summarize = article.get("content") or article.get("summary", "")

    # Generate AI summary
    ai_summary = generate_summary(client_config, content_to_summarize, model)

    # Return enriched article data
    return {**article, "ai_summary": ai_summary or "Summary unavailable."}


def summarize_articles(
    client_config: Dict[str, str], articles: list, model: str = "llama3.2"
) -> list:
    """Summarize multiple articles using functional approach."""
    if not test_ollama_connection(client_config):
        logger.error("Cannot connect to Ollama server. Summaries will be unavailable.")
        # Return articles with placeholder summaries
        return [
            {
                **article,
                "ai_summary": "Summary unavailable - Ollama server not accessible.",
            }
            for article in articles
        ]

    logger.info(f"Summarizing {len(articles)} articles")

    return [summarize_article(client_config, article, model) for article in articles]
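A hedged sketch of calling the client directly for a single summary, guarding on the connection test first; the article text and server URL match the module defaults and are placeholders:

from rss2newsletter.ollama_client import (
    create_ollama_client,
    test_ollama_connection,
    generate_summary,
)

client = create_ollama_client("http://localhost:11434")
if test_ollama_connection(client):
    # Sends a single non-streaming /api/generate request and returns the text.
    summary = generate_summary(client, "Article text to condense...", model="llama3.2")
    print(summary)
else:
    print("Ollama server is not reachable; summaries would be placeholders.")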
src/rss2newsletter/rss_fetcher.py  (new file, 93 lines added)

@@ -0,0 +1,93 @@
"""RSS feed fetching functionality."""

import feedparser
from datetime import datetime, timezone
from dateutil import parser as date_parser
from typing import List, Dict, Optional
import logging

logger = logging.getLogger(__name__)


def fetch_rss_feed(url: str) -> feedparser.FeedParserDict:
    """Fetch and parse RSS feed from URL."""
    try:
        logger.info(f"Fetching RSS feed from: {url}")
        feed = feedparser.parse(url)

        if feed.bozo:
            logger.warning(f"Feed parsing warning: {feed.bozo_exception}")

        return feed
    except Exception as e:
        logger.error(f"Error fetching RSS feed: {e}")
        raise


def is_today(entry_date: Optional[str]) -> bool:
    """Check if an entry was published today."""
    if not entry_date:
        return False

    try:
        # Parse the entry date
        parsed_date = date_parser.parse(entry_date)

        # Make sure it's timezone-aware
        if parsed_date.tzinfo is None:
            parsed_date = parsed_date.replace(tzinfo=timezone.utc)

        # Get today's date in UTC
        today = datetime.now(timezone.utc).date()

        return parsed_date.date() == today
    except Exception as e:
        logger.warning(f"Error parsing date '{entry_date}': {e}")
        return False


def extract_article_data(entry) -> Dict[str, str]:
    """Extract relevant data from a feed entry."""
    return {
        "title": getattr(entry, "title", "No Title"),
        "link": getattr(entry, "link", ""),
        "summary": getattr(entry, "summary", ""),
        "published": getattr(entry, "published", ""),
        "content": get_entry_content(entry),
    }


def get_entry_content(entry) -> str:
    """Extract the best available content from an entry."""
    # Try to get full content first
    if hasattr(entry, "content") and entry.content:
        return entry.content[0].value if entry.content else ""

    # Fall back to summary
    return getattr(entry, "summary", "")


def filter_todays_articles(feed: feedparser.FeedParserDict) -> List[Dict[str, str]]:
    """Filter articles published today from the RSS feed."""
    todays_articles = []

    for entry in feed.entries:
        # Check multiple possible date fields
        published_date = getattr(entry, "published", None)
        updated_date = getattr(entry, "updated", None)

        entry_date = published_date or updated_date

        if is_today(entry_date):
            article_data = extract_article_data(entry)
            todays_articles.append(article_data)
            logger.info(f"Found today's article: {article_data['title']}")

    logger.info(f"Found {len(todays_articles)} articles from today")
    return todays_articles


def get_todays_articles(rss_url: str) -> List[Dict[str, str]]:
    """Main function to fetch and filter today's articles from an RSS feed."""
    feed = fetch_rss_feed(rss_url)
    return filter_todays_articles(feed)
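Finally, a short sketch of the fetcher on its own, printing the fields produced by extract_article_data(); the feed URL is a placeholder:

from rss2newsletter.rss_fetcher import get_todays_articles

articles = get_todays_articles("https://feeds.example.com/rss")
for article in articles:
    # Each dict carries title, link, summary, published, and content keys.
    print(article["published"], article["title"], article["link"])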