From cb494952118737b7cd894886e58a1a7ff25ca596 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Mon, 14 Jul 2025 20:57:47 -0400
Subject: [PATCH] Initial integration

- Clean up a few functions
- Combine all components into a single pipeline
---
 src/reviewllama/cli.py         | 51 +++++++++++--------------
 src/reviewllama/configs.py     | 15 ++++----
 src/reviewllama/git_diff.py    |  4 +-
 src/reviewllama/logger.py      |  4 ++
 src/reviewllama/reviewllama.py | 68 ++++++++++++++++++++++++++++++++++
 5 files changed, 103 insertions(+), 39 deletions(-)
 create mode 100644 src/reviewllama/reviewllama.py

diff --git a/src/reviewllama/cli.py b/src/reviewllama/cli.py
index 5e5da02..4cf35e4 100644
--- a/src/reviewllama/cli.py
+++ b/src/reviewllama/cli.py
@@ -3,15 +3,10 @@ import sys
 from pathlib import Path
 from typing import List, Optional
 
-from reviewllama.git_diff import analyze_git_repository
+from reviewllama.reviewllama import run_reviewllama
 
-from .configs import ReviewConfig, create_config_from_vars
-from .logger import (
-    log_git_analysis_result,
-    log_git_analysis_start,
-    log_paths,
-    log_review_start,
-)
+from .configs import ReviewConfig, namespace_to_config
+from .logger import log_paths, log_review_start
 
 
 def normalize_server_url(url: str) -> str:
@@ -61,6 +56,23 @@ Examples:
         help="Base branch to compare against (default: %(default)s)",
     )
 
+    parser.add_argument(
+        "--embedding_model",
+        dest="embedding_model",
+        default="nomic-embed-text",
+        help="Embedding model used to build the vector store (default: %(default)s)",
+    )
+
+    parser.add_argument(
+        "--system_prompt",
+        dest="system_prompt",
+        default=(
+            "You are a PR review assistant in charge of software quality control. "
+            "You analyze code changes in the context of the full code base to verify style, "
+            "syntax, and functionality"
+        ),
+        help="System prompt given to the chat model (default: %(default)s)",
+    )
     return parser
 
 
@@ -70,39 +82,20 @@ def parse_raw_arguments(args: Optional[List[str]] = None) -> argparse.Namespace:
     return parser.parse_args(args)
 
 
-def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig:
-    """Transform argparse namespace into ReviewConfig."""
-    paths = [Path(path_str) for path_str in namespace.paths]
-
-    return create_config_from_vars(
-        paths=paths,
-        model=namespace.model,
-        server_url=normalize_server_url(namespace.server_url),
-        # TODO: Update this system prompt. Either allow the user to provide it or engineer our own for this.
-        system_prompt="You are a helpful AI assistant",
-        base_branch=namespace.base_branch,
-    )
-
-
 def parse_arguments(args: Optional[List[str]] = None) -> ReviewConfig:
     """Parse command line arguments and return validated configuration."""
     raw_namespace = parse_raw_arguments(args)
-    return transform_namespace_to_config(raw_namespace)
+    return namespace_to_config(raw_namespace)
 
 
 def cli() -> None:
     """Main entry point for the CLI."""
     try:
         config = parse_arguments()
-        # TODO: Pass config to review engine
         log_review_start(config)
         log_paths(config.paths)
 
-        for path in config.paths:
-            analysis = analyze_git_repository(path, config.base_branch)
-            log_git_analysis_start(path, config.base_branch)
-            log_git_analysis_result(analysis)
-            print(analysis.diffs)
+        run_reviewllama(config)
 
     except SystemExit:
         # argparse calls sys.exit on error, let it propagate
diff --git a/src/reviewllama/configs.py b/src/reviewllama/configs.py
index 352d02a..d577915 100644
--- a/src/reviewllama/configs.py
+++ b/src/reviewllama/configs.py
@@ -1,3 +1,4 @@
+import argparse
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import List
@@ -48,15 +49,13 @@ def create_review_config(
     return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch)
 
 
-def create_config_from_vars(
-    paths: List[Path],
-    model: str,
-    server_url: str,
-    system_prompt: str,
-    base_branch: str,
+def namespace_to_config(
+        namespace: argparse.Namespace
 ):
+    """Transform argparse namespace into ReviewConfig."""
+    paths = [Path(path_str) for path_str in namespace.paths]
     ollama_config = OllamaConfig(
-        chat_model=model, base_url=server_url, system_prompt=system_prompt
+        chat_model=namespace.model, base_url=namespace.server_url, system_prompt=namespace.system_prompt, embedding_model=namespace.embedding_model
     )
 
-    return create_review_config(paths, ollama_config, base_branch)
+    return create_review_config(paths, ollama_config, namespace.base_branch)
diff --git a/src/reviewllama/git_diff.py b/src/reviewllama/git_diff.py
index 87dd811..8bf41d3 100644
--- a/src/reviewllama/git_diff.py
+++ b/src/reviewllama/git_diff.py
@@ -62,9 +62,9 @@ def branch_exists(repo: Repo, branch_name: str) -> bool:
         return False
 
 
-def get_tracked_files(repo: Repo):
+def get_tracked_files(repo: Repo) -> list[Path]:
     return [
-        entry.abspath
+        Path(entry.abspath)
         for entry in repo.commit().tree.traverse()
         if Path(entry.abspath).is_file()
     ]
diff --git a/src/reviewllama/logger.py b/src/reviewllama/logger.py
index c1e1e2e..ec09bec 100644
--- a/src/reviewllama/logger.py
+++ b/src/reviewllama/logger.py
@@ -38,6 +38,10 @@ def log_paths(paths: List[Path]) -> None:
         console.print(f"  • {path}")
     console.print()
 
+def log_info(info: str) -> None:
+    """Log message with colored output."""
+    console = create_console()
+    console.print(f"{info}")
 
 def log_error(error: str) -> None:
     """Log error message with colored output."""
diff --git a/src/reviewllama/reviewllama.py b/src/reviewllama/reviewllama.py
new file mode 100644
index 0000000..db30ba0
--- /dev/null
+++ b/src/reviewllama/reviewllama.py
@@ -0,0 +1,68 @@
+from pathlib import Path
+
+from git import Repo
+from langchain_core.vectorstores import VectorStoreRetriever
+
+from reviewllama.configs import OllamaConfig, ReviewConfig
+from reviewllama.git_diff import (GitAnalysis, GitDiff, analyze_git_repository,
+                                  get_tracked_files)
+from reviewllama.llm import ChatClient, chat_with_client, create_chat_client
+from reviewllama.vector_store import create_retriever
+
+from .logger import log_git_analysis_result, log_git_analysis_start, log_info
+
+
+def run_reviewllama(config: ReviewConfig):
+    for path in config.paths:
+        chat_client = create_and_log_chat_client(config.ollama)
+        analysis = create_and_log_git_diff_analysis(path, config.base_branch)
+        retriever = create_and_log_vector_store_retriever(
+            analysis.repo, config.ollama.embedding_model
+        )
+
+        for diff in analysis.diffs:
+            chat_client = get_suggestions(diff, retriever, chat_client)
+
+
+def create_and_log_chat_client(config: OllamaConfig) -> ChatClient:
+    log_info("Initializing LLM chat client")
+    return create_chat_client(config)
+
+
+def create_and_log_git_diff_analysis(path: Path, base_branch: str) -> GitAnalysis:
+    log_git_analysis_start(path, base_branch)
+    analysis = analyze_git_repository(path, base_branch)
+    log_git_analysis_result(analysis)
+    return analysis
+
+
+def create_and_log_vector_store_retriever(
+    repo: Repo, embedding_model: str
+) -> VectorStoreRetriever:
+    log_info("Creating vector_store...")
+    retriever = create_retriever(
+        get_tracked_files(repo),
+        embedding_model,
+    )
+    log_info("Done creating vector store")
+    return retriever
+
+
+def get_suggestions(
+    diff: GitDiff, retriever: VectorStoreRetriever, chat_client: ChatClient
+) -> ChatClient:
+    new_client = chat_with_client(chat_client, craft_message(diff), retriever)
+    log_info(str(new_client.get_last_response_or_none()))
+    return new_client
+
+
+def craft_message(diff) -> str:
+    return (
+        "Review the following code changes and make up to three suggestions on "
+        "how to improve it. If the code is sufficiently simple or accurate then say "
+        "no suggestions can be found. Important issues you should consider are consistent "
+        "style, introduction of syntax errors, and potentially breaking changes in "
+        "interfaces/APIs that aren't properly handled.\n\n"
+        f"The original code:\n```\n{diff.old_content}\n```\n"
+        f"The new code:\n```\n{diff.new_content}\n```"
+    )