From cb494952118737b7cd894886e58a1a7ff25ca596 Mon Sep 17 00:00:00 2001 From: Alex Selimov Date: Mon, 14 Jul 2025 20:57:47 -0400 Subject: [PATCH] Initial integration - Clean up a few functions - Combine all components into a single pipeline --- src/reviewllama/cli.py | 51 +++++++++++-------------- src/reviewllama/configs.py | 15 ++++---- src/reviewllama/git_diff.py | 4 +- src/reviewllama/logger.py | 4 ++ src/reviewllama/reviewllama.py | 68 ++++++++++++++++++++++++++++++++++ 5 files changed, 103 insertions(+), 39 deletions(-) create mode 100644 src/reviewllama/reviewllama.py diff --git a/src/reviewllama/cli.py b/src/reviewllama/cli.py index 5e5da02..4cf35e4 100644 --- a/src/reviewllama/cli.py +++ b/src/reviewllama/cli.py @@ -3,15 +3,10 @@ import sys from pathlib import Path from typing import List, Optional -from reviewllama.git_diff import analyze_git_repository +from reviewllama.reviewllama import run_reviewllama -from .configs import ReviewConfig, create_config_from_vars -from .logger import ( - log_git_analysis_result, - log_git_analysis_start, - log_paths, - log_review_start, -) +from .configs import ReviewConfig, namespace_to_config +from .logger import log_paths, log_review_start def normalize_server_url(url: str) -> str: @@ -61,6 +56,23 @@ Examples: help="Base branch to compare against (default: %(default)s)", ) + parser.add_argument( + "--embedding_model", + dest="embedding_model", + default="nomic-embed-text", + help="Base branch to compare against (default: %(default)s)", + ) + + parser.add_argument( + "--system_prompt", + dest="system_prompt", + default=( + "You are a PR review assistant in charge of softare quality control. " + "You analyze code changes in the context of the full code base to verify style, " + "syntax, and functionality" + ), + help="Base branch to compare against (default: %(default)s)", + ) return parser @@ -70,39 +82,20 @@ def parse_raw_arguments(args: Optional[List[str]] = None) -> argparse.Namespace: return parser.parse_args(args) -def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig: - """Transform argparse namespace into ReviewConfig.""" - paths = [Path(path_str) for path_str in namespace.paths] - - return create_config_from_vars( - paths=paths, - model=namespace.model, - server_url=normalize_server_url(namespace.server_url), - # TODO: Update this system prompt. Either allow the user to provide it or engineer our own for this. - system_prompt="You are a helpful AI assistant", - base_branch=namespace.base_branch, - ) - - def parse_arguments(args: Optional[List[str]] = None) -> ReviewConfig: """Parse command line arguments and return validated configuration.""" raw_namespace = parse_raw_arguments(args) - return transform_namespace_to_config(raw_namespace) + return namespace_to_config(raw_namespace) def cli() -> None: """Main entry point for the CLI.""" try: config = parse_arguments() - # TODO: Pass config to review engine log_review_start(config) log_paths(config.paths) - for path in config.paths: - analysis = analyze_git_repository(path, config.base_branch) - log_git_analysis_start(path, config.base_branch) - log_git_analysis_result(analysis) - print(analysis.diffs) + run_reviewllama(config) except SystemExit: # argparse calls sys.exit on error, let it propagate diff --git a/src/reviewllama/configs.py b/src/reviewllama/configs.py index 352d02a..d577915 100644 --- a/src/reviewllama/configs.py +++ b/src/reviewllama/configs.py @@ -1,3 +1,4 @@ +import argparse from dataclasses import dataclass, field from pathlib import Path from typing import List @@ -48,15 +49,13 @@ def create_review_config( return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch) -def create_config_from_vars( - paths: List[Path], - model: str, - server_url: str, - system_prompt: str, - base_branch: str, +def namespace_to_config( + namespace: argparse.Namespace ): + """Transform argparse namespace into ReviewConfig.""" + paths = [Path(path_str) for path_str in namespace.paths] ollama_config = OllamaConfig( - chat_model=model, base_url=server_url, system_prompt=system_prompt + chat_model=namespace.model, base_url=namespace.server_url, system_prompt=namespace.system_prompt, embedding_model=namespace.embedding_model ) - return create_review_config(paths, ollama_config, base_branch) + return create_review_config(paths, ollama_config, namespace.base_branch) diff --git a/src/reviewllama/git_diff.py b/src/reviewllama/git_diff.py index 87dd811..8bf41d3 100644 --- a/src/reviewllama/git_diff.py +++ b/src/reviewllama/git_diff.py @@ -62,9 +62,9 @@ def branch_exists(repo: Repo, branch_name: str) -> bool: return False -def get_tracked_files(repo: Repo): +def get_tracked_files(repo: Repo) -> list[Path]: return [ - entry.abspath + Path(entry.abspath) for entry in repo.commit().tree.traverse() if Path(entry.abspath).is_file() ] diff --git a/src/reviewllama/logger.py b/src/reviewllama/logger.py index c1e1e2e..ec09bec 100644 --- a/src/reviewllama/logger.py +++ b/src/reviewllama/logger.py @@ -38,6 +38,10 @@ def log_paths(paths: List[Path]) -> None: console.print(f" • {path}") console.print() +def log_info(info: str) -> None: + """Log message with colored output.""" + console = create_console() + console.print(f"{info}") def log_error(error: str) -> None: """Log error message with colored output.""" diff --git a/src/reviewllama/reviewllama.py b/src/reviewllama/reviewllama.py new file mode 100644 index 0000000..db30ba0 --- /dev/null +++ b/src/reviewllama/reviewllama.py @@ -0,0 +1,68 @@ +from pathlib import Path + +from git import Repo +from langchain_core.vectorstores import VectorStoreRetriever + +from reviewllama.configs import OllamaConfig, ReviewConfig +from reviewllama.git_diff import (GitAnalysis, GitDiff, analyze_git_repository, + get_tracked_files) +from reviewllama.llm import ChatClient, chat_with_client, create_chat_client +from reviewllama.vector_store import create_retriever + +from .logger import log_git_analysis_result, log_git_analysis_start, log_info + + +def run_reviewllama(config: ReviewConfig): + for path in config.paths: + chat_client = create_and_log_chat_client(config.ollama) + analysis = create_and_log_git_diff_analysis(path, config.base_branch) + retriever = create_and_log_vector_store_retriever( + analysis.repo, config.ollama.embedding_model + ) + + for diff in analysis.diffs: + chat_client = get_suggestions(diff, retriever, chat_client) + + +def create_and_log_chat_client(config: OllamaConfig) -> ChatClient: + log_info("Initializing LLM chat client") + return create_chat_client(config) + + +def create_and_log_git_diff_analysis(path: Path, base_branch: str) -> GitAnalysis: + log_git_analysis_start(path, base_branch) + analysis = analyze_git_repository(path, base_branch) + log_git_analysis_result(analysis) + return analysis + + +def create_and_log_vector_store_retriever( + repo: Repo, embedding_model: str +) -> VectorStoreRetriever: + log_info("Creating vector_store...") + retriever = create_retriever( + get_tracked_files(repo), + embedding_model, + ) + log_info("Done creating vector store") + return retriever + + +def get_suggestions( + diff: GitDiff, retriever: VectorStoreRetriever, chat_client: ChatClient +) -> ChatClient: + new_client = chat_with_client(chat_client, craft_message(diff), retriever) + log_info(str(new_client.get_last_response_or_none())) + return new_client + + +def craft_message(diff) -> str: + return ( + "Review the following code changes and make up to three suggestions on " + "how to improve it. If the code is sufficiently simple or accurate then say " + "no suggestions can be found. Important issues you should consider are consistent " + "style, introduction of syntax errors, and potentially breaking changes in " + "interfaces/APIs that aren't properly handled.\n\n" + f"The original code:\n```\n{diff.old_content}\n```\n" + f"The new code:\n```\n{diff.new_content}```" + )