diff --git a/src/reviewllama/cli.py b/src/reviewllama/cli.py index 5e5da02..68d4345 100644 --- a/src/reviewllama/cli.py +++ b/src/reviewllama/cli.py @@ -3,15 +3,10 @@ import sys from pathlib import Path from typing import List, Optional -from reviewllama.git_diff import analyze_git_repository +from reviewllama.reviewllama import run_reviewllama -from .configs import ReviewConfig, create_config_from_vars -from .logger import ( - log_git_analysis_result, - log_git_analysis_start, - log_paths, - log_review_start, -) +from .configs import ReviewConfig, namespace_to_config +from .logger import log_paths, log_review_start def normalize_server_url(url: str) -> str: @@ -30,7 +25,7 @@ def create_argument_parser() -> argparse.ArgumentParser: epilog=""" Examples: reviewllama . --model gemma3:27b --server localhost:11434 - reviewllama src/ tests/ --model llama3.2:7b + reviewllama src/ tests/ --model gemma3:4b """, ) @@ -43,7 +38,7 @@ Examples: parser.add_argument( "--model", - default="llama3.2:3b", + default="gemma3:4b", help="Ollama model to use for code review (default: %(default)s)", ) @@ -61,6 +56,26 @@ Examples: help="Base branch to compare against (default: %(default)s)", ) + parser.add_argument( + "--embedding_model", + dest="embedding_model", + default="nomic-embed-text", + help="Base branch to compare against (default: %(default)s)", + ) + + parser.add_argument( + "--system_prompt", + dest="system_prompt", + default=( + "You are a PR review assistant in charge of softare quality control. " + "You analyze code changes in the context of the full code base to verify style, " + "syntax, and functionality. Each suggestion should consist of the original code, " + "suggested changes if relevant, and a short description of why the change is suggested " + "Examples\nInput:\n```\nvarialbe=1+1\n```\nOutput:\nOriginal:\n```\nvarialbe=1+1\n" + "Suggestion:\n```variable=1+1\n```\nReason: `varialbe` is likely a typo." + ), + help="Base branch to compare against (default: %(default)s)", + ) return parser @@ -70,39 +85,20 @@ def parse_raw_arguments(args: Optional[List[str]] = None) -> argparse.Namespace: return parser.parse_args(args) -def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig: - """Transform argparse namespace into ReviewConfig.""" - paths = [Path(path_str) for path_str in namespace.paths] - - return create_config_from_vars( - paths=paths, - model=namespace.model, - server_url=normalize_server_url(namespace.server_url), - # TODO: Update this system prompt. Either allow the user to provide it or engineer our own for this. - system_prompt="You are a helpful AI assistant", - base_branch=namespace.base_branch, - ) - - def parse_arguments(args: Optional[List[str]] = None) -> ReviewConfig: """Parse command line arguments and return validated configuration.""" raw_namespace = parse_raw_arguments(args) - return transform_namespace_to_config(raw_namespace) + return namespace_to_config(raw_namespace) def cli() -> None: """Main entry point for the CLI.""" try: config = parse_arguments() - # TODO: Pass config to review engine log_review_start(config) log_paths(config.paths) - for path in config.paths: - analysis = analyze_git_repository(path, config.base_branch) - log_git_analysis_start(path, config.base_branch) - log_git_analysis_result(analysis) - print(analysis.diffs) + run_reviewllama(config) except SystemExit: # argparse calls sys.exit on error, let it propagate diff --git a/src/reviewllama/configs.py b/src/reviewllama/configs.py index 352d02a..d577915 100644 --- a/src/reviewllama/configs.py +++ b/src/reviewllama/configs.py @@ -1,3 +1,4 @@ +import argparse from dataclasses import dataclass, field from pathlib import Path from typing import List @@ -48,15 +49,13 @@ def create_review_config( return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch) -def create_config_from_vars( - paths: List[Path], - model: str, - server_url: str, - system_prompt: str, - base_branch: str, +def namespace_to_config( + namespace: argparse.Namespace ): + """Transform argparse namespace into ReviewConfig.""" + paths = [Path(path_str) for path_str in namespace.paths] ollama_config = OllamaConfig( - chat_model=model, base_url=server_url, system_prompt=system_prompt + chat_model=namespace.model, base_url=namespace.server_url, system_prompt=namespace.system_prompt, embedding_model=namespace.embedding_model ) - return create_review_config(paths, ollama_config, base_branch) + return create_review_config(paths, ollama_config, namespace.base_branch) diff --git a/src/reviewllama/git_diff.py b/src/reviewllama/git_diff.py index 87dd811..8bf41d3 100644 --- a/src/reviewllama/git_diff.py +++ b/src/reviewllama/git_diff.py @@ -62,9 +62,9 @@ def branch_exists(repo: Repo, branch_name: str) -> bool: return False -def get_tracked_files(repo: Repo): +def get_tracked_files(repo: Repo) -> list[Path]: return [ - entry.abspath + Path(entry.abspath) for entry in repo.commit().tree.traverse() if Path(entry.abspath).is_file() ] diff --git a/src/reviewllama/llm.py b/src/reviewllama/llm.py index 85e6d96..73a7265 100644 --- a/src/reviewllama/llm.py +++ b/src/reviewllama/llm.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from pathlib import Path from typing import Any from langchain.memory import ConversationBufferMemory diff --git a/src/reviewllama/logger.py b/src/reviewllama/logger.py index c1e1e2e..15a5dc6 100644 --- a/src/reviewllama/logger.py +++ b/src/reviewllama/logger.py @@ -38,6 +38,10 @@ def log_paths(paths: List[Path]) -> None: console.print(f" • {path}") console.print() +def log_info(info: str) -> None: + """Log error message with colored output.""" + console = create_console() + console.print(f"{info}") def log_error(error: str) -> None: """Log error message with colored output.""" diff --git a/src/reviewllama/reviewllama.py b/src/reviewllama/reviewllama.py new file mode 100644 index 0000000..ae0cbba --- /dev/null +++ b/src/reviewllama/reviewllama.py @@ -0,0 +1,63 @@ +from pathlib import Path + +from git import Repo +from langchain_core.vectorstores import VectorStoreRetriever + +from reviewllama.configs import OllamaConfig, ReviewConfig +from reviewllama.git_diff import (GitAnalysis, GitDiff, analyze_git_repository, + get_tracked_files) +from reviewllama.llm import ChatClient, chat_with_client, create_chat_client +from reviewllama.vector_store import create_retriever + +from .logger import log_git_analysis_result, log_git_analysis_start, log_info + + +def run_reviewllama(config: ReviewConfig): + for path in config.paths: + chat_client = create_and_log_chat_client(config.ollama) + analysis = create_and_log_git_diff_analysis(path, config.base_branch) + retriever = create_and_log_vector_store_retriever(analysis.repo, config.ollama) + + for diff in analysis.diffs: + chat_client = get_suggestions(diff, retriever, chat_client) + + +def create_and_log_chat_client(config: OllamaConfig) -> ChatClient: + log_info("Initializing LLM chat client") + return create_chat_client(config) + + +def create_and_log_git_diff_analysis(path: Path, base_branch: str) -> GitAnalysis: + log_git_analysis_start(path, base_branch) + analysis = analyze_git_repository(path, base_branch) + log_git_analysis_result(analysis) + return analysis + + +def create_and_log_vector_store_retriever( + repo: Repo, config: OllamaConfig +) -> VectorStoreRetriever: + log_info("Creating vector_store...") + retriever = create_retriever(get_tracked_files(repo), config) + log_info("Done creating vector store") + return retriever + + +def get_suggestions( + diff: GitDiff, retriever: VectorStoreRetriever, chat_client: ChatClient +) -> ChatClient: + new_client = chat_with_client(chat_client, craft_message(diff), retriever) + log_info(str(new_client.get_last_response_or_none().content)) + return new_client + + +def craft_message(diff) -> str: + return ( + "Review the following code changes and make up to three suggestions on " + "how to improve it. If the code is sufficiently simple or accurate then say " + "no suggestions can be found. Important issues you should consider are consistent " + "style, introduction of syntax errors, and potentially breaking changes in " + "interfaces/APIs that aren't properly handled.\n\n" + f"The original code:\n```\n{diff.old_content}\n```\n" + f"The new code:\n```\n{diff.new_content}```" + ) diff --git a/src/reviewllama/vector_store.py b/src/reviewllama/vector_store.py index 47303c3..ac151d5 100644 --- a/src/reviewllama/vector_store.py +++ b/src/reviewllama/vector_store.py @@ -6,21 +6,25 @@ from langchain_core.documents.base import Document from langchain_core.vectorstores import VectorStoreRetriever from langchain_ollama.embeddings import OllamaEmbeddings +from .configs import OllamaConfig + def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]: return [doc for file_path in file_paths for doc in TextLoader(file_path).load()] def create_retriever( - file_paths: list[Path | str], embedding_model: str + file_paths: list[Path | str], config: OllamaConfig ) -> VectorStoreRetriever: - embeddings = OllamaEmbeddings(model=embedding_model) + embeddings = OllamaEmbeddings( + model=config.embedding_model, base_url=config.base_url + ) vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings) return vectorstore.as_retriever() def get_context_from_store(message: str, retriever: VectorStoreRetriever): - docs = retriever.get_relevant_documents(message) + docs = retriever.invoke(message) return "\n\n".join([doc.page_content for doc in docs]) diff --git a/tests/conftest.py b/tests/conftest.py index 2c9d092..2b4737c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,5 +6,5 @@ from reviewllama.configs import create_ollama_config @pytest.fixture def ollama_config(): return create_ollama_config( - "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0 + "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0, "nomic-embed-text" ) diff --git a/tests/test_vector_store.py b/tests/test_vector_store.py index f3a5c0b..025ccad 100644 --- a/tests/test_vector_store.py +++ b/tests/test_vector_store.py @@ -1,16 +1,13 @@ import os import tempfile from pathlib import Path -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import Mock, patch import pytest from langchain_core.documents.base import Document from langchain_core.vectorstores import VectorStoreRetriever -from reviewllama.utilities import is_ollama_available -from reviewllama.vector_store import (create_retriever, - documents_from_path_list, - get_context_from_store) +from reviewllama.vector_store import create_retriever, documents_from_path_list @pytest.fixture @@ -52,7 +49,9 @@ def test_load_documents(temp_files): @patch("reviewllama.vector_store.OllamaEmbeddings") @patch("reviewllama.vector_store.FAISS") @patch("reviewllama.vector_store.documents_from_path_list") -def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings): +def test_create_retriever( + mock_docs_from_list, mock_faiss, mock_embeddings, ollama_config +): """Test successful retriever creation""" # Setup mocks mock_docs = [Document(page_content="test", metadata={"source": "test.txt"})] @@ -67,83 +66,15 @@ def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings): mock_faiss.from_documents.return_value = mock_vectorstore # Test - result = create_retriever(["test.txt"], "test-embedding-model") + result = create_retriever(["test.txt"], ollama_config) # Assertions assert result == mock_retriever - mock_embeddings.assert_called_once_with(model="test-embedding-model") + mock_embeddings.assert_called_once_with( + model=ollama_config.embedding_model, base_url=ollama_config.base_url + ) mock_docs_from_list.assert_called_once_with(["test.txt"]) mock_faiss.from_documents.assert_called_once_with( mock_docs, mock_embedding_instance ) mock_vectorstore.as_retriever.assert_called_once() - -def test_get_context_from_store_success(): - """Test successful context retrieval""" - mock_retriever = Mock(spec=VectorStoreRetriever) - mock_docs = [ - Document(page_content="First relevant document", metadata={}), - Document(page_content="Second relevant document", metadata={}), - Document(page_content="Third relevant document", metadata={}), - ] - mock_retriever.get_relevant_documents.return_value = mock_docs - - result = get_context_from_store("test query", mock_retriever) - - expected = "First relevant document\n\nSecond relevant document\n\nThird relevant document" - assert result == expected - mock_retriever.get_relevant_documents.assert_called_once_with("test query") - -@patch('reviewllama.vector_store.OllamaEmbeddings') -@patch('reviewllama.vector_store.FAISS') -def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files): - """Test the full pipeline with mocked external dependencies""" - # Setup mocks - mock_embedding_instance = Mock() - mock_embeddings.return_value = mock_embedding_instance - - mock_vectorstore = Mock() - mock_retriever = Mock(spec=VectorStoreRetriever) - mock_retriever.get_relevant_documents.return_value = [ - Document(page_content="Relevant test content", metadata={}) - ] - mock_vectorstore.as_retriever.return_value = mock_retriever - mock_faiss.from_documents.return_value = mock_vectorstore - - # Test full pipeline - retriever = create_retriever(temp_files[:2], "test-model") - context = get_context_from_store("test query", retriever) - - assert context == "Relevant test content" - mock_embeddings.assert_called_once_with(model="test-model") - mock_retriever.get_relevant_documents.assert_called_once_with("test query") - -def test_documents_from_list_content_verification(temp_files): - """Test that documents contain expected content""" - docs = documents_from_path_list(temp_files) - - contents = [doc.page_content for doc in docs] - - # Check that we have the expected content - assert any("Python code examples" in content for content in contents) - assert any("JavaScript functions" in content for content in contents) - assert any("testing best practices" in content for content in contents) - assert any("deployment info" in content for content in contents) - -# Optional: Integration test that requires actual Ollama server -def test_create_retriever_with_real_ollama(temp_files, ollama_config): - """Integration test with real Ollama (requires server running)""" - if not is_ollama_available(ollama_config): - pytest.skip("Local Ollama server is not available") - try: - # This test would use a real embedding model - # Skip by default unless explicitly testing integration - retriever = create_retriever(temp_files[:2], "nomic-embed-text") - assert retriever is not None - - # Test actual retrieval - context = get_context_from_store("Python code", retriever) - assert isinstance(context, str) - - except Exception as e: - pytest.skip(f"Ollama server not available or model not found: {e}")