diff --git a/src/reviewllama/cli.py b/src/reviewllama/cli.py index 68d4345..5e5da02 100644 --- a/src/reviewllama/cli.py +++ b/src/reviewllama/cli.py @@ -3,10 +3,15 @@ import sys from pathlib import Path from typing import List, Optional -from reviewllama.reviewllama import run_reviewllama +from reviewllama.git_diff import analyze_git_repository -from .configs import ReviewConfig, namespace_to_config -from .logger import log_paths, log_review_start +from .configs import ReviewConfig, create_config_from_vars +from .logger import ( + log_git_analysis_result, + log_git_analysis_start, + log_paths, + log_review_start, +) def normalize_server_url(url: str) -> str: @@ -25,7 +30,7 @@ def create_argument_parser() -> argparse.ArgumentParser: epilog=""" Examples: reviewllama . --model gemma3:27b --server localhost:11434 - reviewllama src/ tests/ --model gemma3:4b + reviewllama src/ tests/ --model llama3.2:7b """, ) @@ -38,7 +43,7 @@ Examples: parser.add_argument( "--model", - default="gemma3:4b", + default="llama3.2:3b", help="Ollama model to use for code review (default: %(default)s)", ) @@ -56,26 +61,6 @@ Examples: help="Base branch to compare against (default: %(default)s)", ) - parser.add_argument( - "--embedding_model", - dest="embedding_model", - default="nomic-embed-text", - help="Base branch to compare against (default: %(default)s)", - ) - - parser.add_argument( - "--system_prompt", - dest="system_prompt", - default=( - "You are a PR review assistant in charge of softare quality control. " - "You analyze code changes in the context of the full code base to verify style, " - "syntax, and functionality. Each suggestion should consist of the original code, " - "suggested changes if relevant, and a short description of why the change is suggested " - "Examples\nInput:\n```\nvarialbe=1+1\n```\nOutput:\nOriginal:\n```\nvarialbe=1+1\n" - "Suggestion:\n```variable=1+1\n```\nReason: `varialbe` is likely a typo." - ), - help="Base branch to compare against (default: %(default)s)", - ) return parser @@ -85,20 +70,39 @@ def parse_raw_arguments(args: Optional[List[str]] = None) -> argparse.Namespace: return parser.parse_args(args) +def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig: + """Transform argparse namespace into ReviewConfig.""" + paths = [Path(path_str) for path_str in namespace.paths] + + return create_config_from_vars( + paths=paths, + model=namespace.model, + server_url=normalize_server_url(namespace.server_url), + # TODO: Update this system prompt. Either allow the user to provide it or engineer our own for this. + system_prompt="You are a helpful AI assistant", + base_branch=namespace.base_branch, + ) + + def parse_arguments(args: Optional[List[str]] = None) -> ReviewConfig: """Parse command line arguments and return validated configuration.""" raw_namespace = parse_raw_arguments(args) - return namespace_to_config(raw_namespace) + return transform_namespace_to_config(raw_namespace) def cli() -> None: """Main entry point for the CLI.""" try: config = parse_arguments() + # TODO: Pass config to review engine log_review_start(config) log_paths(config.paths) - run_reviewllama(config) + for path in config.paths: + analysis = analyze_git_repository(path, config.base_branch) + log_git_analysis_start(path, config.base_branch) + log_git_analysis_result(analysis) + print(analysis.diffs) except SystemExit: # argparse calls sys.exit on error, let it propagate diff --git a/src/reviewllama/configs.py b/src/reviewllama/configs.py index d577915..352d02a 100644 --- a/src/reviewllama/configs.py +++ b/src/reviewllama/configs.py @@ -1,4 +1,3 @@ -import argparse from dataclasses import dataclass, field from pathlib import Path from typing import List @@ -49,13 +48,15 @@ def create_review_config( return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch) -def namespace_to_config( - namespace: argparse.Namespace +def create_config_from_vars( + paths: List[Path], + model: str, + server_url: str, + system_prompt: str, + base_branch: str, ): - """Transform argparse namespace into ReviewConfig.""" - paths = [Path(path_str) for path_str in namespace.paths] ollama_config = OllamaConfig( - chat_model=namespace.model, base_url=namespace.server_url, system_prompt=namespace.system_prompt, embedding_model=namespace.embedding_model + chat_model=model, base_url=server_url, system_prompt=system_prompt ) - return create_review_config(paths, ollama_config, namespace.base_branch) + return create_review_config(paths, ollama_config, base_branch) diff --git a/src/reviewllama/git_diff.py b/src/reviewllama/git_diff.py index 8bf41d3..87dd811 100644 --- a/src/reviewllama/git_diff.py +++ b/src/reviewllama/git_diff.py @@ -62,9 +62,9 @@ def branch_exists(repo: Repo, branch_name: str) -> bool: return False -def get_tracked_files(repo: Repo) -> list[Path]: +def get_tracked_files(repo: Repo): return [ - Path(entry.abspath) + entry.abspath for entry in repo.commit().tree.traverse() if Path(entry.abspath).is_file() ] diff --git a/src/reviewllama/llm.py b/src/reviewllama/llm.py index 73a7265..85e6d96 100644 --- a/src/reviewllama/llm.py +++ b/src/reviewllama/llm.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from pathlib import Path from typing import Any from langchain.memory import ConversationBufferMemory diff --git a/src/reviewllama/logger.py b/src/reviewllama/logger.py index 15a5dc6..c1e1e2e 100644 --- a/src/reviewllama/logger.py +++ b/src/reviewllama/logger.py @@ -38,10 +38,6 @@ def log_paths(paths: List[Path]) -> None: console.print(f" • {path}") console.print() -def log_info(info: str) -> None: - """Log error message with colored output.""" - console = create_console() - console.print(f"{info}") def log_error(error: str) -> None: """Log error message with colored output.""" diff --git a/src/reviewllama/reviewllama.py b/src/reviewllama/reviewllama.py deleted file mode 100644 index ae0cbba..0000000 --- a/src/reviewllama/reviewllama.py +++ /dev/null @@ -1,63 +0,0 @@ -from pathlib import Path - -from git import Repo -from langchain_core.vectorstores import VectorStoreRetriever - -from reviewllama.configs import OllamaConfig, ReviewConfig -from reviewllama.git_diff import (GitAnalysis, GitDiff, analyze_git_repository, - get_tracked_files) -from reviewllama.llm import ChatClient, chat_with_client, create_chat_client -from reviewllama.vector_store import create_retriever - -from .logger import log_git_analysis_result, log_git_analysis_start, log_info - - -def run_reviewllama(config: ReviewConfig): - for path in config.paths: - chat_client = create_and_log_chat_client(config.ollama) - analysis = create_and_log_git_diff_analysis(path, config.base_branch) - retriever = create_and_log_vector_store_retriever(analysis.repo, config.ollama) - - for diff in analysis.diffs: - chat_client = get_suggestions(diff, retriever, chat_client) - - -def create_and_log_chat_client(config: OllamaConfig) -> ChatClient: - log_info("Initializing LLM chat client") - return create_chat_client(config) - - -def create_and_log_git_diff_analysis(path: Path, base_branch: str) -> GitAnalysis: - log_git_analysis_start(path, base_branch) - analysis = analyze_git_repository(path, base_branch) - log_git_analysis_result(analysis) - return analysis - - -def create_and_log_vector_store_retriever( - repo: Repo, config: OllamaConfig -) -> VectorStoreRetriever: - log_info("Creating vector_store...") - retriever = create_retriever(get_tracked_files(repo), config) - log_info("Done creating vector store") - return retriever - - -def get_suggestions( - diff: GitDiff, retriever: VectorStoreRetriever, chat_client: ChatClient -) -> ChatClient: - new_client = chat_with_client(chat_client, craft_message(diff), retriever) - log_info(str(new_client.get_last_response_or_none().content)) - return new_client - - -def craft_message(diff) -> str: - return ( - "Review the following code changes and make up to three suggestions on " - "how to improve it. If the code is sufficiently simple or accurate then say " - "no suggestions can be found. Important issues you should consider are consistent " - "style, introduction of syntax errors, and potentially breaking changes in " - "interfaces/APIs that aren't properly handled.\n\n" - f"The original code:\n```\n{diff.old_content}\n```\n" - f"The new code:\n```\n{diff.new_content}```" - ) diff --git a/src/reviewllama/vector_store.py b/src/reviewllama/vector_store.py index ac151d5..47303c3 100644 --- a/src/reviewllama/vector_store.py +++ b/src/reviewllama/vector_store.py @@ -6,25 +6,21 @@ from langchain_core.documents.base import Document from langchain_core.vectorstores import VectorStoreRetriever from langchain_ollama.embeddings import OllamaEmbeddings -from .configs import OllamaConfig - def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]: return [doc for file_path in file_paths for doc in TextLoader(file_path).load()] def create_retriever( - file_paths: list[Path | str], config: OllamaConfig + file_paths: list[Path | str], embedding_model: str ) -> VectorStoreRetriever: - embeddings = OllamaEmbeddings( - model=config.embedding_model, base_url=config.base_url - ) + embeddings = OllamaEmbeddings(model=embedding_model) vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings) return vectorstore.as_retriever() def get_context_from_store(message: str, retriever: VectorStoreRetriever): - docs = retriever.invoke(message) + docs = retriever.get_relevant_documents(message) return "\n\n".join([doc.page_content for doc in docs]) diff --git a/tests/conftest.py b/tests/conftest.py index 2b4737c..2c9d092 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,5 +6,5 @@ from reviewllama.configs import create_ollama_config @pytest.fixture def ollama_config(): return create_ollama_config( - "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0, "nomic-embed-text" + "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0 ) diff --git a/tests/test_vector_store.py b/tests/test_vector_store.py index 025ccad..f3a5c0b 100644 --- a/tests/test_vector_store.py +++ b/tests/test_vector_store.py @@ -1,13 +1,16 @@ import os import tempfile from pathlib import Path -from unittest.mock import Mock, patch +from unittest.mock import MagicMock, Mock, patch import pytest from langchain_core.documents.base import Document from langchain_core.vectorstores import VectorStoreRetriever -from reviewllama.vector_store import create_retriever, documents_from_path_list +from reviewllama.utilities import is_ollama_available +from reviewllama.vector_store import (create_retriever, + documents_from_path_list, + get_context_from_store) @pytest.fixture @@ -49,9 +52,7 @@ def test_load_documents(temp_files): @patch("reviewllama.vector_store.OllamaEmbeddings") @patch("reviewllama.vector_store.FAISS") @patch("reviewllama.vector_store.documents_from_path_list") -def test_create_retriever( - mock_docs_from_list, mock_faiss, mock_embeddings, ollama_config -): +def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings): """Test successful retriever creation""" # Setup mocks mock_docs = [Document(page_content="test", metadata={"source": "test.txt"})] @@ -66,15 +67,83 @@ def test_create_retriever( mock_faiss.from_documents.return_value = mock_vectorstore # Test - result = create_retriever(["test.txt"], ollama_config) + result = create_retriever(["test.txt"], "test-embedding-model") # Assertions assert result == mock_retriever - mock_embeddings.assert_called_once_with( - model=ollama_config.embedding_model, base_url=ollama_config.base_url - ) + mock_embeddings.assert_called_once_with(model="test-embedding-model") mock_docs_from_list.assert_called_once_with(["test.txt"]) mock_faiss.from_documents.assert_called_once_with( mock_docs, mock_embedding_instance ) mock_vectorstore.as_retriever.assert_called_once() + +def test_get_context_from_store_success(): + """Test successful context retrieval""" + mock_retriever = Mock(spec=VectorStoreRetriever) + mock_docs = [ + Document(page_content="First relevant document", metadata={}), + Document(page_content="Second relevant document", metadata={}), + Document(page_content="Third relevant document", metadata={}), + ] + mock_retriever.get_relevant_documents.return_value = mock_docs + + result = get_context_from_store("test query", mock_retriever) + + expected = "First relevant document\n\nSecond relevant document\n\nThird relevant document" + assert result == expected + mock_retriever.get_relevant_documents.assert_called_once_with("test query") + +@patch('reviewllama.vector_store.OllamaEmbeddings') +@patch('reviewllama.vector_store.FAISS') +def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files): + """Test the full pipeline with mocked external dependencies""" + # Setup mocks + mock_embedding_instance = Mock() + mock_embeddings.return_value = mock_embedding_instance + + mock_vectorstore = Mock() + mock_retriever = Mock(spec=VectorStoreRetriever) + mock_retriever.get_relevant_documents.return_value = [ + Document(page_content="Relevant test content", metadata={}) + ] + mock_vectorstore.as_retriever.return_value = mock_retriever + mock_faiss.from_documents.return_value = mock_vectorstore + + # Test full pipeline + retriever = create_retriever(temp_files[:2], "test-model") + context = get_context_from_store("test query", retriever) + + assert context == "Relevant test content" + mock_embeddings.assert_called_once_with(model="test-model") + mock_retriever.get_relevant_documents.assert_called_once_with("test query") + +def test_documents_from_list_content_verification(temp_files): + """Test that documents contain expected content""" + docs = documents_from_path_list(temp_files) + + contents = [doc.page_content for doc in docs] + + # Check that we have the expected content + assert any("Python code examples" in content for content in contents) + assert any("JavaScript functions" in content for content in contents) + assert any("testing best practices" in content for content in contents) + assert any("deployment info" in content for content in contents) + +# Optional: Integration test that requires actual Ollama server +def test_create_retriever_with_real_ollama(temp_files, ollama_config): + """Integration test with real Ollama (requires server running)""" + if not is_ollama_available(ollama_config): + pytest.skip("Local Ollama server is not available") + try: + # This test would use a real embedding model + # Skip by default unless explicitly testing integration + retriever = create_retriever(temp_files[:2], "nomic-embed-text") + assert retriever is not None + + # Test actual retrieval + context = get_context_from_store("Python code", retriever) + assert isinstance(context, str) + + except Exception as e: + pytest.skip(f"Ollama server not available or model not found: {e}")