First working version of reviewLlama with some different default

Clean up tests and add base_url to embedding model
Initial integration
2025-07-15 21:45:56 -04:00 · 2025-07-15 20:53:59 -04:00 · 2025-07-14 20:57:47 -04:00
9 changed files with 120 additions and 124 deletions
--- a/src/reviewllama/cli.py
+++ b/src/reviewllama/cli.py
@ -3,15 +3,10 @@ import sys
 from pathlib import Path
 from typing import List, Optional
-from reviewllama.git_diff import analyze_git_repository
+from reviewllama.reviewllama import run_reviewllama
-from .configs import ReviewConfig, create_config_from_vars
+from .configs import ReviewConfig, namespace_to_config
-from .logger import (
+from .logger import log_paths, log_review_start
    log_git_analysis_result,
    log_git_analysis_start,
    log_paths,
    log_review_start,
 )
 def normalize_server_url(url: str) -> str:
@ -30,7 +25,7 @@ def create_argument_parser() -> argparse.ArgumentParser:
        epilog="""
 Examples:
  reviewllama . --model gemma3:27b --server localhost:11434
-  reviewllama src/ tests/ --model llama3.2:7b 
+  reviewllama src/ tests/ --model gemma3:4b
        """,
    )
@ -43,7 +38,7 @@ Examples:
    parser.add_argument(
        "--model",
-        default="llama3.2:3b",
+        default="gemma3:4b",
        help="Ollama model to use for code review (default: %(default)s)",
    )
@ -61,6 +56,26 @@ Examples:
        help="Base branch to compare against (default: %(default)s)",
    )
    parser.add_argument(
        "--embedding_model",
        dest="embedding_model",
        default="nomic-embed-text",
        help="Base branch to compare against (default: %(default)s)",
    )
    parser.add_argument(
        "--system_prompt",
        dest="system_prompt",
        default=(
            "You are a PR review assistant in charge of softare quality control. "
            "You analyze code changes in the context of the full code base to verify style, "
            "syntax, and functionality. Each suggestion should consist of the original code, "
            "suggested changes if relevant, and a short description of why the change is suggested "
            "Examples\nInput:\n```\nvarialbe=1+1\n```\nOutput:\nOriginal:\n```\nvarialbe=1+1\n"
            "Suggestion:\n```variable=1+1\n```\nReason: `varialbe` is likely a typo."
        ),
        help="Base branch to compare against (default: %(default)s)",
    )
    return parser
@ -70,39 +85,20 @@ def parse_raw_arguments(args: Optional[List[str]] = None) -> argparse.Namespace:
    return parser.parse_args(args)
 def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig:
    """Transform argparse namespace into ReviewConfig."""
    paths = [Path(path_str) for path_str in namespace.paths]
    return create_config_from_vars(
        paths=paths,
        model=namespace.model,
        server_url=normalize_server_url(namespace.server_url),
        # TODO: Update this system prompt. Either allow the user to provide it or engineer our own for this.
        system_prompt="You are a helpful AI assistant",
        base_branch=namespace.base_branch,
    )
 def parse_arguments(args: Optional[List[str]] = None) -> ReviewConfig:
    """Parse command line arguments and return validated configuration."""
    raw_namespace = parse_raw_arguments(args)
-    return transform_namespace_to_config(raw_namespace)
+    return namespace_to_config(raw_namespace)
 def cli() -> None:
    """Main entry point for the CLI."""
    try:
        config = parse_arguments()
        # TODO: Pass config to review engine
        log_review_start(config)
        log_paths(config.paths)
-        for path in config.paths:
+        run_reviewllama(config)
            analysis = analyze_git_repository(path, config.base_branch)
            log_git_analysis_start(path, config.base_branch)
            log_git_analysis_result(analysis)
            print(analysis.diffs)
    except SystemExit:
        # argparse calls sys.exit on error, let it propagate
--- a/src/reviewllama/configs.py
+++ b/src/reviewllama/configs.py
@ -1,3 +1,4 @@
 import argparse
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import List
@ -48,15 +49,13 @@ def create_review_config(
    return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch)
-def create_config_from_vars(
+def namespace_to_config(
-    paths: List[Path],
+        namespace: argparse.Namespace
    model: str,
    server_url: str,
    system_prompt: str,
    base_branch: str,
 ):
    """Transform argparse namespace into ReviewConfig."""
    paths = [Path(path_str) for path_str in namespace.paths]
    ollama_config = OllamaConfig(
-        chat_model=model, base_url=server_url, system_prompt=system_prompt
+        chat_model=namespace.model, base_url=namespace.server_url, system_prompt=namespace.system_prompt, embedding_model=namespace.embedding_model
    )
-    return create_review_config(paths, ollama_config, base_branch)
+    return create_review_config(paths, ollama_config, namespace.base_branch)
--- a/src/reviewllama/git_diff.py
+++ b/src/reviewllama/git_diff.py
@ -62,9 +62,9 @@ def branch_exists(repo: Repo, branch_name: str) -> bool:
        return False
-def get_tracked_files(repo: Repo):
+def get_tracked_files(repo: Repo) -> list[Path]:
    return [
-        entry.abspath
+        Path(entry.abspath)
        for entry in repo.commit().tree.traverse()
        if Path(entry.abspath).is_file()
    ]
--- a/src/reviewllama/llm.py
+++ b/src/reviewllama/llm.py
@ -1,5 +1,4 @@
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
 from langchain.memory import ConversationBufferMemory
--- a/src/reviewllama/logger.py
+++ b/src/reviewllama/logger.py
@ -38,6 +38,10 @@ def log_paths(paths: List[Path]) -> None:
        console.print(f"  • {path}")
    console.print()
 def log_info(info: str) -> None:
    """Log error message with colored output."""
    console = create_console()
    console.print(f"{info}")
 def log_error(error: str) -> None:
    """Log error message with colored output."""
--- a/src/reviewllama/reviewllama.py
+++ b/src/reviewllama/reviewllama.py
@ -0,0 +1,63 @@
 from pathlib import Path
 from git import Repo
 from langchain_core.vectorstores import VectorStoreRetriever
 from reviewllama.configs import OllamaConfig, ReviewConfig
 from reviewllama.git_diff import (GitAnalysis, GitDiff, analyze_git_repository,
                                  get_tracked_files)
 from reviewllama.llm import ChatClient, chat_with_client, create_chat_client
 from reviewllama.vector_store import create_retriever
 from .logger import log_git_analysis_result, log_git_analysis_start, log_info
 def run_reviewllama(config: ReviewConfig):
    for path in config.paths:
        chat_client = create_and_log_chat_client(config.ollama)
        analysis = create_and_log_git_diff_analysis(path, config.base_branch)
        retriever = create_and_log_vector_store_retriever(analysis.repo, config.ollama)
        for diff in analysis.diffs:
            chat_client = get_suggestions(diff, retriever, chat_client)
 def create_and_log_chat_client(config: OllamaConfig) -> ChatClient:
    log_info("Initializing LLM chat client")
    return create_chat_client(config)
 def create_and_log_git_diff_analysis(path: Path, base_branch: str) -> GitAnalysis:
    log_git_analysis_start(path, base_branch)
    analysis = analyze_git_repository(path, base_branch)
    log_git_analysis_result(analysis)
    return analysis
 def create_and_log_vector_store_retriever(
    repo: Repo, config: OllamaConfig
 ) -> VectorStoreRetriever:
    log_info("Creating vector_store...")
    retriever = create_retriever(get_tracked_files(repo), config)
    log_info("Done creating vector store")
    return retriever
 def get_suggestions(
    diff: GitDiff, retriever: VectorStoreRetriever, chat_client: ChatClient
 ) -> ChatClient:
    new_client = chat_with_client(chat_client, craft_message(diff), retriever)
    log_info(str(new_client.get_last_response_or_none().content))
    return new_client
 def craft_message(diff) -> str:
    return (
        "Review the following code changes and make up to three suggestions on "
        "how to improve it. If the code is sufficiently simple or accurate then say "
        "no suggestions can be found. Important issues you should consider are consistent "
        "style, introduction of syntax errors, and potentially breaking changes in "
        "interfaces/APIs that aren't properly handled.\n\n"
        f"The original code:\n```\n{diff.old_content}\n```\n"
        f"The new code:\n```\n{diff.new_content}```"
    )
--- a/src/reviewllama/vector_store.py
+++ b/src/reviewllama/vector_store.py
@ -6,21 +6,25 @@ from langchain_core.documents.base import Document
 from langchain_core.vectorstores import VectorStoreRetriever
 from langchain_ollama.embeddings import OllamaEmbeddings
 from .configs import OllamaConfig
 def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]:
    return [doc for file_path in file_paths for doc in TextLoader(file_path).load()]
 def create_retriever(
-    file_paths: list[Path | str], embedding_model: str
+    file_paths: list[Path | str], config: OllamaConfig
 ) -> VectorStoreRetriever:
-    embeddings = OllamaEmbeddings(model=embedding_model)
+    embeddings = OllamaEmbeddings(
        model=config.embedding_model, base_url=config.base_url
    )
    vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings)
    return vectorstore.as_retriever()
 def get_context_from_store(message: str, retriever: VectorStoreRetriever):
-    docs = retriever.get_relevant_documents(message)
+    docs = retriever.invoke(message)
    return "\n\n".join([doc.page_content for doc in docs])
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -6,5 +6,5 @@ from reviewllama.configs import create_ollama_config
@pytest.fixture
 def ollama_config():
    return create_ollama_config(
-        "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0
+        "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0, "nomic-embed-text"
    )
--- a/tests/test_vector_store.py
+++ b/tests/test_vector_store.py
@ -1,16 +1,13 @@
 import os
 import tempfile
 from pathlib import Path
-from unittest.mock import MagicMock, Mock, patch
+from unittest.mock import Mock, patch
 import pytest
 from langchain_core.documents.base import Document
 from langchain_core.vectorstores import VectorStoreRetriever
-from reviewllama.utilities import is_ollama_available
+from reviewllama.vector_store import create_retriever, documents_from_path_list
 from reviewllama.vector_store import (create_retriever,
                                      documents_from_path_list,
                                      get_context_from_store)
@pytest.fixture
@ -52,7 +49,9 @@ def test_load_documents(temp_files):
@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
@patch("reviewllama.vector_store.documents_from_path_list")
-def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
+def test_create_retriever(
    mock_docs_from_list, mock_faiss, mock_embeddings, ollama_config
 ):
    """Test successful retriever creation"""
    # Setup mocks
    mock_docs = [Document(page_content="test", metadata={"source": "test.txt"})]
@ -67,83 +66,15 @@ def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
    mock_faiss.from_documents.return_value = mock_vectorstore
    # Test
-    result = create_retriever(["test.txt"], "test-embedding-model")
+    result = create_retriever(["test.txt"], ollama_config)
    # Assertions
    assert result == mock_retriever
-    mock_embeddings.assert_called_once_with(model="test-embedding-model")
+    mock_embeddings.assert_called_once_with(
        model=ollama_config.embedding_model, base_url=ollama_config.base_url
    )
    mock_docs_from_list.assert_called_once_with(["test.txt"])
    mock_faiss.from_documents.assert_called_once_with(
        mock_docs, mock_embedding_instance
    )
    mock_vectorstore.as_retriever.assert_called_once()
 def test_get_context_from_store_success():
    """Test successful context retrieval"""
    mock_retriever = Mock(spec=VectorStoreRetriever)
    mock_docs = [
        Document(page_content="First relevant document", metadata={}),
        Document(page_content="Second relevant document", metadata={}),
        Document(page_content="Third relevant document", metadata={}),
    ]
    mock_retriever.get_relevant_documents.return_value = mock_docs
    result = get_context_from_store("test query", mock_retriever)
    expected = "First relevant document\n\nSecond relevant document\n\nThird relevant document"
    assert result == expected
    mock_retriever.get_relevant_documents.assert_called_once_with("test query")
@patch('reviewllama.vector_store.OllamaEmbeddings')
@patch('reviewllama.vector_store.FAISS')
 def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files):
    """Test the full pipeline with mocked external dependencies"""
    # Setup mocks
    mock_embedding_instance = Mock()
    mock_embeddings.return_value = mock_embedding_instance
    mock_vectorstore = Mock()
    mock_retriever = Mock(spec=VectorStoreRetriever)
    mock_retriever.get_relevant_documents.return_value = [
        Document(page_content="Relevant test content", metadata={})
    ]
    mock_vectorstore.as_retriever.return_value = mock_retriever
    mock_faiss.from_documents.return_value = mock_vectorstore
    # Test full pipeline
    retriever = create_retriever(temp_files[:2], "test-model")
    context = get_context_from_store("test query", retriever)
    assert context == "Relevant test content"
    mock_embeddings.assert_called_once_with(model="test-model")
    mock_retriever.get_relevant_documents.assert_called_once_with("test query")
 def test_documents_from_list_content_verification(temp_files):
    """Test that documents contain expected content"""
    docs = documents_from_path_list(temp_files)
    contents = [doc.page_content for doc in docs]
    # Check that we have the expected content
    assert any("Python code examples" in content for content in contents)
    assert any("JavaScript functions" in content for content in contents)
    assert any("testing best practices" in content for content in contents)
    assert any("deployment info" in content for content in contents)
 # Optional: Integration test that requires actual Ollama server
 def test_create_retriever_with_real_ollama(temp_files, ollama_config):
    """Integration test with real Ollama (requires server running)"""
    if not is_ollama_available(ollama_config):
        pytest.skip("Local Ollama server is not available")
    try:
        # This test would use a real embedding model
        # Skip by default unless explicitly testing integration
        retriever = create_retriever(temp_files[:2], "nomic-embed-text")
        assert retriever is not None
        # Test actual retrieval
        context = get_context_from_store("Python code", retriever)
        assert isinstance(context, str)
    except Exception as e:
        pytest.skip(f"Ollama server not available or model not found: {e}")
Author	SHA1	Message	Date
Alex Selimov	f8044a043b	First working version of reviewLlama with some different default	2025-07-15 21:45:56 -04:00
Alex Selimov	d917a9c067	Clean up tests and add base_url to embedding model	2025-07-15 20:53:59 -04:00
Alex Selimov	cb49495211	Initial integration - Clean up a few functions - Combine all components into a single pipeline	2025-07-14 20:57:47 -04:00