Compare commits

...

3 commits

Author SHA1 Message Date
f8044a043b
First working version of reviewLlama with some different default 2025-07-15 21:45:56 -04:00
d917a9c067
Clean up tests and add base_url to embedding model 2025-07-15 20:53:59 -04:00
cb49495211
Initial integration
- Clean up a few functions
- Combine all components into a single pipeline
2025-07-14 20:57:47 -04:00
9 changed files with 120 additions and 124 deletions

View file

@ -3,15 +3,10 @@ import sys
from pathlib import Path from pathlib import Path
from typing import List, Optional from typing import List, Optional
from reviewllama.git_diff import analyze_git_repository from reviewllama.reviewllama import run_reviewllama
from .configs import ReviewConfig, create_config_from_vars from .configs import ReviewConfig, namespace_to_config
from .logger import ( from .logger import log_paths, log_review_start
log_git_analysis_result,
log_git_analysis_start,
log_paths,
log_review_start,
)
def normalize_server_url(url: str) -> str: def normalize_server_url(url: str) -> str:
@ -30,7 +25,7 @@ def create_argument_parser() -> argparse.ArgumentParser:
epilog=""" epilog="""
Examples: Examples:
reviewllama . --model gemma3:27b --server localhost:11434 reviewllama . --model gemma3:27b --server localhost:11434
reviewllama src/ tests/ --model llama3.2:7b reviewllama src/ tests/ --model gemma3:4b
""", """,
) )
@ -43,7 +38,7 @@ Examples:
parser.add_argument( parser.add_argument(
"--model", "--model",
default="llama3.2:3b", default="gemma3:4b",
help="Ollama model to use for code review (default: %(default)s)", help="Ollama model to use for code review (default: %(default)s)",
) )
@ -61,6 +56,26 @@ Examples:
help="Base branch to compare against (default: %(default)s)", help="Base branch to compare against (default: %(default)s)",
) )
parser.add_argument(
"--embedding_model",
dest="embedding_model",
default="nomic-embed-text",
help="Base branch to compare against (default: %(default)s)",
)
parser.add_argument(
"--system_prompt",
dest="system_prompt",
default=(
"You are a PR review assistant in charge of softare quality control. "
"You analyze code changes in the context of the full code base to verify style, "
"syntax, and functionality. Each suggestion should consist of the original code, "
"suggested changes if relevant, and a short description of why the change is suggested "
"Examples\nInput:\n```\nvarialbe=1+1\n```\nOutput:\nOriginal:\n```\nvarialbe=1+1\n"
"Suggestion:\n```variable=1+1\n```\nReason: `varialbe` is likely a typo."
),
help="Base branch to compare against (default: %(default)s)",
)
return parser return parser
@ -70,39 +85,20 @@ def parse_raw_arguments(args: Optional[List[str]] = None) -> argparse.Namespace:
return parser.parse_args(args) return parser.parse_args(args)
def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig:
"""Transform argparse namespace into ReviewConfig."""
paths = [Path(path_str) for path_str in namespace.paths]
return create_config_from_vars(
paths=paths,
model=namespace.model,
server_url=normalize_server_url(namespace.server_url),
# TODO: Update this system prompt. Either allow the user to provide it or engineer our own for this.
system_prompt="You are a helpful AI assistant",
base_branch=namespace.base_branch,
)
def parse_arguments(args: Optional[List[str]] = None) -> ReviewConfig: def parse_arguments(args: Optional[List[str]] = None) -> ReviewConfig:
"""Parse command line arguments and return validated configuration.""" """Parse command line arguments and return validated configuration."""
raw_namespace = parse_raw_arguments(args) raw_namespace = parse_raw_arguments(args)
return transform_namespace_to_config(raw_namespace) return namespace_to_config(raw_namespace)
def cli() -> None: def cli() -> None:
"""Main entry point for the CLI.""" """Main entry point for the CLI."""
try: try:
config = parse_arguments() config = parse_arguments()
# TODO: Pass config to review engine
log_review_start(config) log_review_start(config)
log_paths(config.paths) log_paths(config.paths)
for path in config.paths: run_reviewllama(config)
analysis = analyze_git_repository(path, config.base_branch)
log_git_analysis_start(path, config.base_branch)
log_git_analysis_result(analysis)
print(analysis.diffs)
except SystemExit: except SystemExit:
# argparse calls sys.exit on error, let it propagate # argparse calls sys.exit on error, let it propagate

View file

@ -1,3 +1,4 @@
import argparse
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import List from typing import List
@ -48,15 +49,13 @@ def create_review_config(
return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch) return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch)
def create_config_from_vars( def namespace_to_config(
paths: List[Path], namespace: argparse.Namespace
model: str,
server_url: str,
system_prompt: str,
base_branch: str,
): ):
"""Transform argparse namespace into ReviewConfig."""
paths = [Path(path_str) for path_str in namespace.paths]
ollama_config = OllamaConfig( ollama_config = OllamaConfig(
chat_model=model, base_url=server_url, system_prompt=system_prompt chat_model=namespace.model, base_url=namespace.server_url, system_prompt=namespace.system_prompt, embedding_model=namespace.embedding_model
) )
return create_review_config(paths, ollama_config, base_branch) return create_review_config(paths, ollama_config, namespace.base_branch)

View file

@ -62,9 +62,9 @@ def branch_exists(repo: Repo, branch_name: str) -> bool:
return False return False
def get_tracked_files(repo: Repo): def get_tracked_files(repo: Repo) -> list[Path]:
return [ return [
entry.abspath Path(entry.abspath)
for entry in repo.commit().tree.traverse() for entry in repo.commit().tree.traverse()
if Path(entry.abspath).is_file() if Path(entry.abspath).is_file()
] ]

View file

@ -1,5 +1,4 @@
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path
from typing import Any from typing import Any
from langchain.memory import ConversationBufferMemory from langchain.memory import ConversationBufferMemory

View file

@ -38,6 +38,10 @@ def log_paths(paths: List[Path]) -> None:
console.print(f"{path}") console.print(f"{path}")
console.print() console.print()
def log_info(info: str) -> None:
"""Log error message with colored output."""
console = create_console()
console.print(f"{info}")
def log_error(error: str) -> None: def log_error(error: str) -> None:
"""Log error message with colored output.""" """Log error message with colored output."""

View file

@ -0,0 +1,63 @@
from pathlib import Path
from git import Repo
from langchain_core.vectorstores import VectorStoreRetriever
from reviewllama.configs import OllamaConfig, ReviewConfig
from reviewllama.git_diff import (GitAnalysis, GitDiff, analyze_git_repository,
get_tracked_files)
from reviewllama.llm import ChatClient, chat_with_client, create_chat_client
from reviewllama.vector_store import create_retriever
from .logger import log_git_analysis_result, log_git_analysis_start, log_info
def run_reviewllama(config: ReviewConfig):
for path in config.paths:
chat_client = create_and_log_chat_client(config.ollama)
analysis = create_and_log_git_diff_analysis(path, config.base_branch)
retriever = create_and_log_vector_store_retriever(analysis.repo, config.ollama)
for diff in analysis.diffs:
chat_client = get_suggestions(diff, retriever, chat_client)
def create_and_log_chat_client(config: OllamaConfig) -> ChatClient:
log_info("Initializing LLM chat client")
return create_chat_client(config)
def create_and_log_git_diff_analysis(path: Path, base_branch: str) -> GitAnalysis:
log_git_analysis_start(path, base_branch)
analysis = analyze_git_repository(path, base_branch)
log_git_analysis_result(analysis)
return analysis
def create_and_log_vector_store_retriever(
repo: Repo, config: OllamaConfig
) -> VectorStoreRetriever:
log_info("Creating vector_store...")
retriever = create_retriever(get_tracked_files(repo), config)
log_info("Done creating vector store")
return retriever
def get_suggestions(
diff: GitDiff, retriever: VectorStoreRetriever, chat_client: ChatClient
) -> ChatClient:
new_client = chat_with_client(chat_client, craft_message(diff), retriever)
log_info(str(new_client.get_last_response_or_none().content))
return new_client
def craft_message(diff) -> str:
return (
"Review the following code changes and make up to three suggestions on "
"how to improve it. If the code is sufficiently simple or accurate then say "
"no suggestions can be found. Important issues you should consider are consistent "
"style, introduction of syntax errors, and potentially breaking changes in "
"interfaces/APIs that aren't properly handled.\n\n"
f"The original code:\n```\n{diff.old_content}\n```\n"
f"The new code:\n```\n{diff.new_content}```"
)

View file

@ -6,21 +6,25 @@ from langchain_core.documents.base import Document
from langchain_core.vectorstores import VectorStoreRetriever from langchain_core.vectorstores import VectorStoreRetriever
from langchain_ollama.embeddings import OllamaEmbeddings from langchain_ollama.embeddings import OllamaEmbeddings
from .configs import OllamaConfig
def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]: def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]:
return [doc for file_path in file_paths for doc in TextLoader(file_path).load()] return [doc for file_path in file_paths for doc in TextLoader(file_path).load()]
def create_retriever( def create_retriever(
file_paths: list[Path | str], embedding_model: str file_paths: list[Path | str], config: OllamaConfig
) -> VectorStoreRetriever: ) -> VectorStoreRetriever:
embeddings = OllamaEmbeddings(model=embedding_model) embeddings = OllamaEmbeddings(
model=config.embedding_model, base_url=config.base_url
)
vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings) vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings)
return vectorstore.as_retriever() return vectorstore.as_retriever()
def get_context_from_store(message: str, retriever: VectorStoreRetriever): def get_context_from_store(message: str, retriever: VectorStoreRetriever):
docs = retriever.get_relevant_documents(message) docs = retriever.invoke(message)
return "\n\n".join([doc.page_content for doc in docs]) return "\n\n".join([doc.page_content for doc in docs])

View file

@ -6,5 +6,5 @@ from reviewllama.configs import create_ollama_config
@pytest.fixture @pytest.fixture
def ollama_config(): def ollama_config():
return create_ollama_config( return create_ollama_config(
"gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0 "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0, "nomic-embed-text"
) )

View file

@ -1,16 +1,13 @@
import os import os
import tempfile import tempfile
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, Mock, patch from unittest.mock import Mock, patch
import pytest import pytest
from langchain_core.documents.base import Document from langchain_core.documents.base import Document
from langchain_core.vectorstores import VectorStoreRetriever from langchain_core.vectorstores import VectorStoreRetriever
from reviewllama.utilities import is_ollama_available from reviewllama.vector_store import create_retriever, documents_from_path_list
from reviewllama.vector_store import (create_retriever,
documents_from_path_list,
get_context_from_store)
@pytest.fixture @pytest.fixture
@ -52,7 +49,9 @@ def test_load_documents(temp_files):
@patch("reviewllama.vector_store.OllamaEmbeddings") @patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS") @patch("reviewllama.vector_store.FAISS")
@patch("reviewllama.vector_store.documents_from_path_list") @patch("reviewllama.vector_store.documents_from_path_list")
def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings): def test_create_retriever(
mock_docs_from_list, mock_faiss, mock_embeddings, ollama_config
):
"""Test successful retriever creation""" """Test successful retriever creation"""
# Setup mocks # Setup mocks
mock_docs = [Document(page_content="test", metadata={"source": "test.txt"})] mock_docs = [Document(page_content="test", metadata={"source": "test.txt"})]
@ -67,83 +66,15 @@ def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
mock_faiss.from_documents.return_value = mock_vectorstore mock_faiss.from_documents.return_value = mock_vectorstore
# Test # Test
result = create_retriever(["test.txt"], "test-embedding-model") result = create_retriever(["test.txt"], ollama_config)
# Assertions # Assertions
assert result == mock_retriever assert result == mock_retriever
mock_embeddings.assert_called_once_with(model="test-embedding-model") mock_embeddings.assert_called_once_with(
model=ollama_config.embedding_model, base_url=ollama_config.base_url
)
mock_docs_from_list.assert_called_once_with(["test.txt"]) mock_docs_from_list.assert_called_once_with(["test.txt"])
mock_faiss.from_documents.assert_called_once_with( mock_faiss.from_documents.assert_called_once_with(
mock_docs, mock_embedding_instance mock_docs, mock_embedding_instance
) )
mock_vectorstore.as_retriever.assert_called_once() mock_vectorstore.as_retriever.assert_called_once()
def test_get_context_from_store_success():
"""Test successful context retrieval"""
mock_retriever = Mock(spec=VectorStoreRetriever)
mock_docs = [
Document(page_content="First relevant document", metadata={}),
Document(page_content="Second relevant document", metadata={}),
Document(page_content="Third relevant document", metadata={}),
]
mock_retriever.get_relevant_documents.return_value = mock_docs
result = get_context_from_store("test query", mock_retriever)
expected = "First relevant document\n\nSecond relevant document\n\nThird relevant document"
assert result == expected
mock_retriever.get_relevant_documents.assert_called_once_with("test query")
@patch('reviewllama.vector_store.OllamaEmbeddings')
@patch('reviewllama.vector_store.FAISS')
def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files):
"""Test the full pipeline with mocked external dependencies"""
# Setup mocks
mock_embedding_instance = Mock()
mock_embeddings.return_value = mock_embedding_instance
mock_vectorstore = Mock()
mock_retriever = Mock(spec=VectorStoreRetriever)
mock_retriever.get_relevant_documents.return_value = [
Document(page_content="Relevant test content", metadata={})
]
mock_vectorstore.as_retriever.return_value = mock_retriever
mock_faiss.from_documents.return_value = mock_vectorstore
# Test full pipeline
retriever = create_retriever(temp_files[:2], "test-model")
context = get_context_from_store("test query", retriever)
assert context == "Relevant test content"
mock_embeddings.assert_called_once_with(model="test-model")
mock_retriever.get_relevant_documents.assert_called_once_with("test query")
def test_documents_from_list_content_verification(temp_files):
"""Test that documents contain expected content"""
docs = documents_from_path_list(temp_files)
contents = [doc.page_content for doc in docs]
# Check that we have the expected content
assert any("Python code examples" in content for content in contents)
assert any("JavaScript functions" in content for content in contents)
assert any("testing best practices" in content for content in contents)
assert any("deployment info" in content for content in contents)
# Optional: Integration test that requires actual Ollama server
def test_create_retriever_with_real_ollama(temp_files, ollama_config):
"""Integration test with real Ollama (requires server running)"""
if not is_ollama_available(ollama_config):
pytest.skip("Local Ollama server is not available")
try:
# This test would use a real embedding model
# Skip by default unless explicitly testing integration
retriever = create_retriever(temp_files[:2], "nomic-embed-text")
assert retriever is not None
# Test actual retrieval
context = get_context_from_store("Python code", retriever)
assert isinstance(context, str)
except Exception as e:
pytest.skip(f"Ollama server not available or model not found: {e}")