Compare commits

..

No commits in common. "f8044a043bce8cb993cdca5b2a59f692fa1d1658" and "0bff803b91f6addd9a866e9acd64938a9d3fcc39" have entirely different histories.

9 changed files with 124 additions and 120 deletions

View file

@ -3,10 +3,15 @@ import sys
from pathlib import Path
from typing import List, Optional
from reviewllama.reviewllama import run_reviewllama
from reviewllama.git_diff import analyze_git_repository
from .configs import ReviewConfig, namespace_to_config
from .logger import log_paths, log_review_start
from .configs import ReviewConfig, create_config_from_vars
from .logger import (
log_git_analysis_result,
log_git_analysis_start,
log_paths,
log_review_start,
)
def normalize_server_url(url: str) -> str:
@ -25,7 +30,7 @@ def create_argument_parser() -> argparse.ArgumentParser:
epilog="""
Examples:
reviewllama . --model gemma3:27b --server localhost:11434
reviewllama src/ tests/ --model gemma3:4b
reviewllama src/ tests/ --model llama3.2:7b
""",
)
@ -38,7 +43,7 @@ Examples:
parser.add_argument(
"--model",
default="gemma3:4b",
default="llama3.2:3b",
help="Ollama model to use for code review (default: %(default)s)",
)
@ -56,26 +61,6 @@ Examples:
help="Base branch to compare against (default: %(default)s)",
)
parser.add_argument(
"--embedding_model",
dest="embedding_model",
default="nomic-embed-text",
help="Base branch to compare against (default: %(default)s)",
)
parser.add_argument(
"--system_prompt",
dest="system_prompt",
default=(
"You are a PR review assistant in charge of softare quality control. "
"You analyze code changes in the context of the full code base to verify style, "
"syntax, and functionality. Each suggestion should consist of the original code, "
"suggested changes if relevant, and a short description of why the change is suggested "
"Examples\nInput:\n```\nvarialbe=1+1\n```\nOutput:\nOriginal:\n```\nvarialbe=1+1\n"
"Suggestion:\n```variable=1+1\n```\nReason: `varialbe` is likely a typo."
),
help="Base branch to compare against (default: %(default)s)",
)
return parser
@ -85,20 +70,39 @@ def parse_raw_arguments(args: Optional[List[str]] = None) -> argparse.Namespace:
return parser.parse_args(args)
def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig:
"""Transform argparse namespace into ReviewConfig."""
paths = [Path(path_str) for path_str in namespace.paths]
return create_config_from_vars(
paths=paths,
model=namespace.model,
server_url=normalize_server_url(namespace.server_url),
# TODO: Update this system prompt. Either allow the user to provide it or engineer our own for this.
system_prompt="You are a helpful AI assistant",
base_branch=namespace.base_branch,
)
def parse_arguments(args: Optional[List[str]] = None) -> ReviewConfig:
"""Parse command line arguments and return validated configuration."""
raw_namespace = parse_raw_arguments(args)
return namespace_to_config(raw_namespace)
return transform_namespace_to_config(raw_namespace)
def cli() -> None:
"""Main entry point for the CLI."""
try:
config = parse_arguments()
# TODO: Pass config to review engine
log_review_start(config)
log_paths(config.paths)
run_reviewllama(config)
for path in config.paths:
analysis = analyze_git_repository(path, config.base_branch)
log_git_analysis_start(path, config.base_branch)
log_git_analysis_result(analysis)
print(analysis.diffs)
except SystemExit:
# argparse calls sys.exit on error, let it propagate

View file

@ -1,4 +1,3 @@
import argparse
from dataclasses import dataclass, field
from pathlib import Path
from typing import List
@ -49,13 +48,15 @@ def create_review_config(
return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch)
def namespace_to_config(
namespace: argparse.Namespace
def create_config_from_vars(
paths: List[Path],
model: str,
server_url: str,
system_prompt: str,
base_branch: str,
):
"""Transform argparse namespace into ReviewConfig."""
paths = [Path(path_str) for path_str in namespace.paths]
ollama_config = OllamaConfig(
chat_model=namespace.model, base_url=namespace.server_url, system_prompt=namespace.system_prompt, embedding_model=namespace.embedding_model
chat_model=model, base_url=server_url, system_prompt=system_prompt
)
return create_review_config(paths, ollama_config, namespace.base_branch)
return create_review_config(paths, ollama_config, base_branch)

View file

@ -62,9 +62,9 @@ def branch_exists(repo: Repo, branch_name: str) -> bool:
return False
def get_tracked_files(repo: Repo) -> list[Path]:
def get_tracked_files(repo: Repo):
return [
Path(entry.abspath)
entry.abspath
for entry in repo.commit().tree.traverse()
if Path(entry.abspath).is_file()
]

View file

@ -1,4 +1,5 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from langchain.memory import ConversationBufferMemory

View file

@ -38,10 +38,6 @@ def log_paths(paths: List[Path]) -> None:
console.print(f"{path}")
console.print()
def log_info(info: str) -> None:
"""Log error message with colored output."""
console = create_console()
console.print(f"{info}")
def log_error(error: str) -> None:
"""Log error message with colored output."""

View file

@ -1,63 +0,0 @@
from pathlib import Path
from git import Repo
from langchain_core.vectorstores import VectorStoreRetriever
from reviewllama.configs import OllamaConfig, ReviewConfig
from reviewllama.git_diff import (GitAnalysis, GitDiff, analyze_git_repository,
get_tracked_files)
from reviewllama.llm import ChatClient, chat_with_client, create_chat_client
from reviewllama.vector_store import create_retriever
from .logger import log_git_analysis_result, log_git_analysis_start, log_info
def run_reviewllama(config: ReviewConfig):
for path in config.paths:
chat_client = create_and_log_chat_client(config.ollama)
analysis = create_and_log_git_diff_analysis(path, config.base_branch)
retriever = create_and_log_vector_store_retriever(analysis.repo, config.ollama)
for diff in analysis.diffs:
chat_client = get_suggestions(diff, retriever, chat_client)
def create_and_log_chat_client(config: OllamaConfig) -> ChatClient:
log_info("Initializing LLM chat client")
return create_chat_client(config)
def create_and_log_git_diff_analysis(path: Path, base_branch: str) -> GitAnalysis:
log_git_analysis_start(path, base_branch)
analysis = analyze_git_repository(path, base_branch)
log_git_analysis_result(analysis)
return analysis
def create_and_log_vector_store_retriever(
repo: Repo, config: OllamaConfig
) -> VectorStoreRetriever:
log_info("Creating vector_store...")
retriever = create_retriever(get_tracked_files(repo), config)
log_info("Done creating vector store")
return retriever
def get_suggestions(
diff: GitDiff, retriever: VectorStoreRetriever, chat_client: ChatClient
) -> ChatClient:
new_client = chat_with_client(chat_client, craft_message(diff), retriever)
log_info(str(new_client.get_last_response_or_none().content))
return new_client
def craft_message(diff) -> str:
return (
"Review the following code changes and make up to three suggestions on "
"how to improve it. If the code is sufficiently simple or accurate then say "
"no suggestions can be found. Important issues you should consider are consistent "
"style, introduction of syntax errors, and potentially breaking changes in "
"interfaces/APIs that aren't properly handled.\n\n"
f"The original code:\n```\n{diff.old_content}\n```\n"
f"The new code:\n```\n{diff.new_content}```"
)

View file

@ -6,25 +6,21 @@ from langchain_core.documents.base import Document
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_ollama.embeddings import OllamaEmbeddings
from .configs import OllamaConfig
def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]:
return [doc for file_path in file_paths for doc in TextLoader(file_path).load()]
def create_retriever(
file_paths: list[Path | str], config: OllamaConfig
file_paths: list[Path | str], embedding_model: str
) -> VectorStoreRetriever:
embeddings = OllamaEmbeddings(
model=config.embedding_model, base_url=config.base_url
)
embeddings = OllamaEmbeddings(model=embedding_model)
vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings)
return vectorstore.as_retriever()
def get_context_from_store(message: str, retriever: VectorStoreRetriever):
docs = retriever.invoke(message)
docs = retriever.get_relevant_documents(message)
return "\n\n".join([doc.page_content for doc in docs])

View file

@ -6,5 +6,5 @@ from reviewllama.configs import create_ollama_config
@pytest.fixture
def ollama_config():
return create_ollama_config(
"gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0, "nomic-embed-text"
"gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0
)

View file

@ -1,13 +1,16 @@
import os
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch
from unittest.mock import MagicMock, Mock, patch
import pytest
from langchain_core.documents.base import Document
from langchain_core.vectorstores import VectorStoreRetriever
from reviewllama.vector_store import create_retriever, documents_from_path_list
from reviewllama.utilities import is_ollama_available
from reviewllama.vector_store import (create_retriever,
documents_from_path_list,
get_context_from_store)
@pytest.fixture
@ -49,9 +52,7 @@ def test_load_documents(temp_files):
@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
@patch("reviewllama.vector_store.documents_from_path_list")
def test_create_retriever(
mock_docs_from_list, mock_faiss, mock_embeddings, ollama_config
):
def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
"""Test successful retriever creation"""
# Setup mocks
mock_docs = [Document(page_content="test", metadata={"source": "test.txt"})]
@ -66,15 +67,83 @@ def test_create_retriever(
mock_faiss.from_documents.return_value = mock_vectorstore
# Test
result = create_retriever(["test.txt"], ollama_config)
result = create_retriever(["test.txt"], "test-embedding-model")
# Assertions
assert result == mock_retriever
mock_embeddings.assert_called_once_with(
model=ollama_config.embedding_model, base_url=ollama_config.base_url
)
mock_embeddings.assert_called_once_with(model="test-embedding-model")
mock_docs_from_list.assert_called_once_with(["test.txt"])
mock_faiss.from_documents.assert_called_once_with(
mock_docs, mock_embedding_instance
)
mock_vectorstore.as_retriever.assert_called_once()
def test_get_context_from_store_success():
"""Test successful context retrieval"""
mock_retriever = Mock(spec=VectorStoreRetriever)
mock_docs = [
Document(page_content="First relevant document", metadata={}),
Document(page_content="Second relevant document", metadata={}),
Document(page_content="Third relevant document", metadata={}),
]
mock_retriever.get_relevant_documents.return_value = mock_docs
result = get_context_from_store("test query", mock_retriever)
expected = "First relevant document\n\nSecond relevant document\n\nThird relevant document"
assert result == expected
mock_retriever.get_relevant_documents.assert_called_once_with("test query")
@patch('reviewllama.vector_store.OllamaEmbeddings')
@patch('reviewllama.vector_store.FAISS')
def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files):
"""Test the full pipeline with mocked external dependencies"""
# Setup mocks
mock_embedding_instance = Mock()
mock_embeddings.return_value = mock_embedding_instance
mock_vectorstore = Mock()
mock_retriever = Mock(spec=VectorStoreRetriever)
mock_retriever.get_relevant_documents.return_value = [
Document(page_content="Relevant test content", metadata={})
]
mock_vectorstore.as_retriever.return_value = mock_retriever
mock_faiss.from_documents.return_value = mock_vectorstore
# Test full pipeline
retriever = create_retriever(temp_files[:2], "test-model")
context = get_context_from_store("test query", retriever)
assert context == "Relevant test content"
mock_embeddings.assert_called_once_with(model="test-model")
mock_retriever.get_relevant_documents.assert_called_once_with("test query")
def test_documents_from_list_content_verification(temp_files):
"""Test that documents contain expected content"""
docs = documents_from_path_list(temp_files)
contents = [doc.page_content for doc in docs]
# Check that we have the expected content
assert any("Python code examples" in content for content in contents)
assert any("JavaScript functions" in content for content in contents)
assert any("testing best practices" in content for content in contents)
assert any("deployment info" in content for content in contents)
# Optional: Integration test that requires actual Ollama server
def test_create_retriever_with_real_ollama(temp_files, ollama_config):
"""Integration test with real Ollama (requires server running)"""
if not is_ollama_available(ollama_config):
pytest.skip("Local Ollama server is not available")
try:
# This test would use a real embedding model
# Skip by default unless explicitly testing integration
retriever = create_retriever(temp_files[:2], "nomic-embed-text")
assert retriever is not None
# Test actual retrieval
context = get_context_from_store("Python code", retriever)
assert isinstance(context, str)
except Exception as e:
pytest.skip(f"Ollama server not available or model not found: {e}")