Update tests for vector_store_code

2025-07-14 14:26:56 -04:00 · 2025-07-14 14:26:56 -04:00 · 0bff803b91
commit 0bff803b91
parent 24bfef99a2
7 changed files with 210 additions and 10 deletions
--- a/dev_log.md
+++ b/dev_log.md
@ -0,0 +1,29 @@
 # Development log for ReviewLlama
 ## 06-12-2025
 - Fixed the tests for git functionality
 ## 06-16-2025
 - Worked on the ollama integration module
 ## 06-24-2025
 - Spent 5 minutes fixing some types for creating the Ollama chain
 ## 06-25-2025
 - Spent time building out the llm client. I've added code up to the chat client. 
 TODOS:
 - [x] Add tests for the basic chat client
 - [x] Update ChatClient to use a LangChain RAG pipeline
 ## 07-14-2025
 - Implement the RAG pipeline using LangChain
 - Add tests for new functionality 
--- a/pyproject.toml
+++ b/pyproject.toml
@ -6,6 +6,7 @@ readme = "README.md"
 authors = [{ name = "Alex Selimov", email = "alex@alexselimov.com" }]
 requires-python = ">=3.13"
 dependencies = [
    "faiss-cpu>=1.11.0",
    "gitpython>=3.1.44",
    "langchain>=0.3.25",
    "langchain-community>=0.3.25",
--- a/src/reviewllama/vector_store.py
+++ b/src/reviewllama/vector_store.py
@ -7,7 +7,7 @@ from langchain_core.vectorstores import VectorStoreRetriever
 from langchain_ollama.embeddings import OllamaEmbeddings
-def documents_from_list(file_paths: list[Path | str]) -> list[Document]:
+def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]:
    return [doc for file_path in file_paths for doc in TextLoader(file_path).load()]
@ -15,7 +15,7 @@ def create_retriever(
    file_paths: list[Path | str], embedding_model: str
 ) -> VectorStoreRetriever:
    embeddings = OllamaEmbeddings(model=embedding_model)
-    vectorstore = FAISS.from_documents(documents_from_list(file_paths), embeddings)
+    vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings)
    return vectorstore.as_retriever()
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -0,0 +1,10 @@
 import pytest
 from reviewllama.configs import create_ollama_config
@pytest.fixture
def ollama_config():
    """Build the Ollama config object handed to tests that request this fixture."""
    # The values are forwarded positionally to create_ollama_config, unchanged.
    config_args = ("gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0)
    return create_ollama_config(*config_args)
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@ -4,18 +4,10 @@ Unit tests for llm chat client functionality
 import pytest
 from reviewllama.configs import create_ollama_config
 from reviewllama.llm import chat_with_client, create_chat_client
 from reviewllama.utilities import is_ollama_available
@pytest.fixture
 def ollama_config():
    return create_ollama_config(
        "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0
    )
@pytest.fixture
 def chat_client(ollama_config):
    return create_chat_client(ollama_config)
--- a/tests/test_vector_store.py
+++ b/tests/test_vector_store.py
@ -0,0 +1,149 @@
 import os
 import tempfile
 from pathlib import Path
 from unittest.mock import MagicMock, Mock, patch
 import pytest
 from langchain_core.documents.base import Document
 from langchain_core.vectorstores import VectorStoreRetriever
 from reviewllama.utilities import is_ollama_available
 from reviewllama.vector_store import (create_retriever,
                                      documents_from_path_list,
                                      get_context_from_store)
@pytest.fixture
def temp_files():
    """Create four small text files in a temporary directory and yield their paths.

    The directory is owned by ``tempfile.TemporaryDirectory``, so it is removed
    together with everything inside it when the requesting test finishes. This
    also holds if a test deleted one of the files, added new ones, or if writing
    a file failed part-way through setup.

    Yields:
        list[Path]: paths named ``test_file_<i>.txt``, in creation order.
    """
    # Each entry becomes the complete content of one file.
    file_contents = [
        "This is the first test file with Python code examples.",
        "The second file contains JavaScript functions and classes.",
        "File three has documentation about testing best practices.",
        "Final file includes configuration settings and deployment info.",
    ]
    with tempfile.TemporaryDirectory() as temp_dir:
        files = []
        for i, content in enumerate(file_contents):
            file_path = Path(temp_dir) / f"test_file_{i}.txt"
            file_path.write_text(content)
            files.append(file_path)
        # Leaving the ``with`` block after the test resumes us performs cleanup.
        yield files
 def test_load_documents(temp_files):
    """Loading the four temp files yields four Documents with content and a source."""
    loaded = documents_from_path_list(temp_files)
    assert len(loaded) == 4
    # Type check first for every entry, then the per-document fields.
    assert all(isinstance(entry, Document) for entry in loaded)
    for entry in loaded:
        assert entry.page_content
    for entry in loaded:
        assert entry.metadata.get("source")
@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
@patch("reviewllama.vector_store.documents_from_path_list")
def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
    """create_retriever returns the store's retriever with every collaborator mocked."""
    # Arrange: canned documents from the (patched) loader.
    stub_docs = [Document(page_content="test", metadata={"source": "test.txt"})]
    mock_docs_from_list.return_value = stub_docs
    # Arrange: the embeddings class returns a known instance we can check for later.
    embedding_instance = Mock()
    mock_embeddings.return_value = embedding_instance
    # Arrange: FAISS.from_documents(...) -> store -> as_retriever() -> expected retriever.
    expected_retriever = Mock(spec=VectorStoreRetriever)
    fake_store = Mock()
    fake_store.as_retriever.return_value = expected_retriever
    mock_faiss.from_documents.return_value = fake_store
    # Act
    path_args = ["test.txt"]
    model_name = "test-embedding-model"
    retriever = create_retriever(path_args, model_name)
    # Assert: correct result, and each collaborator was invoked exactly once.
    assert retriever == expected_retriever
    mock_embeddings.assert_called_once_with(model="test-embedding-model")
    mock_docs_from_list.assert_called_once_with(["test.txt"])
    mock_faiss.from_documents.assert_called_once_with(stub_docs, embedding_instance)
    fake_store.as_retriever.assert_called_once()
 def test_get_context_from_store_success():
    """Documents returned by the retriever are joined with blank lines between them."""
    page_texts = [
        "First relevant document",
        "Second relevant document",
        "Third relevant document",
    ]
    retriever = Mock(spec=VectorStoreRetriever)
    retriever.get_relevant_documents.return_value = [
        Document(page_content=text, metadata={}) for text in page_texts
    ]
    query = "test query"
    context = get_context_from_store(query, retriever)
    # The expected context is the page contents separated by one empty line.
    assert context == "\n\n".join(page_texts)
    retriever.get_relevant_documents.assert_called_once_with(query)
@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files):
    """Run create_retriever then get_context_from_store with Ollama and FAISS mocked."""
    # Stub retriever that answers with a single canned document.
    stub_retriever = Mock(spec=VectorStoreRetriever)
    stub_retriever.get_relevant_documents.return_value = [
        Document(page_content="Relevant test content", metadata={})
    ]
    # FAISS.from_documents(...) hands back a store whose as_retriever() is the stub.
    fake_store = Mock()
    fake_store.as_retriever.return_value = stub_retriever
    mock_faiss.from_documents.return_value = fake_store
    mock_embeddings.return_value = Mock()
    # Only the embeddings class and FAISS are patched here; the two temp file
    # paths are passed straight into create_retriever.
    query = "test query"
    context = get_context_from_store(
        query, create_retriever(temp_files[:2], "test-model")
    )
    assert context == "Relevant test content"
    mock_embeddings.assert_called_once_with(model="test-model")
    stub_retriever.get_relevant_documents.assert_called_once_with(query)
 def test_documents_from_list_content_verification(temp_files):
    """Each temp file's distinctive phrase shows up in some loaded document."""
    page_texts = [doc.page_content for doc in documents_from_path_list(temp_files)]
    # One phrase per fixture file, checked in the order the files were written.
    expected_phrases = (
        "Python code examples",
        "JavaScript functions",
        "testing best practices",
        "deployment info",
    )
    for phrase in expected_phrases:
        assert any(phrase in text for text in page_texts)
 # Optional: integration test that requires an actual Ollama server
 def test_create_retriever_with_real_ollama(temp_files, ollama_config):
    """Integration test against a real Ollama server.

    Skipped when ``is_ollama_available`` reports no server, or when building or
    querying the retriever raises (for example because the embedding model
    cannot be used -- presumably not pulled locally; confirm against your setup).

    The assertions live in the ``else`` clause so a genuine assertion failure
    is reported as a failure instead of being caught by ``except Exception``
    and turned into a skip.
    """
    if not is_ollama_available(ollama_config):
        pytest.skip("Local Ollama server is not available")
    try:
        # Only the calls that talk to Ollama are allowed to trigger a skip.
        retriever = create_retriever(temp_files[:2], "nomic-embed-text")
        context = get_context_from_store("Python code", retriever)
    except Exception as e:
        pytest.skip(f"Ollama server not available or model not found: {e}")
    else:
        assert retriever is not None
        assert isinstance(context, str)
--- a/uv.lock
+++ b/uv.lock
@ -163,6 +163,23 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
 ]
 [[package]]
 name = "faiss-cpu"
 version = "1.11.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "numpy" },
    { name = "packaging" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e7/9a/e33fc563f007924dd4ec3c5101fe5320298d6c13c158a24a9ed849058569/faiss_cpu-1.11.0.tar.gz", hash = "sha256:44877b896a2b30a61e35ea4970d008e8822545cb340eca4eff223ac7f40a1db9", size = 70218, upload-time = "2025-04-28T07:48:30.459Z" }
 wheels = [
    { url = "https://files.pythonhosted.org/packages/92/90/d2329ce56423cc61f4c20ae6b4db001c6f88f28bf5a7ef7f8bbc246fd485/faiss_cpu-1.11.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:0c98e5feff83b87348e44eac4d578d6f201780dae6f27f08a11d55536a20b3a8", size = 3313807, upload-time = "2025-04-28T07:48:06.486Z" },
    { url = "https://files.pythonhosted.org/packages/24/14/8af8f996d54e6097a86e6048b1a2c958c52dc985eb4f935027615079939e/faiss_cpu-1.11.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:796e90389427b1c1fb06abdb0427bb343b6350f80112a2e6090ac8f176ff7416", size = 7913539, upload-time = "2025-04-28T07:48:08.338Z" },
    { url = "https://files.pythonhosted.org/packages/b2/2b/437c2f36c3aa3cffe041479fced1c76420d3e92e1f434f1da3be3e6f32b1/faiss_cpu-1.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b6e355dda72b3050991bc32031b558b8f83a2b3537a2b9e905a84f28585b47e", size = 3785181, upload-time = "2025-04-28T07:48:10.594Z" },
    { url = "https://files.pythonhosted.org/packages/66/75/955527414371843f558234df66fa0b62c6e86e71e4022b1be9333ac6004c/faiss_cpu-1.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6c482d07194638c169b4422774366e7472877d09181ea86835e782e6304d4185", size = 31287635, upload-time = "2025-04-28T07:48:12.93Z" },
    { url = "https://files.pythonhosted.org/packages/50/51/35b7a3f47f7859363a367c344ae5d415ea9eda65db0a7d497c7ea2c0b576/faiss_cpu-1.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:13eac45299532b10e911bff1abbb19d1bf5211aa9e72afeade653c3f1e50e042", size = 15005455, upload-time = "2025-04-28T07:48:16.173Z" },
 ]
 [[package]]
 name = "frozenlist"
 version = "1.7.0"
@ -802,6 +819,7 @@ name = "reviewllama"
 version = "0.1.0"
 source = { editable = "." }
 dependencies = [
    { name = "faiss-cpu" },
    { name = "gitpython" },
    { name = "langchain" },
    { name = "langchain-community" },
@ -812,6 +830,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
    { name = "faiss-cpu", specifier = ">=1.11.0" },
    { name = "gitpython", specifier = ">=3.1.44" },
    { name = "langchain", specifier = ">=0.3.25" },
    { name = "langchain-community", specifier = ">=0.3.25" },