From 0bff803b91f6addd9a866e9acd64938a9d3fcc39 Mon Sep 17 00:00:00 2001
From: Alex Selimov <alex@alexselimov.com>
Date: Mon, 14 Jul 2025 14:26:56 -0400
Subject: [PATCH] Update tests for vector_store code

---
 dev_log.md                      |  29 +++
 pyproject.toml                  |   1 +
 src/reviewllama/vector_store.py |   4 +-
 tests/conftest.py               |  10 +++
 tests/test_llm.py               |   8 --
 tests/test_vector_store.py      | 149 ++++++++++++++++++++++++++++++++
 uv.lock                         |  19 ++++
 7 files changed, 210 insertions(+), 10 deletions(-)
 create mode 100644 dev_log.md
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_vector_store.py

diff --git a/dev_log.md b/dev_log.md
new file mode 100644
index 0000000..1540629
--- /dev/null
+++ b/dev_log.md
@@ -0,0 +1,29 @@
+# Development log for ReviewLlama
+
+## 06-12-2025
+
+- Fixed the tests for git functionality
+
+## 06-16-2025
+
+- Worked on the Ollama integration module
+
+## 06-24-2025
+
+- Spent 5 minutes fixing some types for creating the Ollama chain
+
+## 06-25-2025
+
+- Spent time building out the LLM client. I've added code up to the chat client.
+
+TODOS:
+
+- [x] Add tests for the basic chat client
+- [x] Update ChatClient to use a LangChain RAG pipeline
+
+## 07-14-2025
+
+- Implement the RAG pipeline using LangChain
+- Add tests for new functionality
+
+
diff --git a/pyproject.toml b/pyproject.toml
index 7279b67..2098553 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,7 @@ readme = "README.md"
 authors = [{ name = "Alex Selimov", email = "alex@alexselimov.com" }]
 requires-python = ">=3.13"
 dependencies = [
+    "faiss-cpu>=1.11.0",
     "gitpython>=3.1.44",
     "langchain>=0.3.25",
     "langchain-community>=0.3.25",
diff --git a/src/reviewllama/vector_store.py b/src/reviewllama/vector_store.py
index 31b0aa9..47303c3 100644
--- a/src/reviewllama/vector_store.py
+++ b/src/reviewllama/vector_store.py
@@ -7,7 +7,7 @@ from langchain_core.vectorstores import VectorStoreRetriever
 from langchain_ollama.embeddings import OllamaEmbeddings
 
 
-def documents_from_list(file_paths: list[Path | str]) -> list[Document]:
+def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]:
     return [doc for file_path in file_paths for doc in TextLoader(file_path).load()]
 
 
@@ -15,7 +15,7 @@ def create_retriever(
     file_paths: list[Path | str], embedding_model: str
 ) -> VectorStoreRetriever:
     embeddings = OllamaEmbeddings(model=embedding_model)
-    vectorstore = FAISS.from_documents(documents_from_list(file_paths), embeddings)
+    vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings)
     return vectorstore.as_retriever()
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2c9d092
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,10 @@
+import pytest
+
+from reviewllama.configs import create_ollama_config
+
+
+@pytest.fixture
+def ollama_config():
+    return create_ollama_config(
+        "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0
+    )
diff --git a/tests/test_llm.py b/tests/test_llm.py
index e714220..faa3a4e 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -4,18 +4,10 @@ Unit tests for llm chat client functionality
 
 import pytest
 
-from reviewllama.configs import create_ollama_config
 from reviewllama.llm import chat_with_client, create_chat_client
 from reviewllama.utilities import is_ollama_available
 
 
-@pytest.fixture
-def ollama_config():
-    return create_ollama_config(
-        "gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0
-    )
-
-
 @pytest.fixture
 def chat_client(ollama_config):
     return create_chat_client(ollama_config)
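The retriever API above is small: create_retriever builds an in-memory FAISS index over the given files with Ollama embeddings, and get_context_from_store (exercised by the tests below) turns a query into concatenated context. A minimal usage sketch, assuming a running local Ollama server with the nomic-embed-text embedding model pulled (the same model the integration test below uses); the indexed path is only illustrative:

    from reviewllama.vector_store import create_retriever, get_context_from_store

    # Index a few files into an in-memory FAISS store (hypothetical path).
    retriever = create_retriever(["src/reviewllama/llm.py"], "nomic-embed-text")

    # Retrieve the most relevant chunks for a question about the code.
    context = get_context_from_store("How is the chat client created?", retriever)
    print(context)
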
diff --git a/tests/test_vector_store.py b/tests/test_vector_store.py
new file mode 100644
index 0000000..f3a5c0b
--- /dev/null
+++ b/tests/test_vector_store.py
@@ -0,0 +1,149 @@
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, Mock, patch
+
+import pytest
+from langchain_core.documents.base import Document
+from langchain_core.vectorstores import VectorStoreRetriever
+
+from reviewllama.utilities import is_ollama_available
+from reviewllama.vector_store import (create_retriever,
+                                      documents_from_path_list,
+                                      get_context_from_store)
+
+
+@pytest.fixture
+def temp_files():
+    """Create temporary test files"""
+    temp_dir = tempfile.mkdtemp()
+    files = []
+
+    # Create test files with different content
+    file_contents = [
+        "This is the first test file with Python code examples.",
+        "The second file contains JavaScript functions and classes.",
+        "File three has documentation about testing best practices.",
+        "Final file includes configuration settings and deployment info.",
+    ]
+
+    for i, content in enumerate(file_contents):
+        file_path = Path(temp_dir) / f"test_file_{i}.txt"
+        file_path.write_text(content)
+        files.append(file_path)
+
+    yield files
+
+    # Cleanup
+    for file_path in files:
+        file_path.unlink()
+    os.rmdir(temp_dir)
+
+
+def test_load_documents(temp_files):
+    docs = documents_from_path_list(temp_files)
+
+    assert len(docs) == 4
+    assert all(isinstance(doc, Document) for doc in docs)
+    assert all(doc.page_content for doc in docs)
+    assert all(doc.metadata.get("source") for doc in docs)
+
+
+@patch("reviewllama.vector_store.OllamaEmbeddings")
+@patch("reviewllama.vector_store.FAISS")
+@patch("reviewllama.vector_store.documents_from_path_list")
+def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
+    """Test successful retriever creation"""
+    # Setup mocks
+    mock_docs = [Document(page_content="test", metadata={"source": "test.txt"})]
+    mock_docs_from_list.return_value = mock_docs
+
+    mock_embedding_instance = Mock()
+    mock_embeddings.return_value = mock_embedding_instance
+
+    mock_vectorstore = Mock()
+    mock_retriever = Mock(spec=VectorStoreRetriever)
+    mock_vectorstore.as_retriever.return_value = mock_retriever
+    mock_faiss.from_documents.return_value = mock_vectorstore
+
+    # Test
+    result = create_retriever(["test.txt"], "test-embedding-model")
+
+    # Assertions
+    assert result == mock_retriever
+    mock_embeddings.assert_called_once_with(model="test-embedding-model")
+    mock_docs_from_list.assert_called_once_with(["test.txt"])
+    mock_faiss.from_documents.assert_called_once_with(
+        mock_docs, mock_embedding_instance
+    )
+    mock_vectorstore.as_retriever.assert_called_once()
+
+
+def test_get_context_from_store_success():
+    """Test successful context retrieval"""
+    mock_retriever = Mock(spec=VectorStoreRetriever)
+    mock_docs = [
+        Document(page_content="First relevant document", metadata={}),
+        Document(page_content="Second relevant document", metadata={}),
+        Document(page_content="Third relevant document", metadata={}),
+    ]
+    mock_retriever.get_relevant_documents.return_value = mock_docs
+
+    result = get_context_from_store("test query", mock_retriever)
+
+    expected = "First relevant document\n\nSecond relevant document\n\nThird relevant document"
+    assert result == expected
+    mock_retriever.get_relevant_documents.assert_called_once_with("test query")
+
+
+@patch("reviewllama.vector_store.OllamaEmbeddings")
+@patch("reviewllama.vector_store.FAISS")
+def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files):
+    """Test the full pipeline with mocked external dependencies"""
+    # Setup mocks
+    mock_embedding_instance = Mock()
+    mock_embeddings.return_value = mock_embedding_instance
+
+    mock_vectorstore = Mock()
+    mock_retriever = Mock(spec=VectorStoreRetriever)
+    mock_retriever.get_relevant_documents.return_value = [
+        Document(page_content="Relevant test content", metadata={})
+    ]
+    mock_vectorstore.as_retriever.return_value = mock_retriever
+    mock_faiss.from_documents.return_value = mock_vectorstore
+
+    # Test full pipeline
+    retriever = create_retriever(temp_files[:2], "test-model")
+    context = get_context_from_store("test query", retriever)
+
+    assert context == "Relevant test content"
+    mock_embeddings.assert_called_once_with(model="test-model")
+    mock_retriever.get_relevant_documents.assert_called_once_with("test query")
+
+
+def test_documents_from_list_content_verification(temp_files):
+    """Test that documents contain expected content"""
+    docs = documents_from_path_list(temp_files)
+
+    contents = [doc.page_content for doc in docs]
+
+    # Check that we have the expected content
+    assert any("Python code examples" in content for content in contents)
+    assert any("JavaScript functions" in content for content in contents)
+    assert any("testing best practices" in content for content in contents)
+    assert any("deployment info" in content for content in contents)
+
+
+# Integration test that requires an actual Ollama server; it skips itself
+# when no local server is reachable.
+def test_create_retriever_with_real_ollama(temp_files, ollama_config):
+    """Integration test with real Ollama (requires a running server)"""
+    if not is_ollama_available(ollama_config):
+        pytest.skip("Local Ollama server is not available")
+    try:
+        # Uses a real embedding model rather than mocks
+        retriever = create_retriever(temp_files[:2], "nomic-embed-text")
+        assert retriever is not None
+
+        # Test actual retrieval
+        context = get_context_from_store("Python code", retriever)
+        assert isinstance(context, str)
+
+    except Exception as e:
+        pytest.skip(f"Ollama server not available or model not found: {e}")
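The helper get_context_from_store itself is not shown in this patch, but test_get_context_from_store_success pins down its contract: pass the query to the retriever and join the page contents of the returned documents with blank lines. A minimal sketch consistent with those assertions, not necessarily the actual implementation:

    from langchain_core.vectorstores import VectorStoreRetriever


    def get_context_from_store(query: str, retriever: VectorStoreRetriever) -> str:
        # Fetch the documents most relevant to the query and join their
        # contents with blank lines, matching the test expectations above.
        docs = retriever.get_relevant_documents(query)
        return "\n\n".join(doc.page_content for doc in docs)

Note that recent langchain-core releases deprecate get_relevant_documents in favor of retriever.invoke(query), so both the helper and the mocks in this test module may need updating as the dependency pins move forward.
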
"sha256:796e90389427b1c1fb06abdb0427bb343b6350f80112a2e6090ac8f176ff7416", size = 7913539, upload-time = "2025-04-28T07:48:08.338Z" }, + { url = "https://files.pythonhosted.org/packages/b2/2b/437c2f36c3aa3cffe041479fced1c76420d3e92e1f434f1da3be3e6f32b1/faiss_cpu-1.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b6e355dda72b3050991bc32031b558b8f83a2b3537a2b9e905a84f28585b47e", size = 3785181, upload-time = "2025-04-28T07:48:10.594Z" }, + { url = "https://files.pythonhosted.org/packages/66/75/955527414371843f558234df66fa0b62c6e86e71e4022b1be9333ac6004c/faiss_cpu-1.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6c482d07194638c169b4422774366e7472877d09181ea86835e782e6304d4185", size = 31287635, upload-time = "2025-04-28T07:48:12.93Z" }, + { url = "https://files.pythonhosted.org/packages/50/51/35b7a3f47f7859363a367c344ae5d415ea9eda65db0a7d497c7ea2c0b576/faiss_cpu-1.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:13eac45299532b10e911bff1abbb19d1bf5211aa9e72afeade653c3f1e50e042", size = 15005455, upload-time = "2025-04-28T07:48:16.173Z" }, +] + [[package]] name = "frozenlist" version = "1.7.0" @@ -802,6 +819,7 @@ name = "reviewllama" version = "0.1.0" source = { editable = "." } dependencies = [ + { name = "faiss-cpu" }, { name = "gitpython" }, { name = "langchain" }, { name = "langchain-community" }, @@ -812,6 +830,7 @@ dependencies = [ [package.metadata] requires-dist = [ + { name = "faiss-cpu", specifier = ">=1.11.0" }, { name = "gitpython", specifier = ">=3.1.44" }, { name = "langchain", specifier = ">=0.3.25" }, { name = "langchain-community", specifier = ">=0.3.25" },