Update tests for vector_store_code
This commit is contained in:
parent
24bfef99a2
commit
0bff803b91
7 changed files with 210 additions and 10 deletions
29
dev_log.md
Normal file
29
dev_log.md
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
# Development log for ReviewLlama
|
||||||
|
|
||||||
|
## 06-12-2025
|
||||||
|
|
||||||
|
- Fixed the tests for git functionality
|
||||||
|
|
||||||
|
## 06-16-2025
|
||||||
|
|
||||||
|
- Worked on the Ollama integration module
|
||||||
|
|
||||||
|
## 06-24-2025
|
||||||
|
|
||||||
|
- Spent 5 minutes fixing some types for creating the Ollama chain
|
||||||
|
|
||||||
|
## 06-25-2025
|
||||||
|
|
||||||
|
- Spent time building out the llm client. I've added code up to the chat client.
|
||||||
|
|
||||||
|
TODOS:
|
||||||
|
|
||||||
|
- [x] Add tests for the basic chat client
|
||||||
|
- [x] Update ChatClient to use a LangChain RAG pipeline
|
||||||
|
|
||||||
|
## 07-14-2025
|
||||||
|
|
||||||
|
- Implement the RAG pipeline using LangChain
|
||||||
|
- Add tests for new functionality
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ readme = "README.md"
|
||||||
authors = [{ name = "Alex Selimov", email = "alex@alexselimov.com" }]
|
authors = [{ name = "Alex Selimov", email = "alex@alexselimov.com" }]
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"faiss-cpu>=1.11.0",
|
||||||
"gitpython>=3.1.44",
|
"gitpython>=3.1.44",
|
||||||
"langchain>=0.3.25",
|
"langchain>=0.3.25",
|
||||||
"langchain-community>=0.3.25",
|
"langchain-community>=0.3.25",
|
||||||
|
|
|
@ -7,7 +7,7 @@ from langchain_core.vectorstores import VectorStoreRetriever
|
||||||
from langchain_ollama.embeddings import OllamaEmbeddings
|
from langchain_ollama.embeddings import OllamaEmbeddings
|
||||||
|
|
||||||
|
|
||||||
def documents_from_list(file_paths: list[Path | str]) -> list[Document]:
|
def documents_from_path_list(file_paths: list[Path | str]) -> list[Document]:
|
||||||
return [doc for file_path in file_paths for doc in TextLoader(file_path).load()]
|
return [doc for file_path in file_paths for doc in TextLoader(file_path).load()]
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ def create_retriever(
|
||||||
file_paths: list[Path | str], embedding_model: str
|
file_paths: list[Path | str], embedding_model: str
|
||||||
) -> VectorStoreRetriever:
|
) -> VectorStoreRetriever:
|
||||||
embeddings = OllamaEmbeddings(model=embedding_model)
|
embeddings = OllamaEmbeddings(model=embedding_model)
|
||||||
vectorstore = FAISS.from_documents(documents_from_list(file_paths), embeddings)
|
vectorstore = FAISS.from_documents(documents_from_path_list(file_paths), embeddings)
|
||||||
return vectorstore.as_retriever()
|
return vectorstore.as_retriever()
|
||||||
|
|
||||||
|
|
||||||
|
|
10
tests/conftest.py
Normal file
10
tests/conftest.py
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from reviewllama.configs import create_ollama_config
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def ollama_config():
    """Return the Ollama config built from the fixed arguments used in tests."""
    # Positional arguments are passed through unchanged, in this order.
    config_args = (
        "gemma3:4b",
        "localhost:11434",
        "You are a helpful assistant.",
        0.0,
    )
    return create_ollama_config(*config_args)
|
|
@ -4,18 +4,10 @@ Unit tests for llm chat client functionality
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from reviewllama.configs import create_ollama_config
|
|
||||||
from reviewllama.llm import chat_with_client, create_chat_client
|
from reviewllama.llm import chat_with_client, create_chat_client
|
||||||
from reviewllama.utilities import is_ollama_available
|
from reviewllama.utilities import is_ollama_available
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def ollama_config():
|
|
||||||
return create_ollama_config(
|
|
||||||
"gemma3:4b", "localhost:11434", "You are a helpful assistant.", 0.0
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def chat_client(ollama_config):
|
def chat_client(ollama_config):
|
||||||
return create_chat_client(ollama_config)
|
return create_chat_client(ollama_config)
|
||||||
|
|
149
tests/test_vector_store.py
Normal file
149
tests/test_vector_store.py
Normal file
|
@ -0,0 +1,149 @@
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, Mock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from langchain_core.documents.base import Document
|
||||||
|
from langchain_core.vectorstores import VectorStoreRetriever
|
||||||
|
|
||||||
|
from reviewllama.utilities import is_ollama_available
|
||||||
|
from reviewllama.vector_store import (create_retriever,
|
||||||
|
documents_from_path_list,
|
||||||
|
get_context_from_store)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def temp_files():
    """Create temporary text files for the vector-store tests.

    Yields:
        A list of ``Path`` objects, one ``test_file_<i>.txt`` per entry in
        ``file_contents``, in the same order.
    """
    # Create test files with different content
    file_contents = [
        "This is the first test file with Python code examples.",
        "The second file contains JavaScript functions and classes.",
        "File three has documentation about testing best practices.",
        "Final file includes configuration settings and deployment info.",
    ]

    # TemporaryDirectory removes the directory and everything inside it on
    # exit, so cleanup still succeeds if a test deletes one of the files,
    # leaves extra files behind, or if setup fails part-way through.
    with tempfile.TemporaryDirectory() as temp_dir:
        files = []
        for i, content in enumerate(file_contents):
            file_path = Path(temp_dir) / f"test_file_{i}.txt"
            file_path.write_text(content)
            files.append(file_path)

        yield files
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_documents(temp_files):
    """Loading the fixture paths yields four populated Document objects."""
    loaded = documents_from_path_list(temp_files)

    assert len(loaded) == 4
    for document in loaded:
        assert isinstance(document, Document)
        assert document.page_content
        assert document.metadata.get("source")
|
||||||
|
|
||||||
|
|
||||||
|
@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
@patch("reviewllama.vector_store.documents_from_path_list")
def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
    """Check that create_retriever wires the loader, embeddings and FAISS together."""
    # Arrange: stub the document loader, the embedding class and the store.
    fake_docs = [Document(page_content="test", metadata={"source": "test.txt"})]
    mock_docs_from_list.return_value = fake_docs

    embedding_instance = Mock()
    mock_embeddings.return_value = embedding_instance

    fake_retriever = Mock(spec=VectorStoreRetriever)
    fake_store = Mock()
    fake_store.as_retriever.return_value = fake_retriever
    mock_faiss.from_documents.return_value = fake_store

    # Act
    result = create_retriever(["test.txt"], "test-embedding-model")

    # Assert: the retriever comes from the store, and each collaborator was
    # called exactly once with the expected arguments.
    assert result == fake_retriever
    mock_embeddings.assert_called_once_with(model="test-embedding-model")
    mock_docs_from_list.assert_called_once_with(["test.txt"])
    mock_faiss.from_documents.assert_called_once_with(fake_docs, embedding_instance)
    fake_store.as_retriever.assert_called_once()
|
||||||
|
|
||||||
|
def test_get_context_from_store_success():
    """Retrieved page contents come back joined by blank lines."""
    retriever = Mock(spec=VectorStoreRetriever)
    retriever.get_relevant_documents.return_value = [
        Document(page_content="First relevant document", metadata={}),
        Document(page_content="Second relevant document", metadata={}),
        Document(page_content="Third relevant document", metadata={}),
    ]
    expected = "First relevant document\n\nSecond relevant document\n\nThird relevant document"

    context = get_context_from_store("test query", retriever)

    assert context == expected
    retriever.get_relevant_documents.assert_called_once_with("test query")
|
||||||
|
|
||||||
|
@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files):
    """Run create_retriever then get_context_from_store against mocked backends."""
    # Arrange: the embedding class and FAISS are patched, so nothing external runs.
    mock_embeddings.return_value = Mock()

    fake_retriever = Mock(spec=VectorStoreRetriever)
    fake_retriever.get_relevant_documents.return_value = [
        Document(page_content="Relevant test content", metadata={})
    ]
    fake_store = Mock()
    fake_store.as_retriever.return_value = fake_retriever
    mock_faiss.from_documents.return_value = fake_store

    # Act: build the retriever from the first two fixture files and query it.
    first_two_files = temp_files[:2]
    retriever = create_retriever(first_two_files, "test-model")
    context = get_context_from_store("test query", retriever)

    # Assert
    assert context == "Relevant test content"
    mock_embeddings.assert_called_once_with(model="test-model")
    fake_retriever.get_relevant_documents.assert_called_once_with("test query")
|
||||||
|
|
||||||
|
def test_documents_from_list_content_verification(temp_files):
    """Each expected phrase appears in at least one loaded document."""
    contents = [doc.page_content for doc in documents_from_path_list(temp_files)]

    expected_phrases = (
        "Python code examples",
        "JavaScript functions",
        "testing best practices",
        "deployment info",
    )
    for phrase in expected_phrases:
        assert any(phrase in content for content in contents)
|
||||||
|
|
||||||
|
# Optional: Integration test that requires actual Ollama server
def test_create_retriever_with_real_ollama(temp_files, ollama_config):
    """Integration test with real Ollama (requires server running).

    Skipped when ``is_ollama_available`` reports the server as unavailable, or
    when building or querying the retriever raises (the skip message covers an
    unreachable server or a missing embedding model).
    """
    if not is_ollama_available(ollama_config):
        pytest.skip("Local Ollama server is not available")

    # Only the calls that use the real embedding model live inside the try
    # block. AssertionError is a subclass of Exception, so asserting in here
    # would turn a genuine test failure into a skip.
    try:
        retriever = create_retriever(temp_files[:2], "nomic-embed-text")
        # Test actual retrieval
        context = get_context_from_store("Python code", retriever)
    except Exception as e:
        pytest.skip(f"Ollama server not available or model not found: {e}")

    assert retriever is not None
    assert isinstance(context, str)
|
19
uv.lock
generated
19
uv.lock
generated
|
@ -163,6 +163,23 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
|
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "faiss-cpu"
|
||||||
|
version = "1.11.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "numpy" },
|
||||||
|
{ name = "packaging" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/e7/9a/e33fc563f007924dd4ec3c5101fe5320298d6c13c158a24a9ed849058569/faiss_cpu-1.11.0.tar.gz", hash = "sha256:44877b896a2b30a61e35ea4970d008e8822545cb340eca4eff223ac7f40a1db9", size = 70218, upload-time = "2025-04-28T07:48:30.459Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/92/90/d2329ce56423cc61f4c20ae6b4db001c6f88f28bf5a7ef7f8bbc246fd485/faiss_cpu-1.11.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:0c98e5feff83b87348e44eac4d578d6f201780dae6f27f08a11d55536a20b3a8", size = 3313807, upload-time = "2025-04-28T07:48:06.486Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/24/14/8af8f996d54e6097a86e6048b1a2c958c52dc985eb4f935027615079939e/faiss_cpu-1.11.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:796e90389427b1c1fb06abdb0427bb343b6350f80112a2e6090ac8f176ff7416", size = 7913539, upload-time = "2025-04-28T07:48:08.338Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b2/2b/437c2f36c3aa3cffe041479fced1c76420d3e92e1f434f1da3be3e6f32b1/faiss_cpu-1.11.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2b6e355dda72b3050991bc32031b558b8f83a2b3537a2b9e905a84f28585b47e", size = 3785181, upload-time = "2025-04-28T07:48:10.594Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/66/75/955527414371843f558234df66fa0b62c6e86e71e4022b1be9333ac6004c/faiss_cpu-1.11.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6c482d07194638c169b4422774366e7472877d09181ea86835e782e6304d4185", size = 31287635, upload-time = "2025-04-28T07:48:12.93Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/50/51/35b7a3f47f7859363a367c344ae5d415ea9eda65db0a7d497c7ea2c0b576/faiss_cpu-1.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:13eac45299532b10e911bff1abbb19d1bf5211aa9e72afeade653c3f1e50e042", size = 15005455, upload-time = "2025-04-28T07:48:16.173Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "frozenlist"
|
name = "frozenlist"
|
||||||
version = "1.7.0"
|
version = "1.7.0"
|
||||||
|
@ -802,6 +819,7 @@ name = "reviewllama"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
{ name = "faiss-cpu" },
|
||||||
{ name = "gitpython" },
|
{ name = "gitpython" },
|
||||||
{ name = "langchain" },
|
{ name = "langchain" },
|
||||||
{ name = "langchain-community" },
|
{ name = "langchain-community" },
|
||||||
|
@ -812,6 +830,7 @@ dependencies = [
|
||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
|
{ name = "faiss-cpu", specifier = ">=1.11.0" },
|
||||||
{ name = "gitpython", specifier = ">=3.1.44" },
|
{ name = "gitpython", specifier = ">=3.1.44" },
|
||||||
{ name = "langchain", specifier = ">=0.3.25" },
|
{ name = "langchain", specifier = ">=0.3.25" },
|
||||||
{ name = "langchain-community", specifier = ">=0.3.25" },
|
{ name = "langchain-community", specifier = ">=0.3.25" },
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue