150 lines
5.7 KiB
Python
150 lines
5.7 KiB
Python
|
import os
|
||
|
import tempfile
|
||
|
from pathlib import Path
|
||
|
from unittest.mock import MagicMock, Mock, patch
|
||
|
|
||
|
import pytest
|
||
|
from langchain_core.documents.base import Document
|
||
|
from langchain_core.vectorstores import VectorStoreRetriever
|
||
|
|
||
|
from reviewllama.utilities import is_ollama_available
|
||
|
from reviewllama.vector_store import (create_retriever,
|
||
|
documents_from_path_list,
|
||
|
get_context_from_store)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def temp_files():
    """Yield paths to four small text files in a throwaway directory.

    Each file is named ``test_file_<i>.txt`` and holds one distinct sentence,
    so tests can tell the resulting documents apart by content.

    Yields:
        list[Path]: The created files, in creation order.
    """
    file_contents = [
        "This is the first test file with Python code examples.",
        "The second file contains JavaScript functions and classes.",
        "File three has documentation about testing best practices.",
        "Final file includes configuration settings and deployment info.",
    ]

    # TemporaryDirectory removes the whole tree on exit, so cleanup still
    # succeeds when setup fails part-way, when a test deletes one of the
    # files, or when extra files are left behind in the directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        files = []
        for i, content in enumerate(file_contents):
            file_path = Path(temp_dir) / f"test_file_{i}.txt"
            file_path.write_text(content, encoding="utf-8")
            files.append(file_path)

        yield files
|
||
|
|
||
|
|
||
|
def test_load_documents(temp_files):
    """Loading the fixture paths gives four populated Document objects."""
    loaded = documents_from_path_list(temp_files)

    assert len(loaded) == 4

    # Collect offenders per check; each list must be empty for the test to pass.
    non_documents = [item for item in loaded if not isinstance(item, Document)]
    assert not non_documents

    missing_content = [item for item in loaded if not item.page_content]
    assert not missing_content

    missing_source = [item for item in loaded if not item.metadata.get("source")]
    assert not missing_source
|
||
|
|
||
|
|
||
|
@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
@patch("reviewllama.vector_store.documents_from_path_list")
def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
    """Check that create_retriever wires loader, embeddings and FAISS together."""
    # Arrange: the loader returns a single canned document.
    loaded_docs = [Document(page_content="test", metadata={"source": "test.txt"})]
    mock_docs_from_list.return_value = loaded_docs

    # Arrange: the embeddings class hands back a recognisable instance.
    embedding = Mock()
    mock_embeddings.return_value = embedding

    # Arrange: FAISS builds a store whose as_retriever() gives our retriever.
    expected_retriever = Mock(spec=VectorStoreRetriever)
    store = Mock(**{"as_retriever.return_value": expected_retriever})
    mock_faiss.from_documents.return_value = store

    # Act
    built = create_retriever(["test.txt"], "test-embedding-model")

    # Assert: the result and every collaborator call.
    assert built == expected_retriever
    mock_embeddings.assert_called_once_with(model="test-embedding-model")
    mock_docs_from_list.assert_called_once_with(["test.txt"])
    mock_faiss.from_documents.assert_called_once_with(loaded_docs, embedding)
    store.as_retriever.assert_called_once()
|
||
|
|
||
|
def test_get_context_from_store_success():
    """Retrieved page contents come back joined by blank lines."""
    page_texts = [
        "First relevant document",
        "Second relevant document",
        "Third relevant document",
    ]

    retriever = Mock(spec=VectorStoreRetriever)
    retriever.get_relevant_documents.return_value = [
        Document(page_content=text, metadata={}) for text in page_texts
    ]

    context = get_context_from_store("test query", retriever)

    assert context == "\n\n".join(page_texts)
    retriever.get_relevant_documents.assert_called_once_with("test query")
|
||
|
|
||
|
@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files):
    """Run create_retriever then get_context_from_store with externals mocked."""
    # Stub the embeddings class so no model is contacted.
    mock_embeddings.return_value = Mock()

    # Stub the retriever that the fake vector store will hand out.
    stub_retriever = Mock(spec=VectorStoreRetriever)
    stub_retriever.get_relevant_documents.return_value = [
        Document(page_content="Relevant test content", metadata={})
    ]
    store = Mock(**{"as_retriever.return_value": stub_retriever})
    mock_faiss.from_documents.return_value = store

    # Exercise both stages using the first two fixture files.
    first_two = temp_files[:2]
    built = create_retriever(first_two, "test-model")
    answer = get_context_from_store("test query", built)

    assert answer == "Relevant test content"
    mock_embeddings.assert_called_once_with(model="test-model")
    stub_retriever.get_relevant_documents.assert_called_once_with("test query")
|
||
|
|
||
|
def test_documents_from_list_content_verification(temp_files):
    """Each fixture file's distinctive phrase shows up in some loaded document."""
    page_texts = [doc.page_content for doc in documents_from_path_list(temp_files)]

    # One phrase per fixture file, checked in file order.
    expected_snippets = (
        "Python code examples",
        "JavaScript functions",
        "testing best practices",
        "deployment info",
    )
    for snippet in expected_snippets:
        assert any(snippet in text for text in page_texts)
|
||
|
|
||
|
# Optional: integration test that requires an actual Ollama server
def test_create_retriever_with_real_ollama(temp_files, ollama_config):
    """Integration test with real Ollama (requires server running).

    Builds a retriever over the first two fixture files with the
    ``nomic-embed-text`` embedding model and runs one query through it.

    The test is skipped when ``is_ollama_available(ollama_config)`` is false,
    or when building or querying the retriever raises a non-assertion error.
    A failed assertion is reported as a failure.

    NOTE(review): ``ollama_config`` is a fixture defined outside this file
    view (presumably in conftest) -- confirm.
    """
    if not is_ollama_available(ollama_config):
        pytest.skip("Local Ollama server is not available")

    try:
        retriever = create_retriever(temp_files[:2], "nomic-embed-text")
        assert retriever is not None

        # Test actual retrieval
        context = get_context_from_store("Python code", retriever)
        assert isinstance(context, str)
    except AssertionError:
        # AssertionError is an Exception subclass; without this clause the
        # broad handler below would turn real test failures into skips.
        raise
    except Exception as e:
        pytest.skip(f"Ollama server not available or model not found: {e}")
|