"""Tests for the document loading and retriever helpers in reviewllama.vector_store."""

import os
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, Mock, patch

import pytest
from langchain_core.documents.base import Document
from langchain_core.vectorstores import VectorStoreRetriever

from reviewllama.utilities import is_ollama_available
from reviewllama.vector_store import (
    create_retriever,
    documents_from_path_list,
    get_context_from_store,
)


@pytest.fixture
def temp_files():
    """Create temporary test files.

    Yields:
        A list of ``Path`` objects, one per text file written into a fresh
        temporary directory. The directory and everything in it is removed
        when the fixture is torn down.
    """
    # Create test files with different content
    file_contents = [
        "This is the first test file with Python code examples.",
        "The second file contains JavaScript functions and classes.",
        "File three has documentation about testing best practices.",
        "Final file includes configuration settings and deployment info.",
    ]

    # The context manager removes the directory even if writing a file fails
    # before the yield, or if a test adds/removes files inside it.
    with tempfile.TemporaryDirectory() as temp_dir:
        files = []
        for i, content in enumerate(file_contents):
            file_path = Path(temp_dir) / f"test_file_{i}.txt"
            file_path.write_text(content, encoding="utf-8")
            files.append(file_path)

        yield files


def test_load_documents(temp_files):
    """Test that every path yields a Document with content and a source."""
    docs = documents_from_path_list(temp_files)

    assert len(docs) == 4
    assert all(isinstance(doc, Document) for doc in docs)
    assert all(doc.page_content for doc in docs)
    assert all(doc.metadata.get("source") for doc in docs)


@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
@patch("reviewllama.vector_store.documents_from_path_list")
def test_create_retriever(mock_docs_from_list, mock_faiss, mock_embeddings):
    """Test successful retriever creation"""
    # Setup mocks
    mock_docs = [Document(page_content="test", metadata={"source": "test.txt"})]
    mock_docs_from_list.return_value = mock_docs

    mock_embedding_instance = Mock()
    mock_embeddings.return_value = mock_embedding_instance

    mock_vectorstore = Mock()
    mock_retriever = Mock(spec=VectorStoreRetriever)
    mock_vectorstore.as_retriever.return_value = mock_retriever
    mock_faiss.from_documents.return_value = mock_vectorstore

    # Test
    result = create_retriever(["test.txt"], "test-embedding-model")

    # Assertions
    assert result == mock_retriever
    mock_embeddings.assert_called_once_with(model="test-embedding-model")
    mock_docs_from_list.assert_called_once_with(["test.txt"])
    mock_faiss.from_documents.assert_called_once_with(
        mock_docs, mock_embedding_instance
    )
    mock_vectorstore.as_retriever.assert_called_once()


def test_get_context_from_store_success():
    """Test successful context retrieval"""
    mock_retriever = Mock(spec=VectorStoreRetriever)
    mock_docs = [
        Document(page_content="First relevant document", metadata={}),
        Document(page_content="Second relevant document", metadata={}),
        Document(page_content="Third relevant document", metadata={}),
    ]
    mock_retriever.get_relevant_documents.return_value = mock_docs

    result = get_context_from_store("test query", mock_retriever)

    expected = "First relevant document\n\nSecond relevant document\n\nThird relevant document"
    assert result == expected
    mock_retriever.get_relevant_documents.assert_called_once_with("test query")


@patch("reviewllama.vector_store.OllamaEmbeddings")
@patch("reviewllama.vector_store.FAISS")
def test_full_pipeline_mock(mock_faiss, mock_embeddings, temp_files):
    """Test the full pipeline with mocked external dependencies"""
    # Setup mocks
    mock_embedding_instance = Mock()
    mock_embeddings.return_value = mock_embedding_instance

    mock_vectorstore = Mock()
    mock_retriever = Mock(spec=VectorStoreRetriever)
    mock_retriever.get_relevant_documents.return_value = [
        Document(page_content="Relevant test content", metadata={})
    ]
    mock_vectorstore.as_retriever.return_value = mock_retriever
    mock_faiss.from_documents.return_value = mock_vectorstore

    # Test full pipeline
    retriever = create_retriever(temp_files[:2], "test-model")
    context = get_context_from_store("test query", retriever)

    assert context == "Relevant test content"
    mock_embeddings.assert_called_once_with(model="test-model")
    mock_retriever.get_relevant_documents.assert_called_once_with("test query")


def test_documents_from_list_content_verification(temp_files):
    """Test that documents contain expected content"""
    docs = documents_from_path_list(temp_files)
    contents = [doc.page_content for doc in docs]

    # Check that we have the expected content
    assert any("Python code examples" in content for content in contents)
    assert any("JavaScript functions" in content for content in contents)
    assert any("testing best practices" in content for content in contents)
    assert any("deployment info" in content for content in contents)


# Optional: Integration test that requires actual Ollama server
def test_create_retriever_with_real_ollama(temp_files, ollama_config):
    """Integration test with real Ollama (requires server running)"""
    if not is_ollama_available(ollama_config):
        pytest.skip("Local Ollama server is not available")

    try:
        # This test uses a real embedding model; any error raised while
        # talking to Ollama is treated as "environment not ready" and skips.
        retriever = create_retriever(temp_files[:2], "nomic-embed-text")
        assert retriever is not None

        # Test actual retrieval
        context = get_context_from_store("Python code", retriever)
        assert isinstance(context, str)
    except AssertionError:
        # AssertionError is a subclass of Exception; without this clause a
        # failed assertion would be reported as a skip and the test could
        # never fail.
        raise
    except Exception as e:
        pytest.skip(f"Ollama server not available or model not found: {e}")