-- Document indexing coordination module
--
-- Thin coordination layer over the database, migration, updater and parser
-- modules: it initialises storage, triggers (re)indexing of a directory,
-- exposes query helpers, and offers integrity validation / cleanup.
local M = {}

local database = require('notex.database.init')
local migrations = require('notex.database.migrations')
local updater = require('notex.index.updater')
local scanner = require('notex.index.scanner')
local parser = require('notex.parser')
local utils = require('notex.utils')

-- Largest magnitude that is exactly representable as an integer in a double.
local MAX_SAFE_INTEGER = 2 ^ 53

-- Coerce `value` to an integer suitable for a `%d` format directive.
-- Returns `default` when `value` is not numeric, is NaN, or is out of range.
-- Needed because string.format("%d", 1.5) raises on Lua 5.3+.
local function to_integer(value, default)
  local n = tonumber(value)
  if not n or n ~= n or n > MAX_SAFE_INTEGER or n < -MAX_SAFE_INTEGER then
    return default
  end
  return math.floor(n)
end

-- Validate an ORDER BY expression before it is interpolated into SQL.
-- Accepts a comma-separated list of `identifier [ASC|DESC]` terms (identifiers
-- may be qualified with dots). Returns the normalised expression, or nil when
-- any term contains anything else, so arbitrary SQL cannot be injected.
local function sanitize_order_by(order_by)
  if type(order_by) ~= "string" then
    return nil
  end

  local terms = {}
  for term in (order_by .. ","):gmatch("([^,]*),") do
    local column, direction = term:match("^%s*([%a_][%w_%.]*)%s*(%a*)%s*$")
    if not column then
      return nil
    end
    direction = direction:upper()
    if direction == "" then
      terms[#terms + 1] = column
    elseif direction == "ASC" or direction == "DESC" then
      terms[#terms + 1] = column .. " " .. direction
    else
      return nil
    end
  end

  if #terms == 0 then
    return nil
  end
  return table.concat(terms, ", ")
end

-- Build an EXISTS sub-query matching a document property.
-- `key` and `operator` are trusted literals supplied by this module only;
-- the compared value is bound through the named parameter `param_name`.
local function property_exists(key, operator, param_name)
  return string.format(
    "EXISTS (SELECT 1 FROM properties ps WHERE ps.document_id = d.id"
      .. " AND ps.key = '%s' AND ps.value %s :%s)",
    key, operator, param_name
  )
end

--- Initialize indexing system.
-- Initialises the database and then the migrations module.
-- @param database_path value forwarded to `database.init`
-- @return true and a success message, or false and an error message
function M.init(database_path)
  local ok, err = database.init(database_path)
  if not ok then
    return false, "Failed to initialize database: " .. tostring(err)
  end

  ok, err = migrations.init()
  if not ok then
    return false, "Failed to initialize migrations: " .. tostring(err)
  end

  utils.log("INFO", "Document indexing system initialized")
  return true, "Indexing system initialized successfully"
end

--- Index documents in directory.
-- @param directory_path directory to index
-- @param options optional table; `force_reindex` (truthy) selects a full
--   reindex instead of an incremental update. Other fields are currently
--   not forwarded to the updater.
-- @return result table with `success`, `directory_path`, `stats`, `errors`
--   and `operation` ("reindex" or "update")
function M.index_documents(directory_path, options)
  options = options or {}
  local force_reindex = options.force_reindex or false

  local result = {
    success = false,
    directory_path = directory_path,
    stats = {},
    errors = {},
    operation = force_reindex and "reindex" or "update",
  }

  -- Validate directory exists
  if not utils.file_exists(directory_path) then
    table.insert(result.errors, "Directory does not exist: " .. tostring(directory_path))
    return result
  end

  -- Check that the path can be opened.
  -- NOTE(review): io.open succeeds on regular files too (and on directories
  -- only on some platforms), so this does not really prove the path is a
  -- directory — confirm whether a dedicated directory check is available.
  local dir_check = io.open(directory_path)
  if not dir_check then
    table.insert(result.errors, "Path is not a directory: " .. tostring(directory_path))
    return result
  end
  dir_check:close()

  local start_timer = utils.timer("Document indexing")

  if force_reindex then
    -- Full reindex
    local ok, reindex_result = updater.reindex_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Reindex failed: " .. tostring(reindex_result))
      return result
    end
    result.stats = reindex_result.stats
    utils.log("INFO", string.format("Completed full reindex of %s", directory_path))
  else
    -- Incremental update
    local ok, update_result = updater.update_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Update failed: " .. tostring(update_result))
      return result
    end
    result.stats = update_result.stats
    utils.log("INFO", string.format("Completed incremental update of %s", directory_path))
  end

  start_timer()
  result.success = true
  return result
end

--- Get indexed documents.
-- @param filters optional table with `limit` (default 100), `offset`
--   (default 0) and `order_by` (default "updated_at DESC"). `order_by` must be
--   a comma-separated list of column names, each optionally followed by
--   ASC or DESC; anything else is rejected.
-- @return query result on success, or nil and an error message
function M.get_indexed_documents(filters)
  filters = filters or {}
  local limit = to_integer(filters.limit, 100)
  local offset = to_integer(filters.offset, 0)

  local order_by = sanitize_order_by(filters.order_by or "updated_at DESC")
  if not order_by then
    return nil, "Failed to get indexed documents: invalid order_by: " .. tostring(filters.order_by)
  end

  local query = string.format([[
    SELECT d.*, COUNT(p.id) as property_count
    FROM documents d
    LEFT JOIN properties p ON d.id = p.document_id
    GROUP BY d.id
    ORDER BY %s
    LIMIT %d OFFSET %d
  ]], order_by, limit, offset)

  local ok, result = database.execute(query)
  if not ok then
    return nil, "Failed to get indexed documents: " .. tostring(result)
  end

  return result
end

--- Search documents by properties.
-- All supplied criteria must match (they are combined with AND). Property
-- criteria are expressed as EXISTS sub-queries so that several of them can be
-- combined and so that `property_count` still counts every property.
-- @param search_criteria optional table with any of: `status`, `tags` (string
--   or list of strings; a list matches when any tag matches), `created_after`,
--   `created_before`, `text_search`, `limit` (default 50), `offset` (default 0)
-- @return table `{ documents, total_count, limit, offset }`, or nil and an
--   error message
function M.search_documents(search_criteria)
  search_criteria = search_criteria or {}

  local conditions = {}
  local params = {}

  -- Build WHERE clause
  if search_criteria.status then
    table.insert(conditions, property_exists("status", "=", "status"))
    params.status = search_criteria.status
  end

  local tags = search_criteria.tags
  if type(tags) == "string" then
    table.insert(conditions, property_exists("tags", "LIKE", "tag"))
    params.tag = '%' .. tags .. '%'
  elseif type(tags) == "table" then
    local tag_conditions = {}
    for i, tag in ipairs(tags) do
      local param_name = "tag_" .. i
      tag_conditions[#tag_conditions + 1] = property_exists("tags", "LIKE", param_name)
      params[param_name] = '%' .. tostring(tag) .. '%'
    end
    -- An empty list would otherwise yield "()", which is invalid SQL.
    if #tag_conditions > 0 then
      table.insert(conditions, "(" .. table.concat(tag_conditions, " OR ") .. ")")
    end
  end

  if search_criteria.created_after then
    table.insert(conditions, "d.created_at >= :created_after")
    params.created_after = search_criteria.created_after
  end

  if search_criteria.created_before then
    table.insert(conditions, "d.created_at <= :created_before")
    params.created_before = search_criteria.created_before
  end

  if search_criteria.text_search then
    -- Parenthesised: AND binds tighter than OR, so without the parentheses
    -- the EXISTS branch would bypass every other condition.
    table.insert(conditions,
      "(d.file_path LIKE :text_search OR EXISTS (SELECT 1 FROM properties p2"
        .. " WHERE p2.document_id = d.id AND p2.value LIKE :text_search))")
    params.text_search = '%' .. search_criteria.text_search .. '%'
  end

  -- Build query
  local where_clause = ""
  if #conditions > 0 then
    where_clause = "WHERE " .. table.concat(conditions, " AND ")
  end
  local limit = to_integer(search_criteria.limit, 50)
  local offset = to_integer(search_criteria.offset, 0)

  local query = string.format([[
    SELECT d.*, COUNT(p.id) as property_count
    FROM documents d
    LEFT JOIN properties p ON d.id = p.document_id
    %s
    GROUP BY d.id
    ORDER BY d.updated_at DESC
    LIMIT %d OFFSET %d
  ]], where_clause, limit, offset)

  local ok, result = database.execute(query, params)
  if not ok then
    return nil, "Search failed: " .. tostring(result)
  end

  -- Get total count (best effort: falls back to 0 when the count fails)
  local count_query = string.format([[
    SELECT COUNT(DISTINCT d.id) as total
    FROM documents d
    %s
  ]], where_clause)

  local count_ok, count_result = database.execute(count_query, params)
  local total_count = 0
  if count_ok and type(count_result) == "table" and count_result[1] then
    total_count = count_result[1].total or 0
  end

  return {
    documents = result,
    total_count = total_count,
    limit = limit,
    offset = offset,
  }
end

--- Get document details.
-- Loads the document row and its properties, then re-parses the file on disk.
-- A parse failure is logged as a warning and does not fail the call.
-- @param document_id identifier passed to `database.documents.get_by_id`
-- @return table `{ document, properties, parse_result, file_exists,
--   is_current }`, or nil and an error message
function M.get_document_details(document_id)
  -- Get document
  local doc_ok, doc_result = database.documents.get_by_id(document_id)
  if not doc_ok then
    return nil, "Failed to get document: " .. tostring(doc_result)
  end

  if not doc_result then
    return nil, "Document not found: " .. tostring(document_id)
  end

  -- Get properties
  local prop_ok, prop_result = database.properties.get_by_document(document_id)
  if not prop_ok then
    return nil, "Failed to get document properties: " .. tostring(prop_result)
  end

  -- Parse document for additional details
  local parse_result, parse_err = parser.parse_document(doc_result.file_path)
  if parse_err then
    utils.log("WARN", "Failed to parse document for details", {
      document_id = document_id,
      error = parse_err,
    })
  end

  return {
    document = doc_result,
    properties = prop_result or {},
    parse_result = parse_result,
    file_exists = utils.file_exists(doc_result.file_path),
    is_current = parse_result and parse_result.success or false,
  }
end

--- Remove document from index.
-- @param document_id identifier of the document to remove
-- @return true and the updater's result, or false and an error message
function M.remove_document(document_id)
  -- Get document details first (the updater removes by file path)
  local doc_details, err = M.get_document_details(document_id)
  if not doc_details then
    return false, err
  end

  local file_path = doc_details.document.file_path
  local ok, remove_result = updater.remove_document(file_path)
  if not ok then
    return false, "Failed to remove document: " .. tostring(remove_result)
  end

  utils.log("INFO", string.format("Removed document from index: %s", file_path))
  return true, remove_result
end

--- Update document in index.
-- @param file_path path forwarded to `updater.index_document`
-- @return true and the updater's result, or false and an error message
function M.update_document(file_path)
  local ok, result = updater.index_document(file_path)
  if not ok then
    return false, "Failed to update document: " .. tostring(result)
  end

  utils.log("INFO", string.format("Updated document in index: %s", file_path))
  return true, result
end

--- Get index statistics.
-- Starts from `updater.get_index_stats()` and adds `database` (the result of
-- `database.status()`) and, when the query succeeds, `recent_activity`
-- (documents created in the last 7 days, grouped by day).
-- @return statistics table
function M.get_statistics()
  local stats = updater.get_index_stats() or {}

  -- Add additional statistics
  stats.database = database.status()

  -- Get recent activity
  local recent_query = [[
    SELECT COUNT(*) as count,
           strftime('%Y-%m-%d', datetime(created_at, 'unixepoch')) as date
    FROM documents
    WHERE created_at > strftime('%s', 'now', '-7 days')
    GROUP BY date
    ORDER BY date DESC
  ]]

  local recent_ok, recent_result = database.execute(recent_query)
  if recent_ok then
    stats.recent_activity = recent_result
  end

  return stats
end

--- Validate index integrity.
-- Checks for orphaned properties and for documents whose file is missing.
-- Any reported issue (including a check that could not be run) sets
-- `valid = false`.
-- @return table `{ valid, issues, stats }`; `stats` is the result of
--   `M.get_statistics()` plus `missing_files` when that check ran
function M.validate_index()
  local validation_result = {
    valid = true,
    issues = {},
    stats = {},
  }

  local function report(issue)
    validation_result.valid = false
    table.insert(validation_result.issues, issue)
  end

  -- Check for orphaned properties
  local orphaned_query = [[
    SELECT COUNT(*) as count
    FROM properties p
    LEFT JOIN documents d ON p.document_id = d.id
    WHERE d.id IS NULL
  ]]

  local ok, result = database.execute(orphaned_query)
  if not ok then
    report("Failed to check for orphaned properties: " .. tostring(result))
  else
    local row = type(result) == "table" and result[1] or nil
    local orphan_count = row and tonumber(row.count) or 0
    if orphan_count > 0 then
      report(string.format("Found %d orphaned properties", orphan_count))
    end
  end

  -- Check for documents that no longer exist
  local missing_files
  local docs_query = "SELECT id, file_path FROM documents"
  ok, result = database.execute(docs_query)
  if not ok then
    report("Failed to check for missing files: " .. tostring(result))
  else
    missing_files = 0
    for _, doc in ipairs(result) do
      if not utils.file_exists(doc.file_path) then
        missing_files = missing_files + 1
      end
    end
    if missing_files > 0 then
      report(string.format("Found %d documents pointing to missing files", missing_files))
    end
  end

  -- Get overall statistics first, then attach missing_files so that it is
  -- not lost when the stats table is replaced.
  validation_result.stats = M.get_statistics()
  validation_result.stats.missing_files = missing_files

  return validation_result
end

--- Cleanup orphaned data.
-- Deletes properties whose document no longer exists, then removes indexed
-- documents whose file is missing on disk.
-- @return table `{ removed_orphans, removed_missing, errors }`
function M.cleanup_index()
  local cleanup_result = {
    removed_orphans = 0,
    removed_missing = 0,
    errors = {},
  }

  -- Remove orphaned properties
  local orphaned_query = [[
    DELETE FROM properties
    WHERE document_id NOT IN (SELECT id FROM documents)
  ]]

  local ok, orphan_result = database.execute(orphaned_query)
  if not ok then
    table.insert(cleanup_result.errors,
      "Failed to remove orphaned properties: " .. tostring(orphan_result))
  else
    -- On success the second return value is the affected row count
    cleanup_result.removed_orphans = orphan_result
  end

  -- Remove documents pointing to missing files
  local docs_query = "SELECT id, file_path FROM documents"
  local docs_ok, docs = database.execute(docs_query)
  if not docs_ok then
    table.insert(cleanup_result.errors, "Failed to list documents: " .. tostring(docs))
    return cleanup_result
  end

  for _, doc in ipairs(docs) do
    if not utils.file_exists(doc.file_path) then
      local remove_ok, remove_err = updater.remove_document(doc.file_path)
      if remove_ok then
        cleanup_result.removed_missing = cleanup_result.removed_missing + 1
      else
        table.insert(cleanup_result.errors,
          string.format("Failed to remove missing document %s: %s",
            tostring(doc.file_path), tostring(remove_err)))
      end
    end
  end

  return cleanup_result
end

return M