-- Document indexing coordination module
local M = {}

local database = require('notex.database.init')
local migrations = require('notex.database.migrations')
local updater = require('notex.index.updater')
local scanner = require('notex.index.scanner')
local parser = require('notex.parser')
local utils = require('notex.utils')

-- Initialize indexing system
function M.init(database_path)
  local ok, err = database.init(database_path)
  if not ok then
    return false, "Failed to initialize database: " .. err
  end

  ok, err = migrations.init()
  if not ok then
    return false, "Failed to initialize migrations: " .. err
  end

  utils.log("INFO", "Document indexing system initialized")
  return true, "Indexing system initialized successfully"
end

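-- Illustrative bootstrap (a sketch assuming a Neovim context; the database
-- path is an example, not a location this module mandates):
--
--   local index = require('notex.index')
--   local ok, msg = index.init(vim.fn.stdpath('data') .. '/notex/index.db')
--   if not ok then
--     vim.notify(msg, vim.log.levels.ERROR)
--   end
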
-- Index documents in directory
function M.index_documents(directory_path, options)
  options = options or {}
  local force_reindex = options.force_reindex or false
  local recursive = options.recursive ~= false -- parsed but not currently forwarded to the updater

  local result = {
    success = false,
    directory_path = directory_path,
    stats = {},
    errors = {},
    operation = force_reindex and "reindex" or "update"
  }

  -- Validate directory exists
  if not utils.file_exists(directory_path) then
    table.insert(result.errors, "Directory does not exist: " .. directory_path)
    return result
  end

  -- Best-effort directory check: on most platforms io.open succeeds on a
  -- directory but reading from it fails, whereas a regular file yields data
  local dir_check = io.open(directory_path, "r")
  if dir_check then
    local first_byte = dir_check:read(1)
    dir_check:close()
    if first_byte ~= nil then
      table.insert(result.errors, "Path is not a directory: " .. directory_path)
      return result
    end
  end

  local start_timer = utils.timer("Document indexing")

  if force_reindex then
    -- Full reindex
    local ok, reindex_result = updater.reindex_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Reindex failed: " .. reindex_result)
      return result
    end

    result.stats = reindex_result.stats
    utils.log("INFO", string.format("Completed full reindex of %s", directory_path))
  else
    -- Incremental update
    local ok, update_result = updater.update_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Update failed: " .. update_result)
      return result
    end

    result.stats = update_result.stats
    utils.log("INFO", string.format("Completed incremental update of %s", directory_path))
  end

  start_timer()
  result.success = true

  return result
end

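-- Illustrative call (the notes directory is an example; assumes Neovim for
-- vim.fn.expand and vim.notify):
--
--   local result = require('notex.index').index_documents(vim.fn.expand('~/notes'), {
--     force_reindex = false, -- incremental update (the default)
--   })
--   if not result.success then
--     vim.notify(table.concat(result.errors, '\n'), vim.log.levels.WARN)
--   end
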
-- Get indexed documents
function M.get_indexed_documents(filters)
  filters = filters or {}
  local limit = filters.limit or 100
  local offset = filters.offset or 0
  local order_by = filters.order_by or "updated_at DESC"

  -- order_by is interpolated into the SQL, so restrict it to known-safe values
  local allowed_order = {
    ["updated_at DESC"] = true, ["updated_at ASC"] = true,
    ["created_at DESC"] = true, ["created_at ASC"] = true,
    ["file_path DESC"] = true, ["file_path ASC"] = true,
  }
  if not allowed_order[order_by] then
    order_by = "updated_at DESC"
  end

  local query = string.format([[
    SELECT d.*, COUNT(p.id) as property_count
    FROM documents d
    LEFT JOIN properties p ON d.id = p.document_id
    GROUP BY d.id
    ORDER BY %s
    LIMIT %d OFFSET %d
  ]], order_by, limit, offset)

  local ok, result = database.execute(query)
  if not ok then
    return nil, "Failed to get indexed documents: " .. result
  end

  return result
end

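-- Illustrative listing (order_by must be one of the whitelisted values above):
--
--   local docs, err = require('notex.index').get_indexed_documents({
--     limit = 20,
--     order_by = 'created_at DESC',
--   })
--   if docs then
--     for _, doc in ipairs(docs) do
--       print(doc.file_path, doc.property_count)
--     end
--   end
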
-- Search documents by properties
function M.search_documents(search_criteria)
  local conditions = {}
  local params = {}

  -- Build WHERE clause. Property filters use EXISTS subqueries so that
  -- several filters can each match a different property row of the same
  -- document (AND-ing key/value pairs against one joined row never can).
  if search_criteria.status then
    table.insert(conditions,
      "EXISTS (SELECT 1 FROM properties ps WHERE ps.document_id = d.id AND ps.key = 'status' AND ps.value = :status)")
    params.status = search_criteria.status
  end

  if search_criteria.tags then
    if type(search_criteria.tags) == "string" then
      table.insert(conditions,
        "EXISTS (SELECT 1 FROM properties pt WHERE pt.document_id = d.id AND pt.key = 'tags' AND pt.value LIKE :tag)")
      params.tag = '%' .. search_criteria.tags .. '%'
    elseif type(search_criteria.tags) == "table" then
      local tag_conditions = {}
      for i, tag in ipairs(search_criteria.tags) do
        local param_name = "tag_" .. i
        table.insert(tag_conditions,
          "EXISTS (SELECT 1 FROM properties pt WHERE pt.document_id = d.id AND pt.key = 'tags' AND pt.value LIKE :" .. param_name .. ")")
        params[param_name] = '%' .. tag .. '%'
      end
      table.insert(conditions, "(" .. table.concat(tag_conditions, " OR ") .. ")")
    end
  end

  if search_criteria.created_after then
    table.insert(conditions, "d.created_at >= :created_after")
    params.created_after = search_criteria.created_after
  end

  if search_criteria.created_before then
    table.insert(conditions, "d.created_at <= :created_before")
    params.created_before = search_criteria.created_before
  end

  if search_criteria.text_search then
    -- Parenthesised so the OR does not bleed into neighbouring AND conditions
    table.insert(conditions,
      "(d.file_path LIKE :text_search OR EXISTS (SELECT 1 FROM properties p2 WHERE p2.document_id = d.id AND p2.value LIKE :text_search))")
    params.text_search = '%' .. search_criteria.text_search .. '%'
  end

  -- Build query
  local where_clause = #conditions > 0 and "WHERE " .. table.concat(conditions, " AND ") or ""
  local limit = search_criteria.limit or 50
  local offset = search_criteria.offset or 0

  local query = string.format([[
    SELECT d.*, COUNT(p.id) as property_count
    FROM documents d
    LEFT JOIN properties p ON d.id = p.document_id
    %s
    GROUP BY d.id
    ORDER BY d.updated_at DESC
    LIMIT %d OFFSET %d
  ]], where_clause, limit, offset)

  local ok, result = database.execute(query, params)
  if not ok then
    return nil, "Search failed: " .. result
  end

  -- Get total count for pagination; the EXISTS conditions only reference d,
  -- so no join is needed here
  local count_query = string.format([[
    SELECT COUNT(*) as total
    FROM documents d
    %s
  ]], where_clause)

  local count_ok, count_result = database.execute(count_query, params)
  local total_count = count_ok and count_result[1].total or 0

  return {
    documents = result,
    total_count = total_count,
    limit = limit,
    offset = offset
  }
end

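-- Illustrative search (the 'status' and 'tags' keys mirror the property
-- names this function already filters on; the values are made up):
--
--   local hits, err = require('notex.index').search_documents({
--     status = 'draft',
--     tags = { 'lua', 'indexing' }, -- multiple tags are OR-ed together
--     text_search = 'sqlite',
--     limit = 10,
--   })
--   if hits then
--     print(hits.total_count .. ' matching documents')
--   end
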
-- Get document details
function M.get_document_details(document_id)
  -- Get document
  local ok, doc_result = database.documents.get_by_id(document_id)
  if not ok then
    return nil, "Failed to get document: " .. doc_result
  end

  if not doc_result then
    return nil, "Document not found: " .. document_id
  end

  -- Get properties
  local prop_ok, prop_result = database.properties.get_by_document(document_id)
  if not prop_ok then
    return nil, "Failed to get document properties: " .. prop_result
  end

  -- Parse document for additional details
  local parse_result, parse_err = parser.parse_document(doc_result.file_path)
  if parse_err then
    utils.log("WARN", "Failed to parse document for details", {
      document_id = document_id,
      error = parse_err
    })
  end

  return {
    document = doc_result,
    properties = prop_result or {},
    parse_result = parse_result,
    file_exists = utils.file_exists(doc_result.file_path),
    is_current = parse_result and parse_result.success or false
  }
end

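-- Illustrative lookup (doc_id is a placeholder; ids come from the documents
-- table, e.g. via get_indexed_documents):
--
--   local details, err = require('notex.index').get_document_details(doc_id)
--   if details then
--     print(details.document.file_path, details.file_exists)
--   end
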
-- Remove document from index
function M.remove_document(document_id)
  -- Get document details first
  local doc_details, err = M.get_document_details(document_id)
  if not doc_details then
    return false, err
  end

  local ok, remove_result = updater.remove_document(doc_details.document.file_path)
  if not ok then
    return false, "Failed to remove document: " .. remove_result
  end

  utils.log("INFO", string.format("Removed document from index: %s", doc_details.document.file_path))

  return true, remove_result
end

-- Update document in index
function M.update_document(file_path)
  local ok, result = updater.index_document(file_path)
  if not ok then
    return false, "Failed to update document: " .. result
  end

  utils.log("INFO", string.format("Updated document in index: %s", file_path))

  return true, result
end

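-- Illustrative single-document maintenance (assumes a Neovim context for
-- vim.fn.expand; doc_id is a placeholder):
--
--   local index = require('notex.index')
--   index.update_document(vim.fn.expand('%:p')) -- re-index the current buffer's file
--   index.remove_document(doc_id)               -- drop one entry from the index
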
-- Get index statistics
function M.get_statistics()
  local stats = updater.get_index_stats()

  -- Add additional statistics
  local db_status = database.status()
  stats.database = db_status

  -- Get recent activity; created_at is stored as a unix timestamp, so cast
  -- strftime's text result to INTEGER before comparing
  local recent_query = [[
    SELECT COUNT(*) as count,
           strftime('%Y-%m-%d', datetime(created_at, 'unixepoch')) as date
    FROM documents
    WHERE created_at > CAST(strftime('%s', 'now', '-7 days') AS INTEGER)
    GROUP BY date
    ORDER BY date DESC
  ]]

  local recent_ok, recent_result = database.execute(recent_query)
  if recent_ok then
    stats.recent_activity = recent_result
  end

  return stats
end

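-- Illustrative stats dump (vim.inspect assumes a Neovim context):
--
--   local stats = require('notex.index').get_statistics()
--   print(vim.inspect(stats.database))
--   for _, day in ipairs(stats.recent_activity or {}) do
--     print(day.date, day.count)
--   end
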
-- Validate index integrity
function M.validate_index()
  local validation_result = {
    valid = true,
    issues = {},
    stats = {}
  }

  -- Check for orphaned properties
  local orphaned_query = [[
    SELECT COUNT(*) as count FROM properties p
    LEFT JOIN documents d ON p.document_id = d.id
    WHERE d.id IS NULL
  ]]

  local ok, result = database.execute(orphaned_query)
  if ok and result[1].count > 0 then
    validation_result.valid = false
    table.insert(validation_result.issues, string.format("Found %d orphaned properties", result[1].count))
  end

  -- Check for documents that no longer exist
  local docs_query = "SELECT id, file_path FROM documents"
  ok, result = database.execute(docs_query)
  if ok then
    local missing_files = 0
    for _, doc in ipairs(result) do
      if not utils.file_exists(doc.file_path) then
        missing_files = missing_files + 1
      end
    end

    if missing_files > 0 then
      validation_result.valid = false
      table.insert(validation_result.issues, string.format("Found %d documents pointing to missing files", missing_files))
    end

    validation_result.stats.missing_files = missing_files
  end

  -- Merge overall statistics without clobbering missing_files recorded above
  for key, value in pairs(M.get_statistics()) do
    validation_result.stats[key] = value
  end

  return validation_result
end

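-- Illustrative integrity check (vim.notify assumes a Neovim context):
--
--   local report = require('notex.index').validate_index()
--   if not report.valid then
--     for _, issue in ipairs(report.issues) do
--       vim.notify(issue, vim.log.levels.WARN)
--     end
--   end
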
-- Cleanup orphaned data
function M.cleanup_index()
  local cleanup_result = {
    removed_orphans = 0,
    removed_missing = 0,
    errors = {}
  }

  -- Remove orphaned properties
  local orphaned_query = [[
    DELETE FROM properties WHERE document_id NOT IN (SELECT id FROM documents)
  ]]

  -- On success, database.execute returns the affected row count as its
  -- second value; on failure it returns an error message
  local ok, exec_result = database.execute(orphaned_query)
  if not ok then
    table.insert(cleanup_result.errors, "Failed to remove orphaned properties: " .. exec_result)
  else
    cleanup_result.removed_orphans = exec_result
  end

  -- Remove documents pointing to missing files
  local docs_query = "SELECT id, file_path FROM documents"
  local docs_ok, docs = database.execute(docs_query)
  if docs_ok then
    for _, doc in ipairs(docs) do
      if not utils.file_exists(doc.file_path) then
        local remove_ok, remove_err = updater.remove_document(doc.file_path)
        if remove_ok then
          cleanup_result.removed_missing = cleanup_result.removed_missing + 1
        else
          table.insert(cleanup_result.errors, string.format("Failed to remove missing document %s: %s", doc.file_path, remove_err))
        end
      end
    end
  end

  return cleanup_result
end

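-- Illustrative maintenance pass combining the two calls above:
--
--   local index = require('notex.index')
--   if not index.validate_index().valid then
--     local cleaned = index.cleanup_index()
--     print(string.format('removed %d orphaned properties, %d missing documents',
--       cleaned.removed_orphans, cleaned.removed_missing))
--   end
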
return M