notex.nvim/lua/notex/index/init.lua

-- Document indexing coordination module
local M = {}
local database = require('notex.database.init')
local migrations = require('notex.database.migrations')
local updater = require('notex.index.updater')
local scanner = require('notex.index.scanner')
local parser = require('notex.parser')
local utils = require('notex.utils')
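
-- The SQL below assumes the schema managed by notex.database.migrations:
-- a documents table (id, file_path, created_at, updated_at, ...) and a
-- properties table (id, document_id, key, value) linked by document_id.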
-- Initialize indexing system
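-- Usage (the path below is only an example; any writable location works):
--   local index = require('notex.index')
--   local ok, msg = index.init(vim.fn.stdpath('data') .. '/notex/index.db')
--   if not ok then vim.notify(msg, vim.log.levels.ERROR) end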
function M.init(database_path)
  local ok, err = database.init(database_path)
  if not ok then
    return false, "Failed to initialize database: " .. err
  end

  ok, err = migrations.init()
  if not ok then
    return false, "Failed to initialize migrations: " .. err
  end

  utils.log("INFO", "Document indexing system initialized")
  return true, "Indexing system initialized successfully"
end

-- Index documents in directory
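-- Options: force_reindex (default false), recursive (default true).
-- Usage:
--   local result = require('notex.index').index_documents(vim.fn.expand('~/notes'), { force_reindex = true })
--   if not result.success then vim.notify(table.concat(result.errors, '\n'), vim.log.levels.WARN) end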
function M.index_documents(directory_path, options)
  options = options or {}
  local force_reindex = options.force_reindex or false
  local recursive = options.recursive ~= false

  local result = {
    success = false,
    directory_path = directory_path,
    stats = {},
    errors = {},
    operation = force_reindex and "reindex" or "update"
  }

  -- Validate that the path exists and is actually a directory
  if vim.fn.isdirectory(directory_path) == 0 then
    if not utils.file_exists(directory_path) then
      table.insert(result.errors, "Directory does not exist: " .. directory_path)
    else
      table.insert(result.errors, "Path is not a directory: " .. directory_path)
    end
    return result
  end

  local start_timer = utils.timer("Document indexing")

  if force_reindex then
    -- Full reindex
    local ok, reindex_result = updater.reindex_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Reindex failed: " .. reindex_result)
      return result
    end

    result.stats = reindex_result.stats
    utils.log("INFO", string.format("Completed full reindex of %s", directory_path))
  else
    -- Incremental update
    local ok, update_result = updater.update_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Update failed: " .. update_result)
      return result
    end

    result.stats = update_result.stats
    utils.log("INFO", string.format("Completed incremental update of %s", directory_path))
  end

  start_timer()

  result.success = true
  return result
end

-- Get indexed documents
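-- filters supports limit (default 100), offset (default 0) and order_by (default "updated_at DESC").
-- Returns a list of document rows with an extra property_count column, or nil plus an error message.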
function M.get_indexed_documents(filters)
  filters = filters or {}
  local limit = filters.limit or 100
  local offset = filters.offset or 0
  local order_by = filters.order_by or "updated_at DESC"

  -- order_by is interpolated into the SQL; restrict it to a column name and direction
  if not order_by:match("^[%w_%.]+%s*%a*$") then
    order_by = "updated_at DESC"
  end

  local query = string.format([[
    SELECT d.*, COUNT(p.id) as property_count
    FROM documents d
    LEFT JOIN properties p ON d.id = p.document_id
    GROUP BY d.id
    ORDER BY %s
    LIMIT %d OFFSET %d
  ]], order_by, limit, offset)

  local ok, result = database.execute(query)
  if not ok then
    return nil, "Failed to get indexed documents: " .. result
  end

  return result
end

-- Search documents by properties
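-- search_criteria may contain: status, tags (string or list of strings), created_after,
-- created_before (unix timestamps), text_search, limit (default 50) and offset (default 0).
-- Returns { documents = rows, total_count = n, limit = limit, offset = offset }, or nil plus an error.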
function M.search_documents(search_criteria)
  local conditions = {}
  local params = {}
  local joins = {}

  -- Build WHERE clause; property filters use EXISTS subqueries so that several
  -- filters can match different property rows and property_count stays correct
  if search_criteria.status then
    table.insert(conditions, "EXISTS (SELECT 1 FROM properties ps WHERE ps.document_id = d.id AND ps.key = 'status' AND ps.value = :status)")
    params.status = search_criteria.status
  end

  if search_criteria.tags then
    if type(search_criteria.tags) == "string" then
      table.insert(conditions, "EXISTS (SELECT 1 FROM properties pt WHERE pt.document_id = d.id AND pt.key = 'tags' AND pt.value LIKE :tag)")
      params.tag = '%' .. search_criteria.tags .. '%'
    elseif type(search_criteria.tags) == "table" then
      local tag_conditions = {}
      for i, tag in ipairs(search_criteria.tags) do
        local param_name = "tag_" .. i
        table.insert(tag_conditions, "EXISTS (SELECT 1 FROM properties pt WHERE pt.document_id = d.id AND pt.key = 'tags' AND pt.value LIKE :" .. param_name .. ")")
        params[param_name] = '%' .. tag .. '%'
      end
      table.insert(conditions, "(" .. table.concat(tag_conditions, " OR ") .. ")")
    end
  end

  if search_criteria.created_after then
    table.insert(conditions, "d.created_at >= :created_after")
    params.created_after = search_criteria.created_after
  end

  if search_criteria.created_before then
    table.insert(conditions, "d.created_at <= :created_before")
    params.created_before = search_criteria.created_before
  end

  if search_criteria.text_search then
    table.insert(conditions, "(d.file_path LIKE :text_search OR EXISTS (SELECT 1 FROM properties p2 WHERE p2.document_id = d.id AND p2.value LIKE :text_search))")
    params.text_search = '%' .. search_criteria.text_search .. '%'
  end

  -- Build query
  local where_clause = #conditions > 0 and "WHERE " .. table.concat(conditions, " AND ") or ""
  local limit = search_criteria.limit or 50
  local offset = search_criteria.offset or 0

  local query = string.format([[
    SELECT DISTINCT d.*, COUNT(p.id) as property_count
    FROM documents d
    %s
    LEFT JOIN properties p ON d.id = p.document_id
    %s
    GROUP BY d.id
    ORDER BY d.updated_at DESC
    LIMIT %d OFFSET %d
  ]], #joins > 0 and table.concat(joins, " ") or "", where_clause, limit, offset)

  local ok, result = database.execute(query, params)
  if not ok then
    return nil, "Search failed: " .. result
  end

  -- Get total count
  local count_query = string.format([[
    SELECT COUNT(DISTINCT d.id) as total
    FROM documents d
    %s
    LEFT JOIN properties p ON d.id = p.document_id
    %s
  ]], #joins > 0 and table.concat(joins, " ") or "", where_clause)

  local count_ok, count_result = database.execute(count_query, params)
  local total_count = count_ok and count_result[1].total or 0

  return {
    documents = result,
    total_count = total_count,
    limit = limit,
    offset = offset
  }
end

-- Get document details
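-- Returns a table with document, properties, parse_result, file_exists and is_current,
-- or nil plus an error message when the document cannot be loaded.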
function M.get_document_details(document_id)
  -- Get document
  local ok, doc_result = database.documents.get_by_id(document_id)
  if not ok then
    return nil, "Failed to get document: " .. doc_result
  end

  if not doc_result then
    return nil, "Document not found: " .. document_id
  end

  -- Get properties
  local prop_ok, prop_result = database.properties.get_by_document(document_id)
  if not prop_ok then
    return nil, "Failed to get document properties: " .. prop_result
  end

  -- Parse document for additional details
  local parse_result, parse_err = parser.parse_document(doc_result.file_path)
  if parse_err then
    utils.log("WARN", "Failed to parse document for details", {
      document_id = document_id,
      error = parse_err
    })
  end

  return {
    document = doc_result,
    properties = prop_result or {},
    parse_result = parse_result,
    file_exists = utils.file_exists(doc_result.file_path),
    is_current = parse_result and parse_result.success or false
  }
end

-- Remove document from index
function M.remove_document(document_id)
  -- Get document details first
  local doc_details, err = M.get_document_details(document_id)
  if not doc_details then
    return false, err
  end

  local ok, remove_result = updater.remove_document(doc_details.document.file_path)
  if not ok then
    return false, "Failed to remove document: " .. remove_result
  end

  utils.log("INFO", string.format("Removed document from index: %s", doc_details.document.file_path))
  return true, remove_result
end

-- Update document in index
function M.update_document(file_path)
  local ok, result = updater.index_document(file_path)
  if not ok then
    return false, "Failed to update document: " .. result
  end

  utils.log("INFO", string.format("Updated document in index: %s", file_path))
  return true, result
end

-- Get index statistics
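-- Combines updater.get_index_stats() with database.status() and a per-day count of
-- documents created in the last 7 days (stats.recent_activity).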
function M.get_statistics()
  local stats = updater.get_index_stats()

  -- Add additional statistics
  local db_status = database.status()
  stats.database = db_status

  -- Get recent activity
  local recent_query = [[
    SELECT COUNT(*) as count,
           strftime('%Y-%m-%d', datetime(created_at, 'unixepoch')) as date
    FROM documents
    WHERE created_at > strftime('%s', 'now', '-7 days')
    GROUP BY date
    ORDER BY date DESC
  ]]

  local recent_ok, recent_result = database.execute(recent_query)
  if recent_ok then
    stats.recent_activity = recent_result
  end

  return stats
end

-- Validate index integrity
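-- Returns { valid = boolean, issues = { ... }, stats = { ... } }; checks for orphaned
-- property rows and for indexed documents whose files no longer exist on disk.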
function M.validate_index()
  local validation_result = {
    valid = true,
    issues = {},
    stats = {}
  }

  -- Check for orphaned properties
  local orphaned_query = [[
    SELECT COUNT(*) as count FROM properties p
    LEFT JOIN documents d ON p.document_id = d.id
    WHERE d.id IS NULL
  ]]

  local ok, result = database.execute(orphaned_query)
  if ok and result[1].count > 0 then
    validation_result.valid = false
    table.insert(validation_result.issues, string.format("Found %d orphaned properties", result[1].count))
  end

  -- Check for documents that no longer exist
  local docs_query = "SELECT id, file_path FROM documents"
  ok, result = database.execute(docs_query)
  if ok then
    local missing_files = 0
    for _, doc in ipairs(result) do
      if not utils.file_exists(doc.file_path) then
        missing_files = missing_files + 1
      end
    end

    if missing_files > 0 then
      validation_result.valid = false
      table.insert(validation_result.issues, string.format("Found %d documents pointing to missing files", missing_files))
    end

    validation_result.stats.missing_files = missing_files
  end

  -- Get overall statistics without clobbering the figures collected above
  for key, value in pairs(M.get_statistics()) do
    if validation_result.stats[key] == nil then
      validation_result.stats[key] = value
    end
  end

  return validation_result
end

-- Cleanup orphaned data
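-- Deletes orphaned property rows and drops index entries whose files are missing.
-- Returns { removed_orphans, removed_missing, errors }.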
function M.cleanup_index()
  local cleanup_result = {
    removed_orphans = 0,
    removed_missing = 0,
    errors = {}
  }

  -- Remove orphaned properties
  local orphaned_query = [[
    DELETE FROM properties WHERE document_id NOT IN (SELECT id FROM documents)
  ]]

  local ok, exec_result = database.execute(orphaned_query)
  if not ok then
    table.insert(cleanup_result.errors, "Failed to remove orphaned properties: " .. exec_result)
  else
    cleanup_result.removed_orphans = exec_result -- on success, execute returns the affected row count
  end

  -- Remove documents pointing to missing files
  local docs_query = "SELECT id, file_path FROM documents"
  local docs_ok, docs = database.execute(docs_query)
  if docs_ok then
    for _, doc in ipairs(docs) do
      if not utils.file_exists(doc.file_path) then
        local remove_ok, remove_err = updater.remove_document(doc.file_path)
        if remove_ok then
          cleanup_result.removed_missing = cleanup_result.removed_missing + 1
        else
          table.insert(cleanup_result.errors, string.format("Failed to remove missing document %s: %s", doc.file_path, remove_err))
        end
      end
    end
  end

  return cleanup_result
end

return M