Initial vibecoded proof of concept
This commit is contained in:
parent
74812459af
commit
461318a656
61 changed files with 13306 additions and 0 deletions
368
lua/notex/index/init.lua
Normal file
368
lua/notex/index/init.lua
Normal file
|
@ -0,0 +1,368 @@
|
|||
-- Document indexing coordination module
|
||||
local M = {}
|
||||
|
||||
local database = require('notex.database.init')
|
||||
local migrations = require('notex.database.migrations')
|
||||
local updater = require('notex.index.updater')
|
||||
local scanner = require('notex.index.scanner')
|
||||
local parser = require('notex.parser')
|
||||
local utils = require('notex.utils')
|
||||
|
||||
--- Initialize the document indexing system.
-- Opens the database at the given path and applies schema migrations.
-- @tparam string database_path path to the database file
-- @treturn boolean true on success, false on failure
-- @treturn string status message or error description
function M.init(database_path)
  local db_ok, db_err = database.init(database_path)
  if not db_ok then
    return false, "Failed to initialize database: " .. db_err
  end

  local mig_ok, mig_err = migrations.init()
  if not mig_ok then
    return false, "Failed to initialize migrations: " .. mig_err
  end

  utils.log("INFO", "Document indexing system initialized")
  return true, "Indexing system initialized successfully"
end
||||
|
||||
--- Index (or reindex) all documents under a directory.
-- @tparam string directory_path directory to index
-- @tparam[opt] table options `force_reindex` (boolean, default false) selects a
--   full rebuild instead of an incremental update. NOTE(review): an earlier
--   revision read `options.recursive` but never used it; it is currently ignored.
-- @treturn table result with fields: success (boolean), directory_path,
--   stats (table from the updater), errors (list of strings), operation
--   ("reindex" or "update")
function M.index_documents(directory_path, options)
  options = options or {}
  local force_reindex = options.force_reindex or false

  local result = {
    success = false,
    directory_path = directory_path,
    stats = {},
    errors = {},
    operation = force_reindex and "reindex" or "update"
  }

  -- Validate the path exists at all.
  if not utils.file_exists(directory_path) then
    table.insert(result.errors, "Directory does not exist: " .. directory_path)
    return result
  end

  -- Reject regular files. Merely opening with io.open proves nothing: on POSIX
  -- a regular file AND a directory both open successfully, so the old
  -- "if not io.open(...) then not-a-directory" check never fired for files.
  -- Instead: read(0) succeeds on a regular file but fails on a directory
  -- handle (POSIX fread on a directory errors). NOTE(review): behavior on
  -- Windows differs (directories fail to open there, which this also handles
  -- correctly by falling through) — confirm on Windows builds.
  local handle = io.open(directory_path)
  if handle then
    local readable_as_file = handle:read(0) ~= nil
    handle:close()
    if readable_as_file then
      table.insert(result.errors, "Path is not a directory: " .. directory_path)
      return result
    end
  end

  local stop_timer = utils.timer("Document indexing")

  if force_reindex then
    -- Full rebuild of the index for this directory.
    local ok, reindex_result = updater.reindex_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Reindex failed: " .. reindex_result)
      return result
    end

    result.stats = reindex_result.stats
    utils.log("INFO", string.format("Completed full reindex of %s", directory_path))
  else
    -- Incremental update: only changed/new documents are touched.
    local ok, update_result = updater.update_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Update failed: " .. update_result)
      return result
    end

    result.stats = update_result.stats
    utils.log("INFO", string.format("Completed incremental update of %s", directory_path))
  end

  stop_timer()
  result.success = true

  return result
end
|
||||
|
||||
--- List indexed documents with their property counts.
-- @tparam[opt] table filters optional fields: limit (default 100),
--   offset (default 0), order_by (default "updated_at DESC")
-- @treturn table|nil rows on success, nil on failure
-- @treturn string|nil error message on failure
function M.get_indexed_documents(filters)
  filters = filters or {}
  -- Coerce numeric filters so string.format("%d") cannot blow up on strings.
  local limit = tonumber(filters.limit) or 100
  local offset = tonumber(filters.offset) or 0
  local order_by = filters.order_by or "updated_at DESC"

  -- order_by is spliced directly into SQL (placeholders cannot bind ORDER BY
  -- terms), so restrict it to column names, dots, commas, spaces and ASC/DESC
  -- words to prevent SQL injection through caller-supplied sort clauses.
  if order_by:find("[^%w_%., ]") then
    return nil, "Invalid order_by clause: " .. order_by
  end

  local query = string.format([[
    SELECT d.*, COUNT(p.id) as property_count
    FROM documents d
    LEFT JOIN properties p ON d.id = p.document_id
    GROUP BY d.id
    ORDER BY %s
    LIMIT %d OFFSET %d
  ]], order_by, limit, offset)

  local ok, result = database.execute(query)
  if not ok then
    return nil, "Failed to get indexed documents: " .. result
  end

  return result
end
|
||||
|
||||
--- Search documents by property values, date range and free text.
-- @tparam table search_criteria optional fields: status (string),
--   tags (string or list of strings, matched with LIKE), created_after /
--   created_before (unix timestamps), text_search (string), limit (default 50),
--   offset (default 0)
-- @treturn table|nil { documents, total_count, limit, offset } or nil on failure
-- @treturn string|nil error message on failure
function M.search_documents(search_criteria)
  search_criteria = search_criteria or {}
  local conditions = {}
  local params = {}

  -- Each property filter is an independent EXISTS subquery. The previous
  -- implementation AND-ed "p.key = 'status' ..." and "p.key = 'tags' ..." on
  -- the SAME joined properties row, which can never be true simultaneously —
  -- combining status with tags silently returned zero rows.
  if search_criteria.status then
    table.insert(conditions,
      "EXISTS (SELECT 1 FROM properties ps WHERE ps.document_id = d.id AND ps.key = 'status' AND ps.value = :status)")
    params.status = search_criteria.status
  end

  if search_criteria.tags then
    if type(search_criteria.tags) == "string" then
      table.insert(conditions,
        "EXISTS (SELECT 1 FROM properties pt WHERE pt.document_id = d.id AND pt.key = 'tags' AND pt.value LIKE :tag)")
      params.tag = '%' .. search_criteria.tags .. '%'
    elseif type(search_criteria.tags) == "table" then
      -- Any-of semantics across the provided tags (OR), preserved from the
      -- original implementation.
      local tag_conditions = {}
      for i, tag in ipairs(search_criteria.tags) do
        local param_name = "tag_" .. i
        table.insert(tag_conditions,
          "EXISTS (SELECT 1 FROM properties pt WHERE pt.document_id = d.id AND pt.key = 'tags' AND pt.value LIKE :" .. param_name .. ")")
        params[param_name] = '%' .. tag .. '%'
      end
      table.insert(conditions, "(" .. table.concat(tag_conditions, " OR ") .. ")")
    end
  end

  if search_criteria.created_after then
    table.insert(conditions, "d.created_at >= :created_after")
    params.created_after = search_criteria.created_after
  end

  if search_criteria.created_before then
    table.insert(conditions, "d.created_at <= :created_before")
    params.created_before = search_criteria.created_before
  end

  if search_criteria.text_search then
    -- Parenthesized: the bare "A OR B" form bound looser than the surrounding
    -- AND chain, so other filters were effectively ignored whenever the OR's
    -- right side matched.
    table.insert(conditions,
      "(d.file_path LIKE :text_search OR EXISTS (SELECT 1 FROM properties p2 WHERE p2.document_id = d.id AND p2.value LIKE :text_search))")
    params.text_search = '%' .. search_criteria.text_search .. '%'
  end

  local where_clause = #conditions > 0 and "WHERE " .. table.concat(conditions, " AND ") or ""
  local limit = tonumber(search_criteria.limit) or 50
  local offset = tonumber(search_criteria.offset) or 0

  -- The join is only needed for the property_count aggregate; all filtering
  -- happens in the EXISTS subqueries above (the old always-empty `joins`
  -- scaffolding has been dropped).
  local query = string.format([[
    SELECT d.*, COUNT(p.id) as property_count
    FROM documents d
    LEFT JOIN properties p ON d.id = p.document_id
    %s
    GROUP BY d.id
    ORDER BY d.updated_at DESC
    LIMIT %d OFFSET %d
  ]], where_clause, limit, offset)

  local ok, result = database.execute(query, params)
  if not ok then
    return nil, "Search failed: " .. result
  end

  -- Total matching documents irrespective of LIMIT/OFFSET, for pagination.
  local count_query = string.format([[
    SELECT COUNT(*) as total
    FROM documents d
    %s
  ]], where_clause)

  local count_ok, count_result = database.execute(count_query, params)
  local total_count = count_ok and count_result[1].total or 0

  return {
    documents = result,
    total_count = total_count,
    limit = limit,
    offset = offset
  }
end
|
||||
|
||||
--- Fetch full details for a single indexed document.
-- Combines the database row, its properties, and a fresh parse of the file.
-- @tparam string|number document_id
-- @treturn table|nil { document, properties, parse_result, file_exists,
--   is_current } or nil on failure
-- @treturn string|nil error message on failure
function M.get_document_details(document_id)
  local doc_ok, document = database.documents.get_by_id(document_id)
  if not doc_ok then
    return nil, "Failed to get document: " .. document
  end

  if not document then
    return nil, "Document not found: " .. document_id
  end

  local props_ok, properties = database.properties.get_by_document(document_id)
  if not props_ok then
    return nil, "Failed to get document properties: " .. properties
  end

  -- Re-parse the file for up-to-date details; a parse failure is logged but
  -- does not abort the lookup.
  local parsed, parse_err = parser.parse_document(document.file_path)
  if parse_err then
    utils.log("WARN", "Failed to parse document for details", {
      document_id = document_id,
      error = parse_err
    })
  end

  return {
    document = document,
    properties = properties or {},
    parse_result = parsed,
    file_exists = utils.file_exists(document.file_path),
    is_current = parsed and parsed.success or false
  }
end
|
||||
|
||||
--- Remove a document from the index by its id.
-- Looks the document up first so the updater can be addressed by file path.
-- @tparam string|number document_id
-- @treturn boolean success
-- @treturn any updater result on success, error message on failure
function M.remove_document(document_id)
  local details, lookup_err = M.get_document_details(document_id)
  if not details then
    return false, lookup_err
  end

  local path = details.document.file_path
  local ok, remove_result = updater.remove_document(path)
  if not ok then
    return false, "Failed to remove document: " .. remove_result
  end

  utils.log("INFO", string.format("Removed document from index: %s", path))

  return true, remove_result
end
|
||||
|
||||
--- Re-index a single document by file path.
-- @tparam string file_path path of the document to (re)index
-- @treturn boolean success
-- @treturn any updater result on success, error message on failure
function M.update_document(file_path)
  local ok, index_result = updater.index_document(file_path)
  if not ok then
    return false, "Failed to update document: " .. index_result
  end

  utils.log("INFO", string.format("Updated document in index: %s", file_path))

  return true, index_result
end
|
||||
|
||||
--- Collect index statistics.
-- Starts from the updater's own stats, then attaches database status and a
-- per-day document-creation count for the last 7 days.
-- @treturn table stats table (fields: database, recent_activity when the
--   activity query succeeds, plus whatever updater.get_index_stats provides)
function M.get_statistics()
  local stats = updater.get_index_stats()

  stats.database = database.status()

  -- Documents created per day over the past week.
  local recent_query = [[
    SELECT COUNT(*) as count,
           strftime('%Y-%m-%d', datetime(created_at, 'unixepoch')) as date
    FROM documents
    WHERE created_at > strftime('%s', 'now', '-7 days')
    GROUP BY date
    ORDER BY date DESC
  ]]

  local ok, recent_rows = database.execute(recent_query)
  if ok then
    stats.recent_activity = recent_rows
  end

  return stats
end
|
||||
|
||||
--- Validate index integrity.
-- Checks for orphaned property rows and for documents whose files no longer
-- exist on disk.
-- @treturn table { valid (boolean), issues (list of strings), stats (table) }
function M.validate_index()
  local validation_result = {
    valid = true,
    issues = {},
    -- Populate stats FIRST: the previous implementation assigned
    -- stats.missing_files and then overwrote the whole stats table with
    -- M.get_statistics() at the end, losing the value.
    stats = M.get_statistics()
  }

  -- Properties whose document row has been deleted.
  local orphaned_query = [[
    SELECT COUNT(*) as count FROM properties p
    LEFT JOIN documents d ON p.document_id = d.id
    WHERE d.id IS NULL
  ]]

  local ok, result = database.execute(orphaned_query)
  if ok and result[1].count > 0 then
    validation_result.valid = false
    table.insert(validation_result.issues, string.format("Found %d orphaned properties", result[1].count))
  end

  -- Documents pointing at files that no longer exist on disk.
  ok, result = database.execute("SELECT id, file_path FROM documents")
  if ok then
    local missing_files = 0
    for _, doc in ipairs(result) do
      if not utils.file_exists(doc.file_path) then
        missing_files = missing_files + 1
      end
    end

    if missing_files > 0 then
      -- Mark invalid here too — previously only orphaned properties flipped
      -- the flag, so an index full of dangling documents still reported valid.
      validation_result.valid = false
      table.insert(validation_result.issues, string.format("Found %d documents pointing to missing files", missing_files))
    end

    validation_result.stats.missing_files = missing_files
  end

  return validation_result
end
|
||||
|
||||
--- Remove orphaned properties and documents whose files are gone.
-- Best-effort: individual failures are collected in `errors` rather than
-- aborting the whole cleanup.
-- @treturn table { removed_orphans (number), removed_missing (number),
--   errors (list of strings) }
function M.cleanup_index()
  local cleanup_result = {
    removed_orphans = 0,
    removed_missing = 0,
    errors = {}
  }

  -- Delete property rows whose parent document no longer exists.
  local orphaned_query = [[
    DELETE FROM properties WHERE document_id NOT IN (SELECT id FROM documents)
  ]]

  local del_ok, del_result = database.execute(orphaned_query)
  if not del_ok then
    table.insert(cleanup_result.errors, "Failed to remove orphaned properties: " .. del_result)
  else
    -- On success database.execute returns the affected row count as its
    -- second value for DELETE statements (was previously stored via a
    -- misleadingly named `err` variable).
    cleanup_result.removed_orphans = del_result
  end

  -- Remove documents pointing at files that no longer exist. The previous
  -- implementation wrote `ok, result = database.execute(...)` where `result`
  -- was never declared, leaking an accidental global.
  local docs_ok, docs = database.execute("SELECT id, file_path FROM documents")
  if docs_ok then
    for _, doc in ipairs(docs) do
      if not utils.file_exists(doc.file_path) then
        local remove_ok, remove_err = updater.remove_document(doc.file_path)
        if remove_ok then
          cleanup_result.removed_missing = cleanup_result.removed_missing + 1
        else
          table.insert(cleanup_result.errors, string.format("Failed to remove missing document %s: %s", doc.file_path, remove_err))
        end
      end
    end
  end

  return cleanup_result
end
|
||||
|
||||
return M
|
Loading…
Add table
Add a link
Reference in a new issue