|
-- Incremental index updater
|
||
|
local M = {}
|
||
|
|
||
|
local database = require('notex.database.schema')
|
||
|
local scanner = require('notex.index.scanner')
|
||
|
local yaml_parser = require('notex.parser.yaml')
|
||
|
local utils = require('notex.utils')
|
||
|
|
||
|
-- Index a single document's YAML front matter into the database.
--
-- Reads file metadata, parses the markdown file's YAML block, then creates
-- or updates the corresponding document row and replaces its properties.
--
-- @tparam string file_path path of the markdown file to index
-- @treturn boolean ok
-- @treturn table|string result table ({document_id, properties_count, action})
--   on success, or an error message on failure
function M.index_document(file_path)
  local document_id = utils.generate_id()
  local current_time = os.time()

  -- Get file metadata (existence, content hash, mtime, validation flags).
  local metadata = scanner.get_file_metadata(file_path)

  if not metadata.exists then
    return false, "File does not exist: " .. file_path
  end

  if metadata.has_errors then
    return false, "File has errors: " .. file_path
  end

  -- Read and parse the YAML front matter.
  local yaml_data, err = yaml_parser.parse_markdown_file(file_path)
  if not yaml_data then
    return false, "Failed to parse YAML: " .. err
  end

  -- Build the record to insert/update.
  local document_record = {
    id = document_id,
    file_path = file_path,
    content_hash = metadata.content_hash,
    last_modified = metadata.last_modified,
    created_at = current_time,
    updated_at = current_time
  }

  -- Check whether this path is already indexed.
  local existing_doc, get_err = database.documents.get_by_path(file_path)
  if get_err then
    return false, "Failed to check existing document: " .. get_err
  end

  local ok
  if existing_doc then
    -- Update in place, keeping the stable id and the original creation time.
    -- (Bug fix: created_at was previously clobbered with the current time on
    -- every re-index, losing the document's true creation timestamp.)
    document_record.id = existing_doc.id
    document_record.created_at = existing_doc.created_at or current_time

    ok, err = database.documents.update(document_record)
    if not ok then
      return false, "Failed to update document: " .. err
    end

    -- Replace all properties: delete the old rows before re-inserting.
    ok, err = database.properties.delete_by_document(document_record.id)
    if not ok then
      return false, "Failed to delete existing properties: " .. err
    end

    document_id = existing_doc.id
  else
    -- First time we see this path: create a new document row.
    ok, err = database.documents.create(document_record)
    if not ok then
      return false, "Failed to create document: " .. err
    end
  end

  -- Normalize the parsed YAML into flat property records and insert them.
  local properties = yaml_parser.process_properties(yaml_data)

  for _, prop in ipairs(properties) do
    local property_record = {
      id = utils.generate_id(),
      document_id = document_id,
      key = prop.key,
      value = tostring(prop.value),
      value_type = prop.value_type,
      created_at = current_time,
      updated_at = current_time
    }

    ok, err = database.properties.create(property_record)
    if not ok then
      -- Best effort: one bad property should not abort the whole index run,
      -- so log the failure and continue with the remaining properties.
      utils.log("ERROR", "Failed to create property", {
        document_id = document_id,
        property_key = prop.key,
        error = err
      })
    end
  end

  -- Refresh aggregate schema information for the keys we just saw.
  M.update_schema_metadata(properties)

  return true, {
    document_id = document_id,
    properties_count = #properties,
    action = existing_doc and "updated" or "created"
  }
end
|
||
|
|
||
|
-- Aggregate property statistics and push them into the schema metadata table.
-- For each distinct key we record how many times it occurred, every value
-- type observed for it, and which type was most common.
--
-- @tparam table properties array of {key, value, value_type} records
function M.update_schema_metadata(properties)
  -- Tally occurrences per key, and per (key, value_type) pair.
  local counts_by_key = {}
  local types_by_key = {}

  for _, prop in ipairs(properties) do
    local key = prop.key
    local type_tally = types_by_key[key]
    if type_tally == nil then
      counts_by_key[key] = 0
      type_tally = {}
      types_by_key[key] = type_tally
    end

    counts_by_key[key] = counts_by_key[key] + 1
    type_tally[prop.value_type] = (type_tally[prop.value_type] or 0) + 1
  end

  -- Write one schema entry per key, recording its dominant value type.
  for key, occurrences in pairs(counts_by_key) do
    local type_tally = types_by_key[key]

    -- Pick the most frequently observed type for this key.
    local dominant_type = nil
    local dominant_count = 0
    for type_name, seen in pairs(type_tally) do
      if seen > dominant_count then
        dominant_count = seen
        dominant_type = type_name
      end
    end

    local validation_rules = vim.json.encode({
      allowed_types = vim.tbl_keys(type_tally),
      most_common_type = dominant_type
    })

    database.schema.update_property(key, dominant_type, validation_rules, occurrences)
  end
end
|
||
|
|
||
|
-- Remove a document from the index by its file path.
--
-- @tparam string file_path path of the document to remove
-- @treturn boolean ok
-- @treturn table|string result table ({document_id, file_path, action})
--   on success, or an error message on failure
function M.remove_document(file_path)
  local existing_doc, err = database.documents.get_by_path(file_path)

  -- Bug fix: the lookup error was previously captured but ignored, so a
  -- database failure was misreported as "document not found". Distinguish
  -- the two cases.
  if err then
    return false, "Failed to look up document: " .. err
  end
  if not existing_doc then
    return false, "Document not found in index: " .. file_path
  end

  -- Properties will be deleted automatically due to foreign key constraint
  if not ok then
    return false, "Failed to delete document: " .. delete_err
  end

  return true, {
    document_id = existing_doc.id,
    file_path = file_path,
    action = "deleted"
  }
end
|
||
|
|
||
|
-- Incrementally synchronize the index with a directory: re-index files whose
-- content changed and drop index entries for files that no longer exist.
--
-- @tparam string directory_path root directory to synchronize
-- @treturn boolean ok
-- @treturn table|string summary ({updated_files, removed_files, errors,
--   stats}) on success, or an error message on failure
function M.update_directory(directory_path)
  local summary = {
    updated_files = {},
    removed_files = {},
    errors = {},
    stats = {
      processed = 0,
      updated = 0,
      removed = 0,
      failed = 0
    }
  }
  local stats = summary.stats

  -- Snapshot what the index currently knows so the scanner can diff it
  -- against the filesystem.
  local indexed_docs, err = database.execute("SELECT file_path, last_modified FROM documents")
  if not indexed_docs then
    return false, "Failed to get indexed documents: " .. err
  end

  local changed_files, removed_files, scan_err = scanner.scan_for_changes(directory_path, indexed_docs)
  if not changed_files then
    return false, "Failed to scan for changes: " .. scan_err
  end

  -- Re-index every changed file; failures are recorded, not fatal.
  for _, change_info in ipairs(changed_files) do
    local path = change_info.file_path
    stats.processed = stats.processed + 1

    local ok, outcome = M.index_document(path)
    if ok then
      stats.updated = stats.updated + 1
      summary.updated_files[#summary.updated_files + 1] = outcome
      utils.log("INFO", string.format("Updated document: %s", path))
    else
      stats.failed = stats.failed + 1
      summary.errors[#summary.errors + 1] = {
        file_path = path,
        error = outcome
      }
      utils.log("ERROR", string.format("Failed to update document: %s - %s", path, outcome))
    end
  end

  -- Drop index entries for files that disappeared from disk.
  for _, path in ipairs(removed_files) do
    local ok, outcome = M.remove_document(path)
    if ok then
      stats.removed = stats.removed + 1
      summary.removed_files[#summary.removed_files + 1] = outcome
      utils.log("INFO", string.format("Removed document: %s", path))
    else
      stats.failed = stats.failed + 1
      summary.errors[#summary.errors + 1] = {
        file_path = path,
        error = outcome
      }
      utils.log("ERROR", string.format("Failed to remove document: %s - %s", path, outcome))
    end
  end

  return true, summary
end
|
||
|
|
||
|
-- Rebuild the index for a directory from scratch: wipe all document rows,
-- scan and validate the directory, then index every valid file found.
--
-- @tparam string directory_path root directory to reindex
-- @treturn boolean ok
-- @treturn table|string report ({indexed_files, errors, stats}) on success,
--   or an error message on failure
function M.reindex_directory(directory_path)
  local report = {
    indexed_files = {},
    errors = {},
    stats = {
      scanned = 0,
      indexed = 0,
      failed = 0,
      skipped = 0
    }
  }

  -- Drop everything currently indexed (property rows presumably follow via
  -- a foreign key cascade -- confirm against the schema).
  local cleared, clear_err = database.execute("DELETE FROM documents")
  if not cleared then
    return false, "Failed to clear existing index: " .. clear_err
  end

  local scan_result, scan_err = scanner.scan_and_validate(directory_path)
  if not scan_result then
    return false, "Failed to scan directory: " .. scan_err
  end

  report.stats.scanned = scan_result.stats.total_scanned

  -- Index every file that passed validation; failures are recorded, not fatal.
  for _, file_path in ipairs(scan_result.valid_files) do
    local ok, outcome = M.index_document(file_path)
    if ok then
      report.stats.indexed = report.stats.indexed + 1
      report.indexed_files[#report.indexed_files + 1] = outcome
      utils.log("INFO", string.format("Indexed document: %s", file_path))
    else
      report.stats.failed = report.stats.failed + 1
      report.errors[#report.errors + 1] = {
        file_path = file_path,
        error = outcome
      }
      utils.log("ERROR", string.format("Failed to index document: %s - %s", file_path, outcome))
    end
  end

  -- Files that failed validation are skipped, but noted in the log.
  for _, invalid_file in ipairs(scan_result.invalid_files) do
    report.stats.skipped = report.stats.skipped + 1
    utils.log("WARN", string.format("Skipped invalid file: %s", invalid_file.file_path), invalid_file.errors)
  end

  return true, report
end
|
||
|
|
||
|
-- Collect summary statistics about the current index state.
-- Individual query failures are tolerated: the corresponding field is
-- simply absent from the returned table.
--
-- @treturn table stats with optional fields document_count, property_count,
--   unique_properties, schema_entries, plus database (connection status)
function M.get_index_stats()
  local stats = {}

  -- Total number of indexed documents.
  local doc_rows = database.execute("SELECT COUNT(*) as count FROM documents")
  if doc_rows then
    stats.document_count = doc_rows[1].count
  end

  -- Total number of stored properties.
  local prop_rows = database.execute("SELECT COUNT(*) as count FROM properties")
  if prop_rows then
    stats.property_count = prop_rows[1].count
  end

  -- Schema metadata: one entry per distinct property key.
  local schema_entries = database.schema.get_all()
  if schema_entries then
    stats.unique_properties = #schema_entries
    stats.schema_entries = schema_entries
  end

  -- Connection/health info from the database layer.
  stats.database = require('notex.database.init').status()

  return stats
end
|
||
|
|
||
|
return M
|