-- notex.nvim/lua/notex/index/updater.lua
-- Incremental index updater
local M = {}
local database = require('notex.database.schema')
local scanner = require('notex.index.scanner')
local yaml_parser = require('notex.parser.yaml')
local utils = require('notex.utils')
-- Index a single markdown document: parse its YAML front matter and persist
-- the document record plus one property row per front-matter key.
-- @tparam string file_path path of the markdown file to index
-- @treturn boolean ok
-- @treturn string|table error message on failure, or a result table with
--   document_id, properties_count and action ("created" or "updated")
function M.index_document(file_path)
  local document_id = utils.generate_id()
  local current_time = os.time()

  -- Get file metadata and bail out early on unreadable/broken files
  local metadata = scanner.get_file_metadata(file_path)
  if not metadata.exists then
    return false, "File does not exist: " .. file_path
  end
  if metadata.has_errors then
    return false, "File has errors: " .. file_path
  end

  -- Read and parse YAML front matter.
  -- tostring() guards against a non-string error value from the parser,
  -- which would otherwise make the concatenation itself raise.
  local yaml_data, err = yaml_parser.parse_markdown_file(file_path)
  if not yaml_data then
    return false, "Failed to parse YAML: " .. tostring(err)
  end

  -- Create document record
  local document_record = {
    id = document_id,
    file_path = file_path,
    content_hash = metadata.content_hash,
    last_modified = metadata.last_modified,
    created_at = current_time,
    updated_at = current_time
  }

  -- Check if document already exists
  local existing_doc, get_err = database.documents.get_by_path(file_path)
  if get_err then
    return false, "Failed to check existing document: " .. get_err
  end

  local ok
  if existing_doc then
    -- Update existing document. Preserve the original creation timestamp:
    -- previously created_at was overwritten with the current time on every
    -- re-index, destroying the document's creation history.
    document_record.id = existing_doc.id
    document_record.created_at = existing_doc.created_at or current_time
    ok, err = database.documents.update(document_record)
    if not ok then
      return false, "Failed to update document: " .. err
    end

    -- Delete existing properties so the new set fully replaces them
    ok, err = database.properties.delete_by_document(document_record.id)
    if not ok then
      return false, "Failed to delete existing properties: " .. err
    end

    document_id = existing_doc.id
  else
    -- Create new document
    ok, err = database.documents.create(document_record)
    if not ok then
      return false, "Failed to create document: " .. err
    end
  end

  -- Process and create properties. Individual property failures are logged
  -- but do not abort the whole indexing operation (best-effort).
  local properties = yaml_parser.process_properties(yaml_data)
  for _, prop in ipairs(properties) do
    local property_record = {
      id = utils.generate_id(),
      document_id = document_id,
      key = prop.key,
      value = tostring(prop.value),
      value_type = prop.value_type,
      created_at = current_time,
      updated_at = current_time
    }

    ok, err = database.properties.create(property_record)
    if not ok then
      utils.log("ERROR", "Failed to create property", {
        document_id = document_id,
        property_key = prop.key,
        error = err
      })
    end
  end

  -- Update schema metadata
  M.update_schema_metadata(properties)

  return true, {
    document_id = document_id,
    properties_count = #properties,
    action = existing_doc and "updated" or "created"
  }
end
-- Recompute per-key schema metadata (dominant value type, allowed types,
-- occurrence count) from a batch of document properties and persist it.
-- @tparam table properties array of { key, value_type, ... } records
function M.update_schema_metadata(properties)
  -- Aggregate: total occurrences per key, and a per-key histogram of types.
  local occurrences = {}
  local type_histogram = {}
  for _, prop in ipairs(properties) do
    local key = prop.key
    occurrences[key] = (occurrences[key] or 0) + 1
    local types = type_histogram[key]
    if not types then
      types = {}
      type_histogram[key] = types
    end
    types[prop.value_type] = (types[prop.value_type] or 0) + 1
  end

  -- Persist one schema entry per property key.
  for key, count in pairs(occurrences) do
    local types = type_histogram[key]

    -- The dominant type is the one with the highest occurrence count.
    local dominant_type, best = nil, 0
    for type_name, type_count in pairs(types) do
      if type_count > best then
        best = type_count
        dominant_type = type_name
      end
    end

    local validation_rules = vim.json.encode({
      allowed_types = vim.tbl_keys(types),
      most_common_type = dominant_type
    })

    database.schema.update_property(key, dominant_type, validation_rules, count)
  end
end
-- Remove a document (and, via foreign key cascade, its properties) from
-- the index.
-- @tparam string file_path path whose index entry should be removed
-- @treturn boolean ok
-- @treturn string|table error message, or a result table with document_id,
--   file_path and action = "deleted"
function M.remove_document(file_path)
  local existing_doc, err = database.documents.get_by_path(file_path)
  if not existing_doc then
    -- Distinguish a lookup failure from a genuinely missing document;
    -- previously a database error here was misreported as "not found".
    if err then
      return false, "Failed to look up document: " .. err
    end
    return false, "Document not found in index: " .. file_path
  end

  -- Properties will be deleted automatically due to foreign key constraint
  local ok, delete_err = database.documents.delete(existing_doc.id)
  if not ok then
    return false, "Failed to delete document: " .. delete_err
  end

  return true, {
    document_id = existing_doc.id,
    file_path = file_path,
    action = "deleted"
  }
end
-- Incrementally synchronize the index with a directory: re-index files whose
-- content changed and drop entries for files removed from disk.
-- @tparam string directory_path directory to synchronize
-- @treturn boolean ok
-- @treturn string|table error message, or a report with updated_files,
--   removed_files, errors and a stats summary
function M.update_directory(directory_path)
  local report = {
    updated_files = {},
    removed_files = {},
    errors = {},
    stats = {
      processed = 0,
      updated = 0,
      removed = 0,
      failed = 0
    }
  }
  local stats = report.stats

  -- Snapshot of what the index currently knows about
  local indexed_docs, db_err = database.execute("SELECT file_path, last_modified FROM documents")
  if not indexed_docs then
    return false, "Failed to get indexed documents: " .. db_err
  end

  -- Diff the filesystem against that snapshot
  local changed_files, removed_files, scan_err = scanner.scan_for_changes(directory_path, indexed_docs)
  if not changed_files then
    return false, "Failed to scan for changes: " .. scan_err
  end

  -- Re-index every changed file
  for _, change in ipairs(changed_files) do
    stats.processed = stats.processed + 1
    local ok, outcome = M.index_document(change.file_path)
    if ok then
      stats.updated = stats.updated + 1
      report.updated_files[#report.updated_files + 1] = outcome
      utils.log("INFO", string.format("Updated document: %s", change.file_path))
    else
      stats.failed = stats.failed + 1
      report.errors[#report.errors + 1] = {
        file_path = change.file_path,
        error = outcome
      }
      utils.log("ERROR", string.format("Failed to update document: %s - %s", change.file_path, outcome))
    end
  end

  -- Drop index entries whose files disappeared
  for _, path in ipairs(removed_files) do
    local ok, outcome = M.remove_document(path)
    if ok then
      stats.removed = stats.removed + 1
      report.removed_files[#report.removed_files + 1] = outcome
      utils.log("INFO", string.format("Removed document: %s", path))
    else
      stats.failed = stats.failed + 1
      report.errors[#report.errors + 1] = {
        file_path = path,
        error = outcome
      }
      utils.log("ERROR", string.format("Failed to remove document: %s - %s", path, outcome))
    end
  end

  return true, report
end
-- Rebuild the index for a directory from scratch: clear all existing
-- document rows, then scan, validate and index every file found.
-- @tparam string directory_path directory to reindex
-- @treturn boolean ok
-- @treturn string|table error message, or a report with indexed_files,
--   errors and a stats summary (scanned/indexed/failed/skipped)
function M.reindex_directory(directory_path)
  local report = {
    indexed_files = {},
    errors = {},
    stats = {
      scanned = 0,
      indexed = 0,
      failed = 0,
      skipped = 0
    }
  }
  local stats = report.stats

  -- Wipe the existing index before rebuilding
  local cleared, clear_err = database.execute("DELETE FROM documents")
  if not cleared then
    return false, "Failed to clear existing index: " .. clear_err
  end

  -- Scan and validate directory
  local scan_result, scan_err = scanner.scan_and_validate(directory_path)
  if not scan_result then
    return false, "Failed to scan directory: " .. scan_err
  end
  stats.scanned = scan_result.stats.total_scanned

  -- Index every file that passed validation
  for _, path in ipairs(scan_result.valid_files) do
    local ok, outcome = M.index_document(path)
    if ok then
      stats.indexed = stats.indexed + 1
      report.indexed_files[#report.indexed_files + 1] = outcome
      utils.log("INFO", string.format("Indexed document: %s", path))
    else
      stats.failed = stats.failed + 1
      report.errors[#report.errors + 1] = {
        file_path = path,
        error = outcome
      }
      utils.log("ERROR", string.format("Failed to index document: %s - %s", path, outcome))
    end
  end

  -- Record invalid files as skipped (with their validation errors)
  for _, invalid in ipairs(scan_result.invalid_files) do
    stats.skipped = stats.skipped + 1
    utils.log("WARN", string.format("Skipped invalid file: %s", invalid.file_path), invalid.errors)
  end

  return true, report
end
-- Collect statistics about the current index state.
-- Each query failure is logged and its field omitted, so callers always
-- receive a (possibly partial) stats table — same contract as before, but
-- failures are no longer swallowed silently, and the three shadowed `err`
-- locals are replaced with distinct names.
-- @treturn table stats with document_count, property_count,
--   unique_properties, schema_entries (each omitted on query failure)
--   and database status
function M.get_index_stats()
  local stats = {}

  -- Document counts
  local doc_rows, doc_err = database.execute("SELECT COUNT(*) as count FROM documents")
  if doc_rows then
    stats.document_count = doc_rows[1].count
  else
    utils.log("ERROR", "Failed to count documents", { error = doc_err })
  end

  -- Property counts
  local prop_rows, prop_err = database.execute("SELECT COUNT(*) as count FROM properties")
  if prop_rows then
    stats.property_count = prop_rows[1].count
  else
    utils.log("ERROR", "Failed to count properties", { error = prop_err })
  end

  -- Schema statistics
  local schema_rows, schema_err = database.schema.get_all()
  if schema_rows then
    stats.unique_properties = #schema_rows
    stats.schema_entries = schema_rows
  else
    utils.log("ERROR", "Failed to load schema entries", { error = schema_err })
  end

  -- Database status
  stats.database = require('notex.database.init').status()

  return stats
end
return M