Initial vibecoded proof of concept
This commit is contained in:
parent 74812459af
commit 461318a656
61 changed files with 13306 additions and 0 deletions
368 lua/notex/index/init.lua Normal file
@@ -0,0 +1,368 @@
-- Document indexing coordination module
local M = {}

local database = require('notex.database.init')
local migrations = require('notex.database.migrations')
local updater = require('notex.index.updater')
local scanner = require('notex.index.scanner')
local parser = require('notex.parser')
local utils = require('notex.utils')

-- Initialize indexing system
function M.init(database_path)
  local ok, err = database.init(database_path)
  if not ok then
    return false, "Failed to initialize database: " .. err
  end

  ok, err = migrations.init()
  if not ok then
    return false, "Failed to initialize migrations: " .. err
  end

  utils.log("INFO", "Document indexing system initialized")
  return true, "Indexing system initialized successfully"
end

-- Index documents in directory
function M.index_documents(directory_path, options)
  options = options or {}
  local force_reindex = options.force_reindex or false
  local recursive = options.recursive ~= false -- NOTE: currently unused; the updater always scans recursively

  local result = {
    success = false,
    directory_path = directory_path,
    stats = {},
    errors = {},
    operation = force_reindex and "reindex" or "update"
  }

  -- Validate directory exists
  if not utils.file_exists(directory_path) then
    table.insert(result.errors, "Directory does not exist: " .. directory_path)
    return result
  end

  -- Check the path is actually a directory (io.open cannot distinguish
  -- files from directories reliably, so use Neovim's isdirectory())
  if vim.fn.isdirectory(directory_path) == 0 then
    table.insert(result.errors, "Path is not a directory: " .. directory_path)
    return result
  end

  local start_timer = utils.timer("Document indexing")

  if force_reindex then
    -- Full reindex
    local ok, reindex_result = updater.reindex_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Reindex failed: " .. reindex_result)
      return result
    end

    result.stats = reindex_result.stats
    utils.log("INFO", string.format("Completed full reindex of %s", directory_path))
  else
    -- Incremental update
    local ok, update_result = updater.update_directory(directory_path)
    if not ok then
      table.insert(result.errors, "Update failed: " .. update_result)
      return result
    end

    result.stats = update_result.stats
    utils.log("INFO", string.format("Completed incremental update of %s", directory_path))
  end

  start_timer()
  result.success = true

  return result
end
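
-- Illustrative usage (a sketch, not part of the module; assumes the plugin
-- is on 'runtimepath' and a writable database path):
--
--   local index = require('notex.index.init')
--   assert(index.init(vim.fn.stdpath('data') .. '/notex.db'))
--   local result = index.index_documents(vim.fn.expand('~/notes'), {
--     force_reindex = false, -- incremental update
--   })
--   if not result.success then
--     vim.notify(vim.inspect(result.errors), vim.log.levels.ERROR)
--   end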

-- Get indexed documents
function M.get_indexed_documents(filters)
  filters = filters or {}
  local limit = filters.limit or 100
  local offset = filters.offset or 0
  -- NOTE: order_by is interpolated directly into the SQL below, so callers
  -- must only pass trusted values here.
  local order_by = filters.order_by or "updated_at DESC"

  local query = string.format([[
    SELECT d.*, COUNT(p.id) as property_count
    FROM documents d
    LEFT JOIN properties p ON d.id = p.document_id
    GROUP BY d.id
    ORDER BY %s
    LIMIT %d OFFSET %d
  ]], order_by, limit, offset)

  local ok, result = database.execute(query)
  if not ok then
    return nil, "Failed to get indexed documents: " .. result
  end

  return result
end

-- Search documents by properties
function M.search_documents(search_criteria)
  local conditions = {}
  local params = {}
  local joins = {} -- currently never populated

  -- Build WHERE clause. Each compound condition is parenthesized so AND/OR
  -- precedence stays correct when conditions are joined with AND.
  -- NOTE: combining two different property filters (e.g. status and tags)
  -- constrains the same joined property row and so returns nothing; fully
  -- independent property filters would need per-property EXISTS subqueries.
  if search_criteria.status then
    table.insert(conditions, "(p.key = 'status' AND p.value = :status)")
    params.status = search_criteria.status
  end

  if search_criteria.tags then
    if type(search_criteria.tags) == "string" then
      table.insert(conditions, "(p.key = 'tags' AND p.value LIKE :tag)")
      params.tag = '%' .. search_criteria.tags .. '%'
    elseif type(search_criteria.tags) == "table" then
      local tag_conditions = {}
      for i, tag in ipairs(search_criteria.tags) do
        local param_name = "tag_" .. i
        table.insert(tag_conditions, "(p.key = 'tags' AND p.value LIKE :" .. param_name .. ")")
        params[param_name] = '%' .. tag .. '%'
      end
      table.insert(conditions, "(" .. table.concat(tag_conditions, " OR ") .. ")")
    end
  end

  if search_criteria.created_after then
    table.insert(conditions, "d.created_at >= :created_after")
    params.created_after = search_criteria.created_after
  end

  if search_criteria.created_before then
    table.insert(conditions, "d.created_at <= :created_before")
    params.created_before = search_criteria.created_before
  end

  if search_criteria.text_search then
    table.insert(conditions, "(d.file_path LIKE :text_search OR EXISTS (SELECT 1 FROM properties p2 WHERE p2.document_id = d.id AND p2.value LIKE :text_search))")
    params.text_search = '%' .. search_criteria.text_search .. '%'
  end

  -- Build query
  local where_clause = #conditions > 0 and "WHERE " .. table.concat(conditions, " AND ") or ""
  local limit = search_criteria.limit or 50
  local offset = search_criteria.offset or 0

  local query = string.format([[
    SELECT DISTINCT d.*, COUNT(p.id) as property_count
    FROM documents d
    %s
    LEFT JOIN properties p ON d.id = p.document_id
    %s
    GROUP BY d.id
    ORDER BY d.updated_at DESC
    LIMIT %d OFFSET %d
  ]], #joins > 0 and table.concat(joins, " ") or "", where_clause, limit, offset)

  local ok, result = database.execute(query, params)
  if not ok then
    return nil, "Search failed: " .. result
  end

  -- Get total count
  local count_query = string.format([[
    SELECT COUNT(DISTINCT d.id) as total
    FROM documents d
    %s
    LEFT JOIN properties p ON d.id = p.document_id
    %s
  ]], #joins > 0 and table.concat(joins, " ") or "", where_clause)

  local count_ok, count_result = database.execute(count_query, params)
  local total_count = count_ok and count_result[1].total or 0

  return {
    documents = result,
    total_count = total_count,
    limit = limit,
    offset = offset
  }
end
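
-- Illustrative call (field names taken from the reads above; values are
-- made up):
--
--   local page = M.search_documents({
--     status = 'draft',
--     tags = { 'project', 'lua' },            -- OR-matched against p.value
--     created_after = os.time() - 7 * 86400,  -- unix timestamp
--     text_search = 'index',
--     limit = 20,
--     offset = 0,
--   })
--   print(page.total_count, #page.documents)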

-- Get document details
function M.get_document_details(document_id)
  -- Get document
  local ok, doc_result = database.documents.get_by_id(document_id)
  if not ok then
    return nil, "Failed to get document: " .. doc_result
  end

  if not doc_result then
    return nil, "Document not found: " .. document_id
  end

  -- Get properties
  local prop_ok, prop_result = database.properties.get_by_document(document_id)
  if not prop_ok then
    return nil, "Failed to get document properties: " .. prop_result
  end

  -- Parse document for additional details
  local parse_result, parse_err = parser.parse_document(doc_result.file_path)
  if parse_err then
    utils.log("WARN", "Failed to parse document for details", {
      document_id = document_id,
      error = parse_err
    })
  end

  return {
    document = doc_result,
    properties = prop_result or {},
    parse_result = parse_result,
    file_exists = utils.file_exists(doc_result.file_path),
    is_current = parse_result and parse_result.success or false
  }
end

-- Remove document from index
function M.remove_document(document_id)
  -- Get document details first
  local doc_details, err = M.get_document_details(document_id)
  if not doc_details then
    return false, err
  end

  local ok, remove_result = updater.remove_document(doc_details.document.file_path)
  if not ok then
    return false, "Failed to remove document: " .. remove_result
  end

  utils.log("INFO", string.format("Removed document from index: %s", doc_details.document.file_path))

  return true, remove_result
end

-- Update document in index
function M.update_document(file_path)
  local ok, result = updater.index_document(file_path)
  if not ok then
    return false, "Failed to update document: " .. result
  end

  utils.log("INFO", string.format("Updated document in index: %s", file_path))

  return true, result
end

-- Get index statistics
function M.get_statistics()
  local stats = updater.get_index_stats()

  -- Add additional statistics
  local db_status = database.status()
  stats.database = db_status

  -- Get recent activity (created_at is stored as a unix timestamp, so cast
  -- strftime's text result to INTEGER before comparing)
  local recent_query = [[
    SELECT COUNT(*) as count,
           strftime('%Y-%m-%d', datetime(created_at, 'unixepoch')) as date
    FROM documents
    WHERE created_at > CAST(strftime('%s', 'now', '-7 days') AS INTEGER)
    GROUP BY date
    ORDER BY date DESC
  ]]

  local recent_ok, recent_result = database.execute(recent_query)
  if recent_ok then
    stats.recent_activity = recent_result
  end

  return stats
end

-- Validate index integrity
function M.validate_index()
  local validation_result = {
    valid = true,
    issues = {},
    stats = {}
  }

  -- Check for orphaned properties
  local orphaned_query = [[
    SELECT COUNT(*) as count FROM properties p
    LEFT JOIN documents d ON p.document_id = d.id
    WHERE d.id IS NULL
  ]]

  local ok, result = database.execute(orphaned_query)
  if ok and result[1].count > 0 then
    validation_result.valid = false
    table.insert(validation_result.issues, string.format("Found %d orphaned properties", result[1].count))
  end

  -- Check for documents that no longer exist
  local docs_query = "SELECT id, file_path FROM documents"
  ok, result = database.execute(docs_query)
  if ok then
    local missing_files = 0
    for _, doc in ipairs(result) do
      if not utils.file_exists(doc.file_path) then
        missing_files = missing_files + 1
      end
    end

    if missing_files > 0 then
      table.insert(validation_result.issues, string.format("Found %d documents pointing to missing files", missing_files))
    end

    validation_result.stats.missing_files = missing_files
  end

  -- Merge in overall statistics without clobbering missing_files above
  local overall_stats = M.get_statistics()
  overall_stats.missing_files = validation_result.stats.missing_files
  validation_result.stats = overall_stats

  return validation_result
end

-- Cleanup orphaned data
function M.cleanup_index()
  local cleanup_result = {
    removed_orphans = 0,
    removed_missing = 0,
    errors = {}
  }

  -- Remove orphaned properties
  local orphaned_query = [[
    DELETE FROM properties WHERE document_id NOT IN (SELECT id FROM documents)
  ]]

  local ok, result = database.execute(orphaned_query)
  if not ok then
    table.insert(cleanup_result.errors, "Failed to remove orphaned properties: " .. result)
  else
    -- On success, the second return value is the affected row count
    cleanup_result.removed_orphans = result
  end

  -- Remove documents pointing to missing files
  local docs_query = "SELECT id, file_path FROM documents"
  ok, result = database.execute(docs_query)
  if ok then
    for _, doc in ipairs(result) do
      if not utils.file_exists(doc.file_path) then
        local remove_ok, remove_err = updater.remove_document(doc.file_path)
        if remove_ok then
          cleanup_result.removed_missing = cleanup_result.removed_missing + 1
        else
          table.insert(cleanup_result.errors, string.format("Failed to remove missing document %s: %s", doc.file_path, remove_err))
        end
      end
    end
  end

  return cleanup_result
end

return M
258 lua/notex/index/scanner.lua Normal file
@@ -0,0 +1,258 @@
-- File system scanner for markdown documents
local M = {}

local utils = require('notex.utils')
local yaml_parser = require('notex.parser.yaml')
local markdown_parser = require('notex.parser.markdown')

-- Scan directory for markdown files
function M.scan_directory(directory_path, recursive)
  recursive = recursive ~= false -- Default to true

  local markdown_files = {}
  local scan_command

  if recursive then
    scan_command = string.format('find "%s" -name "*.md" -type f 2>/dev/null', directory_path)
  else
    scan_command = string.format('find "%s" -maxdepth 1 -name "*.md" -type f 2>/dev/null', directory_path)
  end

  local handle = io.popen(scan_command)
  if not handle then
    return nil, "Failed to scan directory: " .. directory_path
  end

  for file_path in handle:lines() do
    table.insert(markdown_files, file_path)
  end

  handle:close()

  return markdown_files
end
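
-- Portability note: the find(1) invocation above is POSIX-only and will not
-- work on Windows. A pure-Lua alternative on Neovim 0.8+ could use
-- vim.fs.find (a sketch, not wired in):
--
--   local files = vim.fs.find(function(name)
--     return name:match('%.md$') ~= nil
--   end, { path = directory_path, type = 'file', limit = math.huge })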

-- Check if file has been modified since last index
function M.is_file_modified(file_path, last_modified)
  local current_mtime = utils.get_file_mtime(file_path)

  if not current_mtime then
    return false, "Cannot get file modification time"
  end

  return current_mtime > last_modified
end

-- Scan for changed files
function M.scan_for_changes(directory_path, indexed_files)
  local changed_files = {}
  local removed_files = {}

  -- Get current files
  local current_files, err = M.scan_directory(directory_path, true)
  if not current_files then
    return nil, nil, err
  end

  -- Convert indexed files to a set for faster lookup
  local indexed_set = {}
  for _, file_info in ipairs(indexed_files) do
    indexed_set[file_info.file_path] = file_info
  end

  -- Convert current files to a set
  local current_set = {}
  for _, file_path in ipairs(current_files) do
    current_set[file_path] = true
  end

  -- Check for removed and modified files
  for file_path, file_info in pairs(indexed_set) do
    if not current_set[file_path] then
      -- File was removed
      table.insert(removed_files, file_path)
    else
      -- Check if modified
      local is_modified, mod_err = M.is_file_modified(file_path, file_info.last_modified)
      if mod_err then
        return nil, nil, "Error checking file modification: " .. mod_err
      elseif is_modified then
        table.insert(changed_files, {
          file_path = file_path,
          change_type = "modified"
        })
      end
    end
  end

  -- Check for new files
  for _, file_path in ipairs(current_files) do
    if not indexed_set[file_path] then
      table.insert(changed_files, {
        file_path = file_path,
        change_type = "new"
      })
    end
  end

  return changed_files, removed_files
end
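
-- Illustrative shapes (derived from the code above; values are made up):
--
--   local changed, removed = M.scan_for_changes('/notes', {
--     { file_path = '/notes/a.md', last_modified = 1700000000 },
--   })
--   -- changed: { { file_path = '...', change_type = 'new'|'modified' }, ... }
--   -- removed: { '/notes/gone.md', ... }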

-- Validate markdown file
function M.validate_markdown_file(file_path)
  local validation_result = {
    valid = true,
    errors = {},
    warnings = {}
  }

  -- Check if file exists
  if not utils.file_exists(file_path) then
    validation_result.valid = false
    table.insert(validation_result.errors, "File does not exist")
    return validation_result
  end

  -- Check file extension
  if not file_path:match("%.md$") then
    validation_result.valid = false
    table.insert(validation_result.errors, "File must have .md extension")
    return validation_result
  end

  -- Check file size and warn if too large (getfsize returns -1 on failure)
  local file_size = vim.fn.getfsize(file_path)
  if file_size > 10 * 1024 * 1024 then -- 10MB
    table.insert(validation_result.warnings, "File is very large (>10MB), indexing may be slow")
  end

  -- Validate UTF-8 encoding
  if not utils.is_utf8(file_path) then
    validation_result.valid = false
    table.insert(validation_result.errors, "File is not valid UTF-8")
    return validation_result
  end

  -- Validate markdown format
  local content, err = utils.read_file(file_path)
  if not content then
    validation_result.valid = false
    table.insert(validation_result.errors, "Cannot read file: " .. err)
    return validation_result
  end

  local markdown_errors = markdown_parser.validate_markdown(content)
  for _, markdown_error in ipairs(markdown_errors) do
    table.insert(validation_result.errors, "Markdown format error: " .. markdown_error)
  end

  -- Check for YAML header
  local yaml_content = yaml_parser.extract_yaml_header(content)
  if not yaml_content then
    table.insert(validation_result.warnings, "No YAML header found")
  else
    -- Validate YAML header
    local yaml_data, parse_err = yaml_parser.parse_yaml(yaml_content)
    if not yaml_data then
      validation_result.valid = false
      table.insert(validation_result.errors, "YAML parsing error: " .. parse_err)
    else
      local yaml_errors = yaml_parser.validate_yaml(yaml_data)
      for _, yaml_error in ipairs(yaml_errors) do
        table.insert(validation_result.errors, "YAML validation error: " .. yaml_error)
      end
    end
  end

  validation_result.valid = #validation_result.errors == 0

  return validation_result
end

-- Scan and validate directory
function M.scan_and_validate(directory_path)
  local files, err = M.scan_directory(directory_path, true)
  if not files then
    return nil, err
  end

  local valid_files = {}
  local invalid_files = {}
  local scan_stats = {
    total_scanned = #files,
    valid = 0,
    invalid = 0,
    warnings = 0
  }

  for _, file_path in ipairs(files) do
    local validation = M.validate_markdown_file(file_path)

    if validation.valid then
      table.insert(valid_files, file_path)
      scan_stats.valid = scan_stats.valid + 1

      if #validation.warnings > 0 then
        scan_stats.warnings = scan_stats.warnings + #validation.warnings
      end
    else
      table.insert(invalid_files, {
        file_path = file_path,
        errors = validation.errors,
        warnings = validation.warnings
      })
      scan_stats.invalid = scan_stats.invalid + 1
    end
  end

  return {
    valid_files = valid_files,
    invalid_files = invalid_files,
    stats = scan_stats
  }
end

-- Get file metadata
function M.get_file_metadata(file_path)
  local metadata = {
    file_path = file_path,
    exists = false,
    size = 0,
    last_modified = 0,
    content_hash = nil,
    yaml_header = false,
    word_count = 0,
    has_errors = false
  }

  -- Check if file exists
  if not utils.file_exists(file_path) then
    return metadata
  end

  metadata.exists = true

  -- Get file stats
  metadata.last_modified = utils.get_file_mtime(file_path) or 0

  -- Read content
  local content = utils.read_file(file_path)
  if not content then
    metadata.has_errors = true
    return metadata
  end

  metadata.size = #content
  metadata.content_hash = utils.sha256(content)

  -- Check for YAML header
  local yaml_content = yaml_parser.extract_yaml_header(content)
  metadata.yaml_header = yaml_content ~= nil

  -- Get word count
  metadata.word_count = markdown_parser.count_words(content)

  return metadata
end

return M
317 lua/notex/index/updater.lua Normal file
@@ -0,0 +1,317 @@
-- Incremental index updater
local M = {}

local database = require('notex.database.schema')
local scanner = require('notex.index.scanner')
local yaml_parser = require('notex.parser.yaml')
local utils = require('notex.utils')

-- Index a single document
function M.index_document(file_path)
  local document_id = utils.generate_id()
  local current_time = os.time()

  -- Get file metadata
  local metadata = scanner.get_file_metadata(file_path)

  if not metadata.exists then
    return false, "File does not exist: " .. file_path
  end

  if metadata.has_errors then
    return false, "File has errors: " .. file_path
  end

  -- Read and parse file
  local yaml_data, err = yaml_parser.parse_markdown_file(file_path)
  if not yaml_data then
    return false, "Failed to parse YAML: " .. err
  end

  -- Create document record
  local document_record = {
    id = document_id,
    file_path = file_path,
    content_hash = metadata.content_hash,
    last_modified = metadata.last_modified,
    created_at = current_time,
    updated_at = current_time
  }

  -- Check if document already exists
  local existing_doc, get_err = database.documents.get_by_path(file_path)
  if get_err then
    return false, "Failed to check existing document: " .. get_err
  end

  local ok
  if existing_doc then
    -- Update existing document
    document_record.id = existing_doc.id
    ok, err = database.documents.update(document_record)
    if not ok then
      return false, "Failed to update document: " .. err
    end

    -- Delete existing properties
    ok, err = database.properties.delete_by_document(document_record.id)
    if not ok then
      return false, "Failed to delete existing properties: " .. err
    end

    document_id = existing_doc.id
  else
    -- Create new document
    ok, err = database.documents.create(document_record)
    if not ok then
      return false, "Failed to create document: " .. err
    end
  end

  -- Process and create properties
  local properties = yaml_parser.process_properties(yaml_data)

  for _, prop in ipairs(properties) do
    local property_record = {
      id = utils.generate_id(),
      document_id = document_id,
      key = prop.key,
      value = tostring(prop.value),
      value_type = prop.value_type,
      created_at = current_time,
      updated_at = current_time
    }

    ok, err = database.properties.create(property_record)
    if not ok then
      utils.log("ERROR", "Failed to create property", {
        document_id = document_id,
        property_key = prop.key,
        error = err
      })
    end
  end

  -- Update schema metadata
  M.update_schema_metadata(properties)

  return true, {
    document_id = document_id,
    properties_count = #properties,
    action = existing_doc and "updated" or "created"
  }
end
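
-- On success, index_document returns:
--   true, { document_id = <id>, properties_count = <n>, action = "created"|"updated" }
-- On failure it returns false plus an error string.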

-- Update schema metadata based on properties
function M.update_schema_metadata(properties)
  -- Count property occurrences and the value types seen for each key
  local property_counts = {}
  local property_types = {}

  for _, prop in ipairs(properties) do
    if not property_counts[prop.key] then
      property_counts[prop.key] = 0
      property_types[prop.key] = {}
    end

    property_counts[prop.key] = property_counts[prop.key] + 1

    if not property_types[prop.key][prop.value_type] then
      property_types[prop.key][prop.value_type] = 0
    end
    property_types[prop.key][prop.value_type] = property_types[prop.key][prop.value_type] + 1
  end

  -- Update schema metadata for each property
  for property_key, count in pairs(property_counts) do
    -- Find most common type
    local most_common_type = nil
    local max_count = 0

    for type_name, type_count in pairs(property_types[property_key]) do
      if type_count > max_count then
        max_count = type_count
        most_common_type = type_name
      end
    end

    -- Create validation rules
    local validation_rules = vim.json.encode({
      allowed_types = vim.tbl_keys(property_types[property_key]),
      most_common_type = most_common_type
    })

    database.schema.update_property(property_key, most_common_type, validation_rules, count)
  end
end
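
-- Illustrative encoded rules for a key seen twice as "string" and once as
-- "number" (JSON key order is not guaranteed):
--   '{"allowed_types":["string","number"],"most_common_type":"string"}'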

-- Remove document from index
function M.remove_document(file_path)
  local existing_doc = database.documents.get_by_path(file_path)
  if not existing_doc then
    return false, "Document not found in index: " .. file_path
  end

  -- Properties will be deleted automatically due to the foreign key constraint
  local ok, delete_err = database.documents.delete(existing_doc.id)
  if not ok then
    return false, "Failed to delete document: " .. delete_err
  end

  return true, {
    document_id = existing_doc.id,
    file_path = file_path,
    action = "deleted"
  }
end
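
-- The cascade above assumes a schema (defined elsewhere in this commit)
-- along the lines of:
--
--   CREATE TABLE properties (
--     id TEXT PRIMARY KEY,
--     document_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
--     ...
--   );
--
-- and that the connection runs PRAGMA foreign_keys = ON, since SQLite
-- disables foreign key enforcement by default.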

-- Incremental update for directory
function M.update_directory(directory_path)
  local result = {
    updated_files = {},
    removed_files = {},
    errors = {},
    stats = {
      processed = 0,
      updated = 0,
      removed = 0,
      failed = 0
    }
  }

  -- Get currently indexed files
  local indexed_docs, err = database.execute("SELECT file_path, last_modified FROM documents")
  if not indexed_docs then
    return false, "Failed to get indexed documents: " .. err
  end

  -- Scan for changes
  local changed_files, removed_files, scan_err = scanner.scan_for_changes(directory_path, indexed_docs)
  if not changed_files then
    return false, "Failed to scan for changes: " .. scan_err
  end

  -- Process changed files
  for _, change_info in ipairs(changed_files) do
    result.stats.processed = result.stats.processed + 1

    local ok, update_result = M.index_document(change_info.file_path)
    if ok then
      result.stats.updated = result.stats.updated + 1
      table.insert(result.updated_files, update_result)
      utils.log("INFO", string.format("Updated document: %s", change_info.file_path))
    else
      result.stats.failed = result.stats.failed + 1
      table.insert(result.errors, {
        file_path = change_info.file_path,
        error = update_result
      })
      utils.log("ERROR", string.format("Failed to update document: %s - %s", change_info.file_path, update_result))
    end
  end

  -- Process removed files
  for _, file_path in ipairs(removed_files) do
    local ok, remove_result = M.remove_document(file_path)
    if ok then
      result.stats.removed = result.stats.removed + 1
      table.insert(result.removed_files, remove_result)
      utils.log("INFO", string.format("Removed document: %s", file_path))
    else
      result.stats.failed = result.stats.failed + 1
      table.insert(result.errors, {
        file_path = file_path,
        error = remove_result
      })
      utils.log("ERROR", string.format("Failed to remove document: %s - %s", file_path, remove_result))
    end
  end

  return true, result
end

-- Full reindex of directory
function M.reindex_directory(directory_path)
  local result = {
    indexed_files = {},
    errors = {},
    stats = {
      scanned = 0,
      indexed = 0,
      failed = 0,
      skipped = 0
    }
  }

  -- Clear existing index
  local ok, err = database.execute("DELETE FROM documents")
  if not ok then
    return false, "Failed to clear existing index: " .. err
  end

  -- Scan and validate directory
  local scan_result, scan_err = scanner.scan_and_validate(directory_path)
  if not scan_result then
    return false, "Failed to scan directory: " .. scan_err
  end

  result.stats.scanned = scan_result.stats.total_scanned

  -- Index valid files
  for _, file_path in ipairs(scan_result.valid_files) do
    local index_ok, index_result = M.index_document(file_path)
    if index_ok then
      result.stats.indexed = result.stats.indexed + 1
      table.insert(result.indexed_files, index_result)
      utils.log("INFO", string.format("Indexed document: %s", file_path))
    else
      result.stats.failed = result.stats.failed + 1
      table.insert(result.errors, {
        file_path = file_path,
        error = index_result
      })
      utils.log("ERROR", string.format("Failed to index document: %s - %s", file_path, index_result))
    end
  end

  -- Log invalid files
  for _, invalid_file in ipairs(scan_result.invalid_files) do
    result.stats.skipped = result.stats.skipped + 1
    utils.log("WARN", string.format("Skipped invalid file: %s", invalid_file.file_path), invalid_file.errors)
  end

  return true, result
end

-- Get index statistics
function M.get_index_stats()
  local stats = {}

  -- Document counts
  local doc_count = database.execute("SELECT COUNT(*) as count FROM documents")
  if doc_count then
    stats.document_count = doc_count[1].count
  end

  -- Property counts
  local prop_count = database.execute("SELECT COUNT(*) as count FROM properties")
  if prop_count then
    stats.property_count = prop_count[1].count
  end

  -- Schema statistics
  local schema_stats = database.schema.get_all()
  if schema_stats then
    stats.unique_properties = #schema_stats
    stats.schema_entries = schema_stats
  end

  -- Database status
  local db_status = require('notex.database.init').status()
  stats.database = db_status

  return stats
end

return M
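
Taken together, the three modules form the indexing pipeline: init.lua wires up the database and delegates to updater.lua, which in turn uses scanner.lua to find and validate files. A minimal end-to-end sketch (assuming only the require paths shown above; error handling elided):

  local index = require('notex.index.init')
  index.init('/tmp/notex.db')
  index.index_documents(vim.fn.expand('~/notes'))
  print(vim.inspect(index.get_statistics()))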