Initial vibecoded proof of concept

This commit is contained in:
Alex Selimov 2025-10-05 20:16:33 -04:00
parent 74812459af
commit 461318a656
Signed by: aselimov
GPG key ID: 3DDB9C3E023F1F31
61 changed files with 13306 additions and 0 deletions

368
lua/notex/index/init.lua Normal file

@@ -0,0 +1,368 @@
-- Document indexing coordination module
local M = {}
local database = require('notex.database.init')
local migrations = require('notex.database.migrations')
local updater = require('notex.index.updater')
local scanner = require('notex.index.scanner')
local parser = require('notex.parser')
local utils = require('notex.utils')
-- Initialize indexing system
function M.init(database_path)
local ok, err = database.init(database_path)
if not ok then
return false, "Failed to initialize database: " .. err
end
ok, err = migrations.init()
if not ok then
return false, "Failed to initialize migrations: " .. err
end
utils.log("INFO", "Document indexing system initialized")
return true, "Indexing system initialized successfully"
end
-- Index documents in directory
function M.index_documents(directory_path, options)
options = options or {}
local force_reindex = options.force_reindex or false
local recursive = options.recursive ~= false
local result = {
success = false,
directory_path = directory_path,
stats = {},
errors = {},
operation = force_reindex and "reindex" or "update"
}
-- Validate directory exists
if not utils.file_exists(directory_path) then
table.insert(result.errors, "Directory does not exist: " .. directory_path)
return result
end
-- Check that the path is actually a directory (io.open cannot tell reliably)
if vim.fn.isdirectory(directory_path) ~= 1 then
table.insert(result.errors, "Path is not a directory: " .. directory_path)
return result
end
local stop_timer = utils.timer("Document indexing")
if force_reindex then
-- Full reindex
local ok, reindex_result = updater.reindex_directory(directory_path)
if not ok then
table.insert(result.errors, "Reindex failed: " .. reindex_result)
return result
end
result.stats = reindex_result.stats
utils.log("INFO", string.format("Completed full reindex of %s", directory_path))
else
-- Incremental update
local ok, update_result = updater.update_directory(directory_path)
if not ok then
table.insert(result.errors, "Update failed: " .. update_result)
return result
end
result.stats = update_result.stats
utils.log("INFO", string.format("Completed incremental update of %s", directory_path))
end
stop_timer()
result.success = true
return result
end
-- Get indexed documents
function M.get_indexed_documents(filters)
filters = filters or {}
local limit = filters.limit or 100
local offset = filters.offset or 0
local order_by = filters.order_by or "updated_at DESC"
-- order_by is interpolated into the SQL below; allow only column names and
-- a sort direction to avoid SQL injection
if not order_by:match("^[%w_%. ]+$") then
order_by = "updated_at DESC"
end
local query = string.format([[
SELECT d.*, COUNT(p.id) as property_count
FROM documents d
LEFT JOIN properties p ON d.id = p.document_id
GROUP BY d.id
ORDER BY %s
LIMIT %d OFFSET %d
]], order_by, limit, offset)
local ok, result = database.execute(query)
if not ok then
return nil, "Failed to get indexed documents: " .. result
end
return result
end
-- Search documents by properties
function M.search_documents(search_criteria)
local conditions = {}
local params = {}
local joins = {}
-- Build WHERE clause; each property criterion is an EXISTS subquery so that
-- several filters can be ANDed together (a single properties row can only
-- match one key, so ANDing row-level predicates would never match)
if search_criteria.status then
table.insert(conditions, "EXISTS (SELECT 1 FROM properties ps WHERE ps.document_id = d.id AND ps.key = 'status' AND ps.value = :status)")
params.status = search_criteria.status
end
if search_criteria.tags then
if type(search_criteria.tags) == "string" then
table.insert(conditions, "EXISTS (SELECT 1 FROM properties pt WHERE pt.document_id = d.id AND pt.key = 'tags' AND pt.value LIKE :tag)")
params.tag = '%' .. search_criteria.tags .. '%'
elseif type(search_criteria.tags) == "table" then
local tag_conditions = {}
for i, tag in ipairs(search_criteria.tags) do
local param_name = "tag_" .. i
table.insert(tag_conditions, "EXISTS (SELECT 1 FROM properties pt WHERE pt.document_id = d.id AND pt.key = 'tags' AND pt.value LIKE :" .. param_name .. ")")
params[param_name] = '%' .. tag .. '%'
end
table.insert(conditions, "(" .. table.concat(tag_conditions, " OR ") .. ")")
end
end
if search_criteria.created_after then
table.insert(conditions, "d.created_at >= :created_after")
params.created_after = search_criteria.created_after
end
if search_criteria.created_before then
table.insert(conditions, "d.created_at <= :created_before")
params.created_before = search_criteria.created_before
end
if search_criteria.text_search then
-- Parenthesized so the OR composes correctly with the other AND conditions
table.insert(conditions, "(d.file_path LIKE :text_search OR EXISTS (SELECT 1 FROM properties p2 WHERE p2.document_id = d.id AND p2.value LIKE :text_search))")
params.text_search = '%' .. search_criteria.text_search .. '%'
end
-- Build query
local where_clause = #conditions > 0 and "WHERE " .. table.concat(conditions, " AND ") or ""
local limit = search_criteria.limit or 50
local offset = search_criteria.offset or 0
local query = string.format([[
SELECT DISTINCT d.*, COUNT(p.id) as property_count
FROM documents d
%s
LEFT JOIN properties p ON d.id = p.document_id
%s
GROUP BY d.id
ORDER BY d.updated_at DESC
LIMIT %d OFFSET %d
]], #joins > 0 and table.concat(joins, " ") or "", where_clause, limit, offset)
local ok, result = database.execute(query, params)
if not ok then
return nil, "Search failed: " .. result
end
-- Get total count
local count_query = string.format([[
SELECT COUNT(DISTINCT d.id) as total
FROM documents d
%s
LEFT JOIN properties p ON d.id = p.document_id
%s
]], #joins > 0 and table.concat(joins, " ") or "", where_clause)
local count_ok, count_result = database.execute(count_query, params)
local total_count = count_ok and count_result[1].total or 0
return {
documents = result,
total_count = total_count,
limit = limit,
offset = offset
}
end
-- Get document details
function M.get_document_details(document_id)
-- Get document
local ok, doc_result = database.documents.get_by_id(document_id)
if not ok then
return nil, "Failed to get document: " .. doc_result
end
if not doc_result then
return nil, "Document not found: " .. document_id
end
-- Get properties
local ok, prop_result = database.properties.get_by_document(document_id)
if not ok then
return nil, "Failed to get document properties: " .. prop_result
end
-- Parse document for additional details
local parse_result, parse_err = parser.parse_document(doc_result.file_path)
if parse_err then
utils.log("WARN", "Failed to parse document for details", {
document_id = document_id,
error = parse_err
})
end
return {
document = doc_result,
properties = prop_result or {},
parse_result = parse_result,
file_exists = utils.file_exists(doc_result.file_path),
is_current = parse_result and parse_result.success or false
}
end
-- Remove document from index
function M.remove_document(document_id)
-- Get document details first
local doc_details, err = M.get_document_details(document_id)
if not doc_details then
return false, err
end
local ok, remove_result = updater.remove_document(doc_details.document.file_path)
if not ok then
return false, "Failed to remove document: " .. remove_result
end
utils.log("INFO", string.format("Removed document from index: %s", doc_details.document.file_path))
return true, remove_result
end
-- Update document in index
function M.update_document(file_path)
local ok, result = updater.index_document(file_path)
if not ok then
return false, "Failed to update document: " .. result
end
utils.log("INFO", string.format("Updated document in index: %s", file_path))
return true, result
end
-- Get index statistics
function M.get_statistics()
local stats = updater.get_index_stats()
-- Add additional statistics
local db_status = database.status()
stats.database = db_status
-- Get recent activity
local recent_query = [[
SELECT COUNT(*) as count,
strftime('%Y-%m-%d', datetime(created_at, 'unixepoch')) as date
FROM documents
WHERE created_at > strftime('%s', 'now', '-7 days')
GROUP BY date
ORDER BY date DESC
]]
local recent_ok, recent_result = database.execute(recent_query)
if recent_ok then
stats.recent_activity = recent_result
end
return stats
end
-- Validate index integrity
function M.validate_index()
local validation_result = {
valid = true,
issues = {},
stats = {}
}
-- Check for orphaned properties
local orphaned_query = [[
SELECT COUNT(*) as count FROM properties p
LEFT JOIN documents d ON p.document_id = d.id
WHERE d.id IS NULL
]]
local ok, result = database.execute(orphaned_query)
if ok and result[1].count > 0 then
validation_result.valid = false
table.insert(validation_result.issues, string.format("Found %d orphaned properties", result[1].count))
end
-- Check for documents that no longer exist
local docs_query = "SELECT id, file_path FROM documents"
ok, result = database.execute(docs_query)
if ok then
local missing_files = 0
for _, doc in ipairs(result) do
if not utils.file_exists(doc.file_path) then
missing_files = missing_files + 1
end
end
if missing_files > 0 then
validation_result.valid = false
table.insert(validation_result.issues, string.format("Found %d documents pointing to missing files", missing_files))
end
validation_result.stats.missing_files = missing_files
end
-- Merge overall statistics without clobbering the counts gathered above
for key, value in pairs(M.get_statistics()) do
if validation_result.stats[key] == nil then
validation_result.stats[key] = value
end
end
return validation_result
end
-- Cleanup orphaned data
function M.cleanup_index()
local cleanup_result = {
removed_orphans = 0,
removed_missing = 0,
errors = {}
}
-- Remove orphaned properties
local orphaned_query = [[
DELETE FROM properties WHERE document_id NOT IN (SELECT id FROM documents)
]]
local ok, exec_result = database.execute(orphaned_query)
if not ok then
table.insert(cleanup_result.errors, "Failed to remove orphaned properties: " .. exec_result)
else
cleanup_result.removed_orphans = exec_result -- on success, execute returns the affected row count
end
-- Remove documents pointing to missing files
local docs_query = "SELECT id, file_path FROM documents"
local result
ok, result = database.execute(docs_query)
if ok then
for _, doc in ipairs(result) do
if not utils.file_exists(doc.file_path) then
local remove_ok, remove_err = updater.remove_document(doc.file_path)
if remove_ok then
cleanup_result.removed_missing = cleanup_result.removed_missing + 1
else
table.insert(cleanup_result.errors, string.format("Failed to remove missing document %s: %s", doc.file_path, remove_err))
end
end
end
end
return cleanup_result
end
return M
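
For orientation, here is a minimal usage sketch of the coordination module above. It assumes the plugin is on the runtime path as 'notex.index' and that a plugin-local SQLite file is acceptable; the function names mirror this file, but the setup flow itself is illustrative, not part of this commit.

-- Hypothetical wiring of the index module (paths are assumptions)
local index = require('notex.index')

local ok, err = index.init(vim.fn.stdpath('data') .. '/notex.db')
if not ok then
  vim.notify(err, vim.log.levels.ERROR)
  return
end

-- Incremental update of a notes directory, then a property search
local result = index.index_documents(vim.fn.expand('~/notes'), { recursive = true })
if result.success then
  local hits = index.search_documents({ status = 'draft', limit = 10 })
  if hits then
    print(string.format('%d matching documents', hits.total_count))
  end
end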

258
lua/notex/index/scanner.lua Normal file

@@ -0,0 +1,258 @@
-- File system scanner for markdown documents
local M = {}
local utils = require('notex.utils')
local yaml_parser = require('notex.parser.yaml')
local markdown_parser = require('notex.parser.markdown')
-- Scan directory for markdown files
function M.scan_directory(directory_path, recursive)
recursive = recursive ~= false -- Default to true
local markdown_files = {}
local scan_command
if recursive then
scan_command = string.format('find "%s" -name "*.md" -type f 2>/dev/null', directory_path)
else
scan_command = string.format('find "%s" -maxdepth 1 -name "*.md" -type f 2>/dev/null', directory_path)
end
local handle = io.popen(scan_command)
if not handle then
return nil, "Failed to scan directory: " .. directory_path
end
for file_path in handle:lines() do
table.insert(markdown_files, file_path)
end
handle:close()
return markdown_files
end
-- Check if file has been modified since last index
function M.is_file_modified(file_path, last_modified)
local current_mtime = utils.get_file_mtime(file_path)
if not current_mtime then
return false, "Cannot get file modification time"
end
return current_mtime > last_modified
end
-- Scan for changed files
function M.scan_for_changes(directory_path, indexed_files)
local changed_files = {}
local removed_files = {}
-- Get current files
local current_files, err = M.scan_directory(directory_path, true)
if not current_files then
return nil, nil, err
end
-- Convert indexed files to a set for faster lookup
local indexed_set = {}
for _, file_info in ipairs(indexed_files) do
indexed_set[file_info.file_path] = file_info
end
-- Convert current files to a set
local current_set = {}
for _, file_path in ipairs(current_files) do
current_set[file_path] = true
end
-- Check for modified files
for file_path, file_info in pairs(indexed_set) do
if not current_set[file_path] then
-- File was removed
table.insert(removed_files, file_path)
else
-- Check if modified
local is_modified, mod_err = M.is_file_modified(file_path, file_info.last_modified)
if mod_err then
return nil, nil, "Error checking file modification: " .. mod_err
elseif is_modified then
table.insert(changed_files, {
file_path = file_path,
change_type = "modified"
})
end
end
end
-- Check for new files
for _, file_path in ipairs(current_files) do
if not indexed_set[file_path] then
table.insert(changed_files, {
file_path = file_path,
change_type = "new"
})
end
end
return changed_files, removed_files
end
-- Validate markdown file
function M.validate_markdown_file(file_path)
local validation_result = {
valid = true,
errors = {},
warnings = {}
}
-- Check if file exists
if not utils.file_exists(file_path) then
validation_result.valid = false
table.insert(validation_result.errors, "File does not exist")
return validation_result
end
-- Check file extension
if not file_path:match("%.md$") then
validation_result.valid = false
table.insert(validation_result.errors, "File must have .md extension")
return validation_result
end
-- Check file size (warn if too large)
local stat = (vim.uv or vim.loop).fs_stat(file_path)
local file_size = stat and stat.size
if file_size and file_size > 10 * 1024 * 1024 then -- 10MB
table.insert(validation_result.warnings, "File is very large (>10MB), indexing may be slow")
end
-- Validate UTF-8 encoding
if not utils.is_utf8(file_path) then
validation_result.valid = false
table.insert(validation_result.errors, "File is not valid UTF-8 encoding")
return validation_result
end
-- Validate markdown format
local content, err = utils.read_file(file_path)
if not content then
validation_result.valid = false
table.insert(validation_result.errors, "Cannot read file: " .. err)
return validation_result
end
local markdown_errors = markdown_parser.validate_markdown(content)
for _, err_msg in ipairs(markdown_errors) do
table.insert(validation_result.errors, "Markdown format error: " .. err_msg)
end
-- Check for YAML header
local yaml_content, yaml_err = yaml_parser.extract_yaml_header(content)
if not yaml_content then
table.insert(validation_result.warnings, "No YAML header found")
else
-- Validate YAML header
local yaml_data, parse_err = yaml_parser.parse_yaml(yaml_content)
if not yaml_data then
validation_result.valid = false
table.insert(validation_result.errors, "YAML parsing error: " .. parse_err)
else
local yaml_errors = yaml_parser.validate_yaml(yaml_data)
for _, err_msg in ipairs(yaml_errors) do
table.insert(validation_result.errors, "YAML validation error: " .. err_msg)
end
end
end
validation_result.valid = #validation_result.errors == 0
return validation_result
end
-- Scan and validate directory
function M.scan_and_validate(directory_path)
local files, err = M.scan_directory(directory_path, true)
if not files then
return nil, err
end
local valid_files = {}
local invalid_files = {}
local scan_stats = {
total_scanned = #files,
valid = 0,
invalid = 0,
warnings = 0
}
for _, file_path in ipairs(files) do
local validation = M.validate_markdown_file(file_path)
if validation.valid then
table.insert(valid_files, file_path)
scan_stats.valid = scan_stats.valid + 1
if #validation.warnings > 0 then
scan_stats.warnings = scan_stats.warnings + #validation.warnings
end
else
table.insert(invalid_files, {
file_path = file_path,
errors = validation.errors,
warnings = validation.warnings
})
scan_stats.invalid = scan_stats.invalid + 1
end
end
return {
valid_files = valid_files,
invalid_files = invalid_files,
stats = scan_stats
}
end
-- Get file metadata
function M.get_file_metadata(file_path)
local metadata = {
file_path = file_path,
exists = false,
size = 0,
last_modified = 0,
content_hash = nil,
yaml_header = false,
word_count = 0,
has_errors = false
}
-- Check if file exists
if not utils.file_exists(file_path) then
return metadata
end
metadata.exists = true
-- Get file stats
metadata.last_modified = utils.get_file_mtime(file_path) or 0
-- Read content
local content, err = utils.read_file(file_path)
if not content then
metadata.has_errors = true
return metadata
end
metadata.size = #content
metadata.content_hash = utils.sha256(content)
-- Check for YAML header
local yaml_content = yaml_parser.extract_yaml_header(content)
metadata.yaml_header = yaml_content ~= nil
-- Get word count
metadata.word_count = markdown_parser.count_words(content)
return metadata
end
return M
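
scan_directory above shells out to find(1), which ties the scanner to POSIX systems and to shell quoting of the path. A portable sketch using Neovim's built-in vim.fs.find and vim.fs.dir (recent Neovim, 0.8+) could look like the following; scan_directory_portable is a hypothetical name, not part of this commit.

-- Portable directory scan without the find(1) dependency (assumes Neovim 0.8+)
local function scan_directory_portable(directory_path, recursive)
  if recursive ~= false then
    -- Predicate form of vim.fs.find; limit = math.huge returns every match
    return vim.fs.find(function(name)
      return name:match('%.md$') ~= nil
    end, { path = directory_path, type = 'file', limit = math.huge })
  end
  -- Non-recursive: inspect only the top-level entries
  local files = {}
  for name, kind in vim.fs.dir(directory_path) do
    if kind == 'file' and name:match('%.md$') then
      table.insert(files, directory_path .. '/' .. name)
    end
  end
  return files
end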

317
lua/notex/index/updater.lua Normal file

@@ -0,0 +1,317 @@
-- Incremental index updater
local M = {}
local database = require('notex.database.schema')
local scanner = require('notex.index.scanner')
local yaml_parser = require('notex.parser.yaml')
local utils = require('notex.utils')
-- Index a single document
function M.index_document(file_path)
local document_id = utils.generate_id()
local current_time = os.time()
-- Get file metadata
local metadata = scanner.get_file_metadata(file_path)
if not metadata.exists then
return false, "File does not exist: " .. file_path
end
if metadata.has_errors then
return false, "File has errors: " .. file_path
end
-- Read and parse file
local yaml_data, err = yaml_parser.parse_markdown_file(file_path)
if not yaml_data then
return false, "Failed to parse YAML: " .. err
end
-- Create document record
local document_record = {
id = document_id,
file_path = file_path,
content_hash = metadata.content_hash,
last_modified = metadata.last_modified,
created_at = current_time,
updated_at = current_time
}
-- Check if document already exists
local existing_doc, get_err = database.documents.get_by_path(file_path)
if get_err then
return false, "Failed to check existing document: " .. get_err
end
local ok
if existing_doc then
-- Update existing document
document_record.id = existing_doc.id
ok, err = database.documents.update(document_record)
if not ok then
return false, "Failed to update document: " .. err
end
-- Delete existing properties
ok, err = database.properties.delete_by_document(document_record.id)
if not ok then
return false, "Failed to delete existing properties: " .. err
end
document_id = existing_doc.id
else
-- Create new document
ok, err = database.documents.create(document_record)
if not ok then
return false, "Failed to create document: " .. err
end
end
-- Process and create properties
local properties = yaml_parser.process_properties(yaml_data)
for _, prop in ipairs(properties) do
local property_record = {
id = utils.generate_id(),
document_id = document_id,
key = prop.key,
value = tostring(prop.value),
value_type = prop.value_type,
created_at = current_time,
updated_at = current_time
}
ok, err = database.properties.create(property_record)
if not ok then
utils.log("ERROR", "Failed to create property", {
document_id = document_id,
property_key = prop.key,
error = err
})
end
end
-- Update schema metadata
M.update_schema_metadata(properties)
return true, {
document_id = document_id,
properties_count = #properties,
action = existing_doc and "updated" or "created"
}
end
-- Update schema metadata based on properties
function M.update_schema_metadata(properties)
-- Count property types
local property_counts = {}
local property_types = {}
for _, prop in ipairs(properties) do
if not property_counts[prop.key] then
property_counts[prop.key] = 0
property_types[prop.key] = {}
end
property_counts[prop.key] = property_counts[prop.key] + 1
if not property_types[prop.key][prop.value_type] then
property_types[prop.key][prop.value_type] = 0
end
property_types[prop.key][prop.value_type] = property_types[prop.key][prop.value_type] + 1
end
-- Update schema metadata for each property
for property_key, count in pairs(property_counts) do
-- Find most common type
local most_common_type = nil
local max_count = 0
for type_name, type_count in pairs(property_types[property_key]) do
if type_count > max_count then
max_count = type_count
most_common_type = type_name
end
end
-- Create validation rules
local validation_rules = vim.json.encode({
allowed_types = vim.tbl_keys(property_types[property_key]),
most_common_type = most_common_type
})
database.schema.update_property(property_key, most_common_type, validation_rules, count)
end
end
-- Remove document from index
function M.remove_document(file_path)
local existing_doc, err = database.documents.get_by_path(file_path)
if err then
return false, "Failed to look up document: " .. err
end
if not existing_doc then
return false, "Document not found in index: " .. file_path
end
-- Properties will be deleted automatically due to foreign key constraint
local ok, delete_err = database.documents.delete(existing_doc.id)
if not ok then
return false, "Failed to delete document: " .. delete_err
end
return true, {
document_id = existing_doc.id,
file_path = file_path,
action = "deleted"
}
end
-- Incremental update for directory
function M.update_directory(directory_path)
local result = {
updated_files = {},
removed_files = {},
errors = {},
stats = {
processed = 0,
updated = 0,
removed = 0,
failed = 0
}
}
-- Get currently indexed files
local ok, indexed_docs = database.execute("SELECT file_path, last_modified FROM documents")
if not ok then
return false, "Failed to get indexed documents: " .. indexed_docs
end
-- Scan for changes
local changed_files, removed_files, scan_err = scanner.scan_for_changes(directory_path, indexed_docs)
if not changed_files then
return false, "Failed to scan for changes: " .. scan_err
end
-- Process changed files
for _, change_info in ipairs(changed_files) do
result.stats.processed = result.stats.processed + 1
local ok, update_result = M.index_document(change_info.file_path)
if ok then
result.stats.updated = result.stats.updated + 1
table.insert(result.updated_files, update_result)
utils.log("INFO", string.format("Updated document: %s", change_info.file_path))
else
result.stats.failed = result.stats.failed + 1
table.insert(result.errors, {
file_path = change_info.file_path,
error = update_result
})
utils.log("ERROR", string.format("Failed to update document: %s - %s", change_info.file_path, update_result))
end
end
-- Process removed files
for _, file_path in ipairs(removed_files) do
local ok, remove_result = M.remove_document(file_path)
if ok then
result.stats.removed = result.stats.removed + 1
table.insert(result.removed_files, remove_result)
utils.log("INFO", string.format("Removed document: %s", file_path))
else
result.stats.failed = result.stats.failed + 1
table.insert(result.errors, {
file_path = file_path,
error = remove_result
})
utils.log("ERROR", string.format("Failed to remove document: %s - %s", file_path, remove_result))
end
end
return true, result
end
-- Full reindex of directory
function M.reindex_directory(directory_path)
local result = {
indexed_files = {},
errors = {},
stats = {
scanned = 0,
indexed = 0,
failed = 0,
skipped = 0
}
}
-- Clear existing index
local ok, err = database.execute("DELETE FROM documents")
if not ok then
return false, "Failed to clear existing index: " .. err
end
-- Scan and validate directory
local scan_result, scan_err = scanner.scan_and_validate(directory_path)
if not scan_result then
return false, "Failed to scan directory: " .. scan_err
end
result.stats.scanned = scan_result.stats.total_scanned
-- Index valid files
for _, file_path in ipairs(scan_result.valid_files) do
local ok, index_result = M.index_document(file_path)
if ok then
result.stats.indexed = result.stats.indexed + 1
table.insert(result.indexed_files, index_result)
utils.log("INFO", string.format("Indexed document: %s", file_path))
else
result.stats.failed = result.stats.failed + 1
table.insert(result.errors, {
file_path = file_path,
error = index_result
})
utils.log("ERROR", string.format("Failed to index document: %s - %s", file_path, index_result))
end
end
-- Log invalid files
for _, invalid_file in ipairs(scan_result.invalid_files) do
result.stats.skipped = result.stats.skipped + 1
utils.log("WARN", string.format("Skipped invalid file: %s", invalid_file.file_path), invalid_file.errors)
end
return true, result
end
-- Get index statistics
function M.get_index_stats()
local stats = {}
-- Document counts
local ok, doc_count = database.execute("SELECT COUNT(*) as count FROM documents")
if ok then
stats.document_count = doc_count[1].count
end
-- Property counts
local prop_ok, prop_count = database.execute("SELECT COUNT(*) as count FROM properties")
if prop_ok then
stats.property_count = prop_count[1].count
end
-- Schema statistics
local schema_stats, err = database.schema.get_all()
if schema_stats then
stats.unique_properties = #schema_stats
stats.schema_entries = schema_stats
end
-- Database status
local db_status = require('notex.database.init').status()
stats.database = db_status
return stats
end
return M
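
The updater never defines the tables it writes to; reconstructing from the record fields in index_document and the cascade noted in remove_document, the schema it assumes is roughly the following. The real DDL lives in notex.database.migrations and may differ, so treat this as a reading aid only.

-- Assumed schema, reconstructed from the records built above (not the actual migration)
local assumed_schema = [[
CREATE TABLE documents (
  id            TEXT PRIMARY KEY,
  file_path     TEXT UNIQUE NOT NULL,
  content_hash  TEXT,
  last_modified INTEGER,
  created_at    INTEGER,
  updated_at    INTEGER
);
CREATE TABLE properties (
  id          TEXT PRIMARY KEY,
  document_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
  key         TEXT NOT NULL,
  value       TEXT,
  value_type  TEXT,
  created_at  INTEGER,
  updated_at  INTEGER
);
]]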