notex.nvim/lua/notex/index/scanner.lua

258 lines
6.7 KiB
Lua
Raw Normal View History

2025-10-05 20:16:33 -04:00
-- File system scanner for markdown documents
local M = {}
local utils = require('notex.utils')
local yaml_parser = require('notex.parser.yaml')
local markdown_parser = require('notex.parser.markdown')
--- Scan a directory for markdown (`*.md`) files using the system `find` command.
-- @param directory_path string Directory to search.
-- @param recursive boolean|nil Descend into subdirectories. Defaults to true;
--   only an explicit `false` restricts the search to the top level.
-- @return table|nil List of file paths exactly as printed by `find`
--   (empty when nothing matched or `find` itself failed), or nil on error.
-- @return string|nil Error message when the scan could not be started.
function M.scan_directory(directory_path, recursive)
  if type(directory_path) ~= "string" then
    return nil, "Failed to scan directory: " .. tostring(directory_path)
  end
  recursive = recursive ~= false -- Default to true

  -- A leading "-" would be parsed by find as an option, so anchor it to the
  -- current directory instead.
  local search_root = directory_path
  if search_root:sub(1, 1) == "-" then
    search_root = "./" .. search_root
  end

  -- Single-quote the path for the shell. Inside single quotes nothing is
  -- expanded, so the only character needing care is the quote itself, which is
  -- written as '\'' (close quote, escaped quote, reopen quote). The original
  -- double-quoted form let `"`, `$` and backticks in a path alter the command.
  local escaped = search_root:gsub("'", "'\\''")
  local quoted_root = "'" .. escaped .. "'"

  local scan_command
  if recursive then
    scan_command = string.format('find %s -name "*.md" -type f 2>/dev/null', quoted_root)
  else
    scan_command = string.format('find %s -maxdepth 1 -name "*.md" -type f 2>/dev/null', quoted_root)
  end

  local handle = io.popen(scan_command)
  if not handle then
    return nil, "Failed to scan directory: " .. directory_path
  end

  local markdown_files = {}
  for file_path in handle:lines() do
    markdown_files[#markdown_files + 1] = file_path
  end
  handle:close()

  return markdown_files
end
--- Check whether a file has been modified since it was last indexed.
-- @param file_path string Path handed to `utils.get_file_mtime`.
-- @param last_modified number|string|nil Modification time recorded when the
--   file was last indexed.
-- @return boolean True when the current mtime is greater than `last_modified`,
--   or when `last_modified` is missing / not numeric (nothing to compare
--   against, so the file is treated as needing a re-index).
-- @return string|nil Error message when the current mtime cannot be obtained;
--   the first return value is false in that case.
function M.is_file_modified(file_path, last_modified)
  local current_mtime = utils.get_file_mtime(file_path)
  if not current_mtime then
    return false, "Cannot get file modification time"
  end

  -- Comparing a number with nil (or with a numeric string) raises an error in
  -- Lua, so normalise the recorded value before comparing.
  local indexed_mtime = tonumber(last_modified)
  if not indexed_mtime then
    return true
  end

  return current_mtime > indexed_mtime
end
--- Compare the markdown files currently on disk with a previously indexed list.
-- @param directory_path string Directory to scan (always scanned recursively).
-- @param indexed_files table|nil List of records with `file_path` and
--   `last_modified` fields. nil is treated as an empty index, so every file
--   found on disk is reported as new.
-- @return table|nil changed_files List of `{ file_path, change_type }` where
--   `change_type` is "modified" or "new". Modified entries come first, in the
--   order of `indexed_files`; new entries follow in scan order.
-- @return table|nil removed_files List of indexed paths no longer on disk, in
--   the order of `indexed_files`.
-- @return string|nil Error message; the first two results are nil when set.
function M.scan_for_changes(directory_path, indexed_files)
  -- A missing index simply means nothing has been indexed yet.
  indexed_files = indexed_files or {}

  -- Get current files
  local current_files, err = M.scan_directory(directory_path, true)
  if not current_files then
    return nil, nil, err
  end

  -- Path -> record lookup; when a path appears more than once the last record wins.
  local indexed_set = {}
  for _, file_info in ipairs(indexed_files) do
    indexed_set[file_info.file_path] = file_info
  end

  -- Set of paths currently on disk
  local current_set = {}
  for _, file_path in ipairs(current_files) do
    current_set[file_path] = true
  end

  local changed_files = {}
  local removed_files = {}

  -- Walk the indexed list itself rather than `pairs(indexed_set)` so the
  -- result order is deterministic; `visited` keeps duplicates from being
  -- reported twice.
  local visited = {}
  for _, entry in ipairs(indexed_files) do
    local file_path = entry.file_path
    if not visited[file_path] then
      visited[file_path] = true

      if not current_set[file_path] then
        -- File was removed
        removed_files[#removed_files + 1] = file_path
      else
        local file_info = indexed_set[file_path]
        local is_modified, mod_err = M.is_file_modified(file_path, file_info.last_modified)
        if mod_err then
          return nil, nil, "Error checking file modification: " .. mod_err
        elseif is_modified then
          changed_files[#changed_files + 1] = {
            file_path = file_path,
            change_type = "modified"
          }
        end
      end
    end
  end

  -- Anything on disk that the index does not know about is new.
  for _, file_path in ipairs(current_files) do
    if not indexed_set[file_path] then
      changed_files[#changed_files + 1] = {
        file_path = file_path,
        change_type = "new"
      }
    end
  end

  return changed_files, removed_files
end
-- Size above which a "very large file" warning is emitted (10MB).
local LARGE_FILE_BYTES = 10 * 1024 * 1024

-- Return the size of `file_path` in bytes, or nil if it cannot be determined.
local function file_size_bytes(file_path)
  local handle = io.open(file_path, "rb")
  if not handle then
    return nil
  end
  -- Seeking to the end yields the offset from the start, i.e. the file size.
  local size = handle:seek("end")
  handle:close()
  return size
end

--- Validate a markdown file: existence, extension, size, encoding, markdown
-- format and (when present) its YAML header.
-- @param file_path string Path of the file to validate.
-- @return table Result with fields:
--   `valid` (boolean, true when `errors` is empty),
--   `errors` (list of strings) and `warnings` (list of strings).
--   A missing file, wrong extension, non-UTF-8 content or unreadable file
--   stops validation early with a single error.
function M.validate_markdown_file(file_path)
  local validation_result = {
    valid = true,
    errors = {},
    warnings = {}
  }

  -- Record a fatal error and hand back the result for an early return.
  local function fail(message)
    validation_result.valid = false
    table.insert(validation_result.errors, message)
    return validation_result
  end

  -- Check if file exists
  if not utils.file_exists(file_path) then
    return fail("File does not exist")
  end

  -- Check file extension
  if not file_path:match("%.md$") then
    return fail("File must have .md extension")
  end

  -- Check file size (warn if too large). The previous implementation derived
  -- the "size" from a pattern match on the path, which always produced nil,
  -- so this warning could never be emitted.
  local file_size = file_size_bytes(file_path)
  if file_size and file_size > LARGE_FILE_BYTES then
    table.insert(validation_result.warnings, "File is very large (>10MB), indexing may be slow")
  end

  -- Validate UTF-8 encoding
  if not utils.is_utf8(file_path) then
    return fail("File is not valid UTF-8 encoding")
  end

  -- Validate markdown format
  local content, err = utils.read_file(file_path)
  if not content then
    -- tostring() keeps the concatenation safe if no message was returned.
    return fail("Cannot read file: " .. tostring(err))
  end

  local markdown_errors = markdown_parser.validate_markdown(content)
  for _, message in ipairs(markdown_errors) do
    table.insert(validation_result.errors, "Markdown format error: " .. message)
  end

  -- Check for YAML header
  local yaml_content = yaml_parser.extract_yaml_header(content)
  if not yaml_content then
    table.insert(validation_result.warnings, "No YAML header found")
  else
    -- Validate YAML header
    local yaml_data, parse_err = yaml_parser.parse_yaml(yaml_content)
    if not yaml_data then
      table.insert(validation_result.errors, "YAML parsing error: " .. tostring(parse_err))
    else
      local yaml_errors = yaml_parser.validate_yaml(yaml_data)
      for _, message in ipairs(yaml_errors) do
        table.insert(validation_result.errors, "YAML validation error: " .. message)
      end
    end
  end

  validation_result.valid = #validation_result.errors == 0
  return validation_result
end
--- Recursively scan a directory and validate every markdown file found.
-- @param directory_path string Directory to scan.
-- @return table|nil Report with `valid_files` (list of paths),
--   `invalid_files` (list of `{ file_path, errors, warnings }`) and `stats`
--   (`total_scanned`, `valid`, `invalid`, `warnings`). `stats.warnings` sums
--   the warnings of valid files only. nil when the scan fails.
-- @return string|nil Error message from the directory scan.
function M.scan_and_validate(directory_path)
  local files, scan_err = M.scan_directory(directory_path, true)
  if not files then
    return nil, scan_err
  end

  local report = {
    valid_files = {},
    invalid_files = {},
    stats = {
      total_scanned = #files,
      valid = 0,
      invalid = 0,
      warnings = 0
    }
  }
  local stats = report.stats

  for _, path in ipairs(files) do
    local outcome = M.validate_markdown_file(path)
    if not outcome.valid then
      -- Keep the full diagnostics for files that failed validation.
      report.invalid_files[#report.invalid_files + 1] = {
        file_path = path,
        errors = outcome.errors,
        warnings = outcome.warnings
      }
      stats.invalid = stats.invalid + 1
    else
      report.valid_files[#report.valid_files + 1] = path
      stats.valid = stats.valid + 1
      -- Adding zero is a no-op, so no separate "has warnings" check is needed.
      stats.warnings = stats.warnings + #outcome.warnings
    end
  end

  return report
end
--- Collect metadata about a file.
-- @param file_path string Path of the file to inspect.
-- @return table Metadata with fields `file_path`, `exists`, `size` (byte
--   length of the content read), `last_modified` (0 when unavailable),
--   `content_hash` (result of `utils.sha256`, nil when the content was not
--   read), `yaml_header` (boolean), `word_count` and `has_errors` (true when
--   the file exists but could not be read).
function M.get_file_metadata(file_path)
  -- Defaults describe a file that does not exist.
  local info = {
    file_path = file_path,
    exists = false,
    size = 0,
    last_modified = 0,
    content_hash = nil,
    yaml_header = false,
    word_count = 0,
    has_errors = false
  }

  if utils.file_exists(file_path) then
    info.exists = true
    info.last_modified = utils.get_file_mtime(file_path) or 0

    local text = utils.read_file(file_path)
    if text then
      info.size = #text
      info.content_hash = utils.sha256(text)
      -- Parentheses keep only the first return value of the extractor.
      info.yaml_header = (yaml_parser.extract_yaml_header(text)) ~= nil
      info.word_count = markdown_parser.count_words(text)
    else
      -- The file exists but its content is unavailable.
      info.has_errors = true
    end
  end

  return info
end
return M