-- File system scanner for markdown documents

local M = {}
|
|
|
|
local utils = require('notex.utils')
|
|
local yaml_parser = require('notex.parser.yaml')
|
|
local markdown_parser = require('notex.parser.markdown')
|
|
|
|
-- Scan directory for markdown files
|
|
--- Scan a directory for markdown (`*.md`) files using the system `find` command.
-- @param directory_path string Directory to search.
-- @param recursive boolean|nil Descend into subdirectories. Anything other than
--   an explicit `false` (including nil) means recursive.
-- @return table|nil Array of the paths printed by `find` (one entry per output
--   line), or nil when the scan could not be started.
-- @return string|nil Error message accompanying a nil result.
function M.scan_directory(directory_path, recursive)
  recursive = recursive ~= false -- Default to true

  -- A non-string path cannot be scanned; report it instead of crashing in the
  -- string concatenation / formatting below.
  if type(directory_path) ~= "string" then
    return nil, "Failed to scan directory: " .. tostring(directory_path)
  end

  -- Quote the path for the shell with single quotes so that characters such as
  -- `"`, `$`, backticks or `;` inside the path are passed to `find` literally
  -- instead of being interpreted by the shell. An embedded single quote is
  -- written as '\'' (close quote, escaped quote, reopen quote).
  local escaped_path = directory_path:gsub("'", "'\\''")
  local quoted_path = "'" .. escaped_path .. "'"

  -- Non-recursive scans restrict `find` to the directory itself.
  local depth_option = recursive and "" or "-maxdepth 1 "
  local scan_command = string.format(
    'find %s %s-name "*.md" -type f 2>/dev/null',
    quoted_path,
    depth_option
  )

  local handle = io.popen(scan_command)
  if not handle then
    return nil, "Failed to scan directory: " .. directory_path
  end

  -- Each line of output is one matching file path.
  local markdown_files = {}
  for file_path in handle:lines() do
    markdown_files[#markdown_files + 1] = file_path
  end

  handle:close()

  return markdown_files
end
|
|
|
|
-- Check if file has been modified since last index
|
|
--- Check whether a file has been modified since it was last indexed.
-- @param file_path string Path of the file to check.
-- @param last_modified number|string|nil Modification time recorded at the
--   last index. A value that cannot be converted to a number (including nil)
--   is treated as "no baseline", so the file is reported as modified.
-- @return boolean True when the current modification time is newer than
--   `last_modified`, or when there is no usable baseline.
-- @return string|nil Error message when the current modification time cannot
--   be obtained (the first return value is then false).
function M.is_file_modified(file_path, last_modified)
  -- NOTE(review): assumes utils.get_file_mtime returns a number, or nil on
  -- failure -- confirm against notex.utils.
  local current_mtime = utils.get_file_mtime(file_path)

  if not current_mtime then
    return false, "Cannot get file modification time"
  end

  -- Comparing a number with nil (or with a numeric string) raises an error in
  -- Lua, so normalise the baseline first. Without a usable baseline the safe
  -- answer is "modified", which makes the caller re-index the file.
  local baseline = tonumber(last_modified)
  if not baseline then
    return true
  end

  return current_mtime > baseline
end
|
|
|
|
-- Scan for changed files
|
|
--- Compare the markdown files currently on disk with a previously indexed list.
-- @param directory_path string Directory that is scanned recursively.
-- @param indexed_files table Array of records; each record is read for its
--   `file_path` and `last_modified` fields.
-- @return table|nil Array of `{ file_path = ..., change_type = "modified" | "new" }`
--   entries, or nil on error.
-- @return table|nil Array of indexed paths that are no longer on disk, or nil
--   on error.
-- @return string|nil Error message; only present when the first two results
--   are nil.
function M.scan_for_changes(directory_path, indexed_files)
  local changed, removed = {}, {}

  -- Snapshot of what is on disk right now.
  local on_disk, scan_err = M.scan_directory(directory_path, true)
  if not on_disk then
    return nil, nil, scan_err
  end

  -- Index records keyed by path for constant-time lookup.
  local known = {}
  for _, record in ipairs(indexed_files) do
    known[record.file_path] = record
  end

  -- Paths present on disk, as a set.
  local present = {}
  for _, path in ipairs(on_disk) do
    present[path] = true
  end

  -- Every indexed file is either gone, modified, or unchanged.
  for path, record in pairs(known) do
    if present[path] then
      local modified, mod_err = M.is_file_modified(path, record.last_modified)
      if mod_err then
        return nil, nil, "Error checking file modification: " .. mod_err
      end
      if modified then
        changed[#changed + 1] = {
          file_path = path,
          change_type = "modified"
        }
      end
    else
      removed[#removed + 1] = path
    end
  end

  -- Anything on disk that the index has never seen is new.
  for _, path in ipairs(on_disk) do
    if not known[path] then
      changed[#changed + 1] = {
        file_path = path,
        change_type = "new"
      }
    end
  end

  return changed, removed
end
|
|
|
|
-- Validate markdown file
|
|
--- Validate a single markdown file.
-- Checks run in order: existence, `.md` extension, size (warning only),
-- UTF-8 encoding, readability, markdown format, then the optional YAML header.
-- The existence, extension, encoding and read checks return immediately on
-- failure; the markdown and YAML checks accumulate errors.
-- @param file_path string Path of the file to validate.
-- @return table Result with fields `valid` (boolean, true only when `errors`
--   is empty), `errors` (array of strings) and `warnings` (array of strings).
function M.validate_markdown_file(file_path)
  local validation_result = {
    valid = true,
    errors = {},
    warnings = {}
  }

  -- Record a fatal error and hand back the result for an early return.
  local function fail(message)
    validation_result.valid = false
    table.insert(validation_result.errors, message)
    return validation_result
  end

  -- Check if file exists
  if not utils.file_exists(file_path) then
    return fail("File does not exist")
  end

  -- Check file extension
  if not file_path:match("%.md$") then
    return fail("File must have .md extension")
  end

  -- Check file size (warn if too large). The size is taken by seeking to the
  -- end of the file, so the content does not have to be loaded just for this
  -- check. If the file cannot be opened here the warning is skipped; the read
  -- further down reports the actual error.
  local size_handle = io.open(file_path, "rb")
  if size_handle then
    local file_size = size_handle:seek("end")
    size_handle:close()
    if file_size and file_size > 10 * 1024 * 1024 then -- 10MB
      table.insert(validation_result.warnings, "File is very large (>10MB), indexing may be slow")
    end
  end

  -- Validate UTF-8 encoding
  -- NOTE(review): utils.is_utf8 is given the path, not the content -- confirm
  -- that it reads the file rather than validating the path string itself.
  if not utils.is_utf8(file_path) then
    return fail("File is not valid UTF-8 encoding")
  end

  -- Validate markdown format
  local content, err = utils.read_file(file_path)
  if not content then
    -- tostring keeps this from raising when no error message was provided.
    return fail("Cannot read file: " .. tostring(err))
  end

  local markdown_errors = markdown_parser.validate_markdown(content)
  for _, message in ipairs(markdown_errors) do
    table.insert(validation_result.errors, "Markdown format error: " .. message)
  end

  -- Check for YAML header (a missing header is only a warning)
  local yaml_content = yaml_parser.extract_yaml_header(content)
  if not yaml_content then
    table.insert(validation_result.warnings, "No YAML header found")
  else
    -- Validate YAML header
    local yaml_data, parse_err = yaml_parser.parse_yaml(yaml_content)
    if not yaml_data then
      table.insert(validation_result.errors, "YAML parsing error: " .. tostring(parse_err))
    else
      local yaml_errors = yaml_parser.validate_yaml(yaml_data)
      for _, message in ipairs(yaml_errors) do
        table.insert(validation_result.errors, "YAML validation error: " .. message)
      end
    end
  end

  -- The file is valid only if no check recorded an error.
  validation_result.valid = #validation_result.errors == 0

  return validation_result
end
|
|
|
|
-- Scan and validate directory
|
|
--- Recursively scan a directory and validate every markdown file found.
-- @param directory_path string Directory to scan.
-- @return table|nil Table with `valid_files` (array of paths), `invalid_files`
--   (array of `{ file_path, errors, warnings }`) and `stats`
--   (`total_scanned`, `valid`, `invalid`, `warnings`); nil when the scan fails.
-- @return string|nil Error message from the directory scan on failure.
function M.scan_and_validate(directory_path)
  local files, err = M.scan_directory(directory_path, true)
  if not files then
    return nil, err
  end

  local accepted, rejected = {}, {}
  local stats = {
    total_scanned = #files,
    valid = 0,
    invalid = 0,
    warnings = 0
  }

  for _, path in ipairs(files) do
    local outcome = M.validate_markdown_file(path)

    if not outcome.valid then
      rejected[#rejected + 1] = {
        file_path = path,
        errors = outcome.errors,
        warnings = outcome.warnings
      }
      stats.invalid = stats.invalid + 1
    else
      accepted[#accepted + 1] = path
      stats.valid = stats.valid + 1
      -- Only warnings of valid files are counted in the statistics.
      stats.warnings = stats.warnings + #outcome.warnings
    end
  end

  return {
    valid_files = accepted,
    invalid_files = rejected,
    stats = stats
  }
end
|
|
|
|
-- Get file metadata
|
|
--- Collect metadata for a single file.
-- @param file_path string Path of the file to inspect.
-- @return table Metadata with fields `file_path`, `exists`, `size` (length of
--   the content that was read), `last_modified`, `content_hash`, `yaml_header`
--   (boolean), `word_count` and `has_errors`. When the file does not exist the
--   defaults are returned unchanged; when it cannot be read, `has_errors` is
--   set and the content-derived fields keep their defaults.
function M.get_file_metadata(file_path)
  local info = {
    file_path = file_path,
    exists = false,
    size = 0,
    last_modified = 0,
    content_hash = nil,
    yaml_header = false,
    word_count = 0,
    has_errors = false
  }

  -- Nothing more to report for a missing file.
  if not utils.file_exists(file_path) then
    return info
  end
  info.exists = true

  -- Fall back to 0 when the modification time is unavailable.
  info.last_modified = utils.get_file_mtime(file_path) or 0

  local text = utils.read_file(file_path)
  if not text then
    info.has_errors = true
    return info
  end

  -- Everything below is derived from the content that was read.
  info.size = #text
  info.content_hash = utils.sha256(text)
  info.yaml_header = yaml_parser.extract_yaml_header(text) ~= nil
  info.word_count = markdown_parser.count_words(text)

  return info
end
|
|
|
|
-- Export the module table.
return M