-- File system scanner for markdown documents
local M = {}

local utils = require('notex.utils')
local yaml_parser = require('notex.parser.yaml')
local markdown_parser = require('notex.parser.markdown')

-- Files above this many bytes get a "very large" warning during validation.
local LARGE_FILE_BYTES = 10 * 1024 * 1024 -- 10MB

-- Quote a value so a POSIX shell treats it as a single literal word.
-- Single quotes disable all expansion; an embedded single quote is written
-- as '\'' (close quote, escaped quote, reopen quote).
local function shell_quote(value)
  local escaped = tostring(value):gsub("'", "'\\''")
  return "'" .. escaped .. "'"
end

-- Return the size of a file in bytes, or nil if it cannot be opened.
local function file_size_bytes(file_path)
  local handle = io.open(file_path, "rb")
  if not handle then
    return nil
  end
  -- Seeking to the end yields the end offset, i.e. the byte length.
  local size = handle:seek("end")
  handle:close()
  return size
end

-- Scan directory for markdown files
-- Runs `find` through io.popen and collects every output line.
-- @param directory_path directory to search
-- @param recursive      pass false to search only the top level (default true)
-- @return list of paths as printed by `find`, or nil plus an error message
--         when no path is given or the command cannot be started. Errors
--         printed by `find` itself are discarded (stderr goes to /dev/null),
--         so an unreadable or missing directory yields an empty list.
function M.scan_directory(directory_path, recursive)
  if directory_path == nil then
    return nil, "Failed to scan directory: no path given"
  end

  recursive = recursive ~= false -- Default to true

  -- The path is shell-quoted so that quotes, `$`, backticks, etc. in a
  -- directory name cannot alter the command that is executed.
  local quoted_path = shell_quote(directory_path)
  local scan_command
  if recursive then
    scan_command = string.format(
      'find %s -name "*.md" -type f 2>/dev/null', quoted_path)
  else
    scan_command = string.format(
      'find %s -maxdepth 1 -name "*.md" -type f 2>/dev/null', quoted_path)
  end

  local handle = io.popen(scan_command)
  if not handle then
    return nil, "Failed to scan directory: " .. tostring(directory_path)
  end

  local markdown_files = {}
  for file_path in handle:lines() do
    markdown_files[#markdown_files + 1] = file_path
  end
  handle:close()

  return markdown_files
end

-- Check if file has been modified since last index
-- @param file_path     file to check
-- @param last_modified previously recorded modification time; assumed to be
--                      in the same units as utils.get_file_mtime returns
-- @return true when the current mtime is greater than last_modified, or when
--         last_modified is nil (nothing recorded, so the file must be
--         re-indexed); false otherwise. When the current mtime cannot be
--         obtained, returns false plus an error message.
function M.is_file_modified(file_path, last_modified)
  local current_mtime = utils.get_file_mtime(file_path)
  if not current_mtime then
    return false, "Cannot get file modification time"
  end

  -- Comparing a number with nil raises; a missing timestamp means "stale".
  if last_modified == nil then
    return true
  end

  return current_mtime > last_modified
end

-- Scan for changed files
-- Compares the markdown files currently under directory_path (recursive scan)
-- with the previously indexed entries.
-- @param directory_path directory to scan
-- @param indexed_files  list of tables with `file_path` and `last_modified`
--                       fields; nil is treated as an empty list
-- @return changed_files list of { file_path, change_type } where change_type
--                       is "modified" or "new"
-- @return removed_files list of indexed paths that are no longer present
-- On failure returns nil, nil, error_message.
function M.scan_for_changes(directory_path, indexed_files)
  indexed_files = indexed_files or {}

  -- Get current files
  local current_files, err = M.scan_directory(directory_path, true)
  if not current_files then
    return nil, nil, err
  end

  -- Convert indexed files to a set for faster lookup (last entry wins when
  -- the same path appears more than once)
  local indexed_set = {}
  for _, file_info in ipairs(indexed_files) do
    indexed_set[file_info.file_path] = file_info
  end

  -- Convert current files to a set
  local current_set = {}
  for _, file_path in ipairs(current_files) do
    current_set[file_path] = true
  end

  local changed_files = {}
  local removed_files = {}

  -- Check for removed and modified files. Walk the caller's list rather than
  -- the set so the output order is deterministic; `visited` keeps each path
  -- from being reported twice.
  local visited = {}
  for _, entry in ipairs(indexed_files) do
    local file_path = entry.file_path
    if not visited[file_path] then
      visited[file_path] = true
      local file_info = indexed_set[file_path]

      if not current_set[file_path] then
        -- File was removed
        removed_files[#removed_files + 1] = file_path
      else
        -- Check if modified
        local is_modified, mod_err =
          M.is_file_modified(file_path, file_info.last_modified)
        if mod_err then
          return nil, nil, "Error checking file modification: " .. mod_err
        elseif is_modified then
          changed_files[#changed_files + 1] = {
            file_path = file_path,
            change_type = "modified"
          }
        end
      end
    end
  end

  -- Check for new files
  for _, file_path in ipairs(current_files) do
    if not indexed_set[file_path] then
      changed_files[#changed_files + 1] = {
        file_path = file_path,
        change_type = "new"
      }
    end
  end

  return changed_files, removed_files
end

-- Validate markdown file
-- Checks, in order: existence, `.md` extension, size (warning only), UTF-8
-- encoding, markdown format, and the YAML header if one is present.
-- @param file_path path of the file to validate
-- @return table { valid = boolean, errors = {strings}, warnings = {strings} };
--         `valid` is true exactly when `errors` is empty. The existence,
--         extension, encoding and readability checks return immediately on
--         failure; later checks accumulate their errors.
function M.validate_markdown_file(file_path)
  local validation_result = {
    valid = true,
    errors = {},
    warnings = {}
  }

  -- Record a fatal error and hand back the result for an early return.
  local function fail(message)
    validation_result.valid = false
    table.insert(validation_result.errors, message)
    return validation_result
  end

  -- Check if file exists
  if not utils.file_exists(file_path) then
    return fail("File does not exist")
  end

  -- Check file extension
  if not file_path:match("%.md$") then
    return fail("File must have .md extension")
  end

  -- Check file size (warn if too large)
  local file_size = file_size_bytes(file_path)
  if file_size and file_size > LARGE_FILE_BYTES then
    table.insert(validation_result.warnings,
      "File is very large (>10MB), indexing may be slow")
  end

  -- Validate UTF-8 encoding
  if not utils.is_utf8(file_path) then
    return fail("File is not valid UTF-8 encoding")
  end

  -- Validate markdown format
  local content, err = utils.read_file(file_path)
  if not content then
    -- tostring guards against a reader that returns nil without a message.
    return fail("Cannot read file: " .. tostring(err))
  end

  local markdown_errors = markdown_parser.validate_markdown(content)
  for _, message in ipairs(markdown_errors) do
    table.insert(validation_result.errors,
      "Markdown format error: " .. message)
  end

  -- Check for YAML header
  local yaml_content = yaml_parser.extract_yaml_header(content)
  if not yaml_content then
    table.insert(validation_result.warnings, "No YAML header found")
  else
    -- Validate YAML header
    local yaml_data, parse_err = yaml_parser.parse_yaml(yaml_content)
    if not yaml_data then
      table.insert(validation_result.errors,
        "YAML parsing error: " .. tostring(parse_err))
    else
      local yaml_errors = yaml_parser.validate_yaml(yaml_data)
      for _, message in ipairs(yaml_errors) do
        table.insert(validation_result.errors,
          "YAML validation error: " .. message)
      end
    end
  end

  validation_result.valid = #validation_result.errors == 0

  return validation_result
end

-- Scan and validate directory
-- Recursively scans directory_path and validates every markdown file found.
-- @param directory_path directory to scan
-- @return table {
--           valid_files   = list of paths that passed validation,
--           invalid_files = list of { file_path, errors, warnings },
--           stats         = { total_scanned, valid, invalid, warnings }
--         }, or nil plus an error message when the scan fails.
--         stats.warnings counts warnings from valid files only; warnings of
--         invalid files are carried on their invalid_files entry.
function M.scan_and_validate(directory_path)
  local files, err = M.scan_directory(directory_path, true)
  if not files then
    return nil, err
  end

  local valid_files = {}
  local invalid_files = {}
  local scan_stats = {
    total_scanned = #files,
    valid = 0,
    invalid = 0,
    warnings = 0
  }

  for _, file_path in ipairs(files) do
    local validation = M.validate_markdown_file(file_path)

    if validation.valid then
      valid_files[#valid_files + 1] = file_path
      scan_stats.valid = scan_stats.valid + 1
      scan_stats.warnings = scan_stats.warnings + #validation.warnings
    else
      invalid_files[#invalid_files + 1] = {
        file_path = file_path,
        errors = validation.errors,
        warnings = validation.warnings
      }
      scan_stats.invalid = scan_stats.invalid + 1
    end
  end

  return {
    valid_files = valid_files,
    invalid_files = invalid_files,
    stats = scan_stats
  }
end

-- Get file metadata
-- @param file_path path of the file to describe
-- @return table with fields file_path, exists, size, last_modified,
--         content_hash, yaml_header, word_count and has_errors. For a missing
--         file only file_path is meaningful and exists is false. If the file
--         exists but cannot be read, has_errors is true and the
--         content-derived fields keep their defaults.
function M.get_file_metadata(file_path)
  local metadata = {
    file_path = file_path,
    exists = false,
    size = 0,
    last_modified = 0,
    content_hash = nil,
    yaml_header = false,
    word_count = 0,
    has_errors = false
  }

  -- Check if file exists
  if not utils.file_exists(file_path) then
    return metadata
  end

  metadata.exists = true

  -- Get file stats
  metadata.last_modified = utils.get_file_mtime(file_path) or 0

  -- Read content
  local content = utils.read_file(file_path)
  if not content then
    metadata.has_errors = true
    return metadata
  end

  -- Size is the length of the content as returned by utils.read_file.
  metadata.size = #content
  metadata.content_hash = utils.sha256(content)

  -- Check for YAML header
  local yaml_content = yaml_parser.extract_yaml_header(content)
  metadata.yaml_header = yaml_content ~= nil

  -- Get word count
  metadata.word_count = markdown_parser.count_words(content)

  return metadata
end

return M