-- notex.nvim/lua/notex/parser/init.lua

-- Parser coordination module
local M = {}
local yaml_parser = require('notex.parser.yaml')
local markdown_parser = require('notex.parser.markdown')
local utils = require('notex.utils')

-- Parse complete markdown document
function M.parse_document(file_path)
  local result = {
    file_path = file_path,
    success = false,
    yaml_data = {},
    markdown_analysis = {},
    properties = {},
    errors = {},
    warnings = {}
  }

  -- Validate file
  if not utils.file_exists(file_path) then
    table.insert(result.errors, "File does not exist: " .. file_path)
    return result
  end

  if not utils.is_utf8(file_path) then
    table.insert(result.errors, "File is not valid UTF-8: " .. file_path)
    return result
  end

  -- Read file content
  local content, err = utils.read_file(file_path)
  if not content then
    table.insert(result.errors, "Cannot read file: " .. err)
    return result
  end

  -- Parse YAML header
  local yaml_data, yaml_err = yaml_parser.parse_markdown_file(file_path)
  if yaml_data then
    result.yaml_data = yaml_data
    -- Process properties
    result.properties = yaml_parser.process_properties(yaml_data)
  elseif yaml_err then
    table.insert(result.errors, "YAML parsing error: " .. yaml_err)
  else
    table.insert(result.warnings, "No YAML header found")
  end

  -- Analyze markdown content
  local markdown_analysis = markdown_parser.analyze_structure(content)
  result.markdown_analysis = markdown_analysis

  result.success = #result.errors == 0
  return result
end
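
-- Example usage (illustrative sketch; the fields follow the result table
-- assembled above, 'vim.print' assumes Neovim 0.9+, and the current-buffer
-- path is just one possible input):
--
--   local parser = require('notex.parser')
--   local result = parser.parse_document(vim.fn.expand('%:p'))
--   if result.success then
--     vim.print(result.properties)
--   else
--     vim.print(result.errors)
--   end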

-- Parse multiple documents
function M.parse_documents(file_paths)
  local results = {
    total = #file_paths,
    successful = 0,
    failed = 0,
    documents = {},
    errors = {}
  }

  for _, file_path in ipairs(file_paths) do
    local doc_result = M.parse_document(file_path)
    table.insert(results.documents, doc_result)

    if doc_result.success then
      results.successful = results.successful + 1
    else
      results.failed = results.failed + 1
      for _, err in ipairs(doc_result.errors) do
        table.insert(results.errors, {
          file_path = file_path,
          error = err
        })
      end
    end
  end

  return results
end
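
-- Example usage (a sketch; 'vim.fs.find' is one possible way to collect note
-- paths and is assumed here rather than required by this module):
--
--   local paths = vim.fs.find(
--     function(name) return name:match('%.md$') ~= nil end,
--     { path = vim.fn.getcwd(), type = 'file', limit = math.huge })
--   local results = require('notex.parser').parse_documents(paths)
--   print(results.successful .. '/' .. results.total .. ' documents parsed')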

-- Extract document summary
function M.get_document_summary(file_path)
  local parse_result = M.parse_document(file_path)

  if not parse_result.success then
    return nil, "Failed to parse document: " .. table.concat(parse_result.errors, ", ")
  end

  local first_heading = parse_result.markdown_analysis.headings[1]

  local summary = {
    file_path = file_path,
    title = parse_result.yaml_data.title or (first_heading and first_heading.title) or "Untitled",
    status = parse_result.yaml_data.status or "unknown",
    priority = parse_result.yaml_data.priority or 0,
    tags = parse_result.yaml_data.tags or {},
    word_count = parse_result.markdown_analysis.word_count,
    created_at = parse_result.yaml_data.created_at,
    updated_at = parse_result.yaml_data.updated_at,
    summary = parse_result.markdown_analysis.summary,
    properties_count = #parse_result.properties,
    has_headings = #parse_result.markdown_analysis.headings > 0,
    has_links = #parse_result.markdown_analysis.links > 0,
    has_code = #parse_result.markdown_analysis.code_blocks > 0,
    reading_time = parse_result.markdown_analysis.reading_time_minutes
  }

  return summary
end
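
-- Example usage (illustrative; 'notes/todo.md' is a made-up path, and which
-- summary fields are populated depends on the document's YAML header and on
-- what notex.parser.markdown reports):
--
--   local summary, err = require('notex.parser').get_document_summary('notes/todo.md')
--   if summary then
--     print(summary.title, summary.status, summary.word_count)
--   end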

-- Validate document against schema
function M.validate_document_schema(file_path, schema_requirements)
  local parse_result = M.parse_document(file_path)

  if not parse_result.success then
    return false, parse_result.errors
  end

  local validation_errors = {}

  -- Check required properties
  if schema_requirements.required then
    for _, required_prop in ipairs(schema_requirements.required) do
      local found = false
      for _, prop in ipairs(parse_result.properties) do
        if prop.key == required_prop then
          found = true
          break
        end
      end
      if not found then
        table.insert(validation_errors, string.format("Missing required property: %s", required_prop))
      end
    end
  end

  -- Check property types
  if schema_requirements.property_types then
    for _, prop in ipairs(parse_result.properties) do
      local expected_type = schema_requirements.property_types[prop.key]
      if expected_type and prop.value_type ~= expected_type then
        table.insert(validation_errors,
          string.format("Property '%s' should be %s, got %s", prop.key, expected_type, prop.value_type))
      end
    end
  end

  -- Check property patterns
  if schema_requirements.patterns then
    for _, prop in ipairs(parse_result.properties) do
      local pattern = schema_requirements.patterns[prop.key]
      -- Coerce to string so non-string property values cannot raise on :match()
      if pattern and not tostring(prop.value):match(pattern) then
        table.insert(validation_errors, string.format("Property '%s' does not match required pattern", prop.key))
      end
    end
  end

  if #validation_errors > 0 then
    return false, validation_errors
  end

  return true, parse_result
end
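
-- Example schema (a sketch of the shape this function reads above; the keys
-- 'status', 'priority', and 'created_at' are illustrative, and value_type
-- strings must match whatever notex.parser.yaml assigns):
--
--   local ok, details = require('notex.parser').validate_document_schema('notes/todo.md', {
--     required = { 'status', 'priority' },
--     property_types = { priority = 'number' },
--     patterns = { created_at = '^%d%d%d%d%-%d%d%-%d%d' },
--   })
--   if not ok then vim.print(details) end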

-- Extract document relationships
function M.extract_relationships(file_paths)
  local relationships = {
    links = {},
    references = {},
    backlinks = {}
  }

  -- Parse all documents
  local parse_results = M.parse_documents(file_paths)

  -- Build document lookup
  local docs_by_path = {}
  for _, doc_result in ipairs(parse_results.documents) do
    if doc_result.success then
      docs_by_path[doc_result.file_path] = doc_result
    end
  end

  -- Extract links and references
  for _, doc_result in ipairs(parse_results.documents) do
    if doc_result.success then
      local source_doc = doc_result.file_path

      -- Extract markdown links
      for _, link in ipairs(doc_result.markdown_analysis.links) do
        table.insert(relationships.links, {
          source = source_doc,
          target = link.url,
          text = link.text,
          type = "markdown_link"
        })
      end

      -- Extract property references (if any)
      for _, prop in ipairs(doc_result.properties) do
        if prop.key:match("ref") or prop.key:match("reference") then
          table.insert(relationships.references, {
            source = source_doc,
            target = prop.value,
            property = prop.key,
            type = "property_reference"
          })
        end
      end
    end
  end

  -- Build backlinks
  for _, link in ipairs(relationships.links) do
    for target_path, _ in pairs(docs_by_path) do
      -- Use a plain-text find: file paths contain characters ('.', '-') that
      -- are magic in Lua patterns, so a pattern match here would be unreliable
      if link.target == target_path or link.target:find(target_path, 1, true) then
        table.insert(relationships.backlinks, {
          source = target_path,
          target = link.source,
          text = link.text,
          type = "backlink"
        })
      end
    end
  end

  return relationships
end
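
-- Example usage (a sketch, assuming a 'paths' list like the one gathered in
-- the parse_documents example above; link targets are taken verbatim from
-- markdown link URLs, so relative links only yield backlinks when they
-- happen to contain a document's stored path):
--
--   local rel = require('notex.parser').extract_relationships(paths)
--   print(#rel.links .. ' links, ' .. #rel.backlinks .. ' backlinks')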

-- Generate document statistics
function M.generate_statistics(file_paths)
  local stats = {
    total_documents = #file_paths,
    total_words = 0,
    total_properties = 0,
    property_distribution = {},
    status_distribution = {},
    tag_distribution = {},
    file_size_distribution = {},
    average_word_count = 0,
    documents_with_headings = 0,
    documents_with_links = 0,
    documents_with_code = 0
  }

  -- Parse all documents
  local parse_results = M.parse_documents(file_paths)

  for _, doc_result in ipairs(parse_results.documents) do
    if doc_result.success then
      local analysis = doc_result.markdown_analysis

      -- Word count
      stats.total_words = stats.total_words + analysis.word_count

      -- Properties
      stats.total_properties = stats.total_properties + #doc_result.properties

      -- Property distribution
      for _, prop in ipairs(doc_result.properties) do
        stats.property_distribution[prop.key] = (stats.property_distribution[prop.key] or 0) + 1
      end

      -- Status distribution
      local status = doc_result.yaml_data.status or "unknown"
      stats.status_distribution[status] = (stats.status_distribution[status] or 0) + 1

      -- Tag distribution
      local tags = doc_result.yaml_data.tags or {}
      for _, tag in ipairs(tags) do
        stats.tag_distribution[tag] = (stats.tag_distribution[tag] or 0) + 1
      end

      -- Feature flags, derived from the structure analysis the same way
      -- get_document_summary derives them
      if #analysis.headings > 0 then
        stats.documents_with_headings = stats.documents_with_headings + 1
      end
      if #analysis.links > 0 then
        stats.documents_with_links = stats.documents_with_links + 1
      end
      if #analysis.code_blocks > 0 then
        stats.documents_with_code = stats.documents_with_code + 1
      end
    end
  end

  -- Calculate averages
  if stats.total_documents > 0 then
    stats.average_word_count = math.floor(stats.total_words / stats.total_documents)
  end

  return stats
end
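
-- Example usage (illustrative; note that 'file_size_distribution' is declared
-- above but never populated in this function, so it stays empty):
--
--   local stats = require('notex.parser').generate_statistics(paths)
--   print(stats.total_documents, stats.total_words, stats.average_word_count)
--   vim.print(stats.status_distribution)
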
return M