-- Parser coordination module

local M = {}

local yaml_parser = require('notex.parser.yaml')
local markdown_parser = require('notex.parser.markdown')
local utils = require('notex.utils')

-- Parse complete markdown document
function M.parse_document(file_path)
  local result = {
    file_path = file_path,
    success = false,
    yaml_data = {},
    markdown_analysis = {},
    properties = {},
    errors = {},
    warnings = {}
  }

  -- Validate file
  if not utils.file_exists(file_path) then
    table.insert(result.errors, "File does not exist: " .. file_path)
    return result
  end

  if not utils.is_utf8(file_path) then
    table.insert(result.errors, "File is not valid UTF-8: " .. file_path)
    return result
  end

  -- Read file content
  local content, err = utils.read_file(file_path)
  if not content then
    table.insert(result.errors, "Cannot read file: " .. err)
    return result
  end

  -- Parse YAML header
  local yaml_data, yaml_err = yaml_parser.parse_markdown_file(file_path)
  if yaml_data then
    result.yaml_data = yaml_data

    -- Process properties
    result.properties = yaml_parser.process_properties(yaml_data)
  elseif yaml_err then
    table.insert(result.errors, "YAML parsing error: " .. yaml_err)
  else
    table.insert(result.warnings, "No YAML header found")
  end

  -- Analyze markdown content
  result.markdown_analysis = markdown_parser.analyze_structure(content)

  result.success = #result.errors == 0

  return result
end

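-- Usage sketch (illustrative): parse one file and inspect the result.
-- Assumes this module is loadable as 'notex.parser'; the path is hypothetical.
--
--   local parser = require('notex.parser')
--   local result = parser.parse_document('notes/example.md')
--   if result.success then
--     print(result.yaml_data.title, #result.properties)
--   else
--     print(table.concat(result.errors, '; '))
--   end
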
-- Parse multiple documents
function M.parse_documents(file_paths)
  local results = {
    total = #file_paths,
    successful = 0,
    failed = 0,
    documents = {},
    errors = {}
  }

  for _, file_path in ipairs(file_paths) do
    local doc_result = M.parse_document(file_path)
    table.insert(results.documents, doc_result)

    if doc_result.success then
      results.successful = results.successful + 1
    else
      results.failed = results.failed + 1
      for _, err in ipairs(doc_result.errors) do
        table.insert(results.errors, {
          file_path = file_path,
          error = err
        })
      end
    end
  end

  return results
end

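-- Batch sketch (illustrative; module name and paths are hypothetical):
-- parse several files, then report per-file failures from results.errors.
--
--   local parser = require('notex.parser')
--   local results = parser.parse_documents({ 'a.md', 'b.md' })
--   print(string.format('%d/%d parsed', results.successful, results.total))
--   for _, e in ipairs(results.errors) do
--     print(e.file_path .. ': ' .. e.error)
--   end
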
-- Extract document summary
function M.get_document_summary(file_path)
  local parse_result = M.parse_document(file_path)

  if not parse_result.success then
    return nil, "Failed to parse document: " .. table.concat(parse_result.errors, ", ")
  end

  local analysis = parse_result.markdown_analysis
  local first_heading = analysis.headings[1]

  local summary = {
    file_path = file_path,
    title = parse_result.yaml_data.title or (first_heading and first_heading.title) or "Untitled",
    status = parse_result.yaml_data.status or "unknown",
    priority = parse_result.yaml_data.priority or 0,
    tags = parse_result.yaml_data.tags or {},
    word_count = analysis.word_count,
    created_at = parse_result.yaml_data.created_at,
    updated_at = parse_result.yaml_data.updated_at,
    summary = analysis.summary,
    properties_count = #parse_result.properties,
    has_headings = #analysis.headings > 0,
    has_links = #analysis.links > 0,
    has_code = #analysis.code_blocks > 0,
    reading_time = analysis.reading_time_minutes
  }

  return summary
end

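-- Summary sketch (illustrative; path hypothetical):
--
--   local parser = require('notex.parser')
--   local summary, err = parser.get_document_summary('notes/example.md')
--   if summary then
--     print(summary.title, summary.word_count, summary.reading_time)
--   end
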
-- Validate document against schema
function M.validate_document_schema(file_path, schema_requirements)
  local parse_result = M.parse_document(file_path)

  if not parse_result.success then
    return false, parse_result.errors
  end

  local validation_errors = {}

  -- Check required properties
  if schema_requirements.required then
    for _, required_prop in ipairs(schema_requirements.required) do
      local found = false
      for _, prop in ipairs(parse_result.properties) do
        if prop.key == required_prop then
          found = true
          break
        end
      end

      if not found then
        table.insert(validation_errors, string.format("Missing required property: %s", required_prop))
      end
    end
  end

  -- Check property types
  if schema_requirements.property_types then
    for _, prop in ipairs(parse_result.properties) do
      local expected_type = schema_requirements.property_types[prop.key]
      if expected_type and prop.value_type ~= expected_type then
        table.insert(validation_errors, string.format("Property '%s' should be %s, got %s", prop.key, expected_type, prop.value_type))
      end
    end
  end

  -- Check property patterns; coerce to string so pattern checks do not
  -- error on non-string property values
  if schema_requirements.patterns then
    for _, prop in ipairs(parse_result.properties) do
      local pattern = schema_requirements.patterns[prop.key]
      if pattern and not tostring(prop.value):match(pattern) then
        table.insert(validation_errors, string.format("Property '%s' does not match required pattern", prop.key))
      end
    end
  end

  if #validation_errors > 0 then
    return false, validation_errors
  end

  return true, parse_result
end

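-- Schema sketch (illustrative): 'required' lists property keys that must be
-- present, 'property_types' maps keys to an expected value_type, and
-- 'patterns' maps keys to Lua patterns the value must match.
--
--   local parser = require('notex.parser')
--   local ok, details = parser.validate_document_schema('notes/example.md', {
--     required = { 'title', 'status' },
--     property_types = { priority = 'number' },
--     patterns = { status = '^%a+$' },
--   })
--   -- on success, 'details' is the parse result; on failure, a list of errors
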
-- Extract document relationships
function M.extract_relationships(file_paths)
  local relationships = {
    links = {},
    references = {},
    backlinks = {}
  }

  -- Parse all documents
  local parse_results = M.parse_documents(file_paths)

  -- Build document lookup
  local docs_by_path = {}
  for _, doc_result in ipairs(parse_results.documents) do
    if doc_result.success then
      docs_by_path[doc_result.file_path] = doc_result
    end
  end

  -- Extract links and references
  for _, doc_result in ipairs(parse_results.documents) do
    if doc_result.success then
      local source_doc = doc_result.file_path

      -- Extract markdown links
      for _, link in ipairs(doc_result.markdown_analysis.links) do
        table.insert(relationships.links, {
          source = source_doc,
          target = link.url,
          text = link.text,
          type = "markdown_link"
        })
      end

      -- Extract property references (keys containing "ref", which also
      -- covers "refs" and "reference")
      for _, prop in ipairs(doc_result.properties) do
        if prop.key:match("ref") then
          table.insert(relationships.references, {
            source = source_doc,
            target = prop.value,
            property = prop.key,
            type = "property_reference"
          })
        end
      end
    end
  end

  -- Build backlinks; use a plain (non-pattern) find so magic characters
  -- in file paths are matched literally
  for _, link in ipairs(relationships.links) do
    for target_path, _ in pairs(docs_by_path) do
      if link.target == target_path or link.target:find(target_path, 1, true) then
        table.insert(relationships.backlinks, {
          source = target_path,
          target = link.source,
          text = link.text,
          type = "backlink"
        })
      end
    end
  end

  return relationships
end

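-- Relationship sketch (illustrative; paths hypothetical): list outbound
-- links per document.
--
--   local parser = require('notex.parser')
--   local rels = parser.extract_relationships({ 'a.md', 'b.md' })
--   for _, link in ipairs(rels.links) do
--     print(link.source .. ' -> ' .. link.target)
--   end
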
-- Generate document statistics
function M.generate_statistics(file_paths)
  local stats = {
    total_documents = #file_paths,
    total_words = 0,
    total_properties = 0,
    property_distribution = {},
    status_distribution = {},
    tag_distribution = {},
    file_size_distribution = {},
    average_word_count = 0,
    documents_with_headings = 0,
    documents_with_links = 0,
    documents_with_code = 0
  }

  -- Parse all documents
  local parse_results = M.parse_documents(file_paths)

  for _, doc_result in ipairs(parse_results.documents) do
    if doc_result.success then
      local analysis = doc_result.markdown_analysis

      -- Word count
      stats.total_words = stats.total_words + analysis.word_count

      -- Properties
      stats.total_properties = stats.total_properties + #doc_result.properties

      -- Property distribution
      for _, prop in ipairs(doc_result.properties) do
        stats.property_distribution[prop.key] = (stats.property_distribution[prop.key] or 0) + 1
      end

      -- Status distribution
      local status = doc_result.yaml_data.status or "unknown"
      stats.status_distribution[status] = (stats.status_distribution[status] or 0) + 1

      -- Tag distribution
      for _, tag in ipairs(doc_result.yaml_data.tags or {}) do
        stats.tag_distribution[tag] = (stats.tag_distribution[tag] or 0) + 1
      end

      -- Feature flags, computed from the analysis tables the same way
      -- get_document_summary computes them
      if #analysis.headings > 0 then
        stats.documents_with_headings = stats.documents_with_headings + 1
      end

      if #analysis.links > 0 then
        stats.documents_with_links = stats.documents_with_links + 1
      end

      if #analysis.code_blocks > 0 then
        stats.documents_with_code = stats.documents_with_code + 1
      end
    end
  end

  -- Calculate averages
  if stats.total_documents > 0 then
    stats.average_word_count = math.floor(stats.total_words / stats.total_documents)
  end

  return stats
end

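-- Statistics sketch (illustrative; paths hypothetical):
--
--   local parser = require('notex.parser')
--   local stats = parser.generate_statistics({ 'a.md', 'b.md' })
--   print('average words per document:', stats.average_word_count)
--   for status, count in pairs(stats.status_distribution) do
--     print(status, count)
--   end
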
return M