Initial vibecoded proof of concept

Alex Selimov 2025-10-05 20:16:33 -04:00
parent 74812459af
commit 461318a656
Signed by: aselimov
GPG key ID: 3DDB9C3E023F1F31
61 changed files with 13306 additions and 0 deletions

lua/notex/parser/init.lua (new file, 311 lines)

@@ -0,0 +1,311 @@
-- Parser coordination module
local M = {}
local yaml_parser = require('notex.parser.yaml')
local markdown_parser = require('notex.parser.markdown')
local utils = require('notex.utils')
-- Parse complete markdown document
function M.parse_document(file_path)
local result = {
file_path = file_path,
success = false,
yaml_data = {},
markdown_analysis = {},
properties = {},
errors = {},
warnings = {}
}
-- Validate file
if not utils.file_exists(file_path) then
table.insert(result.errors, "File does not exist: " .. file_path)
return result
end
if not utils.is_utf8(file_path) then
table.insert(result.errors, "File is not valid UTF-8: " .. file_path)
return result
end
-- Read file content
local content, err = utils.read_file(file_path)
if not content then
table.insert(result.errors, "Cannot read file: " .. err)
return result
end
-- Parse YAML header
local yaml_data, yaml_err = yaml_parser.parse_markdown_file(file_path)
if yaml_data then
result.yaml_data = yaml_data
-- Process properties
result.properties = yaml_parser.process_properties(yaml_data)
elseif yaml_err and yaml_err:match("No YAML header") then
-- A missing header is not fatal; record it as a warning
table.insert(result.warnings, "No YAML header found")
elseif yaml_err then
table.insert(result.errors, "YAML parsing error: " .. yaml_err)
end
-- Analyze markdown content
local markdown_analysis = markdown_parser.analyze_structure(content)
result.markdown_analysis = markdown_analysis
result.success = #result.errors == 0
return result
end
-- Parse multiple documents
function M.parse_documents(file_paths)
local results = {
total = #file_paths,
successful = 0,
failed = 0,
documents = {},
errors = {}
}
for _, file_path in ipairs(file_paths) do
local doc_result = M.parse_document(file_path)
table.insert(results.documents, doc_result)
if doc_result.success then
results.successful = results.successful + 1
else
results.failed = results.failed + 1
for _, error in ipairs(doc_result.errors) do
table.insert(results.errors, {
file_path = file_path,
error = error
})
end
end
end
return results
end
-- Extract document summary
function M.get_document_summary(file_path)
local parse_result = M.parse_document(file_path)
if not parse_result.success then
return nil, "Failed to parse document: " .. table.concat(parse_result.errors, ", ")
end
local summary = {
file_path = file_path,
title = parse_result.yaml_data.title or (parse_result.markdown_analysis.headings[1] and parse_result.markdown_analysis.headings[1].title) or "Untitled",
status = parse_result.yaml_data.status or "unknown",
priority = parse_result.yaml_data.priority or 0,
tags = parse_result.yaml_data.tags or {},
word_count = parse_result.markdown_analysis.word_count,
created_at = parse_result.yaml_data.created_at,
updated_at = parse_result.yaml_data.updated_at,
summary = parse_result.markdown_analysis.summary,
properties_count = #parse_result.properties,
has_headings = #parse_result.markdown_analysis.headings > 0,
has_links = #parse_result.markdown_analysis.links > 0,
has_code = #parse_result.markdown_analysis.code_blocks > 0,
reading_time = parse_result.markdown_analysis.reading_time_minutes
}
return summary
end
-- Validate document against schema
function M.validate_document_schema(file_path, schema_requirements)
local parse_result = M.parse_document(file_path)
if not parse_result.success then
return false, parse_result.errors
end
local validation_errors = {}
-- Check required properties
if schema_requirements.required then
for _, required_prop in ipairs(schema_requirements.required) do
local found = false
for _, prop in ipairs(parse_result.properties) do
if prop.key == required_prop then
found = true
break
end
end
if not found then
table.insert(validation_errors, string.format("Missing required property: %s", required_prop))
end
end
end
-- Check property types
if schema_requirements.property_types then
for _, prop in ipairs(parse_result.properties) do
local expected_type = schema_requirements.property_types[prop.key]
if expected_type and prop.value_type ~= expected_type then
table.insert(validation_errors, string.format("Property '%s' should be %s, got %s", prop.key, expected_type, prop.value_type))
end
end
end
-- Check property patterns
if schema_requirements.patterns then
for _, prop in ipairs(parse_result.properties) do
local pattern = schema_requirements.patterns[prop.key]
if pattern and not prop.value:match(pattern) then
table.insert(validation_errors, string.format("Property '%s' does not match required pattern", prop.key))
end
end
end
if #validation_errors > 0 then
return false, validation_errors
end
return true, parse_result
end
-- Extract document relationships
function M.extract_relationships(file_paths)
local relationships = {
links = {},
references = {},
backlinks = {}
}
-- Parse all documents
local parse_results = M.parse_documents(file_paths)
-- Build document lookup
local docs_by_path = {}
for _, doc_result in ipairs(parse_results.documents) do
if doc_result.success then
docs_by_path[doc_result.file_path] = doc_result
end
end
-- Extract links and references
for _, doc_result in ipairs(parse_results.documents) do
if doc_result.success then
local source_doc = doc_result.file_path
-- Extract markdown links
for _, link in ipairs(doc_result.markdown_analysis.links) do
table.insert(relationships.links, {
source = source_doc,
target = link.url,
text = link.text,
type = "markdown_link"
})
end
-- Extract property references (if any)
for _, prop in ipairs(doc_result.properties) do
if prop.key:match("ref") or prop.key:match("reference") then
table.insert(relationships.references, {
source = source_doc,
target = prop.value,
property = prop.key,
type = "property_reference"
})
end
end
end
end
-- Build backlinks
for _, link in ipairs(relationships.links) do
for target_path, target_doc in pairs(docs_by_path) do
if link.target == target_path or link.target:find(target_path, 1, true) then
table.insert(relationships.backlinks, {
source = target_path,
target = link.source,
text = link.text,
type = "backlink"
})
end
end
end
return relationships
end
-- Generate document statistics
function M.generate_statistics(file_paths)
local stats = {
total_documents = #file_paths,
total_words = 0,
total_properties = 0,
property_distribution = {},
status_distribution = {},
tag_distribution = {},
file_size_distribution = {},
average_word_count = 0,
documents_with_headings = 0,
documents_with_links = 0,
documents_with_code = 0
}
-- Parse all documents
local parse_results = M.parse_documents(file_paths)
for _, doc_result in ipairs(parse_results.documents) do
if doc_result.success then
-- Word count
stats.total_words = stats.total_words + doc_result.markdown_analysis.word_count
-- Properties
stats.total_properties = stats.total_properties + #doc_result.properties
-- Property distribution
for _, prop in ipairs(doc_result.properties) do
if not stats.property_distribution[prop.key] then
stats.property_distribution[prop.key] = 0
end
stats.property_distribution[prop.key] = stats.property_distribution[prop.key] + 1
end
-- Status distribution
local status = doc_result.yaml_data.status or "unknown"
if not stats.status_distribution[status] then
stats.status_distribution[status] = 0
end
stats.status_distribution[status] = stats.status_distribution[status] + 1
-- Tag distribution
local tags = doc_result.yaml_data.tags or {}
for _, tag in ipairs(tags) do
if not stats.tag_distribution[tag] then
stats.tag_distribution[tag] = 0
end
stats.tag_distribution[tag] = stats.tag_distribution[tag] + 1
end
-- Feature flags
if #doc_result.markdown_analysis.headings > 0 then
stats.documents_with_headings = stats.documents_with_headings + 1
end
if #doc_result.markdown_analysis.links > 0 then
stats.documents_with_links = stats.documents_with_links + 1
end
if #doc_result.markdown_analysis.code_blocks > 0 then
stats.documents_with_code = stats.documents_with_code + 1
end
end
end
-- Calculate averages
if stats.total_documents > 0 then
stats.average_word_count = math.floor(stats.total_words / stats.total_documents)
end
return stats
end
return M
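
For orientation, a minimal usage sketch of the coordination module above (hypothetical, not part of this commit; it assumes the plugin's lua/ directory is on package.path and uses a made-up notes/example.md path):

-- Usage sketch (hypothetical, not part of this commit)
local parser = require('notex.parser')
local result = parser.parse_document("notes/example.md") -- hypothetical file
if result.success then
  print(string.format("%s: %d properties, %d words",
    result.file_path, #result.properties, result.markdown_analysis.word_count))
else
  print("Parse failed: " .. table.concat(result.errors, "; "))
end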

lua/notex/parser/markdown.lua (new file, 224 lines)

@@ -0,0 +1,224 @@
-- Markdown content parsing module
local M = {}
local utils = require('notex.utils')
-- Plain-Lua whitespace trim; Lua strings have no built-in trim method
local function trim(s)
return (s:gsub("^%s*(.-)%s*$", "%1"))
end
-- Extract content between lines
function M.extract_content_between(content, start_pattern, end_pattern)
local start_pos = content:find(start_pattern)
if not start_pos then
return nil
end
local end_pos = content:find(end_pattern, start_pos)
if not end_pos then
return content:sub(start_pos)
end
return content:sub(start_pos, end_pos)
end
-- Remove YAML header from content
function M.remove_yaml_header(content)
return (content:gsub("^%s*%-%-%-%s*\n.-\n%-%-%-%s*\n", "", 1))
end
-- Extract markdown body (content after YAML header)
function M.get_body(content)
return M.remove_yaml_header(content)
end
-- Count words in markdown content
function M.count_words(content)
local body = M.get_body(content)
if not body then
return 0
end
-- Remove markdown syntax for accurate word count
local clean = body
:gsub("```.-```", "") -- Code blocks (must run before inline code)
:gsub("#+ ", "") -- Headers
:gsub("%*%*(.-)%*%*", "%1") -- Bold
:gsub("%*(.-)%*", "%1") -- Italic
:gsub("`(.-)`", "%1") -- Inline code
:gsub("%!%[.-%]%(.-%)", "") -- Images (before links)
:gsub("%[.-%]%(.-%)", "") -- Links
:gsub("%W+", " ") -- Replace non-word chars with spaces
:gsub("%s+", " ") -- Collapse multiple spaces
local words = {}
for word in clean:gmatch("%S+") do
if #word > 0 then
table.insert(words, word)
end
end
return #words
end
-- Count characters in markdown content
function M.count_characters(content)
local body = M.get_body(content)
return body and #body or 0
end
-- Extract headings from markdown
function M.extract_headings(content)
local headings = {}
local body = M.get_body(content)
-- Iterate line by line; Lua patterns do not support multiline anchors in gmatch
for line in body:gmatch("[^\n]+") do
local level, title = line:match("^(#+)%s+(.+)$")
if level then
table.insert(headings, {
level = #level,
title = trim(title),
raw = level .. " " .. title
})
end
end
return headings
end
-- Extract links from markdown
function M.extract_links(content)
local links = {}
for text, url in content:gmatch("%[([^%]]*)%]%(([^)]+)%)") do
table.insert(links, {
text = text,
url = url,
raw = "[" .. text .. "](" .. url .. ")"
})
end
return links
end
-- Extract code blocks from markdown
function M.extract_code_blocks(content)
local code_blocks = {}
for lang, code in content:gmatch("```(%w*)\n(.-)\n```") do
table.insert(code_blocks, {
language = lang ~= "" and lang or "text",
code = code,
lines = select(2, code:gsub("\n", "")) + 1
})
end
return code_blocks
end
-- Get content summary (first paragraph)
function M.get_summary(content, max_length)
max_length = max_length or 200
local body = M.get_body(content)
if not body then
return ""
end
-- Remove code blocks to avoid including them in summary
local clean_body = body:gsub("```.-```", "")
-- Extract first paragraph
local first_paragraph = clean_body:match("\n\n([^%[].-)\n\n") or
clean_body:match("^([^%[].-)\n\n") or
clean_body:match("^([^%[].-)")
if not first_paragraph then
return ""
end
-- Clean up markdown formatting
local summary = first_paragraph
:gsub("#+ ", "")
:gsub("%*%*(.-)%*%*", "%1")
:gsub("%*(.-)%*", "%1")
:gsub("`(.-)`", "%1")
:gsub("%[.-%]%(.-%)", "")
summary = trim((summary:gsub("%s+", " ")))
if #summary > max_length then
summary = summary:sub(1, max_length - 3) .. "..."
end
return summary
end
-- Analyze markdown structure
function M.analyze_structure(content)
local body = M.get_body(content)
return {
word_count = M.count_words(content),
character_count = M.count_characters(content),
headings = M.extract_headings(content),
links = M.extract_links(content),
code_blocks = M.extract_code_blocks(content),
summary = M.get_summary(content, 200),
line_count = select(2, body:gsub("\n", "")) + 1,
has_toc = body:find("^%s*%[TOC%]") ~= nil,
reading_time_minutes = math.ceil(M.count_words(content) / 200) -- Assuming 200 WPM
}
end
-- Validate markdown format
function M.validate_markdown(content)
local errors = {}
if not content or content == "" then
table.insert(errors, "Empty content")
return errors
end
-- Check for balanced code fences: the number of ``` markers must be even
local fence_count = select(2, content:gsub("```", ""))
if fence_count % 2 ~= 0 then
table.insert(errors, "Unbalanced code blocks")
end
-- Check for malformed links
for link in content:gmatch("%[.-%]%(.-%)") do
if not link:match("%[.-%]%(([^)]+)%)") or
link:match("%[.-%]%(%s*%)") then
table.insert(errors, "Malformed link: " .. link)
end
end
return errors
end
-- Convert markdown to plain text
function M.to_plain_text(content)
local body = M.get_body(content)
if not body then
return ""
end
local plain = body
:gsub("```%w*\n(.-)\n```", "%1") -- Unwrap code blocks (before inline code)
:gsub("%!%[([^%]]*)%]%(([^)]+)%)", "[Image: %1]") -- Images (before links)
:gsub("%[([^%]]*)%]%(([^)]+)%)", "%1") -- Links to text
:gsub("^#+%s+", "") -- Leading header markers
:gsub("\n#+%s+", "\n") -- Header markers on later lines
:gsub("%*%*(.-)%*%*", "%1") -- Bold
:gsub("%*(.-)%*", "%1") -- Italic
:gsub("`(.-)`", "%1") -- Inline code
:gsub("\n%s*[-*+]%s+", "\n") -- List items
:gsub("\n%s*%d+%.%s+", "\n") -- Numbered lists
:gsub("\n%s*%[%s*%]%s+", "\n") -- Checkbox lists
:gsub("\n%s*%[%s*x%s*%]%s+", "\n") -- Checked items
:gsub("\n\n+", "\n\n") -- Collapse multiple newlines
plain = trim(plain)
return plain
end
return M
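
A small sketch of how the structural analysis above could be exercised directly on a markdown string (hypothetical, not part of this commit; it assumes the module loads via package.path):

-- Usage sketch (hypothetical): analyze a markdown string in isolation
local markdown = require('notex.parser.markdown')
local sample = "---\ntitle: Example\n---\n# Heading\n\nBody text with a [link](other.md).\n"
local analysis = markdown.analyze_structure(sample)
print(analysis.word_count, #analysis.headings, #analysis.links, analysis.reading_time_minutes)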

lua/notex/parser/yaml.lua (new file, 192 lines)

@@ -0,0 +1,192 @@
-- YAML header parsing module
local M = {}
local lyaml = require('lyaml')
local utils = require('notex.utils')
-- Extract YAML header from markdown content
function M.extract_yaml_header(content)
if not content or content == "" then
return nil, "Empty content provided"
end
-- Check for YAML header delimiters
local start_pos, delim_end = content:find("^%s*%-%-%-%s*\n")
if not start_pos then
return nil, "No YAML header found"
end
local end_pos = content:find("\n%s*%-%-%-%s*\n", delim_end)
if not end_pos then
return nil, "Unclosed YAML header"
end
-- Extract YAML content between the opening and closing delimiters
local yaml_content = content:sub(delim_end + 1, end_pos - 1)
return yaml_content, nil
end
-- Parse YAML header content
function M.parse_yaml(yaml_content)
if not yaml_content or yaml_content == "" then
return {}, nil
end
local ok, data = pcall(lyaml.load, yaml_content)
if not ok then
return nil, "YAML parsing failed: " .. tostring(data)
end
if type(data) ~= "table" then
return {}, nil
end
return data, nil
end
-- Parse markdown file and extract YAML header
function M.parse_markdown_file(file_path)
-- Validate file exists
if not utils.file_exists(file_path) then
return nil, "File not found: " .. file_path
end
-- Validate UTF-8 encoding
if not utils.is_utf8(file_path) then
return nil, "File is not valid UTF-8: " .. file_path
end
-- Read file content
local content, err = utils.read_file(file_path)
if not content then
return nil, err
end
-- Extract YAML header
local yaml_content, extract_err = M.extract_yaml_header(content)
if not yaml_content then
return nil, extract_err
end
-- Parse YAML
local yaml_data, parse_err = M.parse_yaml(yaml_content)
if not yaml_data then
return nil, parse_err
end
return yaml_data, nil
end
-- Flatten YAML data into key-value pairs
function M.flatten_yaml(data, prefix)
local flattened = {}
prefix = prefix or ""
for key, value in pairs(data) do
local full_key = prefix .. (prefix ~= "" and "." or "") .. key
if type(value) == "table" then
-- Recursively flatten nested tables
local nested = M.flatten_yaml(value, full_key)
for nested_key, nested_value in pairs(nested) do
flattened[nested_key] = nested_value
end
else
flattened[full_key] = value
end
end
return flattened
end
-- Validate YAML structure
function M.validate_yaml(yaml_data)
local errors = {}
if type(yaml_data) ~= "table" then
table.insert(errors, "YAML data must be a table")
return errors
end
-- Check for required fields (if any)
local required_fields = {} -- Add required fields as needed
for _, field in ipairs(required_fields) do
if yaml_data[field] == nil then
table.insert(errors, string.format("Required field '%s' is missing", field))
end
end
-- Validate field types
local field_types = {
-- Define expected types for specific fields
}
for field, expected_type in pairs(field_types) do
if yaml_data[field] ~= nil and type(yaml_data[field]) ~= expected_type then
table.insert(errors, string.format("Field '%s' should be %s, got %s",
field, expected_type, type(yaml_data[field])))
end
end
return errors
end
-- Detect and convert property types
function M.detect_property_type(value)
local value_type = type(value)
if value_type == "boolean" then
return "boolean", value
elseif value_type == "number" then
return "number", value
elseif value_type == "string" then
-- Check for ISO 8601 date format
if value:match("^%d%d%d%d%-%d%d%-%d%d$") or
value:match("^%d%d%d%d%-%d%d%-%d%dT%d%d:%d%d:%d%dZ?$") then
return "date", value
end
-- Check for numeric strings
local num = tonumber(value)
if num and value:match("^%-?%d+%.?%d*$") then
return "number", num
end
-- Check for boolean strings
local lower = value:lower()
if lower == "true" then
return "boolean", true
elseif lower == "false" then
return "boolean", false
end
return "string", value
elseif value_type == "table" then
return "array", vim.json.encode(value)
else
return "string", tostring(value)
end
end
-- Process YAML data into property format
function M.process_properties(yaml_data)
local flattened = M.flatten_yaml(yaml_data)
local properties = {}
for key, value in pairs(flattened) do
local prop_type, processed_value = M.detect_property_type(value)
table.insert(properties, {
key = key,
value = processed_value,
value_type = prop_type
})
end
return properties
end
return M
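
And a sketch of the YAML pipeline on its own (hypothetical, not part of this commit; it assumes lyaml is installed and the plugin is on package.path):

-- Usage sketch (hypothetical): extract, parse, and flatten a YAML header
local yaml = require('notex.parser.yaml')
local doc = "---\ntitle: Demo\npriority: 2\ntags:\n  - lua\n  - notes\n---\nBody text\n"
local header = assert(yaml.extract_yaml_header(doc))
local data = assert(yaml.parse_yaml(header))
for _, prop in ipairs(yaml.process_properties(data)) do
  print(prop.key, prop.value, prop.value_type)
end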