Initial vibecoded proof of concept
This commit is contained in:
parent
74812459af
commit
461318a656
61 changed files with 13306 additions and 0 deletions
224
lua/notex/parser/markdown.lua
Normal file
224
lua/notex/parser/markdown.lua
Normal file
|
@ -0,0 +1,224 @@
|
|||
-- Markdown content parsing module
|
||||
local M = {}
|
||||
|
||||
local utils = require('notex.utils')
|
||||
|
||||
--- Return the slice of `content` delimited by two Lua patterns.
--
-- The slice begins at the first character of the first `start_pattern` match.
-- The search for `end_pattern` starts from that same position, and the slice
-- stops at the index where that match begins, so the start marker and the
-- first character of the end marker are both part of the result.
--
-- @param content string text to search
-- @param start_pattern string Lua pattern marking where the slice begins
-- @param end_pattern string Lua pattern marking where the slice ends
-- @return string|nil the slice; the rest of `content` from the start match
--   when `end_pattern` is not found; nil when `start_pattern` is not found
function M.extract_content_between(content, start_pattern, end_pattern)
  local first = content:find(start_pattern)
  if first == nil then
    return nil
  end

  -- No end marker: -1 makes string.sub run to the end of the text.
  local last = content:find(end_pattern, first) or -1
  return content:sub(first, last)
end
|
||||
|
||||
--- Strip a leading YAML front-matter block from `content`.
--
-- A header is recognised only at the very top of the text (leading whitespace
-- is tolerated): a `---` line, the header lines, and a closing `---` line that
-- is followed by a newline. Text without such a header is returned unchanged.
--
-- @param content string|nil markdown text
-- @return string|nil the text without its header, or nil when `content` is nil
function M.remove_yaml_header(content)
  if content == nil then
    return nil
  end

  -- The extra parentheses discard gsub's second result (the substitution
  -- count) so that exactly one value is returned to the caller.
  return (content:gsub("^%s*%-%-%-\n.-\n%-%-%-\n", "", 1))
end
|
||||
|
||||
--- Return the markdown body, i.e. `content` with any leading YAML header
-- removed by `M.remove_yaml_header`.
--
-- @param content string markdown text
-- @return the first value produced by `M.remove_yaml_header`
function M.get_body(content)
  -- Parenthesised so exactly one value is returned: any additional results
  -- coming back from the helper are not forwarded to the caller.
  return (M.remove_yaml_header(content))
end
|
||||
|
||||
--- Count the words in the markdown body of `content`.
--
-- A word is a maximal run of alphanumeric characters (`%w`). Fenced code
-- blocks and images are excluded; the visible text of links is counted.
--
-- @param content string markdown text (a YAML header is ignored)
-- @return number word count, 0 when there is no body
function M.count_words(content)
  local body = M.get_body(content)
  if not body then
    return 0
  end

  local text = body
    -- Fenced blocks must go first: the inline-code rule would otherwise
    -- consume the fence backticks and leave the code in the text.
    :gsub("```.-```", " ")
    -- Images before links, because the link rule also matches the
    -- "[alt](src)" part of an image.
    :gsub("!%[[^%]]*%]%([^)]*%)", " ")
    :gsub("%[([^%]]*)%]%([^)]*%)", "%1") -- Links: keep the visible text
    :gsub("%*%*(.-)%*%*", "%1")          -- Bold
    :gsub("%*(.-)%*", "%1")              -- Italic
    :gsub("`(.-)`", "%1")                -- Inline code

  -- Heading markers and other punctuation are not alphanumeric, so they
  -- separate words without needing a dedicated clean-up step.
  local total = 0
  for _ in text:gmatch("%w+") do
    total = total + 1
  end

  return total
end
|
||||
|
||||
--- Report the size of the markdown body of `content`.
--
-- The value is the length given by the `#` operator on the body string,
-- i.e. a byte count.
--
-- @param content string markdown text (a YAML header is ignored)
-- @return number length of the body, 0 when there is no body
function M.count_characters(content)
  local body = M.get_body(content)
  if not body then
    return 0
  end
  return #body
end
|
||||
|
||||
--- Collect the ATX headings (`# Title`, `## Title`, ...) of the markdown body.
--
-- The body is scanned line by line. `string.gmatch` does not treat a leading
-- `^` as an anchor and `$` only matches the end of the whole string, so a
-- single anchored pattern over the full text cannot locate line-start
-- headings. Lines inside fenced code blocks are skipped.
--
-- @param content string markdown text (a YAML header is ignored)
-- @return table list of `{ level = number, title = string, raw = string }`
--   in document order; empty when there is no body or no heading
function M.extract_headings(content)
  local headings = {}
  local body = M.get_body(content)
  if not body then
    return headings
  end

  local in_fence = false
  -- The appended newline lets the final line match without a special case.
  for line in (body .. "\n"):gmatch("(.-)\r?\n") do
    if line:match("^%s*```") then
      in_fence = not in_fence
    elseif not in_fence then
      -- The lazy capture followed by "%s*$" trims trailing whitespace.
      local level, title = line:match("^(#+)%s+(.-)%s*$")
      if level and title ~= "" then
        headings[#headings + 1] = {
          level = #level,
          title = title,
          raw = level .. " " .. title,
        }
      end
    end
  end

  return headings
end
|
||||
|
||||
--- Collect every inline `[text](url)` occurrence in `content`.
--
-- The whole input is scanned as given (no YAML header removal). The pattern
-- does not look at the character before `[`, so the bracket part of an image
-- (`![alt](src)`) is reported as well.
--
-- @param content string markdown text
-- @return table list of `{ text = string, url = string, raw = string }`
--   in order of appearance
function M.extract_links(content)
  local found = {}
  local cursor = 1

  while true do
    local first, last, label, target =
      content:find("%[([^%]]*)%]%(([^)]+)%)", cursor)
    if not first then
      break
    end

    found[#found + 1] = {
      text = label,
      url = target,
      raw = string.format("[%s](%s)", label, target),
    }
    -- Resume right after this match, as an iterator over matches would.
    cursor = last + 1
  end

  return found
end
|
||||
|
||||
--- Collect the fenced code blocks found in `content`.
--
-- A block is an opening fence of three backticks with an optional
-- alphanumeric language tag, a newline, the code, a newline and a closing
-- fence.
--
-- @param content string markdown text
-- @return table list of `{ language = string, code = string, lines = number }`;
--   `language` is "text" when the fence carries no tag, `lines` is the number
--   of lines in `code`
function M.extract_code_blocks(content)
  local blocks = {}

  local function record(lang, code)
    if lang == "" then
      lang = "text"
    end
    local _, newline_total = code:gsub("\n", "")
    blocks[#blocks + 1] = {
      language = lang,
      code = code,
      lines = newline_total + 1,
    }
  end

  -- gsub serves only as an iterator here: the callback returns nothing, so
  -- no replacement takes place and the produced string is discarded.
  content:gsub("```(%w*)\n(.-)\n```", record)

  return blocks
end
|
||||
|
||||
--- Build a short plain-text summary from the first paragraph of the body.
--
-- Paragraphs are separated by blank lines. Fenced code blocks are removed
-- beforehand, heading lines that open a paragraph are dropped, and a
-- paragraph starting with "[" (for instance a `[TOC]` marker) is skipped.
-- Inline markdown is stripped from the chosen paragraph; link text is kept
-- and images are removed.
--
-- @param content string markdown text (a YAML header is ignored)
-- @param max_length number|nil maximum summary length in bytes, default 200;
--   longer summaries are cut and end with "..."
-- @return string the summary, "" when no suitable paragraph exists
function M.get_summary(content, max_length)
  max_length = max_length or 200

  local body = M.get_body(content)
  if not body then
    return ""
  end

  -- Remove code blocks to avoid including them in the summary.
  local clean_body = body:gsub("```.-```", "")

  -- Walk the paragraphs in document order; the appended blank line lets the
  -- last paragraph match the same pattern as the others.
  local first_paragraph
  for paragraph in (clean_body .. "\n\n"):gmatch("(.-)\n%s*\n") do
    local text = paragraph:match("^%s*(.-)%s*$")

    -- Drop heading lines that open the paragraph so a title is never
    -- mistaken for the first paragraph.
    local removed
    repeat
      text, removed = text:gsub("^#+[ \t][^\n]*\n?", "")
    until removed == 0

    if text ~= "" and text:sub(1, 1) ~= "[" then
      first_paragraph = text
      break
    end
  end

  if not first_paragraph then
    return ""
  end

  -- Clean up markdown formatting.
  local summary = first_paragraph
    :gsub("#+ ", "")
    :gsub("%*%*(.-)%*%*", "%1")
    :gsub("%*(.-)%*", "%1")
    :gsub("`(.-)`", "%1")
    :gsub("!%[[^%]]*%]%([^)]*%)", "")    -- Images (before links)
    :gsub("%[([^%]]*)%]%([^)]*%)", "%1") -- Links: keep the visible text
    :gsub("%s+", " ")
    :match("^%s*(.-)%s*$")

  if #summary > max_length then
    local cut = math.max(max_length - 3, 0)
    -- Never cut inside a multi-byte UTF-8 sequence: step back while the byte
    -- right after the cut is a continuation byte (0x80-0xBF).
    while cut > 0 do
      local following = summary:byte(cut + 1)
      if following and following >= 128 and following < 192 then
        cut = cut - 1
      else
        break
      end
    end
    summary = summary:sub(1, cut) .. "..."
  end

  return summary
end
|
||||
|
||||
--- Gather the module's statistics and extracted elements for `content` into
-- one table.
--
-- @param content string markdown text
-- @return table with the fields `word_count`, `character_count`, `headings`,
--   `links`, `code_blocks`, `summary` (at most 200 long), `line_count`
--   (number of newlines in the body plus one), `has_toc` (true when the body
--   starts with `[TOC]`, leading whitespace allowed) and
--   `reading_time_minutes` (word count at 200 words per minute, rounded up)
function M.analyze_structure(content)
  local body = M.get_body(content)
  local words = M.count_words(content)
  local _, newline_total = body:gsub("\n", "")

  local report = {}
  report.word_count = words
  report.character_count = M.count_characters(content)
  report.headings = M.extract_headings(content)
  report.links = M.extract_links(content)
  report.code_blocks = M.extract_code_blocks(content)
  report.summary = M.get_summary(content, 200)
  report.line_count = newline_total + 1
  report.has_toc = body:find("^%s*%[TOC%]") ~= nil
  report.reading_time_minutes = math.ceil(words / 200) -- Assuming 200 WPM

  return report
end
|
||||
|
||||
--- Run basic sanity checks on markdown text.
--
-- Checks performed:
--   * the content is neither nil nor empty (the only check run in that case);
--   * triple-backtick fences come in pairs;
--   * no inline link has an empty or whitespace-only target.
--
-- @param content string|nil markdown text
-- @return table list of error message strings, empty when nothing was found
function M.validate_markdown(content)
  local errors = {}

  if not content or content == "" then
    errors[#errors + 1] = "Empty content"
    return errors
  end

  -- Opening and closing fences are the same token, so comparing their counts
  -- can never fail; an odd total is what reveals a block left open.
  local _, fence_total = content:gsub("```", "")
  if fence_total % 2 ~= 0 then
    errors[#errors + 1] = "Unbalanced code blocks"
  end

  -- The outer capture is the whole link, the inner one its target.
  for link, target in content:gmatch("(%[.-%]%((.-)%))") do
    if target:match("^%s*$") then
      errors[#errors + 1] = "Malformed link: " .. link
    end
  end

  return errors
end
|
||||
|
||||
--- Convert the markdown body of `content` to plain text.
--
-- Rule order matters:
--   * fenced code is unwrapped before the inline-code rule, which would
--     otherwise consume the fence backticks;
--   * images are rewritten before links, because the link rule also matches
--     the "[alt](src)" part of an image;
--   * list markers are rewritten before emphasis, so "*" bullets are not
--     mistaken for italic delimiters;
--   * task-list items are handled before plain list items, so the checkbox is
--     still directly behind its bullet when its rule runs.
--
-- @param content string markdown text (a YAML header is ignored)
-- @return string plain text with surrounding whitespace removed, "" when
--   there is no body
function M.to_plain_text(content)
  local body = M.get_body(content)
  if not body then
    return ""
  end

  -- The leading newline makes every "\n..." line-start rule apply to the
  -- first line too; it is removed again by the final trim.
  local plain = ("\n" .. body)
    :gsub("```%w*\n(.-)\n```", "%1")                       -- Code blocks
    :gsub("%!%[([^%]]*)%]%(([^)]+)%)", "[Image: %1]")      -- Images
    :gsub("%[([^%]]*)%]%(([^)]+)%)", "%1")                 -- Links to text
    :gsub("\n#+[ \t]+", "\n")                              -- Headers, any level
    :gsub("\n%s*[-*+]%s+%[%s*%]%s+", "\n• ")               -- Unchecked task items
    :gsub("\n%s*[-*+]%s+%[%s*[xX]%s*%]%s+", "\n✓ ")        -- Checked task items
    :gsub("\n%s*%[%s*%]%s+", "\n• ")                       -- Checkbox lists
    :gsub("\n%s*%[%s*x%s*%]%s+", "\n✓ ")                   -- Checked items
    :gsub("\n%s*[-*+]%s+", "\n• ")                         -- List items
    :gsub("\n%s*%d+%.%s+", "\n• ")                         -- Numbered lists
    :gsub("%*%*(.-)%*%*", "%1")                            -- Bold
    :gsub("%*(.-)%*", "%1")                                -- Italic
    :gsub("`(.-)`", "%1")                                  -- Inline code
    :gsub("\n\n+", "\n\n")                                 -- Multiple newlines
    :match("^%s*(.-)%s*$")                                 -- Trim both ends

  return plain
end
|
||||
|
||||
return M
|
Loading…
Add table
Add a link
Reference in a new issue