--- Markdown content parsing module.
-- Helpers that strip the YAML header from a note and extract statistics,
-- headings, links, code blocks, summaries and plain text from its body.
local M = {}

-- NOTE(review): `utils` is not referenced anywhere in this file. The require
-- is kept in case loading it has side effects other code relies on -- confirm
-- before removing.
local utils = require('notex.utils')

-- Default maximum summary length (bytes) used when the caller passes none.
local DEFAULT_SUMMARY_LENGTH = 200
-- Reading speed assumed when estimating reading time.
local WORDS_PER_MINUTE = 200

-- Strip leading and trailing whitespace. Lua strings have no built-in
-- `trim` method, so this module carries its own.
local function trim(s)
  return (s:match("^%s*(.-)%s*$"))
end

-- Iterate over the lines of `text` without their line terminators
-- (handles both LF and CRLF).
local function each_line(text)
  return (text .. "\n"):gmatch("(.-)\r?\n")
end

-- Return the first prose paragraph of `text`, or nil when there is none.
-- Heading lines are skipped, and so are paragraphs that start with "["
-- (e.g. a [TOC] marker or a bare reference line).
local function first_paragraph(text)
  local collected = {}
  local skipping = false
  for line in each_line(text) do
    if line:match("^%s*$") then
      -- A blank line ends whatever paragraph we were in.
      if #collected > 0 then break end
      skipping = false
    elseif not skipping then
      if line:match("^#+%s") then
        -- A heading terminates a paragraph that has already started.
        if #collected > 0 then break end
      elseif #collected == 0 and line:match("^%s*%[") then
        skipping = true
      else
        collected[#collected + 1] = line
      end
    end
  end
  if #collected == 0 then return nil end
  return table.concat(collected, "\n")
end

--- Extract the span that starts at `start_pattern` and ends at `end_pattern`.
-- Both arguments are Lua patterns. The returned text includes both matches.
-- The end pattern is searched for only after the end of the start match, so
-- identical start/end markers work.
-- @param content string to search
-- @param start_pattern Lua pattern marking the start
-- @param end_pattern Lua pattern marking the end
-- @return the matched span; everything from the start match onwards when the
--         end pattern is not found; nil when the start pattern is not found
function M.extract_content_between(content, start_pattern, end_pattern)
  local start_pos, start_end = content:find(start_pattern)
  if not start_pos then return nil end
  local _, end_last = content:find(end_pattern, start_end + 1)
  if not end_last then return content:sub(start_pos) end
  return content:sub(start_pos, end_last)
end

--- Remove a leading YAML header (a block fenced by `---` lines) from content.
-- Accepts LF or CRLF line endings, an empty header, and a closing fence that
-- is the last line of the input. Content without a complete header is
-- returned unchanged.
-- @param content markdown text
-- @return the text after the header (exactly one value), or nil when
--         `content` is not a string
function M.remove_yaml_header(content)
  if type(content) ~= "string" then return nil end
  local _, open_end = content:find("^%s*%-%-%-[ \t]*\r?\n")
  if not open_end then return content end
  -- Start at the newline that ends the opening fence so an empty header
  -- ("---\n---\n") is recognised too.
  local _, close_end = content:find("\n%-%-%-[ \t]*\r?\n", open_end)
  if close_end then
    return content:sub(close_end + 1)
  end
  -- Closing fence is the final line with no trailing newline.
  if content:find("\n%-%-%-[ \t]*\r?$", open_end) then
    return ""
  end
  return content
end

--- Return the markdown body (the content after the YAML header).
-- @param content markdown text
-- @return body string, or nil when `content` is not a string
function M.get_body(content)
  return M.remove_yaml_header(content)
end

--- Count the words in the markdown body.
-- Fenced code blocks, links and images are excluded; emphasis and inline-code
-- markers are stripped. Bytes >= 128 are treated as word characters so UTF-8
-- words are not broken into fragments.
-- @param content markdown text
-- @return number of words (0 when there is no body)
function M.count_words(content)
  local body = M.get_body(content)
  if not body then return 0 end

  local clean = body
    :gsub("```.-```", " ")            -- Fenced code blocks (before inline code)
    :gsub("`(.-)`", "%1")             -- Inline code
    :gsub("!%[.-%]%(.-%)", "")        -- Images (before links)
    :gsub("%[.-%]%(.-%)", "")         -- Links
    :gsub("%*%*(.-)%*%*", "%1")       -- Bold
    :gsub("%*(.-)%*", "%1")           -- Italic
    :gsub("[^%w\128-\255]+", " ")     -- Punctuation and whitespace to spaces

  local count = 0
  for _ in clean:gmatch("%S+") do
    count = count + 1
  end
  return count
end

--- Count the characters in the markdown body.
-- The count is the byte length of the body (Lua's `#` operator), so multi-byte
-- UTF-8 characters count once per byte.
-- @param content markdown text
-- @return byte length of the body (0 when there is no body)
function M.count_characters(content)
  local body = M.get_body(content)
  return body and #body or 0
end

--- Extract ATX headings (`# Title`) from the markdown body.
-- The body is scanned line by line because `^`/`$` do not anchor per line in
-- `string.gmatch`. Lines inside ``` fences are ignored.
-- @param content markdown text
-- @return list of `{ level = n, title = string, raw = string }`
function M.extract_headings(content)
  local headings = {}
  local body = M.get_body(content)
  if not body then return headings end

  local in_fence = false
  for line in each_line(body) do
    if line:match("^%s*```") then
      in_fence = not in_fence
    elseif not in_fence then
      local level, title = line:match("^(#+)%s+(.+)$")
      if level then
        headings[#headings + 1] = {
          level = #level,
          title = trim(title),
          raw = level .. " " .. title
        }
      end
    end
  end
  return headings
end

--- Extract inline links of the form `[text](url)`.
-- Image links (`![alt](url)`) match this form as well and are included.
-- @param content markdown text
-- @return list of `{ text = string, url = string, raw = string }`
function M.extract_links(content)
  local links = {}
  if type(content) ~= "string" then return links end
  for text, url in content:gmatch("%[([^%]]*)%]%(([^)]+)%)") do
    links[#links + 1] = {
      text = text,
      url = url,
      raw = "[" .. text .. "](" .. url .. ")"
    }
  end
  return links
end

--- Extract fenced code blocks.
-- The language tag may contain letters, digits and `_ + # -` (e.g. `c++`);
-- a missing tag is reported as "text".
-- @param content markdown text
-- @return list of `{ language = string, code = string, lines = n }`
function M.extract_code_blocks(content)
  local code_blocks = {}
  if type(content) ~= "string" then return code_blocks end
  for lang, code in content:gmatch("```([%w_+#%-]*)\n(.-)\n```") do
    local _, newline_count = code:gsub("\n", "")
    code_blocks[#code_blocks + 1] = {
      language = lang ~= "" and lang or "text",
      code = code,
      lines = newline_count + 1
    }
  end
  return code_blocks
end

--- Build a short summary from the first prose paragraph of the body.
-- Code blocks and headings are skipped; emphasis, inline-code and link markup
-- is removed (link text is kept); whitespace is collapsed. Summaries longer
-- than `max_length` bytes are cut and suffixed with "...".
-- @param content markdown text
-- @param max_length maximum length in bytes (defaults to 200)
-- @return summary string ("" when there is no paragraph)
function M.get_summary(content, max_length)
  max_length = max_length or DEFAULT_SUMMARY_LENGTH
  local body = M.get_body(content)
  if not body then return "" end

  -- Remove code blocks so they are never picked as the summary.
  local clean_body = body:gsub("```.-```", "")

  local paragraph = first_paragraph(clean_body)
  if not paragraph then return "" end

  local summary = paragraph
    :gsub("`(.-)`", "%1")             -- Inline code
    :gsub("%*%*(.-)%*%*", "%1")       -- Bold
    :gsub("%*(.-)%*", "%1")           -- Italic
    :gsub("!%[.-%]%(.-%)", "")        -- Images (before links)
    :gsub("%[(.-)%]%(.-%)", "%1")     -- Links: keep the visible text
    :gsub("%s+", " ")
  summary = trim(summary)

  if #summary > max_length then
    -- Clamp at 0: a negative end index would make sub() count from the end.
    local cut = math.max(max_length - 3, 0)
    -- Back off so a multi-byte UTF-8 sequence is not split.
    while cut > 0 do
      local next_byte = summary:byte(cut + 1)
      if next_byte >= 128 and next_byte < 192 then
        cut = cut - 1
      else
        break
      end
    end
    summary = summary:sub(1, cut) .. "..."
  end

  return summary
end

--- Collect all structural statistics for a markdown document.
-- @param content markdown text
-- @return table with word_count, character_count, headings, links,
--         code_blocks, summary, line_count, has_toc and reading_time_minutes
function M.analyze_structure(content)
  local body = M.get_body(content) or ""
  local word_count = M.count_words(content)
  local _, newline_count = body:gsub("\n", "")

  return {
    word_count = word_count,
    character_count = M.count_characters(content),
    headings = M.extract_headings(content),
    links = M.extract_links(content),
    code_blocks = M.extract_code_blocks(content),
    summary = M.get_summary(content, DEFAULT_SUMMARY_LENGTH),
    line_count = newline_count + 1,
    -- A [TOC] marker at the start of any line counts, not just the first.
    has_toc = ("\n" .. body):find("\n[ \t]*%[TOC%]") ~= nil,
    reading_time_minutes = math.ceil(word_count / WORDS_PER_MINUTE)
  }
end

--- Validate basic markdown well-formedness.
-- Reports empty content, an odd number of ``` fences, and links whose URL
-- part is empty or whitespace-only.
-- @param content markdown text
-- @return list of error message strings (empty when no problem was found)
function M.validate_markdown(content)
  local errors = {}

  if not content or content == "" then
    table.insert(errors, "Empty content")
    return errors
  end

  -- Every opening fence needs a closing one, so the total must be even.
  local _, fence_count = content:gsub("```", "")
  if fence_count % 2 ~= 0 then
    table.insert(errors, "Unbalanced code blocks")
  end

  -- A link is malformed when nothing but whitespace sits between "(" and ")".
  for link, url in content:gmatch("(%[.-%]%((.-)%))") do
    if url:match("^%s*$") then
      table.insert(errors, "Malformed link: " .. link)
    end
  end

  return errors
end

--- Convert the markdown body to plain text.
-- Fenced code is unwrapped, heading markers are dropped, list items become
-- "• " (checked checkboxes "✓ "), emphasis and inline-code markers are removed,
-- images become "[Image: alt]" and links are reduced to their text.
-- @param content markdown text
-- @return plain-text string ("" when there is no body)
function M.to_plain_text(content)
  local body = M.get_body(content)
  if not body then return "" end

  -- The leading newline lets the line-start rules also apply to the first
  -- line; it is removed again by the final trim.
  local plain = ("\n" .. body)
    :gsub("```[%w_+#%-]*\n(.-)\n```", "%1")                        -- Code blocks (before inline code)
    :gsub("\n#+[ \t]+", "\n")                                      -- Headings of any level
    :gsub("\n[ \t]*[-*+][ \t]+%[%s*[xX]%s*%][ \t]+", "\n✓ ")       -- "- [x] item"
    :gsub("\n[ \t]*[-*+][ \t]+%[%s*%][ \t]+", "\n• ")              -- "- [ ] item"
    :gsub("\n[ \t]*%[%s*[xX]%s*%][ \t]+", "\n✓ ")                  -- Bare "[x] item"
    :gsub("\n[ \t]*%[%s*%][ \t]+", "\n• ")                         -- Bare "[ ] item"
    :gsub("\n[ \t]*[-*+][ \t]+", "\n• ")                           -- Bullet lists
    :gsub("\n[ \t]*%d+%.[ \t]+", "\n• ")                           -- Numbered lists
    :gsub("`(.-)`", "%1")                                          -- Inline code
    :gsub("%*%*(.-)%*%*", "%1")                                    -- Bold
    :gsub("%*([^%s%*][^%*\n]*)%*", "%1")                           -- Italic (single line only)
    :gsub("!%[([^%]]*)%]%(([^)]+)%)", "[Image: %1]")               -- Images (before links)
    :gsub("%[([^%]]*)%]%(([^)]+)%)", "%1")                         -- Links to text
    :gsub("\n\n+", "\n\n")                                         -- Collapse blank lines

  return trim(plain)
end

return M