Last active
February 6, 2026 13:00
-
-
Save sogaiu/e923318051ccba3fa19205fdc95e3cf3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn capture-empty-element
  ``
  Build an element struct from the captures of an empty-element tag.

  The first argument is the tag name.  Any remaining arguments are
  alternating attribute names and values.

  Returns `{:tag name}` when only a name is given, otherwise a struct
  that also has an `:attrs` table built from the remaining arguments.
  Raises an error when called with no arguments.
  ``
  [& args]
  (def n-args (length args))
  (def sname (first args))
  (cond
    (= 0 n-args)
    (errorf "expected one or more args, got 0")
    # name only, so there are no attributes to record
    (= 1 n-args)
    {:tag sname}
    # everything after the name is treated as key / value pairs
    {:tag sname
     :attrs (table ;(drop 1 args))}))
| (comment | |
| (capture-empty-element "empty") | |
| # => | |
| {:tag "empty"} | |
| (capture-empty-element "empty" "what" "mind") | |
| # => | |
| {:attrs @{"what" "mind"} :tag "empty"} | |
| ) | |
| (comment | |
| (->> (partition-by |(not (string? $)) | |
| ["" :x "hi" :a "there " "mate" {:a 1} "ho"]) | |
| (keep |(if (string? (first $)) | |
| (string/join $ "") | |
| $)) | |
| flatten) | |
| # => | |
| @["" :x "hi" :a "there mate" {:a 1} "ho"] | |
| ) | |
(defn merge-strings
  ``
  Return a new array based on `ind` in which every run of adjacent
  strings is replaced by the concatenation of that run.

  Runs that concatenate to the empty string are dropped.  Items that are
  not strings are kept as-is and in their original order; in particular,
  tuples and arrays are kept whole rather than being spliced into the
  result.
  ``
  [ind]
  (def result @[])
  # accumulates the current run of adjacent strings
  (def run @"")
  (defn flush-run
    []
    # an empty run contributes nothing to the result
    (when (not (empty? run))
      (array/push result (string run))
      (buffer/clear run)))
  (each item ind
    (if (string? item)
      (buffer/push-string run item)
      (do
        # a non-string ends the current run of strings
        (flush-run)
        (array/push result item))))
  (flush-run)
  result)
| (comment | |
| (merge-strings ["<" "p" ">" | |
| "<" | |
| "a href=\"https://lobste.rs/\"" | |
| ">" "Comments" "<" "/a" ">" | |
| "<" "/p" ">"]) | |
| # => | |
| @[(string "<p>" | |
| "<a href=\"https://lobste.rs/\">Comments</a>" | |
| "</p>")] | |
| (merge-strings ["" :x "hi" :a "there " "mate" {:a 1} "ho"]) | |
| # => | |
| @[:x "hi" :a "there mate" {:a 1} "ho"] | |
| ) | |
(defn capture-nonempty-element
  ``
  Build an element struct from the captures of a start-tag, content,
  end-tag sequence.

  `args` is laid out as follows:

  * first - the grouped start-tag captures: an indexed value holding the
    tag name followed by alternating attribute names and values
  * middle - zero or more content captures
  * last - the end-tag name (absent when only the start-tag captures
    are passed)

  Returns a struct with `:tag`, plus `:attrs` and `:content` when those
  are non-empty.  Returns nil when the end-tag name differs from the
  start-tag name.  Raises an error when called with no arguments.
  ``
  [& args]
  (def n-args (length args))
  (when (zero? n-args)
    (errorf "expected one or more args, got 0"))
  (def stag (first args))
  (def sname (first stag))
  # with a single argument there is no end-tag name to compare against
  (when (or (= 1 n-args) (= sname (last args)))
    # attributes are kept no matter how many content captures there are
    (def attrs (table ;(drop 1 stag)))
    # content captures only exist between the start-tag and the end-tag
    (def content
      (if (> n-args 2)
        (merge-strings (slice args 1 -2))
        []))
    # nil values leave the corresponding key out of the resulting struct
    {:tag sname
     :attrs (if (not (empty? attrs)) attrs nil)
     :content (if (not (empty? content)) content nil)}))
| (comment | |
| (capture-nonempty-element @["fun"]) | |
| # => | |
| {:tag "fun"} | |
| (capture-nonempty-element @["fun"] "fun") | |
| # => | |
| {:tag "fun"} | |
| (capture-nonempty-element @["fun"] "" "fun") | |
| # => | |
| {:tag "fun"} | |
| (capture-nonempty-element @["fun" "flavor" "spicy"] "more" "fun") | |
| # => | |
| {:attrs @{"flavor" "spicy"} | |
| :content @["more"] | |
| :tag "fun"} | |
| (capture-nonempty-element | |
| @["description"] | |
| "" "<" "p" ">" | |
| "" "<" "a href=\"https://lobste.rs/s/2svv99/competence_as_tragedy\"" | |
| ">" "Comments" "<" "/a" ">" | |
| "" "<" "/p" ">" "" "description") | |
| # => | |
| {:content | |
| @[(string "<" "p" ">" | |
| "<" | |
| "a href=\"https://lobste.rs/s/2svv99/competence_as_tragedy\"" | |
| ">" "Comments" "<" "/a" ">" | |
| "<" "/p" ">")] | |
| :tag "description"} | |
| ) | |
| # https://www.w3.org/TR/REC-xml/ | |
| # https://www.xml.com/axml/axml.html | |
(def xmlish-peg
  # PEG grammar for a simplified subset of XML, organized by the section
  # numbers of the XML 1.0 recommendation.
  #
  # The grammar is a quasiquoted table so that individual rules (e.g.
  # :main) can be overridden via `merge`, and so that the two capture
  # functions can be spliced in with unquote.
  #
  # XXX: things marked with just SSS below were simplified
  ~@{# 2.1 Well-Formed XML Documents
     :main (sequence :prolog :element (any :misc))
     # 3 Logical Structures
     :element (choice :empty-elem-tag :non-empty-elem)
     # captures: tag name, then attribute name / value pairs
     :empty-elem-tag (cmt (sequence "<"
                                    (capture :name)
                                    (any (sequence :s+ :attribute))
                                    :s*
                                    "/>")
                          ,capture-empty-element)
     # captures: grouped start-tag, content captures, end-tag name
     :non-empty-elem (cmt (sequence :stag :content :etag)
                          ,capture-nonempty-element)
     # 2.3 Common Syntactic Constructs
     :name-start-char (choice ":" :a "_") # SSS
     :name-char (choice :name-start-char "-" "." :d) # SSS
     :name (sequence :name-start-char (any :name-char))
     # only the text between the quotes is captured
     :att-value
     (choice (sequence
               `"`
               (capture (any (choice (sequence (not (set "%&")) (to `"`))
                                     :reference)))
               `"`)
             (sequence
               "'"
               (capture (any (choice (sequence (not (set "%&")) (to "'"))
                                     :reference)))
               "'"))
     # 2.4 Character Data and Markup
     :char-data (any (to (set "<&"))) # XXX: `any` is correct?
     # 2.5 Comments
     #
     # Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
     #
     # each step consumes either one non-hyphen character or a hyphen
     # followed by one non-hyphen character, so single hyphens inside a
     # comment are accepted while "--" is only accepted as part of the
     # closing "-->"
     :comment (sequence "<!--"
                        (any (choice (if-not "-" 1)
                                     (sequence "-" (if-not "-" 1))))
                        "-->") # SSS
     # 2.6 Processing Instructions
     :pi (sequence "<?" (to "?>") "?>") # SSS
     # 2.7 CDATA Sections
     :cdsect (sequence "<![CDATA[" (to "]]>") "]]>")
     # 2.8 Prolog and Document Type Declaration
     :prolog (sequence (opt :xml-decl)
                       (any :misc)
                       (opt (sequence :doctype-decl (any :misc))))
     :xml-decl (sequence "<?xml" (to "?>") "?>") # SSS
     :eq (sequence :s* "=" :s*)
     :misc (choice :comment :pi :s+)
     #
     :doctype-decl (sequence "<!DOCTYPE" (to ">") ">") # SSS
     # 3.1 Start-Tags, End-Tags, and Empty-Element Tags
     #
     # the name and attribute captures are grouped into a single array
     :stag (group (sequence "<"
                            (capture :name)
                            (any (sequence :s+ :attribute))
                            :s*
                            ">"))
     #
     :attribute (sequence (capture :name) :eq :att-value)
     #
     :etag (sequence "</"
                     (capture :name)
                     :s*
                     ">")
     # character data, references and CDATA sections are captured as
     # text; processing instructions and comments produce no captures
     :content (sequence (capture (opt :char-data))
                        (any (sequence (choice :element
                                               (capture :reference)
                                               (capture :cdsect)
                                               :pi
                                               :comment)
                                       (capture (opt :char-data)))))
     # 4.1 Character and Entity References
     :char-ref (choice (sequence "&#" :d+ ";")
                       (sequence "&#x" :h+ ";"))
     :reference (choice :entity-ref :char-ref)
     :entity-ref (sequence "&" :name ";")})
| (comment | |
| (peg/match xmlish-peg "<fun></fun>") | |
| # => | |
| @[{:tag "fun"}] | |
| (peg/match xmlish-peg "<fun>more</fun>") | |
| # => | |
| @[{:content @["more"] :tag "fun"}] | |
| (peg/match xmlish-peg `<fun flavor="spicy">more</fun>`) | |
| # => | |
| @[{:attrs @{"flavor" "spicy"} | |
| :content @["more"] | |
| :tag "fun"}] | |
| (peg/match (merge xmlish-peg {:main :attribute}) | |
| `flavor="spicy"`) | |
| # => | |
| @["flavor" "spicy"] | |
| (peg/match (merge xmlish-peg {:main :att-value}) | |
| `"spicy"`) | |
| # => | |
| @["spicy"] | |
| (peg/match xmlish-peg | |
| `<fun flavor="spicy" place="home">more</fun>`) | |
| # => | |
| @[{:attrs @{"flavor" "spicy" "place" "home"} | |
| :content @["more"] | |
| :tag "fun"}] | |
| (peg/match xmlish-peg "<empty />") | |
| # => | |
| @[{:tag "empty"}] | |
| (peg/match xmlish-peg "<empty what='mind'/>") | |
| # => | |
| @[{:attrs @{"what" "mind"} | |
| :tag "empty"}] | |
| (peg/match xmlish-peg `<empty what='mind' when="now" />`) | |
| # => | |
| @[{:attrs @{"what" "mind" | |
| "when" "now"} | |
| :tag "empty"}] | |
| (peg/match xmlish-peg "<rss>\n</rss>") | |
| # => | |
| @[{:content @["\n"] :tag "rss"}] | |
| (def desc-with-content | |
| (string | |
| `<description>` | |
| `<p>` | |
| `<a href="https://lobste.rs/">` | |
| `Comments</a>` | |
| `</p>` | |
| `</description>`)) | |
| (peg/match (merge xmlish-peg {:main :non-empty-elem}) | |
| desc-with-content) | |
| # => | |
| @[{:content | |
| @[(string "<" "p" ">" | |
| "<" | |
| "a href=\"https://lobste.rs/\"" | |
| ">" "Comments" "<" "/a" ">" | |
| "<" "/p" ">")] | |
| :tag "description"}] | |
| ) | |
| (comment | |
| (def feed-1 | |
| (string | |
| `<?xml version="1.0" encoding="UTF-8"?>` "\n" | |
| `<rss version="2.0">` | |
| `<description>` | |
| `<![CDATA[a]]>` | |
| `</description>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-1) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content @[{:content @["<![CDATA[a]]>"] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-2 | |
| (string | |
| `<?xml version="1.0" encoding="UTF-8"?>` "\n" | |
| `<rss version="2.0">` | |
| `<description>` | |
| `<![CDATA[` | |
| `<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>` | |
| `]]>` | |
| `</description>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-2) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[(string "<![CDATA[" | |
| "<a href=\"https://news.ycombinator.com/" | |
| "item?id=46895972\">Comments</a>" | |
| "]]>")] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-3 | |
| (string | |
| `<rss version="2.0">` | |
| `<description>` | |
| `<![CDATA[` | |
| `<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>` | |
| `]]>` | |
| `</description>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-3) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[(string "<![CDATA[" | |
| "<a href=\"https://news.ycombinator.com/" | |
| "item?id=46895972\">Comments</a>" | |
| "]]>")] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-4 | |
| (string | |
| `<rss version="2.0">` | |
| `<channel>` | |
| `<description>` | |
| `<![CDATA[` | |
| `<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>` | |
| `]]>` | |
| `</description>` | |
| `</channel>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-4) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content | |
| @[(string "<![CDATA[" | |
| "<a href=\"https://news.ycombinator.com/" | |
| "item?id=46895972\">Comments</a>" | |
| "]]>")] | |
| :tag "description"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed-5 | |
| (string | |
| `<rss version="2.0">` | |
| `<channel>` | |
| `<description>` | |
| `<![CDATA[` | |
| `<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>` | |
| `]]>` | |
| `</description>` | |
| `<item>` | |
| `</item>` | |
| `</channel>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-5) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content | |
| @[(string "<![CDATA[" | |
| "<a href=\"https://news.ycombinator.com/" | |
| "item?id=46895972\">Comments</a>" | |
| "]]>")] | |
| :tag "description"} | |
| {:tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed-6 | |
| (string | |
| `<rss version="2.0">` | |
| `<channel>` | |
| `<title>Hacker News</title>` | |
| `<link>https://news.ycombinator.com/</link>` | |
| `<description>` | |
| `Links for the intellectually curious, ranked by readers.` | |
| `</description>` | |
| `<item></item>` | |
| `</channel>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-6) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content @["Hacker News"] :tag "title"} | |
| {:content @["https://news.ycombinator.com/"] :tag "link"} | |
| {:content @[(string "Links for the intellectually curious, " | |
| "ranked by readers.")] | |
| :tag "description"} | |
| {:tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed-7 | |
| (string | |
| `<?xml version="1.0" encoding="UTF-8"?>` "\n" | |
| `<rss version="2.0">` | |
| `<description>` | |
| `<![CDATA[` | |
| `<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>` | |
| `]]>` | |
| `</description>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-7) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[(string "<![CDATA[<a href=\"https://news.ycombinator.com/" | |
| "item?id=46895972\">Comments</a>]]>")] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-8 | |
| (string | |
| `<?xml version="1.0" encoding="UTF-8"?>` "\n" | |
| `<rss version="2.0">` "\n" | |
| `<description>` | |
| `<![CDATA[` | |
| `<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>` | |
| `]]>` | |
| `</description>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-8) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @["\n" | |
| {:content | |
| @[(string "<![CDATA[<a href=\"https://news.ycombinator.com/" | |
| "item?id=46895972\">Comments</a>]]>")] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-9 | |
| (string | |
| `<rss version="2.0">` "\n" | |
| `<description>` | |
| `<![CDATA[` | |
| `<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>` | |
| `]]>` | |
| `</description>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-9) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @["\n" | |
| {:content | |
| @[(string "<![CDATA[<a href=\"https://news.ycombinator.com/" | |
| "item?id=46895972\">Comments</a>]]>")] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-10 | |
| (string | |
| `<?xml version="1.0" encoding="UTF-8"?>` "\n" | |
| `<rss version="2.0">` | |
| `<channel>` | |
| `<title>Hacker News</title>` | |
| `<link>https://news.ycombinator.com/</link>` | |
| `<description>` | |
| `Links for the intellectually curious, ranked by readers.` | |
| `</description>` | |
| `<item>` | |
| `<title>When internal hostnames are leaked to the clown</title>` | |
| `<link>https://rachelbythebay.com/w/2026/02/03/badnas/</link>` | |
| `<pubDate>Thu, 05 Feb 2026 05:22:36 +0000</pubDate>` | |
| `<comments>https://news.ycombinator.com/item?id=46895972</comments>` | |
| `<description>` | |
| `<![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>]]>` | |
| `</description>` | |
| `</item>` | |
| `</channel>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-10) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content | |
| @["Hacker News"] | |
| :tag "title"} | |
| {:content @["https://news.ycombinator.com/"] | |
| :tag "link"} | |
| {:content @[(string "Links for the intellectually curious, " | |
| "ranked by readers.")] | |
| :tag "description"} | |
| {:content | |
| @[{:content @["When internal hostnames are leaked to the clown"] | |
| :tag "title"} | |
| {:content @["https://rachelbythebay.com/w/2026/02/03/badnas/"] | |
| :tag "link"} | |
| {:content @["Thu, 05 Feb 2026 05:22:36 +0000"] | |
| :tag "pubDate"} | |
| {:content @["https://news.ycombinator.com/item?id=46895972"] | |
| :tag "comments"} | |
| {:content | |
| @[(string "<![CDATA[<a href=\"https://" | |
| "news.ycombinator.com/item?id=46895972\">" | |
| "Comments</a>]]>")] | |
| :tag "description"}] | |
| :tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed-11 | |
| (string | |
| `<rss version="2.0">` | |
| `<channel>` | |
| `<title>Hacker News</title>` | |
| `<link>https://news.ycombinator.com/</link>` | |
| `<description>` | |
| `Links for the intellectually curious, ranked by readers.` | |
| `</description>` | |
| `<item>` | |
| `<title>When internal hostnames are leaked to the clown</title>` | |
| `<link>https://rachelbythebay.com/w/2026/02/03/badnas/</link>` | |
| `<pubDate>Thu, 05 Feb 2026 05:22:36 +0000</pubDate>` | |
| `<comments>https://news.ycombinator.com/item?id=46895972</comments>` | |
| `<description>` | |
| `<![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">` | |
| `Comments</a>]]>` | |
| `</description>` | |
| `</item>` | |
| `</channel>` | |
| `</rss>`)) | |
| (peg/match xmlish-peg feed-11) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content @["Hacker News"] :tag "title"} | |
| {:content @["https://news.ycombinator.com/"] :tag "link"} | |
| {:content @[(string "Links for the intellectually curious, " | |
| "ranked by readers.")] | |
| :tag "description"} | |
| {:content | |
| @[{:content @["When internal hostnames are leaked to the clown"] | |
| :tag "title"} | |
| {:content @["https://rachelbythebay.com/w/2026/02/03/badnas/"] | |
| :tag "link"} | |
| {:content @["Thu, 05 Feb 2026 05:22:36 +0000"] | |
| :tag "pubDate"} | |
| {:content @["https://news.ycombinator.com/item?id=46895972"] | |
| :tag "comments"} | |
| {:content | |
| @[(string "<![CDATA[<a href=\"https://" | |
| "news.ycombinator.com/item?id=46895972\">" | |
| "Comments</a>]]>")] | |
| :tag "description"}] | |
| :tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed | |
| `` | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"> | |
| <channel> | |
| <title>Lobsters</title> | |
| <link>https://lobste.rs/</link> | |
| <atom:link href="https://lobste.rs/rss" rel="self"/> | |
| <description></description> | |
| <pubDate>Wed, 04 Feb 2026 13:20:32 -0600</pubDate> | |
| <ttl>120</ttl> | |
| <item> | |
| <title>Recreating PDFs</title> | |
| <link>https://neosmart.net/blog/</link> | |
| <guid>https://lobste.rs/s/iyu0f8</guid> | |
| <author>neosmart.net by mqudsi</author> | |
| <pubDate>Wed, 04 Feb 2026 13:20:32 -0600</pubDate> | |
| <comments>https://lobste.rs/s/iyu0f8/recreating</comments> | |
| <description><p><a href="https://lobste.rs/s/2svv99/competence_as_tragedy">Comments</a></p></description> | |
| <category>security</category> | |
| <category>reversing</category> | |
| </item> | |
| </channel> | |
| </rss> | |
| ``) | |
| (peg/match xmlish-peg feed) | |
| # => | |
| @[{:attrs @{"version" "2.0" | |
| "xmlns:atom" "http://www.w3.org/2005/Atom"} | |
| :content | |
| @["\n " | |
| {:content | |
| @["\n " | |
| {:content @["Lobsters"] :tag "title"} | |
| "\n " | |
| {:content @["https://lobste.rs/"] :tag "link"} | |
| "\n " | |
| {:attrs @{"href" "https://lobste.rs/rss" | |
| "rel" "self"} | |
| :tag "atom:link"} | |
| "\n " | |
| {:tag "description"} | |
| "\n " | |
| {:content @["Wed, 04 Feb 2026 13:20:32 -0600"] | |
| :tag "pubDate"} | |
| "\n " | |
| {:content @["120"] :tag "ttl"} | |
| "\n " | |
| {:content | |
| @["\n " | |
| {:content @["Recreating PDFs"] :tag "title"} | |
| "\n " | |
| {:content @["https://neosmart.net/blog/"] :tag "link"} | |
| "\n " | |
| {:content @["https://lobste.rs/s/iyu0f8"] :tag "guid"} | |
| "\n " | |
| {:content @["neosmart.net by mqudsi"] :tag "author"} | |
| "\n " | |
| {:content @["Wed, 04 Feb 2026 13:20:32 -0600"] | |
| :tag "pubDate"} | |
| "\n " | |
| {:content @["https://lobste.rs/s/iyu0f8/recreating"] | |
| :tag "comments"} | |
| "\n " | |
| {:content | |
| @[(string "<p>" | |
| "<" | |
| "a href=\"https://lobste.rs/s/2svv99/" | |
| "competence_as_tragedy\"" | |
| ">" | |
| "Comments" | |
| "</a>" | |
| "</p>")] | |
| :tag "description"} | |
| "\n " | |
| {:content @["security"] :tag "category"} | |
| "\n " | |
| {:content @["reversing"] :tag "category"} | |
| "\n "] | |
| :tag "item"} | |
| "\n "] | |
| :tag "channel"} | |
| "\n"] | |
| :tag "rss"}] | |
| ) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment