Last active
February 5, 2026 09:57
-
-
Save sogaiu/ff8abc7730c238ec3cbf82699d91c638 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env janet | |
| # based on: | |
| # | |
| # https://codeberg.org/veqq/rss-reader/src/branch/master/rss-reader.janet | |
| # usage: pass feed urls on command line | |
| # requirements: | |
| # | |
| # * bash (or similar?) | |
| # * md5sum | |
| # * awk | |
| # * w3m | |
| # * grep | |
| # * curl | |
| ######################################################################## | |
| (import spork/sh-dsl :as sh) | |
| ######################################################################## | |
# Script-wide settings.
(def config
  (let [home (os/getenv "HOME")
        rss-dir (string home "/Desktop/rss")]
    {# directory to store files in
     :dir rss-dir
     # file (inside :dir) that stores a hash for each item already
     # encountered
     :downloaded "downloaded.txt"
     # file (inside :dir) holding filter rules; see usages for
     # load-filters for examples
     :filters "filters.txt"
     # limit when converting from html to text
     :max-chars 1000
     # XXX: unused
     :max-items 150}))
| ######################################################################## | |
(defn load-hashes
  ``
  Build a lookup table from `content`, a newline-separated list of
  hashes (one hash per line).

  Returns a table that maps every hash string to `true`.  When
  `content` is nil an empty table is returned.  Whitespace around each
  line (including the carriage return left by CRLF line endings) is
  stripped, and lines that are blank after stripping are ignored.
  ``
  [content]
  (when (not content)
    (break @{}))
  #
  (tabseq [raw-line :in (string/split "\n" content)
           # trim *before* the emptiness test so that a whitespace-only
           # line cannot sneak in as an "" key
           :let [hsh (string/trim raw-line)]
           :when (not (empty? hsh))]
    hsh true))
| (comment | |
| (def hash-content | |
| `` | |
| b1946ac92492d2347c6235b4d2611184 | |
| c4ff45bb1fab99f9164b7fec14b2292a | |
| ``) | |
| (load-hashes hash-content) | |
| # => | |
| @{"b1946ac92492d2347c6235b4d2611184" true | |
| "c4ff45bb1fab99f9164b7fec14b2292a" true} | |
| ) | |
(defn load-filters
  ``
  Parse filter rules from `content`, one rule per line, written as
  `url,type,regex`.

  Returns an array of structs with the keys `:url`, `:type` and
  `:regex`.  Only the first two commas separate fields; anything after
  them (commas included) belongs to the regex.  Lines with fewer than
  three comma-separated parts are skipped.  When `content` is nil an
  empty array is returned.
  ``
  [content]
  (when (not content)
    (break @[]))
  #
  (seq [raw-line :in (->> (string/trim content)
                          (string/split "\n"))
        # with CRLF line endings every line but the last still carries a
        # "\r", which would otherwise end up glued to the regex
        :let [line (string/trimr raw-line "\r")
              parts (string/split "," line)]
        :when (>= (length parts) 3)]
    {:url (get parts 0)
     :type (get parts 1)
     :regex (string/join (slice parts 2) ",")}))
| (comment | |
| (def filters | |
| `` | |
| *,content,language|poetry|programming | |
| https://news.ycombinator.com/rss,title,TypeScript | |
| ``) | |
| (load-filters filters) | |
| # => | |
| @[{:regex "language|poetry|programming" | |
| :type "content" | |
| :url "*"} | |
| {:regex "TypeScript" | |
| :type "title" | |
| :url "https://news.ycombinator.com/rss"}] | |
| ) | |
(defn md5
  ``
  Return, as a string, the MD5 digest that `md5sum` reports for the
  bytes of `s`.

  The work is done by external programs: `echo -n` feeds `s` (without a
  trailing newline) to `md5sum`, and `awk` keeps only the first column
  of its output, i.e. the hex digest.
  ``
  [s]
  (def digest
    (sh/$<_ echo -n ,s | md5sum | awk {print $1}))
  # normalise whatever the capture macro hands back to a string
  (string digest))
| (comment | |
| (md5 "hello") | |
| # => | |
| "5d41402abc4b2a76b9719d911017c592" | |
| (md5 "hello\n") | |
| # => | |
| "b1946ac92492d2347c6235b4d2611184" | |
| (md5 "there") | |
| # => | |
| "d850f04cdb48312a9be171e214c0b4ee" | |
| (md5 "there\n") | |
| # => | |
| "c4ff45bb1fab99f9164b7fec14b2292a" | |
| ) | |
| ######################################################################## | |
| # modified peg from janet-xmlish | |
# Grammar for the small subset of XML needed to read RSS feeds.
#
# A successful `peg/match` yields one capture for the root element.
# Every element is turned into a struct with the keys:
#
#   :tag      element name (string)
#   :attrs    table of attribute name -> value, absent when there are
#             no attributes
#   :content  array of child items (strings and element structs),
#             absent when there are none; whitespace-only strings are
#             dropped
#
# CDATA sections are captured verbatim, *including* their
# `<![CDATA[` ... `]]>` wrapper.
(def xmlish-peg
  ~{:main (sequence (opt (drop :xml-declaration)) :s*
                    (opt (drop :doctype)) :s*
                    (any :comment) :s*
                    :element :s*
                    (any :comment) :s*)
    # leading `<?xml ... ?>`; parsed and then discarded by :main
    :xml-declaration (sequence :s* "<?xml" :s*
                               (any :attribute) :s*
                               "?>")
    # XXX: only handles very simple case
    # (XML itself spells the keyword in upper case; the lower-case
    # form is kept for leniency)
    :doctype (sequence :s* (choice "<!DOCTYPE" "<!doctype") :s*
                       :tag-name :s*
                       ">")
    # XXX: not accurate
    # captures two strings: the attribute name and its value.  the
    # name ends at any whitespace (not just a space) so attributes
    # may be laid out one per line
    :attribute (sequence
                 (capture (to (set " \t\r\n/<=>\""))) :s*
                 "=" :s*
                 (choice (sequence `"` (capture (to `"`)) `"`)
                         (sequence "'" (capture (to "'")) "'"))
                 :s*)
    # section 2.5 of xml spec
    :comment (sequence
               "<!--"
               (any (choice
                      (if-not (set "-") 1)
                      (sequence "-" (if-not (set "-") 1))))
               "-->")
    #
    :element (choice :empty-element :non-empty-element)
    # `<name attr="v" ... />`
    :empty-element (cmt (sequence
                          "<" (capture :tag-name) :s*
                          (any :attribute)
                          "/>")
                        ,|(let [args $&
                                elt-name (first args)
                                attrs (drop 1 args)
                                attrs (if (= (length attrs) 0)
                                        nil
                                        (table ;attrs))]
                            {:attrs attrs
                             :tag elt-name}))
    # `<name ...> children </name>`
    :non-empty-element
    (cmt (sequence :open-tag
                   (any (choice :s+
                                (capture :pcdata)
                                :comment
                                :element
                                (capture :content)))
                   :close-tag)
         ,|(let [args $&
                 # first capture is the group made by :open-tag:
                 # the name followed by attribute name/value pairs
                 open-name (first (first args))
                 attrs (drop 1 (first args))
                 # last capture is the name from :close-tag
                 close-name (last args)]
             # returning nil (names differ) makes the match fail
             (when (= open-name close-name)
               (let [elt-name open-name
                     content (filter (fn [c-item]
                                       (if (string? c-item)
                                         # drop whitespace-only strings
                                         (not= "" (string/trim c-item))
                                         (not= "" c-item)))
                                     (tuple/slice args 1 -2))
                     content (if (= (length content) 0)
                               nil
                               content)
                     attrs (if (= (length attrs) 0)
                             nil
                             (table ;attrs))]
                 {:attrs attrs
                  :content content
                  :tag elt-name}))))
    # XXX: could use work?
    # a name ends at any whitespace, "/", "<" or ">"; stopping only at
    # a space would swallow a newline that follows the name
    :tag-name (sequence (not "!") (not "?") (to (set " \t\r\n/<>")))
    #
    :open-tag (group
                (sequence
                  "<" (capture :tag-name) :s*
                  (any :attribute)
                  ">"))
    #
    :close-tag (sequence
                 "</" (capture :tag-name) :s* ">")
    # XXX: may not be accurate
    :pcdata (sequence "<![CDATA[" (to "]]>") "]]>")
    # XXX: may not be accurate
    :content (to "<")})
| (comment | |
| (def feed | |
| `` | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"> | |
| <channel> | |
| <title>Lobsters</title> | |
| <link>https://lobste.rs/</link> | |
| <atom:link href="https://lobste.rs/rss" rel="self"/> | |
| <description></description> | |
| <pubDate>Wed, 04 Feb 2026 13:20:32 -0600</pubDate> | |
| <ttl>120</ttl> | |
| <item> | |
| <title>Recreating PDFs</title> | |
| <link>https://neosmart.net/blog/</link> | |
| <guid>https://lobste.rs/s/iyu0f8</guid> | |
| <author>neosmart.net by mqudsi</author> | |
| <pubDate>Wed, 04 Feb 2026 13:20:32 -0600</pubDate> | |
| <comments>https://lobste.rs/s/iyu0f8/recreating</comments> | |
| <description><p><a href="https://lobste.rs/s/2svv99/competence_as_tragedy">Comments</a></p></description> | |
| <category>security</category> | |
| <category>reversing</category> | |
| </item> | |
| </channel> | |
| </rss> | |
| ``) | |
| (peg/match xmlish-peg feed) | |
| # => | |
| @[{:attrs @{"version" "2.0" | |
| "xmlns:atom" "http://www.w3.org/2005/Atom"} | |
| :content | |
| @[{:content | |
| @[{:content @["Lobsters"] | |
| :tag "title"} | |
| {:content @["https://lobste.rs/"] | |
| :tag "link"} | |
| {:attrs @{"href" "https://lobste.rs/rss" | |
| "rel" "self"} | |
| :tag "atom:link"} | |
| {:tag "description"} | |
| {:content @["Wed, 04 Feb 2026 13:20:32 -0600"] | |
| :tag "pubDate"} | |
| {:content @["120"] | |
| :tag "ttl"} | |
| {:content | |
| @[{:content @["Recreating PDFs"] | |
| :tag "title"} | |
| {:content @["https://neosmart.net/blog/"] | |
| :tag "link"} | |
| {:content @["https://lobste.rs/s/iyu0f8"] | |
| :tag "guid"} | |
| {:content @["neosmart.net by mqudsi"] | |
| :tag "author"} | |
| {:content @["Wed, 04 Feb 2026 13:20:32 -0600"] | |
| :tag "pubDate"} | |
| {:content @["https://lobste.rs/s/iyu0f8/recreating"] | |
| :tag "comments"} | |
| {:content | |
| @[(string `<p><a href="https://lobste.rs/s/` | |
| `2svv99/competence_as_tragedy">Comments<` | |
| `/a></p>`)] | |
| :tag "description"} | |
| {:content @["security"] | |
| :tag "category"} | |
| {:content @["reversing"] | |
| :tag "category"}] | |
| :tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (peg/match xmlish-peg "<rss>\n</rss>") | |
| # => | |
| @[{:tag "rss"}] | |
| (def feed-1 | |
| `` | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <rss version="2.0"><description><![CDATA[a]]></description></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-1) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content @[{:content @["<![CDATA[a]]>"] :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-2 | |
| `` | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <rss version="2.0"><description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-2) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-3 | |
| `` | |
| <rss version="2.0"><description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-3) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-4 | |
| `` | |
| <rss version="2.0"><channel><description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description></channel></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-4) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content | |
| @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed-5 | |
| `` | |
| <rss version="2.0"><channel><description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description><item></item></channel></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-5) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content | |
| @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"} | |
| {:tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed-6 | |
| `` | |
| <rss version="2.0"><channel><title>Hacker News</title><link>https://news.ycombinator.com/</link><description>Links for the intellectually curious, ranked by readers.</description><item></item></channel></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-6) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content @["Hacker News"] :tag "title"} | |
| {:content @["https://news.ycombinator.com/"] :tag "link"} | |
| {:content @["Links for the intellectually curious, ranked by readers."] | |
| :tag "description"} | |
| {:tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed-7 | |
| `` | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <rss version="2.0"><description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-7) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-8 | |
| `` | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <rss version="2.0"> | |
| <description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-8) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-9 | |
| `` | |
| <rss version="2.0"> | |
| <description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-9) | |
| # => | |
| '@[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"}] | |
| :tag "rss"}] | |
| (def feed-10 | |
| `` | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <rss version="2.0"><channel><title>Hacker News</title><link>https://news.ycombinator.com/</link><description>Links for the intellectually curious, ranked by readers.</description><item><title>When internal hostnames are leaked to the clown</title><link>https://rachelbythebay.com/w/2026/02/03/badnas/</link><pubDate>Thu, 05 Feb 2026 05:22:36 +0000</pubDate><comments>https://news.ycombinator.com/item?id=46895972</comments><description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description></item></channel></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-10) | |
| # => | |
| @[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content | |
| @["Hacker News"] | |
| :tag "title"} | |
| {:content @["https://news.ycombinator.com/"] | |
| :tag "link"} | |
| {:content @["Links for the intellectually curious, ranked by readers."] | |
| :tag "description"} | |
| {:content | |
| @[{:content @["When internal hostnames are leaked to the clown"] | |
| :tag "title"} | |
| {:content @["https://rachelbythebay.com/w/2026/02/03/badnas/"] | |
| :tag "link"} | |
| {:content @["Thu, 05 Feb 2026 05:22:36 +0000"] | |
| :tag "pubDate"} | |
| {:content @["https://news.ycombinator.com/item?id=46895972"] | |
| :tag "comments"} | |
| {:content @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"}] | |
| :tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| (def feed-11 | |
| `` | |
| <rss version="2.0"><channel><title>Hacker News</title><link>https://news.ycombinator.com/</link><description>Links for the intellectually curious, ranked by readers.</description><item><title>When internal hostnames are leaked to the clown</title><link>https://rachelbythebay.com/w/2026/02/03/badnas/</link><pubDate>Thu, 05 Feb 2026 05:22:36 +0000</pubDate><comments>https://news.ycombinator.com/item?id=46895972</comments><description><![CDATA[<a href="https://news.ycombinator.com/item?id=46895972">Comments</a>]]></description></item></channel></rss> | |
| ``) | |
| (peg/match xmlish-peg feed-11) | |
| # => | |
| '@[{:attrs @{"version" "2.0"} | |
| :content | |
| @[{:content | |
| @[{:content @["Hacker News"] :tag "title"} | |
| {:content @["https://news.ycombinator.com/"] :tag "link"} | |
| {:content @["Links for the intellectually curious, ranked by readers."] | |
| :tag "description"} | |
| {:content | |
| @[{:content @["When internal hostnames are leaked to the clown"] | |
| :tag "title"} | |
| {:content @["https://rachelbythebay.com/w/2026/02/03/badnas/"] | |
| :tag "link"} | |
| {:content @["Thu, 05 Feb 2026 05:22:36 +0000"] | |
| :tag "pubDate"} | |
| {:content @["https://news.ycombinator.com/item?id=46895972"] | |
| :tag "comments"} | |
| {:content @["<![CDATA[<a href=\"https://news.ycombinator.com/item?id=46895972\">Comments</a>]]>"] | |
| :tag "description"}] | |
| :tag "item"}] | |
| :tag "channel"}] | |
| :tag "rss"}] | |
| ) | |
| ######################################################################## | |
(defn html->text
  ``
  Render `html` as plain text by piping it through
  `w3m -T text/html -dump`.

  Only roughly the first `max-chars` bytes of `html` are rendered.  The
  cut never lands inside a multibyte UTF-8 sequence: if it would, it is
  moved back to the start of that sequence.  `max-chars` defaults to
  the dynamic binding `:max-chars`, or 1000 when that is unset.

  Returns "" when nothing is left after clipping; otherwise returns
  whatever `sh/$<_` captured from w3m's standard output.
  ``
  [html &opt max-chars]
  (default max-chars (dyn :max-chars 1000))
  (def total (length html))
  (var cut (min total max-chars))
  # bytes of the form 0b10xxxxxx are UTF-8 continuation bytes; if the
  # first byte that would be dropped is one of them, the cut is in the
  # middle of a character, so back up until it is not
  (while (and (< 0 cut) (< cut total)
              (= 0x80 (band 0xC0 (get html cut))))
    (-- cut))
  (def clipped (string/slice html 0 cut))
  (when (empty? clipped)
    (break ""))
  #
  (sh/$<_ echo ,clipped |
          w3m -T text/html -dump))
| (comment | |
| (def html | |
| `` | |
| <html> | |
| <body> | |
| <h1>greetings</h1> | |
| <p>health, happiness, and peace</p> | |
| </body> | |
| </html> | |
| ``) | |
| (html->text html) | |
| # => | |
| @"greetings\n\nhealth, happiness, and peace\n" | |
| ) | |
(defn extract-href
  ``
  Return the value of the first `href="..."` occurrence in the string
  `xml`.

  Returns "" when `href="` does not occur or when no closing double
  quote follows it.
  ``
  [xml]
  (def marker `href="`)
  # index just past the opening quote, or nil when there is no marker
  (def start
    (when-let [at (string/find marker xml)]
      (+ at (length marker))))
  # index of the closing quote, or nil
  (def stop
    (when start
      (string/find `"` xml start)))
  (if stop
    (string/slice xml start stop)
    ""))
| (comment | |
| (def item-str | |
| `` | |
| <item> | |
| <title>I miss thinking hard</title> | |
| <link>https://www.jernesto.com/articles/thinking_hard</link> | |
| <guid>https://lobste.rs/s/hfuiti</guid> | |
| <author>jernesto.com via xyproto</author> | |
| <pubDate>Wed, 04 Feb 2026 02:35:56 -0600</pubDate> | |
| <comments>https://lobste.rs/s/hfuiti/i_miss_thinking_hard</comments> | |
| <description><p><a href="https://lobste.rs/s/hfuiti/i_miss_thinking_hard">Comments</a></p></description> | |
| <category>philosophy</category> | |
| <category>vibecoding</category> | |
| </item> | |
| ``) | |
| (extract-href item-str) | |
| # => | |
| "https://lobste.rs/s/hfuiti/i_miss_thinking_hard" | |
| (def desc-content-str | |
| (string `<p><a href="https://lobste.rs/s/hfuiti/` | |
| `i_miss_thinking_hard">Comments</a></p>`)) | |
| (extract-href desc-content-str) | |
| # => | |
| "https://lobste.rs/s/hfuiti/i_miss_thinking_hard" | |
| ) | |
| ######################################################################## | |
(defn- unwrap-cdata
  ``
  If `s` is a string/buffer of the form `<![CDATA[...]]>`, return the
  text between the markers; any other value is returned unchanged.
  ``
  [s]
  (def opener "<![CDATA[")
  (def closer "]]>")
  (if (and (bytes? s)
           (>= (length s) (+ (length opener) (length closer)))
           (string/has-prefix? opener s)
           (string/has-suffix? closer s))
    # an end index of -1 means "end of string", so -1 - n drops the
    # last n bytes
    (string/slice s (length opener) (- -1 (length closer)))
    s))

(defn rss->items
  ``
  Parse the RSS document `rss-str` and return an array with one struct
  per non-empty `<item>` of the channel:

    :title        first content piece of `<title>`
    :link         first content piece of `<link>`
    :desc-link    first `href="..."` found in `<description>` (or "")
    :description  `<description>` rendered with `html->text`
    :content      `<content>` rendered with `html->text`, when present

  A CDATA wrapper around a field value is removed before the value is
  used, so titles, links and the html handed to `html->text` do not
  carry a literal `<![CDATA[` ... `]]>`.

  Returns an empty array when the document has no channel content or
  no items.  Raises an error when the document cannot be parsed, when
  the root / first child are not `rss` / `channel`, or when an item
  lacks a title, link or description.
  ``
  [rss-str]
  (def m (peg/match xmlish-peg rss-str))
  (assert m "failed to extract items")
  #
  (def rss (first m))
  (assertf (= "rss" (get rss :tag))
           "unexpected tag: %n in rss: %n" (get rss :tag) rss)
  #
  (def rss-content (get rss :content))
  (when (not rss-content)
    (break @[]))
  # the channel is expected to be the first child of <rss>
  (def chnl (first rss-content))
  (when (not chnl)
    (break @[]))
  #
  (assertf (= "channel" (get chnl :tag))
           "unexpected tag: %n in channel: %n" (get chnl :tag) chnl)
  #
  (def chnl-content (get chnl :content))
  (when (not chnl-content)
    (break @[]))
  #
  (def items (filter |(= "item" (get $ :tag)) chnl-content))
  (when (empty? items)
    (break @[]))
  #
  (seq [it :in items
        :let [it-content (get it :content)]
        :when (and it-content (not (empty? it-content)))]
    # tag name -> first content piece, for every child element
    (def item @{})
    (each thing it-content
      (when (dictionary? thing)
        # XXX: highlander assumption
        (def thing-content (first (get thing :content)))
        (put item (get thing :tag) (unwrap-cdata thing-content))))
    (def title (get item "title"))
    (assertf title "failed to find title in: %n" item)
    (def link (get item "link"))
    (assertf link "failed to find link in: %n" item)
    (def desc (get item "description"))
    (assertf desc "failed to find description in: %n" item)
    #
    (def content (when-let [content-raw (get item "content")]
                   (html->text content-raw)))
    # XXX: should content be examined for oddities?
    {:title title
     :link link
     :desc-link (extract-href desc)
     :description (html->text desc)
     :content content}))
| (comment | |
| (def feed | |
| `` | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"> | |
| <channel> | |
| <title>Lobsters</title> | |
| <link>https://lobste.rs/</link> | |
| <atom:link href="https://lobste.rs/rss" rel="self"/> | |
| <description></description> | |
| <pubDate>Wed, 04 Feb 2026 13:20:32 -0600</pubDate> | |
| <ttl>120</ttl> | |
| <item> | |
| <title>Recreating PDFs</title> | |
| <link>https://neosmart.net/blog/</link> | |
| <guid>https://lobste.rs/s/iyu0f8</guid> | |
| <author>neosmart.net by mqudsi</author> | |
| <pubDate>Wed, 04 Feb 2026 13:20:32 -0600</pubDate> | |
| <comments>https://lobste.rs/s/iyu0f8/recreating</comments> | |
| <description><p><a href="https://lobste.rs/s/2svv99/competence_as_tragedy">Comments</a></p></description> | |
| <category>security</category> | |
| <category>reversing</category> | |
| </item> | |
| </channel> | |
| </rss> | |
| ``) | |
| (rss->items feed) | |
| # => | |
| @[{:desc-link "https://lobste.rs/s/2svv99/competence_as_tragedy" | |
| :description | |
| (buffer "<p><a href=\"https://lobste.rs/s/2svv99/" | |
| "competence_as_tragedy\">Comments</a></p>") | |
| :link "https://neosmart.net/blog/" | |
| :title "Recreating PDFs"}] | |
| ) | |
| ######################################################################## | |
(defn passes?
  ``
  Decide whether `content` gets through `filters`.

  `filters` is a collection of structs with the keys `:type`, `:url`
  and `:regex`.  A filter applies when its `:type` equals `ftype` and
  its `:url` is either "*" or equal to `url`.  `content` is rejected as
  soon as one applicable filter's regex is not matched by
  `grep -Eq`; later filters are then not consulted.

  Returns true when `content` is nil/false or empty, or when no
  applicable filter rejects it; false otherwise.
  ``
  [filters url ftype content]
  (def blank? (or (not content) (empty? content)))
  # true when the given filter applies and grep finds no match
  (defn rejects?
    [{:type kind :url scope :regex pattern}]
    (and (= kind ftype)
         (or (= scope "*") (= scope url))
         (not (sh/$? echo ,content |
                     grep -Eq ,pattern))))
  #
  (if blank?
    true
    # `some` stops at the first rejecting filter
    (not (some rejects? filters))))
| (comment | |
| (def filters | |
| @[{:regex "language|poetry|programming" | |
| :type "content" | |
| :url "*"} | |
| {:regex "TypeScript" | |
| :type "title" | |
| :url "https://news.ycombinator.com/rss"}]) | |
| (def item-url "https://news.ycombinator.com/rss") | |
| (passes? filters "https://news.ycombinator.com/rss" "url" item-url) | |
| # => | |
| true | |
| (passes? filters "https://news.ycombinator.com/rss" "content" "yuck") | |
| # => | |
| false | |
| ) | |
(defn get-feed
  ``
  Fetch `url` with `curl -s` and return what was captured from curl's
  standard output.

  Any error raised while running curl is re-raised with a message that
  names the url.
  ``
  [url]
  (try (sh/$< curl -s ,url)
    # %V formats the caught value with `string`, so a non-string error
    # value cannot make the error handler itself blow up (as %s would)
    ([e] (errorf "curl failed for %s with: %V" url e))))
| (comment | |
| (get-feed "https://lobste.rs/rss") | |
| ) | |
(defn process-feed
  ``
  Fetch the feed at `url`, print every item that has not been seen
  before and that passes the filters, and return an array with the
  hashes of the items printed.

  `dl-content` is the text of the "already seen" hash list and
  `filter-content` the text of the filter rules; they default to the
  dynamic bindings `:downloaded` and `:filters`.

  Returns nil (after printing a notice) when the feed body is empty.
  ``
  [url &opt dl-content filter-content]
  (default dl-content (dyn :downloaded))
  (default filter-content (dyn :filters))
  #
  (def hashes (load-hashes dl-content))
  (def filters (load-filters filter-content))
  # hashes of the items printed during this call
  (def new @[])
  #
  (def rss-str (get-feed url))
  (when (empty? rss-str)
    (printf "Empty content from: %s" url)
    (break nil))
  #
  (print "From " url)
  (print)
  #
  (def items (rss->items rss-str))
  #
  (each {:title title
         :link link1
         :desc-link link2
         :description description
         :content content} items
    # an item is identified by its two links plus its title
    (def hsh (md5 (string link1 link2 title)))
    (when (not (get hashes hsh))
      (when (and (passes? filters url "url" link1)
                 (passes? filters url "url" link2)
                 (passes? filters url "title" title)
                 (passes? filters url "content" description)
                 (passes? filters url "content" content))
        (print "--- " url)
        (print hsh)
        (when (not (empty? link1)) (print link1))
        (when (not (empty? link2)) (print link2))
        (when (not (empty? title)) (print title))
        (when (not (empty? description)) (print description))
        # skip content that merely repeats the description.  compare
        # as strings: `=` on two buffers tests identity, not contents,
        # so it would never report them equal
        (when (not (or (not content)
                       (empty? content)
                       (= (string description) (string content))))
          (print content))
        (print)
        #
        (array/push new hsh)
        # also guards against a repeat later in the same feed
        (put hashes hsh true))))
  #
  new)
| ######################################################################## | |
(defn main
  ``
  Entry point.  `args` are feed urls.

  Reads the hash list and the filter rules from the directory named in
  `config` (creating that directory when it does not exist), processes
  each feed in turn, and appends the hashes of newly printed items to
  the hash list file.
  ``
  [_ & args]
  (def {:max-chars max-chars
        :dir config-dir
        :downloaded downloaded
        :filters filters} config)
  #
  (setdyn :max-chars max-chars)
  #
  (os/mkdir config-dir)
  # content is nil when the file does not exist (or is not a regular
  # file)
  (def dl-path (string config-dir "/" downloaded))
  (def dl-content
    (when (= :file (os/stat dl-path :mode))
      (slurp dl-path)))
  #
  (def filters-path (string config-dir "/" filters))
  (def filter-content
    (when (= :file (os/stat filters-path :mode))
      (slurp filters-path)))
  # process-feed picks these up as its defaults
  (setdyn :downloaded dl-content)
  (setdyn :filters filter-content)
  # hashes already in the file, parsed the same way process-feed
  # parses them; extended below so that an item showing up in several
  # feeds of one run is written only once
  (def seen? (load-hashes dl-content))
  #
  (each feed-url args
    (def new (process-feed feed-url))
    #
    (when (not (empty? new))
      (with [f (file/open dl-path :a)]
        (each h new
          (when (not (get seen? h))
            (put seen? h true)
            (file/write f h "\n")))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment