poblabs · August 23, 2022 18:38 · Dec 10, 2015
diff --git a/README.md b/README.md
@@ -0,0 +1,19 @@
+# drangreader
+
+This is a set of scripts for aggregating RSS feeds.  It's based on
+a script originally written by Dr. Drang:
+<http://leancrew.com/all-this/2015/11/simpler-syndication/>
+
+## Installation
+
+Download all the files from this Gist.  Put them all in a directory, create a virtualenv and install requirements:
+
+    cd ~/drangreader && virtualenv env
+    source env/bin/activate
+    pip install -r requirements.txt
+
+Put a list of feed URLs in `feeds.txt`.  One feed per line.  To create the HTML file:
+
+    python main.py
+
+Assuming nothing goes wrong, the posts will be written to `output.html`.
diff --git a/extras.py b/extras.py
@@ -0,0 +1,70 @@
+# encoding: utf-8
+"""
+This file contains the logic for filtering/munging posts.  It's kept in
+a separate file from the main feed parsing logic so the commit history
+for main.py doesn't get polluted with nitpicks and tweaks.
+"""
+
+import collections
+
+
+# Keywords to filter out.  normalise_post() hides any post whose body
+# contains one of these, compared case-insensitively.
+FILTER_WORDS = ['coffee', 'yankees', 'apple watch']
+
+
+# A post plus a 'permalink' field.  The typename matches the variable
+# name so that repr() output identifies the tuple correctly.
+ExtendedPost = collections.namedtuple('ExtendedPost', [
+    'time',
+    'blog',
+    'title',
+    'author',
+    'link',
+    'body',
+    'permalink'
+])
+
+
+def remove_final_link(html_text):
+    """
+    Return html_text truncated just before its last occurrence of '<a'.
+
+    If html_text contains no '<a', it is returned unchanged.
+    """
+    cut = html_text.rfind('<a')
+    if cut == -1:
+        return html_text
+    return html_text[:cut]
+
+
+def extract_last_link(html_text):
+    """
+    Return the text between the last two double quotes in html_text.
+
+    NOTE(review): this presumably relies on the body ending with a link
+    whose final quoted attribute is its href -- confirm against the feeds
+    it is used for.
+
+    Returns None if html_text contains fewer than two double quotes, so
+    a body without a trailing link yields "no permalink" rather than an
+    IndexError or an arbitrary chunk of text.
+    """
+    parts = html_text.rsplit('"', maxsplit=2)
+    if len(parts) < 3:
+        return None
+    return parts[1]
+
+
+def normalise_post(post):
+    """
+    Apply keyword filtering and per-blog special cases to a post.
+
+    ``post`` is a tuple with the fields time, blog, title, author, link
+    and body (in that order).
+
+    Returns an ExtendedPost, which carries the same fields plus a
+    ``permalink`` field, or None if the post should be hidden.
+    """
+    blog = post.blog
+
+    # Lowercase the body once, rather than once per keyword.
+    body_lower = post.body.lower()
+    if any(word.lower() in body_lower for word in FILTER_WORDS):
+        return None
+
+    if blog == 'Marco.org':
+        # Case-sensitive check; it only has an effect if 'coffee' is
+        # ever removed from FILTER_WORDS.
+        if 'coffee' in post.body:
+            return None
+        if post.title.startswith(u'→'):
+            # Link post: drop the arrow and the character after it from
+            # the title, and move the body's final link into permalink.
+            title = post.title[2:]
+            body = remove_final_link(post.body)
+            permalink = extract_last_link(post.body)
+            return ExtendedPost(post.time, post.blog, title, post.author,
+                                post.link, body, permalink)
+
+    elif blog == 'Daring Fireball' and u'★' in post.body:
+        # Move the body's final link into permalink.
+        body = remove_final_link(post.body)
+        permalink = extract_last_link(post.body)
+        return ExtendedPost(post.time, post.blog, post.title, post.author,
+                            post.link, body, permalink)
+
+    elif blog == 'Erica Sadun' and post.author == 'erica':
+        # Blank out the author for this blog/author combination.
+        return ExtendedPost(post.time, post.blog, post.title,
+                            None, post.link, post.body, None)
+
+    # Everything else passes through unchanged, with no permalink.
+    return ExtendedPost(*post, permalink=None)
diff --git a/feeds.txt b/feeds.txt
@@ -0,0 +1,54 @@
+http://feedpress.me/512pixels
+http://www.leancrew.com/all-this/feed/
+http://ihnatko.com/feed/
+http://blog.ashleynh.me/feed
+http://www.betalogue.com/feed/
+http://bitsplitting.org/feed/
+http://feedpress.me/jxpx777
+http://kieranhealy.org/blog/index.xml
+http://blueplaid.net/news?format=rss
+http://brett.trpstra.net/brettterpstra
+http://feeds.feedburner.com/NerdGap
+http://www.libertypages.com/clarktech/?feed=rss2
+http://feeds.feedburner.com/CommonplaceCartography
+http://kk.org/cooltools/feed
+http://danstan.com/blog/imHotep/files/page0.xml
+http://daringfireball.net/feeds/main
+http://david-smith.org/atom.xml
+http://feeds.feedburner.com/drbunsenblog
+http://stratechery.com/feed/
+http://www.gnuplotting.org/feed/
+http://feeds.feedburner.com/jblanton
+http://feeds.feedburner.com/IgnoreTheCode
+http://indiestack.com/feed/
+http://feedpress.me/inessential
+http://feeds.feedburner.com/JamesFallows
+http://feeds.feedburner.com/theendeavour
+http://feed.katiefloyd.me/
+http://feeds.feedburner.com/KevinDrum
+http://www.kungfugrippe.com/rss
+http://lancemannion.typepad.com/lance_mannion/rss.xml
+http://www.caseyliss.com/rss
+http://www.macdrifter.com/feeds/all.atom.xml
+http://mackenab.com/feed
+http://hints.macworld.com/backend/osxhints.rss
+http://macsparky.com/blog?format=rss
+http://www.macstories.net/feed/
+http://www.marco.org/rss
+http://merrillmarkoe.com/feed
+http://mjtsai.com/blog/feed/
+http://feeds.feedburner.com/mygeekdaddy
+http://nathangrigg.net/feed.rss
+http://onethingwell.org/rss
+http://schmeiser.typepad.com/penny_wiseacre/rss.xml
+http://feeds.feedburner.com/PracticallyEfficient
+http://robjwells.com/rss
+http://www.red-sweater.com/blog/feed/
+http://feedpress.me/sixcolors
+http://feedpress.me/candlerblog
+http://inversesquare.wordpress.com/feed/
+http://high90.com/feed
+http://joe-steel.com/feed
+http://feeds.veritrope.com/
+http://xkcd.com/atom.xml
+http://doingthatwrong.com/?format=rss
diff --git a/main.py b/main.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# coding=utf8
+
+import collections
+import configparser
+from datetime import datetime, timedelta
+import time
+
+import feedparser
+import jinja2
+import pytz
+
+from extras import normalise_post
+
+
+# Get a list of feed URLs: one per line.  Strip the trailing newline
+# that iterating over the file leaves on each line, and skip blank lines.
+with open('feeds.txt') as f:
+    SUBSCRIPTIONS = [line.strip() for line in f if line.strip()]
+
+# Date and time setup.  Posts older than START are dropped, where the
+# day lasts until 2 AM in the home timezone.
+#
+# The home timezone comes from the 'timezone' option in the [default]
+# section of the config file, falling back to GMT.  read() silently
+# ignores a missing file, so the config file is optional.
+# NOTE(review): the filename 'config.ini' is an assumption -- nothing
+# else in this project names the config file.
+config = configparser.ConfigParser()
+config.read('config.ini')
+TIMEZONE = config.get(section='default', option='timezone', fallback='GMT')
+
+# Get the current time in the home timezone, then step back 48 hours
+# (72 hours if it's before 2 AM).
+home_tz = pytz.timezone(TIMEZONE)
+dt = datetime.now(home_tz)
+if dt.hour < 2:
+    dt -= timedelta(hours=72)
+else:
+    dt -= timedelta(hours=48)
+
+# pytz does not adjust the UTC offset during arithmetic or replace(),
+# so normalize after subtracting and re-localize the midnight value.
+dt = home_tz.normalize(dt)
+start = home_tz.localize(
+    dt.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None))
+
+# Convert this time back into UTC.
+utc = pytz.utc
+START = start.astimezone(utc)
+
+
+# One feed entry, reduced to the fields that process_entry() fills in.
+Post = collections.namedtuple(
+    'Post', 'time blog title author link body'
+)
+
+
+def process_entry(entry, blog):
+    """
+    Coerces an entry from feedparser into a Post tuple.
+
+    ``entry`` is a single entry from a parsed feed and ``blog`` is the
+    title of the feed it came from.
+
+    Returns the result of passing the Post through normalise_post(), or
+    None if the entry has no usable date or is older than START.
+    """
+    # Prefer the updated date, falling back to the published date.  An
+    # entry with neither cannot be placed in time, so drop it.
+    when = entry.get('updated_parsed') or entry.get('published_parsed')
+    if when is None:
+        return None
+
+    # feedparser's *_parsed values are time tuples in UTC.  Build the
+    # datetime from the fields directly: going through time.mktime()
+    # would treat the tuple as local time and can be an hour out
+    # during daylight saving time.
+    when = utc.localize(datetime(*when[:6]))
+
+    if when < START:
+        return None
+
+    title = entry['title']
+    try:
+        author = entry['author']
+    except KeyError:
+        author = ', '.join(a['name'] for a in entry.get('authors', []))
+    link = entry['link']
+    try:
+        body = entry['content'][0]['value']
+    except (KeyError, IndexError):
+        # No full content (or an empty content list): use the summary,
+        # or an empty body if there isn't one either.
+        body = entry.get('summary', '')
+
+    return normalise_post(Post(when, blog, title, author, link, body))
+
+
+# Collect every post that survives process_entry() from every feed.
+posts = []
+for url in SUBSCRIPTIONS:
+    feed = feedparser.parse(url)
+    try:
+        blog = feed['feed']['title']
+    except KeyError:
+        # Skip this URL if the parsed result has no feed title.
+        continue
+    for entry in feed['entries']:
+        post = process_entry(entry, blog)
+        if post:
+            posts.append(post)
+
+# Get the template, and drop in the posts.  The template and the feed
+# content both contain non-ASCII text, so read and write as UTF-8
+# explicitly rather than relying on the platform's default encoding.
+with open('template.html', encoding='utf-8') as f:
+    template = jinja2.Template(f.read())
+
+with open('output.html', 'w', encoding='utf-8') as f:
+    f.write(template.render(posts=posts, time=datetime.now()))
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+feedparser==5.2.1
+Jinja2==2.8
+MarkupSafe==0.23
+pytz==2015.7
+wheel==0.24.0
diff --git a/style.css b/style.css
@@ -0,0 +1,275 @@
+body {
+    font: 12pt Georgia, Palatino, 'Palatino Linotype', Times, 'Times New Roman', serif;
+                                        /* font: serif */
+    color: #222;                        /* body-gray */
+}
+
+a {
+    color: #732c7b;
+}
+
+a:hover {
+    text-decoration: none;
+}
+
+a:visited {
+    color: #421c52;
+}
+
+hr {
+    background-color: #eee;
+    height: 1px;
+    border: none;
+}
+
+hr.between_posts {
+    margin-top: 3em;
+    margin-bottom: 3em;
+}
+
+body {
+    margin: 0;
+    padding: 0;
+}
+
+#header_text {
+  padding: 1px 20px 0px 20px;
+  color: #eeddf9;
+}
+
+#header_text a {
+  color: #eeddf9;
+  text-decoration: none;
+}
+
+img {
+    margin-left:  auto;
+    margin-right: auto;
+    display: block;
+}
+
+code {
+    margin:  2px;
+    padding: 2px;
+}
+
+code, pre {
+    font-family: Menlo, monospace;      /* font: mono */
+    background-color: #eee;             /* light-gray */
+    font-size: 0.9em;                   /* subfont-size */
+}
+
+/**
+ * The overflow-x line ensures that extra text only appears within the
+ * confines of the gray background, and doesn't spill out onto the page.
+ * Effectively the <pre> becomes a 'window' into the code beneath.
+ */
+pre {
+    padding: 10px;
+    line-height: 1.35em;
+    overflow-x: auto;
+}
+
+
+/*------------------------------------*\
+  # Footnotes
+\*------------------------------------*/
+
+/**
+ * Footnote text is set slightly smaller than the body text.
+ * NOTE(review): this comment used to describe a padding-top adding space
+ * above the footnote section, but no padding-top is set in this rule.
+ */
+.footnote {
+  font-size: 0.9em;                     /* subfont-size */
+}
+
+/**
+ * These rules help the positioning of the footnote markers, although I'm not
+ * entirely sure how they work.
+ */
+sup, sub {
+  vertical-align: 0ex;
+  position: relative;
+}
+
+sup { bottom: 1ex; }
+sub { top: 0.8ex; }
+
+
+
+/*------------------------------------*\
+  # Article titles
+\*------------------------------------*/
+
+.article_title a,
+.permalink a,
+.continue_reading {
+    color: #732c7b !important;          /* same purple as unvisited links */
+}
+
+.fullpost_title a {
+    text-decoration: none;
+    font-size: 1.5em;
+    line-height: 1.5em;
+}
+
+/**
+ * The article_meta class covers permalinks and posting dates
+ */
+.article_meta {
+    font-size: 0.9em;                   /* subfont-size */
+    color: #999;                        /* accent-gray */
+}
+
+.linkpost_arrow {
+    color: #999;                        /* accent-gray */
+}
+
+.permalink a {
+    font-size: 1.2em;
+    text-decoration: none;
+}
+
+/**
+ * Adjust the spacing around titles to make them look nice
+ */
+.linkpost_title {
+    margin-bottom: -0.5em;
+}
+
+.fullpost_title {
+    margin-bottom: -0.3em;
+}
+
+
+
+/*------------------------------------*\
+  # Blockquotes
+\*------------------------------------*/
+
+blockquote {
+    border-left: 5px solid #ccc;        /* gray rule down the left edge */
+    margin-left:  15px;
+    margin-right: 0px;
+    padding: 1px 15px;
+    color: #666;                        /* blockquote-gray */
+    font-style: italic;
+}
+
+blockquote p {
+    margin-top: 10px;
+    margin-bottom: 10px;
+}
+
+
+
+/*------------------------------------*\
+  # Tweets
+\*------------------------------------*/
+
+/**
+ * I think Dr. Drang wrote this originally?  Whatever, I have it
+ * inlined so that all my CSS comes down in a single file.
+ */
+
+.bbpBox {
+  width: 80%;
+  background: #8ec2da;
+  margin-left: auto;
+  margin-right: auto;
+  padding: 1em;
+  margin-top: 1em;
+  margin-bottom: 1.1em;
+/*  margin: 1em 0em 1.1em 0em;*/
+  font-family: Georgia !important;
+}
+
+.bbpBox blockquote {
+  background-color: white;
+  margin: 0em !important;
+  padding: .75em .75em .5em .75em !important;
+  /* Vendor-prefixed forms first, then the standard property. */
+  -moz-border-radius: 5px;
+  -webkit-border-radius: 5px;
+  border-radius: 5px;
+  border-left-style: none !important;
+  font-style: normal !important;
+  line-height: 1.5em;
+  color: #222;
+}
+
+.bbpBox blockquote a {
+  color: blue;
+  text-decoration: none;
+}
+
+.bbpBox blockquote a:hover {
+  text-decoration: underline;
+}
+
+.bbpBox blockquote .twMeta {
+  font-size: 80%;
+}
+
+/* A small gap beneath the tweet text. */
+.bbpBox blockquote .twContent {
+  margin-bottom: .25em;
+}
+
+body {
+  background-color: #140623;
+  max-width: 750px;
+  margin-top: 0;
+  margin-left: auto;
+  margin-right: auto;
+  padding-top: 0;
+}
+
+h1 {
+  font-size: 2.5em;
+}
+
+.rss {
+  list-style-type: none;
+  margin: 0;
+  padding: .5em 1em 1em 1.5em;
+  background-color: white;
+  margin-bottom: 2em;
+}
+
+.rss li {
+  margin-left: -.5em;
+  line-height: 1.4;
+}
+
+.rss li pre {
+  overflow: auto;
+}
+
+img, figure, iframe {
+  max-width: 700px;
+  height: auto !important;
+}
+
+@media screen and (max-width: 700px) {
+  img, figure, iframe {
+    max-width: 100% !important;
+  }
+}
+
+
+.footnotes {
+    font-size: 0.85em;
+}
+
+a code {
+    text-decoration: none !important;
+}
+
+footer {
+  color: #eeddf9;
+  text-align: center;
+  margin-bottom: 2.2em;
+  font-size: 0.85em;
+}
+
+footer a {
+  color: #eeddf9 !important;
+}
diff --git a/template.html b/template.html
@@ -0,0 +1,47 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <meta name="apple-mobile-web-app-capable" content="yes" />
+  <link rel="stylesheet" type="text/css" href="style.css">
+  <title>drangReader: Today’s RSS</title>
+</head>
+<body>
+  <aside>
+    <div id="header_text"><h1><a href="javascript:window.location.reload()">drangReader</a></h1></div>
+  </aside>
+  <ul class="rss">
+  {# Newest posts first #}
+  {% for post in posts|sort|reverse %}
+  <li>
+    <div class="article_title">
+      <h3 class="{% if post.permalink %}link{% else %}full{% endif %}post_title"><a href="{{ post.link }}">{{ post.title }}</a>{% if post.permalink %} <span class="linkpost_arrow">→</span>{% endif %}</h3>
+    </div>
+    <div class="article_meta"><p>
+      {# lstrip, not strip: only the leading zero of the day/hour should go, not a trailing zero of the year #}
+      Posted on {{ post.time.strftime('%d %B %Y').lstrip('0') }} at {{ post.time.strftime('%I:%M&thinsp;%p').lstrip('0').lower() }}
+      • {{ post.blog }}
+      {% if post.author and post.blog != post.author %}
+        • by {{ post.author }}
+      {% endif %}
+      {% if post.permalink %}
+       • <span class="permalink"><a href="{{ post.permalink }}">∞</a></span>
+      {% endif %}
+    </p></div>
+    {{ post.body|safe }}
+    {# Put a line between posts, but only if this isn't the last one #}
+    {% if not loop.last %}<hr class="between_posts"/>{% endif %}
+  </li>
+  {% endfor %}
+  </ul>
+  <footer>
+    <p>
+        Made by <a href="http://alexwlchan.net">Alex Chan</a>.
+        Based on a script by <a href="http://leancrew.com/all-this/2015/11/simpler-syndication/">Dr Drang</a>.
+    </p>
+    <p>
+        Last updated on {{ time.strftime('%d %B %Y').lstrip('0') }} at {{ time.strftime('%I:%M&thinsp;%p').lstrip('0').lower() }}.
+    </p>
+  </footer>
+</body>
+</html>
No results found