cli commands

This commit is contained in:
cellarspoon
2021-12-15 12:23:37 +01:00
parent 30dbc6212f
commit b385833cbe
6 changed files with 154 additions and 190 deletions

View File

@ -1,8 +1,3 @@
#!/bin/python3
# lumbung.space rss feed aggregator
# © 2021 roel roscam abbing gplv3 etc
import os
import shutil
import time
@ -100,6 +95,9 @@ def create_post(post_dir, entry):
parsed_content = parse_posts(post_dir, post_content)
template_dir = os.path.join(Path(__file__).parent.resolve(), "templates")
env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
template = env.get_template("feed.md")
with open(os.path.join(post_dir, "index.html"), "w") as f: # n.b. .html
post = template.render(frontmatter=frontmatter, content=parsed_content)
f.write(post)
@ -140,7 +138,6 @@ def parse_posts(post_dir, post_content):
"""
soup = BeautifulSoup(post_content, "html.parser")
allowed_iframe_sources = ["youtube.com", "vimeo.com", "tv.lumbung.space"]
media = []
for img in soup(["img", "object"]):
local_image = grab_media(post_dir, img["src"])
@ -184,70 +181,64 @@ def grab_feed(feed_url):
return False
feed_urls = open("feeds_list.txt", "r").read().splitlines()
def main():
feed_urls = open("feeds_list.txt", "r").read().splitlines()
start = time.time()
start = time.time()
if not os.path.exists("etags"):
os.mkdir("etags")
if not os.path.exists("etags"):
os.mkdir("etags")
output_dir = os.environ.get("OUTPUT_DIR")
template_dir = os.path.join(Path(__file__).parent.resolve(), "templates")
env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
if not os.path.exists(output_dir):
os.makedirs(output_dir)
output_dir = os.environ.get("OUTPUT_DIR")
# add iframe to the allowlist of feedparser's sanitizer,
# this is now handled in parse_posts()
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
if not os.path.exists(output_dir):
os.makedirs(output_dir)
for feed_url in feed_urls:
template = env.get_template("feed.md")
feed_name = urlparse(feed_url).netloc
# add iframe to the allowlist of feedparser's sanitizer,
# this is now handled in parse_posts()
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
feed_dir = os.path.join(output_dir, feed_name)
for feed_url in feed_urls:
if not os.path.exists(feed_dir):
os.makedirs(feed_dir)
feed_name = urlparse(feed_url).netloc
existing_posts = os.listdir(feed_dir)
feed_dir = os.path.join(output_dir, feed_name)
data = grab_feed(feed_url)
if not os.path.exists(feed_dir):
os.makedirs(feed_dir)
if data:
for entry in data.entries:
# if 'tags' in entry:
# for tag in entry.tags:
# for x in ['lumbung.space', 'D15', 'lumbung']:
# if x in tag['term']:
# print(entry.title)
entry["feed_name"] = feed_name
existing_posts = os.listdir(feed_dir)
post_name = slugify(entry.title)
post_dir = os.path.join(output_dir, feed_name, post_name)
data = grab_feed(feed_url)
if post_name not in existing_posts:
# if there is a blog entry we don't already have, make it
create_post(post_dir, entry)
if data:
for entry in data.entries:
# if 'tags' in entry:
# for tag in entry.tags:
# for x in ['lumbung.space', 'D15', 'lumbung']:
# if x in tag['term']:
# print(entry.title)
entry["feed_name"] = feed_name
elif post_name in existing_posts:
# if we already have it, update it
create_post(post_dir, entry)
existing_posts.remove(
post_name
) # create list of posts which have not been returned by the feed
post_name = slugify(entry.title)
post_dir = os.path.join(output_dir, feed_name, post_name)
for post in existing_posts:
# remove blog posts no longer returned by the RSS feed
print("deleted", post)
shutil.rmtree(os.path.join(feed_dir, slugify(post)))
if post_name not in existing_posts:
# if there is a blog entry we don't already have, make it
create_post(post_dir, entry)
end = time.time()
elif post_name in existing_posts:
# if we already have it, update it
create_post(post_dir, entry)
existing_posts.remove(
post_name
) # create list of posts which have not been returned by the feed
for post in existing_posts:
# remove blog posts no longer returned by the RSS feed
print("deleted", post)
shutil.rmtree(os.path.join(feed_dir, slugify(post)))
end = time.time()
print(end - start)
print(end - start)