cli commands
This commit is contained in:
@ -1,8 +1,3 @@
|
||||
#!/bin/python3
|
||||
|
||||
# lumbung.space rss feed aggregator
|
||||
# © 2021 roel roscam abbing gplv3 etc
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
@ -100,6 +95,9 @@ def create_post(post_dir, entry):
|
||||
|
||||
parsed_content = parse_posts(post_dir, post_content)
|
||||
|
||||
template_dir = os.path.join(Path(__file__).parent.resolve(), "templates")
|
||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
|
||||
template = env.get_template("feed.md")
|
||||
with open(os.path.join(post_dir, "index.html"), "w") as f: # n.b. .html
|
||||
post = template.render(frontmatter=frontmatter, content=parsed_content)
|
||||
f.write(post)
|
||||
@ -140,7 +138,6 @@ def parse_posts(post_dir, post_content):
|
||||
"""
|
||||
soup = BeautifulSoup(post_content, "html.parser")
|
||||
allowed_iframe_sources = ["youtube.com", "vimeo.com", "tv.lumbung.space"]
|
||||
media = []
|
||||
|
||||
for img in soup(["img", "object"]):
|
||||
local_image = grab_media(post_dir, img["src"])
|
||||
@ -184,70 +181,64 @@ def grab_feed(feed_url):
|
||||
return False
|
||||
|
||||
|
||||
feed_urls = open("feeds_list.txt", "r").read().splitlines()
|
||||
def main():
|
||||
feed_urls = open("feeds_list.txt", "r").read().splitlines()
|
||||
|
||||
start = time.time()
|
||||
start = time.time()
|
||||
|
||||
if not os.path.exists("etags"):
|
||||
os.mkdir("etags")
|
||||
if not os.path.exists("etags"):
|
||||
os.mkdir("etags")
|
||||
|
||||
output_dir = os.environ.get("OUTPUT_DIR")
|
||||
|
||||
template_dir = os.path.join(Path(__file__).parent.resolve(), "templates")
|
||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
output_dir = os.environ.get("OUTPUT_DIR")
|
||||
# add iframe to the allowlist of feedparser's sanitizer,
|
||||
# this is now handled in parse_posts()
|
||||
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
|
||||
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
for feed_url in feed_urls:
|
||||
|
||||
template = env.get_template("feed.md")
|
||||
feed_name = urlparse(feed_url).netloc
|
||||
|
||||
# add iframe to the allowlist of feedparser's sanitizer,
|
||||
# this is now handled in parse_posts()
|
||||
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
|
||||
feed_dir = os.path.join(output_dir, feed_name)
|
||||
|
||||
for feed_url in feed_urls:
|
||||
if not os.path.exists(feed_dir):
|
||||
os.makedirs(feed_dir)
|
||||
|
||||
feed_name = urlparse(feed_url).netloc
|
||||
existing_posts = os.listdir(feed_dir)
|
||||
|
||||
feed_dir = os.path.join(output_dir, feed_name)
|
||||
data = grab_feed(feed_url)
|
||||
|
||||
if not os.path.exists(feed_dir):
|
||||
os.makedirs(feed_dir)
|
||||
if data:
|
||||
for entry in data.entries:
|
||||
# if 'tags' in entry:
|
||||
# for tag in entry.tags:
|
||||
# for x in ['lumbung.space', 'D15', 'lumbung']:
|
||||
# if x in tag['term']:
|
||||
# print(entry.title)
|
||||
entry["feed_name"] = feed_name
|
||||
|
||||
existing_posts = os.listdir(feed_dir)
|
||||
post_name = slugify(entry.title)
|
||||
post_dir = os.path.join(output_dir, feed_name, post_name)
|
||||
|
||||
data = grab_feed(feed_url)
|
||||
if post_name not in existing_posts:
|
||||
# if there is a blog entry we dont already have, make it
|
||||
create_post(post_dir, entry)
|
||||
|
||||
if data:
|
||||
for entry in data.entries:
|
||||
# if 'tags' in entry:
|
||||
# for tag in entry.tags:
|
||||
# for x in ['lumbung.space', 'D15', 'lumbung']:
|
||||
# if x in tag['term']:
|
||||
# print(entry.title)
|
||||
entry["feed_name"] = feed_name
|
||||
elif post_name in existing_posts:
|
||||
# if we already have it, update it
|
||||
create_post(post_dir, entry)
|
||||
existing_posts.remove(
|
||||
post_name
|
||||
) # create list of posts which have not been returned by the feed
|
||||
|
||||
post_name = slugify(entry.title)
|
||||
post_dir = os.path.join(output_dir, feed_name, post_name)
|
||||
for post in existing_posts:
|
||||
# remove blog posts no longer returned by the RSS feed
|
||||
print("deleted", post)
|
||||
shutil.rmtree(os.path.join(feed_dir, slugify(post)))
|
||||
|
||||
if post_name not in existing_posts:
|
||||
# if there is a blog entry we dont already have, make it
|
||||
create_post(post_dir, entry)
|
||||
end = time.time()
|
||||
|
||||
elif post_name in existing_posts:
|
||||
# if we already have it, update it
|
||||
create_post(post_dir, entry)
|
||||
existing_posts.remove(
|
||||
post_name
|
||||
) # create list of posts which have not been returned by the feed
|
||||
|
||||
for post in existing_posts:
|
||||
# remove blog posts no longer returned by the RSS feed
|
||||
print("deleted", post)
|
||||
shutil.rmtree(os.path.join(feed_dir, slugify(post)))
|
||||
|
||||
|
||||
end = time.time()
|
||||
|
||||
print(end - start)
|
||||
print(end - start)
|
||||
|
Reference in New Issue
Block a user