#!/bin/python3
# lumbunglib/lumbung-calendar-prototype/event_feed.py
# lumbung.space calendar feed generator
# © 2021 roel roscam abbing gplv3 etc
2021-12-15 10:30:10 +00:00
import os
2021-12-15 10:41:35 +00:00
import re
2021-12-15 10:30:10 +00:00
import shutil
from urllib.parse import urlparse
2021-12-15 10:41:35 +00:00
2021-12-15 10:30:10 +00:00
import arrow
2021-12-15 10:41:35 +00:00
import jinja2
import requests
from ics import Calendar
from natural import date
from slugify import slugify
2021-12-15 10:43:12 +00:00
# a publicly accessible ICS calendar
calendar_url = os.environ.get("CALENDAR_URL", "")
# your Hugo content directory
output_dir = os.environ.get("OUTPUT_DIR", "")

# Download the calendar and stop on an HTTP error instead of handing the
# body of an error page to the ICS parser.
calendar_response = requests.get(calendar_url)
calendar_response.raise_for_status()
cal = Calendar(calendar_response.text)

# templates are looked up relative to the current working directory
env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.curdir))

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

template = env.get_template("event_template.md")

# entries already present in the output directory; posts are stored in
# directories named after the event uid
existing_posts = os.listdir(output_dir)
2021-12-15 10:41:35 +00:00
2021-12-15 10:30:10 +00:00
def findURLs(string):
    """
    Collect every URL that occurs in the given string.

    Returns a list with the matched URL texts in order of appearance;
    the list is empty when the string contains no URL.
    """
    url_pattern = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"

    # the first group wraps the whole pattern, so it holds the complete URL
    return [match.group(1) for match in re.finditer(url_pattern, string)]
2021-12-15 10:30:10 +00:00
def find_imageURLS(string):
    """
    return all image URLS in a given string

    An image URL is any run of non-whitespace text that starts with
    `http://`, `https://` or `//` and ends in .png, .jpg, .jpeg, .gif or
    .svg (matched case-insensitively).

    Returns a list of the matched strings in order of appearance. An empty
    or missing (None) input yields an empty list.
    """
    if not string:
        return []

    # \S instead of . so that a match can never run across whitespace from
    # a non-image URL into a later image URL
    regex = r"(?:http\:|https\:)?\/\/\S*?\.(?:png|jpg|jpeg|gif|svg)"

    return re.findall(regex, string, flags=re.IGNORECASE)
2021-12-15 10:30:10 +00:00
def create_metadata(event):
    """
    construct a formatted dict of event metadata for use as frontmatter for HUGO post

    `event` is an event from the parsed calendar. It is only read: the
    markdown version of the location is stored in the returned dict and the
    event object itself is left untouched.

    Returns a dict with the keys name, created, description,
    localized_begin, begin, end, duration, location, uid and images.
    """
    location = event.location
    if location:
        location_urls = findURLs(location)
        if location_urls:
            # turn the first URL found into a markdown link labelled with
            # its host name
            location_url = location_urls[0]
            location = "[{}]({})".format(
                urlparse(location_url).netloc, location_url
            )

    event_metadata = {
        "name": event.name,
        "created": event.created.format(),
        "description": event.description,
        "localized_begin": "           ".join(
            localize_time(event.begin)
        ),  # non-breaking space characters to defeat markdown
        "begin": event.begin.format(),
        "end": event.end.format(),
        "duration": date.compress(event.duration),
        "location": location,
        "uid": event.uid,
        # an event without a description has no text to scan for images
        "images": find_imageURLS(
            event.description or ""
        ),  # currently not used in template
    }

    return event_metadata
2021-12-15 10:41:35 +00:00
2021-12-15 10:30:10 +00:00
def localize_time(date):
    """
    Turn a given date into various timezones
    Takes arrow objects

    Returns a list of markdown strings, one per location, of the form
    `__<location>__ YYYY-MM-DD __HH:mm__`.
    """
    # (label shown in the post, tz database name)
    tzs = [
        ("Kassel", "Europe/Berlin"),
        # Bamako has no daylight saving time; Europe/London does and would
        # be an hour off for part of the year
        ("Bamako", "Africa/Bamako"),
        ("Palestine", "Asia/Jerusalem"),
        ("Bogota", "America/Bogota"),
        ("Jakarta", "Asia/Jakarta"),
        ("Makassar", "Asia/Makassar"),
        ("Wellington", "Pacific/Auckland"),
    ]

    return [
        "__{}__ {}".format(location, date.to(tz).format("YYYY-MM-DD __HH:mm__"))
        for location, tz in tzs
    ]
2021-12-15 10:41:35 +00:00
2021-12-15 10:30:10 +00:00
def _local_image_name(img_url):
    """Turn an image URL into a safe local file name (slugified stem plus extension)."""
    img_name = img_url.split("/")[-1]
    # split on the last dot only, so names such as "my.photo.jpg" work too
    fn, ext = img_name.rsplit(".", 1)
    return slugify(fn) + "." + ext


def _download_image(img_url, local_image):
    """Stream a remote image to `local_image`; return True when the file was written."""
    with requests.get(img_url, stream=True) as response:
        if not response.ok:
            # do not store the body of an error response as if it were an image
            return False
        # let urllib3 undo any gzip/deflate transfer encoding while copying
        response.raw.decode_content = True
        with open(local_image, "wb") as img_file:
            shutil.copyfileobj(response.raw, img_file)
    return True


def create_event_post(post_dir, event):
    """
    Create HUGO post based on calendar event metadata
    Searches for image URLS in description and downloads them
    Function is also called when post is in need of updating
    In that case it will also delete images no longer in metadata

    post_dir: directory of the post; created when it does not exist yet.
    event: the calendar event to render.

    Writes `index.md` (the rendered template) and `.timestamp` (the event's
    'created' value) into post_dir. An image that cannot be downloaded is
    reported and its remote URL is left as-is in the description.
    """
    if not os.path.exists(post_dir):
        os.mkdir(post_dir)

    event_metadata = create_metadata(event)

    # list already existing images
    # so we can later delete them if we dont find them in the event metadata anymore
    # (the two generated files are never images, whether or not both exist)
    existing_images = [
        name
        for name in os.listdir(post_dir)
        if name not in ("index.md", ".timestamp")
    ]

    for img in event_metadata["images"]:
        # parse img url to safe local image name
        img_name = _local_image_name(img)
        local_image = os.path.join(post_dir, img_name)

        if not os.path.exists(local_image):
            # download preview image
            if not _download_image(img, local_image):
                print('Could not download image for event "{}":'.format(event.name), img)
                continue
            print('Downloaded image for event "{}"'.format(event.name))

        # point the description at the local copy
        event_metadata["description"] = event_metadata["description"].replace(
            img, "![]({})".format(img_name)
        )

        if img_name in existing_images:
            existing_images.remove(img_name)

    for left_over_image in existing_images:
        # remove images we found, but which are no longer in remote event
        os.remove(os.path.join(post_dir, left_over_image))
        print("deleted image", left_over_image)

    # render before opening the file so a template error cannot leave an
    # empty index.md behind
    post = template.render(event=event_metadata)
    # the rendered post contains non-ASCII text, so fix the encoding instead
    # of relying on the locale default
    with open(os.path.join(post_dir, "index.md"), "w", encoding="utf-8") as f:
        f.write(post)
        print("created post for", event.name, "({})".format(event.uid))

    with open(os.path.join(post_dir, ".timestamp"), "w", encoding="utf-8") as f:
        f.write(event_metadata["created"])
2021-12-15 10:30:10 +00:00
def update_event_post(post_dir, event):
    """
    Update a post based on the calendar event 'created' field which changes when updated

    post_dir: directory of the existing post; nothing happens when it does
        not exist.
    event: the calendar event as currently returned by the calendar.

    The post is regenerated when the event's 'created' value is newer than
    the one stored in the post's `.timestamp` file, or when that file is
    missing or empty (for example after an interrupted earlier run).
    """
    if not os.path.exists(post_dir):
        return

    try:
        with open(os.path.join(post_dir, ".timestamp")) as f:
            old_timestamp = f.read().strip()
    except FileNotFoundError:
        # no record of the last build: treat the post as out of date
        old_timestamp = ""

    if not old_timestamp or event.created > arrow.get(old_timestamp):
        print("Updating", event.name, "({})".format(event.uid))
        create_event_post(post_dir, event)
    else:
        print("Event current: ", event.name, "({})".format(event.uid))
2021-12-15 10:30:10 +00:00
# Synchronise the output directory with the calendar: create posts for new
# events, refresh posts of known events and remove posts whose event is no
# longer returned by the calendar.
for event in cal.events:
    post_dir = os.path.join(output_dir, event.uid)

    if event.uid not in existing_posts:
        # if there is an event we dont already have, make it
        create_event_post(post_dir, event)
    else:
        # if we already have it, update
        update_event_post(post_dir, event)
        # create list of posts which have not been returned by the calendar
        existing_posts.remove(event.uid)

for post in existing_posts:
    # remove events not returned by the calendar (deletion)
    stale_path = os.path.join(output_dir, post)
    if os.path.islink(stale_path) or not os.path.isdir(stale_path):
        # shutil.rmtree only handles real directories; leave plain files and
        # symlinks in the output directory alone instead of crashing on them
        continue
    print("deleted", post)
    shutil.rmtree(stale_path)