From 95964862b79d58fa6b3b9312c3a1f9bea83595df Mon Sep 17 00:00:00 2001
From: cellarspoon
Date: Wed, 15 Dec 2021 11:30:10 +0100
Subject: [PATCH] init and pull scripts in

---
 README.md                                    |   3 +
 lumbung-calendar-prototype/.gitignore        |   2 +
 lumbung-calendar-prototype/README.md         |   9 +
 lumbung-calendar-prototype/event_feed.py     | 194 ++++++++++++++
 lumbung-calendar-prototype/event_template.md |  21 ++
 lumbung-calendar-prototype/requirements.txt  |  16 ++
 lumbung-feed-aggregator/.gitignore           |   2 +
 lumbung-feed-aggregator/README.md            |  11 +
 lumbung-feed-aggregator/feeds_list.txt       |  11 +
 lumbung-feed-aggregator/post_template.md     |  13 +
 lumbung-feed-aggregator/rss_aggregator.py    | 248 ++++++++++++++++++
 lumbung-hashtag-bot/.gitignore               |   3 +
 lumbung-hashtag-bot/README.md                |  30 +++
 lumbung-hashtag-bot/post_template.md         |  14 ++
 lumbung-hashtag-bot/publish_hashtags.py      | 137 ++++++++++
 lumbung-video-prototype/README.md            |  27 ++
 lumbung-video-prototype/index_template.md    |  15 ++
 lumbung-video-prototype/requirements.txt     |  12 +
 lumbung-video-prototype/video-feed.html      | 251 +++++++++++++++++++
 lumbung-video-prototype/video-feed.py        | 131 ++++++++++
 pyproject.toml                               |  15 ++
 21 files changed, 1165 insertions(+)
 create mode 100644 README.md
 create mode 100644 lumbung-calendar-prototype/.gitignore
 create mode 100644 lumbung-calendar-prototype/README.md
 create mode 100644 lumbung-calendar-prototype/event_feed.py
 create mode 100644 lumbung-calendar-prototype/event_template.md
 create mode 100644 lumbung-calendar-prototype/requirements.txt
 create mode 100644 lumbung-feed-aggregator/.gitignore
 create mode 100644 lumbung-feed-aggregator/README.md
 create mode 100644 lumbung-feed-aggregator/feeds_list.txt
 create mode 100644 lumbung-feed-aggregator/post_template.md
 create mode 100644 lumbung-feed-aggregator/rss_aggregator.py
 create mode 100644 lumbung-hashtag-bot/.gitignore
 create mode 100644 lumbung-hashtag-bot/README.md
 create mode 100644 lumbung-hashtag-bot/post_template.md
 create mode 100644 lumbung-hashtag-bot/publish_hashtags.py
 create mode 100644 lumbung-video-prototype/README.md
 create mode 100644 lumbung-video-prototype/index_template.md
 create mode 100644 lumbung-video-prototype/requirements.txt
 create mode 100644 lumbung-video-prototype/video-feed.html
 create mode 100644 lumbung-video-prototype/video-feed.py
 create mode 100644 pyproject.toml

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1b78df7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# lumbunglib
+
+> Python lib which powers `lumbung[dot]space` automation
diff --git a/lumbung-calendar-prototype/.gitignore b/lumbung-calendar-prototype/.gitignore
new file mode 100644
index 0000000..54d69e5
--- /dev/null
+++ b/lumbung-calendar-prototype/.gitignore
@@ -0,0 +1,2 @@
+event_feed_config.py
+__pycache__
diff --git a/lumbung-calendar-prototype/README.md b/lumbung-calendar-prototype/README.md
new file mode 100644
index 0000000..62fe8ff
--- /dev/null
+++ b/lumbung-calendar-prototype/README.md
@@ -0,0 +1,9 @@
+# Calendar Feed
+Generate HUGO posts based on a publicly accessible ICS calendar.
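+
+## Example config
+
+`event_feed.py` imports `calendar_url` and `output_dir` from `event_feed_config.py` (gitignored). A minimal sketch, with placeholder values:
+
+```python
+# event_feed_config.py -- example values only
+calendar_url = "https://example.org/shared/calendar.ics"  # publicly accessible ICS feed
+output_dir = "content/events"  # directory the HUGO posts are written to
+```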
+
+## Use
+Fill in your details in `event_feed_config.py`
+
+## TODO / FIXME
+
+ * Multiple calendars to multiple hugo categories
diff --git a/lumbung-calendar-prototype/event_feed.py b/lumbung-calendar-prototype/event_feed.py
new file mode 100644
index 0000000..d11fb1c
--- /dev/null
+++ b/lumbung-calendar-prototype/event_feed.py
@@ -0,0 +1,194 @@
+#!/bin/python3
+
+# lumbung.space calendar feed generator
+# © 2021 roel roscam abbing gplv3 etc
+
+from ics import Calendar
+import requests
+import jinja2
+import os
+import shutil
+from slugify import slugify
+from natural import date
+from event_feed_config import calendar_url, output_dir
+from urllib.parse import urlparse
+import arrow
+import re
+
+cal = Calendar(requests.get(calendar_url).text)
+
+env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.curdir))
+
+if not os.path.exists(output_dir):
+    os.mkdir(output_dir)
+
+template = env.get_template('event_template.md')
+
+existing_posts = os.listdir(output_dir)
+
+def findURLs(string):
+    """
+    Return all URLs in a given string.
+    """
+    regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
+    url = re.findall(regex, string)
+    return [x[0] for x in url]
+
+def find_imageURLS(string):
+    """
+    Return all image URLs in a given string.
+    """
+    regex = r"(?:http\:|https\:)?\/\/.*?\.(?:png|jpg|jpeg|gif|svg)"
+
+    img_urls = re.findall(regex, string, flags=re.IGNORECASE)
+    return img_urls
+
+def create_metadata(event):
+    """
+    Construct a formatted dict of event metadata for use as frontmatter for a HUGO post.
+    """
+
+    if event.location:
+        location_urls = findURLs(event.location)
+
+        if location_urls:
+            location_url = location_urls[0]
+            event.location = '[{}]({})'.format(urlparse(location_url).netloc, location_url)
+
+    event_metadata = {
+        'name': event.name,
+        'created': event.created.format(),
+        'description': event.description,
+        'localized_begin': '           '.join(localize_time(event.begin)),  # non-breaking space characters to defeat markdown
+        'begin': event.begin.format(),
+        'end': event.end.format(),
+        'duration': date.compress(event.duration),
+        'location': event.location,
+        'uid': event.uid,
+        'images': find_imageURLS(event.description),  # currently not used in template
+    }
+
+    return event_metadata
+
+def localize_time(date):
+    """
+    Turn a given date into various timezones.
+    Takes arrow objects.
+    """
+
+    # 3 PM Kassel, Germany, 4 PM Ramallah/Jerusalem, Palestina (QoF),
+    # 8 AM Bogota, Colombia (MaMa), 8 PM Jakarta, Indonesia (Gudskul),
+    # 1 PM (+1day) Wellington, New Zealand (Fafswag), 9 AM Havana, Cuba (Instar).
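+    # e.g. an event beginning 2021-06-18 15:00 in Kassel is rendered once per
+    # timezone in the list below, as '__Kassel__ 2021-06-18 __15:00__' etc.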
+
+    tzs = [
+        ('Kassel', 'Europe/Berlin'),
+        ('Bamako', 'Africa/Bamako'),
+        ('Palestine', 'Asia/Jerusalem'),
+        ('Bogota', 'America/Bogota'),
+        ('Jakarta', 'Asia/Jakarta'),
+        ('Makassar', 'Asia/Makassar'),
+        ('Wellington', 'Pacific/Auckland'),
+    ]
+
+    localized_begins = []
+    for location, tz in tzs:
+        # javascript-style formatting because of string creation from hell
+        localized_begins.append(
+            '__{}__ {}'.format(
+                str(location),
+                str(date.to(tz).format("YYYY-MM-DD __HH:mm__"))
+            )
+        )
+    return localized_begins
+
+def create_event_post(post_dir, event):
+    """
+    Create a HUGO post based on calendar event metadata.
+    Searches for image URLs in the description and downloads them.
+    This function is also called when a post needs updating,
+    in which case it also deletes images no longer in the metadata.
+    TODO: split this up into more functions for legibility.
+    """
+
+    if not os.path.exists(post_dir):
+        os.mkdir(post_dir)
+
+    event_metadata = create_metadata(event)
+
+    # list the already existing images so we can
+    # delete them later if they are no longer in the event metadata
+    existing_images = os.listdir(post_dir)
+    for fn in ('index.md', '.timestamp'):
+        if fn in existing_images:
+            existing_images.remove(fn)
+
+    for img in event_metadata['images']:
+
+        # parse the image url into a safe local image name
+        img_name = img.split('/')[-1]
+        fn, ext = os.path.splitext(img_name)
+        img_name = slugify(fn) + ext
+
+        local_image = os.path.join(post_dir, img_name)
+
+        if not os.path.exists(local_image):
+            # download the image
+            response = requests.get(img, stream=True)
+            with open(local_image, 'wb') as img_file:
+                shutil.copyfileobj(response.raw, img_file)
+            print('Downloaded image for event "{}"'.format(event.name))
+        event_metadata['description'] = event_metadata['description'].replace(img, '![]({})'.format(img_name))
+        if img_name in existing_images:
+            existing_images.remove(img_name)
+
+    for left_over_image in existing_images:
+        # remove images we have locally but which are no longer in the remote event
+        os.remove(os.path.join(post_dir, left_over_image))
+        print('deleted image', left_over_image)
+
+    with open(os.path.join(post_dir, 'index.md'), 'w') as f:
+        post = template.render(event=event_metadata)
+        f.write(post)
+        print('created post for', event.name, '({})'.format(event.uid))
+
+    with open(os.path.join(post_dir, '.timestamp'), 'w') as f:
+        f.write(event_metadata['created'])
+
+
+def update_event_post(post_dir, event):
+    """
+    Update a post based on the VEVENT 'created' field, which changes when the event is updated.
+    """
+    if os.path.exists(post_dir):
+        old_timestamp = open(os.path.join(post_dir, '.timestamp')).read()
+        if event.created > arrow.get(old_timestamp):
+            print('Updating', event.name, '({})'.format(event.uid))
+            create_event_post(post_dir, event)
+        else:
+            print('Event current:', event.name, '({})'.format(event.uid))
+
+for event in list(cal.events):
+
+    post_dir = os.path.join(output_dir, event.uid)
+
+    if event.uid not in existing_posts:
+        # if there is an event we don't already have, make it
+        create_event_post(post_dir, event)
+
+    elif event.uid in existing_posts:
+        # if we already have it, update it
+        update_event_post(post_dir, event)
+        existing_posts.remove(event.uid)  # what's left over are posts no longer returned by the calendar
+
+
+for post in existing_posts:
+    # remove events not returned by the calendar (deletions)
+    print('deleted', post)
+    shutil.rmtree(os.path.join(output_dir, post))
diff --git a/lumbung-calendar-prototype/event_template.md b/lumbung-calendar-prototype/event_template.md
new file mode 100644
index 0000000..441f3da
--- /dev/null
+++ b/lumbung-calendar-prototype/event_template.md
@@ -0,0 +1,21 @@
+---
+title: "{{ event.name }}"
+date: "{{ event.begin }}" #2021-06-10T10:46:33+02:00
+draft: false
+categories: "calendar"
+event_begin: "{{ event.begin }}"
+event_end: "{{ event.end }}"
+duration: "{{ event.duration }}"
+localized_begin: "{{ event.localized_begin }}"
+uid: "{{ event.uid }}"
+{% if event.location %}
+location: "{{ event.location }}"
+{% endif %}
+
+
+---
+{% if event.description %}
+
+{{ event.description }}
+
+{% endif %}
diff --git a/lumbung-calendar-prototype/requirements.txt b/lumbung-calendar-prototype/requirements.txt
new file mode 100644
index 0000000..356637c
--- /dev/null
+++ b/lumbung-calendar-prototype/requirements.txt
@@ -0,0 +1,16 @@
+# Automatically generated by https://github.com/damnever/pigar.
+
+# calendar-feed/event_feed.py: 3
+Jinja2 == 2.10
+
+# calendar-feed/event_feed.py: 1
+ics == 0.7
+
+# calendar-feed/event_feed.py: 6
+natural == 0.2.0
+
+# calendar-feed/event_feed.py: 5
+python_slugify == 5.0.2
+
+# calendar-feed/event_feed.py: 2
+requests == 2.21.0
diff --git a/lumbung-feed-aggregator/.gitignore b/lumbung-feed-aggregator/.gitignore
new file mode 100644
index 0000000..ecf1da3
--- /dev/null
+++ b/lumbung-feed-aggregator/.gitignore
@@ -0,0 +1,2 @@
+network/
+etags/
diff --git a/lumbung-feed-aggregator/README.md b/lumbung-feed-aggregator/README.md
new file mode 100644
index 0000000..97d32e9
--- /dev/null
+++ b/lumbung-feed-aggregator/README.md
@@ -0,0 +1,11 @@
+# lumbung feed aggregator
+
+* Grab feeds listed in `feeds_list.txt`
+* Parse feeds for blogpost entries
+* Download images linked in blogposts
+* Turn blogpost entries into HUGO posts
+
+# TODO/FIXME
+
+* only include posts with a certain tag
+
diff --git a/lumbung-feed-aggregator/feeds_list.txt b/lumbung-feed-aggregator/feeds_list.txt
new file mode 100644
index 0000000..7334acb
--- /dev/null
+++ b/lumbung-feed-aggregator/feeds_list.txt
@@ -0,0 +1,11 @@
+https://www.masartemasaccion.org/feed/
+https://fafswag.wordpress.com/feed/
+https://wajukuuarts.wordpress.com/feed/
+https://inland.org/feed/
+https://jatiwangiartfactory.tumblr.com/rss/
+https://brittoartstrust.org/feed/
+https://artivismo.org/feed/
+http://www.festivalsegou.org/spip.php?page=backend&lang=fr
+https://gudskul.art/feed/
+https://projectartworks.org/feed/
+https://ruangrupa.id/feed/
\ No newline at end of file
diff --git a/lumbung-feed-aggregator/post_template.md b/lumbung-feed-aggregator/post_template.md
new file mode 100644
index 0000000..9dbc449
--- /dev/null
+++ b/lumbung-feed-aggregator/post_template.md
@@ -0,0 +1,13 @@
+---
+title: "{{ frontmatter.title }}"
+date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00
+draft: false
+summary: "{{ frontmatter.summary }}"
+author: "{{ frontmatter.author }}"
+original_link: "{{ frontmatter.original_link }}"
+feed_name: "{{ frontmatter.feed_name }}"
+categories: ["network", "{{ frontmatter.feed_name }}"]
+tags: {{ frontmatter.tags }}
+---
+
+{{ content }}
\ No newline at end of file
diff --git a/lumbung-feed-aggregator/rss_aggregator.py b/lumbung-feed-aggregator/rss_aggregator.py
new file mode 100644
index 0000000..0f65c93
--- /dev/null
+++ b/lumbung-feed-aggregator/rss_aggregator.py
@@ -0,0 +1,248 @@
+#!/bin/python3
+
+# lumbung.space rss feed aggregator
+# © 2021 roel roscam abbing gplv3 etc
+
+import requests
+import jinja2
+import os
+import shutil
+import feedparser
+from urllib.parse import urlparse
+from ast import literal_eval as make_tuple
+from slugify import slugify
+from bs4 import BeautifulSoup
+import time
+import arrow
+
+
+def write_etag(feed_name, feed_data):
+    """
+    Save the etag and 'last modified' timestamp of the feed
+    """
+    etag = ''
+    modified = ''
+
+    if 'etag' in feed_data:
+        etag = feed_data.etag
+    if 'modified' in feed_data:
+        modified = feed_data.modified
+
+    if etag or modified:
+        with open(os.path.join('etags', feed_name + '.txt'), 'w') as f:
+            f.write(str((etag, modified)))
+
+def get_etag(feed_name):
+    """
+    Return the etag and 'last modified' timestamp of the feed
+    """
+    fn = os.path.join('etags', feed_name + '.txt')
+    etag = ''
+    modified = ''
+
+    if os.path.exists(fn):
+        etag, modified = make_tuple(open(fn, 'r').read())
+
+    return etag, modified
+
+def create_frontmatter(entry):
+    """
+    Parse RSS entry metadata and return it as frontmatter
+    """
+    if 'published' in entry:
+        published = entry.published_parsed
+    if 'updated' in entry:
+        published = entry.updated_parsed
+
+    published = arrow.get(published)
+
+    if 'author' in entry:
+        author = entry.author
+    else:
+        author = ''
+
+    tags = []
+    if 'tags' in entry:
+        # TODO finish categories
+        for t in entry.tags:
+            tags.append(t['term'])
+
+    frontmatter = {
+        'title': entry.title,
+        'date': published.format(),
+        'summary': '',
+        'author': author,
+        'original_link': entry.link,
+        'feed_name': entry['feed_name'],
+        'tags': str(tags),
+    }
+
+    return frontmatter
+
+def create_post(post_dir, entry):
+    """
+    Write a HUGO post based on an RSS entry
+    """
+    frontmatter = create_frontmatter(entry)
+
+    if not os.path.exists(post_dir):
+        os.makedirs(post_dir)
+
+    if 'content' in entry:
+        post_content = entry.content[0].value
+    else:
+        post_content = entry.summary
+
+    parsed_content = parse_posts(post_dir, post_content)
+
+    with open(os.path.join(post_dir, 'index.html'), 'w') as f:  # n.b. .html
+        post = template.render(frontmatter=frontmatter, content=parsed_content)
+        f.write(post)
+        print('created post for', entry.title, '({})'.format(entry.link))
+
+def grab_media(post_directory, url):
+    """
+    Download media linked in the post so we have a local copy.
+    If the download succeeds return the local filename, otherwise return the original url.
+    """
+    image = urlparse(url).path.split('/')[-1]
+
+    try:
+        if not os.path.exists(os.path.join(post_directory, image)):
+            # TODO: stream=True means we could inspect the headers first, mimetype etc.
+            response = requests.get(url, stream=True)
+            if response.ok:
+                with open(os.path.join(post_directory, image), 'wb') as img_file:
+                    shutil.copyfileobj(response.raw, img_file)
+                print('Downloaded cover image', image)
+        return image
+
+    except Exception as e:
+        print('Failed to download image', url)
+        print(e)
+        return url
+
+
+def parse_posts(post_dir, post_content):
+    """
+    Parse the post content for media items:
+    replace foreign images with a local copy and
+    filter out iframe sources not in the allowlist.
+    """
+    soup = BeautifulSoup(post_content, "html.parser")
+    allowed_iframe_sources = ['youtube.com', 'vimeo.com', 'tv.lumbung.space']
+
+    for img in soup(['img', 'object']):
+        local_image = grab_media(post_dir, img['src'])
+        if img['src'] != local_image:
+            img['src'] = local_image
+
+    for iframe in soup(['iframe']):
+        if not any(source in iframe['src'] for source in allowed_iframe_sources):
+            print('filtered iframe: {}...'.format(iframe['src'][:25]))
+            iframe.decompose()
+    return soup.decode()
+
+def grab_feed(feed_url):
+    """
+    Check whether the feed has been updated;
+    download & return it if it has
+    """
+    feed_name = urlparse(feed_url).netloc
+
+    etag, modified = get_etag(feed_name)
+
+    try:
+        if modified:
+            data = feedparser.parse(feed_url, modified=modified)
+        elif etag:
+            data = feedparser.parse(feed_url, etag=etag)
+        else:
+            data = feedparser.parse(feed_url)
+    except Exception as e:
+        print('Error grabbing feed')
+        print(feed_name)
+        print(e)
+        return False
+
+    print(data.status, feed_url)
+    # 304 means the feed has not been modified since we last checked
+    if data.status == 200:
+        write_etag(feed_name, data)
+        return data
+    return False
+
+
+feed_urls = open('feeds_list.txt', 'r').read().splitlines()
+
+start = time.time()
+
+if not os.path.exists('etags'):
+    os.mkdir('etags')
+
+
+env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.curdir))
+
+output_dir = os.environ.get('OUTPUT_DIR', '/home/r/Programming/lumbung.space/lumbung.space-web/content/posts/')
+#output_dir = os.environ.get('OUTPUT_DIR', 'network/')
+
+if not os.path.exists(output_dir):
+    os.makedirs(output_dir)
+
+template = env.get_template('post_template.md')
+
+# add iframe to the allowlist of feedparser's sanitizer;
+# filtering of iframe sources is handled in parse_posts()
+feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {'iframe'}
+
+for feed_url in feed_urls:
+
+    feed_name = urlparse(feed_url).netloc
+
+    feed_dir = os.path.join(output_dir, feed_name)
+
+    if not os.path.exists(feed_dir):
+        os.makedirs(feed_dir)
+
+    existing_posts = os.listdir(feed_dir)
+
+    data = grab_feed(feed_url)
+
+    if data:
+        for entry in data.entries:
+            # TODO: only include posts with a certain tag, e.g.:
+            # if 'tags' in entry:
+            #     for tag in entry.tags:
+            #         for x in ['lumbung.space', 'D15', 'lumbung']:
+            #             if x in tag['term']:
+            #                 print(entry.title)
+            entry['feed_name'] = feed_name
+
+            post_name = slugify(entry.title)
+            post_dir = os.path.join(output_dir, feed_name, post_name)
+
+            if post_name not in existing_posts:
+                # if there is a blog entry we don't already have, make it
+                create_post(post_dir, entry)
+
+            elif post_name in existing_posts:
+                # if we already have it, update it
+                create_post(post_dir, entry)
+                existing_posts.remove(post_name)  # what's left over are posts no longer returned by the feed
+
+        for post in existing_posts:
+            # remove blog posts no longer returned by the RSS feed
+            print('deleted', post)
+            shutil.rmtree(os.path.join(feed_dir, slugify(post)))
+
+
+end = time.time()
+
+print(end - start)
diff --git a/lumbung-hashtag-bot/.gitignore b/lumbung-hashtag-bot/.gitignore
new file mode 100644
index 0000000..8afa646
--- /dev/null
+++ b/lumbung-hashtag-bot/.gitignore
@@ -0,0 +1,3 @@
+config_hashtag_bot.py
+*.secret
+__pycache__/*
diff --git a/lumbung-hashtag-bot/README.md b/lumbung-hashtag-bot/README.md
new file mode 100644
index 0000000..618a3ac
--- /dev/null
+++ b/lumbung-hashtag-bot/README.md
@@ -0,0 +1,30 @@
+# lumbung.space hashtag publishing bot
+
+This script makes [Hugo page bundles](https://gohugo.io/content-management/page-bundles/) out of hashtag feeds on a Mastodon Hometown or Glitchsoc instance.
+
+## Install requirements
+
+`pip3 install Mastodon.py jinja2`
+
+## Setup
+
+This script requires access to an account on a Mastodon instance. The instance and the credentials are set in `config_hashtag_bot.py`.
+
+If it is the first time you are running the script, you need to register the application on the Mastodon instance. Have a look at the [Mastodon.py documentation](https://mastodonpy.readthedocs.io/en/stable/#module-mastodon) for how to do that.
+
+This bot only uses read permissions.
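+
+A minimal sketch of `config_hashtag_bot.py` (gitignored). The field names follow what `publish_hashtags.py` and this README refer to; the values are placeholders:
+
+```python
+# config_hashtag_bot.py -- placeholder values
+instance = "https://social.example.org"  # base URL of the Mastodon instance
+email = "bot@example.org"                # login email of the bot account
+password = "correct-horse-battery"       # login password
+hashtags = ["lumbung"]                   # hashtags to publish, without the '#'
+```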
+
+Set which hashtags you want to publish by adding them to the list `hashtags` in `config_hashtag_bot.py`. Omit the '#'.
+
+## What it does
+
+* The bot only looks at the **local timeline** for posts under each hashtag configured in `config_hashtag_bot.py`.
+* This means posts need to be **public** or directly addressed to the bot.
+* This script respects the mental model of 'local only' posts, in the sense that people do not expect them to appear elsewhere. So **local only posts are ignored**.
+* It takes only posts with media attached, and of those only the images.
+
+## What it doesn't do
+
+* Handle other types of media or embeds
+* Thread recreation: each post is treated as a top-level post
+
diff --git a/lumbung-hashtag-bot/post_template.md b/lumbung-hashtag-bot/post_template.md
new file mode 100644
index 0000000..6aeff3e
--- /dev/null
+++ b/lumbung-hashtag-bot/post_template.md
@@ -0,0 +1,14 @@
+---
+date: "{{ post_metadata.created_at }}" #2021-06-10T10:46:33+02:00
+draft: false
+author: "{{ post_metadata.account.display_name }}"
+avatar: "{{ post_metadata.account.avatar }}"
+categories: ["shouts"]
+tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}]
+---
+
+{% for item in post_metadata.media_attachments %}
+{{ item.description }}
+{% endfor %}
+
+{{ post_metadata.content | filter_mastodon_urls }}
\ No newline at end of file
diff --git a/lumbung-hashtag-bot/publish_hashtags.py b/lumbung-hashtag-bot/publish_hashtags.py
new file mode 100644
index 0000000..09e09d7
--- /dev/null
+++ b/lumbung-hashtag-bot/publish_hashtags.py
@@ -0,0 +1,137 @@
+# lumbung.space hashtag publishing bot
+# © 2021 roel roscam abbing agplv3
+# Makes Hugo posts out of hashtag feeds on Mastodon.
+# Requires an account on the configured Mastodon instance.
+# Currently does not do any thread recreation and only handles images.
+
+import os
+import shutil
+
+import requests
+import jinja2
+
+from mastodon import Mastodon
+
+import config_hashtag_bot
+
+def login_mastodon_bot():
+    mastodon = Mastodon(
+        client_id='publishbot_clientcred.secret',
+        api_base_url=config_hashtag_bot.instance
+    )
+
+    mastodon.log_in(
+        config_hashtag_bot.email,
+        config_hashtag_bot.password,
+        to_file='publishbot_usercred.secret',
+        scopes=['read']
+    )
+
+    return mastodon
+
+def create_frontmatter(post_metadata):
+    """
+    Parse post metadata and return it as HUGO frontmatter
+    """
+
+    frontmatter = ""
+    return frontmatter
+
+def download_media(post_directory, media_attachments):
+    """
+    Download media attached to posts. N.b. currently only images.
+    See: https://mastodonpy.readthedocs.io/en/stable/#media-dicts
+    """
+
+    for item in media_attachments:
+        if item['type'] == 'image':
+            image = localize_media_url(item['url'])
+            # TODO check whether this needs to handle delete & redraft with different images
+            if not os.path.exists(os.path.join(post_directory, image)):
+                # download image
+                response = requests.get(item['url'], stream=True)
+                with open(os.path.join(post_directory, image), 'wb') as img_file:
+                    shutil.copyfileobj(response.raw, img_file)
+                print('Downloaded cover image', image)
+
+def create_post(post_directory, post_metadata):
+    """
+    Create a Hugo post based on the toots/posts returned in the timeline.
+    See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts
+    """
+
+    if not os.path.exists(post_directory):
+        os.mkdir(post_directory)
+
+    with open(os.path.join(post_directory, 'index.html'), 'w') as f:
+        post = template.render(post_metadata=post_metadata)
+        f.write(post)
+
+    download_media(post_directory, post_metadata['media_attachments'])
+
+def localize_media_url(url):
+    """
+    Returns the local filename of a media url; also used as a custom jinja filter
+    """
+    return url.split('/')[-1]
+
+
+def filter_mastodon_urls(content):
+    """
+    Filters out Mastodon-generated URLs for tags,
+    e.g.
[the remainder of publish_hashtags.py and the hunks for lumbung-video-prototype/README.md, index_template.md, requirements.txt and video-feed.html are garbled beyond recovery; only stray template fragments survived]
diff --git a/lumbung-video-prototype/video-feed.py b/lumbung-video-prototype/video-feed.py
new file mode 100644
index 0000000..15f7da3
--- /dev/null
+++ b/lumbung-video-prototype/video-feed.py
@@ -0,0 +1,131 @@
+#!/bin/python3
+
+# lumbung.space video feed generator
+# © 2021 roel roscam abbing gplv3 etc
+
+import peertube
+import jinja2
+import json
+import os
+import datetime
+import shutil
+import requests
+import ast
+import arrow
+
+
+# jinja filters & config
+def duration(n):
+    """
+    Convert a duration in seconds, e.g. 6655, into '1:50:55'
+    """
+    return str(datetime.timedelta(seconds=n))
+
+def linebreaks(text):
+    if not text:
+        return text
+    else:
+        import re
+        br = re.compile(r"(\r\n|\r|\n)")
+        return br.sub(r"<br />\n", text)
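+
+# worked examples for the two filters above:
+#   duration(6655)      -> '1:50:55'
+#   linebreaks('a\nb')  -> 'a<br />\nb'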
+
+
+env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.curdir))
+env.filters['duration'] = duration
+env.filters['linebreaks'] = linebreaks
+
+host = 'https://tv.lumbung.space'
+
+configuration = peertube.Configuration(
+    host=host + "/api/v1"
+)
+
+client = peertube.ApiClient(configuration)
+
+v = peertube.VideoApi(client)
+
+response = v.videos_get(count=100, filter='local', tags_one_of='publish')
+
+videos = response.to_dict()
+videos = videos['data']
+
+
+def create_post(post_directory, video_metadata):
+    global client  # lazy
+
+    if not os.path.exists(post_directory):
+        os.mkdir(post_directory)
+
+    preview_image = video_metadata['preview_path'].split('/')[-1]
+
+    if not os.path.exists(os.path.join(post_directory, preview_image)):
+        # download the preview image
+        response = requests.get(host + video_metadata['preview_path'], stream=True)
+        with open(os.path.join(post_directory, preview_image), 'wb') as img_file:
+            shutil.copyfileobj(response.raw, img_file)
+        print('Downloaded cover image')
+
+    # replace the truncated description with the full video description
+    # the peertube api returns some broken thing in between a python dict and a json string
+    api_response = peertube.VideoApi(client).videos_id_description_get(video_metadata['uuid'])
+    long_description = ast.literal_eval(api_response)
+    video_metadata['description'] = long_description['description']
+
+    with open(os.path.join(post_directory, 'index.md'), 'w') as f:
+        post = template.render(v=video_metadata, host=host, preview_image=preview_image)
+        f.write(post)
+
+    with open(os.path.join(post_directory, '.timestamp'), 'w') as f:
+        timestamp = arrow.get(video_metadata['updated_at'])
+        f.write(timestamp.format('X'))
+
+def update_post(post_directory, video_metadata):
+    if os.path.exists(post_directory):
+        if os.path.exists(os.path.join(post_directory, '.timestamp')):
+            old_timestamp = open(os.path.join(post_directory, '.timestamp')).read()
+
+            # FIXME: this is ugly but needed because arrow drops the milliseconds
+            current_timestamp = arrow.get(video_metadata['updated_at'])
+            current_timestamp = arrow.get(current_timestamp.format('X'))
+
+            if current_timestamp > arrow.get(old_timestamp):
+                print('Updating', video_metadata['name'], '({})'.format(video_metadata['uuid']))
+                create_post(post_directory, video_metadata)
+            else:
+                print('Video current:', video_metadata['name'], '({})'.format(video_metadata['uuid']))
+        else:
+            # compat for when there is no timestamp yet
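+            # (i.e. posts created before this script wrote .timestamp files)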
+            create_post(post_directory, video_metadata)
+
+
+output_dir = os.environ.get('OUTPUT_DIR', '/home/r/Programming/lumbung.space/lumbung.space-web/content/video')
+
+if not os.path.exists(output_dir):
+    os.mkdir(output_dir)
+
+template = env.get_template('index_template.md')
+
+existing_posts = os.listdir(output_dir)
+
+for video_metadata in videos:
+    post_dir = os.path.join(output_dir, video_metadata['uuid'])
+
+    if video_metadata['uuid'] not in existing_posts:
+        # if there is a video we don't already have, make it
+        print('New:', video_metadata['name'], '({})'.format(video_metadata['uuid']))
+        create_post(post_dir, video_metadata)
+
+    elif video_metadata['uuid'] in existing_posts:
+        # if we already have the video, update it if needed
+        update_post(post_dir, video_metadata)
+        existing_posts.remove(video_metadata['uuid'])  # what's left over are posts no longer returned by peertube
+
+for post in existing_posts:
+    # remove posts no longer returned by peertube
+    print('deleted', post)
+    shutil.rmtree(os.path.join(output_dir, post))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..3e1f22e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,15 @@
+[tool.poetry]
+name = "lumbunglib"
+version = "0.1.0"
+description = "Python lib which powers lumbung[dot]space automation"
+authors = ["rra", "decentral1se"]
+license = "GPLv3+"
+
+[tool.poetry.dependencies]
+python = "^3.9"
+
+[tool.poetry.dev-dependencies]
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"