deps and autoformat

This commit is contained in:
cellarspoon 2021-12-15 11:41:35 +01:00
parent 95964862b7
commit be3e14ac6c
No known key found for this signature in database
GPG Key ID: 03789458B3D0C410
16 changed files with 610 additions and 387 deletions

View File

@ -1,9 +0,0 @@
# Calendar Feed
Generate HUGO posts based on a publicly accessible ICS calendar.
## Use
Fill in your details in `calendar_feed_config.py`
## TODO / FIXME
* Multiple calendars to multiple hugo categories

View File

@ -3,31 +3,32 @@
# lumbung.space calendar feed generator
# © 2021 roel roscam abbing gplv3 etc
from ics import Calendar
import requests
import jinja2
import os
import shutil
from slugify import slugify
from natural import date
from event_feed_config import calendar_url, output_dir
from urllib.parse import urlparse
import arrow
import re
import shutil
from urllib.parse import urlparse
import arrow
import jinja2
import requests
from ics import Calendar
from natural import date
from slugify import slugify
from event_feed_config import calendar_url, output_dir
cal = Calendar(requests.get(calendar_url).text)
env = jinja2.Environment(
loader=jinja2.FileSystemLoader(os.path.curdir)
)
env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.curdir))
if not os.path.exists(output_dir):
os.mkdir(output_dir)
template = env.get_template('event_template.md')
template = env.get_template("event_template.md")
existing_posts = os.listdir(output_dir)
def findURLs(string):
"""
return all URLs in a given string
@ -36,6 +37,7 @@ def findURLs(string):
url = re.findall(regex, string)
return [x[0] for x in url]
def find_imageURLS(string):
"""
return all image URLS in a given string
@ -45,6 +47,7 @@ def find_imageURLS(string):
img_urls = re.findall(regex, string, flags=re.IGNORECASE)
return img_urls
def create_metadata(event):
"""
construct a formatted dict of event metadata for use as frontmatter for HUGO post
@ -55,24 +58,28 @@ def create_metadata(event):
if location_urls:
location_url = location_urls[0]
event.location = '[{}]({})'.format(urlparse(location_url).netloc, location_url)
event.location = "[{}]({})".format(
urlparse(location_url).netloc, location_url
)
event_metadata = {
'name':event.name,
'created':event.created.format(),
'description': event.description,
'localized_begin': '           '.join(localize_time(event.begin)), #non-breaking space characters to defeat markdown
'begin': event.begin.format(),
'end': event.end.format(),
'duration': date.compress(event.duration),
'location': event.location,
'uid': event.uid,
'images' : find_imageURLS(event.description) # currently not used in template
"name": event.name,
"created": event.created.format(),
"description": event.description,
"localized_begin": "           ".join(
localize_time(event.begin)
), # non-breaking space characters to defeat markdown
"begin": event.begin.format(),
"end": event.end.format(),
"duration": date.compress(event.duration),
"location": event.location,
"uid": event.uid,
"images": find_imageURLS(event.description), # currently not used in template
}
return event_metadata
def localize_time(date):
"""
Turn a given date into various timezones
@ -83,27 +90,26 @@ def localize_time(date):
# 8 AM Bogota, Colombia (MaMa), 8 PM Jakarta, Indonesia (Gudskul),
# 1 PM (+1day) Wellington, New Zealand (Fafswag), 9 AM Havana, Cuba (Instar).
tzs = [
('Kassel','Europe/Berlin'),
('Bamako', 'Europe/London'),
('Palestine','Asia/Jerusalem'),
('Bogota','America/Bogota'),
('Jakarta','Asia/Jakarta'),
('Makassar','Asia/Makassar'),
('Wellington', 'Pacific/Auckland')
("Kassel", "Europe/Berlin"),
("Bamako", "Europe/London"),
("Palestine", "Asia/Jerusalem"),
("Bogota", "America/Bogota"),
("Jakarta", "Asia/Jakarta"),
("Makassar", "Asia/Makassar"),
("Wellington", "Pacific/Auckland"),
]
localized_begins = []
for location, tz in tzs:
localized_begins.append( # javascript formatting because of string creation from hell
'__{}__ {}'.format(
str(location),
str(date.to(tz).format("YYYY-MM-DD __HH:mm__"))
"__{}__ {}".format(
str(location), str(date.to(tz).format("YYYY-MM-DD __HH:mm__"))
)
)
return localized_begins
def create_event_post(post_dir, event):
"""
Create HUGO post based on calendar event metadata
@ -122,42 +128,44 @@ def create_event_post(post_dir, event):
# so we can later delete them if we dont find them in the event metadata anymore
existing_images = os.listdir(post_dir)
try:
existing_images.remove('index.md')
existing_images.remove('.timestamp')
existing_images.remove("index.md")
existing_images.remove(".timestamp")
except:
pass
for img in event_metadata['images']:
for img in event_metadata["images"]:
# parse img url to safe local image name
img_name = img.split('/')[-1]
fn, ext = img_name.split('.')
img_name = slugify(fn) + '.' + ext
img_name = img.split("/")[-1]
fn, ext = img_name.split(".")
img_name = slugify(fn) + "." + ext
local_image = os.path.join(post_dir, img_name)
if not os.path.exists(local_image):
# download preview image
response = requests.get(img, stream=True)
with open(local_image, 'wb') as img_file:
with open(local_image, "wb") as img_file:
shutil.copyfileobj(response.raw, img_file)
print('Downloaded image for event "{}"'.format(event.name))
event_metadata['description'] = event_metadata['description'].replace(img, '![]({})'.format(img_name))
event_metadata["description"] = event_metadata["description"].replace(
img, "![]({})".format(img_name)
)
if img_name in existing_images:
existing_images.remove(img_name)
for left_over_image in existing_images:
# remove images we found, but which are no longer in remote event
os.remove(os.path.join(post_dir, left_over_image))
print('deleted image', left_over_image)
print("deleted image", left_over_image)
with open(os.path.join(post_dir,'index.md'),'w') as f:
with open(os.path.join(post_dir, "index.md"), "w") as f:
post = template.render(event=event_metadata)
f.write(post)
print('created post for', event.name, '({})'.format(event.uid))
print("created post for", event.name, "({})".format(event.uid))
with open(os.path.join(post_dir,'.timestamp'),'w') as f:
f.write(event_metadata['created'])
with open(os.path.join(post_dir, ".timestamp"), "w") as f:
f.write(event_metadata["created"])
def update_event_post(post_dir, event):
@ -165,12 +173,13 @@ def update_event_post(post_dir, event):
Update a post based on the VCARD event 'created' field which changes when updated
"""
if os.path.exists(post_dir):
old_timestamp = open(os.path.join(post_dir,'.timestamp')).read()
old_timestamp = open(os.path.join(post_dir, ".timestamp")).read()
if event.created > arrow.get(old_timestamp):
print('Updating', event.name, '({})'.format(event.uid))
print("Updating", event.name, "({})".format(event.uid))
create_event_post(post_dir, event)
else:
print('Event current: ', event.name, '({})'.format(event.uid))
print("Event current: ", event.name, "({})".format(event.uid))
for event in list(cal.events):
@ -183,12 +192,12 @@ for event in list(cal.events):
elif event.uid in existing_posts:
# if we already have it, update
update_event_post(post_dir, event)
existing_posts.remove(event.uid) # create list of posts which have not been returned by the calendar
existing_posts.remove(
event.uid
) # create list of posts which have not been returned by the calendar
for post in existing_posts:
# remove events not returned by the calendar (deletion)
print('deleted', post)
print("deleted", post)
shutil.rmtree(os.path.join(output_dir, post))

View File

@ -11,9 +11,8 @@ uid: "{{ event.uid }}"
{% if event.location %}
location: "{{ event.location }}"
{% endif %}
---
{% if event.description %}
{{ event.description }}

View File

@ -1,16 +0,0 @@
# Automatically generated by https://github.com/damnever/pigar.
# calendar-feed/event_feed.py: 3
Jinja2 == 2.10
# calendar-feed/event_feed.py: 1
ics == 0.7
# calendar-feed/event_feed.py: 6
natural == 0.2.0
# calendar-feed/event_feed.py: 5
python_slugify == 5.0.2
# calendar-feed/event_feed.py: 2
requests == 2.21.0

View File

@ -1,11 +0,0 @@
# lumbung feed aggregator
* Grab feeds listed in `feeds_list.txt`
* Parse feed for blogpost entries
  * Download images linked in blogposts
* Turn blogpost entries into HUGO posts
# TODO/FIXME
* only include posts with a certain tag

View File

@ -3,82 +3,86 @@
# lumbung.space rss feed aggregator
# © 2021 roel roscam abbing gplv3 etc
import requests
import jinja2
import os
import shutil
import feedparser
from urllib.parse import urlparse
from ast import literal_eval as make_tuple
from slugify import slugify
from bs4 import BeautifulSoup
import time
from ast import literal_eval as make_tuple
from urllib.parse import urlparse
import arrow
import feedparser
import jinja2
import requests
from bs4 import BeautifulSoup
from slugify import slugify
def write_etag(feed_name, feed_data):
"""
save timestamp of when feed was last modified
"""
etag = ''
modified = ''
etag = ""
modified = ""
if 'etag' in feed_data:
if "etag" in feed_data:
etag = feed_data.etag
if 'modified' in feed_data:
if "modified" in feed_data:
modified = feed_data.modified
if etag or modified:
with open(os.path.join('etags',feed_name +'.txt'),'w') as f:
with open(os.path.join("etags", feed_name + ".txt"), "w") as f:
f.write(str((etag, modified)))
def get_etag(feed_name):
"""
return timestamp of when feed was last modified
"""
fn = os.path.join('etags',feed_name +'.txt')
etag = ''
modified = ''
fn = os.path.join("etags", feed_name + ".txt")
etag = ""
modified = ""
if os.path.exists(fn):
etag, modified = make_tuple(open(fn,'r').read())
etag, modified = make_tuple(open(fn, "r").read())
return etag, modified
def create_frontmatter(entry):
"""
parse RSS metadata and return as frontmatter
"""
if 'published' in entry:
if "published" in entry:
published = entry.published_parsed
if 'updated' in entry:
if "updated" in entry:
published = entry.updated_parsed
published = arrow.get(published)
if 'author' in entry:
if "author" in entry:
author = entry.author
else:
author = ''
author = ""
tags = []
if 'tags' in entry:
if "tags" in entry:
# TODO finish categories
for t in entry.tags:
tags.append(t['term'])
tags.append(t["term"])
frontmatter = {
'title':entry.title,
'date': published.format(),
'summary': '',
'author': author,
'original_link': entry.link,
'feed_name': entry['feed_name'],
'tags': str(tags)
"title": entry.title,
"date": published.format(),
"summary": "",
"author": author,
"original_link": entry.link,
"feed_name": entry["feed_name"],
"tags": str(tags),
}
return frontmatter
def create_post(post_dir, entry):
"""
write hugo post based on RSS entry
@ -88,40 +92,41 @@ def create_post(post_dir, entry):
if not os.path.exists(post_dir):
os.makedirs(post_dir)
if 'content' in entry:
if "content" in entry:
post_content = entry.content[0].value
else:
post_content = entry.summary
parsed_content = parse_posts(post_dir, post_content)
with open(os.path.join(post_dir,'index.html'),'w') as f: #n.b. .html
with open(os.path.join(post_dir, "index.html"), "w") as f: # n.b. .html
post = template.render(frontmatter=frontmatter, content=parsed_content)
f.write(post)
print('created post for', entry.title, '({})'.format(entry.link))
print("created post for", entry.title, "({})".format(entry.link))
def grab_media(post_directory, url):
"""
download media linked in post to have local copy
if download succeeds return new local path otherwise return url
"""
image = urlparse(url).path.split('/')[-1]
image = urlparse(url).path.split("/")[-1]
try:
if not os.path.exists(os.path.join(post_directory, image)):
# TODO: stream is true is a conditional so we could check the headers for things, mimetype etc
response = requests.get(url, stream=True)
if response.ok:
with open(os.path.join(post_directory, image), 'wb') as img_file:
with open(os.path.join(post_directory, image), "wb") as img_file:
shutil.copyfileobj(response.raw, img_file)
print('Downloaded cover image', image)
print("Downloaded cover image", image)
return image
return image
elif os.path.exists(os.path.join(post_directory, image)):
return image
except Exception as e:
print('Failed to download image', url)
print("Failed to download image", url)
print(e)
return url
@ -133,20 +138,21 @@ def parse_posts(post_dir, post_content):
filter out iframe sources not in allowlist
"""
soup = BeautifulSoup(post_content, "html.parser")
allowed_iframe_sources = ['youtube.com', 'vimeo.com', 'tv.lumbung.space']
allowed_iframe_sources = ["youtube.com", "vimeo.com", "tv.lumbung.space"]
media = []
for img in soup(['img','object']):
local_image = grab_media(post_dir, img['src'])
if img['src'] != local_image:
img['src'] = local_image
for img in soup(["img", "object"]):
local_image = grab_media(post_dir, img["src"])
if img["src"] != local_image:
img["src"] = local_image
for iframe in soup(['iframe']):
if not any(source in iframe['src'] for source in allowed_iframe_sources):
print('filtered iframe: {}...'.format(iframe['src'][:25]))
for iframe in soup(["iframe"]):
if not any(source in iframe["src"] for source in allowed_iframe_sources):
print("filtered iframe: {}...".format(iframe["src"][:25]))
iframe.decompose()
return soup.decode()
def grab_feed(feed_url):
"""
check whether feed has been updated
@ -164,7 +170,7 @@ def grab_feed(feed_url):
else:
data = feedparser.parse(feed_url)
except Exception as e:
print('Error grabbing feed')
print("Error grabbing feed")
print(feed_name)
print(e)
return False
@ -177,29 +183,29 @@ def grab_feed(feed_url):
return False
feed_urls = open('feeds_list.txt','r').read().splitlines()
feed_urls = open("feeds_list.txt", "r").read().splitlines()
start = time.time()
if not os.path.exists('etags'):
os.mkdir('etags')
if not os.path.exists("etags"):
os.mkdir("etags")
env = jinja2.Environment(
loader=jinja2.FileSystemLoader(os.path.curdir)
env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.curdir))
output_dir = os.environ.get(
"OUTPUT_DIR", "/home/r/Programming/lumbung.space/lumbung.space-web/content/posts/"
)
output_dir = os.environ.get('OUTPUT_DIR', '/home/r/Programming/lumbung.space/lumbung.space-web/content/posts/')
# output_dir = os.environ.get('OUTPUT_DIR', 'network/')
if not os.path.exists(output_dir):
os.makedirs(output_dir)
template = env.get_template('post_template.md')
template = env.get_template("post_template.md")
# add iframe to the allowlist of feedparser's sanitizer,
# this is now handled in parse_post()
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {'iframe'}
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
for feed_url in feed_urls:
@ -221,7 +227,7 @@ for feed_url in feed_urls:
# for x in ['lumbung.space', 'D15', 'lumbung']:
# if x in tag['term']:
# print(entry.title)
entry['feed_name'] = feed_name
entry["feed_name"] = feed_name
post_name = slugify(entry.title)
post_dir = os.path.join(output_dir, feed_name, post_name)
@ -233,16 +239,16 @@ for feed_url in feed_urls:
elif post_name in existing_posts:
# if we already have it, update it
create_post(post_dir, entry)
existing_posts.remove(post_name) # create list of posts which have not been returned by the feed
existing_posts.remove(
post_name
) # create list of posts which have not been returned by the feed
for post in existing_posts:
# remove blog posts no longer returned by the RSS feed
print('deleted', post)
print("deleted", post)
shutil.rmtree(os.path.join(feed_dir, slugify(post)))
end = time.time()
print(end - start)

View File

@ -1,30 +0,0 @@
# lumbung.space hashtag publishing bot
This script makes [Hugo page bundles](https://gohugo.io/content-management/page-bundles/) out of Hashtag feeds on a Mastodon Hometown or Glitchsoc instance.
## Install requirements
`pip3 install Mastodon.py jinja2`
## Setup
This script requires access to an account on said Mastodon instance. This instance and the credentials can be set in `config_hashtag_bot.py`.
If it is the first time you are running the script, you need to register the application on the Mastodon instance. Have a look at the [Mastodon.py documentation](https://mastodonpy.readthedocs.io/en/stable/#module-mastodon) for how to do that.
This bot only uses read permissions.
Set which hashtags you want to publish by adding them to the list `hashtags` in `config_hashtag_bot.py`. Omit the '#'.
## What it does
* The Bot only looks at the **local timeline** for posts under each hashtag configured in `config_hashtag_bot.py`.
* This means posts need to be **public** or directly addressed to the bot
* This script respects the mental model of 'local only' posts in the sense that people do not expect them to appear elsewhere. So **local only posts are ignored**
* It takes only posts with Media attached and then only those with images
## What it doesn't do
* Different types of media or embeds
* No thread recreation, each post is treated as a top level post

View File

@ -5,28 +5,31 @@
# Currently does not do any thread recreation and only handles images
import os
import requests
import shutil
import jinja2
import requests
from mastodon import Mastodon
import config_hashtag_bot
def login_mastodon_bot():
mastodon = Mastodon(
client_id = 'publishbot_clientcred.secret',
api_base_url = config_hashtag_bot.instance
client_id="publishbot_clientcred.secret",
api_base_url=config_hashtag_bot.instance,
)
mastodon.log_in(
config_hashtag_bot.email,
config_hashtag_bot.password,
to_file = 'publishbot_usercred.secret', scopes=['read']
to_file="publishbot_usercred.secret",
scopes=["read"],
)
return mastodon
def create_frontmatter(post_metadata):
"""
Parse post metadata and return it as HUGO frontmatter
@ -35,6 +38,7 @@ def create_frontmatter(post_metadata):
frontmatter = ""
return frontmatter
def download_media(post_directory, media_attachments):
"""
Download media attached to posts. N.b. currently only images
@ -42,15 +46,16 @@ def download_media(post_directory, media_attachments):
"""
for item in media_attachments:
if item['type'] == 'image':
image = localize_media_url(item['url'])
if item["type"] == "image":
image = localize_media_url(item["url"])
# TODO check whether this needs to handle delete & redraft with different images
if not os.path.exists(os.path.join(post_directory, image)):
# download image
response = requests.get(item['url'], stream=True)
with open(os.path.join(post_directory, image), 'wb') as img_file:
response = requests.get(item["url"], stream=True)
with open(os.path.join(post_directory, image), "wb") as img_file:
shutil.copyfileobj(response.raw, img_file)
print('Downloaded cover image', image)
print("Downloaded cover image", image)
def create_post(post_directory, post_metadata):
"""
@ -61,17 +66,18 @@ def create_post(post_directory, post_metadata):
if not os.path.exists(post_directory):
os.mkdir(post_directory)
with open(os.path.join(post_directory,'index.html'),'w') as f:
with open(os.path.join(post_directory, "index.html"), "w") as f:
post = template.render(post_metadata=post_metadata)
f.write(post)
download_media(post_directory, post_metadata['media_attachments'])
download_media(post_directory, post_metadata["media_attachments"])
def localize_media_url(url):
"""
Returns the filename, used also as custom jinja filter
"""
return url.split('/')[-1]
return url.split("/")[-1]
def filter_mastodon_urls(content):
@ -89,15 +95,12 @@ mastodon = login_mastodon_bot()
output_dir = config_hashtag_bot.output_dir
env = jinja2.Environment(
loader=jinja2.FileSystemLoader(os.path.curdir)
)
env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.curdir))
env.filters['localize_media_url'] = localize_media_url
env.filters['filter_mastodon_urls'] = filter_mastodon_urls
template = env.get_template('post_template.md')
env.filters["localize_media_url"] = localize_media_url
env.filters["filter_mastodon_urls"] = filter_mastodon_urls
template = env.get_template("post_template.md")
if not os.path.exists(output_dir):
@ -112,26 +115,31 @@ for hashtag in config_hashtag_bot.hashtags:
existing_posts = os.listdir(hashtag_dir) # list all existing posts
timeline = mastodon.timeline_hashtag(hashtag, local=True, only_media=True) #returns max 20 queries and only with media
timeline = mastodon.fetch_remaining(timeline) #returns all the rest n.b. can take a while because of rate limit
timeline = mastodon.timeline_hashtag(
hashtag, local=True, only_media=True
) # returns max 20 queries and only with media
timeline = mastodon.fetch_remaining(
timeline
) # returns all the rest n.b. can take a while because of rate limit
for post_metadata in timeline:
post_dir = os.path.join(hashtag_dir, str(post_metadata['id']))
post_dir = os.path.join(hashtag_dir, str(post_metadata["id"]))
# if there is a post in the feed we dont already have locally, make it
if str(post_metadata['id']) not in existing_posts:
if str(post_metadata["id"]) not in existing_posts:
if not post_metadata['local_only']: #if you get an error here then you are using vanilla Mastodon, this is a Hometown or Glitch only feature
if not post_metadata[
"local_only"
]: # if you get an error here then you are using vanilla Mastodon, this is a Hometown or Glitch only feature
create_post(post_dir, post_metadata)
# if we already have the post do nothing, possibly update
elif str(post_metadata['id']) in existing_posts:
elif str(post_metadata["id"]) in existing_posts:
# update_post(post_dir, post_metadata)
existing_posts.remove(str(post_metadata['id'])) # create list of posts which have not been returned in the feed
existing_posts.remove(
str(post_metadata["id"])
) # create list of posts which have not been returned in the feed
for post in existing_posts:
print('deleted', post) #rm posts that exist but are no longer returned in feed
print("deleted", post) # rm posts that exist but are no longer returned in feed
shutil.rmtree(os.path.join(hashtag_dir, post))

View File

@ -1,27 +0,0 @@
# video feed prototypes
These scripts poll a peertube instance to return a list of videos and construct a static page for it using jinja2.
See it in action on <https://roelof.info/lumbung/>
## video-feed.py
Utility that returns Peertube videos tagged as `publish` and turns them in to `hugo` page bundles. Videos no longer tagged as `publish` are deleted.
### index-template.md
Jinja2 template of a hugo post for use with the above.
## streams-feed.py
Returns only livestreams and displays them differently depending on the tags associated with the video. E.g. audio stream or video stream. WIP.
### video-feed.html
The jinja template for creating video feeds. This is now used in the HUGO theme.
### video-feed-prototype.html
rendered example of above

View File

@ -9,7 +9,6 @@ channel_url: "{{ v.channel.url }}"
preview_image: "{{ preview_image }}"
categories: ["tv","{{ v.channel.display_name }}"]
is_live: {{ v.is_live }}
---
{{ v.description }}

View File

@ -1,12 +0,0 @@
# Automatically generated by https://github.com/damnever/pigar.
# video_feed/streams-feed.py: 7
# video_feed/video-feed.py: 7
Jinja2 == 2.10
# video_feed/streams-feed.py: 6
# video_feed/video-feed.py: 6
git+https://framagit.org/framasoft/peertube/clients/python.git
# video_feed/video-feed.py: 12
requests == 2.21.0

View File

@ -3,15 +3,16 @@
# lumbung.space video feed generator
# c 2021 roel roscam abbing gplv3 etc
import peertube
import jinja2
import ast
import datetime
import json
import os
import datetime
import shutil
import requests
import ast
import arrow
import jinja2
import peertube
import requests
# jinja filters & config
@ -22,35 +23,33 @@ def duration(n):
"""
return str(datetime.timedelta(seconds=n))
def linebreaks(text):
if not text:
return text
else:
import re
br = re.compile(r"(\r\n|\r|\n)")
return br.sub(r"<br />\n", text)
env = jinja2.Environment(
loader=jinja2.FileSystemLoader(os.path.curdir)
)
env.filters['duration'] = duration
env.filters['linebreaks'] = linebreaks
env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.curdir))
env.filters["duration"] = duration
env.filters["linebreaks"] = linebreaks
host = 'https://tv.lumbung.space'
host = "https://tv.lumbung.space"
configuration = peertube.Configuration(
host = host+"/api/v1"
)
configuration = peertube.Configuration(host=host + "/api/v1")
client = peertube.ApiClient(configuration)
v = peertube.VideoApi(client)
response = v.videos_get(count=100, filter='local', tags_one_of='publish')
response = v.videos_get(count=100, filter="local", tags_one_of="publish")
videos = response.to_dict()
videos = videos['data']
videos = videos["data"]
def create_post(post_directory, video_metadata):
@ -59,73 +58,87 @@ def create_post(post_directory, video_metadata):
if not os.path.exists(post_dir):
os.mkdir(post_directory)
preview_image = video_metadata['preview_path'].split('/')[-1]
preview_image = video_metadata["preview_path"].split("/")[-1]
if not os.path.exists(os.path.join(post_directory, preview_image)):
# download preview image
response = requests.get(host+video_metadata['preview_path'], stream=True)
with open(os.path.join(post_directory, preview_image), 'wb') as img_file:
response = requests.get(host + video_metadata["preview_path"], stream=True)
with open(os.path.join(post_directory, preview_image), "wb") as img_file:
shutil.copyfileobj(response.raw, img_file)
print('Downloaded cover image')
print("Downloaded cover image")
# replace the truncated description with the full video description
# peertube api is some broken thing in between a py dict and a json file
api_response = peertube.VideoApi(client).videos_id_description_get(video_metadata['uuid'])
api_response = peertube.VideoApi(client).videos_id_description_get(
video_metadata["uuid"]
)
long_description = ast.literal_eval(api_response)
video_metadata['description'] = long_description['description']
video_metadata["description"] = long_description["description"]
with open(os.path.join(post_directory,'index.md'),'w') as f:
with open(os.path.join(post_directory, "index.md"), "w") as f:
post = template.render(v=video_metadata, host=host, preview_image=preview_image)
f.write(post)
with open(os.path.join(post_directory, ".timestamp"), "w") as f:
timestamp = arrow.get(video_metadata["updated_at"])
f.write(timestamp.format("X"))
with open(os.path.join(post_directory, '.timestamp'), 'w') as f:
timestamp = arrow.get(video_metadata['updated_at'])
f.write(timestamp.format('X'))
def update_post(post_directory, video_metadata):
if os.path.exists(post_directory):
if os.path.exists(os.path.join(post_directory,'.timestamp')):
old_timestamp = open(os.path.join(post_directory,'.timestamp')).read()
if os.path.exists(os.path.join(post_directory, ".timestamp")):
old_timestamp = open(os.path.join(post_directory, ".timestamp")).read()
# FIXME: this is ugly but I need to do this because arrow removes miliseconds
current_timestamp = arrow.get(video_metadata['updated_at'])
current_timestamp = arrow.get(current_timestamp.format('X'))
current_timestamp = arrow.get(video_metadata["updated_at"])
current_timestamp = arrow.get(current_timestamp.format("X"))
if current_timestamp > arrow.get(old_timestamp):
print('Updating', video_metadata['name'], '({})'.format(video_metadata['uuid']))
print(
"Updating",
video_metadata["name"],
"({})".format(video_metadata["uuid"]),
)
create_post(post_dir, video_metadata)
else:
print('Video current: ', video_metadata['name'], '({})'.format(video_metadata['uuid']))
print(
"Video current: ",
video_metadata["name"],
"({})".format(video_metadata["uuid"]),
)
else:
# compat for when there is no timestamp yet..
create_post(post_dir, video_metadata)
output_dir = os.environ.get('OUTPUT_DIR', '/home/r/Programming/lumbung.space/lumbung.space-web/content/video')
output_dir = os.environ.get(
"OUTPUT_DIR", "/home/r/Programming/lumbung.space/lumbung.space-web/content/video"
)
if not os.path.exists(output_dir):
os.mkdir(output_dir)
template = env.get_template('index_template.md')
template = env.get_template("index_template.md")
existing_posts = os.listdir(output_dir)
for video_metadata in videos:
post_dir = os.path.join(output_dir, video_metadata['uuid'])
post_dir = os.path.join(output_dir, video_metadata["uuid"])
if video_metadata['uuid'] not in existing_posts: #if there is a video we dont already have, make it
print('New: ', video_metadata['name'], '({})'.format(video_metadata['uuid']))
if (
video_metadata["uuid"] not in existing_posts
): # if there is a video we dont already have, make it
print("New: ", video_metadata["name"], "({})".format(video_metadata["uuid"]))
create_post(post_dir, video_metadata)
elif video_metadata['uuid'] in existing_posts: # if we already have the video do nothing, possibly update
elif (
video_metadata["uuid"] in existing_posts
): # if we already have the video do nothing, possibly update
update_post(post_dir, video_metadata)
existing_posts.remove(video_metadata['uuid']) # create list of posts which have not been returned by peertube
existing_posts.remove(
video_metadata["uuid"]
) # create list of posts which have not been returned by peertube
for post in existing_posts:
print('deleted', post) #rm posts not returned
print("deleted", post) # rm posts not returned
shutil.rmtree(os.path.join(output_dir, post))

288
poetry.lock generated Normal file
View File

@ -0,0 +1,288 @@
[[package]]
name = "arrow"
version = "0.14.7"
description = "Better dates & times for Python"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.dependencies]
python-dateutil = "*"
[[package]]
name = "certifi"
version = "2021.10.8"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "charset-normalizer"
version = "2.0.9"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "main"
optional = false
python-versions = ">=3.5.0"
[package.extras]
unicode_backport = ["unicodedata2"]
[[package]]
name = "ics"
version = "0.7"
description = "Python icalendar (rfc5545) parser"
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
arrow = ">=0.11,<0.15"
python-dateutil = "*"
six = ">1.5"
tatsu = ">4.2"
[[package]]
name = "idna"
version = "3.3"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "jinja2"
version = "3.0.3"
description = "A very fast and expressive template engine."
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
MarkupSafe = ">=2.0"
[package.extras]
i18n = ["Babel (>=2.7)"]
[[package]]
name = "markupsafe"
version = "2.0.1"
description = "Safely add untrusted strings to HTML/XML markup."
category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "natural"
version = "0.2.0"
description = "Convert data to their natural (human-readable) format"
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
six = "*"
[[package]]
name = "peertube"
version = "1.0.0"
description = ""
category = "main"
optional = false
python-versions = "*"
develop = false
[package.dependencies]
certifi = "*"
python-dateutil = "*"
six = ">=1.10"
urllib3 = ">=1.15"
[package.source]
type = "git"
url = "https://framagit.org/framasoft/peertube/clients/python.git"
reference = "master"
resolved_reference = "ebcf44d663190276b154cbc6e9a74b0f15af5e60"
[[package]]
name = "python-dateutil"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
[package.dependencies]
six = ">=1.5"
[[package]]
name = "python-slugify"
version = "5.0.2"
description = "A Python Slugify application that handles Unicode"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
text-unidecode = ">=1.3"
[package.extras]
unidecode = ["Unidecode (>=1.1.1)"]
[[package]]
name = "requests"
version = "2.26.0"
description = "Python HTTP for Humans."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
[package.dependencies]
certifi = ">=2017.4.17"
charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""}
idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""}
urllib3 = ">=1.21.1,<1.27"
[package.extras]
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "tatsu"
version = "5.7.0"
description = "TatSu takes a grammar in a variation of EBNF as input, and outputs a memoizing PEG/Packrat parser in Python."
category = "main"
optional = false
python-versions = ">=3.8"
[package.extras]
future-regex = ["regex"]
[[package]]
name = "text-unidecode"
version = "1.3"
description = "The most basic Text::Unidecode port"
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "urllib3"
version = "1.26.7"
description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
[package.extras]
brotli = ["brotlipy (>=0.6.0)"]
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "9aee20bc6d4cacb5f2e41b8809a311af26a839bc5054cd243fed0324bfe3aa8e"
[metadata.files]
arrow = [
{file = "arrow-0.14.7-py2.py3-none-any.whl", hash = "sha256:4bfacea734ead51495dc47df00421ecfd4ca1f2c0fbe58b9a26eaeddedc31caf"},
{file = "arrow-0.14.7.tar.gz", hash = "sha256:67f8be7c0cf420424bc62d8d7dc40b44e4bb2f7b515f9cc2954fb36e35797656"},
]
certifi = [
{file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
{file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"},
]
charset-normalizer = [
{file = "charset-normalizer-2.0.9.tar.gz", hash = "sha256:b0b883e8e874edfdece9c28f314e3dd5badf067342e42fb162203335ae61aa2c"},
{file = "charset_normalizer-2.0.9-py3-none-any.whl", hash = "sha256:1eecaa09422db5be9e29d7fc65664e6c33bd06f9ced7838578ba40d58bdf3721"},
]
ics = [
{file = "ics-0.7-py2.py3-none-any.whl", hash = "sha256:bf5fbdef6e1e073afdadf1b996f0271186dd114a148e38e795919a1ae644d6ac"},
{file = "ics-0.7-py3.7.egg", hash = "sha256:3b606205b9582ad27dff77f9b227a30d02fdac532731927fe39df1f1ddf8673f"},
{file = "ics-0.7.tar.gz", hash = "sha256:81113a2bb3166c1afcd71cd450c968d40efc385601e9d8344733e00ad8f53429"},
]
idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
]
jinja2 = [
{file = "Jinja2-3.0.3-py3-none-any.whl", hash = "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8"},
{file = "Jinja2-3.0.3.tar.gz", hash = "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"},
]
markupsafe = [
{file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"},
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"},
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b"},
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94"},
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"},
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f"},
{file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"},
{file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"},
{file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"},
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18"},
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f"},
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f"},
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2"},
{file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d"},
{file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"},
{file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"},
{file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066"},
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35"},
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b"},
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298"},
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75"},
{file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb"},
{file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"},
{file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"},
{file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"},
{file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7"},
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8"},
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5"},
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135"},
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902"},
{file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509"},
{file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"},
{file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"},
{file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"},
]
natural = [
{file = "natural-0.2.0.tar.gz", hash = "sha256:18c83662d2d33fd7e6eee4e3b0d7366e1ce86225664e3127a2aaf0a3233f7df2"},
]
peertube = []
python-dateutil = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
]
python-slugify = [
{file = "python-slugify-5.0.2.tar.gz", hash = "sha256:f13383a0b9fcbe649a1892b9c8eb4f8eab1d6d84b84bb7a624317afa98159cab"},
{file = "python_slugify-5.0.2-py2.py3-none-any.whl", hash = "sha256:6d8c5df75cd4a7c3a2d21e257633de53f52ab0265cd2d1dc62a730e8194a7380"},
]
requests = [
{file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"},
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
tatsu = [
{file = "TatSu-5.7.0-py2.py3-none-any.whl", hash = "sha256:9eebadfc2889d8e82e197df22913df56ff204bf4cfc62db49a5c7edd084e10b4"},
{file = "TatSu-5.7.0.zip", hash = "sha256:428136cd4aa9600fcd01428bd5667fc752062f54bd0148dc1e64fee7b8d05fa4"},
]
text-unidecode = [
{file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"},
{file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
]
urllib3 = [
{file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"},
{file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"},
]

View File

@ -7,6 +7,12 @@ license = "GPLv3+"
[tool.poetry.dependencies]
python = "^3.9"
Jinja2 = "^3.0.3"
ics = "^0.7"
natural = "^0.2.0"
python-slugify = "^5.0.2"
requests = "^2.26.0"
peertube = {git = "https://framagit.org/framasoft/peertube/clients/python.git"}
[tool.poetry.dev-dependencies]