2021-12-15 10:30:10 +00:00
|
|
|
# lumbung.space hashtag publishing bot
|
|
|
|
# © 2021 roel roscam abbing agplv3
|
|
|
|
# Makes Hugo posts out of hashtag feeds on Mastodon.
|
|
|
|
# Requires an account on the Mastodon instance configured.
|
|
|
|
# Currently does not do any thread recreation and only handles images
|
|
|
|
|
|
|
|
import os
|
|
|
|
import shutil
|
2021-12-15 10:55:51 +00:00
|
|
|
from pathlib import Path
|
2021-12-15 10:30:10 +00:00
|
|
|
|
|
|
|
import jinja2
|
2021-12-15 10:41:35 +00:00
|
|
|
import requests
|
2021-12-15 10:30:10 +00:00
|
|
|
from mastodon import Mastodon
|
2021-12-15 10:41:35 +00:00
|
|
|
|
2021-12-15 10:43:12 +00:00
|
|
|
# Base URL of the Mastodon instance the bot logs in to
instance = "https://social.lumbung.space"

# N.b. the first time you use this script you need to register the app, see:
# https://mastodonpy.readthedocs.io/en/stable/#module-mastodon

# Login credentials for the bot account
email = ""
password = ""

# Hashtags whose timelines get published
hashtags = ["jalansesama"]

# Your Hugo content directory; the OUTPUT_DIR environment variable overrides
# the placeholder default
output_dir = os.getenv("OUTPUT_DIR", "path/to/hugo/content")
|
2021-12-15 10:30:10 +00:00
|
|
|
|
2021-12-15 10:41:35 +00:00
|
|
|
|
2021-12-15 10:30:10 +00:00
|
|
|
def login_mastodon_bot():
    """
    Log the bot account in and return the Mastodon API client.

    The client is created for the module-level `instance` using the app
    credentials referenced by "publishbot_clientcred.secret", then logged in
    with the module-level `email` and `password`. Only the "read" scope is
    requested and the resulting user credentials are written to
    "publishbot_usercred.secret".
    """
    client = Mastodon(
        api_base_url=instance,
        client_id="publishbot_clientcred.secret",
    )

    client.log_in(
        email,
        password,
        scopes=["read"],
        to_file="publishbot_usercred.secret",
    )

    return client
|
|
|
|
|
2021-12-15 10:41:35 +00:00
|
|
|
|
2021-12-15 10:30:10 +00:00
|
|
|
def create_frontmatter(post_metadata):
    """
    Turn post metadata into Hugo frontmatter.

    Placeholder: post_metadata is not inspected yet and the returned
    frontmatter is always the empty string.
    """
    return ""
|
|
|
|
|
2021-12-15 10:41:35 +00:00
|
|
|
|
2021-12-15 10:30:10 +00:00
|
|
|
def download_media(post_directory, media_attachments):
    """
    Download the images attached to a post into the post's directory.

    Only attachments whose "type" is "image" are handled, everything else is
    skipped. An image that already exists in post_directory is not fetched
    again. A download that does not return a successful HTTP status is
    reported and skipped, so no file is created and the next run retries it.
    See: https://mastodonpy.readthedocs.io/en/stable/#media-dicts

    :param post_directory: directory the image files are written to
    :param media_attachments: iterable of media dicts, each providing at
        least the "type" and "url" keys
    :raises requests.RequestException: on connection errors or timeouts
    """
    for item in media_attachments:
        if item["type"] != "image":
            continue

        image = localize_media_url(item["url"])
        image_path = os.path.join(post_directory, image)

        # TODO check whether this needs to handle delete & redraft with different images
        if os.path.exists(image_path):
            continue

        # Stream the body so the image is never held in memory as a whole; the
        # context manager releases the connection once we are done with it.
        with requests.get(item["url"], stream=True, timeout=30) as response:
            if not response.ok:
                # Saving an HTTP error page under the image's name would make
                # the file "exist" and block every later retry.
                print("Failed to download image", image, response.status_code)
                continue

            # response.raw bypasses requests' content decoding; without this
            # a gzip/deflate encoded body would be written still compressed.
            response.raw.decode_content = True
            with open(image_path, "wb") as img_file:
                shutil.copyfileobj(response.raw, img_file)

        print("Downloaded cover image", image)
|
|
|
|
|
2021-12-15 10:30:10 +00:00
|
|
|
|
|
|
|
def create_post(post_directory, post_metadata):
    """
    Create a Hugo post from a toot/post returned in the timeline.

    Renders the module-level `template` with the post's metadata, writes the
    result to "index.html" inside post_directory (created when missing) and
    downloads the post's media attachments next to it.
    See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts

    :param post_directory: directory that holds this post
    :param post_metadata: toot dict; must provide "media_attachments"
    """
    # Render before touching the disk: if the template fails, no empty
    # directory or index.html is left behind to be mistaken for a finished
    # post on the next run.
    post = template.render(post_metadata=post_metadata)

    os.makedirs(post_directory, exist_ok=True)

    # Toots routinely contain emoji and non-ASCII text; an explicit encoding
    # keeps the write from depending on the platform's locale.
    with open(
        os.path.join(post_directory, "index.html"), "w", encoding="utf-8"
    ) as f:
        f.write(post)

    download_media(post_directory, post_metadata["media_attachments"])
|
|
|
|
|
2021-12-15 10:30:10 +00:00
|
|
|
|
|
|
|
def localize_media_url(url):
    """
    Return the filename a media URL is stored under locally.

    The filename is the last segment of the URL's path. A query string or
    fragment (e.g. the "?1639563000" in ".../abc.png?1639563000") is not part
    of the filename, so the name on disk matches what a relative link to it
    resolves to. Used also as custom jinja filter.

    :param url: absolute or relative media URL
    :return: the last path segment, "" when the path ends in a slash
    """
    from urllib.parse import urlsplit

    return urlsplit(url).path.rsplit("/", 1)[-1]
|
2021-12-15 10:30:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
def filter_mastodon_urls(content):
    """
    Strip the links Mastodon generates for hashtags from a post's content,
    e.g. <a href="https://social.lumbung.space/tags/jalankita" class="mention hashtag" rel="tag">

    Also registered as a custom jinja filter.
    """
    # TODO: the filtering is not implemented yet, content passes through as is
    return content
|
|
|
|
|
|
|
|
|
|
|
|
# Script body: mirror every configured hashtag timeline into the Hugo content
# directory, creating posts that are new in the feed and removing local posts
# the feed no longer returns.

mastodon = login_mastodon_bot()

# Templates are looked up relative to the directory the script is run from.
cwd = Path.cwd()

env = jinja2.Environment(loader=jinja2.FileSystemLoader(str(cwd)))
env.filters["localize_media_url"] = localize_media_url
env.filters["filter_mastodon_urls"] = filter_mastodon_urls

# Jinja template names are "/"-separated and relative to the loader's search
# path; they are never absolute filesystem paths.
template = env.get_template("templates/hashtag.md")

# The default output path is nested, so create missing parents as well.
os.makedirs(output_dir, exist_ok=True)

for hashtag in hashtags:

    hashtag_dir = os.path.join(output_dir, hashtag)
    os.makedirs(hashtag_dir, exist_ok=True)

    # Every post lives in its own directory named after the post id. Plain
    # files in the hashtag directory are not posts and are left alone.
    existing_posts = {
        entry
        for entry in os.listdir(hashtag_dir)
        if os.path.isdir(os.path.join(hashtag_dir, entry))
    }

    timeline = mastodon.timeline_hashtag(
        hashtag, local=True, only_media=True
    )  # returns max 20 posts and only those with media
    timeline = mastodon.fetch_remaining(
        timeline
    )  # returns all the rest n.b. can take a while because of rate limit

    for post_metadata in timeline:
        post_id = str(post_metadata["id"])

        # if we already have the post do nothing, possibly update
        if post_id in existing_posts:
            # update_post(post_dir, post_metadata)
            # Dropping it here leaves only the posts which have not been
            # returned in the feed.
            existing_posts.discard(post_id)
            continue

        # if there is a post in the feed we dont already have locally, make it
        # "local_only" is a Hometown/Glitch feature; vanilla Mastodon does not
        # send the key, in which case the post is treated as publishable.
        if not post_metadata.get("local_only", False):
            post_dir = os.path.join(hashtag_dir, post_id)
            create_post(post_dir, post_metadata)

    # rm posts that exist but are no longer returned in feed
    for post in sorted(existing_posts):
        shutil.rmtree(os.path.join(hashtag_dir, post))
        print("deleted", post)
|