lumbunglib/konfluks/hashtag.py

164 lines
5.3 KiB
Python
Raw Normal View History

2021-12-15 10:30:10 +00:00
import os
import shutil
2021-12-15 10:55:51 +00:00
from pathlib import Path
from re import sub
2021-12-15 10:30:10 +00:00
import jinja2
2021-12-15 10:41:35 +00:00
import requests
2021-12-15 10:30:10 +00:00
from mastodon import Mastodon
2021-12-15 10:41:35 +00:00
2021-12-15 10:43:12 +00:00
# Base URL of the instance whose hashtag timelines are harvested.
instance = "https://social.lumbung.space"

# Credential placeholders. NOTE(review): nothing in the visible code reads
# these; login_mastodon_bot() authenticates with an access token instead.
email = ""
password = ""

# Hashtags to mirror; main() creates one output sub-directory per entry.
hashtags = [
    "documentafifteen",
    "harvestedbyputra",
    "jalansesama",
    "lumbungdotspace",
    "majelisakakbar",
    "majelisakbar",
    "warungkopi",
    "lumbungkios",
    "kassel_ecosystem",
    "ruruhaus",
    "offbeatentrack_kassel",
    "lumbungofpublishers",
]
2021-12-15 10:43:12 +00:00
2021-12-15 10:41:35 +00:00
2021-12-15 10:30:10 +00:00
def login_mastodon_bot():
    """
    Build the Mastodon API client used to read the hashtag timelines.

    The access token is read from the MASTODON_AUTH_TOKEN environment
    variable (None is passed through when it is unset) and the client is
    pointed at the module-level `instance` URL.
    """
    token = os.environ.get("MASTODON_AUTH_TOKEN")
    return Mastodon(access_token=token, api_base_url=instance)
2021-12-15 10:41:35 +00:00
2021-12-15 10:30:10 +00:00
def create_frontmatter(post_metadata):
    """
    Turn post metadata into HUGO frontmatter.

    Currently a stub: the metadata is ignored and an empty string is
    returned.
    """
    return ""
2021-12-15 10:41:35 +00:00
2021-12-15 10:30:10 +00:00
def download_media(post_directory, media_attachments, timeout=30):
    """
    Download media attached to posts. N.b. currently only images
    See: https://mastodonpy.readthedocs.io/en/stable/#media-dicts

    Each image attachment is stored in `post_directory` under the last path
    segment of its URL. Attachments whose file already exists are skipped.

    :param post_directory: directory the images are written into
    :param media_attachments: iterable of media dicts with "type" and "url"
    :param timeout: seconds to wait for the server before giving up, so a
        stalled connection cannot hang the whole run
    """
    for item in media_attachments:
        if item["type"] != "image":
            continue

        image = localize_media_url(item["url"])
        image_path = os.path.join(post_directory, image)
        # TODO check whether this needs to handle delete & redraft with different images
        if os.path.exists(image_path):
            continue

        # The context manager releases the streamed connection even on errors.
        with requests.get(item["url"], stream=True, timeout=timeout) as response:
            if not response.ok:
                # Do not save an error page as the image: the file would then
                # exist and the real image would never be fetched again.
                print(
                    "Could not download image",
                    image,
                    "- HTTP status",
                    response.status_code,
                )
                continue
            # Have urllib3 undo gzip/deflate transfer encoding; otherwise the
            # raw stream would be written to disk still compressed.
            response.raw.decode_content = True
            with open(image_path, "wb") as img_file:
                shutil.copyfileobj(response.raw, img_file)
        print("Downloaded cover image", image)
2021-12-15 10:30:10 +00:00
def create_post(post_directory, post_metadata):
    """
    Create Hugo posts based on Toots/posts returned in timeline.
    See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts

    Renders the "hashtag.md" template from the "templates" directory next to
    this file, writes the result to `post_directory`/index.html and then
    downloads the post's image attachments into the same directory.

    :param post_directory: directory for this post; created when missing
    :param post_metadata: toot dict; its account display_name is modified
        in place (double quotes are backslash-escaped) before rendering
    """
    template_dir = os.path.join(Path(__file__).parent.resolve(), "templates")
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
    env.filters["localize_media_url"] = localize_media_url
    env.filters["filter_mastodon_urls"] = filter_mastodon_urls
    template = env.get_template("hashtag.md")

    # Escape double quotes in the display name before it reaches the template.
    name = post_metadata["account"]["display_name"]
    post_metadata["account"]["display_name"] = name.replace('"', '\\"')

    # Render before touching the disk: a template error must not leave an
    # empty post directory behind, as later runs would treat it as pulled.
    post = template.render(post_metadata=post_metadata)

    os.makedirs(post_directory, exist_ok=True)
    # Explicit UTF-8 so non-ASCII toot text does not depend on the locale.
    index_path = os.path.join(post_directory, "index.html")
    with open(index_path, "w", encoding="utf-8") as f:
        f.write(post)

    download_media(post_directory, post_metadata["media_attachments"])
2021-12-15 10:30:10 +00:00
def localize_media_url(url):
    """
    Return everything after the last "/" of `url`, i.e. the file name.

    A string without any "/" is returned unchanged. Also registered as a
    custom jinja filter.
    """
    _, _, filename = url.rpartition("/")
    return filename
2021-12-15 10:30:10 +00:00
def filter_mastodon_urls(content):
    """
    Placeholder for stripping the tag links that Mastodon generates, e.g.
    <a href="https://social.lumbung.space/tags/jalankita" class="mention hashtag" rel="tag">

    Not implemented yet: `content` is handed back unchanged.
    Also registered as a custom jinja filter.
    """
    # TODO: implement the actual filtering
    return content
2021-12-15 11:23:37 +00:00
def _list_subdirectories(directory):
    """Return the names of the directories directly inside `directory`."""
    with os.scandir(directory) as entries:
        return [entry.name for entry in entries if entry.is_dir()]


def main():
    """
    Mirror the hashtag timelines into OUTPUT_DIR, one directory per hashtag.

    For every entry in `hashtags` the local timeline (posts with media only)
    is fetched. Posts not yet on disk are created as
    OUTPUT_DIR/<hashtag>/<post id>/, unless they are local-only. A post is
    stored only once, under the first hashtag it was pulled with. Post
    directories whose post no longer shows up in that hashtag's feed are
    deleted.

    :raises RuntimeError: when the OUTPUT_DIR environment variable is unset
    """
    mastodon = login_mastodon_bot()

    output_dir = os.environ.get("OUTPUT_DIR")
    if not output_dir:
        # Fail with a clear message rather than a TypeError from os.path.
        raise RuntimeError("the OUTPUT_DIR environment variable must be set")
    os.makedirs(output_dir, exist_ok=True)

    # IDs of every post already on disk, across all hashtag directories.
    # Only directories are considered, so a stray file cannot crash the run.
    all_existing_posts = set()
    for hashtag_name in _list_subdirectories(output_dir):
        all_existing_posts.update(
            _list_subdirectories(os.path.join(output_dir, hashtag_name))
        )

    for hashtag in hashtags:
        hashtag_dir = os.path.join(output_dir, hashtag)
        os.makedirs(hashtag_dir, exist_ok=True)

        # Posts on disk for this hashtag. Every post seen in the feed is
        # removed from this set; whatever remains afterwards is stale.
        stale_posts = set(_list_subdirectories(hashtag_dir))

        timeline = mastodon.timeline_hashtag(
            hashtag, local=True, only_media=True
        )  # returns max 20 queries and only with media
        timeline = mastodon.fetch_remaining(
            timeline
        )  # returns all the rest n.b. can take a while because of rate limit

        for post_metadata in timeline:
            post_id = str(post_metadata["id"])
            post_dir = os.path.join(hashtag_dir, post_id)

            # if there is a post in the feed we dont already have locally, make it
            if post_id not in all_existing_posts:
                # if you get an error here then you are using vanilla Mastodon,
                # this is a Hometown or Glitch only feature
                if not post_metadata["local_only"]:
                    create_post(post_dir, post_metadata)
                    all_existing_posts.add(post_id)
                    # The post is now current, so it must not be removed below.
                    stale_posts.discard(post_id)
                else:
                    print(
                        "not pulling post %s (post is local only)"
                        % (post_metadata["id"])
                    )

            # if we already have the post do nothing, possibly update
            elif post_id in stale_posts:
                # update_post(post_dir, post_metadata)
                stale_posts.discard(post_id)
            else:
                print(
                    "skipping post %s as it was already pulled with a different hashtag."
                    % (post_id)
                )

        # rm posts that exist but are no longer returned in feed
        for post in sorted(stale_posts):
            print("deleted", post)
            shutil.rmtree(os.path.join(hashtag_dir, post))
            # Forget the post so a hashtag processed later in this run can
            # pull it again if it still carries that tag.
            all_existing_posts.discard(post)