konfluks/konfluks/hashtag.py

import os
import shutil
from pathlib import Path
from re import sub

import jinja2
import requests
from mastodon import Mastodon

# Base URL of the Mastodon-compatible server whose hashtag timelines are pulled.
instance = "https://social.lumbung.space"
# NOTE(review): email and password are empty and not referenced by any code
# visible in this file; login_mastodon_bot() authenticates with the
# MASTODON_AUTH_TOKEN environment variable instead.
email = ""
password = ""
# Hashtags to mirror. main() creates one sub-directory of OUTPUT_DIR per entry
# and processes them in this order; a post carrying several of these hashtags
# is only stored under the first one that pulls it.
hashtags = [
    "documentafifteen",
    "harvestedbyputra",
    "jalansesama",
    "lumbungdotspace",
    "majelisakakbar",
    "majelisakbar",
    "warungkopi",
    "lumbungkios",
    "kassel_ecosystem",
    "ruruhaus",
    "offbeatentrack_kassel",
    "lumbungofpublishers",
    "lumbungkiosproducts",
]


def login_mastodon_bot():
    """
    Build the Mastodon API client used to read the hashtag timelines.

    The access token is taken from the MASTODON_AUTH_TOKEN environment
    variable (None is passed through when it is unset) and the client is
    pointed at the module-level `instance` URL.
    """
    token = os.environ.get("MASTODON_AUTH_TOKEN")
    return Mastodon(access_token=token, api_base_url=instance)


def create_frontmatter(post_metadata):
    """
    Turn post metadata into HUGO frontmatter.

    Placeholder: `post_metadata` is currently ignored and an empty string is
    always returned.
    """
    return ""


def _download_file(url, destination, timeout=30):
    """
    Stream `url` into the file `destination`.

    Returns True when the file was written and False when the server answered
    with a non-success HTTP status (which is reported and skipped, so an error
    page is never stored under the media file's name). Connection problems
    raised by requests propagate to the caller.
    """
    # Write under a temporary name first so an interrupted transfer cannot
    # leave a truncated file that later looks like a finished download.
    partial = destination + ".part"
    with requests.get(url, stream=True, timeout=timeout) as response:
        if not response.ok:
            print("Failed to download", url, "- HTTP status", response.status_code)
            return False
        # response.raw bypasses requests' content decoding; ask urllib3 to undo
        # gzip/deflate so the stored bytes are the actual media file.
        response.raw.decode_content = True
        try:
            with open(partial, "wb") as out_file:
                shutil.copyfileobj(response.raw, out_file)
            os.replace(partial, destination)
        finally:
            # Only still present when the copy or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)
    return True


def download_media(post_directory, media_attachments):
    """
    Download media attached to posts. N.b. currently only images and videos
    See: https://mastodonpy.readthedocs.io/en/stable/#media-dicts

    Files are stored in `post_directory` under the name returned by
    localize_media_url(); files that already exist are not fetched again.
    For videos the preview image is additionally stored as "thumbnail.png"
    (a single fixed name, so only the first video's preview is kept).
    Attachments of any other type are ignored.
    """

    for item in media_attachments:
        media_type = item["type"]
        if media_type == "image":
            image = localize_media_url(item["url"])
            image_path = os.path.join(post_directory, image)
            # TODO check whether this needs to handle delete & redraft with different images
            if not os.path.exists(image_path) and _download_file(
                item["url"], image_path
            ):
                print("Downloaded cover image", image)
        elif media_type == "video":
            video = localize_media_url(item["url"])
            video_path = os.path.join(post_directory, video)
            if not os.path.exists(video_path) and _download_file(
                item["url"], video_path
            ):
                print("Downloaded video in post", video)

            thumbnail_path = os.path.join(post_directory, "thumbnail.png")
            # The preview may be missing/null; skip it rather than request None.
            preview_url = item.get("preview_url")
            if (
                preview_url
                and not os.path.exists(thumbnail_path)
                and _download_file(preview_url, thumbnail_path)
            ):
                print("Downloaded thumbnail for", video)


def create_post(post_directory, post_metadata):
    """
    Create Hugo posts based on Toots/posts returned in timeline.
    See: https://mastodonpy.readthedocs.io/en/stable/#toot-dicts

    Renders the "hashtag.md" template found in the "templates" directory next
    to this file, writes the result to "index.html" inside `post_directory`
    (created if missing) and then downloads the post's media attachments.

    N.b. `post_metadata` is modified in place: double quotes in the account's
    display name are backslash-escaped before rendering.
    """

    os.makedirs(post_directory, exist_ok=True)

    # Escape double quotes in the display name
    # (presumably it ends up inside a quoted value in the template - confirm).
    name = post_metadata["account"]["display_name"]
    post_metadata["account"]["display_name"] = name.replace('"', '\\"')

    template_dir = os.path.join(Path(__file__).parent.resolve(), "templates")
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
    env.filters["localize_media_url"] = localize_media_url
    env.filters["filter_mastodon_urls"] = filter_mastodon_urls
    template = env.get_template("hashtag.md")

    # Render before opening the file so a template error cannot leave an
    # empty index.html behind.
    post = template.render(post_metadata=post_metadata)

    # Toots routinely contain emoji and non-ASCII text; do not depend on the
    # locale's default encoding.
    with open(
        os.path.join(post_directory, "index.html"), "w", encoding="utf-8"
    ) as f:
        f.write(post)

    download_media(post_directory, post_metadata["media_attachments"])


def localize_media_url(url):
    """
    Returns the filename, used also as custom jinja filter

    The filename is the last path segment of `url`. A query string or
    fragment (e.g. a "?1654000000" cache-busting suffix) is dropped first, so
    the name is a plain file name and the relative link built from it in the
    template resolves to the file saved on disk.
    """
    without_fragment = url.partition("#")[0]
    path = without_fragment.partition("?")[0]
    return path.rsplit("/", 1)[-1]


def filter_mastodon_urls(content):
    """
    Custom jinja filter meant to strip the tag links Mastodon generates, e.g.
    <a href="https://social.lumbung.space/tags/jalankita" class="mention hashtag" rel="tag">

    Not implemented yet: `content` is handed back unchanged.
    """
    # TODO implement the actual filtering
    unfiltered = content
    return unfiltered


def _list_subdirectories(directory):
    """Return the names of the directories directly inside `directory`."""
    with os.scandir(directory) as entries:
        return [entry.name for entry in entries if entry.is_dir()]


def main():
    """
    Mirror the posts of every configured hashtag into OUTPUT_DIR.

    For each entry of `hashtags` a sub-directory of OUTPUT_DIR is kept in sync
    with the instance's local, media-only hashtag timeline: posts not stored
    yet are created, posts already stored under another hashtag are skipped,
    and stored posts that are no longer returned by the timeline are deleted.

    Raises RuntimeError when the OUTPUT_DIR environment variable is unset or
    empty.
    """
    mastodon = login_mastodon_bot()
    output_dir = os.environ.get("OUTPUT_DIR")
    if not output_dir:
        raise RuntimeError("the OUTPUT_DIR environment variable must be set")
    os.makedirs(output_dir, exist_ok=True)

    # Ids of every post stored under any hashtag. Only directories are
    # considered, so stray files (e.g. an _index.md) do not break the scan.
    all_existing_posts = set()
    for hashtag_name in _list_subdirectories(output_dir):
        all_existing_posts.update(
            _list_subdirectories(os.path.join(output_dir, hashtag_name))
        )

    for hashtag in hashtags:

        hashtag_dir = os.path.join(output_dir, hashtag)
        os.makedirs(hashtag_dir, exist_ok=True)

        # Posts stored for this hashtag; whatever is still in here after
        # walking the timeline is no longer in the feed and gets removed.
        stale_posts = set(_list_subdirectories(hashtag_dir))

        timeline = mastodon.timeline_hashtag(
            hashtag, local=True, only_media=True
        )  # returns max 20 queries and only with media
        timeline = mastodon.fetch_remaining(
            timeline
        )  # returns all the rest n.b. can take a while because of rate limit

        for post_metadata in timeline:
            post_id = str(post_metadata["id"])
            post_dir = os.path.join(hashtag_dir, post_id)

            # if there is a post in the feed we dont already have locally, make it
            if post_id not in all_existing_posts:
                # if you get an error here then you are using vanilla Mastodon,
                # this is a Hometown or Glitch only feature
                if not post_metadata["local_only"]:
                    create_post(post_dir, post_metadata)
                    all_existing_posts.add(post_id)
                else:
                    print(f"not pulling post {post_id} (post is local only)")

            # if we already have the post do nothing, possibly update
            elif post_id in stale_posts:
                # update_post(post_dir, post_metadata)
                stale_posts.remove(post_id)
            else:
                print(
                    f"skipping post {post_id} as it was already pulled with a different hashtag."
                )

        # rm posts that exist but are no longer returned in feed
        for post in sorted(stale_posts):
            print("deleted", post)
            shutil.rmtree(os.path.join(hashtag_dir, post))
            # Keep the global index in sync so a later hashtag in this run can
            # pull the post again instead of skipping it as "already pulled".
            all_existing_posts.discard(post)