Commits (1)

SHA1          Message                 Date
48e1838a2c    hometown integration    2022-01-10 14:12:31 +01:00
13 changed files with 27 additions and 194 deletions

.gitignore vendored (5 changed lines)
View File

@@ -1,7 +1,4 @@
*.txt
*.txt.*
__pycache__
etags
test
__pycache__
.venv
content

README.md
View File

@@ -1,40 +1,3 @@
# lumbunglib
> Python lib which powers `lumbung.space` automation
## hacking
Install [poetry](https://python-poetry.org/docs/#osx--linux--bashonwindows-install-instructions):
```bash
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python -
```
We use Poetry because it locks the dependencies all the way down and makes it
easier to manage installation & maintenance in the long term. Then install the
dependencies & have them managed by Poetry:
```bash
poetry install
```
Each script requires some environment variables to run. You can see the latest
deployment configuration
[here](https://git.autonomic.zone/ruangrupa/lumbung.space/src/branch/main/compose.yml);
look for the values under the `environment: ...` stanza.
All scripts have an entrypoint described in the
[`pyproject.toml`](https://git.autonomic.zone/ruangrupa/lumbunglib/src/commit/40bf9416b8792c08683ad8ac878093c7ef1b2f5d/pyproject.toml#L27-L31)
which you can run via `poetry run ...`. For example, if you want to run the
[`lumbunglib/video.py`](./lumbunglib/video.py) script, you'd do:
```bash
mkdir -p testdir
export OUTPUT_DIR=testdir
poetry run lumbunglib-vid
```
Run `poetry run poetry2setup > setup.py` after updating the Poetry dependencies.
This allows us to run `pip install .` in the deployment, so that Pip treats the
project as a regular Python package. If adding a new CLI command, extend
`pyproject.toml` with a new `[tool.poetry.scripts]` entry.
> Python lib which powers `lumbung[dot]space` automation
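A note on the environment variables mentioned in the README above: they are plain process environment, read by each script at startup. A minimal sketch of the pattern, assuming only `OUTPUT_DIR` (the one variable this diff shows; the real scripts read more):

```python
import os

# Fail fast with a KeyError if the deployment forgot to set the variable.
output_dir = os.environ["OUTPUT_DIR"]
print("writing posts to", output_dir)
```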

lumbunglib/cloudcal.py
View File

@@ -3,7 +3,6 @@ import re
import shutil
from pathlib import Path
from urllib.parse import urlparse
from slugify import slugify
import arrow
import jinja2
@@ -76,7 +75,6 @@ def create_metadata(event):
"duration": date.compress(event.duration),
"location": event.location,
"uid": event.uid,
"featured_image": "",
"images": find_imageURLS(event.description), # currently not used in template
}
@@ -112,6 +110,7 @@ def localize_time(date):
)
return localized_begins
def create_event_post(post_dir, event):
"""
Create HUGO post based on calendar event metadata
@@ -147,15 +146,12 @@ def create_event_post(post_dir, event):
if not os.path.exists(local_image):
# download preview image
response = requests.get(img, stream=True)
if response.status_code == 200:
with open(local_image, "wb") as img_file:
shutil.copyfileobj(response.raw, img_file)
print('Downloaded image for event "{}"'.format(event.name))
event_metadata["description"] = event_metadata["description"].replace(
img, "![]({})".format(img_name)
)
if event_metadata["featured_image"] == "":
event_metadata["featured_image"] = img_name
with open(local_image, "wb") as img_file:
shutil.copyfileobj(response.raw, img_file)
print('Downloaded image for event "{}"'.format(event.name))
event_metadata["description"] = event_metadata["description"].replace(
img, "![]({})".format(img_name)
)
if img_name in existing_images:
existing_images.remove(img_name)
@@ -188,18 +184,18 @@ def update_event_post(post_dir, event):
def main():
for event in list(cal.events):
post_name = slugify(event.name) + "-" + event.uid
post_dir = os.path.join(output_dir, post_name)
if post_name not in existing_posts:
post_dir = os.path.join(output_dir, event.uid)
if event.uid not in existing_posts:
# if there is an event we don't already have, make it
create_event_post(post_dir, event)
elif post_name in existing_posts:
elif event.uid in existing_posts:
# if we already have it, update
update_event_post(post_dir, event)
existing_posts.remove(
post_name
event.uid
) # create list of posts which have not been returned by the calendar
for post in existing_posts:
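The `main()` hunk above follows a create/update/prune pattern that this commit applies consistently across the calendar, feed, hashtag, and video scripts: create posts for new upstream items, update the ones already present, and whatever remains in `existing_posts` afterwards is stale. A condensed sketch of the pattern (`reconcile`, `item_id`, `create`, and `update` are illustrative names, not functions from the library):

```python
import os
import shutil

def reconcile(output_dir, items, item_id, create, update):
    existing_posts = os.listdir(output_dir)
    for item in items:
        key = item_id(item)
        post_dir = os.path.join(output_dir, key)
        if key not in existing_posts:
            create(post_dir, item)      # new upstream item, make it
        else:
            update(post_dir, item)      # already have it, refresh it
            existing_posts.remove(key)  # still present upstream, keep it
    for post in existing_posts:  # no longer returned upstream, prune it
        shutil.rmtree(os.path.join(output_dir, post))
```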

lumbunglib/feed.py
View File

@@ -1,7 +1,6 @@
import os
import shutil
import time
from hashlib import md5
from ast import literal_eval as make_tuple
from pathlib import Path
from urllib.parse import urlparse
@@ -61,11 +60,6 @@ def create_frontmatter(entry):
else:
author = ""
if "title" in entry:
title = entry.title
else:
title = ""
tags = []
if "tags" in entry:
# TODO finish categories
@@ -73,7 +67,7 @@ def create_frontmatter(entry):
tags.append(t["term"])
frontmatter = {
"title": title,
"title": entry.title,
"date": published.format(),
"summary": "",
"author": author,
@@ -200,25 +194,13 @@ def main():
if not os.path.exists(output_dir):
os.makedirs(output_dir)
feed_dict = dict()
for url in feed_urls:
feed_name = urlparse(url).netloc
feed_dict[url] = feed_name
feed_names = feed_dict.values()
content_dirs = os.listdir(output_dir)
for i in content_dirs:
if i not in feed_names:
shutil.rmtree(os.path.join(output_dir, i))
print("%s not in feeds_list.txt, removing local data" %(i))
# add iframe to the allowlist of feedparser's sanitizer,
# this is now handled in parse_post()
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
for feed_url in feed_urls:
feed_name = feed_dict[feed_url]
feed_name = urlparse(feed_url).netloc
feed_dir = os.path.join(output_dir, feed_name)
@@ -239,13 +221,6 @@ def main():
entry["feed_name"] = feed_name
post_name = slugify(entry.title)
# pixelfed returns the whole post text as the post name. max
# filename length is 255 on many systems. here we're shortening
# the name and adding a hash to it to avoid a conflict in a
# situation where 2 posts start with exactly the same text.
if len(post_name) > 150:
post_hash = md5(bytes(post_name, "utf-8"))
post_name = post_name[:150] + "-" + post_hash.hexdigest()
post_dir = os.path.join(output_dir, feed_name, post_name)
if post_name not in existing_posts:
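For reference, the deleted hunk above implemented the truncation scheme its comment describes. Roughly, as a condensed sketch of the removed logic (`safe_post_name` is an illustrative name):

```python
from hashlib import md5

from slugify import slugify

def safe_post_name(title, limit=150):
    # Pixelfed returns the whole post text as the title; cap the slug well
    # below the common 255-character filename limit and append a digest so
    # two posts sharing the same first 150 characters still get unique names.
    name = slugify(title)
    if len(name) > limit:
        name = name[:limit] + "-" + md5(bytes(name, "utf-8")).hexdigest()
    return name
```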

lumbunglib/hashtag.py
View File

@@ -9,16 +9,7 @@ from mastodon import Mastodon
instance = "https://social.lumbung.space"
email = ""
password = ""
hashtags = [
"documentafifteen",
"harvestedbyputra",
"jalansesama",
"lumbungdotspace",
"majelisakakbar",
"majelisakbar",
"warungkopi",
"lumbungkios",
]
hashtags = ["jalansesama"]
def login_mastodon_bot():
@@ -104,10 +95,6 @@ def main():
if not os.path.exists(output_dir):
os.mkdir(output_dir)
all_existing_posts = []
for i in os.listdir(output_dir):
all_existing_posts += os.listdir(os.path.join(output_dir, i))
for hashtag in hashtags:
hashtag_dir = os.path.join(output_dir, hashtag)
@@ -125,13 +112,14 @@ def main():
for post_metadata in timeline:
post_dir = os.path.join(hashtag_dir, str(post_metadata["id"]))
# if there is a post in the feed we don't already have locally, make it
if str(post_metadata["id"]) not in all_existing_posts:
if str(post_metadata["id"]) not in existing_posts:
if not post_metadata[
"local_only"
]: # if you get an error here then you are using vanilla Mastodon, this is a Hometown or Glitch only feature
create_post(post_dir, post_metadata)
all_existing_posts.append(str(post_metadata["id"]))
else:
print("not pulling post %s (post is local only)" % (post_metadata["id"]))
@@ -141,8 +129,6 @@ def main():
existing_posts.remove(
str(post_metadata["id"])
) # create list of posts which have not been returned in the feed
elif str(post_metadata["id"]) in all_existing_posts:
print("skipping post %s as it was already pulled with a different hashtag." % (str(post_metadata["id"])))
for post in existing_posts:
print(
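The `local_only` handling above is the Hometown-specific part of this commit: vanilla Mastodon's API does not include that flag, which is why the inline comment warns the lookup only works on Hometown or Glitch instances. A sketch of the same filtering against Mastodon.py's hashtag timeline (unauthenticated client for brevity; the real script logs in via `login_mastodon_bot()` first):

```python
from mastodon import Mastodon

# Anonymous read-only client; assumes the instance allows public reads.
mastodon = Mastodon(api_base_url="https://social.lumbung.space")

for post in mastodon.timeline_hashtag("jalansesama", local=True):
    if not post["local_only"]:  # Hometown/Glitch-only field, absent on vanilla
        print("would pull post %s" % post["id"])
    else:
        print("not pulling post %s (post is local only)" % post["id"])
```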

View File

@@ -8,9 +8,6 @@ event_end: "{{ event.end }}"
duration: "{{ event.duration }}"
localized_begin: "{{ event.localized_begin }}"
uid: "{{ event.uid }}"
{% if event.featured_image %}
featured_image: "{{ event.featured_image }}"
{% endif %}
{% if event.location %}
location: "{{ event.location }}"
{% endif %}

View File

@@ -7,7 +7,7 @@ author: "{{ frontmatter.author }}"
original_link: "{{ frontmatter.original_link }}"
feed_name: "{{ frontmatter.feed_name}}"
categories: ["network", "{{ frontmatter.feed_name}}"]
tags: {{ frontmatter.tags }}
tags: {{ frontmatter.tags }}
---
{{ content }}
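These `.md` files are Jinja2 templates that the scripts render into Hugo posts; the `{{ ... }}` placeholders are filled from the metadata dicts built in the Python modules above. A minimal sketch of the rendering step (loader path and template filename are illustrative):

```python
import jinja2

env = jinja2.Environment(loader=jinja2.FileSystemLoader("templates"))
template = env.get_template("post_template.md")  # illustrative filename
rendered = template.render(
    frontmatter={"title": "hello", "author": "", "tags": []}, content=""
)
print(rendered)
```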

View File

@@ -4,8 +4,6 @@ draft: false
author: "{{ post_metadata.account.display_name }}"
avatar: "{{ post_metadata.account.avatar }}"
categories: ["shouts"]
images: [{% for i in post_metadata.media_attachments %} "{{ i.url }}", {% endfor %}]
title: "{{ post_metadata.account.display_name }}"
tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}]
---

View File

@@ -7,7 +7,6 @@ video_duration: "{{ v.duration | duration }} "
video_channel: "{{ v.channel.display_name }}"
channel_url: "{{ v.channel.url }}"
preview_image: "{{ preview_image }}"
images: ["./{{ preview_image }}"]
categories: ["tv","{{ v.channel.display_name }}"]
is_live: {{ v.is_live }}
---

lumbunglib/video.py
View File

@@ -4,7 +4,6 @@ import json
import os
import shutil
from pathlib import Path
from slugify import slugify
import arrow
import jinja2
@@ -98,6 +97,7 @@ def update_post(post_directory, video_metadata, host):
# compat for when there is no timestamp yet..
create_post(post_directory, video_metadata, host)
def main():
v = peertube.VideoApi(client)
@@ -114,11 +114,10 @@ def main():
existing_posts = os.listdir(output_dir)
for video_metadata in videos:
post_name = slugify(video_metadata["name"]) + "-" + video_metadata["uuid"]
post_dir = os.path.join(output_dir, post_name)
post_dir = os.path.join(output_dir, video_metadata["uuid"])
if (
post_name not in existing_posts
video_metadata["uuid"] not in existing_posts
): # if there is a video we dont already have, make it
print(
"New: ", video_metadata["name"], "({})".format(video_metadata["uuid"])
@@ -126,11 +125,11 @@ def main():
create_post(post_dir, video_metadata, host)
elif (
post_name in existing_posts
video_metadata["uuid"] in existing_posts
): # if we already have the video do nothing, possibly update
update_post(post_dir, video_metadata, host)
existing_posts.remove(
post_name
video_metadata["uuid"]
) # create list of posts which have not been returned by peertube
for post in existing_posts:
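A note on the naming change in this hunk (mirrored in the calendar script's switch to `event.uid`): keying the post directory by `uuid` alone means a retitled video keeps its directory and is updated in place, whereas the old `slugify(name) + uuid` key changed whenever the title did, so the post would be pruned and recreated. Illustratively:

```python
from slugify import slugify

video = {"name": "lumbung calling", "uuid": "a1b2c3d4"}  # illustrative values

old_key = slugify(video["name"]) + "-" + video["uuid"]  # changes on rename
new_key = video["uuid"]                                 # stable across renames
```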

poetry.lock generated (73 changed lines)
View File

@@ -9,21 +9,6 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.dependencies]
python-dateutil = "*"
[[package]]
name = "beautifulsoup4"
version = "4.10.0"
description = "Screen-scraping library"
category = "main"
optional = false
python-versions = ">3.0.0"
[package.dependencies]
soupsieve = ">1.2"
[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "blurhash"
version = "1.1.4"
@@ -35,17 +20,6 @@ python-versions = "*"
[package.extras]
test = ["pillow", "numpy", "pytest"]
[[package]]
name = "bs4"
version = "0.0.1"
description = "Dummy package for Beautiful Soup"
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
beautifulsoup4 = "*"
[[package]]
name = "certifi"
version = "2021.10.8"
@@ -73,17 +47,6 @@ category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "feedparser"
version = "6.0.8"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
category = "main"
optional = false
python-versions = ">=3.6"
[package.dependencies]
sgmllib3k = "*"
[[package]]
name = "ics"
version = "0.7"
@@ -260,14 +223,6 @@ urllib3 = ">=1.21.1,<1.27"
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "sgmllib3k"
version = "1.0.0"
description = "Py3k port of sgmllib."
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "six"
version = "1.16.0"
@@ -276,14 +231,6 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "soupsieve"
version = "2.3.1"
description = "A modern CSS selector implementation for Beautiful Soup."
category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "tatsu"
version = "5.7.0"
@@ -319,24 +266,17 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "c5c987253f949737210f4a3d3c3c24b0affd4a9c7d06de386c9bd514c592db8b"
content-hash = "d3c9d528613826932cb3f316a3a69066e87e69011d4c8d3a2492521833d3851f"
[metadata.files]
arrow = [
{file = "arrow-0.14.7-py2.py3-none-any.whl", hash = "sha256:4bfacea734ead51495dc47df00421ecfd4ca1f2c0fbe58b9a26eaeddedc31caf"},
{file = "arrow-0.14.7.tar.gz", hash = "sha256:67f8be7c0cf420424bc62d8d7dc40b44e4bb2f7b515f9cc2954fb36e35797656"},
]
beautifulsoup4 = [
{file = "beautifulsoup4-4.10.0-py3-none-any.whl", hash = "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf"},
{file = "beautifulsoup4-4.10.0.tar.gz", hash = "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"},
]
blurhash = [
{file = "blurhash-1.1.4-py2.py3-none-any.whl", hash = "sha256:7611c1bc41383d2349b6129208587b5d61e8792ce953893cb49c38beeb400d1d"},
{file = "blurhash-1.1.4.tar.gz", hash = "sha256:da56b163e5a816e4ad07172f5639287698e09d7f3dc38d18d9726d9c1dbc4cee"},
]
bs4 = [
{file = "bs4-0.0.1.tar.gz", hash = "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"},
]
certifi = [
{file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
{file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"},
@@ -349,10 +289,6 @@ decorator = [
{file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
{file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
]
feedparser = [
{file = "feedparser-6.0.8-py3-none-any.whl", hash = "sha256:1b7f57841d9cf85074deb316ed2c795091a238adb79846bc46dccdaf80f9c59a"},
{file = "feedparser-6.0.8.tar.gz", hash = "sha256:5ce0410a05ab248c8c7cfca3a0ea2203968ee9ff4486067379af4827a59f9661"},
]
ics = [
{file = "ics-0.7-py2.py3-none-any.whl", hash = "sha256:bf5fbdef6e1e073afdadf1b996f0271186dd114a148e38e795919a1ae644d6ac"},
{file = "ics-0.7-py3.7.egg", hash = "sha256:3b606205b9582ad27dff77f9b227a30d02fdac532731927fe39df1f1ddf8673f"},
@@ -473,17 +409,10 @@ requests = [
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
]
sgmllib3k = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
soupsieve = [
{file = "soupsieve-2.3.1-py3-none-any.whl", hash = "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb"},
{file = "soupsieve-2.3.1.tar.gz", hash = "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"},
]
tatsu = [
{file = "TatSu-5.7.0-py2.py3-none-any.whl", hash = "sha256:9eebadfc2889d8e82e197df22913df56ff204bf4cfc62db49a5c7edd084e10b4"},
{file = "TatSu-5.7.0.zip", hash = "sha256:428136cd4aa9600fcd01428bd5667fc752062f54bd0148dc1e64fee7b8d05fa4"},

pyproject.toml
View File

@@ -13,8 +13,6 @@ natural = "^0.2.0"
python-slugify = "^5.0.2"
requests = "^2.26.0"
peertube = {git = "https://framagit.org/framasoft/peertube/clients/python.git"}
feedparser = "^6.0.8"
bs4 = "^0.0.1"
"Mastodon.py" = "^1.5.1"
[tool.poetry.dev-dependencies]
@@ -27,5 +25,4 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
lumbunglib-cal = "lumbunglib.cloudcal:main"
lumbunglib-vid = "lumbunglib.video:main"
lumbunglib-feed = "lumbunglib.feed:main"
lumbunglib-hash = "lumbunglib.hashtag:main"

setup.py
View File

@@ -10,8 +10,6 @@ package_data = \
install_requires = \
['Jinja2>=3.0.3,<4.0.0',
'Mastodon.py>=1.5.1,<2.0.0',
'bs4>=0.0.1,<0.0.2',
'feedparser>=6.0.8,<7.0.0',
'ics>=0.7,<0.8',
'natural>=0.2.0,<0.3.0',
'peertube @ '
@@ -21,7 +19,6 @@ entry_points = \
entry_points = \
{'console_scripts': ['lumbunglib-cal = lumbunglib.cloudcal:main',
'lumbunglib-feed = lumbunglib.feed:main',
'lumbunglib-hash = lumbunglib.hashtag:main',
'lumbunglib-vid = lumbunglib.video:main']}
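As the README section above explains, this generated `setup.py` is what lets the deployment run plain `pip install .`. Each `console_scripts` entry maps an executable name to a `module:function` pair, so after installation, running `lumbunglib-cal` amounts to:

```python
from lumbunglib.cloudcal import main

main()
```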