forked from ruangrupa/konfluks
Compare commits
10 Commits
b147d46535
...
saneyaml
Author | SHA1 | Date | |
---|---|---|---|
a809433410
|
|||
cf8b1ff7e9
|
|||
2fbc952a72
|
|||
bac9bbd7b3
|
|||
8c4a36791f | |||
dfa4b40d52 | |||
0aaa711538 | |||
c40f740f50
|
|||
e0fd2c40a6 | |||
f69c092548
|
@ -13,7 +13,7 @@ import requests
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
from re import compile as re_compile
|
from re import compile as re_compile
|
||||||
yamlre = re_compile('"')
|
import saneyaml
|
||||||
|
|
||||||
|
|
||||||
def write_etag(feed_name, feed_data):
|
def write_etag(feed_name, feed_data):
|
||||||
@ -120,12 +120,12 @@ def sanitize_yaml (frontmatter):
|
|||||||
#some fields are lists
|
#some fields are lists
|
||||||
l = []
|
l = []
|
||||||
for i in v:
|
for i in v:
|
||||||
i = yamlre.sub('\\"', i)
|
i = saneyaml.load(i)
|
||||||
l.append(i)
|
l.append(i)
|
||||||
frontmatter[k] = l
|
frontmatter[k] = l
|
||||||
|
|
||||||
else:
|
else:
|
||||||
v = yamlre.sub('\\"', v)
|
v = saneyaml.load(v)
|
||||||
frontmatter[k] = v
|
frontmatter[k] = v
|
||||||
|
|
||||||
return frontmatter
|
return frontmatter
|
||||||
|
@ -19,13 +19,16 @@ hashtags = [
|
|||||||
"majelisakbar",
|
"majelisakbar",
|
||||||
"warungkopi",
|
"warungkopi",
|
||||||
"lumbungkios",
|
"lumbungkios",
|
||||||
|
"kassel_ecosystem",
|
||||||
|
"ruruhaus",
|
||||||
|
"offbeatentrack_kassel",
|
||||||
|
"lumbungofpublishers",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def login_mastodon_bot():
|
def login_mastodon_bot():
|
||||||
mastodon = Mastodon(
|
mastodon = Mastodon(
|
||||||
access_token=os.environ.get("MASTODON_AUTH_TOKEN"),
|
access_token=os.environ.get("MASTODON_AUTH_TOKEN"), api_base_url=instance
|
||||||
api_base_url = instance
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return mastodon
|
return mastodon
|
||||||
@ -69,9 +72,9 @@ def create_post(post_directory, post_metadata):
|
|||||||
|
|
||||||
template_dir = os.path.join(Path(__file__).parent.resolve(), "templates")
|
template_dir = os.path.join(Path(__file__).parent.resolve(), "templates")
|
||||||
env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
|
env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
|
||||||
name = post_metadata['account']['display_name']
|
name = post_metadata["account"]["display_name"]
|
||||||
name = sub('"', '\\"', name)
|
name = sub('"', '\\"', name)
|
||||||
post_metadata['account']['display_name'] = name
|
post_metadata["account"]["display_name"] = name
|
||||||
env.filters["localize_media_url"] = localize_media_url
|
env.filters["localize_media_url"] = localize_media_url
|
||||||
env.filters["filter_mastodon_urls"] = filter_mastodon_urls
|
env.filters["filter_mastodon_urls"] = filter_mastodon_urls
|
||||||
|
|
||||||
@ -136,7 +139,10 @@ def main():
|
|||||||
create_post(post_dir, post_metadata)
|
create_post(post_dir, post_metadata)
|
||||||
all_existing_posts.append(str(post_metadata["id"]))
|
all_existing_posts.append(str(post_metadata["id"]))
|
||||||
else:
|
else:
|
||||||
print("not pulling post %s (post is local only)" % (post_metadata["id"]))
|
print(
|
||||||
|
"not pulling post %s (post is local only)"
|
||||||
|
% (post_metadata["id"])
|
||||||
|
)
|
||||||
|
|
||||||
# if we already have the post do nothing, possibly update
|
# if we already have the post do nothing, possibly update
|
||||||
elif str(post_metadata["id"]) in existing_posts:
|
elif str(post_metadata["id"]) in existing_posts:
|
||||||
@ -145,7 +151,10 @@ def main():
|
|||||||
str(post_metadata["id"])
|
str(post_metadata["id"])
|
||||||
) # create list of posts which have not been returned in the feed
|
) # create list of posts which have not been returned in the feed
|
||||||
elif str(post_metadata["id"]) in all_existing_posts:
|
elif str(post_metadata["id"]) in all_existing_posts:
|
||||||
print("skipping post %s as it was already pulled with a different hashtag." % (str(post_metadata["id"])))
|
print(
|
||||||
|
"skipping post %s as it was already pulled with a different hashtag."
|
||||||
|
% (str(post_metadata["id"]))
|
||||||
|
)
|
||||||
|
|
||||||
for post in existing_posts:
|
for post in existing_posts:
|
||||||
print(
|
print(
|
||||||
|
@ -102,52 +102,60 @@ def main():
|
|||||||
v = peertube.VideoApi(client)
|
v = peertube.VideoApi(client)
|
||||||
count = 100
|
count = 100
|
||||||
page = 0
|
page = 0
|
||||||
response = v.videos_get(count=count, filter="local", tags_one_of="publish", start=page)
|
try:
|
||||||
|
response = v.videos_get(count=count, filter="local", tags_one_of="publish", start=page)
|
||||||
videos = response.to_dict()
|
videos = response.to_dict()
|
||||||
total = videos['total']
|
total = videos['total']
|
||||||
videos = videos['data']
|
videos = videos['data']
|
||||||
total -= count
|
total -= count
|
||||||
if total > 0:
|
if total > 0:
|
||||||
to_download = total // count
|
to_download = total // count
|
||||||
last_page = total % count
|
last_page = total % count
|
||||||
for i in range(to_download):
|
for i in range(to_download):
|
||||||
page += 1
|
page += 1
|
||||||
response = v.videos_get(count=count, filter="local", tags_one_of="publish", start=page)
|
response = v.videos_get(count=count, filter="local", tags_one_of="publish", start=page)
|
||||||
videos += response.to_dict()['data']
|
videos += response.to_dict()['data']
|
||||||
if last_page > 0:
|
if last_page > 0:
|
||||||
page += 1
|
page += 1
|
||||||
response = v.videos_get(count=count, filter="local", tags_one_of="publish", start=page)
|
response = v.videos_get(count=count, filter="local", tags_one_of="publish", start=page)
|
||||||
videos += response.to_dict()['data'][-1*last_page:]
|
videos += response.to_dict()['data'][-1*last_page:]
|
||||||
|
|
||||||
|
|
||||||
output_dir = os.environ.get("OUTPUT_DIR")
|
output_dir = os.environ.get("OUTPUT_DIR")
|
||||||
|
|
||||||
if not os.path.exists(output_dir):
|
if not os.path.exists(output_dir):
|
||||||
os.mkdir(output_dir)
|
os.mkdir(output_dir)
|
||||||
|
|
||||||
existing_posts = os.listdir(output_dir)
|
existing_posts = os.listdir(output_dir)
|
||||||
|
|
||||||
for video_metadata in videos:
|
for video_metadata in videos:
|
||||||
post_name = slugify(video_metadata["name"]) + "-" + video_metadata["uuid"]
|
post_name = slugify(video_metadata["name"]) + "-" + video_metadata["uuid"]
|
||||||
post_dir = os.path.join(output_dir, post_name)
|
post_dir = os.path.join(output_dir, post_name)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
post_name not in existing_posts
|
post_name not in existing_posts
|
||||||
): # if there is a video we dont already have, make it
|
): # if there is a video we dont already have, make it
|
||||||
print(
|
print(
|
||||||
"New: ", video_metadata["name"], "({})".format(video_metadata["uuid"])
|
"New: ", video_metadata["name"], "({})".format(video_metadata["uuid"])
|
||||||
)
|
)
|
||||||
create_post(post_dir, video_metadata, host)
|
create_post(post_dir, video_metadata, host)
|
||||||
|
|
||||||
elif (
|
elif (
|
||||||
post_name in existing_posts
|
post_name in existing_posts
|
||||||
): # if we already have the video do nothing, possibly update
|
): # if we already have the video do nothing, possibly update
|
||||||
update_post(post_dir, video_metadata, host)
|
update_post(post_dir, video_metadata, host)
|
||||||
existing_posts.remove(
|
existing_posts.remove(
|
||||||
post_name
|
post_name
|
||||||
) # create list of posts which have not been returned by peertube
|
) # create list of posts which have not been returned by peertube
|
||||||
|
|
||||||
|
except:
|
||||||
|
print("didn't get a response from peertube, instance might have been taken down or made private. removing all posts.")
|
||||||
|
output_dir = os.environ.get("OUTPUT_DIR")
|
||||||
|
if not os.path.exists(output_dir):
|
||||||
|
os.mkdir(output_dir)
|
||||||
|
existing_posts = os.listdir(output_dir)
|
||||||
|
|
||||||
for post in existing_posts:
|
for post in existing_posts:
|
||||||
print("deleted", post) # rm posts not returned
|
print("deleted", post) # rm posts not returned
|
||||||
shutil.rmtree(os.path.join(output_dir, post))
|
shutil.rmtree(os.path.join(output_dir, post))
|
||||||
|
|
||||||
|
64
poetry.lock
generated
64
poetry.lock
generated
@ -242,6 +242,14 @@ category = "main"
|
|||||||
optional = false
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyyaml"
|
||||||
|
version = "6.0"
|
||||||
|
description = "YAML parser and emitter for Python"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "requests"
|
name = "requests"
|
||||||
version = "2.27.1"
|
version = "2.27.1"
|
||||||
@ -260,6 +268,21 @@ urllib3 = ">=1.21.1,<1.27"
|
|||||||
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
|
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
|
||||||
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
|
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "saneyaml"
|
||||||
|
version = "0.5.2"
|
||||||
|
description = "Read and write readable YAML safely preserving order and avoiding bad surprises with unwanted infered type conversions. This library is a PyYaml wrapper with sane behaviour to read and write readable YAML safely, typically when used for configuration."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "<4,>=3.6.*"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
PyYAML = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["Sphinx (>=3.3.1)", "sphinx-rtd-theme (>=0.5.0)", "doc8 (>=0.8.1)"]
|
||||||
|
testing = ["pytest (>=6)", "pytest-xdist (>=2)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sgmllib3k"
|
name = "sgmllib3k"
|
||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
@ -319,7 +342,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "1.1"
|
lock-version = "1.1"
|
||||||
python-versions = "^3.9"
|
python-versions = "^3.9"
|
||||||
content-hash = "c5c987253f949737210f4a3d3c3c24b0affd4a9c7d06de386c9bd514c592db8b"
|
content-hash = "86ebded9dbd151b57502b40d3e58d6d92f837bc776184afa84d297c40d6daa7a"
|
||||||
|
|
||||||
[metadata.files]
|
[metadata.files]
|
||||||
arrow = [
|
arrow = [
|
||||||
@ -469,10 +492,49 @@ pytz = [
|
|||||||
{file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
|
{file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
|
||||||
{file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
|
{file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
|
||||||
]
|
]
|
||||||
|
pyyaml = [
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
|
||||||
|
{file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
|
||||||
|
{file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
|
||||||
|
{file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
|
||||||
|
{file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
|
||||||
|
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
|
||||||
|
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
|
||||||
|
]
|
||||||
requests = [
|
requests = [
|
||||||
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
|
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
|
||||||
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
|
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
|
||||||
]
|
]
|
||||||
|
saneyaml = [
|
||||||
|
{file = "saneyaml-0.5.2-py3-none-any.whl", hash = "sha256:e54ed827973647ee9be8e8c091536b55ad22b3f9b1296e36701a3544822e7eac"},
|
||||||
|
{file = "saneyaml-0.5.2.tar.gz", hash = "sha256:d6074f1959041342ab41d74a6f904720ffbcf63c94467858e0e22e17e3c43d41"},
|
||||||
|
]
|
||||||
sgmllib3k = [
|
sgmllib3k = [
|
||||||
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
|
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
|
||||||
]
|
]
|
||||||
|
@ -16,6 +16,7 @@ peertube = {git = "https://framagit.org/framasoft/peertube/clients/python.git"}
|
|||||||
feedparser = "^6.0.8"
|
feedparser = "^6.0.8"
|
||||||
bs4 = "^0.0.1"
|
bs4 = "^0.0.1"
|
||||||
"Mastodon.py" = "^1.5.1"
|
"Mastodon.py" = "^1.5.1"
|
||||||
|
saneyaml = "^0.5.2"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
poetry2setup = "^1.0.0"
|
poetry2setup = "^1.0.0"
|
||||||
|
Reference in New Issue
Block a user