Compare commits

6020db4d15...opds-fetch

11 Commits

- 2db5be3438
- 028bc1df84
- 82a017f624
- 9d9f8f6d72
- e01aa9a607
- 3055ee37df
- a4f749ebd7
- 0ecc0ecd3a
- 657ced1ceb
- d21158eb91
- 98299daa1b
@@ -70,7 +70,7 @@ poetry install
 
 Each script requires some environment variables to run, you can see the latest deployment configuration over [here](https://git.autonomic.zone/ruangrupa/lumbung.space/src/branch/main/compose.yml), look for the values under the `environment: ...` stanza.
 
-All scripts have an entrypoint described in the [`pypoetry.toml`](https://git.autonomic.zone/ruangrupa/konfluks/src/commit/40bf9416b8792c08683ad8ac878093c7ef1b2f5d/pyproject.toml#L27-L31) which you can run via `poetry run ...`. For example, if you want to run the [`konfluks/video.py`](./knofluks/video.py) script, you'd do:
+All scripts have an entrypoint described in the [`pypoetry.toml`](./pyproject.toml) which you can run via `poetry run ...`. For example, if you want to run the [`konfluks/video.py`](./konfluks/video.py) script, you'd do:
 
 ```
 mkdir -p testdir
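The README paragraph kept as context above configures the scripts purely through environment variables taken from the deployment's `environment:` stanza. As a minimal sketch of how a script might pick such a value up, assuming a purely hypothetical `OUTPUT_DIR` variable (the real names live in the linked compose.yml):

```python
import os

# "OUTPUT_DIR" is a hypothetical name used only for illustration; the actual
# variables are defined in the compose.yml `environment:` stanza linked above.
output_dir = os.environ.get("OUTPUT_DIR", "testdir")
print("writing generated posts to", output_dir)
```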
@@ -138,9 +138,9 @@ def create_event_post(post_dir, event):
 
         for img in event_metadata["images"]:
 
             # parse img url to safe local image name
-            img_name = img.split("/")[-1]
-            fn, ext = img_name.split(".")
-            img_name = slugify(fn) + "." + ext
+            img_name = os.path.basename(img)
+            fn, ext = os.path.splitext(img_name)
+            img_name = slugify(fn) + '.' + ext
 
             local_image = os.path.join(post_dir, img_name)
 
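For reference, the two `os.path` helpers this hunk switches to behave as follows (illustrative values, not taken from the repository):

```python
import os

img = "https://example.org/media/photo.final.JPG"  # made-up URL

# os.path.basename() splits on the path separator, so it also works on URL paths.
img_name = os.path.basename(img)      # 'photo.final.JPG'

# os.path.splitext() splits off only the last extension and keeps its leading dot.
fn, ext = os.path.splitext(img_name)  # ('photo.final', '.JPG')
```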
konfluks/feed.py (141 changed lines)
@@ -155,8 +155,11 @@ def parse_enclosures(post_dir, entry):
         if "type" in e:
             print("found enclosed media", e.type)
             if "image/" in e.type:
+                if not os.path.exists(post_dir): #this might be redundant with create_post
+                    os.makedirs(post_dir)
                 featured_image = grab_media(post_dir, e.href)
-                entry["featured_image"] = featured_image
+                media_item = urlparse(e.href).path.split('/')[-1]
+                entry["featured_image"] = media_item
             else:
                 print("FIXME:ignoring enclosed", e.type)
     return entry
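A quick illustration of how the new `urlparse` line turns an enclosure URL into a local file name; the URL is made up and `urlparse` comes from the standard library:

```python
from urllib.parse import urlparse

href = "https://example.org/podcast/episode-12.mp3?download=1"

# .path drops the query string, so only the final path segment is kept.
media_item = urlparse(href).path.split('/')[-1]
print(media_item)  # episode-12.mp3
```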
@@ -309,12 +312,15 @@ def create_opds_post(post_dir, entry):
         ft = item['type'].split('/')[-1]
         fn = item['rel'].split('/')[-1]
 
-        if fn == "acquisition":
-            fn = "publication" #calling the publications acquisition is weird
+        # entry.links has image, thumbnail and publication/acquisition.
+        # Only downloading image for now
+        #if fn == "acquisition":
+            #fn = "publication" #calling the publications acquisition is weird
 
-        prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft)
-        grab_media(post_dir, item['href'], prefered_name)
+        if 'image' in fn:
+            prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft)
+            grab_media(post_dir, item['href'], prefered_name)
+            frontmatter['featured_image'] = prefered_name
 
         if "summary" in entry:
             summary = entry.summary
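To see what the new `if 'image' in fn:` branch matches, here is a sketch with an illustrative OPDS link dictionary (made-up data, shaped like what feedparser exposes in `entry.links`), run through the same `rel`/`type` splitting as above. It assumes the python-slugify package, which the surrounding code also appears to use:

```python
from slugify import slugify  # assumption: python-slugify

# Illustrative OPDS cover-image link; not taken from a real feed.
item = {
    "rel": "http://opds-spec.org/image",
    "type": "image/jpeg",
    "href": "https://example.org/covers/some-book.jpg",
}
title = "Some Book"

ft = item['type'].split('/')[-1]  # 'jpeg'
fn = item['rel'].split('/')[-1]   # 'image'

if 'image' in fn:
    prefered_name = "{}-{}.{}".format(fn, slugify(title), ft)
    print(prefered_name)  # image-some-book.jpeg
```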
@@ -330,6 +336,18 @@ def create_opds_post(post_dir, entry):
         timestamp = arrow.get(entry['updated_parsed'])
         f.write(timestamp.format('X'))
 
 
+def opds_fetch_more(data):
+    """
+    Look for mode OPDS feeds to pull, untill we no longer hit the "next" navigation property.
+    """
+    for link in data.feed.links:
+        for i in link:
+            if link[i] == 'next':
+                print(link['href'])
+                data = grab_feed(link['href'])
+                return data
+    return None
+
 def main():
     feed_urls = open("feeds_list.txt", "r").read().splitlines()
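`opds_fetch_more` returns the next page of an OPDS feed by scanning `data.feed.links` for a `rel` value of `next`. The sketch below shows the same pagination idea in a simplified, self-contained form; it is not the repository's helper, and `grab_feed` is stubbed with a plain `feedparser.parse` call:

```python
import feedparser

def grab_feed(url):
    # Stand-in for konfluks' own grab_feed helper.
    return feedparser.parse(url)

def fetch_all_pages(start_url):
    """Follow rel="next" links until a page no longer advertises one."""
    entries = []
    data = grab_feed(start_url)
    while data:
        entries.extend(data.entries)
        next_href = None
        for link in data.feed.get('links', []):
            if link.get('rel') == 'next':
                next_href = link['href']
                break
        data = grab_feed(next_href) if next_href else None
    return entries
```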
@@ -373,62 +391,77 @@ def main():
 
         data = grab_feed(feed_url)
 
-        if data:
-
-            opds_feed = False
-            for i in data.feed['links']:
-                if i['rel'] == 'self':
-                    if 'opds' in i['type']:
-                        opds_feed = True
-                        print("OPDS type feed!")
+        if data: #whenever we get a 200
+            if data.feed: #only if it is an actual feed
+                opds_feed = False
+                opds_entries = []
+                if 'links' in data.feed:
+                    for i in data.feed['links']:
+                        if i['rel'] == 'self':
+                            if 'opds' in i['type']:
+                                opds_feed = True
+                                print("OPDS type feed!")
+                                feed_data = data
+                                while feed_data:
+                                    feed_data = opds_fetch_more(feed_data)
+                                    if feed_data:
+                                        for i in feed_data.entries:
+                                            opds_entries.append(i)
+                                for i in opds_entries:
+                                    data['entries'].append(i)
 
-            for entry in data.entries:
-                # if 'tags' in entry:
-                # for tag in entry.tags:
-                # for x in ['lumbung.space', 'D15', 'lumbung']:
-                # if x in tag['term']:
-                # print(entry.title)
-                entry["feed_name"] = feed_name
-
-                post_name = slugify(entry.title)
-
-                # pixelfed returns the whole post text as the post name. max
-                # filename length is 255 on many systems. here we're shortening
-                # the name and adding a hash to it to avoid a conflict in a
-                # situation where 2 posts start with exactly the same text.
-                if len(post_name) > 150:
-                    post_hash = md5(bytes(post_name, "utf-8"))
-                    post_name = post_name[:150] + "-" + post_hash.hexdigest()
-
-                if opds_feed:
-                    entry['opds'] = True
-                    #format: Beyond-Debiasing-Report_Online-75535a4886e3
-                    post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1]
-
-                post_dir = os.path.join(output_dir, feed_name, post_name)
-
-                if post_name not in existing_posts:
-                    # if there is a blog entry we dont already have, make it
-                    if opds_feed:
-                        create_opds_post(post_dir, entry)
-                    else:
-                        create_post(post_dir, entry)
-
-                elif post_name in existing_posts:
-                    # if we already have it, update it
-                    if opds_feed:
-                        create_opds_post(post_dir, entry)
-                    else:
-                        create_post(post_dir, entry)
-
-                    existing_posts.remove(
-                        post_name
-                    ) # create list of posts which have not been returned by the feed
-
-            for post in existing_posts:
-                # remove blog posts no longer returned by the RSS feed
-                print("deleted", post)
-                shutil.rmtree(os.path.join(feed_dir, slugify(post)))
+                for entry in data.entries:
+                    # if 'tags' in entry:
+                    # for tag in entry.tags:
+                    # for x in ['lumbung.space', 'D15', 'lumbung']:
+                    # if x in tag['term']:
+                    # print(entry.title)
+                    entry["feed_name"] = feed_name
+
+                    post_name = slugify(entry.title)
+
+                    # pixelfed returns the whole post text as the post name. max
+                    # filename length is 255 on many systems. here we're shortening
+                    # the name and adding a hash to it to avoid a conflict in a
+                    # situation where 2 posts start with exactly the same text.
+                    if len(post_name) > 150:
+                        post_hash = md5(bytes(post_name, "utf-8"))
+                        post_name = post_name[:150] + "-" + post_hash.hexdigest()
+
+                    if opds_feed:
+                        entry['opds'] = True
+                        #format: Beyond-Debiasing-Report_Online-75535a4886e3
+                        post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1]
+
+                    post_dir = os.path.join(output_dir, feed_name, post_name)
+
+                    if post_name not in existing_posts:
+                        # if there is a blog entry we dont already have, make it
+                        if opds_feed:
+                            create_opds_post(post_dir, entry)
+                        else:
+                            create_post(post_dir, entry)
+
+                    elif post_name in existing_posts:
+                        # if we already have it, update it
+                        if opds_feed:
+                            create_opds_post(post_dir, entry)
+                        else:
+                            create_post(post_dir, entry)
+
+                        existing_posts.remove(
+                            post_name
+                        ) # create list of posts which have not been returned by the feed
+
+                for post in existing_posts:
+                    # remove blog posts no longer returned by the RSS feed
+                    post_dir = os.path.join(output_dir, feed_name, post)
+                    shutil.rmtree(post_dir)
+                    print("deleted", post_dir)
+            else:
+                print(feed_url, "is not or no longer a feed!")
 
         end = time.time()
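The pixelfed comment block in this hunk explains why long post names are truncated and suffixed with a hash. Pulled out as a standalone sketch with a made-up caption, the same logic looks like this (again assuming python-slugify):

```python
from hashlib import md5
from slugify import slugify  # assumption: python-slugify

post_title = "a very long pixelfed caption that repeats itself " * 10
post_name = slugify(post_title)

# Max filename length is 255 on many systems: shorten to 150 characters and
# append a hash so two posts starting with the same text don't collide.
if len(post_name) > 150:
    post_hash = md5(bytes(post_name, "utf-8"))
    post_name = post_name[:150] + "-" + post_hash.hexdigest()

print(len(post_name))  # 150 + 1 + 32 = 183 characters, well under 255
```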
@@ -60,6 +60,21 @@ def download_media(post_directory, media_attachments):
                 with open(os.path.join(post_directory, image), "wb") as img_file:
                     shutil.copyfileobj(response.raw, img_file)
                     print("Downloaded cover image", image)
+        elif item["type"] == "video":
+            video = localize_media_url(item["url"])
+            if not os.path.exists(os.path.join(post_directory, video)):
+                # download video file
+                response = requests.get(item["url"], stream=True)
+                with open(os.path.join(post_directory, video), "wb") as video_file:
+                    shutil.copyfileobj(response.raw, video_file)
+                    print("Downloaded video in post", video)
+            if not os.path.exists(os.path.join(post_directory, "thumbnail.png")):
+                #download video preview
+                response = requests.get(item["preview_url"], stream=True)
+                with open(os.path.join(post_directory, "thumbnail.png"), "wb") as thumbnail:
+                    shutil.copyfileobj(response.raw, thumbnail)
+                    print("Downloaded thumbnail for", video)
+
 
 
 def create_post(post_directory, post_metadata):
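The new video branch reuses the streaming pattern already used for images: `requests.get(..., stream=True)` together with `shutil.copyfileobj` writes the body to disk without holding the whole file in memory. A self-contained sketch of that pattern with a hypothetical URL and target path:

```python
import shutil
import requests

def stream_download(url, target_path):
    # stream=True defers reading the body; copyfileobj then pipes it to disk.
    response = requests.get(url, stream=True)
    response.raise_for_status()
    with open(target_path, "wb") as out_file:
        shutil.copyfileobj(response.raw, out_file)
    return target_path

# Illustrative usage (made-up URL):
# stream_download("https://example.org/media/clip.mp4", "testdir/clip.mp4")
```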
@@ -78,7 +93,6 @@ def create_post(post_directory, post_metadata):
     post_metadata["account"]["display_name"] = name
     env.filters["localize_media_url"] = localize_media_url
     env.filters["filter_mastodon_urls"] = filter_mastodon_urls
-
     template = env.get_template("hashtag.md")
 
     with open(os.path.join(post_directory, "index.html"), "w") as f:
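For context on the `env.filters[...]` lines kept above: functions registered on the Jinja2 environment become available as `| filter` expressions inside `hashtag.md`. A minimal sketch of that mechanism with a toy filter and an inline template (not the repository's actual filter or template):

```python
from jinja2 import Environment

def localize_media_url(url):
    # Toy stand-in for the real filter: keep only the file name.
    return url.rstrip("/").split("/")[-1]

env = Environment()
env.filters["localize_media_url"] = localize_media_url

template = env.from_string('<img src="{{ item.url | localize_media_url }}">')
print(template.render(item={"url": "https://example.org/media/photo.jpg"}))
# <img src="photo.jpg">
```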
@@ -2,7 +2,7 @@
 title: "{{ event.name }}"
 date: "{{ event.begin }}" #2021-06-10T10:46:33+02:00
 draft: false
-categories: "calendar"
+source: "lumbung calendar"
 event_begin: "{{ event.begin }}"
 event_end: "{{ event.end }}"
 duration: "{{ event.duration }}"
@@ -3,11 +3,11 @@ title: "{{ frontmatter.title }}"
 date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00
 draft: false
 summary: "{{ frontmatter.summary }}"
-authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %}
+contributors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %}
 original_link: "{{ frontmatter.original_link }}"
 feed_name: "{{ frontmatter.feed_name}}"
-categories: ["{{ frontmatter.card_type }}", "{{ frontmatter.feed_name}}"]
-contributors: ["{{ frontmatter.feed_name}}"]
+card_type: "{{ frontmatter.card_type }}"
+sources: ["{{ frontmatter.feed_name}}"]
 tags: {{ frontmatter.tags }}
 {% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
 ---
@@ -1,17 +1,27 @@
 ---
 date: {{ post_metadata.created_at }} #2021-06-10T10:46:33+02:00
 draft: false
-authors: ["{{ post_metadata.account.display_name }}"]
-contributors: ["{{ post_metadata.account.acct}}"]
+contributors: ["{{ post_metadata.account.display_name }}"]
 avatar: {{ post_metadata.account.avatar }}
-categories: ["shouts"]
-images: [{% for i in post_metadata.media_attachments %} {{ i.url }}, {% endfor %}]
 title: {{ post_metadata.account.display_name }}
 tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}]
+images: [{% for i in post_metadata.media_attachments %}{% if i.type == "image" %}"{{ i.url | localize_media_url }}", {%endif%}{% endfor %}]
+videos: [{% for i in post_metadata.media_attachments %}{% if i.type == "video" %}"{{ i.url | localize_media_url }}", {%endif%}{% endfor %}]
 ---
 
 {% for item in post_metadata.media_attachments %}
+{% if item.type == "image" %}
 <img src="{{item.url | localize_media_url }}" alt="{{item.description}}">
+{% endif %}
+{% endfor %}
+
+{% for item in post_metadata.media_attachments %}
+{% if item.type == "video" %}
+<video controls width="540px" preload="none" poster="thumbnail.png">
+<source src="{{item.url | localize_media_url }}" type="video/mp4">
+{% if item.description %}{{item.description}}{% endif %}
+</video>
+{% endif %}
 {% endfor %}
 
 {{ post_metadata.content | filter_mastodon_urls }}
@@ -3,10 +3,10 @@ title: "{{ frontmatter.title }}"
 date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00
 draft: false
 summary: "{{ frontmatter.summary }}"
-authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %}
+contributors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %}
 original_link: "{{ frontmatter.original_link }}"
 feed_name: "{{ frontmatter.feed_name}}"
-categories: ["timeline", "{{ frontmatter.feed_name}}"]
+sources: ["timeline", "{{ frontmatter.feed_name}}"]
 timelines: {{ frontmatter.timelines }}
 hidden: true
 ---
@@ -9,7 +9,7 @@ channel_url: "{{ v.channel.url }}"
 contributors: ["{{ v.account.display_name }}"]
 preview_image: "{{ preview_image }}"
 images: ["./{{ preview_image }}"]
-categories: ["tv","{{ v.channel.display_name }}"]
+sources: ["{{ v.channel.display_name }}"]
 is_live: {{ v.is_live }}
 ---
 