Compare commits
	
		
			11 Commits
		
	
	
		
			6020db4d15
			...
			opds-fetch
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 2db5be3438 | |||
| 028bc1df84 | |||
| 82a017f624 | |||
| 9d9f8f6d72 | |||
| e01aa9a607 | |||
| 3055ee37df | |||
| a4f749ebd7 | |||
| 0ecc0ecd3a | |||
| 657ced1ceb | |||
| d21158eb91 | |||
| 98299daa1b | 
| @ -70,7 +70,7 @@ poetry install | |||||||
|  |  | ||||||
| Each script requires some environment variables to run, you can see the latest deployment configuration over [here](https://git.autonomic.zone/ruangrupa/lumbung.space/src/branch/main/compose.yml), look for the values under the `environment: ...` stanza. | Each script requires some environment variables to run, you can see the latest deployment configuration over [here](https://git.autonomic.zone/ruangrupa/lumbung.space/src/branch/main/compose.yml), look for the values under the `environment: ...` stanza. | ||||||
|  |  | ||||||
| All scripts have an entrypoint described in the [`pypoetry.toml`](https://git.autonomic.zone/ruangrupa/konfluks/src/commit/40bf9416b8792c08683ad8ac878093c7ef1b2f5d/pyproject.toml#L27-L31) which you can run via `poetry run ...`. For example, if you want to run the [`konfluks/video.py`](./knofluks/video.py) script, you'd do: | All scripts have an entrypoint described in the [`pypoetry.toml`](./pyproject.toml) which you can run via `poetry run ...`. For example, if you want to run the [`konfluks/video.py`](./konfluks/video.py) script, you'd do: | ||||||
|  |  | ||||||
| ``` | ``` | ||||||
| mkdir -p testdir | mkdir -p testdir | ||||||
|  | |||||||
| @ -138,9 +138,9 @@ def create_event_post(post_dir, event): | |||||||
|     for img in event_metadata["images"]: |     for img in event_metadata["images"]: | ||||||
|  |  | ||||||
|         # parse img url to safe local image name |         # parse img url to safe local image name | ||||||
|         img_name = img.split("/")[-1] |         img_name = os.path.basename(img) | ||||||
|         fn, ext = img_name.split(".") |         fn, ext = os.path.splitext(img_name) | ||||||
|         img_name = slugify(fn) + "." + ext |         img_name =  slugify(fn) + '.' + ext | ||||||
|  |  | ||||||
|         local_image = os.path.join(post_dir, img_name) |         local_image = os.path.join(post_dir, img_name) | ||||||
|  |  | ||||||
|  | |||||||
							
								
								
									
										139
									
								
								konfluks/feed.py
									
									
									
									
									
								
							
							
						
						
									
										139
									
								
								konfluks/feed.py
									
									
									
									
									
								
							| @ -155,8 +155,11 @@ def parse_enclosures(post_dir, entry): | |||||||
|         if "type" in e: |         if "type" in e: | ||||||
|             print("found enclosed media", e.type) |             print("found enclosed media", e.type) | ||||||
|             if "image/" in e.type: |             if "image/" in e.type: | ||||||
|  |                 if not os.path.exists(post_dir): #this might be redundant with create_post | ||||||
|  |                     os.makedirs(post_dir) | ||||||
|                 featured_image = grab_media(post_dir, e.href) |                 featured_image = grab_media(post_dir, e.href) | ||||||
|                 entry["featured_image"] = featured_image |                 media_item = urlparse(e.href).path.split('/')[-1] | ||||||
|  |                 entry["featured_image"] = media_item | ||||||
|             else: |             else: | ||||||
|                 print("FIXME:ignoring enclosed", e.type) |                 print("FIXME:ignoring enclosed", e.type) | ||||||
|     return entry |     return entry | ||||||
| @ -309,12 +312,15 @@ def create_opds_post(post_dir, entry): | |||||||
|         ft = item['type'].split('/')[-1] |         ft = item['type'].split('/')[-1] | ||||||
|         fn = item['rel'].split('/')[-1] |         fn = item['rel'].split('/')[-1] | ||||||
|  |  | ||||||
|         if fn == "acquisition": |         # entry.links has image, thumbnail and publication/acquisition. | ||||||
|             fn = "publication" #calling the publications acquisition is weird |         # Only downloading image for now | ||||||
|  |         #if fn == "acquisition":  | ||||||
|  |         #fn = "publication" #calling the publications acquisition is weird  | ||||||
|  |  | ||||||
|         prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft) |         if 'image' in fn: | ||||||
|  |             prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft) | ||||||
|         grab_media(post_dir, item['href'], prefered_name) |             grab_media(post_dir, item['href'], prefered_name) | ||||||
|  |             frontmatter['featured_image'] = prefered_name | ||||||
|  |  | ||||||
|         if "summary" in entry: |         if "summary" in entry: | ||||||
|             summary = entry.summary |             summary = entry.summary | ||||||
| @ -330,6 +336,18 @@ def create_opds_post(post_dir, entry): | |||||||
|         timestamp = arrow.get(entry['updated_parsed']) |         timestamp = arrow.get(entry['updated_parsed']) | ||||||
|         f.write(timestamp.format('X')) |         f.write(timestamp.format('X')) | ||||||
|  |  | ||||||
|  | def opds_fetch_more(data): | ||||||
|  |     """ | ||||||
|  |     Look for more OPDS feeds to pull, until we no longer hit the "next" navigation property. | ||||||
|  |     """ | ||||||
|  |     for link in data.feed.links: | ||||||
|  |         for i in link: | ||||||
|  |             if link[i] == 'next': | ||||||
|  |                 print(link['href']) | ||||||
|  |                 data = grab_feed(link['href']) | ||||||
|  |                 return data | ||||||
|  |     return None | ||||||
|  |  | ||||||
|  |  | ||||||
| def main(): | def main(): | ||||||
|     feed_urls = open("feeds_list.txt", "r").read().splitlines() |     feed_urls = open("feeds_list.txt", "r").read().splitlines() | ||||||
| @ -373,62 +391,77 @@ def main(): | |||||||
|  |  | ||||||
|         data = grab_feed(feed_url) |         data = grab_feed(feed_url) | ||||||
|  |  | ||||||
|         if data: |         if data: #whenever we get a 200 | ||||||
|  |             if data.feed: #only if it is an actual feed | ||||||
|             opds_feed = False |                 opds_feed = False | ||||||
|             for i in data.feed['links']: |                 opds_entries = [] | ||||||
|                 if i['rel'] == 'self': |                 if 'links' in data.feed: | ||||||
|                     if 'opds' in i['type']: |                     for i in data.feed['links']: | ||||||
|                         opds_feed = True |                             if i['rel'] == 'self': | ||||||
|                         print("OPDS type feed!") |                                 if 'opds' in i['type']: | ||||||
|  |                                     opds_feed = True | ||||||
|  |                                     print("OPDS type feed!") | ||||||
|  |                                     feed_data = data | ||||||
|  |                                     while feed_data: | ||||||
|  |                                         feed_data = opds_fetch_more(feed_data) | ||||||
|  |                                         if feed_data: | ||||||
|  |                                             for i in feed_data.entries: | ||||||
|  |                                                 opds_entries.append(i) | ||||||
|  |                                     for i in opds_entries: | ||||||
|  |                                         data['entries'].append(i) | ||||||
|                                      |                                      | ||||||
|  |  | ||||||
|             for entry in data.entries: |  | ||||||
|                 # if 'tags' in entry: |  | ||||||
|                 #     for tag in entry.tags: |  | ||||||
|                 #        for x in ['lumbung.space', 'D15', 'lumbung']: |  | ||||||
|                 #            if x in tag['term']: |  | ||||||
|                 #                print(entry.title) |  | ||||||
|                 entry["feed_name"] = feed_name |  | ||||||
|  |  | ||||||
|                 post_name = slugify(entry.title) |                 for entry in data.entries: | ||||||
|  |                     # if 'tags' in entry: | ||||||
|  |                     #     for tag in entry.tags: | ||||||
|  |                     #        for x in ['lumbung.space', 'D15', 'lumbung']: | ||||||
|  |                     #            if x in tag['term']: | ||||||
|  |                     #                print(entry.title) | ||||||
|  |                     entry["feed_name"] = feed_name | ||||||
|  |  | ||||||
|                 # pixelfed returns the whole post text as the post name. max |                     post_name = slugify(entry.title) | ||||||
|                 # filename length is 255 on many systems. here we're shortening |  | ||||||
|                 # the name and adding a hash to it to avoid a conflict in a |  | ||||||
|                 # situation where 2 posts start with exactly the same text. |  | ||||||
|                 if len(post_name) > 150: |  | ||||||
|                     post_hash = md5(bytes(post_name, "utf-8")) |  | ||||||
|                     post_name = post_name[:150] + "-" + post_hash.hexdigest() |  | ||||||
|  |  | ||||||
|                 if opds_feed: |                     # pixelfed returns the whole post text as the post name. max | ||||||
|                     entry['opds'] = True |                     # filename length is 255 on many systems. here we're shortening | ||||||
|                     #format: Beyond-Debiasing-Report_Online-75535a4886e3 |                     # the name and adding a hash to it to avoid a conflict in a | ||||||
|                     post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1] |                     # situation where 2 posts start with exactly the same text. | ||||||
|  |                     if len(post_name) > 150: | ||||||
|  |                         post_hash = md5(bytes(post_name, "utf-8")) | ||||||
|  |                         post_name = post_name[:150] + "-" + post_hash.hexdigest() | ||||||
|  |  | ||||||
|                 post_dir = os.path.join(output_dir, feed_name, post_name) |  | ||||||
|  |  | ||||||
|                 if post_name not in existing_posts: |  | ||||||
|                     # if there is a blog entry we dont already have, make it |  | ||||||
|                     if opds_feed: |                     if opds_feed: | ||||||
|                         create_opds_post(post_dir, entry) |                         entry['opds'] = True | ||||||
|                     else: |                         #format: Beyond-Debiasing-Report_Online-75535a4886e3 | ||||||
|                         create_post(post_dir, entry) |                         post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1] | ||||||
|  |  | ||||||
|                 elif post_name in existing_posts: |                     post_dir = os.path.join(output_dir, feed_name, post_name) | ||||||
|                     # if we already have it, update it |  | ||||||
|                     if opds_feed: |  | ||||||
|                         create_opds_post(post_dir, entry) |  | ||||||
|                     else: |  | ||||||
|                         create_post(post_dir, entry) |  | ||||||
|                     existing_posts.remove( |  | ||||||
|                         post_name |  | ||||||
|                     )  # create list of posts which have not been returned by the feed |  | ||||||
|  |  | ||||||
|             for post in existing_posts: |                     if post_name not in existing_posts: | ||||||
|                 # remove blog posts no longer returned by the RSS feed |                         # if there is a blog entry we dont already have, make it | ||||||
|                 print("deleted", post) |                         if opds_feed: | ||||||
|                 shutil.rmtree(os.path.join(feed_dir, slugify(post))) |                             create_opds_post(post_dir, entry) | ||||||
|  |                         else: | ||||||
|  |                             create_post(post_dir, entry) | ||||||
|  |  | ||||||
|  |                     elif post_name in existing_posts: | ||||||
|  |                         # if we already have it, update it | ||||||
|  |                         if opds_feed: | ||||||
|  |                             create_opds_post(post_dir, entry) | ||||||
|  |                         else: | ||||||
|  |                             create_post(post_dir, entry) | ||||||
|  |                         existing_posts.remove( | ||||||
|  |                             post_name | ||||||
|  |                         )  # create list of posts which have not been returned by the feed | ||||||
|  |  | ||||||
|  |  | ||||||
|  |                 for post in existing_posts: | ||||||
|  |                     # remove blog posts no longer returned by the RSS feed | ||||||
|  |                     post_dir = os.path.join(output_dir, feed_name, post) | ||||||
|  |                     shutil.rmtree(post_dir) | ||||||
|  |                     print("deleted", post_dir) | ||||||
|  |             else: | ||||||
|  |                 print(feed_url, "is not or no longer a feed!") | ||||||
|  |  | ||||||
|     end = time.time() |     end = time.time() | ||||||
|  |  | ||||||
|  | |||||||
| @ -60,6 +60,21 @@ def download_media(post_directory, media_attachments): | |||||||
|                 with open(os.path.join(post_directory, image), "wb") as img_file: |                 with open(os.path.join(post_directory, image), "wb") as img_file: | ||||||
|                     shutil.copyfileobj(response.raw, img_file) |                     shutil.copyfileobj(response.raw, img_file) | ||||||
|                     print("Downloaded cover image", image) |                     print("Downloaded cover image", image) | ||||||
|  |         elif item["type"] == "video": | ||||||
|  |             video = localize_media_url(item["url"]) | ||||||
|  |             if not os.path.exists(os.path.join(post_directory, video)): | ||||||
|  |                 # download video file | ||||||
|  |                 response = requests.get(item["url"], stream=True) | ||||||
|  |                 with open(os.path.join(post_directory, video), "wb") as video_file: | ||||||
|  |                     shutil.copyfileobj(response.raw, video_file) | ||||||
|  |                     print("Downloaded video in post", video) | ||||||
|  |             if not os.path.exists(os.path.join(post_directory, "thumbnail.png")): | ||||||
|  |                 #download video preview | ||||||
|  |                 response = requests.get(item["preview_url"], stream=True) | ||||||
|  |                 with open(os.path.join(post_directory, "thumbnail.png"), "wb") as thumbnail: | ||||||
|  |                     shutil.copyfileobj(response.raw, thumbnail) | ||||||
|  |                     print("Downloaded thumbnail for", video) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def create_post(post_directory, post_metadata): | def create_post(post_directory, post_metadata): | ||||||
| @ -78,7 +93,6 @@ def create_post(post_directory, post_metadata): | |||||||
|     post_metadata["account"]["display_name"] = name |     post_metadata["account"]["display_name"] = name | ||||||
|     env.filters["localize_media_url"] = localize_media_url |     env.filters["localize_media_url"] = localize_media_url | ||||||
|     env.filters["filter_mastodon_urls"] = filter_mastodon_urls |     env.filters["filter_mastodon_urls"] = filter_mastodon_urls | ||||||
|  |  | ||||||
|     template = env.get_template("hashtag.md") |     template = env.get_template("hashtag.md") | ||||||
|  |  | ||||||
|     with open(os.path.join(post_directory, "index.html"), "w") as f: |     with open(os.path.join(post_directory, "index.html"), "w") as f: | ||||||
|  | |||||||
| @ -2,7 +2,7 @@ | |||||||
| title: "{{ event.name }}" | title: "{{ event.name }}" | ||||||
| date: "{{ event.begin }}" #2021-06-10T10:46:33+02:00 | date: "{{ event.begin }}" #2021-06-10T10:46:33+02:00 | ||||||
| draft: false | draft: false | ||||||
| categories: "calendar" | source: "lumbung calendar" | ||||||
| event_begin: "{{ event.begin }}" | event_begin: "{{ event.begin }}" | ||||||
| event_end: "{{ event.end }}" | event_end: "{{ event.end }}" | ||||||
| duration: "{{ event.duration }}" | duration: "{{ event.duration }}" | ||||||
|  | |||||||
| @ -3,11 +3,11 @@ title: "{{ frontmatter.title }}" | |||||||
| date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 | date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 | ||||||
| draft: false | draft: false | ||||||
| summary: "{{ frontmatter.summary }}" | summary: "{{ frontmatter.summary }}" | ||||||
| authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} | contributors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} | ||||||
| original_link: "{{ frontmatter.original_link }}" | original_link: "{{ frontmatter.original_link }}" | ||||||
| feed_name: "{{ frontmatter.feed_name}}" | feed_name: "{{ frontmatter.feed_name}}" | ||||||
| categories: ["{{ frontmatter.card_type }}", "{{ frontmatter.feed_name}}"] | card_type: "{{ frontmatter.card_type }}" | ||||||
| contributors: ["{{ frontmatter.feed_name}}"] | sources: ["{{ frontmatter.feed_name}}"] | ||||||
| tags: {{ frontmatter.tags }} | tags: {{ frontmatter.tags }} | ||||||
| {% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %} | {% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %} | ||||||
| --- | --- | ||||||
|  | |||||||
| @ -1,17 +1,27 @@ | |||||||
| --- | --- | ||||||
| date: {{ post_metadata.created_at }} #2021-06-10T10:46:33+02:00 | date: {{ post_metadata.created_at }} #2021-06-10T10:46:33+02:00 | ||||||
| draft: false | draft: false | ||||||
| authors: ["{{ post_metadata.account.display_name }}"] | contributors: ["{{ post_metadata.account.display_name }}"] | ||||||
| contributors: ["{{ post_metadata.account.acct}}"] |  | ||||||
| avatar: {{ post_metadata.account.avatar }} | avatar: {{ post_metadata.account.avatar }} | ||||||
| categories: ["shouts"] |  | ||||||
| images: [{% for i in post_metadata.media_attachments %} {{ i.url }}, {% endfor %}] |  | ||||||
| title: {{ post_metadata.account.display_name }} | title: {{ post_metadata.account.display_name }} | ||||||
| tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}] | tags: [{% for i in post_metadata.tags %} "{{ i.name }}", {% endfor %}] | ||||||
|  | images: [{% for i in post_metadata.media_attachments %}{% if i.type == "image" %}"{{ i.url | localize_media_url  }}", {%endif%}{% endfor %}] | ||||||
|  | videos: [{% for i in post_metadata.media_attachments %}{% if i.type == "video" %}"{{ i.url | localize_media_url  }}", {%endif%}{% endfor %}] | ||||||
| --- | --- | ||||||
|  |  | ||||||
| {% for item in post_metadata.media_attachments %} | {% for item in post_metadata.media_attachments %} | ||||||
|  | {% if item.type == "image" %} | ||||||
| <img src="{{item.url | localize_media_url }}" alt="{{item.description}}"> | <img src="{{item.url | localize_media_url }}" alt="{{item.description}}"> | ||||||
|  | {% endif %} | ||||||
|  | {% endfor %} | ||||||
|  |  | ||||||
|  | {% for item in post_metadata.media_attachments %} | ||||||
|  | {% if item.type == "video" %} | ||||||
|  | <video controls width="540px" preload="none" poster="thumbnail.png"> | ||||||
|  | 	<source src="{{item.url | localize_media_url }}" type="video/mp4"> | ||||||
|  | {% if item.description %}{{item.description}}{% endif %} | ||||||
|  | </video> | ||||||
|  | {% endif %} | ||||||
| {% endfor %} | {% endfor %} | ||||||
|  |  | ||||||
| {{ post_metadata.content | filter_mastodon_urls }} | {{ post_metadata.content | filter_mastodon_urls }} | ||||||
|  | |||||||
| @ -3,10 +3,10 @@ title: "{{ frontmatter.title }}" | |||||||
| date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 | date: "{{ frontmatter.date }}" #2021-06-10T10:46:33+02:00 | ||||||
| draft: false | draft: false | ||||||
| summary: "{{ frontmatter.summary }}" | summary: "{{ frontmatter.summary }}" | ||||||
| authors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} | contributors: {% if frontmatter.author %} ["{{ frontmatter.author }}"] {% endif %} | ||||||
| original_link: "{{ frontmatter.original_link }}" | original_link: "{{ frontmatter.original_link }}" | ||||||
| feed_name: "{{ frontmatter.feed_name}}" | feed_name: "{{ frontmatter.feed_name}}" | ||||||
| categories: ["timeline", "{{ frontmatter.feed_name}}"] | sources: ["timeline", "{{ frontmatter.feed_name}}"] | ||||||
| timelines: {{ frontmatter.timelines }} | timelines: {{ frontmatter.timelines }} | ||||||
| hidden: true | hidden: true | ||||||
| --- | --- | ||||||
|  | |||||||
| @ -9,7 +9,7 @@ channel_url: "{{ v.channel.url }}" | |||||||
| contributors: ["{{ v.account.display_name }}"] | contributors: ["{{ v.account.display_name }}"] | ||||||
| preview_image: "{{ preview_image }}" | preview_image: "{{ preview_image }}" | ||||||
| images: ["./{{ preview_image }}"] | images: ["./{{ preview_image }}"] | ||||||
| categories: ["tv","{{ v.channel.display_name }}"] | sources: ["{{ v.channel.display_name }}"] | ||||||
| is_live: {{ v.is_live }} | is_live: {{ v.is_live }} | ||||||
| --- | --- | ||||||
|  |  | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user
	