Merge pull request 'handle feeds with enclosures (featured media / podcasts)' (#35) from r/lumbunglib:master into master

Reviewed-on: ruangrupa/lumbunglib#35
rra 2022-06-01 08:05:36 +02:00
commit 5ba944b6d1
2 changed files with 44 additions and 7 deletions

Changed file 1 of 2: the lumbunglib feed module (Python)

@@ -85,6 +85,11 @@ def create_frontmatter(entry):
         for t in entry.tags:
             tags.append(t['term'])
 
+    if "featured_image" in entry:
+        featured_image = entry.featured_image
+    else:
+        featured_image = ''
+
     card_type = "network"
     if entry.feed_name == "pen.lumbung.space":
         card_type = "pen"
@@ -110,7 +115,8 @@ def create_frontmatter(entry):
         'original_link': entry.link,
         'feed_name': entry['feed_name'],
         'tags': str(tags),
-        'card_type': card_type
+        'card_type': card_type,
+        'featured_image': featured_image
     }
 
     return frontmatter
@@ -136,11 +142,33 @@ def sanitize_yaml (frontmatter):
 
     return frontmatter
 
+
+def parse_enclosures(post_dir, entry):
+    """
+    Parses feed enclosures which are featured media
+    Can be featured image but also podcast entries
+    https://pythonhosted.org/feedparser/reference-entry-enclosures.html
+    """
+    #TODO parse more than images
+    #TODO handle the fact it could be multiple items
+    for e in entry.enclosures:
+        if "type" in e:
+            print("found enclosed media", e.type)
+            if "image/" in e.type:
+                featured_image = grab_media(post_dir, e.href)
+                entry["featured_image"] = featured_image
+            else:
+                print("FIXME:ignoring enclosed", e.type)
+    return entry
+
+
 def create_post(post_dir, entry):
     """
     write hugo post based on RSS entry
     """
+
+    if "enclosures" in entry:
+        entry = parse_enclosures(post_dir, entry)
+
     frontmatter = create_frontmatter(entry)
 
     if not os.path.exists(post_dir):
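For context, feedparser exposes RSS/Atom enclosure elements as an entry.enclosures list of dict-like objects with href, type and length keys, which is what parse_enclosures() walks over. A minimal sketch of that lookup outside the diff; the feed URL is a placeholder and not part of this change:

import feedparser

# Illustration only: how feedparser exposes enclosures.
data = feedparser.parse("https://example.org/feed.xml")

for entry in data.entries:
    # Each enclosure carries 'href', 'type' and 'length' when the feed provides them.
    for e in entry.get("enclosures", []):
        if "type" in e and e["type"].startswith("image/"):
            print("featured image candidate:", e["href"])
        elif "type" in e:
            print("other enclosed media, e.g. podcast audio:", e["type"])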
@@ -169,18 +197,25 @@ def grab_media(post_directory, url, prefered_name=None):
     """
     media_item = urlparse(url).path.split('/')[-1]
 
+    headers = {
+        'User-Agent': 'https://git.autonomic.zone/ruangrupa/lumbunglib',
+        'From': 'info@lumbung.space' # This is another valid field
+    }
+
     if prefered_name:
         media_item = prefered_name
 
     try:
         if not os.path.exists(os.path.join(post_directory, media_item)):
             #TODO: stream is true is a conditional so we could check the headers for things, mimetype etc
-            response = requests.get(url, stream=True)
+            response = requests.get(url, headers=headers, stream=True)
             if response.ok:
                 with open(os.path.join(post_directory, media_item), 'wb') as media_file:
                     shutil.copyfileobj(response.raw, media_file)
                 print('Downloaded media item', media_item)
+                return media_item
+            else:
+                print("Download failed", response.status_code)
+                return url
             return media_item
         elif os.path.exists(os.path.join(post_directory, media_item)):
             return media_item
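The TODO above points out that because the request is streamed, the response headers could be inspected before anything is written to disk. A hedged sketch of that idea, not part of this PR; grab_media_sketch() and its media_item default are made-up names for illustration:

import os
import shutil
import requests

def grab_media_sketch(post_directory, url, media_item="media.jpg"):
    # With stream=True the body is not downloaded until response.raw is read,
    # so Content-Type can be checked before writing the file.
    headers = {
        'User-Agent': 'https://git.autonomic.zone/ruangrupa/lumbunglib',
        'From': 'info@lumbung.space',
    }
    response = requests.get(url, headers=headers, stream=True)
    if response.ok and response.headers.get("Content-Type", "").startswith("image/"):
        with open(os.path.join(post_directory, media_item), 'wb') as media_file:
            shutil.copyfileobj(response.raw, media_file)
        return media_item
    # Mirror the PR's fallback: return the original URL so the post can hotlink it.
    return url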
@@ -235,11 +270,12 @@ def grab_feed(feed_url):
         print(e)
         return False
 
-    print(data.status, feed_url)
-    if data.status == 200:
-        # 304 means the feed has not been modified since we last checked
-        write_etag(feed_name, data)
-        return data
+    if "status" in data:
+        print(data.status, feed_url)
+        if data.status == 200:
+            # 304 means the feed has not been modified since we last checked
+            write_etag(feed_name, data)
+            return data
 
     return False
 
 def create_opds_post(post_dir, entry):
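The new "status" check matters because feedparser only includes that field when an HTTP response was actually received; when it is absent, reading data.status fails. A minimal sketch of the 200-versus-304 conditional-fetch behaviour, with a placeholder URL, for illustration only:

import feedparser

url = "https://example.org/feed.xml"  # placeholder feed URL
first = feedparser.parse(url)

if "status" in first and first.status == 200:
    # Send the stored ETag back on the next poll; an unchanged feed answers
    # with 304 and no entries, which is why only 200 leads to write_etag().
    second = feedparser.parse(url, etag=first.get("etag"))
    if "status" in second and second.status == 304:
        print("feed unchanged since the last fetch")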

Changed file 2 of 2: the Hugo post template

@@ -8,6 +8,7 @@ original_link: "{{ frontmatter.original_link }}"
 feed_name: "{{ frontmatter.feed_name}}"
 categories: ["{{ frontmatter.card_type }}", "{{ frontmatter.feed_name}}"]
 tags: {{ frontmatter.tags }}
+{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}
 ---
 
 {{ content }}
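The guard keeps featured_image out of the Hugo front matter entirely when no enclosure image was found, since create_frontmatter() falls back to an empty string. A quick check of that behaviour, assuming the template is rendered with Jinja2 (suggested by the {% if %} syntax):

from jinja2 import Template

line = '{% if frontmatter.featured_image %}featured_image: "{{frontmatter.featured_image}}"{% endif %}'

print(Template(line).render(frontmatter={"featured_image": "cover.jpg"}))
# featured_image: "cover.jpg"
print(repr(Template(line).render(frontmatter={"featured_image": ""})))
# '' (the line disappears when featured_image is empty)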