Test whether a URL still returns a feed; pass the right filename as featured_image when handling an enclosure; pass post_dir to existing_posts
This commit is contained in:
parent
a4f749ebd7
commit
e01aa9a607
100
konfluks/feed.py
100
konfluks/feed.py
@ -156,7 +156,8 @@ def parse_enclosures(post_dir, entry):
|
|||||||
print("found enclosed media", e.type)
|
print("found enclosed media", e.type)
|
||||||
if "image/" in e.type:
|
if "image/" in e.type:
|
||||||
featured_image = grab_media(post_dir, e.href)
|
featured_image = grab_media(post_dir, e.href)
|
||||||
entry["featured_image"] = featured_image
|
media_item = urlparse(e.href).path.split('/')[-1]
|
||||||
|
entry["featured_image"] = media_item
|
||||||
else:
|
else:
|
||||||
print("FIXME:ignoring enclosed", e.type)
|
print("FIXME:ignoring enclosed", e.type)
|
||||||
return entry
|
return entry
|
||||||
@ -374,61 +375,64 @@ def main():
|
|||||||
data = grab_feed(feed_url)
|
data = grab_feed(feed_url)
|
||||||
|
|
||||||
if data:
|
if data:
|
||||||
|
|
||||||
opds_feed = False
|
opds_feed = False
|
||||||
for i in data.feed['links']:
|
if 'links' in data.feed:
|
||||||
if i['rel'] == 'self':
|
for i in data.feed['links']:
|
||||||
if 'opds' in i['type']:
|
if i['rel'] == 'self':
|
||||||
opds_feed = True
|
if 'opds' in i['type']:
|
||||||
print("OPDS type feed!")
|
opds_feed = True
|
||||||
|
print("OPDS type feed!")
|
||||||
|
|
||||||
|
if data.feed:
|
||||||
|
for entry in data.entries:
|
||||||
|
# if 'tags' in entry:
|
||||||
|
# for tag in entry.tags:
|
||||||
|
# for x in ['lumbung.space', 'D15', 'lumbung']:
|
||||||
|
# if x in tag['term']:
|
||||||
|
# print(entry.title)
|
||||||
|
entry["feed_name"] = feed_name
|
||||||
|
|
||||||
for entry in data.entries:
|
post_name = slugify(entry.title)
|
||||||
# if 'tags' in entry:
|
|
||||||
# for tag in entry.tags:
|
|
||||||
# for x in ['lumbung.space', 'D15', 'lumbung']:
|
|
||||||
# if x in tag['term']:
|
|
||||||
# print(entry.title)
|
|
||||||
entry["feed_name"] = feed_name
|
|
||||||
|
|
||||||
post_name = slugify(entry.title)
|
# pixelfed returns the whole post text as the post name. max
|
||||||
|
# filename length is 255 on many systems. here we're shortening
|
||||||
|
# the name and adding a hash to it to avoid a conflict in a
|
||||||
|
# situation where 2 posts start with exactly the same text.
|
||||||
|
if len(post_name) > 150:
|
||||||
|
post_hash = md5(bytes(post_name, "utf-8"))
|
||||||
|
post_name = post_name[:150] + "-" + post_hash.hexdigest()
|
||||||
|
|
||||||
# pixelfed returns the whole post text as the post name. max
|
|
||||||
# filename length is 255 on many systems. here we're shortening
|
|
||||||
# the name and adding a hash to it to avoid a conflict in a
|
|
||||||
# situation where 2 posts start with exactly the same text.
|
|
||||||
if len(post_name) > 150:
|
|
||||||
post_hash = md5(bytes(post_name, "utf-8"))
|
|
||||||
post_name = post_name[:150] + "-" + post_hash.hexdigest()
|
|
||||||
|
|
||||||
if opds_feed:
|
|
||||||
entry['opds'] = True
|
|
||||||
#format: Beyond-Debiasing-Report_Online-75535a4886e3
|
|
||||||
post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1]
|
|
||||||
|
|
||||||
post_dir = os.path.join(output_dir, feed_name, post_name)
|
|
||||||
|
|
||||||
if post_name not in existing_posts:
|
|
||||||
# if there is a blog entry we dont already have, make it
|
|
||||||
if opds_feed:
|
if opds_feed:
|
||||||
create_opds_post(post_dir, entry)
|
entry['opds'] = True
|
||||||
else:
|
#format: Beyond-Debiasing-Report_Online-75535a4886e3
|
||||||
create_post(post_dir, entry)
|
post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1]
|
||||||
|
|
||||||
elif post_name in existing_posts:
|
post_dir = os.path.join(output_dir, feed_name, post_name)
|
||||||
# if we already have it, update it
|
|
||||||
if opds_feed:
|
|
||||||
create_opds_post(post_dir, entry)
|
|
||||||
else:
|
|
||||||
create_post(post_dir, entry)
|
|
||||||
existing_posts.remove(
|
|
||||||
post_name
|
|
||||||
) # create list of posts which have not been returned by the feed
|
|
||||||
|
|
||||||
for post in existing_posts:
|
if post_name not in existing_posts:
|
||||||
# remove blog posts no longer returned by the RSS feed
|
# if there is a blog entry we dont already have, make it
|
||||||
print("deleted", post)
|
if opds_feed:
|
||||||
shutil.rmtree(os.path.join(feed_dir, slugify(post)))
|
create_opds_post(post_dir, entry)
|
||||||
|
else:
|
||||||
|
create_post(post_dir, entry)
|
||||||
|
|
||||||
|
elif post_name in existing_posts:
|
||||||
|
# if we already have it, update it
|
||||||
|
if opds_feed:
|
||||||
|
create_opds_post(post_dir, entry)
|
||||||
|
else:
|
||||||
|
create_post(post_dir, entry)
|
||||||
|
existing_posts.remove(
|
||||||
|
post_dir
|
||||||
|
) # create list of posts which have not been returned by the feed
|
||||||
|
|
||||||
|
|
||||||
|
for post in existing_posts:
|
||||||
|
# remove blog posts no longer returned by the RSS feed
|
||||||
|
print("deleted", post)
|
||||||
|
shutil.rmtree(post)
|
||||||
|
else:
|
||||||
|
print(feed_url, "is not or no longer a feed!")
|
||||||
|
|
||||||
end = time.time()
|
end = time.time()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user