From e01aa9a607bd7fd3f35d3c42a2df289a06d63c53 Mon Sep 17 00:00:00 2001 From: rra Date: Wed, 6 Nov 2024 16:48:41 +0100 Subject: [PATCH 1/2] Test whether a url still returns a feed, pass right filename as featured_image when handling enclosure, pass post_dir to existing_posts --- konfluks/feed.py | 100 ++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/konfluks/feed.py b/konfluks/feed.py index f51d3e9..08606d2 100644 --- a/konfluks/feed.py +++ b/konfluks/feed.py @@ -156,7 +156,8 @@ def parse_enclosures(post_dir, entry): print("found enclosed media", e.type) if "image/" in e.type: featured_image = grab_media(post_dir, e.href) - entry["featured_image"] = featured_image + media_item = urlparse(e.href).path.split('/')[-1] + entry["featured_image"] = media_item else: print("FIXME:ignoring enclosed", e.type) return entry @@ -374,61 +375,64 @@ def main(): data = grab_feed(feed_url) if data: - opds_feed = False - for i in data.feed['links']: - if i['rel'] == 'self': - if 'opds' in i['type']: - opds_feed = True - print("OPDS type feed!") + if 'links' in data.feed: + for i in data.feed['links']: + if i['rel'] == 'self': + if 'opds' in i['type']: + opds_feed = True + print("OPDS type feed!") + if data.feed: + for entry in data.entries: + # if 'tags' in entry: + # for tag in entry.tags: + # for x in ['lumbung.space', 'D15', 'lumbung']: + # if x in tag['term']: + # print(entry.title) + entry["feed_name"] = feed_name - for entry in data.entries: - # if 'tags' in entry: - # for tag in entry.tags: - # for x in ['lumbung.space', 'D15', 'lumbung']: - # if x in tag['term']: - # print(entry.title) - entry["feed_name"] = feed_name + post_name = slugify(entry.title) - post_name = slugify(entry.title) + # pixelfed returns the whole post text as the post name. max + # filename length is 255 on many systems. here we're shortening + # the name and adding a hash to it to avoid a conflict in a + # situation where 2 posts start with exactly the same text. + if len(post_name) > 150: + post_hash = md5(bytes(post_name, "utf-8")) + post_name = post_name[:150] + "-" + post_hash.hexdigest() - # pixelfed returns the whole post text as the post name. max - # filename length is 255 on many systems. here we're shortening - # the name and adding a hash to it to avoid a conflict in a - # situation where 2 posts start with exactly the same text. - if len(post_name) > 150: - post_hash = md5(bytes(post_name, "utf-8")) - post_name = post_name[:150] + "-" + post_hash.hexdigest() - - if opds_feed: - entry['opds'] = True - #format: Beyond-Debiasing-Report_Online-75535a4886e3 - post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1] - - post_dir = os.path.join(output_dir, feed_name, post_name) - - if post_name not in existing_posts: - # if there is a blog entry we dont already have, make it if opds_feed: - create_opds_post(post_dir, entry) - else: - create_post(post_dir, entry) + entry['opds'] = True + #format: Beyond-Debiasing-Report_Online-75535a4886e3 + post_name = slugify(entry['title'])+'-'+entry['id'].split('-')[-1] - elif post_name in existing_posts: - # if we already have it, update it - if opds_feed: - create_opds_post(post_dir, entry) - else: - create_post(post_dir, entry) - existing_posts.remove( - post_name - ) # create list of posts which have not been returned by the feed + post_dir = os.path.join(output_dir, feed_name, post_name) - for post in existing_posts: - # remove blog posts no longer returned by the RSS feed - print("deleted", post) - shutil.rmtree(os.path.join(feed_dir, slugify(post))) + if post_name not in existing_posts: + # if there is a blog entry we dont already have, make it + if opds_feed: + create_opds_post(post_dir, entry) + else: + create_post(post_dir, entry) + + elif post_name in existing_posts: + # if we already have it, update it + if opds_feed: + create_opds_post(post_dir, entry) + else: + create_post(post_dir, entry) + existing_posts.remove( + post_dir + ) # create list of posts which have not been returned by the feed + + + for post in existing_posts: + # remove blog posts no longer returned by the RSS feed + print("deleted", post) + shutil.rmtree(post) + else: + print(feed_url, "is not or no longer a feed!") end = time.time() From 9d9f8f6d72840f6e90aa9e621237d559936b5a69 Mon Sep 17 00:00:00 2001 From: rra Date: Wed, 6 Nov 2024 17:24:55 +0100 Subject: [PATCH 2/2] do proper deletion --- konfluks/feed.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/konfluks/feed.py b/konfluks/feed.py index 08606d2..53435ba 100644 --- a/konfluks/feed.py +++ b/konfluks/feed.py @@ -374,16 +374,16 @@ def main(): data = grab_feed(feed_url) - if data: - opds_feed = False - if 'links' in data.feed: - for i in data.feed['links']: - if i['rel'] == 'self': - if 'opds' in i['type']: - opds_feed = True - print("OPDS type feed!") + if data: #whenever we get a 200 + if data.feed: #only if it is an actual feed + opds_feed = False + if 'links' in data.feed: + for i in data.feed['links']: + if i['rel'] == 'self': + if 'opds' in i['type']: + opds_feed = True + print("OPDS type feed!") - if data.feed: for entry in data.entries: # if 'tags' in entry: # for tag in entry.tags: @@ -423,14 +423,15 @@ def main(): else: create_post(post_dir, entry) existing_posts.remove( - post_dir + post_name ) # create list of posts which have not been returned by the feed for post in existing_posts: # remove blog posts no longer returned by the RSS feed - print("deleted", post) - shutil.rmtree(post) + post_dir = os.path.join(output_dir, feed_name, post) + shutil.rmtree(post_dir) + print("deleted", post_dir) else: print(feed_url, "is not or no longer a feed!")