follow navigation links in opds to retrieve entire catalog

2024-11-28 21:47:00 +01:00
1 changed files with 31 additions and 5 deletions
--- a/konfluks/feed.py
+++ b/konfluks/feed.py
@ -312,12 +312,15 @@ def create_opds_post(post_dir, entry):
        ft = item['type'].split('/')[-1]
        fn = item['rel'].split('/')[-1]

-        if fn == "acquisition":
-            fn = "publication" #calling the publications acquisition is weird
+        # entry.links has image, thumbnail and publication/acquisition.
+        # Only downloading image for now
+        #if fn == "acquisition": 
+        #fn = "publication" #calling the publications acquisition is weird 

-        prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft)
-
-        grab_media(post_dir, item['href'], prefered_name)
+        if 'image' in fn:
+            prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft)
+            grab_media(post_dir, item['href'], prefered_name)
+            frontmatter['featured_image'] = prefered_name

        if "summary" in entry:
            summary = entry.summary
@ -333,6 +336,18 @@ def create_opds_post(post_dir, entry):
        timestamp = arrow.get(entry['updated_parsed'])
        f.write(timestamp.format('X'))

+def opds_fetch_more(data):
+    """
+    Fetch the next page of an OPDS feed: return what grab_feed gives for the
+    first link whose "rel" is "next", or None when the feed has no such link.
+    """
+    # Match on "rel" only, so a link whose title or type is "next" is ignored.
+    for link in data.feed.get('links', []):
+        if link.get('rel') == 'next' and link.get('href'):
+            print(link['href'])
+            return grab_feed(link['href'])
+    return None
+

 def main():
    feed_urls = open("feeds_list.txt", "r").read().splitlines()
@ -379,12 +394,23 @@ def main():
        if data: #whenever we get a 200
            if data.feed: #only if it is an actual feed
                opds_feed = False
+                opds_entries = []
                if 'links' in data.feed:
                    for i in data.feed['links']:
                            if i['rel'] == 'self':
                                if 'opds' in i['type']:
                                    opds_feed = True
                                    print("OPDS type feed!")
+                                    feed_data = data
+                                    while feed_data:
+                                        feed_data = opds_fetch_more(feed_data)
+                                        if feed_data:
+                                            for i in feed_data.entries:
+                                                opds_entries.append(i)
+                                    for i in opds_entries:
+                                        data['entries'].append(i)
+                                    
+

                for entry in data.entries:
                    # if 'tags' in entry: