From 2db5be3438e48cf5d362c17d8390f6c5164bbdc6 Mon Sep 17 00:00:00 2001
From: rra <rscmbbng@riseup.net>
Date: Thu, 28 Nov 2024 21:47:00 +0100
Subject: [PATCH] follow navigation links in opds to retrieve entire catalog

---
 konfluks/feed.py | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/konfluks/feed.py b/konfluks/feed.py
index 844e0da..8360c12 100644
--- a/konfluks/feed.py
+++ b/konfluks/feed.py
@@ -312,12 +312,15 @@ def create_opds_post(post_dir, entry):
         ft = item['type'].split('/')[-1]
         fn = item['rel'].split('/')[-1]
 
-        if fn == "acquisition":
-            fn = "publication" #calling the publications acquisition is weird
+        # entry.links has image, thumbnail and publication/acquisition.
+        # Only downloading image for now
+        #if fn == "acquisition": 
+        #fn = "publication" #calling the publications acquisition is weird 
 
-        prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft)
-
-        grab_media(post_dir, item['href'], prefered_name)
+        if 'image' in fn:
+            prefered_name = "{}-{}.{}".format(fn, slugify(entry['title']), ft)
+            grab_media(post_dir, item['href'], prefered_name)
+            frontmatter['featured_image'] = prefered_name
 
         if "summary" in entry:
             summary = entry.summary
@@ -333,6 +336,18 @@ def create_opds_post(post_dir, entry):
         timestamp = arrow.get(entry['updated_parsed'])
         f.write(timestamp.format('X'))
 
+def opds_fetch_more(data):
+    """
+    Fetch the next page of an OPDS feed, if the feed advertises one.
+
+    Returns grab_feed()'s result for the first rel="next" link, else None.
+    """
+    for link in data.feed.links:
+        if link.get('rel') == 'next':
+            print(link['href'])
+            return grab_feed(link['href'])
+    return None
+
 
 def main():
     feed_urls = open("feeds_list.txt", "r").read().splitlines()
@@ -379,12 +394,23 @@ def main():
         if data: #whenever we get a 200
             if data.feed: #only if it is an actual feed
                 opds_feed = False
+                opds_entries = []
                 if 'links' in data.feed:
                     for i in data.feed['links']:
                             if i['rel'] == 'self':
                                 if 'opds' in i['type']:
                                     opds_feed = True
                                     print("OPDS type feed!")
+                                    feed_data = data
+                                    while feed_data:
+                                        feed_data = opds_fetch_more(feed_data)
+                                        if feed_data:
+                                            for i in feed_data.entries:
+                                                opds_entries.append(i)
+                                    for i in opds_entries:
+                                        data['entries'].append(i)
+                                    
+
 
                 for entry in data.entries:
                     # if 'tags' in entry: