fix: don't pull the same post twice #19
@ -60,6 +60,11 @@ def create_frontmatter(entry):
|
||||
else:
|
||||
author = ""
|
||||
|
||||
if "title" in entry:
|
||||
title = entry.title
|
||||
else:
|
||||
title = ""
|
||||
|
||||
tags = []
|
||||
if "tags" in entry:
|
||||
# TODO finish categories
|
||||
@ -67,7 +72,7 @@ def create_frontmatter(entry):
|
||||
tags.append(t["term"])
|
||||
|
||||
frontmatter = {
|
||||
"title": entry.title,
|
||||
"title": title,
|
||||
"date": published.format(),
|
||||
"summary": "",
|
||||
"author": author,
|
||||
@ -194,13 +199,25 @@ def main():
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
feed_dict = dict()
|
||||
for url in feed_urls:
|
||||
feed_name = urlparse(url).netloc
|
||||
feed_dict[url] = feed_name
|
||||
|
||||
feed_names = feed_dict.values()
|
||||
content_dirs = os.listdir(output_dir)
|
||||
for i in content_dirs:
|
||||
if i not in feed_names:
|
||||
shutil.rmtree(os.path.join(output_dir, i))
|
||||
print("%s not in feeds_list.txt, removing local data" %(i))
|
||||
|
||||
# add iframe to the allowlist of feedparser's sanitizer,
|
||||
# this is now handled in parse_post()
|
||||
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
|
||||
|
||||
for feed_url in feed_urls:
|
||||
|
||||
feed_name = urlparse(feed_url).netloc
|
||||
feed_name = feed_dict[feed_url]
|
||||
|
||||
feed_dir = os.path.join(output_dir, feed_name)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user