fix: don't pull the same post twice #19
@ -60,6 +60,11 @@ def create_frontmatter(entry):
|
|||||||
else:
|
else:
|
||||||
author = ""
|
author = ""
|
||||||
|
|
||||||
|
if "title" in entry:
|
||||||
|
title = entry.title
|
||||||
|
else:
|
||||||
|
title = ""
|
||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
if "tags" in entry:
|
if "tags" in entry:
|
||||||
# TODO finish categories
|
# TODO finish categories
|
||||||
@ -67,7 +72,7 @@ def create_frontmatter(entry):
|
|||||||
tags.append(t["term"])
|
tags.append(t["term"])
|
||||||
|
|
||||||
frontmatter = {
|
frontmatter = {
|
||||||
"title": entry.title,
|
"title": title,
|
||||||
"date": published.format(),
|
"date": published.format(),
|
||||||
"summary": "",
|
"summary": "",
|
||||||
"author": author,
|
"author": author,
|
||||||
@ -194,13 +199,25 @@ def main():
|
|||||||
if not os.path.exists(output_dir):
|
if not os.path.exists(output_dir):
|
||||||
os.makedirs(output_dir)
|
os.makedirs(output_dir)
|
||||||
|
|
||||||
|
feed_dict = dict()
|
||||||
|
for url in feed_urls:
|
||||||
|
feed_name = urlparse(url).netloc
|
||||||
|
feed_dict[url] = feed_name
|
||||||
|
|
||||||
|
feed_names = feed_dict.values()
|
||||||
|
content_dirs = os.listdir(output_dir)
|
||||||
|
for i in content_dirs:
|
||||||
|
if i not in feed_names:
|
||||||
|
shutil.rmtree(os.path.join(output_dir, i))
|
||||||
|
print("%s not in feeds_list.txt, removing local data" %(i))
|
||||||
|
|
||||||
# add iframe to the allowlist of feedparser's sanitizer,
|
# add iframe to the allowlist of feedparser's sanitizer,
|
||||||
# this is now handled in parse_post()
|
# this is now handled in parse_post()
|
||||||
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
|
feedparser.sanitizer._HTMLSanitizer.acceptable_elements |= {"iframe"}
|
||||||
|
|
||||||
for feed_url in feed_urls:
|
for feed_url in feed_urls:
|
||||||
|
|
||||||
feed_name = urlparse(feed_url).netloc
|
feed_name = feed_dict[feed_url]
|
||||||
|
|
||||||
feed_dir = os.path.join(output_dir, feed_name)
|
feed_dir = os.path.join(output_dir, feed_name)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user