feed: compile regex once

This commit is contained in:
knoflook 2022-02-21 11:19:12 +01:00
parent 41690c0a89
commit b147d46535
Signed by untrusted user: knoflook
GPG Key ID: D6A1D0E8FC4FEF1C

View File

@@ -12,7 +12,8 @@ import jinja2
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from slugify import slugify from slugify import slugify
from re import sub from re import compile as re_compile
yamlre = re_compile('"')
def write_etag(feed_name, feed_data): def write_etag(feed_name, feed_data):
@@ -48,7 +49,7 @@ def get_etag(feed_name):
def create_frontmatter(entry): def create_frontmatter(entry):
""" """
parse RSS metadata and return as frontmatter parse RSS metadata and return as frontmatter
""" """
if 'published' in entry: if 'published' in entry:
published = entry.published_parsed published = entry.published_parsed
@@ -61,7 +62,7 @@ def create_frontmatter(entry):
author = entry.author author = entry.author
else: else:
author = '' author = ''
if 'authors' in entry: if 'authors' in entry:
authors = [] authors = []
for a in entry.authors: for a in entry.authors:
@@ -119,12 +120,12 @@ def sanitize_yaml (frontmatter):
#some fields are lists #some fields are lists
l = [] l = []
for i in v: for i in v:
i = sub('"', '\\"', i) i = yamlre.sub('\\"', i)
l.append(i) l.append(i)
frontmatter[k] = l frontmatter[k] = l
else: else:
v = sub('"', '\\"', v) v = yamlre.sub('\\"', v)
frontmatter[k] = v frontmatter[k] = v
return frontmatter return frontmatter
@@ -348,7 +349,7 @@ def main():
entry["feed_name"] = feed_name entry["feed_name"] = feed_name
post_name = slugify(entry.title) post_name = slugify(entry.title)
# pixelfed returns the whole post text as the post name. max # pixelfed returns the whole post text as the post name. max
# filename length is 255 on many systems. here we're shortening # filename length is 255 on many systems. here we're shortening
# the name and adding a hash to it to avoid a conflict in a # the name and adding a hash to it to avoid a conflict in a
@@ -356,7 +357,7 @@ def main():
if len(post_name) > 150: if len(post_name) > 150:
post_hash = md5(bytes(post_name, "utf-8")) post_hash = md5(bytes(post_name, "utf-8"))
post_name = post_name[:150] + "-" + post_hash.hexdigest() post_name = post_name[:150] + "-" + post_hash.hexdigest()
if opds_feed: if opds_feed:
entry['opds'] = True entry['opds'] = True
#format: Beyond-Debiasing-Report_Online-75535a4886e3 #format: Beyond-Debiasing-Report_Online-75535a4886e3
@@ -376,7 +377,7 @@ def main():
if opds_feed: if opds_feed:
create_opds_post(post_dir, entry) create_opds_post(post_dir, entry)
else: else:
create_post(post_dir, entry) create_post(post_dir, entry)
existing_posts.remove( existing_posts.remove(
post_name post_name
) # create list of posts which have not been returned by the feed ) # create list of posts which have not been returned by the feed