From 2fbc952a72c8de97a1b7782bc7076fc9b75e50de Mon Sep 17 00:00:00 2001 From: knoflook Date: Fri, 18 Mar 2022 13:59:14 +0100 Subject: [PATCH 1/3] feat: sanitize all yaml --- lumbunglib/feed.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lumbunglib/feed.py b/lumbunglib/feed.py index 8c2bba0..b2f30df 100644 --- a/lumbunglib/feed.py +++ b/lumbunglib/feed.py @@ -5,7 +5,6 @@ from hashlib import md5 from ast import literal_eval as make_tuple from pathlib import Path from urllib.parse import urlparse -from re import sub import arrow import feedparser @@ -14,7 +13,8 @@ import requests from bs4 import BeautifulSoup from slugify import slugify from re import compile as re_compile -yamlre = re_compile('"') +from re import sub +yamlre = re_compile('[_\?:\'\[\]\{\}\#\&\*\!\|\>\`\"\%]') def write_etag(feed_name, feed_data): @@ -121,12 +121,12 @@ def sanitize_yaml (frontmatter): #some fields are lists l = [] for i in v: - i = yamlre.sub('\\"', i) + i = yamlre.sub(r'\\\g<0>', i) l.append(i) frontmatter[k] = l else: - v = yamlre.sub('\\"', v) + v = yamlre.sub(r'\\\g<0>', v) frontmatter[k] = v return frontmatter From cf8b1ff7e98209d9e75c4a201bd57270edee70b4 Mon Sep 17 00:00:00 2001 From: knoflook Date: Thu, 24 Mar 2022 15:13:53 +0100 Subject: [PATCH 2/3] fix: don't escape some characters --- lumbunglib/feed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lumbunglib/feed.py b/lumbunglib/feed.py index b2f30df..6c04da5 100644 --- a/lumbunglib/feed.py +++ b/lumbunglib/feed.py @@ -14,7 +14,7 @@ from bs4 import BeautifulSoup from slugify import slugify from re import compile as re_compile from re import sub -yamlre = re_compile('[_\?:\'\[\]\{\}\#\&\*\!\|\>\`\"\%]') +yamlre = re_compile('[_\'\[\]\{\}\#\&\*\!\|\>\`\"\%]') def write_etag(feed_name, feed_data): From a809433410827ced9c2d734bd375c45c6969824d Mon Sep 17 00:00:00 2001 From: knoflook Date: Tue, 12 Apr 2022 13:41:34 +0200 Subject: [PATCH 3/3] feed: move to saneyaml --- lumbunglib/feed.py | 7 +++-- poetry.lock | 64 +++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/lumbunglib/feed.py b/lumbunglib/feed.py index 6c04da5..1348e02 100644 --- a/lumbunglib/feed.py +++ b/lumbunglib/feed.py @@ -13,8 +13,7 @@ import requests from bs4 import BeautifulSoup from slugify import slugify from re import compile as re_compile -from re import sub -yamlre = re_compile('[_\'\[\]\{\}\#\&\*\!\|\>\`\"\%]') +import saneyaml def write_etag(feed_name, feed_data): @@ -121,12 +120,12 @@ def sanitize_yaml (frontmatter): #some fields are lists l = [] for i in v: - i = yamlre.sub(r'\\\g<0>', i) + i = saneyaml.load(i) l.append(i) frontmatter[k] = l else: - v = yamlre.sub(r'\\\g<0>', v) + v = saneyaml.load(v) frontmatter[k] = v return frontmatter diff --git a/poetry.lock b/poetry.lock index 24631e5..ebc7b53 100644 --- a/poetry.lock +++ b/poetry.lock @@ -242,6 +242,14 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "pyyaml" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.6" + [[package]] name = "requests" version = "2.27.1" @@ -260,6 +268,21 @@ urllib3 = ">=1.21.1,<1.27" socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] +[[package]] +name = "saneyaml" +version = "0.5.2" +description = "Read and write readable YAML safely preserving order and avoiding bad surprises with unwanted infered type conversions. This library is a PyYaml wrapper with sane behaviour to read and write readable YAML safely, typically when used for configuration." +category = "main" +optional = false +python-versions = "<4,>=3.6.*" + +[package.dependencies] +PyYAML = "*" + +[package.extras] +docs = ["Sphinx (>=3.3.1)", "sphinx-rtd-theme (>=0.5.0)", "doc8 (>=0.8.1)"] +testing = ["pytest (>=6)", "pytest-xdist (>=2)"] + [[package]] name = "sgmllib3k" version = "1.0.0" @@ -319,7 +342,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "c5c987253f949737210f4a3d3c3c24b0affd4a9c7d06de386c9bd514c592db8b" +content-hash = "86ebded9dbd151b57502b40d3e58d6d92f837bc776184afa84d297c40d6daa7a" [metadata.files] arrow = [ @@ -469,10 +492,49 @@ pytz = [ {file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"}, {file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"}, ] +pyyaml = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] requests = [ {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, ] +saneyaml = [ + {file = "saneyaml-0.5.2-py3-none-any.whl", hash = "sha256:e54ed827973647ee9be8e8c091536b55ad22b3f9b1296e36701a3544822e7eac"}, + {file = "saneyaml-0.5.2.tar.gz", hash = "sha256:d6074f1959041342ab41d74a6f904720ffbcf63c94467858e0e22e17e3c43d41"}, +] sgmllib3k = [ {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"}, ] diff --git a/pyproject.toml b/pyproject.toml index 20ba70a..1a3fdcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ peertube = {git = "https://framagit.org/framasoft/peertube/clients/python.git"} feedparser = "^6.0.8" bs4 = "^0.0.1" "Mastodon.py" = "^1.5.1" +saneyaml = "^0.5.2" [tool.poetry.dev-dependencies] poetry2setup = "^1.0.0"