From 60a126024c8f4fa383fa08f5ecdcbfc843df6278 Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Mon, 6 Sep 2021 12:58:48 +0000 Subject: [PATCH 1/3] Stricter sanitization rules for download filenames Fixes #212 --- src/plugin/downloader.cr | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/plugin/downloader.cr b/src/plugin/downloader.cr index 054698e..d826bac 100644 --- a/src/plugin/downloader.cr +++ b/src/plugin/downloader.cr @@ -24,8 +24,9 @@ class Plugin end private def process_filename(str) - return "_" if str == ".." - str.gsub "/", "_" + str + .gsub(/[\/\s\.\177\000-\037]/, "_") + .gsub(/__+/, "_") end private def download(job : Queue::Job) From ccf558eaa7657a96ebe7277828ad7fdf6397fc9a Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Wed, 8 Sep 2021 10:03:05 +0000 Subject: [PATCH 2/3] Improve filename sanitization rules --- spec/util_spec.cr | 10 ++++++++++ src/plugin/downloader.cr | 12 +++--------- src/util/util.cr | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/spec/util_spec.cr b/spec/util_spec.cr index 27d97c2..5e8b9f0 100644 --- a/spec/util_spec.cr +++ b/spec/util_spec.cr @@ -61,3 +61,13 @@ describe "chapter_sort" do end.should eq ary end end + +describe "sanitize_filename" do + it "returns a random string for empty sanitized string" do + sanitize_filename("..").should_not eq sanitize_filename("..") + end + it "sanitizes correctly" do + sanitize_filename(".. \n\v.\rマンゴー/|*()<[1/2] 3.14 hello world ") + .should eq " . 
マンゴー_()[1_2] 3.14 hello world" + end +end diff --git a/src/plugin/downloader.cr b/src/plugin/downloader.cr index d826bac..2800232 100644 --- a/src/plugin/downloader.cr +++ b/src/plugin/downloader.cr @@ -23,12 +23,6 @@ class Plugin job end - private def process_filename(str) - str - .gsub(/[\/\s\.\177\000-\037]/, "_") - .gsub(/__+/, "_") - end - private def download(job : Queue::Job) @downloading = true @queue.set_status Queue::JobStatus::Downloading, job @@ -43,8 +37,8 @@ class Plugin pages = info["pages"].as_i - manga_title = process_filename job.manga_title - chapter_title = process_filename info["title"].as_s + manga_title = sanitize_filename job.manga_title + chapter_title = sanitize_filename info["title"].as_s @queue.set_pages pages, job lib_dir = @library_path @@ -69,7 +63,7 @@ class Plugin while page = plugin.next_page break unless @queue.exists? job - fn = process_filename page["filename"].as_s + fn = sanitize_filename page["filename"].as_s url = page["url"].as_s headers = HTTP::Headers.new diff --git a/src/util/util.cr b/src/util/util.cr index c4e168a..50aa7fe 100644 --- a/src/util/util.cr +++ b/src/util/util.cr @@ -120,3 +120,22 @@ class String match / s.size end end + +# Does the following: +# - turns space-like characters into normal spaces ( ) +# - strips and collapses spaces +# - removes ASCII control characters (octal \000-\037 and \177) +# - replaces slashes (/) with underscores (_) +# - removes leading dots (.) +# - removes the following special characters: \:*?"<>| +# +# If the sanitized string is empty, returns a random string instead. +def sanitize_filename(str : String) : String + sanitized = str + .gsub(/\s+/, " ") + .strip + .gsub(/\//, "_") + .gsub(/^\.+/, "") + .gsub(/[\177\000-\037\\:\*\?\"<>\|]/, "") + sanitized.size > 0 ? 
sanitized : random_str +end From 566cebfcdda40835fbac6274dbeb5153250f4075 Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Thu, 9 Sep 2021 00:13:58 +0000 Subject: [PATCH 3/3] Remove all leading dots and spaces --- spec/util_spec.cr | 2 +- src/util/util.cr | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/util_spec.cr b/spec/util_spec.cr index 5e8b9f0..fb4efd7 100644 --- a/spec/util_spec.cr +++ b/spec/util_spec.cr @@ -68,6 +68,6 @@ describe "sanitize_filename" do end it "sanitizes correctly" do sanitize_filename(".. \n\v.\rマンゴー/|*()<[1/2] 3.14 hello world ") - .should eq " . マンゴー_()[1_2] 3.14 hello world" + .should eq "マンゴー_()[1_2] 3.14 hello world" end end diff --git a/src/util/util.cr b/src/util/util.cr index 50aa7fe..833f1ca 100644 --- a/src/util/util.cr +++ b/src/util/util.cr @@ -126,7 +126,7 @@ # - strips and collapses spaces # - removes ASCII control characters (octal \000-\037 and \177) # - replaces slashes (/) with underscores (_) -# - removes leading dots (.) +# - removes leading dots (.) and spaces # - removes the following special characters: \:*?"<>| # # If the sanitized string is empty, returns a random string instead. @@ -135,7 +135,7 @@ def sanitize_filename(str : String) : String .gsub(/\s+/, " ") .strip .gsub(/\//, "_") - .gsub(/^\.+/, "") + .gsub(/^[\.\s]+/, "") .gsub(/[\177\000-\037\\:\*\?\"<>\|]/, "") sanitized.size > 0 ? sanitized : random_str end