From 0667f01471f8f1032f778e90bfee09af42e63ed9 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 02:01:43 +0900 Subject: [PATCH 01/39] Measure scan only --- src/library/library.cr | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/library/library.cr b/src/library/library.cr index 9351e60..32cc161 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -98,6 +98,7 @@ class Library end def scan + start = Time.local unless Dir.exists? @dir Logger.info "The library directory #{@dir} does not exist. " \ "Attempting to create it" @@ -122,7 +123,8 @@ class Library storage.bulk_insert_ids storage.close - Logger.debug "Scan completed" + ms = (Time.local - start).total_milliseconds + Logger.debug "Scan completed. #{ms}ms" Storage.default.mark_unavailable end From 291a340cdd48b068e71cde78b9cf22227658ecf6 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Thu, 9 Sep 2021 20:45:47 +0900 Subject: [PATCH 02/39] Add yaml serializer to Library, Title, Entry --- src/library/entry.cr | 2 ++ src/library/library.cr | 2 ++ src/library/title.cr | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/library/entry.cr b/src/library/entry.cr index 28b7122..bb0aa1b 100644 --- a/src/library/entry.cr +++ b/src/library/entry.cr @@ -1,6 +1,8 @@ require "image_size" class Entry + include YAML::Serializable + getter zip_path : String, book : Title, title : String, size : String, pages : Int32, id : String, encoded_path : String, encoded_title : String, mtime : Time, err_msg : String? diff --git a/src/library/library.cr b/src/library/library.cr index 32cc161..9c23d68 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -1,4 +1,6 @@ class Library + include YAML::Serializable + getter dir : String, title_ids : Array(String), title_hash : Hash(String, Title) diff --git a/src/library/title.cr b/src/library/title.cr index f1915d4..899e21f 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -2,6 +2,8 @@ require "digest" require "../archive" class Title + include YAML::Serializable + getter dir : String, parent_id : String, title_ids : Array(String), entries : Array(Entry), title : String, id : String, encoded_title : String, mtime : Time, signature : UInt64, From 4409ed8f455d9e44705974ca1896e884a5957fc2 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Thu, 9 Sep 2021 21:14:11 +0900 Subject: [PATCH 03/39] Implement save_instance, load_instance --- src/library/library.cr | 37 +++++++++++++++++++++++++++++++++++++ src/mango.cr | 1 + 2 files changed, 38 insertions(+) diff --git a/src/library/library.cr b/src/library/library.cr index 9c23d68..b8528f3 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -6,6 +6,41 @@ class Library use_default + def save_instance + path = Config.current.library_path + instance_file_path = File.join path, "library.yml.zip" + + writer = Compress::Zip::Writer.new instance_file_path + writer.add "instance.yml", self.to_yaml + writer.close + end + + def self.load_instance + dir = Config.current.library_path + return unless Dir.exists? dir + instance_file_path = File.join path, "library.yml.zip" + return unless File.exists? instance_file_path + + zip_file = Compress::Zip::File.new instance_file_path + instance_file = zip_file.entries.find { |entry| entry.filename == "instance.yml" } + + if instance_file.nil? + zip_file.close + return + end + begin + instance_file.open do |content| + @@default = Library.from_yaml content + end + rescue e + Logger.error e + end + + zip_file.close + + scan + end + def initialize register_mime_types @@ -128,6 +163,8 @@ class Library ms = (Time.local - start).total_milliseconds Logger.debug "Scan completed. #{ms}ms" Storage.default.mark_unavailable + + save_instance end def get_continue_reading_entries(username) diff --git a/src/mango.cr b/src/mango.cr index f27165e..39b1352 100644 --- a/src/mango.cr +++ b/src/mango.cr @@ -58,6 +58,7 @@ class CLI < Clim LRUCache.init Storage.default Queue.default + Library.load_instance Library.default Plugin::Downloader.default From 0a90e3b3334d2be89d6517b8e576db7a546b9f34 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Fri, 10 Sep 2021 20:24:54 +0900 Subject: [PATCH 04/39] Ignore caches --- src/library/library.cr | 4 ++-- src/library/title.cr | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index b8528f3..a3f2baa 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -18,7 +18,7 @@ class Library def self.load_instance dir = Config.current.library_path return unless Dir.exists? dir - instance_file_path = File.join path, "library.yml.zip" + instance_file_path = File.join dir, "library.yml.zip" return unless File.exists? instance_file_path zip_file = Compress::Zip::File.new instance_file_path @@ -38,7 +38,7 @@ class Library zip_file.close - scan + Library.default.scan end def initialize diff --git a/src/library/title.cr b/src/library/title.cr index 899e21f..6c873b8 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -10,9 +10,13 @@ class Title entry_cover_url_cache : Hash(String, String)? setter entry_cover_url_cache : Hash(String, String)? + @[YAML::Field(ignore: true)] @entry_display_name_cache : Hash(String, String)? + @[YAML::Field(ignore: true)] @entry_cover_url_cache : Hash(String, String)? + @[YAML::Field(ignore: true)] @cached_display_name : String? + @[YAML::Field(ignore: true)] @cached_cover_url : String? def initialize(@dir : String, @parent_id) From eb3e37b95097abf11b91c2a1c210fd71e0890126 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sat, 11 Sep 2021 13:30:07 +0900 Subject: [PATCH 05/39] Examine titles and recycle them --- src/library/library.cr | 8 +++++++- src/library/title.cr | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/library/library.cr b/src/library/library.cr index a3f2baa..86375be 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -144,14 +144,20 @@ class Library storage = Storage.new auto_close: false + @title_ids.select! do |title_id| + title = @title_hash[title_id] + title.examine + end + remained_title_dirs = @title_ids.map { |id| title_hash[id].dir } + (Dir.entries @dir) .select { |fn| !fn.starts_with? "." } .map { |fn| File.join @dir, fn } + .select { |path| !(remained_title_dirs.includes? path) } .select { |path| File.directory? path } .map { |path| Title.new path, "" } .select { |title| !(title.entries.empty? && title.titles.empty?) } .sort! { |a, b| a.title <=> b.title } - .tap { |_| @title_ids.clear } .each do |title| @title_hash[title.id] = title @title_ids << title.id diff --git a/src/library/title.cr b/src/library/title.cr index 6c873b8..44464c9 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -69,6 +69,12 @@ class Title end end + def examine : Bool + return false unless Dir.exists? @dir + signature = Dir.signature @dir + return @signature == signature + end + def to_slim_json : String JSON.build do |json| json.object do From fb43abb950137416043d0b923a75a10a073c395c Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sat, 11 Sep 2021 21:16:33 +0900 Subject: [PATCH 06/39] Enhance the examine method --- src/library/title.cr | 66 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/src/library/title.cr b/src/library/title.cr index 44464c9..51b82c9 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -70,9 +70,71 @@ class Title end def examine : Bool - return false unless Dir.exists? @dir + return false unless Dir.exists? @dir # no title, should be removed signature = Dir.signature @dir - return @signature == signature + # `signature` doesn't reflect movings, renames in nested titles + # return true if @signature == signature # not changed, preserve + + # fix title + @signature = signature + storage = Storage.default + id = storage.get_title_id dir, signature + if id.nil? + id = random_str + storage.insert_title_id({ + path: dir, + id: id, + signature: signature.to_s, + }) + end + @id = id + @mtime = File.info(@dir).modification_time + + @title_ids.select! do |title_id| + title = Library.default.get_title! title_id + title.examine + end + remained_title_dirs = @title_ids.map do |id| + title = Library.default.get_title! id + title.dir + end + + @entries.select! { |entry| File.exists? entry.zip_path } + remained_entry_zip_paths = @entries.map &.zip_path + + Dir.entries(dir).each do |fn| + next if fn.starts_with? "." + path = File.join dir, fn + if File.directory? path + next if remained_title_dirs.includes? path + title = Title.new path, @id + next if title.entries.size == 0 && title.titles.size == 0 + Library.default.title_hash[title.id] = title + @title_ids << title.id + next + end + if is_supported_file path + next if remained_entry_zip_paths.includes? path + entry = Entry.new path, self + @entries << entry if entry.pages > 0 || entry.err_msg + end + end + + mtimes = [@mtime] + mtimes += @title_ids.map { |e| Library.default.title_hash[e].mtime } + mtimes += @entries.map &.mtime + @mtime = mtimes.max + + @title_ids.sort! do |a, b| + compare_numerically Library.default.title_hash[a].title, + Library.default.title_hash[b].title + end + sorter = ChapterSorter.new @entries.map &.title + @entries.sort! do |a, b| + sorter.compare a.title, b.title + end + + return true # this could be recycled end def to_slim_json : String From 80e13abc4a9d5dd53346e58aaf72b0a94a9230b4 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sat, 11 Sep 2021 22:44:17 +0900 Subject: [PATCH 07/39] Spawn scan job --- src/library/library.cr | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/library/library.cr b/src/library/library.cr index 86375be..9ee26d1 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -21,6 +21,7 @@ class Library instance_file_path = File.join dir, "library.yml.zip" return unless File.exists? instance_file_path + Logger.debug "Load library instance" zip_file = Compress::Zip::File.new instance_file_path instance_file = zip_file.entries.find { |entry| entry.filename == "instance.yml" } @@ -38,7 +39,13 @@ class Library zip_file.close - Library.default.scan + spawn do + start = Time.local + Library.default.scan + ms = (Time.local - start).total_milliseconds + Logger.info "Re-scanned #{Library.default.title_ids.size} titles \ + in #{ms}ms" + end end def initialize From e6214ddc5d0a752ed4abaaa268090235b27c1450 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 01:26:02 +0900 Subject: [PATCH 08/39] Rescan only if instance loaded --- src/library/library.cr | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 9ee26d1..15674cf 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -29,22 +29,26 @@ class Library zip_file.close return end + is_loaded = false begin instance_file.open do |content| @@default = Library.from_yaml content end + is_loaded = true rescue e Logger.error e end zip_file.close - spawn do - start = Time.local - Library.default.scan - ms = (Time.local - start).total_milliseconds - Logger.info "Re-scanned #{Library.default.title_ids.size} titles \ - in #{ms}ms" + if is_loaded + spawn do + start = Time.local + Library.default.scan + ms = (Time.local - start).total_milliseconds + Logger.info "Re-scanned #{Library.default.title_ids.size} titles \ + in #{ms}ms" + end end end From 4e8b561f70111c33a280dfa6487a382fd8fdec24 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 00:37:48 +0900 Subject: [PATCH 09/39] Apply contents signature of directories --- src/library/title.cr | 10 ++++++---- src/util/signature.cr | 23 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/library/title.cr b/src/library/title.cr index 51b82c9..5caefa0 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -32,6 +32,7 @@ class Title }) end @id = id + @contents_signature = Dir.contents_signature dir @title = File.basename dir @encoded_title = URI.encode @title @title_ids = [] of String @@ -71,12 +72,13 @@ class Title def examine : Bool return false unless Dir.exists? @dir # no title, should be removed - signature = Dir.signature @dir - # `signature` doesn't reflect movings, renames in nested titles - # return true if @signature == signature # not changed, preserve + contents_signature = Dir.contents_signature @dir + # not changed, preserve + return true if @contents_signature == contents_signature # fix title - @signature = signature + @contents_signature = contents_signature + @signature = Dir.signature @dir storage = Storage.default id = storage.get_title_id dir, signature if id.nil? diff --git a/src/util/signature.cr b/src/util/signature.cr index d1a0040..f2bf103 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -48,4 +48,27 @@ class Dir end Digest::CRC32.checksum(signatures.sort.join).to_u64 end + + # Returns the contents signature of the directory at dirname for checking + # to rescan. + # Rescan conditions: + # - When a file added, moved, removed, renamed (including which in nested + # directories) + def self.contents_signature(dirname) : String + signatures = [] of String + self.open dirname do |dir| + dir.entries.sort.each do |fn| + next if fn.starts_with? "." + path = File.join dirname, fn + if File.directory? path + signatures << Dir.contents_signature path + else + # Only add its signature value to `signatures` when it is a + # supported file + signatures << fn if is_supported_file fn + end + end + end + Digest::SHA1.hexdigest(signatures.sort.join) + end end From a8f729f5c1021b2accfa15748f85f2f114181d61 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 00:48:23 +0900 Subject: [PATCH 10/39] Sort entries and titles when they needed --- src/library/title.cr | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/library/title.cr b/src/library/title.cr index 5caefa0..8260be9 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -92,6 +92,7 @@ class Title @id = id @mtime = File.info(@dir).modification_time + previous_titles_size = @title_ids.size @title_ids.select! do |title_id| title = Library.default.get_title! title_id title.examine @@ -101,9 +102,12 @@ class Title title.dir end + previous_entries_size = @entries.size @entries.select! { |entry| File.exists? entry.zip_path } remained_entry_zip_paths = @entries.map &.zip_path + is_titles_added = false + is_entries_added = false Dir.entries(dir).each do |fn| next if fn.starts_with? "." path = File.join dir, fn @@ -113,12 +117,16 @@ class Title next if title.entries.size == 0 && title.titles.size == 0 Library.default.title_hash[title.id] = title @title_ids << title.id + is_titles_added = true next end if is_supported_file path next if remained_entry_zip_paths.includes? path entry = Entry.new path, self - @entries << entry if entry.pages > 0 || entry.err_msg + if entry.pages > 0 || entry.err_msg + @entries << entry + is_entries_added = true + end end end @@ -127,13 +135,17 @@ class Title mtimes += @entries.map &.mtime @mtime = mtimes.max - @title_ids.sort! do |a, b| - compare_numerically Library.default.title_hash[a].title, - Library.default.title_hash[b].title + if is_titles_added || previous_titles_size != @title_ids.size + @title_ids.sort! do |a, b| + compare_numerically Library.default.title_hash[a].title, + Library.default.title_hash[b].title + end end - sorter = ChapterSorter.new @entries.map &.title - @entries.sort! do |a, b| - sorter.compare a.title, b.title + if is_entries_added || previous_entries_size != @entries.size + sorter = ChapterSorter.new @entries.map &.title + @entries.sort! do |a, b| + sorter.compare a.title, b.title + end end return true # this could be recycled From 9309f51df634ae3a9f498c30ed31f0f8066dec6f Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 02:03:54 +0900 Subject: [PATCH 11/39] Memoization on dir contents_signature --- src/library/title.cr | 7 ++++--- src/util/signature.cr | 7 +++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/library/title.cr b/src/library/title.cr index 8260be9..2e9db26 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -70,11 +70,12 @@ class Title end end - def examine : Bool + def examine(cache = {} of String => String) : Bool return false unless Dir.exists? @dir # no title, should be removed - contents_signature = Dir.contents_signature @dir + contents_signature = Dir.contents_signature @dir, cache # not changed, preserve return true if @contents_signature == contents_signature + puts "Contents changed in #{@dir}" # fix title @contents_signature = contents_signature @@ -95,7 +96,7 @@ class Title previous_titles_size = @title_ids.size @title_ids.select! do |title_id| title = Library.default.get_title! title_id - title.examine + title.examine cache end remained_title_dirs = @title_ids.map do |id| title = Library.default.get_title! id diff --git a/src/util/signature.cr b/src/util/signature.cr index f2bf103..e56aa06 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -54,7 +54,8 @@ class Dir # Rescan conditions: # - When a file added, moved, removed, renamed (including which in nested # directories) - def self.contents_signature(dirname) : String + def self.contents_signature(dirname, cache = {} of String => String) : String + return cache[dirname] if cache[dirname]? signatures = [] of String self.open dirname do |dir| dir.entries.sort.each do |fn| @@ -69,6 +70,8 @@ class Dir end end end - Digest::SHA1.hexdigest(signatures.sort.join) + hash = Digest::SHA1.hexdigest(signatures.sort.join) + cache[dirname] = hash + hash end end From 7e36c91ea7000606aac0ca050a25d0cdb14871d7 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 10:47:15 +0900 Subject: [PATCH 12/39] Remove debug print --- src/library/title.cr | 1 - 1 file changed, 1 deletion(-) diff --git a/src/library/title.cr b/src/library/title.cr index 2e9db26..46de5b9 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -75,7 +75,6 @@ class Title contents_signature = Dir.contents_signature @dir, cache # not changed, preserve return true if @contents_signature == contents_signature - puts "Contents changed in #{@dir}" # fix title @contents_signature = contents_signature From bdbdf9c94be29c62bf425c5f2349905b5c93c162 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 11:09:48 +0900 Subject: [PATCH 13/39] Fix to pass 'make check', fix comments --- src/library/library.cr | 4 +++- src/library/title.cr | 12 ++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 15674cf..4764b59 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -23,7 +23,9 @@ class Library Logger.debug "Load library instance" zip_file = Compress::Zip::File.new instance_file_path - instance_file = zip_file.entries.find { |entry| entry.filename == "instance.yml" } + instance_file = zip_file.entries.find do |entry| + entry.filename == "instance.yml" + end if instance_file.nil? zip_file.close diff --git a/src/library/title.cr b/src/library/title.cr index 46de5b9..ed63471 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -71,12 +71,12 @@ class Title end def examine(cache = {} of String => String) : Bool - return false unless Dir.exists? @dir # no title, should be removed + return false unless Dir.exists? @dir # No title, Remove this contents_signature = Dir.contents_signature @dir, cache - # not changed, preserve + # Not changed. Reuse this return true if @contents_signature == contents_signature - # fix title + # Fix title @contents_signature = contents_signature @signature = Dir.signature @dir storage = Storage.default @@ -97,8 +97,8 @@ class Title title = Library.default.get_title! title_id title.examine cache end - remained_title_dirs = @title_ids.map do |id| - title = Library.default.get_title! id + remained_title_dirs = @title_ids.map do |title_id| + title = Library.default.get_title! title_id title.dir end @@ -148,7 +148,7 @@ class Title end end - return true # this could be recycled + true # Fixed, reuse this end def to_slim_json : String From cd48b45f110740b1348dcbf0369caf9d9909a7db Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 12:45:24 +0900 Subject: [PATCH 14/39] Add 'require "yaml"' --- src/library/entry.cr | 1 + 1 file changed, 1 insertion(+) diff --git a/src/library/entry.cr b/src/library/entry.cr index bb0aa1b..5694551 100644 --- a/src/library/entry.cr +++ b/src/library/entry.cr @@ -1,4 +1,5 @@ require "image_size" +require "yaml" class Entry include YAML::Serializable From 8c90b46114f637155f2b2f9efcc651092c2ef3eb Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 13:39:28 +0900 Subject: [PATCH 15/39] Remove removed titles from title_hash --- src/library/library.cr | 4 +++- src/library/title.cr | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 4764b59..a7b43e9 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -159,7 +159,9 @@ class Library @title_ids.select! do |title_id| title = @title_hash[title_id] - title.examine + existence = title.examine + @title_hash.delete title_id unless existence + existence end remained_title_dirs = @title_ids.map { |id| title_hash[id].dir } diff --git a/src/library/title.cr b/src/library/title.cr index ed63471..def2b66 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -95,7 +95,9 @@ class Title previous_titles_size = @title_ids.size @title_ids.select! do |title_id| title = Library.default.get_title! title_id - title.examine cache + existence = title.examine cache + Library.default.title_hash.delete title_id unless existence + existence end remained_title_dirs = @title_ids.map do |title_id| title = Library.default.get_title! title_id From 7734dae138c009f3e4c14ba9efc3a5984817e47e Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 14:36:17 +0900 Subject: [PATCH 16/39] Remove unnecessary sort --- src/util/signature.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/signature.cr b/src/util/signature.cr index e56aa06..b883e7c 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -70,7 +70,7 @@ class Dir end end end - hash = Digest::SHA1.hexdigest(signatures.sort.join) + hash = Digest::SHA1.hexdigest(signatures.join) cache[dirname] = hash hash end From f5933a48d9ce4d7987b22524774a9efecdee8170 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 17:40:40 +0900 Subject: [PATCH 17/39] Register mime_type scan, thumbnails when loading instance --- src/library/library.cr | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index a7b43e9..f9edb7e 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -44,19 +44,11 @@ class Library zip_file.close if is_loaded - spawn do - start = Time.local - Library.default.scan - ms = (Time.local - start).total_milliseconds - Logger.info "Re-scanned #{Library.default.title_ids.size} titles \ - in #{ms}ms" - end + Library.default.register_jobs end end def initialize - register_mime_types - @dir = Config.current.library_path # explicitly initialize @titles to bypass the compiler check. it will # be filled with actual Titles in the `scan` call below @@ -66,6 +58,12 @@ class Library @entries_count = 0 @thumbnails_count = 0 + register_jobs + end + + protected def register_jobs + register_mime_types + scan_interval = Config.current.scan_interval_minutes if scan_interval < 1 scan From 8f1383a818a43788c37e67d6af041dc71df0581a Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 18:01:16 +0900 Subject: [PATCH 18/39] Use Gzip instead of Zip --- src/library/library.cr | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index f9edb7e..5b61827 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -10,8 +10,9 @@ class Library path = Config.current.library_path instance_file_path = File.join path, "library.yml.zip" - writer = Compress::Zip::Writer.new instance_file_path - writer.add "instance.yml", self.to_yaml + writer = Compress::Gzip::Writer.new instance_file_path, + Compress::Gzip::BEST_COMPRESSION + writer.write self.to_yaml.to_slice writer.close end @@ -22,18 +23,10 @@ class Library return unless File.exists? instance_file_path Logger.debug "Load library instance" - zip_file = Compress::Zip::File.new instance_file_path - instance_file = zip_file.entries.find do |entry| - entry.filename == "instance.yml" - end - if instance_file.nil? - zip_file.close - return - end is_loaded = false begin - instance_file.open do |content| + Compress::Gzip::Reader.open instance_file_path do |content| @@default = Library.from_yaml content end is_loaded = true @@ -41,8 +34,6 @@ class Library Logger.error e end - zip_file.close - if is_loaded Library.default.register_jobs end From a151ec486da746e078ac96e49015b43e9d701c4e Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sun, 12 Sep 2021 18:04:41 +0900 Subject: [PATCH 19/39] Fix file extension of gzip file --- src/library/library.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 5b61827..7ab10df 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -8,7 +8,7 @@ class Library def save_instance path = Config.current.library_path - instance_file_path = File.join path, "library.yml.zip" + instance_file_path = File.join path, "library.yml.gz" writer = Compress::Gzip::Writer.new instance_file_path, Compress::Gzip::BEST_COMPRESSION @@ -19,7 +19,7 @@ class Library def self.load_instance dir = Config.current.library_path return unless Dir.exists? dir - instance_file_path = File.join dir, "library.yml.zip" + instance_file_path = File.join dir, "library.yml.gz" return unless File.exists? instance_file_path Logger.debug "Load library instance" From 03e044a1aa6b4d88a8d210b97c563f11204ada95 Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Tue, 14 Sep 2021 07:16:14 +0000 Subject: [PATCH 20/39] Improve logging --- src/library/library.cr | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 7ab10df..6d69563 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -9,6 +9,7 @@ class Library def save_instance path = Config.current.library_path instance_file_path = File.join path, "library.yml.gz" + Logger.debug "Caching library to #{instance_file_path}" writer = Compress::Gzip::Writer.new instance_file_path, Compress::Gzip::BEST_COMPRESSION @@ -22,21 +23,16 @@ class Library instance_file_path = File.join dir, "library.yml.gz" return unless File.exists? instance_file_path - Logger.debug "Load library instance" + Logger.debug "Loading cached library from #{instance_file_path}" - is_loaded = false begin Compress::Gzip::Reader.open instance_file_path do |content| @@default = Library.from_yaml content end - is_loaded = true + Library.default.register_jobs rescue e Logger.error e end - - if is_loaded - Library.default.register_jobs - end end def initialize @@ -64,7 +60,7 @@ class Library start = Time.local scan ms = (Time.local - start).total_milliseconds - Logger.info "Scanned #{@title_ids.size} titles in #{ms}ms" + Logger.debug "Library initialized in #{ms}ms" sleep scan_interval.minutes end end @@ -171,10 +167,13 @@ class Library storage.close ms = (Time.local - start).total_milliseconds - Logger.debug "Scan completed. #{ms}ms" + Logger.info "Scanned #{@title_ids.size} titles in #{ms}ms" + Storage.default.mark_unavailable - save_instance + spawn do + save_instance + end end def get_continue_reading_entries(username) From be47f309b09a61a32e2862dd9ce13b3ea7d4b217 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Tue, 14 Sep 2021 18:11:08 +0900 Subject: [PATCH 21/39] Use cache when calculating contents_signature --- src/util/signature.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/signature.cr b/src/util/signature.cr index b883e7c..190a4a6 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -62,7 +62,7 @@ class Dir next if fn.starts_with? "." path = File.join dirname, fn if File.directory? path - signatures << Dir.contents_signature path + signatures << Dir.contents_signature path, cache else # Only add its signature value to `signatures` when it is a # supported file From 523195d6496bfd596afcc36f95654dd88edd05ee Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Tue, 14 Sep 2021 22:37:30 +0900 Subject: [PATCH 22/39] Define ExamineContext, apply it when scanning --- src/library/library.cr | 12 ++++++++++-- src/library/title.cr | 17 +++++++++++------ src/library/types.cr | 7 +++++++ src/util/signature.cr | 4 ++-- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 6d69563..582440a 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -142,20 +142,28 @@ class Library storage = Storage.new auto_close: false + examine_context : ExamineContext = { + file_count: 0, + cached_contents_signature: {} of String => String, + deleted_title_ids: [] of String, + deleted_entry_ids: [] of String + } + @title_ids.select! do |title_id| title = @title_hash[title_id] - existence = title.examine + existence = title.examine examine_context @title_hash.delete title_id unless existence existence end remained_title_dirs = @title_ids.map { |id| title_hash[id].dir } + cache = examine_context["cached_contents_signature"] (Dir.entries @dir) .select { |fn| !fn.starts_with? "." } .map { |fn| File.join @dir, fn } .select { |path| !(remained_title_dirs.includes? path) } .select { |path| File.directory? path } - .map { |path| Title.new path, "" } + .map { |path| Title.new path, "", cache } .select { |title| !(title.entries.empty? && title.titles.empty?) } .sort! { |a, b| a.title <=> b.title } .each do |title| diff --git a/src/library/title.cr b/src/library/title.cr index def2b66..12c565f 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -19,7 +19,7 @@ class Title @[YAML::Field(ignore: true)] @cached_cover_url : String? - def initialize(@dir : String, @parent_id) + def initialize(@dir : String, @parent_id, cache : Hash(String, String)?) storage = Storage.default @signature = Dir.signature dir id = storage.get_title_id dir, signature @@ -32,7 +32,7 @@ class Title }) end @id = id - @contents_signature = Dir.contents_signature dir + @contents_signature = Dir.contents_signature dir, cache @title = File.basename dir @encoded_title = URI.encode @title @title_ids = [] of String @@ -70,9 +70,14 @@ class Title end end - def examine(cache = {} of String => String) : Bool + def self.new(dir : String, parent_id) + new dir, parent_id, nil + end + + def examine(context : ExamineContext) : Bool return false unless Dir.exists? @dir # No title, Remove this - contents_signature = Dir.contents_signature @dir, cache + contents_signature = Dir.contents_signature @dir, + context["cached_contents_signature"] # Not changed. Reuse this return true if @contents_signature == contents_signature @@ -95,7 +100,7 @@ class Title previous_titles_size = @title_ids.size @title_ids.select! do |title_id| title = Library.default.get_title! title_id - existence = title.examine cache + existence = title.examine context Library.default.title_hash.delete title_id unless existence existence end @@ -115,7 +120,7 @@ class Title path = File.join dir, fn if File.directory? path next if remained_title_dirs.includes? path - title = Title.new path, @id + title = Title.new path, @id, context["cached_contents_signature"] next if title.entries.size == 0 && title.titles.size == 0 Library.default.title_hash[title.id] = title @title_ids << title.id diff --git a/src/library/types.cr b/src/library/types.cr index 0545184..eb87545 100644 --- a/src/library/types.cr +++ b/src/library/types.cr @@ -133,3 +133,10 @@ class TitleInfo LRUCache.set generate_cache_entry key, self.to_json end end + +alias ExamineContext = NamedTuple( + file_count: Int32, + cached_contents_signature: Hash(String, String), + deleted_title_ids: Array(String), + deleted_entry_ids: Array(String) +) diff --git a/src/util/signature.cr b/src/util/signature.cr index 190a4a6..c4104fe 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -55,7 +55,7 @@ class Dir # - When a file added, moved, removed, renamed (including which in nested # directories) def self.contents_signature(dirname, cache = {} of String => String) : String - return cache[dirname] if cache[dirname]? + return cache[dirname] if !cache.nil? && cache[dirname]? signatures = [] of String self.open dirname do |dir| dir.entries.sort.each do |fn| @@ -71,7 +71,7 @@ class Dir end end hash = Digest::SHA1.hexdigest(signatures.join) - cache[dirname] = hash + cache[dirname] = hash unless cache.nil? hash end end From 2e09efbd6238a331253a237736915d4f01788712 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Tue, 14 Sep 2021 22:51:05 +0900 Subject: [PATCH 23/39] Collect deleted ids --- src/library/library.cr | 5 ++++- src/library/title.cr | 11 +++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 582440a..012d14b 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -152,7 +152,10 @@ class Library @title_ids.select! do |title_id| title = @title_hash[title_id] existence = title.examine examine_context - @title_hash.delete title_id unless existence + unless existence + @title_hash.delete title_id + examine_context["deleted_title_ids"] << title_id + end existence end remained_title_dirs = @title_ids.map { |id| title_hash[id].dir } diff --git a/src/library/title.cr b/src/library/title.cr index 12c565f..fb2d2fe 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -101,7 +101,10 @@ class Title @title_ids.select! do |title_id| title = Library.default.get_title! title_id existence = title.examine context - Library.default.title_hash.delete title_id unless existence + unless existence + Library.default.title_hash.delete title_id + context["deleted_title_ids"] << title_id + end existence end remained_title_dirs = @title_ids.map do |title_id| @@ -110,7 +113,11 @@ class Title end previous_entries_size = @entries.size - @entries.select! { |entry| File.exists? entry.zip_path } + @entries.select! do |entry| + existence = File.exists? entry.zip_path + context["deleted_entry_ids"] << entry.id unless existence + existence + end remained_entry_zip_paths = @entries.map &.zip_path is_titles_added = false From 670cf54957c744acfc94b3fe6985248792cd1806 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Tue, 14 Sep 2021 22:51:37 +0900 Subject: [PATCH 24/39] Apply yield forcely --- src/library/title.cr | 1 + src/library/types.cr | 5 +++++ src/util/signature.cr | 1 + 3 files changed, 7 insertions(+) diff --git a/src/library/title.cr b/src/library/title.cr index fb2d2fe..1a8d488 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -115,6 +115,7 @@ class Title previous_entries_size = @entries.size @entries.select! do |entry| existence = File.exists? entry.zip_path + yield_process_file context context["deleted_entry_ids"] << entry.id unless existence existence end diff --git a/src/library/types.cr b/src/library/types.cr index eb87545..1355fcd 100644 --- a/src/library/types.cr +++ b/src/library/types.cr @@ -140,3 +140,8 @@ alias ExamineContext = NamedTuple( deleted_title_ids: Array(String), deleted_entry_ids: Array(String) ) + +def yield_process_file(context : ExamineContext) + context["file_count"] += 1 + Fiber.yield if context["file_count"] % 1000 == 0 +end diff --git a/src/util/signature.cr b/src/util/signature.cr index c4104fe..d0a55a6 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -56,6 +56,7 @@ class Dir # directories) def self.contents_signature(dirname, cache = {} of String => String) : String return cache[dirname] if !cache.nil? && cache[dirname]? + Fiber.yield # Yield first signatures = [] of String self.open dirname do |dir| dir.entries.sort.each do |fn| From 9489d6abfdac6af1fab3a805b48639081dbe987a Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Tue, 14 Sep 2021 23:07:47 +0900 Subject: [PATCH 25/39] Use reference instead of primitive --- src/library/library.cr | 6 +++--- src/library/title.cr | 2 +- src/library/types.cr | 23 +++++++++++++++-------- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 012d14b..0a04b1a 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -143,10 +143,10 @@ class Library storage = Storage.new auto_close: false examine_context : ExamineContext = { - file_count: 0, + file_counter: (YieldCounter.new 1000), cached_contents_signature: {} of String => String, - deleted_title_ids: [] of String, - deleted_entry_ids: [] of String + deleted_title_ids: [] of String, + deleted_entry_ids: [] of String, } @title_ids.select! do |title_id| diff --git a/src/library/title.cr b/src/library/title.cr index 1a8d488..fcc5f2f 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -115,7 +115,7 @@ class Title previous_entries_size = @entries.size @entries.select! do |entry| existence = File.exists? entry.zip_path - yield_process_file context + context["file_counter"].count_and_yield context["deleted_entry_ids"] << entry.id unless existence existence end diff --git a/src/library/types.cr b/src/library/types.cr index 1355fcd..7856660 100644 --- a/src/library/types.cr +++ b/src/library/types.cr @@ -134,14 +134,21 @@ class TitleInfo end end +class YieldCounter + setter threshold : Int32 + + def initialize(@threshold : Int32) + @file_count = 0 + end + + def count_and_yield + @file_count += 1 + Fiber.yield if @file_count % @threshold == 0 + end +end + alias ExamineContext = NamedTuple( - file_count: Int32, + file_counter: YieldCounter, cached_contents_signature: Hash(String, String), deleted_title_ids: Array(String), - deleted_entry_ids: Array(String) -) - -def yield_process_file(context : ExamineContext) - context["file_count"] += 1 - Fiber.yield if context["file_count"] % 1000 == 0 -end + deleted_entry_ids: Array(String)) From 57b2f7c625ad83fbbc2d08012a84c46ffc27c3c8 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Tue, 14 Sep 2021 23:08:07 +0900 Subject: [PATCH 26/39] Get nested ids when title removed --- src/library/library.cr | 3 ++- src/library/title.cr | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 0a04b1a..6cf8c32 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -153,8 +153,9 @@ class Library title = @title_hash[title_id] existence = title.examine examine_context unless existence + examine_context["deleted_title_ids"].concat title.deep_titles.map &.id + examine_context["deleted_entry_ids"].concat title.deep_entries.map &.id @title_hash.delete title_id - examine_context["deleted_title_ids"] << title_id end existence end diff --git a/src/library/title.cr b/src/library/title.cr index fcc5f2f..db2ca30 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -102,8 +102,9 @@ class Title title = Library.default.get_title! title_id existence = title.examine context unless existence + context["deleted_title_ids"].concat title.deep_titles.map &.id + context["deleted_entry_ids"].concat title.deep_entries.map &.id Library.default.title_hash.delete title_id - context["deleted_title_ids"] << title_id end existence end From 663c0c0b38e47944e0da0d228b3593cdf317c871 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Tue, 14 Sep 2021 23:15:40 +0900 Subject: [PATCH 27/39] Remove nested title including self --- src/library/library.cr | 6 ++++-- src/library/title.cr | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 6cf8c32..8029fcb 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -153,13 +153,15 @@ class Library title = @title_hash[title_id] existence = title.examine examine_context unless existence - examine_context["deleted_title_ids"].concat title.deep_titles.map &.id + examine_context["deleted_title_ids"].concat [title_id] + title.deep_titles.map &.id examine_context["deleted_entry_ids"].concat title.deep_entries.map &.id - @title_hash.delete title_id end existence end remained_title_dirs = @title_ids.map { |id| title_hash[id].dir } + examine_context["deleted_title_ids"].each do |title_id| + @title_hash.delete title_id + end cache = examine_context["cached_contents_signature"] (Dir.entries @dir) diff --git a/src/library/title.cr b/src/library/title.cr index db2ca30..6bae693 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -102,9 +102,8 @@ class Title title = Library.default.get_title! title_id existence = title.examine context unless existence - context["deleted_title_ids"].concat title.deep_titles.map &.id + context["deleted_title_ids"].concat [title_id] + title.deep_titles.map &.id context["deleted_entry_ids"].concat title.deep_entries.map &.id - Library.default.title_hash.delete title_id end existence end From f4d7128b59fd229031735fac38006ba4a86ce7f8 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Tue, 14 Sep 2021 23:30:03 +0900 Subject: [PATCH 28/39] Mark unavailable only in candidates --- src/library/library.cr | 3 ++- src/storage.cr | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/library/library.cr b/src/library/library.cr index 8029fcb..12d662f 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -183,7 +183,8 @@ class Library ms = (Time.local - start).total_milliseconds Logger.info "Scanned #{@title_ids.size} titles in #{ms}ms" - Storage.default.mark_unavailable + Storage.default.mark_unavailable examine_context["deleted_entry_ids"], + examine_context["deleted_title_ids"] spawn do save_instance diff --git a/src/storage.cr b/src/storage.cr index 39116b9..1f0aab7 100644 --- a/src/storage.cr +++ b/src/storage.cr @@ -466,6 +466,47 @@ class Storage end end + # Limit mark targets with given arguments + def mark_unavailable(trash_ids_candidates : Array(String), trash_titles_candidates : Array(String)) + MainFiber.run do + get_db do |db| + # Detect dangling entry IDs + trash_ids = [] of String + db.query "select path, id from ids where id in " \ + "(#{trash_ids_candidates.join "," { |i| "'#{i}'" }})" do |rs| + rs.each do + path = rs.read String + fullpath = Path.new(path).expand(Config.current.library_path).to_s + trash_ids << rs.read String unless File.exists? fullpath + end + end + + unless trash_ids.empty? + Logger.debug "Marking #{trash_ids.size} entries as unavailable" + end + db.exec "update ids set unavailable = 1 where id in " \ + "(#{trash_ids.join "," { |i| "'#{i}'" }})" + + # Detect dangling title IDs + trash_titles = [] of String + db.query "select path, id from titles where id in " \ + "(#{trash_titles_candidates.join "," { |i| "'#{i}'" }})" do |rs| + rs.each do + path = rs.read String + fullpath = Path.new(path).expand(Config.current.library_path).to_s + trash_titles << rs.read String unless Dir.exists? fullpath + end + end + + unless trash_titles.empty? + Logger.debug "Marking #{trash_titles.size} titles as unavailable" + end + db.exec "update titles set unavailable = 1 where id in " \ + "(#{trash_titles.join "," { |i| "'#{i}'" }})" + end + end + end + private def get_missing(tablename) ary = [] of IDTuple MainFiber.run do From a3b2cdd372d546612e8806143b9baddce7cad415 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Wed, 15 Sep 2021 01:17:44 +0900 Subject: [PATCH 29/39] Lint --- src/library/library.cr | 3 ++- src/library/title.cr | 3 ++- src/storage.cr | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/library/library.cr b/src/library/library.cr index 12d662f..b50fb87 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -153,7 +153,8 @@ class Library title = @title_hash[title_id] existence = title.examine examine_context unless existence - examine_context["deleted_title_ids"].concat [title_id] + title.deep_titles.map &.id + examine_context["deleted_title_ids"].concat [title_id] + + title.deep_titles.map &.id examine_context["deleted_entry_ids"].concat title.deep_entries.map &.id end existence diff --git a/src/library/title.cr b/src/library/title.cr index 6bae693..41efd7c 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -102,7 +102,8 @@ class Title title = Library.default.get_title! title_id existence = title.examine context unless existence - context["deleted_title_ids"].concat [title_id] + title.deep_titles.map &.id + context["deleted_title_ids"].concat [title_id] + + title.deep_titles.map &.id context["deleted_entry_ids"].concat title.deep_entries.map &.id end existence diff --git a/src/storage.cr b/src/storage.cr index 1f0aab7..2f01863 100644 --- a/src/storage.cr +++ b/src/storage.cr @@ -467,13 +467,14 @@ class Storage end # Limit mark targets with given arguments - def mark_unavailable(trash_ids_candidates : Array(String), trash_titles_candidates : Array(String)) + def mark_unavailable(ids_candidates : Array(String), + titles_candidates : Array(String)) MainFiber.run do get_db do |db| # Detect dangling entry IDs trash_ids = [] of String db.query "select path, id from ids where id in " \ - "(#{trash_ids_candidates.join "," { |i| "'#{i}'" }})" do |rs| + "(#{ids_candidates.join "," { |i| "'#{i}'" }})" do |rs| rs.each do path = rs.read String fullpath = Path.new(path).expand(Config.current.library_path).to_s @@ -490,7 +491,7 @@ class Storage # Detect dangling title IDs trash_titles = [] of String db.query "select path, id from titles where id in " \ - "(#{trash_titles_candidates.join "," { |i| "'#{i}'" }})" do |rs| + "(#{titles_candidates.join "," { |i| "'#{i}'" }})" do |rs| rs.each do path = rs.read String fullpath = Path.new(path).expand(Config.current.library_path).to_s From d13cfc045fd344e4557f882bdc1eb748fb8bc0a0 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Wed, 15 Sep 2021 01:27:05 +0900 Subject: [PATCH 30/39] Add a comment --- src/storage.cr | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/storage.cr b/src/storage.cr index 2f01863..4511255 100644 --- a/src/storage.cr +++ b/src/storage.cr @@ -467,6 +467,8 @@ class Storage end # Limit mark targets with given arguments + # They should be checked again if they are really gone, + # since they would be available which are renamed or moved def mark_unavailable(ids_candidates : Array(String), titles_candidates : Array(String)) MainFiber.run do From de193906a2b3bf72ea255e330621fceea9edcac0 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Wed, 15 Sep 2021 16:54:55 +0900 Subject: [PATCH 31/39] Refactor mark_unavailable --- src/storage.cr | 55 +++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 41 deletions(-) diff --git a/src/storage.cr b/src/storage.cr index 4511255..9f6f45b 100644 --- a/src/storage.cr +++ b/src/storage.cr @@ -429,54 +429,24 @@ class Storage end def mark_unavailable - MainFiber.run do - get_db do |db| - # Detect dangling entry IDs - trash_ids = [] of String - db.query "select path, id from ids where unavailable = 0" do |rs| - rs.each do - path = rs.read String - fullpath = Path.new(path).expand(Config.current.library_path).to_s - trash_ids << rs.read String unless File.exists? fullpath - end - end - - unless trash_ids.empty? - Logger.debug "Marking #{trash_ids.size} entries as unavailable" - end - db.exec "update ids set unavailable = 1 where id in " \ - "(#{trash_ids.join "," { |i| "'#{i}'" }})" - - # Detect dangling title IDs - trash_titles = [] of String - db.query "select path, id from titles where unavailable = 0" do |rs| - rs.each do - path = rs.read String - fullpath = Path.new(path).expand(Config.current.library_path).to_s - trash_titles << rs.read String unless Dir.exists? fullpath - end - end - - unless trash_titles.empty? - Logger.debug "Marking #{trash_titles.size} titles as unavailable" - end - db.exec "update titles set unavailable = 1 where id in " \ - "(#{trash_titles.join "," { |i| "'#{i}'" }})" - end - end + mark_unavailable nil, nil end # Limit mark targets with given arguments # They should be checked again if they are really gone, # since they would be available which are renamed or moved - def mark_unavailable(ids_candidates : Array(String), - titles_candidates : Array(String)) + def mark_unavailable(ids_candidates : Array(String) | Nil, + titles_candidates : Array(String) | Nil) MainFiber.run do get_db do |db| # Detect dangling entry IDs trash_ids = [] of String - db.query "select path, id from ids where id in " \ - "(#{ids_candidates.join "," { |i| "'#{i}'" }})" do |rs| + # Use query builder instead? + query = "select path, id from ids where unavailable = 0" + unless ids_candidates.nil? + query += " and id in (#{ids_candidates.join "," { |i| "'#{i}'" }})" + end + db.query query do |rs| rs.each do path = rs.read String fullpath = Path.new(path).expand(Config.current.library_path).to_s @@ -492,8 +462,11 @@ class Storage # Detect dangling title IDs trash_titles = [] of String - db.query "select path, id from titles where id in " \ - "(#{titles_candidates.join "," { |i| "'#{i}'" }})" do |rs| + query = "select path, id from titles where unavailable = 0" + unless titles_candidates.nil? + query += " and id in (#{titles_candidates.join "," { |i| "'#{i}'" }})" + end + db.query query do |rs| rs.each do path = rs.read String fullpath = Path.new(path).expand(Config.current.library_path).to_s From d330db131e52ad0c62ffbf6e0df0568662813551 Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Wed, 15 Sep 2021 08:46:30 +0000 Subject: [PATCH 32/39] Simplify `mark_unavailable` --- src/storage.cr | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/storage.cr b/src/storage.cr index 9f6f45b..762946d 100644 --- a/src/storage.cr +++ b/src/storage.cr @@ -428,15 +428,11 @@ class Storage end end - def mark_unavailable - mark_unavailable nil, nil - end - # Limit mark targets with given arguments # They should be checked again if they are really gone, # since they would be available which are renamed or moved - def mark_unavailable(ids_candidates : Array(String) | Nil, - titles_candidates : Array(String) | Nil) + def mark_unavailable(ids_candidates : Array(String)?, + titles_candidates : Array(String)?) MainFiber.run do get_db do |db| # Detect dangling entry IDs From 44a6f822cd2d69156beb359dfa26b522f47e875d Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Wed, 15 Sep 2021 09:00:30 +0000 Subject: [PATCH 33/39] Simplify Title.new --- src/library/title.cr | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/library/title.cr b/src/library/title.cr index 3abc38d..ee1edd0 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -19,7 +19,7 @@ class Title @[YAML::Field(ignore: true)] @cached_cover_url : String? - def initialize(@dir : String, @parent_id, cache : Hash(String, String)?) + def initialize(@dir : String, @parent_id, cache : Hash(String, String)? = nil) storage = Storage.default @signature = Dir.signature dir id = storage.get_title_id dir, signature @@ -70,10 +70,6 @@ class Title end end - def self.new(dir : String, parent_id) - new dir, parent_id, nil - end - def examine(context : ExamineContext) : Bool return false unless Dir.exists? @dir # No title, Remove this contents_signature = Dir.contents_signature @dir, From 70ab198a33ee34bba7c1ce8f1152918a81316569 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Thu, 16 Sep 2021 00:16:26 +0900 Subject: [PATCH 34/39] Add config 'forcely_yield_count' the default value 1000 would make a fiber yield on each 4ms on SSD Apply yield counter in Dir.contents_signauture Use contents_signature cache in Title.new --- src/config.cr | 1 + src/library/library.cr | 3 ++- src/library/title.cr | 6 +++--- src/util/signature.cr | 11 +++++++---- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/config.cr b/src/config.cr index 99fb5b9..bc7aa18 100644 --- a/src/config.cr +++ b/src/config.cr @@ -23,6 +23,7 @@ class Config property cache_enabled = false property cache_size_mbs = 50 property cache_log_enabled = true + property forcely_yield_count = 1000 property disable_login = false property default_username = "" property auth_proxy_header_name = "" diff --git a/src/library/library.cr b/src/library/library.cr index ea2a5e3..de8d7b4 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -133,8 +133,9 @@ class Library storage = Storage.new auto_close: false + count = Config.current.forcely_yield_count examine_context : ExamineContext = { - file_counter: (YieldCounter.new 1000), + file_counter: (YieldCounter.new count), cached_contents_signature: {} of String => String, deleted_title_ids: [] of String, deleted_entry_ids: [] of String, diff --git a/src/library/title.cr b/src/library/title.cr index ee1edd0..f57024c 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -19,7 +19,7 @@ class Title @[YAML::Field(ignore: true)] @cached_cover_url : String? - def initialize(@dir : String, @parent_id, cache : Hash(String, String)? = nil) + def initialize(@dir : String, @parent_id, cache = {} of String => String) storage = Storage.default @signature = Dir.signature dir id = storage.get_title_id dir, signature @@ -43,7 +43,7 @@ class Title next if fn.starts_with? "." path = File.join dir, fn if File.directory? path - title = Title.new path, @id + title = Title.new path, @id, cache next if title.entries.size == 0 && title.titles.size == 0 Library.default.title_hash[title.id] = title @title_ids << title.id @@ -73,7 +73,7 @@ class Title def examine(context : ExamineContext) : Bool return false unless Dir.exists? @dir # No title, Remove this contents_signature = Dir.contents_signature @dir, - context["cached_contents_signature"] + context["cached_contents_signature"], context["file_counter"] # Not changed. Reuse this return true if @contents_signature == contents_signature diff --git a/src/util/signature.cr b/src/util/signature.cr index d0a55a6..59a8311 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -54,9 +54,11 @@ class Dir # Rescan conditions: # - When a file added, moved, removed, renamed (including which in nested # directories) - def self.contents_signature(dirname, cache = {} of String => String) : String - return cache[dirname] if !cache.nil? && cache[dirname]? - Fiber.yield # Yield first + def self.contents_signature(dirname, + cache = {} of String => String, + counter : YieldCounter? = nil) : String + return cache[dirname] if cache[dirname]? + counter.count_and_yield unless counter.nil? signatures = [] of String self.open dirname do |dir| dir.entries.sort.each do |fn| @@ -69,10 +71,11 @@ class Dir # supported file signatures << fn if is_supported_file fn end + counter.count_and_yield unless counter.nil? end end hash = Digest::SHA1.hexdigest(signatures.join) - cache[dirname] = hash unless cache.nil? + cache[dirname] = hash hash end end From 9769e760a0ad0ce9035ad350185497942b280795 Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Thu, 16 Sep 2021 07:49:12 +0900 Subject: [PATCH 35/39] Pass a counter to recursive calls, Ignore negative threshold --- src/library/types.cr | 4 ++-- src/util/signature.cr | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/library/types.cr b/src/library/types.cr index 7856660..a0e0428 100644 --- a/src/library/types.cr +++ b/src/library/types.cr @@ -135,7 +135,7 @@ class TitleInfo end class YieldCounter - setter threshold : Int32 + @file_count : UInt32 def initialize(@threshold : Int32) @file_count = 0 @@ -143,7 +143,7 @@ class YieldCounter def count_and_yield @file_count += 1 - Fiber.yield if @file_count % @threshold == 0 + Fiber.yield if @threshold > 0 && @file_count % @threshold == 0 end end diff --git a/src/util/signature.cr b/src/util/signature.cr index 59a8311..76e2628 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -65,7 +65,7 @@ class Dir next if fn.starts_with? "." path = File.join dirname, fn if File.directory? path - signatures << Dir.contents_signature path, cache + signatures << Dir.contents_signature path, cache, counter else # Only add its signature value to `signatures` when it is a # supported file From b56e16e1e19bca9a654fc813d79674159c1c42ea Mon Sep 17 00:00:00 2001 From: Leeingnyo Date: Sat, 18 Sep 2021 10:59:43 +0900 Subject: [PATCH 36/39] Remove counter, yield everytime --- src/config.cr | 1 - src/library/library.cr | 2 -- src/library/title.cr | 4 ++-- src/library/types.cr | 14 -------------- src/util/signature.cr | 10 ++++------ 5 files changed, 6 insertions(+), 25 deletions(-) diff --git a/src/config.cr b/src/config.cr index bc7aa18..99fb5b9 100644 --- a/src/config.cr +++ b/src/config.cr @@ -23,7 +23,6 @@ class Config property cache_enabled = false property cache_size_mbs = 50 property cache_log_enabled = true - property forcely_yield_count = 1000 property disable_login = false property default_username = "" property auth_proxy_header_name = "" diff --git a/src/library/library.cr b/src/library/library.cr index de8d7b4..ce1a8fc 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -133,9 +133,7 @@ class Library storage = Storage.new auto_close: false - count = Config.current.forcely_yield_count examine_context : ExamineContext = { - file_counter: (YieldCounter.new count), cached_contents_signature: {} of String => String, deleted_title_ids: [] of String, deleted_entry_ids: [] of String, diff --git a/src/library/title.cr b/src/library/title.cr index f57024c..6a959d8 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -73,7 +73,7 @@ class Title def examine(context : ExamineContext) : Bool return false unless Dir.exists? @dir # No title, Remove this contents_signature = Dir.contents_signature @dir, - context["cached_contents_signature"], context["file_counter"] + context["cached_contents_signature"] # Not changed. Reuse this return true if @contents_signature == contents_signature @@ -112,7 +112,7 @@ class Title previous_entries_size = @entries.size @entries.select! do |entry| existence = File.exists? entry.zip_path - context["file_counter"].count_and_yield + Fiber.yield context["deleted_entry_ids"] << entry.id unless existence existence end diff --git a/src/library/types.cr b/src/library/types.cr index a0e0428..4c9dc93 100644 --- a/src/library/types.cr +++ b/src/library/types.cr @@ -134,21 +134,7 @@ class TitleInfo end end -class YieldCounter - @file_count : UInt32 - - def initialize(@threshold : Int32) - @file_count = 0 - end - - def count_and_yield - @file_count += 1 - Fiber.yield if @threshold > 0 && @file_count % @threshold == 0 - end -end - alias ExamineContext = NamedTuple( - file_counter: YieldCounter, cached_contents_signature: Hash(String, String), deleted_title_ids: Array(String), deleted_entry_ids: Array(String)) diff --git a/src/util/signature.cr b/src/util/signature.cr index 76e2628..5ca3e14 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -54,24 +54,22 @@ class Dir # Rescan conditions: # - When a file added, moved, removed, renamed (including which in nested # directories) - def self.contents_signature(dirname, - cache = {} of String => String, - counter : YieldCounter? = nil) : String + def self.contents_signature(dirname, cache = {} of String => String) : String return cache[dirname] if cache[dirname]? - counter.count_and_yield unless counter.nil? + Fiber.yield signatures = [] of String self.open dirname do |dir| dir.entries.sort.each do |fn| next if fn.starts_with? "." path = File.join dirname, fn if File.directory? path - signatures << Dir.contents_signature path, cache, counter + signatures << Dir.contents_signature path, cache else # Only add its signature value to `signatures` when it is a # supported file signatures << fn if is_supported_file fn end - counter.count_and_yield unless counter.nil? + Fiber.yield end end hash = Digest::SHA1.hexdigest(signatures.join) From 96f1ef3dde56cc4132f69a0f22be810443281440 Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Sat, 18 Sep 2021 02:00:10 +0000 Subject: [PATCH 37/39] Improve comments on examine --- src/library/title.cr | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/library/title.cr b/src/library/title.cr index f57024c..f9499c1 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -70,14 +70,21 @@ class Title end end + # Utility method used in library rescanning. + # - When the title does not exist on the file system anymore, return false + # and let it be deleted from the libaray instance + # - When the title exists, but its contents sigature is now different from + # the cache, it means some of its content (nested titles or entries) + # has been added, deleted, or renamed. In this case we update its + # contents signature and instance variables + # - When the title exists and its contents sigature is still the same, we + # return true so it can be reused without rescanning def examine(context : ExamineContext) : Bool return false unless Dir.exists? @dir # No title, Remove this contents_signature = Dir.contents_signature @dir, context["cached_contents_signature"], context["file_counter"] - # Not changed. Reuse this return true if @contents_signature == contents_signature - # Fix title @contents_signature = contents_signature @signature = Dir.signature @dir storage = Storage.default @@ -160,7 +167,7 @@ class Title end end - true # Fixed, reuse this + true end alias SortContext = NamedTuple(username: String, opt: SortOptions) From 3f73591dd44fbd705de2cc9bd4b936fa3d16ddf8 Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Sat, 18 Sep 2021 02:14:22 +0000 Subject: [PATCH 38/39] Update comments --- src/library/title.cr | 6 +++--- src/storage.cr | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/library/title.cr b/src/library/title.cr index 853ed1c..1709e9b 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -72,12 +72,12 @@ class Title # Utility method used in library rescanning. # - When the title does not exist on the file system anymore, return false - # and let it be deleted from the libaray instance - # - When the title exists, but its contents sigature is now different from + # and let it be deleted from the library instance + # - When the title exists, but its contents signature is now different from # the cache, it means some of its content (nested titles or entries) # has been added, deleted, or renamed. In this case we update its # contents signature and instance variables - # - When the title exists and its contents sigature is still the same, we + # - When the title exists and its contents signature is still the same, we # return true so it can be reused without rescanning def examine(context : ExamineContext) : Bool return false unless Dir.exists? @dir # No title, Remove this diff --git a/src/storage.cr b/src/storage.cr index 762946d..eea5927 100644 --- a/src/storage.cr +++ b/src/storage.cr @@ -428,9 +428,10 @@ class Storage end end - # Limit mark targets with given arguments - # They should be checked again if they are really gone, - # since they would be available which are renamed or moved + # Mark titles and entries that no longer exist on the file system as + # unavailable. By supplying `id_candidates` and `titles_candidates`, it + # only checks the existence of the candidate titles/entries to speed up + # the process. def mark_unavailable(ids_candidates : Array(String)?, titles_candidates : Array(String)?) MainFiber.run do From 16397050dd7604b0ab2f21aec3a573041052cd90 Mon Sep 17 00:00:00 2001 From: Alex Ling Date: Sat, 18 Sep 2021 02:24:50 +0000 Subject: [PATCH 39/39] Update comments --- src/library/title.cr | 2 +- src/storage.cr | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/library/title.cr b/src/library/title.cr index 1709e9b..7efee42 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -80,7 +80,7 @@ class Title # - When the title exists and its contents signature is still the same, we # return true so it can be reused without rescanning def examine(context : ExamineContext) : Bool - return false unless Dir.exists? @dir # No title, Remove this + return false unless Dir.exists? @dir contents_signature = Dir.contents_signature @dir, context["cached_contents_signature"] return true if @contents_signature == contents_signature diff --git a/src/storage.cr b/src/storage.cr index eea5927..32f446a 100644 --- a/src/storage.cr +++ b/src/storage.cr @@ -438,7 +438,6 @@ class Storage get_db do |db| # Detect dangling entry IDs trash_ids = [] of String - # Use query builder instead? query = "select path, id from ids where unavailable = 0" unless ids_candidates.nil? query += " and id in (#{ids_candidates.join "," { |i| "'#{i}'" }})"