diff --git a/src/library/entry.cr b/src/library/entry.cr index 0a08246..43fbb23 100644 --- a/src/library/entry.cr +++ b/src/library/entry.cr @@ -1,6 +1,9 @@ require "image_size" +require "yaml" class Entry + include YAML::Serializable + getter zip_path : String, book : Title, title : String, size : String, pages : Int32, id : String, encoded_path : String, encoded_title : String, mtime : Time, err_msg : String? diff --git a/src/library/library.cr b/src/library/library.cr index 508a274..ce1a8fc 100644 --- a/src/library/library.cr +++ b/src/library/library.cr @@ -1,12 +1,41 @@ class Library + include YAML::Serializable + getter dir : String, title_ids : Array(String), title_hash : Hash(String, Title) use_default - def initialize - register_mime_types + def save_instance + path = Config.current.library_path + instance_file_path = File.join path, "library.yml.gz" + Logger.debug "Caching library to #{instance_file_path}" + writer = Compress::Gzip::Writer.new instance_file_path, + Compress::Gzip::BEST_COMPRESSION + writer.write self.to_yaml.to_slice + writer.close + end + + def self.load_instance + dir = Config.current.library_path + return unless Dir.exists? dir + instance_file_path = File.join dir, "library.yml.gz" + return unless File.exists? instance_file_path + + Logger.debug "Loading cached library from #{instance_file_path}" + + begin + Compress::Gzip::Reader.open instance_file_path do |content| + @@default = Library.from_yaml content + end + Library.default.register_jobs + rescue e + Logger.error e + end + end + + def initialize @dir = Config.current.library_path # explicitly initialize @titles to bypass the compiler check. 
it will # be filled with actual Titles in the `scan` call below @@ -16,6 +45,12 @@ class Library @entries_count = 0 @thumbnails_count = 0 + register_jobs + end + + protected def register_jobs + register_mime_types + scan_interval = Config.current.scan_interval_minutes if scan_interval < 1 scan @@ -25,7 +60,7 @@ class Library start = Time.local scan ms = (Time.local - start).total_milliseconds - Logger.info "Scanned #{@title_ids.size} titles in #{ms}ms" + Logger.debug "Library initialized in #{ms}ms" sleep scan_interval.minutes end end @@ -89,6 +124,7 @@ class Library end def scan + start = Time.local unless Dir.exists? @dir Logger.info "The library directory #{@dir} does not exist. " \ "Attempting to create it" @@ -97,14 +133,36 @@ class Library storage = Storage.new auto_close: false + examine_context : ExamineContext = { + cached_contents_signature: {} of String => String, + deleted_title_ids: [] of String, + deleted_entry_ids: [] of String, + } + + @title_ids.select! do |title_id| + title = @title_hash[title_id] + existence = title.examine examine_context + unless existence + examine_context["deleted_title_ids"].concat [title_id] + + title.deep_titles.map &.id + examine_context["deleted_entry_ids"].concat title.deep_entries.map &.id + end + existence + end + remained_title_dirs = @title_ids.map { |id| title_hash[id].dir } + examine_context["deleted_title_ids"].each do |title_id| + @title_hash.delete title_id + end + + cache = examine_context["cached_contents_signature"] (Dir.entries @dir) .select { |fn| !fn.starts_with? "." } .map { |fn| File.join @dir, fn } + .select { |path| !(remained_title_dirs.includes? path) } .select { |path| File.directory? path } - .map { |path| Title.new path, "" } + .map { |path| Title.new path, "", cache } .select { |title| !(title.entries.empty? && title.titles.empty?) } .sort! 
{ |a, b| a.title <=> b.title } - .tap { |_| @title_ids.clear } .each do |title| @title_hash[title.id] = title @title_ids << title.id @@ -113,8 +171,15 @@ class Library storage.bulk_insert_ids storage.close - Logger.debug "Scan completed" - Storage.default.mark_unavailable + ms = (Time.local - start).total_milliseconds + Logger.info "Scanned #{@title_ids.size} titles in #{ms}ms" + + Storage.default.mark_unavailable examine_context["deleted_entry_ids"], + examine_context["deleted_title_ids"] + + spawn do + save_instance + end end def get_continue_reading_entries(username) diff --git a/src/library/title.cr b/src/library/title.cr index 957a608..7efee42 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -2,18 +2,24 @@ require "digest" require "../archive" class Title + include YAML::Serializable + getter dir : String, parent_id : String, title_ids : Array(String), entries : Array(Entry), title : String, id : String, encoded_title : String, mtime : Time, signature : UInt64, entry_cover_url_cache : Hash(String, String)? setter entry_cover_url_cache : Hash(String, String)? + @[YAML::Field(ignore: true)] @entry_display_name_cache : Hash(String, String)? + @[YAML::Field(ignore: true)] @entry_cover_url_cache : Hash(String, String)? + @[YAML::Field(ignore: true)] @cached_display_name : String? + @[YAML::Field(ignore: true)] @cached_cover_url : String? - def initialize(@dir : String, @parent_id) + def initialize(@dir : String, @parent_id, cache = {} of String => String) storage = Storage.default @signature = Dir.signature dir id = storage.get_title_id dir, signature @@ -26,6 +32,7 @@ class Title }) end @id = id + @contents_signature = Dir.contents_signature dir, cache @title = File.basename dir @encoded_title = URI.encode @title @title_ids = [] of String @@ -36,7 +43,7 @@ class Title next if fn.starts_with? "." path = File.join dir, fn if File.directory? 
path - title = Title.new path, @id + title = Title.new path, @id, cache next if title.entries.size == 0 && title.titles.size == 0 Library.default.title_hash[title.id] = title @title_ids << title.id @@ -63,6 +70,106 @@ class Title end end + # Utility method used in library rescanning. + # - When the title does not exist on the file system anymore, return false + # and let it be deleted from the library instance + # - When the title exists, but its contents signature is now different from + # the cache, it means some of its content (nested titles or entries) + # has been added, deleted, or renamed. In this case we update its + # contents signature and instance variables + # - When the title exists and its contents signature is still the same, we + # return true so it can be reused without rescanning + def examine(context : ExamineContext) : Bool + return false unless Dir.exists? @dir + contents_signature = Dir.contents_signature @dir, + context["cached_contents_signature"] + return true if @contents_signature == contents_signature + + @contents_signature = contents_signature + @signature = Dir.signature @dir + storage = Storage.default + id = storage.get_title_id dir, signature + if id.nil? + id = random_str + storage.insert_title_id({ + path: dir, + id: id, + signature: signature.to_s, + }) + end + @id = id + @mtime = File.info(@dir).modification_time + + previous_titles_size = @title_ids.size + @title_ids.select! do |title_id| + title = Library.default.get_title! title_id + existence = title.examine context + unless existence + context["deleted_title_ids"].concat [title_id] + + title.deep_titles.map &.id + context["deleted_entry_ids"].concat title.deep_entries.map &.id + end + existence + end + remained_title_dirs = @title_ids.map do |title_id| + title = Library.default.get_title! title_id + title.dir + end + + previous_entries_size = @entries.size + @entries.select! do |entry| + existence = File.exists? 
entry.zip_path + Fiber.yield + context["deleted_entry_ids"] << entry.id unless existence + existence + end + remained_entry_zip_paths = @entries.map &.zip_path + + is_titles_added = false + is_entries_added = false + Dir.entries(dir).each do |fn| + next if fn.starts_with? "." + path = File.join dir, fn + if File.directory? path + next if remained_title_dirs.includes? path + title = Title.new path, @id, context["cached_contents_signature"] + next if title.entries.size == 0 && title.titles.size == 0 + Library.default.title_hash[title.id] = title + @title_ids << title.id + is_titles_added = true + next + end + if is_supported_file path + next if remained_entry_zip_paths.includes? path + entry = Entry.new path, self + if entry.pages > 0 || entry.err_msg + @entries << entry + is_entries_added = true + end + end + end + + mtimes = [@mtime] + mtimes += @title_ids.map { |e| Library.default.title_hash[e].mtime } + mtimes += @entries.map &.mtime + @mtime = mtimes.max + + if is_titles_added || previous_titles_size != @title_ids.size + @title_ids.sort! do |a, b| + compare_numerically Library.default.title_hash[a].title, + Library.default.title_hash[b].title + end + end + if is_entries_added || previous_entries_size != @entries.size + sorter = ChapterSorter.new @entries.map &.title + @entries.sort! 
do |a, b| + sorter.compare a.title, b.title + end + end + + true + end + alias SortContext = NamedTuple(username: String, opt: SortOptions) def build_json(*, slim = false, shallow = false, diff --git a/src/library/types.cr b/src/library/types.cr index 0545184..4c9dc93 100644 --- a/src/library/types.cr +++ b/src/library/types.cr @@ -133,3 +133,8 @@ class TitleInfo LRUCache.set generate_cache_entry key, self.to_json end end + +alias ExamineContext = NamedTuple( + cached_contents_signature: Hash(String, String), + deleted_title_ids: Array(String), + deleted_entry_ids: Array(String)) diff --git a/src/mango.cr b/src/mango.cr index f27165e..39b1352 100644 --- a/src/mango.cr +++ b/src/mango.cr @@ -58,6 +58,7 @@ class CLI < Clim LRUCache.init Storage.default Queue.default + Library.load_instance Library.default Plugin::Downloader.default diff --git a/src/storage.cr b/src/storage.cr index 39116b9..32f446a 100644 --- a/src/storage.cr +++ b/src/storage.cr @@ -428,12 +428,21 @@ class Storage end end - def mark_unavailable + # Mark titles and entries that no longer exist on the file system as + # unavailable. By supplying `ids_candidates` and `titles_candidates`, it + # only checks the existence of the candidate titles/entries to speed up + # the process. + def mark_unavailable(ids_candidates : Array(String)?, + titles_candidates : Array(String)?) MainFiber.run do get_db do |db| # Detect dangling entry IDs trash_ids = [] of String - db.query "select path, id from ids where unavailable = 0" do |rs| + query = "select path, id from ids where unavailable = 0" + unless ids_candidates.nil? 
+ query += " and id in (#{ids_candidates.join "," { |i| "'#{i}'" }})" + end + db.query query do |rs| rs.each do path = rs.read String fullpath = Path.new(path).expand(Config.current.library_path).to_s @@ -449,7 +458,11 @@ # Detect dangling title IDs trash_titles = [] of String - db.query "select path, id from titles where unavailable = 0" + query = "select path, id from titles where unavailable = 0" + unless titles_candidates.nil? + query += " and id in (#{titles_candidates.join "," { |i| "'#{i}'" }})" + end + db.query query do |rs| rs.each do path = rs.read String fullpath = Path.new(path).expand(Config.current.library_path).to_s diff --git a/src/util/signature.cr b/src/util/signature.cr index d1a0040..5ca3e14 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -48,4 +48,32 @@ class Dir end Digest::CRC32.checksum(signatures.sort.join).to_u64 end + + # Returns the contents signature of the directory at dirname, used to + # decide whether a rescan is needed. + # Rescan conditions: + # - When a file is added, moved, removed, or renamed (including those + # in nested directories) + def self.contents_signature(dirname, cache = {} of String => String) : String + return cache[dirname] if cache[dirname]? + Fiber.yield + signatures = [] of String + self.open dirname do |dir| + dir.entries.sort.each do |fn| + next if fn.starts_with? "." + path = File.join dirname, fn + if File.directory? path + signatures << Dir.contents_signature path, cache + else + # Only add its signature value to `signatures` when it is a + # supported file + signatures << fn if is_supported_file fn + end + Fiber.yield + end + end + hash = Digest::SHA1.hexdigest(signatures.join) + cache[dirname] = hash + hash + end end