diff --git a/.dockerignore b/.dockerignore
index 491fc35..996cb5c 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,9 @@
 node_modules
 lib
+Dockerfile
+Dockerfile.arm32v7
+Dockerfile.arm64v8
+README.md
+.all-contributorsrc
+env.example
+.github/
diff --git a/README.md b/README.md
index a04cadd..1a010dd 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,7 @@ The official docker images are available on [Dockerhub](https://hub.docker.com/r
 ### CLI
 
 ```
-  Mango - Manga Server and Web Reader. Version 0.23.0
+  Mango - Manga Server and Web Reader. Version 0.24.0
 
   Usage:
 
@@ -86,6 +86,10 @@ log_level: info
 upload_path: ~/mango/uploads
 plugin_path: ~/mango/plugins
 download_timeout_seconds: 30
+library_cache_path: ~/mango/library.yml.gz
+cache_enabled: false
+cache_size_mbs: 50
+cache_log_enabled: true
 disable_login: false
 default_username: ""
 auth_proxy_header_name: ""
@@ -97,12 +101,12 @@ mangadex:
   download_queue_db_path: ~/mango/queue.db
   chapter_rename_rule: '[Vol.{volume} ][Ch.{chapter} ]{title|id}'
   manga_rename_rule: '{title}'
-  subscription_update_interval_hours: 24
 ```
 
 - `scan_interval_minutes`, `thumbnail_generation_interval_hours` and `db_optimization_interval_hours` can be any non-negative integer. Setting them to `0` disables the periodic tasks
 - `log_level` can be `debug`, `info`, `warn`, `error`, `fatal` or `off`. Setting it to `off` disables the logging
 - You can disable authentication by setting `disable_login` to true. Note that `default_username` must be set to an existing username for this to work.
+- By setting `cache_enabled` to `true`, you can enable an experimental feature where Mango caches library metadata to improve page load time. You can further fine-tune the feature with `cache_size_mbs` and `cache_log_enabled`.
 
 ### Library Structure
 
diff --git a/shard.yml b/shard.yml
index 1da553d..0054a23 100644
--- a/shard.yml
+++ b/shard.yml
@@ -1,5 +1,5 @@
 name: mango
-version: 0.23.0
+version: 0.24.0
 
 authors:
   - Alex Ling
diff --git a/spec/util_spec.cr b/spec/util_spec.cr
index 27d97c2..fb4efd7 100644
--- a/spec/util_spec.cr
+++ b/spec/util_spec.cr
@@ -61,3 +61,13 @@ describe "chapter_sort" do
     end.should eq ary
   end
 end
+
+describe "sanitize_filename" do
+  it "returns a random string for empty sanitized string" do
+    sanitize_filename("..").should_not eq sanitize_filename("..")
+  end
+  it "sanitizes correctly" do
+    sanitize_filename(".. \n\v.\rマンゴー/|*()<[1/2] 3.14 hello world ")
+      .should eq "マンゴー_()[1_2] 3.14 hello world"
+  end
+end
diff --git a/src/config.cr b/src/config.cr
index 332a159..aa818c3 100644
--- a/src/config.cr
+++ b/src/config.cr
@@ -11,6 +11,8 @@ class Config
   property session_secret : String = "mango-session-secret"
   property library_path : String = File.expand_path "~/mango/library",
     home: true
+  property library_cache_path = File.expand_path "~/mango/library.yml.gz",
+    home: true
   property db_path : String = File.expand_path "~/mango/mango.db", home: true
   property scan_interval_minutes : Int32 = 5
   property thumbnail_generation_interval_hours : Int32 = 24
@@ -20,6 +22,9 @@ class Config
   property plugin_path : String = File.expand_path "~/mango/plugins",
     home: true
   property download_timeout_seconds : Int32 = 30
+  property cache_enabled = false
+  property cache_size_mbs = 50
+  property cache_log_enabled = true
   property disable_login = false
   property default_username = ""
   property auth_proxy_header_name = ""
diff --git a/src/library/cache.cr b/src/library/cache.cr
new file mode 100644
index 0000000..d0d3f01
--- /dev/null
+++ b/src/library/cache.cr
@@ -0,0 +1,188 @@
+require "digest"
+
+require "./entry"
+require "./types"
+
+# Base class for an entry in the LRU cache.
+# There are two ways to use it:
+#   1. Use it as it is by instantiating with the appropriate `SaveT` and
+#      `ReturnT`. Note that in this case, `SaveT` and `ReturnT` must be the
+#      same type. That is, the input value will be stored as it is without
+#      any transformation.
+#   2. You can also subclass it and provide custom implementations for
+#      `to_save_t` and `to_return_t`. This allows you to transform and store
+#      the input value to a different type. See `SortedEntriesCacheEntry` as
+#      an example.
+private class CacheEntry(SaveT, ReturnT)
+  getter key : String, atime : Time
+
+  @value : SaveT
+
+  def initialize(@key : String, value : ReturnT)
+    @atime = @ctime = Time.utc
+    @value = self.class.to_save_t value
+  end
+
+  def value
+    @atime = Time.utc
+    self.class.to_return_t @value
+  end
+
+  def self.to_save_t(value : ReturnT)
+    value
+  end
+
+  def self.to_return_t(value : SaveT)
+    value
+  end
+
+  def instance_size
+    instance_sizeof(CacheEntry(SaveT, ReturnT)) + # sizeof itself
+      instance_sizeof(String) + @key.bytesize +   # allocated memory for @key
+      @value.instance_size
+  end
+end
+
+class SortedEntriesCacheEntry < CacheEntry(Array(String), Array(Entry))
+  def self.to_save_t(value : Array(Entry))
+    value.map &.id
+  end
+
+  def self.to_return_t(value : Array(String))
+    ids_to_entries value
+  end
+
+  private def self.ids_to_entries(ids : Array(String))
+    e_map = Library.default.deep_entries.to_h { |entry| {entry.id, entry} }
+    entries = [] of Entry
+    begin
+      ids.each do |id|
+        entries << e_map[id]
+      end
+      return entries if ids.size == entries.size
+    rescue
+    end
+  end
+
+  def instance_size
+    instance_sizeof(SortedEntriesCacheEntry) +  # sizeof itself
+      instance_sizeof(String) + @key.bytesize + # allocated memory for @key
+      @value.size * (instance_sizeof(String) + sizeof(String)) +
+      @value.sum(&.bytesize) # elements in Array(String)
+  end
+
+  def self.gen_key(book_id : String, username : String,
+                   entries : Array(Entry), opt : SortOptions?)
+    entries_sig = Digest::SHA1.hexdigest (entries.map &.id).to_s
+    user_context = opt && opt.method == SortMethod::Progress ? username : ""
+    sig = Digest::SHA1.hexdigest (book_id + entries_sig + user_context +
+                                  (opt ? opt.to_tuple.to_s : "nil"))
+    "#{sig}:sorted_entries"
+  end
+end
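
`gen_key` folds the book ID, the entry list, and the sort options into a SHA-1 digest, and mixes the username in only for `Progress` sorting — the one order that differs between users. A sketch of the consequence (`title` stands for any loaded `Title`; the usernames are illustrative):

```crystal
# Title order is user-independent, so both users share one cache slot.
SortedEntriesCacheEntry.gen_key title.id, "alice", title.entries,
  SortOptions.new(SortMethod::Title, true)
# => same key as for "bob"

# Progress order differs per user, so the keys (and cache slots) diverge.
SortedEntriesCacheEntry.gen_key title.id, "alice", title.entries,
  SortOptions.new(SortMethod::Progress, true)
# => distinct key from "bob"'s
```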
+
+class String
+  def instance_size
+    instance_sizeof(String) + bytesize
+  end
+end
+
+struct Tuple(*T)
+  def instance_size
+    sizeof(T) + # total size of non-reference types
+      self.sum do |e|
+        next 0 unless e.is_a? Reference
+        if e.responds_to? :instance_size
+          e.instance_size
+        else
+          instance_sizeof(typeof(e))
+        end
+      end
+  end
+end
+
+alias CacheableType = Array(Entry) | String | Tuple(String, Int32)
+alias CacheEntryType = SortedEntriesCacheEntry |
+                       CacheEntry(String, String) |
+                       CacheEntry(Tuple(String, Int32), Tuple(String, Int32))
+
+def generate_cache_entry(key : String, value : CacheableType)
+  if value.is_a? Array(Entry)
+    SortedEntriesCacheEntry.new key, value
+  else
+    CacheEntry(typeof(value), typeof(value)).new key, value
+  end
+end
+
+# LRU Cache
+class LRUCache
+  @@limit : Int128 = Int128.new 0
+  @@should_log = true
+  # key => entry
+  @@cache = {} of String => CacheEntryType
+
+  def self.enabled
+    Config.current.cache_enabled
+  end
+
+  def self.init
+    cache_size = Config.current.cache_size_mbs
+    @@limit = Int128.new cache_size * 1024 * 1024 if enabled
+    @@should_log = Config.current.cache_log_enabled
+  end
+
+  def self.get(key : String)
+    return unless enabled
+    entry = @@cache[key]?
+    if @@should_log
+      Logger.debug "LRUCache #{entry.nil? ? "miss" : "hit"} #{key}"
+    end
+    return entry.value unless entry.nil?
+  end
+
+  def self.set(cache_entry : CacheEntryType)
+    return unless enabled
+    key = cache_entry.key
+    @@cache[key] = cache_entry
+    Logger.debug "LRUCache cached #{key}" if @@should_log
+
+    remove_least_recent_access
+  end
+
+  def self.invalidate(key : String)
+    return unless enabled
+    @@cache.delete key
+  end
+
+  def self.print
+    return unless @@should_log
+    sum = @@cache.sum { |_, entry| entry.instance_size }
+    Logger.debug "---- LRU Cache ----"
+    Logger.debug "Size: #{sum} Bytes"
+    Logger.debug "List:"
+    @@cache.each do |k, v|
+      Logger.debug "#{k} | #{v.atime} | #{v.instance_size}"
+    end
+    Logger.debug "-------------------"
+  end
+
+  private def self.is_cache_full
+    sum = @@cache.sum { |_, entry| entry.instance_size }
+    sum > @@limit
+  end
+
+  private def self.remove_least_recent_access
+    if @@should_log && is_cache_full
+      Logger.debug "Removing entries from LRUCache"
+    end
+    while is_cache_full && @@cache.size > 0
+      min_tuple = @@cache.min_by { |_, entry| entry.atime }
+      min_key = min_tuple[0]
+      min_entry = min_tuple[1]
+
+      Logger.debug " \
+        Target: #{min_key}, \
+        Last Access Time: #{min_entry.atime}" if @@should_log
+      invalidate min_key
+    end
+  end
+end
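
The public surface of the cache is small: `init` sizes it from the config, `generate_cache_entry` wraps a value in the matching entry type, and `get`/`set`/`invalidate` do the rest. A minimal usage sketch (assumes `cache_enabled: true`; the key and value are placeholders):

```crystal
LRUCache.init

# A String value is stored as-is through the generic
# CacheEntry(String, String).
LRUCache.set generate_cache_entry "some:key", "some value"

# `get` returns nil on a miss (or when the cache is disabled), so the
# caller must type-check the result before using it.
value = LRUCache.get "some:key"
puts value if value.is_a? String

LRUCache.invalidate "some:key"
```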
diff --git a/src/library/entry.cr b/src/library/entry.cr
index 92f4def..43fbb23 100644
--- a/src/library/entry.cr
+++ b/src/library/entry.cr
@@ -1,6 +1,9 @@
 require "image_size"
+require "yaml"
 
 class Entry
+  include YAML::Serializable
+
   getter zip_path : String, book : Title, title : String,
     size : String, pages : Int32, id : String, encoded_path : String,
     encoded_title : String, mtime : Time, err_msg : String?
@@ -46,7 +49,7 @@ class Entry
     file.close
   end
 
-  def to_slim_json : String
+  def build_json(*, slim = false)
     JSON.build do |json|
       json.object do
         {% for str in ["zip_path", "title", "size", "id"] %}
@@ -54,23 +57,15 @@ class Entry
         {% end %}
         json.field "title_id", @book.id
         json.field "pages" { json.number @pages }
+        unless slim
+          json.field "display_name", @book.display_name @title
+          json.field "cover_url", cover_url
+          json.field "mtime" { json.number @mtime.to_unix }
+        end
       end
     end
   end
 
-  def to_json(json : JSON::Builder)
-    json.object do
-      {% for str in ["zip_path", "title", "size", "id"] %}
-        json.field {{str}}, @{{str.id}}
-      {% end %}
-      json.field "title_id", @book.id
-      json.field "display_name", @book.display_name @title
-      json.field "cover_url", cover_url
-      json.field "pages" { json.number @pages }
-      json.field "mtime" { json.number @mtime.to_unix }
-    end
-  end
-
   def display_name
     @book.display_name @title
   end
@@ -81,9 +76,17 @@ class Entry
 
   def cover_url
     return "#{Config.current.base_url}img/icon.png" if @err_msg
+
+    unless @book.entry_cover_url_cache
+      TitleInfo.new @book.dir do |info|
+        @book.entry_cover_url_cache = info.entry_cover_url
+      end
+    end
+    entry_cover_url = @book.entry_cover_url_cache
+
     url = "#{Config.current.base_url}api/cover/#{@book.id}/#{@id}"
-    TitleInfo.new @book.dir do |info|
-      info_url = info.entry_cover_url[@title]?
+    if entry_cover_url
+      info_url = entry_cover_url[@title]?
       unless info_url.nil? || info_url.empty?
         url = File.join Config.current.base_url, info_url
       end
@@ -170,6 +173,16 @@ class Entry
   # For backward compatibility with v0.1.0, we save entry titles
   #   instead of IDs in info.json
   def save_progress(username, page)
+    LRUCache.invalidate "#{@book.id}:#{username}:progress_sum"
+    @book.parents.each do |parent|
+      LRUCache.invalidate "#{parent.id}:#{username}:progress_sum"
+    end
+    [false, true].each do |ascend|
+      sorted_entries_cache_key = SortedEntriesCacheEntry.gen_key @book.id,
+        username, @book.entries, SortOptions.new(SortMethod::Progress, ascend)
+      LRUCache.invalidate sorted_entries_cache_key
+    end
+
     TitleInfo.new @book.dir do |info|
       if info.progress[username]?.nil?
         info.progress[username] = {@title => page}
diff --git a/src/library/library.cr b/src/library/library.cr
index a5a4a80..93eac0c 100644
--- a/src/library/library.cr
+++ b/src/library/library.cr
@@ -1,12 +1,38 @@
 class Library
+  include YAML::Serializable
+
   getter dir : String, title_ids : Array(String),
     title_hash : Hash(String, Title)
 
   use_default
 
-  def initialize
-    register_mime_types
+  def save_instance
+    path = Config.current.library_cache_path
+    Logger.debug "Caching library to #{path}"
+
+    writer = Compress::Gzip::Writer.new path,
+      Compress::Gzip::BEST_COMPRESSION
+    writer.write self.to_yaml.to_slice
+    writer.close
+  end
+
+  def self.load_instance
+    path = Config.current.library_cache_path
+    return unless File.exists? path
+
+    Logger.debug "Loading cached library from #{path}"
+
+    begin
+      Compress::Gzip::Reader.open path do |content|
+        @@default = Library.from_yaml content
+      end
+      Library.default.register_jobs
+    rescue e
+      Logger.error e
+    end
+  end
 
+  def initialize
     @dir = Config.current.library_path
     # explicitly initialize @titles to bypass the compiler check. it will
     #   be filled with actual Titles in the `scan` call below
@@ -16,6 +42,12 @@ class Library
     @entries_count = 0
     @thumbnails_count = 0
 
+    register_jobs
+  end
+
+  protected def register_jobs
+    register_mime_types
+
     scan_interval = Config.current.scan_interval_minutes
     if scan_interval < 1
       scan
@@ -25,7 +57,7 @@ class Library
         start = Time.local
         scan
         ms = (Time.local - start).total_milliseconds
-        Logger.info "Scanned #{@title_ids.size} titles in #{ms}ms"
+        Logger.debug "Library initialized in #{ms}ms"
         sleep scan_interval.minutes
       end
     end
@@ -51,11 +83,6 @@ class Library
   def sorted_titles(username, opt : SortOptions? = nil)
     if opt.nil?
       opt = SortOptions.from_info_json @dir, username
-    else
-      TitleInfo.new @dir do |info|
-        info.sort_by[username] = opt.to_tuple
-        info.save
-      end
     end
 
     # Helper function from src/util/util.cr
@@ -66,14 +93,18 @@ class Library
     titles + titles.flat_map &.deep_titles
   end
 
-  def to_slim_json : String
+  def deep_entries
+    titles.flat_map &.deep_entries
+  end
+
+  def build_json(*, slim = false, depth = -1)
     JSON.build do |json|
       json.object do
         json.field "dir", @dir
         json.field "titles" do
           json.array do
             self.titles.each do |title|
-              json.raw title.to_slim_json
+              json.raw title.build_json(slim: slim, depth: depth)
             end
           end
         end
@@ -81,15 +112,6 @@ class Library
     end
   end
 
-  def to_json(json : JSON::Builder)
-    json.object do
-      json.field "dir", @dir
-      json.field "titles" do
-        json.raw self.titles.to_json
-      end
-    end
-  end
-
   def get_title(tid)
     @title_hash[tid]?
   end
@@ -99,6 +121,7 @@ class Library
   end
 
   def scan
+    start = Time.local
     unless Dir.exists? @dir
       Logger.info "The library directory #{@dir} does not exist. " \
                   "Attempting to create it"
@@ -107,14 +130,36 @@ class Library
 
     storage = Storage.new auto_close: false
 
+    examine_context : ExamineContext = {
+      cached_contents_signature: {} of String => String,
+      deleted_title_ids:         [] of String,
+      deleted_entry_ids:         [] of String,
+    }
+
+    @title_ids.select! do |title_id|
+      title = @title_hash[title_id]
+      existence = title.examine examine_context
+      unless existence
+        examine_context["deleted_title_ids"].concat [title_id] +
+                                                    title.deep_titles.map &.id
+        examine_context["deleted_entry_ids"].concat title.deep_entries.map &.id
+      end
+      existence
+    end
+    remained_title_dirs = @title_ids.map { |id| title_hash[id].dir }
+    examine_context["deleted_title_ids"].each do |title_id|
+      @title_hash.delete title_id
+    end
+
+    cache = examine_context["cached_contents_signature"]
     (Dir.entries @dir)
       .select { |fn| !fn.starts_with? "." }
       .map { |fn| File.join @dir, fn }
+      .select { |path| !(remained_title_dirs.includes? path) }
      .select { |path| File.directory? path }
-      .map { |path| Title.new path, "" }
+      .map { |path| Title.new path, "", cache }
      .select { |title| !(title.entries.empty? && title.titles.empty?) }
      .sort! { |a, b| a.title <=> b.title }
-      .tap { |_| @title_ids.clear }
      .each do |title|
        @title_hash[title.id] = title
        @title_ids << title.id
      end
 
     storage.bulk_insert_ids
     storage.close
 
-    Logger.debug "Scan completed"
-    Storage.default.mark_unavailable
+    ms = (Time.local - start).total_milliseconds
+    Logger.info "Scanned #{@title_ids.size} titles in #{ms}ms"
+
+    Storage.default.mark_unavailable examine_context["deleted_entry_ids"],
+      examine_context["deleted_title_ids"]
+
+    spawn do
+      save_instance
+    end
   end
 
   def get_continue_reading_entries(username)
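
The cached instance is written back (off the main fiber) at the end of every scan and restored at startup. The initialization order in `src/mango.cr` (further down in this diff) is what makes the round-trip work — a sketch:

```crystal
LRUCache.init         # size the LRU cache from the config
Storage.default
Queue.default
Library.load_instance # restore ~/mango/library.yml.gz when it exists
Library.default       # otherwise builds (and scans) a fresh Library
```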
diff --git a/src/library/title.cr b/src/library/title.cr
index 61c9813..9b797f4 100644
--- a/src/library/title.cr
+++ b/src/library/title.cr
@@ -1,13 +1,25 @@
+require "digest"
+
 require "../archive"
 
 class Title
+  include YAML::Serializable
+
   getter dir : String, parent_id : String, title_ids : Array(String),
     entries : Array(Entry), title : String, id : String,
-    encoded_title : String, mtime : Time, signature : UInt64
+    encoded_title : String, mtime : Time, signature : UInt64,
+    entry_cover_url_cache : Hash(String, String)?
+  setter entry_cover_url_cache : Hash(String, String)?
 
+  @[YAML::Field(ignore: true)]
   @entry_display_name_cache : Hash(String, String)?
+  @[YAML::Field(ignore: true)]
+  @entry_cover_url_cache : Hash(String, String)?
+  @[YAML::Field(ignore: true)]
+  @cached_display_name : String?
+  @[YAML::Field(ignore: true)]
+  @cached_cover_url : String?
 
-  def initialize(@dir : String, @parent_id)
+  def initialize(@dir : String, @parent_id, cache = {} of String => String)
     storage = Storage.default
     @signature = Dir.signature dir
     id = storage.get_title_id dir, signature
@@ -20,6 +32,7 @@ class Title
       })
     end
     @id = id
+    @contents_signature = Dir.contents_signature dir, cache
     @title = File.basename dir
     @encoded_title = URI.encode @title
     @title_ids = [] of String
@@ -30,7 +43,7 @@ class Title
       next if fn.starts_with? "."
       path = File.join dir, fn
       if File.directory? path
-        title = Title.new path, @id
+        title = Title.new path, @id, cache
         next if title.entries.size == 0 && title.titles.size == 0
         Library.default.title_hash[title.id] = title
         @title_ids << title.id
@@ -57,24 +70,141 @@ class Title
     end
   end
 
-  def to_slim_json : String
+  # Utility method used in library rescanning.
+  # - When the title does not exist on the file system anymore, return false
+  #   and let it be deleted from the library instance
+  # - When the title exists, but its contents signature is now different from
+  #   the cache, it means some of its content (nested titles or entries)
+  #   has been added, deleted, or renamed. In this case we update its
+  #   contents signature and instance variables
+  # - When the title exists and its contents signature is still the same, we
+  #   return true so it can be reused without rescanning
+  def examine(context : ExamineContext) : Bool
+    return false unless Dir.exists? @dir
+    contents_signature = Dir.contents_signature @dir,
+      context["cached_contents_signature"]
+    return true if @contents_signature == contents_signature
+
+    @contents_signature = contents_signature
+    @signature = Dir.signature @dir
+    storage = Storage.default
+    id = storage.get_title_id dir, signature
+    if id.nil?
+      id = random_str
+      storage.insert_title_id({
+        path:      dir,
+        id:        id,
+        signature: signature.to_s,
+      })
+    end
+    @id = id
+    @mtime = File.info(@dir).modification_time
+
+    previous_titles_size = @title_ids.size
+    @title_ids.select! do |title_id|
+      title = Library.default.get_title! title_id
+      existence = title.examine context
+      unless existence
+        context["deleted_title_ids"].concat [title_id] +
+                                            title.deep_titles.map &.id
+        context["deleted_entry_ids"].concat title.deep_entries.map &.id
+      end
+      existence
+    end
+    remained_title_dirs = @title_ids.map do |title_id|
+      title = Library.default.get_title! title_id
+      title.dir
+    end
+
+    previous_entries_size = @entries.size
+    @entries.select! do |entry|
+      existence = File.exists? entry.zip_path
+      Fiber.yield
+      context["deleted_entry_ids"] << entry.id unless existence
+      existence
+    end
+    remained_entry_zip_paths = @entries.map &.zip_path
+
+    is_titles_added = false
+    is_entries_added = false
+    Dir.entries(dir).each do |fn|
+      next if fn.starts_with? "."
+      path = File.join dir, fn
+      if File.directory? path
+        next if remained_title_dirs.includes? path
+        title = Title.new path, @id, context["cached_contents_signature"]
+        next if title.entries.size == 0 && title.titles.size == 0
+        Library.default.title_hash[title.id] = title
+        @title_ids << title.id
+        is_titles_added = true
+        next
+      end
+      if is_supported_file path
+        next if remained_entry_zip_paths.includes? path
+        entry = Entry.new path, self
+        if entry.pages > 0 || entry.err_msg
+          @entries << entry
+          is_entries_added = true
+        end
+      end
+    end
+
+    mtimes = [@mtime]
+    mtimes += @title_ids.map { |e| Library.default.title_hash[e].mtime }
+    mtimes += @entries.map &.mtime
+    @mtime = mtimes.max
+
+    if is_titles_added || previous_titles_size != @title_ids.size
+      @title_ids.sort! do |a, b|
+        compare_numerically Library.default.title_hash[a].title,
+          Library.default.title_hash[b].title
+      end
+    end
+    if is_entries_added || previous_entries_size != @entries.size
+      sorter = ChapterSorter.new @entries.map &.title
+      @entries.sort! do |a, b|
+        sorter.compare a.title, b.title
+      end
+    end
+
+    true
+  end
+
+  alias SortContext = NamedTuple(username: String, opt: SortOptions)
+
+  def build_json(*, slim = false, depth = -1,
+                 sort_context : SortContext? = nil)
     JSON.build do |json|
       json.object do
         {% for str in ["dir", "title", "id"] %}
           json.field {{str}}, @{{str.id}}
         {% end %}
         json.field "signature" { json.number @signature }
-        json.field "titles" do
-          json.array do
-            self.titles.each do |title|
-              json.raw title.to_slim_json
+        unless slim
+          json.field "display_name", display_name
+          json.field "cover_url", cover_url
+          json.field "mtime" { json.number @mtime.to_unix }
+        end
+        unless depth == 0
+          json.field "titles" do
+            json.array do
+              self.titles.each do |title|
+                json.raw title.build_json(slim: slim,
+                  depth: depth > 0 ? depth - 1 : depth)
+              end
             end
           end
-        end
-        json.field "entries" do
-          json.array do
-            @entries.each do |entry|
-              json.raw entry.to_slim_json
+          json.field "entries" do
+            json.array do
+              _entries = if sort_context
+                           sorted_entries sort_context[:username],
+                             sort_context[:opt]
+                         else
+                           @entries
+                         end
+              _entries.each do |entry|
+                json.raw entry.build_json(slim: slim)
+              end
             end
           end
         end
@@ -92,34 +222,6 @@ class Title
     end
   end
 
-  def to_json(json : JSON::Builder)
-    json.object do
-      {% for str in ["dir", "title", "id"] %}
-        json.field {{str}}, @{{str.id}}
-      {% end %}
-      json.field "signature" { json.number @signature }
-      json.field "display_name", display_name
-      json.field "cover_url", cover_url
-      json.field "mtime" { json.number @mtime.to_unix }
-      json.field "titles" do
-        json.raw self.titles.to_json
-      end
-      json.field "entries" do
-        json.raw @entries.to_json
-      end
-      json.field "parents" do
-        json.array do
-          self.parents.each do |title|
-            json.object do
-              json.field "title", title.title
-              json.field "id", title.id
-            end
-          end
-        end
-      end
-    end
-  end
-
   def titles
     @title_ids.map { |tid| Library.default.get_title! tid }
   end
@@ -177,11 +279,15 @@ class Title
   end
 
   def display_name
+    cached_display_name = @cached_display_name
+    return cached_display_name unless cached_display_name.nil?
+
     dn = @title
     TitleInfo.new @dir do |info|
       info_dn = info.display_name
       dn = info_dn unless info_dn.empty?
     end
+    @cached_display_name = dn
     dn
   end
 
@@ -205,6 +311,7 @@ class Title
   end
 
   def set_display_name(dn)
+    @cached_display_name = dn
     TitleInfo.new @dir do |info|
       info.display_name = dn
       info.save
@@ -214,11 +321,15 @@ class Title
   def set_display_name(entry_name : String, dn)
     TitleInfo.new @dir do |info|
       info.entry_display_name[entry_name] = dn
+      @entry_display_name_cache = info.entry_display_name
       info.save
     end
   end
 
   def cover_url
+    cached_cover_url = @cached_cover_url
+    return cached_cover_url unless cached_cover_url.nil?
+
     url = "#{Config.current.base_url}img/icon.png"
     readable_entries = @entries.select &.err_msg.nil?
     if readable_entries.size > 0
@@ -230,10 +341,12 @@ class Title
         url = File.join Config.current.base_url, info_url
       end
     end
+    @cached_cover_url = url
     url
   end
 
   def set_cover_url(url : String)
+    @cached_cover_url = url
     TitleInfo.new @dir do |info|
       info.cover_url = url
       info.save
@@ -243,6 +356,7 @@ class Title
   def set_cover_url(entry_name : String, url : String)
     TitleInfo.new @dir do |info|
       info.entry_cover_url[entry_name] = url
+      @entry_cover_url_cache = info.entry_cover_url
       info.save
     end
   end
@@ -262,8 +376,15 @@ class Title
   end
 
   def deep_read_page_count(username) : Int32
-    load_progress_for_all_entries(username).sum +
-      titles.flat_map(&.deep_read_page_count username).sum
+    key = "#{@id}:#{username}:progress_sum"
+    sig = Digest::SHA1.hexdigest (entries.map &.id).to_s
+    cached_sum = LRUCache.get key
+    return cached_sum[1] if cached_sum.is_a? Tuple(String, Int32) &&
+                            cached_sum[0] == sig
+    sum = load_progress_for_all_entries(username, nil, true).sum +
+          titles.flat_map(&.deep_read_page_count username).sum
+    LRUCache.set generate_cache_entry key, {sig, sum}
+    sum
   end
 
   def deep_total_page_count : Int32
@@ -317,13 +438,12 @@ class Title
   #   use the default (auto, ascending)
   # When `opt` is not nil, it saves the options to info.json
   def sorted_entries(username, opt : SortOptions? = nil)
+    cache_key = SortedEntriesCacheEntry.gen_key @id, username, @entries, opt
+    cached_entries = LRUCache.get cache_key
+    return cached_entries if cached_entries.is_a? Array(Entry)
+
     if opt.nil?
       opt = SortOptions.from_info_json @dir, username
-    else
-      TitleInfo.new @dir do |info|
-        info.sort_by[username] = opt.to_tuple
-        info.save
-      end
     end
 
     case opt.not_nil!.method
@@ -355,6 +475,7 @@ class Title
 
     ary.reverse! unless opt.not_nil!.ascend
 
+    LRUCache.set generate_cache_entry cache_key, ary
     ary
   end
 
@@ -416,6 +537,17 @@ class Title
   end
 
   def bulk_progress(action, ids : Array(String), username)
+    LRUCache.invalidate "#{@id}:#{username}:progress_sum"
+    parents.each do |parent|
+      LRUCache.invalidate "#{parent.id}:#{username}:progress_sum"
+    end
+    [false, true].each do |ascend|
+      sorted_entries_cache_key =
+        SortedEntriesCacheEntry.gen_key @id, username, @entries,
+          SortOptions.new(SortMethod::Progress, ascend)
+      LRUCache.invalidate sorted_entries_cache_key
+    end
+
     selected_entries = ids
       .map { |id|
         @entries.find &.id.==(id)
diff --git a/src/library/types.cr b/src/library/types.cr
index 4e83135..4c9dc93 100644
--- a/src/library/types.cr
+++ b/src/library/types.cr
@@ -1,4 +1,12 @@
-SUPPORTED_IMG_TYPES = ["image/jpeg", "image/png", "image/webp"]
+SUPPORTED_IMG_TYPES = %w(
+  image/jpeg
+  image/png
+  image/webp
+  image/apng
+  image/avif
+  image/gif
+  image/svg+xml
+)
 
 enum SortMethod
   Auto
@@ -88,6 +96,18 @@ class TitleInfo
   @@mutex_hash = {} of String => Mutex
 
   def self.new(dir, &)
+    key = "#{dir}:info.json"
+    info = LRUCache.get key
+    if info.is_a? String
+      begin
+        instance = TitleInfo.from_json info
+        instance.dir = dir
+        yield instance
+        return
+      rescue
+      end
+    end
+
     if @@mutex_hash[dir]?
       mutex = @@mutex_hash[dir]
     else
@@ -101,6 +121,7 @@ class TitleInfo
         instance = TitleInfo.from_json File.read json_path
       end
       instance.dir = dir
+      LRUCache.set generate_cache_entry key, instance.to_json
       yield instance
     end
   end
@@ -108,5 +129,12 @@ class TitleInfo
   def save
     json_path = File.join @dir, "info.json"
     File.write json_path, self.to_pretty_json
+    key = "#{@dir}:info.json"
+    LRUCache.set generate_cache_entry key, self.to_json
   end
 end
+
+alias ExamineContext = NamedTuple(
+  cached_contents_signature: Hash(String, String),
+  deleted_title_ids: Array(String),
+  deleted_entry_ids: Array(String))
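
`TitleInfo.new` now consults the LRU cache before touching the disk: a title's parsed `info.json` is cached as a JSON string under `"#{dir}:info.json"`, and `save` writes through to both the file and the cache, so no separate invalidation step is needed. A usage sketch (the `title` variable and the name are placeholders):

```crystal
TitleInfo.new title.dir do |info| # first call parses and caches info.json
  info.display_name = "My Manga"
  info.save                       # rewrites the file and refreshes the cache
end
```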
diff --git a/src/mango.cr b/src/mango.cr
index e8d32a3..8716d04 100644
--- a/src/mango.cr
+++ b/src/mango.cr
@@ -7,7 +7,7 @@ require "option_parser"
 require "clim"
 require "tallboy"
 
-MANGO_VERSION = "0.23.0"
+MANGO_VERSION = "0.24.0"
 
 # From http://www.network-science.de/ascii/
 BANNER = %{
@@ -55,8 +55,10 @@ class CLI < Clim
       Config.load(opts.config).set_current
 
       # Initialize main components
+      LRUCache.init
       Storage.default
       Queue.default
+      Library.load_instance
       Library.default
       Plugin::Downloader.default
diff --git a/src/plugin/downloader.cr b/src/plugin/downloader.cr
index 054698e..2800232 100644
--- a/src/plugin/downloader.cr
+++ b/src/plugin/downloader.cr
@@ -23,11 +23,6 @@ class Plugin
       job
     end
 
-    private def process_filename(str)
-      return "_" if str == ".."
-      str.gsub "/", "_"
-    end
-
     private def download(job : Queue::Job)
       @downloading = true
       @queue.set_status Queue::JobStatus::Downloading, job
@@ -42,8 +37,8 @@ class Plugin
 
       pages = info["pages"].as_i
 
-      manga_title = process_filename job.manga_title
-      chapter_title = process_filename info["title"].as_s
+      manga_title = sanitize_filename job.manga_title
+      chapter_title = sanitize_filename info["title"].as_s
 
       @queue.set_pages pages, job
       lib_dir = @library_path
@@ -68,7 +63,7 @@ class Plugin
       while page = plugin.next_page
         break unless @queue.exists? job
 
-        fn = process_filename page["filename"].as_s
+        fn = sanitize_filename page["filename"].as_s
         url = page["url"].as_s
         headers = HTTP::Headers.new
diff --git a/src/routes/api.cr b/src/routes/api.cr
index b1fb3b3..ed9bb29 100644
--- a/src/routes/api.cr
+++ b/src/routes/api.cr
@@ -133,24 +133,38 @@ struct APIRouter
     end
 
     Koa.describe "Returns the book with title `tid`", <<-MD
-    Supply the `tid` query parameter to strip away "display_name", "cover_url", and "mtime" from the returned object to speed up the loading time
+    - Supply the `slim` query parameter to strip away "display_name", "cover_url", and "mtime" from the returned object to speed up the loading time
+    - Supply the `depth` query parameter to control the depth of nested titles to return.
+      - When `depth` is 1, returns the requested title and the sub-titles/entries one level in it
+      - When `depth` is 0, returns the requested title without its sub-titles/entries
+      - When `depth` is N, returns the requested title and the sub-titles/entries N levels in it
+      - When `depth` is negative, returns the requested title and all sub-titles/entries in it
     MD
     Koa.path "tid", desc: "Title ID"
     Koa.query "slim"
+    Koa.query "depth"
+    Koa.query "sort", desc: "Sorting option for entries. Can be one of 'auto', 'title', 'progress', 'time_added' and 'time_modified'"
+    Koa.query "ascend", desc: "Sorting direction for entries. Set to 0 for the descending order. Doesn't work without specifying 'sort'"
     Koa.response 200, schema: "title"
     Koa.response 404, "Title not found"
     Koa.tag "library"
     get "/api/book/:tid" do |env|
       begin
+        username = get_username env
+
+        sort_opt = SortOptions.new
+        get_sort_opt
+
         tid = env.params.url["tid"]
         title = Library.default.get_title tid
         raise "Title ID `#{tid}` not found" if title.nil?
 
-        if env.params.query["slim"]?
-          send_json env, title.to_slim_json
-        else
-          send_json env, title.to_json
-        end
+        slim = !env.params.query["slim"]?.nil?
+        depth = env.params.query["depth"]?.try(&.to_i?) || -1
+
+        send_json env, title.build_json(slim: slim, depth: depth,
+          sort_context: {username: username,
+                         opt: sort_opt})
       rescue e
         Logger.error e
         env.response.status_code = 404
@@ -159,20 +173,25 @@ struct APIRouter
     end
 
     Koa.describe "Returns the entire library with all titles and entries", <<-MD
-    Supply the `tid` query parameter to strip away "display_name", "cover_url", and "mtime" from the returned object to speed up the loading time
+    - Supply the `slim` query parameter to strip away "display_name", "cover_url", and "mtime" from the returned object to speed up the loading time
+    - Supply the `depth` query parameter to control the depth of nested titles to return.
+      - When `depth` is 1, returns the top-level titles and the sub-titles/entries one level in them
+      - When `depth` is 0, returns the top-level titles without their sub-titles/entries
+      - When `depth` is N, returns the top-level titles and the sub-titles/entries N levels in them
+      - When `depth` is negative, returns the entire library
     MD
     Koa.query "slim"
+    Koa.query "depth"
     Koa.response 200, schema: {
       "dir"    => String,
       "titles" => ["title"],
     }
     Koa.tag "library"
     get "/api/library" do |env|
-      if env.params.query["slim"]?
-        send_json env, Library.default.to_slim_json
-      else
-        send_json env, Library.default.to_json
-      end
+      slim = !env.params.query["slim"]?.nil?
+      depth = env.params.query["depth"]?.try(&.to_i?) || -1
+
+      send_json env, Library.default.build_json(slim: slim, depth: depth)
     end
 
     Koa.describe "Triggers a library scan"
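
A hypothetical client call combining the new query parameters (it assumes the default port 9000, authentication disabled via `disable_login`, and a made-up title ID):

```crystal
require "http/client"

# Fetch a book two levels deep, skip the display_name/cover_url/mtime
# fields, and sort its entries by reading progress, descending.
res = HTTP::Client.get "http://localhost:9000/api/book/a1b2c3d4" \
                       "?slim=1&depth=2&sort=progress&ascend=0"
puts res.body
```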
diff --git a/src/routes/main.cr b/src/routes/main.cr
index 57917bb..4aa7da6 100644
--- a/src/routes/main.cr
+++ b/src/routes/main.cr
@@ -41,7 +41,7 @@ struct MainRouter
         username = get_username env
 
         sort_opt = SortOptions.from_info_json Library.default.dir, username
-        get_sort_opt
+        get_and_save_sort_opt Library.default.dir
 
         titles = Library.default.sorted_titles username, sort_opt
         percentage = titles.map &.load_percentage username
@@ -59,12 +59,12 @@ struct MainRouter
         username = get_username env
 
         sort_opt = SortOptions.from_info_json title.dir, username
-        get_sort_opt
+        get_and_save_sort_opt title.dir
 
         entries = title.sorted_entries username, sort_opt
-        percentage = title.load_percentage_for_all_entries username, sort_opt
         title_percentage = title.titles.map &.load_percentage username
+
         layout "title"
       rescue e
         Logger.error e
diff --git a/src/storage.cr b/src/storage.cr
index 39116b9..32f446a 100644
--- a/src/storage.cr
+++ b/src/storage.cr
@@ -428,12 +428,21 @@ class Storage
     end
   end
 
-  def mark_unavailable
+  # Mark titles and entries that no longer exist on the file system as
+  #   unavailable. By supplying `ids_candidates` and `titles_candidates`, it
+  #   only checks the existence of the candidate titles/entries to speed up
+  #   the process.
+  def mark_unavailable(ids_candidates : Array(String)?,
+                       titles_candidates : Array(String)?)
     MainFiber.run do
       get_db do |db|
         # Detect dangling entry IDs
         trash_ids = [] of String
-        db.query "select path, id from ids where unavailable = 0" do |rs|
+        query = "select path, id from ids where unavailable = 0"
+        unless ids_candidates.nil?
+          query += " and id in (#{ids_candidates.join "," { |i| "'#{i}'" }})"
+        end
+        db.query query do |rs|
           rs.each do
             path = rs.read String
             fullpath = Path.new(path).expand(Config.current.library_path).to_s
@@ -449,7 +458,11 @@ class Storage
 
         # Detect dangling title IDs
         trash_titles = [] of String
-        db.query "select path, id from titles where unavailable = 0" do |rs|
+        query = "select path, id from titles where unavailable = 0"
+        unless titles_candidates.nil?
+          query += " and id in (#{titles_candidates.join "," { |i| "'#{i}'" }})"
+        end
+        db.query query do |rs|
           rs.each do
             path = rs.read String
             fullpath = Path.new(path).expand(Config.current.library_path).to_s
diff --git a/src/util/signature.cr b/src/util/signature.cr
index d1a0040..5ca3e14 100644
--- a/src/util/signature.cr
+++ b/src/util/signature.cr
@@ -48,4 +48,32 @@ class Dir
     end
     Digest::CRC32.checksum(signatures.sort.join).to_u64
   end
+
+  # Returns the contents signature of the directory at dirname, used to
+  #   decide whether it needs to be rescanned. A rescan is needed when a
+  #   file is added, moved, removed, or renamed (including files in
+  #   nested directories).
+  def self.contents_signature(dirname, cache = {} of String => String) : String
+    return cache[dirname] if cache[dirname]?
+    Fiber.yield
+    signatures = [] of String
+    self.open dirname do |dir|
+      dir.entries.sort.each do |fn|
+        next if fn.starts_with? "."
+        path = File.join dirname, fn
+        if File.directory? path
+          signatures << Dir.contents_signature path, cache
+        else
+          # Only add the filename to `signatures` when it is a
+          #   supported file
+          signatures << fn if is_supported_file fn
+        end
+        Fiber.yield
+      end
+    end
+    hash = Digest::SHA1.hexdigest (signatures.join)
+    cache[dirname] = hash
+    hash
+  end
 end
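
The contents signature hashes only the sorted names of supported files and nested directories, and the shared `cache` hash memoizes every directory visited during a scan, so no directory is walked twice. A sketch (paths are placeholders):

```crystal
cache = {} of String => String

# The first call walks the directory tree and fills the cache.
sig_before = Dir.contents_signature "/manga/Some Series", cache

# The second call returns the memoized digest without touching the disk.
Dir.contents_signature "/manga/Some Series", cache # => sig_before

# With a fresh cache, a rename/addition/removal changes the digest.
rescan_needed = Dir.contents_signature("/manga/Some Series") != sig_before
```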
diff --git a/src/util/util.cr b/src/util/util.cr
index c4e168a..9f5ffee 100644
--- a/src/util/util.cr
+++ b/src/util/util.cr
@@ -35,6 +35,11 @@ def register_mime_types
     # FontAwesome fonts
     ".woff"  => "font/woff",
     ".woff2" => "font/woff2",
+
+    # Supported image formats. JPG, PNG, GIF, WebP, and SVG are already
+    #   defined by Crystal in `MIME.DEFAULT_TYPES`
+    ".apng" => "image/apng",
+    ".avif" => "image/avif",
   }.each do |k, v|
     MIME.register k, v
   end
@@ -120,3 +125,22 @@ class String
     match / s.size
   end
 end
+
+# Does the following:
+#   - turns space-like characters into regular whitespaces (" ")
+#   - strips and collapses spaces
+#   - removes ASCII control characters
+#   - replaces slashes (/) with underscores (_)
+#   - removes leading dots (.)
+#   - removes the following special characters: \:*?"<>|
+#
+# If the sanitized string is empty, returns a random string instead.
+def sanitize_filename(str : String) : String
+  sanitized = str
+    .gsub(/\s+/, " ")
+    .strip
+    .gsub(/\//, "_")
+    .gsub(/^[\.\s]+/, "")
+    .gsub(/[\177\000-\031\\:\*\?\"<>\|]/, "")
+  sanitized.size > 0 ? sanitized : random_str
+end
diff --git a/src/util/web.cr b/src/util/web.cr
index 12459e5..5704ea8 100644
--- a/src/util/web.cr
+++ b/src/util/web.cr
@@ -107,6 +107,26 @@ macro get_sort_opt
   end
 end
 
+macro get_and_save_sort_opt(dir)
+  sort_method = env.params.query["sort"]?
+
+  if sort_method
+    is_ascending = true
+
+    ascend = env.params.query["ascend"]?
+    if ascend && ascend.to_i? == 0
+      is_ascending = false
+    end
+
+    sort_opt = SortOptions.new sort_method, is_ascending
+
+    TitleInfo.new {{dir}} do |info|
+      info.sort_by[username] = sort_opt.to_tuple
+      info.save
+    end
+  end
+end
+
 module HTTP
   class Client
     private def self.exec(uri : URI, tls : TLSContext = nil)