mirror of
https://github.com/hkalexling/Mango.git
synced 2025-08-02 02:45:29 -04:00
Signature matching
This commit is contained in:
parent
7f76322377
commit
667d390be4
50
migration/ids_signature.7.cr
Normal file
50
migration/ids_signature.7.cr
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# Migration that adds a `signature` column to the `ids` table.
#
# `up` adds the column in place. `down` removes it again by rebuilding the
# `ids` table without the column, and then rebuilds `thumbnails` so that its
# foreign key points at the freshly created `ids` table.
class IDSignature < MG::Base
  # Returns the SQL that adds a nullable `signature` TEXT column to `ids`.
  def up : String
    <<-SQL
    ALTER TABLE ids ADD COLUMN signature TEXT;
    SQL
  end

  # Returns the SQL that reverts `up`.
  #
  # The statements run in this order:
  #   1. `ids` is renamed to `tmp`, recreated with only `path` and `id`, filled
  #      from `tmp`, and `tmp` is dropped.
  #   2. The unique indices `path_idx` and `id_idx` are recreated on the new
  #      `ids` table.
  #   3. `thumbnails` goes through the same rename/recreate/copy/drop cycle so
  #      that its `FOREIGN KEY (id) REFERENCES ids (id)` clause is declared
  #      against the new `ids` table, and `tn_index` is recreated.
  #
  # NOTE(review): the table is rebuilt instead of dropping the column directly,
  # presumably because the targeted SQLite version has no `DROP COLUMN` --
  # confirm against the supported SQLite versions.
  def down : String
    <<-SQL
    -- remove signature column from ids
    ALTER TABLE ids RENAME TO tmp;

    CREATE TABLE ids (
      path TEXT NOT NULL,
      id TEXT NOT NULL
    );

    INSERT INTO ids
    SELECT path, id
    FROM tmp;

    DROP TABLE tmp;

    -- recreate the indices
    CREATE UNIQUE INDEX path_idx ON ids (path);
    CREATE UNIQUE INDEX id_idx ON ids (id);

    -- recreate the foreign key constraint on thumbnails
    ALTER TABLE thumbnails RENAME TO tmp;

    CREATE TABLE thumbnails (
      id TEXT NOT NULL,
      data BLOB NOT NULL,
      filename TEXT NOT NULL,
      mime TEXT NOT NULL,
      size INTEGER NOT NULL,
      FOREIGN KEY (id) REFERENCES ids (id)
        ON UPDATE CASCADE
        ON DELETE CASCADE
    );

    INSERT INTO thumbnails
    SELECT * FROM tmp;

    DROP TABLE tmp;

    CREATE UNIQUE INDEX tn_index ON thumbnails (id);
    SQL
  end
end
|
31
migration/relative_path.8.cr
Normal file
31
migration/relative_path.8.cr
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# Migration that converts the `path` column of the `ids` and `titles` tables
# between absolute paths and paths relative to the configured library
# directory (`Config.current.library_path`).
#
# Relative paths are stored WITHOUT a leading slash (e.g. `Title/entry.zip`),
# which is the form produced by `Path#relative_to` and the form that
# `Path#expand` can resolve against the library directory again.
class RelativePath < MG::Base
  # Returns the SQL that strips the library directory prefix from every path.
  #
  # Only a leading occurrence of the prefix is removed; rows whose path does
  # not start with the prefix are left untouched.
  def up : String
    prefix = library_prefix_sql

    <<-SQL
    -- update the path column in ids to relative paths
    UPDATE ids
    SET path = SUBSTR(path, LENGTH('#{prefix}') + 1)
    WHERE SUBSTR(path, 1, LENGTH('#{prefix}')) = '#{prefix}';

    -- update the path column in titles to relative paths
    UPDATE titles
    SET path = SUBSTR(path, LENGTH('#{prefix}') + 1)
    WHERE SUBSTR(path, 1, LENGTH('#{prefix}')) = '#{prefix}';
    SQL
  end

  # Returns the SQL that reverts `up` by prepending the library directory
  # prefix (including its trailing slash) to every path.
  def down : String
    prefix = library_prefix_sql

    <<-SQL
    -- update the path column in ids to absolute paths
    UPDATE ids
    SET path = '#{prefix}' || path;

    -- update the path column in titles to absolute paths
    UPDATE titles
    SET path = '#{prefix}' || path;
    SQL
  end

  # Returns the library path normalized to end with exactly the separator it
  # needs (`/`) and escaped for use inside a single-quoted SQL string literal.
  private def library_prefix_sql : String
    base = Config.current.library_path
    # Keep the trailing slash in the prefix so that the resulting relative
    # paths do not start with "/" (a leading slash would make them absolute).
    base += "/" unless base.ends_with? "/"
    # Double any single quote so the path cannot terminate the SQL literal.
    base.gsub("'", "''")
  end
end
|
@ -11,13 +11,14 @@ class Entry
|
|||||||
@title = File.basename @zip_path, File.extname @zip_path
|
@title = File.basename @zip_path, File.extname @zip_path
|
||||||
@encoded_title = URI.encode @title
|
@encoded_title = URI.encode @title
|
||||||
@size = (File.size @zip_path).humanize_bytes
|
@size = (File.size @zip_path).humanize_bytes
|
||||||
id = storage.get_id @zip_path, false
|
id = storage.get_entry_id @zip_path, File.signature(@zip_path)
|
||||||
if id.nil?
|
if id.nil?
|
||||||
id = random_str
|
id = random_str
|
||||||
storage.insert_id({
|
storage.insert_id({
|
||||||
path: @zip_path,
|
path: @zip_path,
|
||||||
id: id,
|
id: id,
|
||||||
is_title: false,
|
title_signature: nil,
|
||||||
|
entry_signature: File.signature(@zip_path).to_s,
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
@id = id
|
@id = id
|
||||||
|
@ -42,16 +42,6 @@ class Library
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
db_interval = Config.current.db_optimization_interval_hours
|
|
||||||
unless db_interval < 1
|
|
||||||
spawn do
|
|
||||||
loop do
|
|
||||||
Storage.default.optimize
|
|
||||||
sleep db_interval.hours
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def titles
|
def titles
|
||||||
@ -119,6 +109,7 @@ class Library
|
|||||||
storage.close
|
storage.close
|
||||||
|
|
||||||
Logger.debug "Scan completed"
|
Logger.debug "Scan completed"
|
||||||
|
Storage.default.optimize
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_continue_reading_entries(username)
|
def get_continue_reading_entries(username)
|
||||||
|
@ -3,19 +3,21 @@ require "../archive"
|
|||||||
class Title
|
class Title
|
||||||
getter dir : String, parent_id : String, title_ids : Array(String),
|
getter dir : String, parent_id : String, title_ids : Array(String),
|
||||||
entries : Array(Entry), title : String, id : String,
|
entries : Array(Entry), title : String, id : String,
|
||||||
encoded_title : String, mtime : Time, signature : UInt64 = 0
|
encoded_title : String, mtime : Time, signature : UInt64
|
||||||
|
|
||||||
@entry_display_name_cache : Hash(String, String)?
|
@entry_display_name_cache : Hash(String, String)?
|
||||||
|
|
||||||
def initialize(@dir : String, @parent_id)
|
def initialize(@dir : String, @parent_id)
|
||||||
storage = Storage.default
|
storage = Storage.default
|
||||||
id = storage.get_id @dir, true
|
@signature = Dir.signature dir
|
||||||
|
id = storage.get_title_id dir, signature
|
||||||
if id.nil?
|
if id.nil?
|
||||||
id = random_str
|
id = random_str
|
||||||
storage.insert_id({
|
storage.insert_id({
|
||||||
path: @dir,
|
path: dir,
|
||||||
id: id,
|
id: id,
|
||||||
is_title: true,
|
title_signature: signature.to_s,
|
||||||
|
entry_signature: nil,
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
@id = id
|
@id = id
|
||||||
@ -25,8 +27,6 @@ class Title
|
|||||||
@entries = [] of Entry
|
@entries = [] of Entry
|
||||||
@mtime = File.info(dir).modification_time
|
@mtime = File.info(dir).modification_time
|
||||||
|
|
||||||
signatures = [] of UInt64
|
|
||||||
|
|
||||||
Dir.entries(dir).each do |fn|
|
Dir.entries(dir).each do |fn|
|
||||||
next if fn.starts_with? "."
|
next if fn.starts_with? "."
|
||||||
path = File.join dir, fn
|
path = File.join dir, fn
|
||||||
@ -35,18 +35,14 @@ class Title
|
|||||||
next if title.entries.size == 0 && title.titles.size == 0
|
next if title.entries.size == 0 && title.titles.size == 0
|
||||||
Library.default.title_hash[title.id] = title
|
Library.default.title_hash[title.id] = title
|
||||||
@title_ids << title.id
|
@title_ids << title.id
|
||||||
signatures << title.signature
|
|
||||||
next
|
next
|
||||||
end
|
end
|
||||||
if [".zip", ".cbz", ".rar", ".cbr"].includes? File.extname path
|
if [".zip", ".cbz", ".rar", ".cbr"].includes? File.extname path
|
||||||
entry = Entry.new path, self
|
entry = Entry.new path, self
|
||||||
@entries << entry if entry.pages > 0 || entry.err_msg
|
@entries << entry if entry.pages > 0 || entry.err_msg
|
||||||
signatures << File.size entry.zip_path
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@signature = Digest::CRC32.checksum(signatures.sort.join "").to_u64
|
|
||||||
|
|
||||||
mtimes = [@mtime]
|
mtimes = [@mtime]
|
||||||
mtimes += @title_ids.map { |e| Library.default.title_hash[e].mtime }
|
mtimes += @title_ids.map { |e| Library.default.title_hash[e].mtime }
|
||||||
mtimes += @entries.map { |e| e.mtime }
|
mtimes += @entries.map { |e| e.mtime }
|
||||||
|
103
src/storage.cr
103
src/storage.cr
@ -20,9 +20,11 @@ class Storage
|
|||||||
@path : String
|
@path : String
|
||||||
@db : DB::Database?
|
@db : DB::Database?
|
||||||
|
|
||||||
alias IDTuple = NamedTuple(path: String,
|
alias IDTuple = NamedTuple(
|
||||||
|
path: String,
|
||||||
id: String,
|
id: String,
|
||||||
is_title: Bool)
|
entry_signature: String?,
|
||||||
|
title_signature: String?)
|
||||||
|
|
||||||
use_default
|
use_default
|
||||||
|
|
||||||
@ -230,16 +232,82 @@ class Storage
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_id(path, is_title)
|
def get_title_id(path, signature)
|
||||||
id = nil
|
id = nil
|
||||||
|
path = Path.new(path).relative_to(Config.current.library_path).to_s
|
||||||
MainFiber.run do
|
MainFiber.run do
|
||||||
get_db do |db|
|
get_db do |db|
|
||||||
if is_title
|
# First attempt to find the matching title in DB using BOTH path
|
||||||
id = db.query_one? "select id from titles where path = (?)", path,
|
# and signature
|
||||||
as: String
|
id = db.query_one? "select id from titles where path = (?) and " \
|
||||||
else
|
"signature = (?)", path, signature.to_s, as: String
|
||||||
id = db.query_one? "select id from ids where path = (?)", path,
|
|
||||||
as: String
|
should_update = id.nil?
|
||||||
|
# If it fails, try to match using the path only. This could happen
|
||||||
|
# for example when a new entry is added to the title
|
||||||
|
id ||= db.query_one? "select id from titles where path = (?)", path,
|
||||||
|
as: String
|
||||||
|
|
||||||
|
# If it still fails, we will have to rely on the signature values.
|
||||||
|
# This could happen when the user moved or renamed the title, or
|
||||||
|
# a title containing the title
|
||||||
|
unless id
|
||||||
|
# If there are multiple rows with the same signature (this could
|
||||||
|
# happen simply by bad luck, or when the user copied a title),
|
||||||
|
# pick the row that has the most similar path to the given path
|
||||||
|
rows = [] of Tuple(String, String)
|
||||||
|
db.query "select id, path from titles where signature = (?)",
|
||||||
|
signature.to_s do |rs|
|
||||||
|
rs.each do
|
||||||
|
rows << {rs.read(String), rs.read(String)}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
row = rows.max_by?(&.[1].components_similarity(path))
|
||||||
|
id = row[0] if row
|
||||||
|
end
|
||||||
|
|
||||||
|
# At this point, `id` would still be nil if there's no row matching
|
||||||
|
# either the path or the signature
|
||||||
|
|
||||||
|
# If we did identify a matching title, save the path and signature
|
||||||
|
# values back to the DB
|
||||||
|
if id && should_update
|
||||||
|
db.exec "update titles set path = (?), signature = (?) " \
|
||||||
|
"where id = (?)", path, signature.to_s, id
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
id
|
||||||
|
end
|
||||||
|
|
||||||
|
# See the comments in `#get_title_id` to see how this method works.
|
||||||
|
def get_entry_id(path, signature)
|
||||||
|
id = nil
|
||||||
|
path = Path.new(path).relative_to(Config.current.library_path).to_s
|
||||||
|
MainFiber.run do
|
||||||
|
get_db do |db|
|
||||||
|
id = db.query_one? "select id from ids where path = (?) and " \
|
||||||
|
"signature = (?)", path, signature.to_s, as: String
|
||||||
|
|
||||||
|
should_update = id.nil?
|
||||||
|
id ||= db.query_one? "select id from ids where path = (?)", path,
|
||||||
|
as: String
|
||||||
|
|
||||||
|
unless id
|
||||||
|
rows = [] of Tuple(String, String)
|
||||||
|
db.query "select id, path from ids where signature = (?)",
|
||||||
|
signature.to_s do |rs|
|
||||||
|
rs.each do
|
||||||
|
rows << {rs.read(String), rs.read(String)}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
row = rows.max_by?(&.[1].components_similarity(path))
|
||||||
|
id = row[0] if row
|
||||||
|
end
|
||||||
|
|
||||||
|
if id && should_update
|
||||||
|
db.exec "update ids set path = (?), signature = (?) " \
|
||||||
|
"where id = (?)", path, signature.to_s, id
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -256,11 +324,14 @@ class Storage
|
|||||||
db.transaction do |tran|
|
db.transaction do |tran|
|
||||||
conn = tran.connection
|
conn = tran.connection
|
||||||
@@insert_ids.each do |tp|
|
@@insert_ids.each do |tp|
|
||||||
if tp[:is_title]
|
path = Path.new(tp[:path])
|
||||||
conn.exec "insert into titles values (?, ?, null)", tp[:id],
|
.relative_to(Config.current.library_path).to_s
|
||||||
tp[:path]
|
if tp[:title_signature]
|
||||||
|
conn.exec "insert into titles values (?, ?, ?)", tp[:id],
|
||||||
|
path, tp[:title_signature].to_s
|
||||||
else
|
else
|
||||||
conn.exec "insert into ids values (?, ?)", tp[:path], tp[:id]
|
conn.exec "insert into ids values (?, ?, ?)", path, tp[:id],
|
||||||
|
tp[:entry_signature].to_s
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -363,7 +434,8 @@ class Storage
|
|||||||
db.query "select path, id from ids" do |rs|
|
db.query "select path, id from ids" do |rs|
|
||||||
rs.each do
|
rs.each do
|
||||||
path = rs.read String
|
path = rs.read String
|
||||||
trash_ids << rs.read String unless File.exists? path
|
fullpath = Path.new(path).expand(Config.current.library_path).to_s
|
||||||
|
trash_ids << rs.read String unless File.exists? fullpath
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -377,7 +449,8 @@ class Storage
|
|||||||
db.query "select path, id from titles" do |rs|
|
db.query "select path, id from titles" do |rs|
|
||||||
rs.each do
|
rs.each do
|
||||||
path = rs.read String
|
path = rs.read String
|
||||||
trash_titles << rs.read String unless Dir.exists? path
|
fullpath = Path.new(path).expand(Config.current.library_path).to_s
|
||||||
|
trash_titles << rs.read String unless Dir.exists? fullpath
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
50
src/util/signature.cr
Normal file
50
src/util/signature.cr
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
class File
  abstract struct Info
    # Returns the inode number recorded in the underlying stat structure.
    def inode
      @stat.st_ino
    end
  end

  # Returns the signature of the file at *filename*.
  #
  # When the extension is not one of `.zip`, `.rar`, `.cbz` or `.cbr` the file
  # is treated as unsupported and `0` is returned without touching the file
  # system. Otherwise the signature is the CRC32 checksum of the file's inode
  # number, size and modification time (Unix seconds), sorted and
  # concatenated. The intent is that moving (unless to another device) or
  # renaming the file preserves the signature, while copying or editing the
  # file changes it.
  #
  # Raises whatever `File.info` raises when a supported file cannot be read.
  def self.signature(filename) : UInt64
    return 0u64 unless %w(.zip .rar .cbz .cbr).includes? File.extname filename
    # Take inode, size and mtime from a single `File.info` call so that all
    # three values describe the same snapshot of the file.
    info = File.info filename
    signatures = [
      info.inode,
      info.size,
      info.modification_time.to_unix,
    ]
    Digest::CRC32.checksum(signatures.sort.join).to_u64
  end
end
|
||||||
|
|
||||||
|
class Dir
  # Computes the signature of the directory at *dirname*.
  #
  # The directory's own modification time (Unix seconds) is combined with the
  # signature of every visible child: sub-directories contribute their own
  # recursive `Dir.signature`, files contribute `File.signature` unless that
  # is `0` (unsupported file). Names starting with "." are ignored. All
  # collected values are sorted, concatenated and reduced to a CRC32 checksum,
  # so the order in which children are listed does not matter.
  def self.signature(dirname) : UInt64
    parts = [File.info(dirname).modification_time.to_unix] of (UInt64 | Int64)
    self.open dirname do |handle|
      handle.entries.reject(&.starts_with?(".")).each do |name|
        child = File.join dirname, name
        if File.directory? child
          parts << Dir.signature child
          next
        end
        # A zero signature marks an unsupported file; leave it out
        file_sig = File.signature child
        parts << file_sig if file_sig > 0
      end
    end
    Digest::CRC32.checksum(parts.sort.join).to_u64
  end
end
|
@ -92,3 +92,18 @@ def sort_titles(titles : Array(Title), opt : SortOptions, username : String)
|
|||||||
|
|
||||||
ary
|
ary
|
||||||
end
|
end
|
||||||
|
|
||||||
|
class String
  # Returns the similarity (in [0, 1]) of two paths.
  #
  # Both paths are split into their components and aligned at the LAST
  # component. The number of aligned positions holding equal components is
  # divided by the number of components of the shorter path.
  #
  # Returns `0.0` when either path has no components (e.g. an empty string),
  # instead of the NaN that `0 / 0` would produce.
  #
  # ```
  # "a/b/c".components_similarity("x/b/c") # => 0.6666666666666666
  # "b/c".components_similarity("a/b/c")   # => 1.0
  # "".components_similarity("a/b")        # => 0.0
  # ```
  def components_similarity(other : String) : Float64
    # `s` is the component list of the shorter path, `l` of the longer one
    s, l = [self, other]
      .map { |str| Path.new(str).parts }
      .sort_by &.size

    # Nothing to compare; also guards the division below against 0 / 0
    return 0.0 if s.empty?

    match = s.reverse.zip(l.reverse).count { |a, b| a == b }
    match / s.size
  end
end
|
||||||
|
Loading…
x
Reference in New Issue
Block a user