mirror of
https://github.com/hkalexling/Mango.git
synced 2025-08-02 02:45:29 -04:00
Signature matching
This commit is contained in:
parent
7f76322377
commit
667d390be4
50
migration/ids_signature.7.cr
Normal file
50
migration/ids_signature.7.cr
Normal file
@ -0,0 +1,50 @@
|
||||
# Migration from `migration/ids_signature.7.cr`: adds (and, on rollback,
# removes) the `signature` column of the `ids` table.
class IDSignature < MG::Base
  # Returns the SQL that adds a `signature` TEXT column to `ids`.
  def up : String
    "ALTER TABLE ids ADD COLUMN signature TEXT;"
  end

  # Returns the SQL that reverts `#up`.
  #
  # The script has two parts, separated by a blank line: first `ids` is
  # rebuilt with only `path` and `id`, then `thumbnails` is rebuilt so that
  # its foreign key is declared against the new `ids` table.
  def down : String
    [rebuild_ids_sql, rebuild_thumbnails_sql].join "\n\n"
  end

  # SQL that recreates `ids` without the `signature` column by copying the
  # remaining columns through a temporary table, then restores its indices.
  private def rebuild_ids_sql : String
    <<-SQL
    -- remove signature column from ids
    ALTER TABLE ids RENAME TO tmp;

    CREATE TABLE ids (
    path TEXT NOT NULL,
    id TEXT NOT NULL
    );

    INSERT INTO ids
    SELECT path, id
    FROM tmp;

    DROP TABLE tmp;

    -- recreate the indices
    CREATE UNIQUE INDEX path_idx ON ids (path);
    CREATE UNIQUE INDEX id_idx ON ids (id);
    SQL
  end

  # SQL that recreates `thumbnails` (same columns, cascading foreign key on
  # `ids (id)`), copies every row back and restores its unique index.
  private def rebuild_thumbnails_sql : String
    <<-SQL
    -- recreate the foreign key constraint on thumbnails
    ALTER TABLE thumbnails RENAME TO tmp;

    CREATE TABLE thumbnails (
    id TEXT NOT NULL,
    data BLOB NOT NULL,
    filename TEXT NOT NULL,
    mime TEXT NOT NULL,
    size INTEGER NOT NULL,
    FOREIGN KEY (id) REFERENCES ids (id)
    ON UPDATE CASCADE
    ON DELETE CASCADE
    );

    INSERT INTO thumbnails
    SELECT * FROM tmp;

    DROP TABLE tmp;

    CREATE UNIQUE INDEX tn_index ON thumbnails (id);
    SQL
  end
end
|
31
migration/relative_path.8.cr
Normal file
31
migration/relative_path.8.cr
Normal file
@ -0,0 +1,31 @@
|
||||
# Migration from `migration/relative_path.8.cr`: converts the `path` columns
# of `ids` and `titles` between absolute paths and paths relative to the
# configured library path.
class RelativePath < MG::Base
  # Returns the SQL that removes the library path from the stored paths.
  #
  # Only the trailing "/" of the library path is trimmed, so the resulting
  # paths keep their leading "/" (e.g. "/lib/a.zip" becomes "/a.zip" for a
  # library path of "/lib/").
  #
  # NOTE(review): SQL `REPLACE` substitutes every occurrence of the library
  # path inside a stored path, not only the leading one.
  def up : String
    base = sql_library_path

    <<-SQL
    -- update the path column in ids to relative paths
    UPDATE ids
    SET path = REPLACE(path, '#{base}', '');

    -- update the path column in titles to relative paths
    UPDATE titles
    SET path = REPLACE(path, '#{base}', '');
    SQL
  end

  # Returns the SQL that reverts `#up` by prepending the library path to
  # every stored path in `ids` and `titles`.
  def down : String
    base = sql_library_path

    <<-SQL
    -- update the path column in ids to absolute paths
    UPDATE ids
    SET path = '#{base}' || path;

    -- update the path column in titles to absolute paths
    UPDATE titles
    SET path = '#{base}' || path;
    SQL
  end

  # Returns the configured library path, without one trailing "/", prepared
  # for embedding inside a single-quoted SQL string literal.
  #
  # Single quotes are doubled because the value is interpolated into the SQL
  # text; without this a library path containing `'` would terminate the
  # literal early and produce an invalid (or unintended) statement.
  private def sql_library_path : String
    base = Config.current.library_path
    base = base[...-1] if base.ends_with? "/"
    base.gsub "'", "''"
  end
end
|
@ -11,13 +11,14 @@ class Entry
|
||||
@title = File.basename @zip_path, File.extname @zip_path
|
||||
@encoded_title = URI.encode @title
|
||||
@size = (File.size @zip_path).humanize_bytes
|
||||
id = storage.get_id @zip_path, false
|
||||
id = storage.get_entry_id @zip_path, File.signature(@zip_path)
|
||||
if id.nil?
|
||||
id = random_str
|
||||
storage.insert_id({
|
||||
path: @zip_path,
|
||||
id: id,
|
||||
is_title: false,
|
||||
path: @zip_path,
|
||||
id: id,
|
||||
title_signature: nil,
|
||||
entry_signature: File.signature(@zip_path).to_s,
|
||||
})
|
||||
end
|
||||
@id = id
|
||||
|
@ -42,16 +42,6 @@ class Library
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
db_interval = Config.current.db_optimization_interval_hours
|
||||
unless db_interval < 1
|
||||
spawn do
|
||||
loop do
|
||||
Storage.default.optimize
|
||||
sleep db_interval.hours
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def titles
|
||||
@ -119,6 +109,7 @@ class Library
|
||||
storage.close
|
||||
|
||||
Logger.debug "Scan completed"
|
||||
Storage.default.optimize
|
||||
end
|
||||
|
||||
def get_continue_reading_entries(username)
|
||||
|
@ -3,19 +3,21 @@ require "../archive"
|
||||
class Title
|
||||
getter dir : String, parent_id : String, title_ids : Array(String),
|
||||
entries : Array(Entry), title : String, id : String,
|
||||
encoded_title : String, mtime : Time, signature : UInt64 = 0
|
||||
encoded_title : String, mtime : Time, signature : UInt64
|
||||
|
||||
@entry_display_name_cache : Hash(String, String)?
|
||||
|
||||
def initialize(@dir : String, @parent_id)
|
||||
storage = Storage.default
|
||||
id = storage.get_id @dir, true
|
||||
@signature = Dir.signature dir
|
||||
id = storage.get_title_id dir, signature
|
||||
if id.nil?
|
||||
id = random_str
|
||||
storage.insert_id({
|
||||
path: @dir,
|
||||
id: id,
|
||||
is_title: true,
|
||||
path: dir,
|
||||
id: id,
|
||||
title_signature: signature.to_s,
|
||||
entry_signature: nil,
|
||||
})
|
||||
end
|
||||
@id = id
|
||||
@ -25,8 +27,6 @@ class Title
|
||||
@entries = [] of Entry
|
||||
@mtime = File.info(dir).modification_time
|
||||
|
||||
signatures = [] of UInt64
|
||||
|
||||
Dir.entries(dir).each do |fn|
|
||||
next if fn.starts_with? "."
|
||||
path = File.join dir, fn
|
||||
@ -35,18 +35,14 @@ class Title
|
||||
next if title.entries.size == 0 && title.titles.size == 0
|
||||
Library.default.title_hash[title.id] = title
|
||||
@title_ids << title.id
|
||||
signatures << title.signature
|
||||
next
|
||||
end
|
||||
if [".zip", ".cbz", ".rar", ".cbr"].includes? File.extname path
|
||||
entry = Entry.new path, self
|
||||
@entries << entry if entry.pages > 0 || entry.err_msg
|
||||
signatures << File.size entry.zip_path
|
||||
end
|
||||
end
|
||||
|
||||
@signature = Digest::CRC32.checksum(signatures.sort.join "").to_u64
|
||||
|
||||
mtimes = [@mtime]
|
||||
mtimes += @title_ids.map { |e| Library.default.title_hash[e].mtime }
|
||||
mtimes += @entries.map { |e| e.mtime }
|
||||
|
103
src/storage.cr
103
src/storage.cr
@ -20,9 +20,11 @@ class Storage
|
||||
@path : String
|
||||
@db : DB::Database?
|
||||
|
||||
alias IDTuple = NamedTuple(path: String,
|
||||
alias IDTuple = NamedTuple(
|
||||
path: String,
|
||||
id: String,
|
||||
is_title: Bool)
|
||||
entry_signature: String?,
|
||||
title_signature: String?)
|
||||
|
||||
use_default
|
||||
|
||||
@ -230,16 +232,82 @@ class Storage
|
||||
end
|
||||
end
|
||||
|
||||
def get_id(path, is_title)
|
||||
def get_title_id(path, signature)
|
||||
id = nil
|
||||
path = Path.new(path).relative_to(Config.current.library_path).to_s
|
||||
MainFiber.run do
|
||||
get_db do |db|
|
||||
if is_title
|
||||
id = db.query_one? "select id from titles where path = (?)", path,
|
||||
as: String
|
||||
else
|
||||
id = db.query_one? "select id from ids where path = (?)", path,
|
||||
as: String
|
||||
# First attempt to find the matching title in DB using BOTH path
|
||||
# and signature
|
||||
id = db.query_one? "select id from titles where path = (?) and " \
|
||||
"signature = (?)", path, signature.to_s, as: String
|
||||
|
||||
should_update = id.nil?
|
||||
# If it fails, try to match using the path only. This could happen
|
||||
# for example when a new entry is added to the title
|
||||
id ||= db.query_one? "select id from titles where path = (?)", path,
|
||||
as: String
|
||||
|
||||
# If it still fails, we will have to rely on the signature values.
|
||||
# This could happen when the user moved or renamed the title, or
|
||||
# a title containing the title
|
||||
unless id
|
||||
# If there are multiple rows with the same signature (this could
|
||||
# happen simply by bad luck, or when the user copied a title),
|
||||
# pick the row that has the most similar path to the given path
|
||||
rows = [] of Tuple(String, String)
|
||||
db.query "select id, path from titles where signature = (?)",
|
||||
signature.to_s do |rs|
|
||||
rs.each do
|
||||
rows << {rs.read(String), rs.read(String)}
|
||||
end
|
||||
end
|
||||
row = rows.max_by?(&.[1].components_similarity(path))
|
||||
id = row[0] if row
|
||||
end
|
||||
|
||||
# At this point, `id` would still be nil if there's no row matching
|
||||
# either the path or the signature
|
||||
|
||||
# If we did identify a matching title, save the path and signature
|
||||
# values back to the DB
|
||||
if id && should_update
|
||||
db.exec "update titles set path = (?), signature = (?) " \
|
||||
"where id = (?)", path, signature.to_s, id
|
||||
end
|
||||
end
|
||||
end
|
||||
id
|
||||
end
|
||||
|
||||
# See the comments in `#get_title_id` to see how this method works.
|
||||
def get_entry_id(path, signature)
|
||||
id = nil
|
||||
path = Path.new(path).relative_to(Config.current.library_path).to_s
|
||||
MainFiber.run do
|
||||
get_db do |db|
|
||||
id = db.query_one? "select id from ids where path = (?) and " \
|
||||
"signature = (?)", path, signature.to_s, as: String
|
||||
|
||||
should_update = id.nil?
|
||||
id ||= db.query_one? "select id from ids where path = (?)", path,
|
||||
as: String
|
||||
|
||||
unless id
|
||||
rows = [] of Tuple(String, String)
|
||||
db.query "select id, path from ids where signature = (?)",
|
||||
signature.to_s do |rs|
|
||||
rs.each do
|
||||
rows << {rs.read(String), rs.read(String)}
|
||||
end
|
||||
end
|
||||
row = rows.max_by?(&.[1].components_similarity(path))
|
||||
id = row[0] if row
|
||||
end
|
||||
|
||||
if id && should_update
|
||||
db.exec "update ids set path = (?), signature = (?) " \
|
||||
"where id = (?)", path, signature.to_s, id
|
||||
end
|
||||
end
|
||||
end
|
||||
@ -256,11 +324,14 @@ class Storage
|
||||
db.transaction do |tran|
|
||||
conn = tran.connection
|
||||
@@insert_ids.each do |tp|
|
||||
if tp[:is_title]
|
||||
conn.exec "insert into titles values (?, ?, null)", tp[:id],
|
||||
tp[:path]
|
||||
path = Path.new(tp[:path])
|
||||
.relative_to(Config.current.library_path).to_s
|
||||
if tp[:title_signature]
|
||||
conn.exec "insert into titles values (?, ?, ?)", tp[:id],
|
||||
path, tp[:title_signature].to_s
|
||||
else
|
||||
conn.exec "insert into ids values (?, ?)", tp[:path], tp[:id]
|
||||
conn.exec "insert into ids values (?, ?, ?)", path, tp[:id],
|
||||
tp[:entry_signature].to_s
|
||||
end
|
||||
end
|
||||
end
|
||||
@ -363,7 +434,8 @@ class Storage
|
||||
db.query "select path, id from ids" do |rs|
|
||||
rs.each do
|
||||
path = rs.read String
|
||||
trash_ids << rs.read String unless File.exists? path
|
||||
fullpath = Path.new(path).expand(Config.current.library_path).to_s
|
||||
trash_ids << rs.read String unless File.exists? fullpath
|
||||
end
|
||||
end
|
||||
|
||||
@ -377,7 +449,8 @@ class Storage
|
||||
db.query "select path, id from titles" do |rs|
|
||||
rs.each do
|
||||
path = rs.read String
|
||||
trash_titles << rs.read String unless Dir.exists? path
|
||||
fullpath = Path.new(path).expand(Config.current.library_path).to_s
|
||||
trash_titles << rs.read String unless Dir.exists? fullpath
|
||||
end
|
||||
end
|
||||
|
||||
|
50
src/util/signature.cr
Normal file
50
src/util/signature.cr
Normal file
@ -0,0 +1,50 @@
|
||||
class File
  abstract struct Info
    # Returns the inode number taken from the underlying stat data.
    def inode
      @stat.st_ino
    end
  end

  # Returns the signature of the file at *filename*.
  #
  # Returns 0 when the extension is not one of `.zip`, `.rar`, `.cbz` or
  # `.cbr` (the comparison is case-sensitive). Otherwise the inode number,
  # the size and the modification time (Unix seconds) are sorted, joined
  # without a separator and hashed with CRC32.
  #
  # The intent is that moving (unless to another device) or renaming the
  # file preserves the signature, while copying or editing it changes it.
  def self.signature(filename) : UInt64
    return 0u64 unless %w(.zip .rar .cbz .cbr).includes? File.extname filename

    # Read all three values from a single `File.info` call so they describe
    # the same state of the file and no second stat is needed for the size.
    info = File.info filename
    signatures = [
      info.inode,
      info.size,
      info.modification_time.to_unix,
    ]
    Digest::CRC32.checksum(signatures.sort.join).to_u64
  end
end
|
||||
|
||||
class Dir
  # Returns the signature of the directory at *dirname*.
  #
  # The directory's modification time (Unix seconds) is combined with the
  # signature of every non-hidden child: sub-directories contribute their own
  # `Dir.signature`, files contribute `File.signature` unless it is 0 (i.e.
  # the file is not a supported archive). The collected values are sorted,
  # joined without a separator and hashed with CRC32.
  #
  # The intent is that moving (unless to another device) or renaming the
  # directory preserves the signature, while copying it or editing its
  # content changes it.
  def self.signature(dirname) : UInt64
    parts = [] of (UInt64 | Int64)
    parts << File.info(dirname).modification_time.to_unix

    Dir.entries(dirname).each do |name|
      # Skips ".", ".." and hidden entries alike.
      next if name.starts_with? "."

      child = File.join dirname, name
      if File.directory? child
        parts << Dir.signature child
        next
      end

      # A zero signature marks an unsupported file, which is left out.
      file_sig = File.signature child
      parts << file_sig if file_sig > 0
    end

    Digest::CRC32.checksum(parts.sort.join).to_u64
  end
end
|
@ -92,3 +92,18 @@ def sort_titles(titles : Array(Title), opt : SortOptions, username : String)
|
||||
|
||||
ary
|
||||
end
|
||||
|
||||
class String
  # Returns the similarity (in [0, 1]) of two paths.
  #
  # Both paths are split into components with `Path#parts` and aligned at
  # their last component. The result is the number of aligned positions whose
  # components are equal, divided by the number of components of the shorter
  # path. Positions are compared independently, so a mismatch does not stop
  # the count.
  #
  # Returns 0.0 when the shorter path has no components (e.g. an empty
  # string); dividing by its size would otherwise yield NaN.
  def components_similarity(other : String) : Float64
    shorter, longer = [self, other]
      .map { |str| Path.new(str).parts }
      .sort_by &.size

    return 0.0 if shorter.empty?

    # `shorter` is the receiver of `zip`, so `longer` always has enough
    # elements to pair with.
    matches = shorter.reverse.zip(longer.reverse).count { |a, b| a == b }
    matches / shorter.size
  end
end
|
||||
|
Loading…
x
Reference in New Issue
Block a user