From: Alexander Schmidt
Date: Mon, 26 Oct 2020 22:28:30 +0000 (+0100)
Subject: use locator db to speed up file search
X-Git-Url: http://git.treefish.org/~alex/photosort.git/commitdiff_plain/5b9ec26e3f502c56d73d11cafb8b411541db89d5?ds=inline;hp=5ad2c3b2ac3baf7b9e64d07a7f0b37d3493a03d4

use locator db to speed up file search
---

diff --git a/doc/photosort-config.json b/doc/photosort-config.json
index edd4a51..763b638 100644
--- a/doc/photosort-config.json
+++ b/doc/photosort-config.json
@@ -1,4 +1,5 @@
 {
+    "cache_dir": "/path/to/some/directory",
     "bunches": [
         {
             "dst_dir": {
diff --git a/src/bunch.py b/src/bunch.py
index 228131a..689d9b0 100644
--- a/src/bunch.py
+++ b/src/bunch.py
@@ -23,7 +23,8 @@ class Bunch:
                     source_idx,
                     DirTrigger(src_dir_cfg['path'], src_dir_cfg['cool_time'],
                                src_dir_cfg['max_time']),
-                    Migrator(src_dir_cfg['path'], cfg['dst_dir']['path']),
+                    Migrator(src_dir_cfg['path'], cfg['dst_dir']['path'],
+                             os.path.join(cfg['cache_dir'], "%d.db" % idx)),
                     src_dir_cfg['cleanup']
                 )
             )
@@ -40,6 +41,8 @@ class Bunch:
         logging.info("Stopping bunch #%d...", self._idx)
         self._stop = True
         self._worker_thread.join()
+        for src in self._sources:
+            src.migrator.close()
         logging.info("Stopped bunch #%d.", self._idx)
 
     def is_running(self):
diff --git a/src/locator.py b/src/locator.py
new file mode 100644
index 0000000..55142c5
--- /dev/null
+++ b/src/locator.py
@@ -0,0 +1,74 @@
+import logging
+import os
+import sqlite3
+import time
+
+import misc
+
+class Locator:
+    def __init__(self, base_dir, exclude_dir, db_file):
+        self._base_dir = base_dir
+        self._exclude_dir = exclude_dir
+        if not os.path.isfile(db_file):
+            self._conn = sqlite3.connect(db_file)
+            self._create_db()
+        else:
+            self._conn = sqlite3.connect(db_file)
+        logging.info("Opened locator for %s.", self._base_dir)
+
+    def locate_file(self, name, size, meta_time):
+        c = self._conn.cursor()
+        c.execute("SELECT file_dir FROM cache WHERE name=? AND size=? AND meta_time=?",
+                  (name, size, meta_time))
+        cached = c.fetchone()
+        if cached:
+            if os.path.isfile(cached[0]) and \
+               os.path.getsize(cached[0]) == size and \
+               misc.extract_meta_time(cached[0]) == meta_time:
+                self._update_cache_access_time(name, size, meta_time)
+                return cached[0]
+            else:
+                self._remove_from_cache(name, size, meta_time)
+        else:
+            file_dir = misc.find_file(name, size, meta_time)
+            if file_dir:
+                self._clean_cache()
+                self._add_to_cache(name, size, meta_time, file_dir)
+            return file_dir
+
+    def close(self):
+        logging.info("Closing locator for %s...", self._base_dir)
+        self._conn.close()
+
+    def _update_cache_access_time(self, name, size, meta_time):
+        c = self._conn.cursor()
+        c.execute("UPDATE cache SET access_time=? WHERE name=? AND size=? AND meta_time=?",
+                  (int(time.time()), name, size, meta_time))
+        self._conn.commit()
+
+    def _remove_from_cache(self, name, size, meta_time):
+        c = self._conn.cursor()
+        c.execute("DELETE FROM cache WHERE name=? AND size=? AND meta_time=?",
+                  (name, size, meta_time))
+        self._conn.commit()
+
+    def _add_to_cache(self, name, size, meta_time, file_dir):
+        c = self._conn.cursor()
+        c.execute('''INSERT INTO cache (name, size, meta_time, file_dir, access_time)
+                     VALUES (?, ?, ?, ?, ?)''',
+                  (name, size, meta_time, file_dir, int(time.time())))
+        self._conn.commit()
+
+    def _clean_cache(self):
+        c = self._conn.cursor()
+        c.execute("DELETE FROM cache WHERE access_time
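
The idea of the commit is to memoize expensive file searches in a small SQLite cache keyed by (name, size, meta_time): every hit is re-validated against the file system before it is trusted, and a miss or stale entry falls back to the slow search whose result is then cached. The standalone sketch below illustrates that lookup-or-search pattern with the same cache columns; it is only an illustration of the technique, not part of the patch, and open_cache() and find_file_slow() are hypothetical stand-ins for photosort's misc helpers (the sketch also stores the full file path in the file_dir column for simplicity).

import os
import sqlite3
import time

def open_cache(db_file):
    # Hypothetical helper: open (or create) a cache database with the same
    # columns the Locator uses.
    conn = sqlite3.connect(db_file)
    conn.execute("""CREATE TABLE IF NOT EXISTS cache (
                        name TEXT, size INTEGER, meta_time INTEGER,
                        file_dir TEXT, access_time INTEGER)""")
    conn.commit()
    return conn

def find_file_slow(base_dir, name, size):
    # Hypothetical stand-in for the slow search: walk the whole tree and
    # return the first path whose name and size match.
    for root, _dirs, files in os.walk(base_dir):
        if name in files:
            path = os.path.join(root, name)
            if os.path.getsize(path) == size:
                return path
    return None

def locate(conn, base_dir, name, size, meta_time):
    row = conn.execute(
        "SELECT file_dir FROM cache WHERE name=? AND size=? AND meta_time=?",
        (name, size, meta_time)).fetchone()
    if row and os.path.isfile(row[0]) and os.path.getsize(row[0]) == size:
        # Cache hit that still matches the file system: refresh its access time.
        conn.execute("UPDATE cache SET access_time=? WHERE name=? AND size=? AND meta_time=?",
                     (int(time.time()), name, size, meta_time))
        conn.commit()
        return row[0]
    # Miss or stale entry: drop it, fall back to the slow search, cache the result.
    conn.execute("DELETE FROM cache WHERE name=? AND size=? AND meta_time=?",
                 (name, size, meta_time))
    path = find_file_slow(base_dir, name, size)
    if path:
        conn.execute("INSERT INTO cache VALUES (?, ?, ?, ?, ?)",
                     (name, size, meta_time, path, int(time.time())))
    conn.commit()
    return path

if __name__ == "__main__":
    conn = open_cache("/tmp/photosort-cache-example.db")
    print(locate(conn, "/tmp", "example.jpg", 12345, 1603750110))
    conn.close()

Because the validation step is cheap (existence, size, metadata time), a stale cache degrades gracefully into the slow path instead of returning wrong locations.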