git.treefish.org Git - photosort.git/commitdiff
use locator db to speed up file search
authorAlexander Schmidt <alex@treefish.org>
Mon, 26 Oct 2020 22:28:30 +0000 (23:28 +0100)
committerAlexander Schmidt <alex@treefish.org>
Mon, 26 Oct 2020 22:28:30 +0000 (23:28 +0100)
doc/photosort-config.json
src/bunch.py
src/locator.py [new file with mode: 0644]
src/migrator.py
src/misc.py
src/photosort.py

index edd4a51581a4b0e5aa77a01c7cdbb378ee52780d..763b638e50964c8377c8cf15cf86f3e81a782628 100644 (file)
@@ -1,4 +1,5 @@
 {
+    "cache_dir": "/path/to/some/directory",
     "bunches": [
         {
             "dst_dir": {
index 228131a9e1ec5e6b8157f86813b9fbffceb15634..689d9b0d53fd7ea696063cdc1fe6dec3989f592c 100644 (file)
@@ -23,7 +23,8 @@ class Bunch:
                     source_idx,
                     DirTrigger(src_dir_cfg['path'], src_dir_cfg['cool_time'],
                                src_dir_cfg['max_time']),
-                    Migrator(src_dir_cfg['path'], cfg['dst_dir']['path']),
+                    Migrator(src_dir_cfg['path'], cfg['dst_dir']['path'],
+                             os.path.join(cfg['cache_dir'], "%d.db" % idx)),
                     src_dir_cfg['cleanup']
                 )
             )
@@ -40,6 +41,8 @@ class Bunch:
         logging.info("Stopping bunch #%d...", self._idx)
         self._stop = True
         self._worker_thread.join()
+        for src in self._sources:
+            src.migrator.close()
         logging.info("Stopped bunch #%d.", self._idx)
 
     def is_running(self):
diff --git a/src/locator.py b/src/locator.py
new file mode 100644 (file)
index 0000000..55142c5
--- /dev/null
@@ -0,0 +1,74 @@
+import logging
+import os
+import sqlite3
+import time
+
+import misc
+
class Locator:
    """Speeds up destination-side file lookups with a SQLite cache.

    A full walk of the destination tree (misc.find_file) is slow; this class
    remembers the directory where a (name, size, meta_time) triple was last
    seen and only falls back to the walk on a cache miss or stale entry.
    """

    # Cache entries not accessed for this many seconds are purged (7 days).
    _MAX_CACHE_AGE = 7 * 24 * 3600

    def __init__(self, base_dir, exclude_dir, db_file):
        """Open (and create, if missing) the cache database in db_file.

        base_dir: root of the tree searched by locate_file().
        exclude_dir: subtree to skip during full searches (the source dir).
        """
        self._base_dir = base_dir
        self._exclude_dir = exclude_dir
        # Decide on creation *before* connect(): sqlite3.connect creates an
        # empty file, which would make the isfile() check always true.
        must_create = not os.path.isfile(db_file)
        self._conn = sqlite3.connect(db_file)
        if must_create:
            self._create_db()
        logging.info("Opened locator for %s.", self._base_dir)

    def locate_file(self, name, size, meta_time):
        """Return the directory holding a matching file, or None.

        A cached location is re-validated on disk; stale entries are dropped
        and the full search is retried.
        """
        c = self._conn.cursor()
        c.execute("SELECT file_dir FROM cache WHERE name=? AND size=? AND meta_time=?",
                  (name, size, meta_time))
        cached = c.fetchone()
        if cached:
            # file_dir is a directory; validate the actual file inside it.
            cached_path = os.path.join(cached[0], name)
            if os.path.isfile(cached_path) and \
               os.path.getsize(cached_path) == size and \
               misc.extract_meta_time(cached_path) == meta_time:
                self._update_cache_access_time(name, size, meta_time)
                return cached[0]
            # Stale entry: remove it and fall through to the full search.
            self._remove_from_cache(name, size, meta_time)
        file_dir = misc.find_file(self._base_dir, name, size, meta_time,
                                  self._exclude_dir)
        if file_dir:
            self._clean_cache()
            self._add_to_cache(name, size, meta_time, file_dir)
        return file_dir

    def close(self):
        """Close the underlying database connection."""
        logging.info("Closing locator for %s...", self._base_dir)
        self._conn.close()

    def _update_cache_access_time(self, name, size, meta_time):
        # Refresh the LRU timestamp so _clean_cache keeps hot entries.
        c = self._conn.cursor()
        c.execute("UPDATE cache SET access_time=? WHERE name=? AND size=? AND meta_time=?",
                  (int(time.time()), name, size, meta_time))
        self._conn.commit()

    def _remove_from_cache(self, name, size, meta_time):
        c = self._conn.cursor()
        c.execute("DELETE FROM cache WHERE name=? AND size=? AND meta_time=?",
                  (name, size, meta_time))
        self._conn.commit()

    def _add_to_cache(self, name, size, meta_time, file_dir):
        c = self._conn.cursor()
        c.execute('''INSERT INTO cache (name, size, meta_time, file_dir, access_time)
                     VALUES (?, ?, ?, ?, ?)''',
                  (name, size, meta_time, file_dir, int(time.time())))
        self._conn.commit()

    def _clean_cache(self):
        """Purge entries not accessed within _MAX_CACHE_AGE seconds."""
        c = self._conn.cursor()
        # Parameters must be a sequence: note the 1-tuple.
        c.execute("DELETE FROM cache WHERE access_time<?",
                  (int(time.time()) - self._MAX_CACHE_AGE,))
        self._conn.commit()

    def _create_db(self):
        logging.info("Creating locator database for %s...", self._base_dir)
        c = self._conn.cursor()
        # PRIMARY KEY must sit inside the column-definition parentheses.
        c.execute('''CREATE TABLE cache
                       (name TEXT, size INTEGER, meta_time INTEGER, file_dir TEXT,
                        access_time INTEGER,
                        PRIMARY KEY (name, size, meta_time))''')
        self._conn.commit()
index 7bd32e6d93f5bc187c96c7ee402780645c63233f..6438342f609b6440b7044cff6e89d2e69f4396ee 100644 (file)
@@ -5,9 +5,11 @@ import os
 import misc
 
 class Migrator:
-    def __init__(self, src_dir, dst_dir):
+    def __init__(self, src_dir, dst_dir, db_file=None):
         self._base_src_dir = src_dir
         self._base_dst_dir = dst_dir
+        if db_file:
+            self._locator = Locator(dst_dir, src_dir, db_file)
 
     def migrate(self, remove):
         for src_file_name, src_file_path in misc.walk_media_files(self._base_src_dir):
@@ -17,6 +19,10 @@ class Migrator:
             except Exception as e:
                 logging.error('Error migrating %s: %s', src_file_path, str(e))
 
+    def close(self):
+        if self._locator:
+            self._locator.close()
+
     def _migrate_single(self, src_file_name, src_file_path, remove):
         meta_time = misc.extract_meta_time(src_file_path)
 
@@ -29,7 +35,12 @@ class Migrator:
         dst_file_path = os.path.join(dst_dir, src_file_name)
 
         if not os.path.exists(dst_file_path):
-            alt_dst_dir = misc.find_alt_file(self._base_dst_dir,
+            if self._locator:
+                alt_dst_dir = self._locator.locate_file(src_file_name,
+                                                        os.path.getsize(src_file_path),
+                                                        meta_time)
+            else:
+                alt_dst_dir = misc.find_file(self._base_dst_dir,
                                              src_file_name,
                                              os.path.getsize(src_file_path),
                                              meta_time,
index 5b331b5404b5c24453b88dd22ea1ac54f30f48b4..acd5c944c48451ec62a448d406b671fcf0fa0f66 100644 (file)
@@ -20,16 +20,16 @@ def extract_meta_time(file_path):
     elif is_media_file(file_path, types=['video']):
         return _extract_video_timestamp(file_path)
 
def find_file(base_dir, name, size, meta_time, exclude_dir):
    """Search base_dir for a file matching name, size and meta time.

    Args:
        base_dir: root directory to walk.
        name: exact file name to look for.
        size: required file size in bytes.
        meta_time: required extract_meta_time() value.
        exclude_dir: directory subtree to skip during the walk.

    Returns:
        The directory containing a matching file, or None if none is found.
    """
    # Normalize with a trailing separator so that excluding "/a/b" does not
    # accidentally skip the sibling directory "/a/bc".
    excluded_root = exclude_dir.rstrip(os.sep)
    excluded_prefix = excluded_root + os.sep
    for root, dirs, files in os.walk(base_dir):
        if root == excluded_root or root.startswith(excluded_prefix):
            continue
        if name not in files:
            continue
        full_path = os.path.join(root, name)
        # Cheap size check first; only then pay for metadata extraction.
        if os.path.getsize(full_path) == size:
            if extract_meta_time(full_path) == meta_time:
                return root
    return None
 
index 2829337d7d1e02583ababdbb8b63a82d3a85d726..872724ee49a8a688b234b12556515f221b15f510 100755 (executable)
@@ -29,6 +29,10 @@ logging.basicConfig(format='[%(asctime)s] %(levelname)s: %(message)s',
                     level=log_level, datefmt='%m/%d/%Y %H:%M:%S')
 
 migrator = Migrator(args.SOURCE_DIR, args.DEST_DIR)
+
 migrator.migrate(args.cleanup)
+
 if args.cleanup:
     migrator.cleanup()
+
+migrator.close()