|
1 |
| -# -*- coding: utf-8 -*- |
2 | 1 | """Common code
|
3 | 2 |
|
4 |
| -:copyright: Copyright (c) 2017 Robert Nagler. All Rights Reserved. |
| 3 | +:copyright: Copyright (c) 2017-2025 Robert Nagler. All Rights Reserved. |
5 | 4 | :license: http://www.apache.org/licenses/LICENSE-2.0.html
|
6 | 5 | """
|
7 |
| -from __future__ import absolute_import, division, print_function |
8 |
| -from pykern.pkdebug import pkdlog, pkdp |
| 6 | + |
| 7 | +from pykern.pkcollections import PKDict |
| 8 | +from pykern.pkdebug import pkdc, pkdlog, pkdp |
9 | 9 | import contextlib
|
10 | 10 | import datetime
|
11 | 11 | import errno
|
| 12 | +import exif |
12 | 13 | import os
|
13 | 14 | import os.path
|
14 | 15 | import pykern.pkio
|
|
24 | 25 |
|
25 | 26 | _STILL = "jpg|heic|png|tif|gif|psd|pdf|thm|jpeg"
|
26 | 27 |
|
27 |
| -STILL = re.compile( |
| 28 | +KNOWN_EXT = re.compile( |
28 | 29 | r"^(.+)\.({}|{}|{})$".format(_STILL, _MOVIES, _NEED_JPG),
|
29 | 30 | flags=re.IGNORECASE,
|
30 | 31 | )
|
|
41 | 42 |
|
42 | 43 | THUMB_DIR = re.compile("^(?:200|50)$")
|
43 | 44 |
|
| 45 | +INDEX_LINE = re.compile(r"^([^\s:]+)\s*(.*)") |
44 | 46 |
|
45 |
| -@contextlib.contextmanager |
46 |
| -def user_lock(): |
47 |
| - # Lock directories don't work within Dropbox folders, because |
48 |
| - # Dropbox uploads them and they can hang around after deleting here. |
49 |
| - lock_d = "/tmp/rnpix-lock-" + os.environ["USER"] |
50 |
| - lock_pid = os.path.join(lock_d, "pid") |
| 47 | +MISSING_DESC = "?" |
51 | 48 |
|
52 |
| - def _pid(): |
53 |
| - res = -1 |
54 |
| - try: |
55 |
| - with open(lock_pid) as f: |
56 |
| - res = int(f.read()) |
57 |
| - except Exception: |
58 |
| - pass |
59 |
| - pkdlog(res) |
60 |
| - if res <= 0: |
61 |
| - return res |
62 |
| - try: |
63 |
| - os.kill(res, 0) |
64 |
| - except Exception as e: |
65 |
| - pkdlog(e) |
66 |
| - if isinstance(e, OSError) and e.errno == errno.ESRCH: |
67 |
| - return res |
68 |
| - return -1 |
| 49 | +# Creation Date Value is 2021:03:15 07:10:01-06:00 |
| 50 | +# it's not a date, just a string but it has timezone |
| 51 | +DATE_TIME_RE = re.compile(r"((?:18|19|20)\d\d)\D(\d\d)\D(\d\d)\D(\d\d)\D(\d\d)\D(\d\d)") |
69 | 52 |
|
70 |
| - is_locked = False |
| 53 | +# Also includes a trailing digit possibly |
| 54 | +DATE_RE = re.compile(r"((?:18|19|20)\d\d)\D?(\d\d)\D?(\d\d)\D+(\d*)") |
| 55 | + |
| 56 | +BASE_FTIME = "%Y-%m-%d-%H.%M.%S" |
| 57 | +BASE_FMT = "{}-{}-{}-{}.{}.{}" |
| 58 | +DIR_FMT = "{}/{}-{}" |
| 59 | +DIR_FTIME = "%Y/%m-%d" |
| 60 | + |
| 61 | +ORIGINAL_FTIME = "%Y:%m:%d %H:%M:%S" |
| 62 | + |
| 63 | + |
def date_time_parse(path):
    """Build a naive datetime from the date embedded in ``path``.

    The base name (without extension) is searched with ``DATE_TIME_RE``
    first. Failing that, ``DATE_RE`` is tried against the base name and
    then against the whole path string; in that case the hour is fixed
    at 12 and the optional trailing number is split into minutes and
    seconds.

    Args:
        path: object with a ``purebasename`` attribute (e.g. py.path)
    Returns:
        datetime.datetime: parsed value or None if no date was found
    """
    stem = path.purebasename
    full = DATE_TIME_RE.search(stem)
    if full is not None:
        return datetime.datetime(*(int(x) for x in full.groups()))
    partial = DATE_RE.search(stem) or DATE_RE.search(str(path))
    if partial is None:
        return None
    year, month, day, trailing = partial.groups()
    # No time of day in the name: use noon plus the trailing number
    minute, second = divmod(int(trailing or 0), 60)
    return datetime.datetime(int(year), int(month), int(day), 12, minute, second)
| 74 | + |
| 75 | + |
def exif_image(readable):
    """Return an ``exif.Image`` for ``readable``.

    Args:
        readable: an ``exif.Image`` (returned unchanged), an object with
            an ``open`` method such as a py.path (opened with mode
            ``"rb"``), or any other value passed straight to
            ``exif.Image``
    Returns:
        exif.Image: image object for ``readable``
    """
    if isinstance(readable, exif.Image):
        return readable
    # Handle py.path
    if opener := getattr(readable, "open", None):
        # Close the handle we opened instead of leaking it.
        # NOTE(review): relies on exif.Image consuming the stream during
        # construction, as in the exif package's documented usage — confirm.
        with opener("rb") as f:
            return exif.Image(f)
    return exif.Image(readable)
| 83 | + |
| 84 | + |
def exif_parse(readable):
    """Extract the original date/time and description from an image.

    Args:
        readable: anything accepted by `exif_image`
    Returns:
        PKDict: ``date_time`` (naive datetime or None) and
            ``description`` (value of ``image_description`` or None)
    """

    def _naive(image, original):
        # Nothing recorded: nothing to convert
        if original is None:
            return None
        offset = getattr(image, "offset_time_original", None)
        if not offset:
            return datetime.datetime.strptime(original, ORIGINAL_FTIME)
        # An offset is present: normalize to UTC, then drop the tzinfo
        aware = datetime.datetime.strptime(original + offset, ORIGINAL_FTIME + "%z")
        return aware.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    image = exif_image(readable)
    try:
        original = getattr(image, "datetime_original", None)
        description = getattr(image, "image_description", None)
    except KeyError:
        # Presumably raised when the file has no metadata at all, e.g.
        #   File "exif/_image.py", line 104, in __getattr__
        #   KeyError: 'APP1'
        original = description = None
    return PKDict(date_time=_naive(image, original), description=description)
| 107 | + |
| 108 | + |
def exif_set(readable, path=None, date_time=None, description=None):
    """Update exif fields of an image and write the result.

    Args:
        readable: anything accepted by `exif_image`
        path: where to write the image; defaults to ``readable``. Must
            have ``ext`` equal to ``".jpg"`` and a ``write`` method.
        date_time (datetime.datetime): stored as ``datetime_original``
            formatted with ``ORIGINAL_FTIME`` when not None
        description (str): stored as ``image_description`` when not None
    Returns:
        datetime.datetime: ``date_time`` as passed in
    """
    target = readable if path is None else path
    assert target.ext == ".jpg"
    assert date_time or description
    image = exif_image(readable)
    if date_time is not None:
        image.datetime_original = date_time.strftime(ORIGINAL_FTIME)
    if description is not None:
        image.image_description = description
    target.write(image.get_file(), "wb")
    return date_time
| 121 | + |
| 122 | + |
def index_parse(path=None):
    """Read an index file into a mapping of image name to description.

    Blank lines and lines starting with ``#`` are ignored. A line is
    also skipped (most cases are logged) when it does not match
    ``INDEX_LINE``, names a file that does not exist next to the index,
    repeats a name already seen, has an extension not matched by
    ``KNOWN_EXT``, or has ``MISSING_DESC`` as its description.

    Args:
        path (py.path): index file or a directory containing
            ``index.txt``; defaults to ``pykern.pkio.py_path()``
    Returns:
        PKDict: name to description; empty if the index does not exist
    """

    def _parse(line):
        entry = _split(line)
        if not entry:
            return None
        if not path.new(basename=entry.name).exists():
            pkdlog("indexed image={} does not exist", entry.name)
            return None
        if entry.name in rv:
            pkdlog(
                "duplicate image={} in {}; skipping desc={}",
                entry.name,
                path,
                entry.desc,
            )
            return None
        if not KNOWN_EXT.search(entry.name):
            pkdlog(
                "invalid ext image={} in {}; skipping desc={}",
                entry.name,
                path,
                entry.desc,
            )
            return None
        if entry.desc == MISSING_DESC:
            # assume everything will get identified
            return None
        # success
        return entry

    def _split(line):
        text = line.rstrip()
        if not text or text.startswith("#"):
            return None
        found = INDEX_LINE.search(text)
        if found is None:
            pkdlog("invalid line={}", text)
            return None
        return PKDict(zip(("name", "desc"), found.groups()))

    rv = PKDict()
    if path is None:
        path = pykern.pkio.py_path()
    if path.check(dir=1):
        path = path.join("index.txt")
    if not path.exists():
        # No index so return empty PKDict so can be added to
        return rv
    with path.open("rt") as lines:
        # map is lazy so each _parse call sees names already added to rv
        for entry in map(_parse, lines):
            if entry is None:
                continue
            rv[entry.name] = entry.desc
    return rv
| 173 | + |
| 174 | + |
def index_update(image, desc):
    """Set the description of ``image`` in the default index and rewrite it.

    Args:
        image (str): key to set in the parsed index
        desc (str): value to store for ``image``
    """
    entries = index_parse()
    entries[image] = desc
    index_write(entries)
| 179 | + |
| 180 | + |
def index_write(values):
    """Write ``values`` to ``index.txt`` in the current directory.

    Each item becomes one line: key, a single space, value, newline.
    An existing file is overwritten.

    Args:
        values (dict): str keys mapped to str values
    """
    with open("index.txt", "w") as out:
        lines = [" ".join((name, desc)) + "\n" for name, desc in values.items()]
        out.write("".join(lines))
96 | 184 |
|
97 | 185 |
|
98 | 186 | def move_one(src, dst_root=None):
|
99 | 187 | e = src.ext.lower()
|
100 | 188 | if e == ".jpeg":
|
101 | 189 | e = ".jpg"
|
102 |
| - f1 = "%Y-%m-%d-%H.%M.%S" |
103 |
| - f2 = "{}-{}-{}-{}.{}.{}" |
104 | 190 | # CreationDate is in timezone as is DateTimeOriginal but not for movies
|
105 | 191 | z = (
|
106 |
| - ("-CreationDate", "-CreationDateValue", "-createdate") |
| 192 | + ("-CreationDate", "-CreationDateValue", "-createdate", "-DateTimeOriginal") |
107 | 193 | if MOVIE.search(src.basename)
|
108 | 194 | else ("-DateTimeOriginal",)
|
109 | 195 | )
|
110 | 196 | d = None
|
111 | 197 | for y in z:
|
112 | 198 | p = subprocess.run(
|
113 |
| - ("exiftool", "-d", f1, y, "-S", "-s", src), |
| 199 | + ("exiftool", "-d", BASE_FTIME, y, "-S", "-s", src), |
114 | 200 | stdout=subprocess.PIPE,
|
115 | 201 | stderr=subprocess.PIPE,
|
116 | 202 | universal_newlines=True,
|
117 | 203 | )
|
118 | 204 | if p.returncode != 0:
|
119 |
| - pykern.pkcli.command_error("exiftool failed: {} {}".format(src, p.stderr)) |
120 |
| - m = re.search( |
121 |
| - r"((?:20|19)\d\d)\D(\d\d)\D(\d\d)\D(\d\d)\D(\d\d)\D(\d\d)", str(p.stdout) |
122 |
| - ) |
123 |
| - if m: |
124 |
| - # Creation Date Value is 2021:03:15 07:10:01-06:00 |
125 |
| - # it's not a date, just a string but it has timezone |
126 |
| - t = f2.format(*m.groups()) |
127 |
| - d = "{}/{}-{}".format(*m.groups()) |
| 205 | + pkdlog("exiftool failed: path={} stderr={}", src, p.stderr) |
| 206 | + raise RuntimeError(f"unable to parse image={src}") |
| 207 | + if m := DATE_TIME_RE.search(str(p.stdout)): |
| 208 | + t = BASE_FMT.format(*m.groups()) |
| 209 | + d = DIR_FMT.format(*m.groups()) |
128 | 210 | break
|
129 | 211 | if not d:
|
130 | 212 | d = datetime.datetime.fromtimestamp(src.mtime())
|
131 |
| - t = d.strftime(f1) |
132 |
| - d = d.strftime("%Y/%m-%d") |
| 213 | + t = d.strftime(BASE_FTIME) |
| 214 | + d = d.strftime(DIR_FTIME) |
133 | 215 | pkdlog("use mtime: {} => {}", src, t)
|
134 | 216 | if dst_root:
|
135 | 217 | d = dst_root.join(d)
|
@@ -166,6 +248,59 @@ def root():
|
166 | 248 | return pykern.pkio.py_path(r)
|
167 | 249 |
|
168 | 250 |
|
@contextlib.contextmanager
def user_lock():
    """Context manager holding a per-user lock directory under /tmp.

    The lock is the directory ``/tmp/rnpix-lock-$USER`` containing a
    ``pid`` file with this process's id. Up to five attempts are made
    to create it; both are removed on exit if this call created them.

    Yields:
        str: path of the lock directory
    Raises:
        KeyError: if ``USER`` is not in the environment
        OSError: if creating the lock fails for a reason other than
            the directory already existing
        ValueError: if the lock was not created within five attempts
    """
    # Lock directories don't work within Dropbox folders, because
    # Dropbox uploads them and they can hang around after deleting here.
    lock_d = "/tmp/rnpix-lock-" + os.environ["USER"]
    lock_pid = os.path.join(lock_d, "pid")

    def _pid():
        # Returns the pid recorded in the lock only when that process no
        # longer exists (stale lock). Returns -1 when the pid file can't
        # be read or parsed, when the process is still there, or when
        # probing it fails for any reason other than ESRCH. A recorded
        # value <= 0 is returned unchanged.
        res = -1
        try:
            with open(lock_pid) as f:
                res = int(f.read())
        except Exception:
            pass
        pkdlog(res)
        if res <= 0:
            return res
        try:
            # Signal 0 sends nothing; it only checks the process exists
            os.kill(res, 0)
        except Exception as e:
            pkdlog(e)
            if isinstance(e, OSError) and e.errno == errno.ESRCH:
                return res
        return -1

    is_locked = False
    try:
        for i in range(5):
            try:
                # mkdir fails with EEXIST if someone else holds the lock
                os.mkdir(lock_d)
                is_locked = True
                with open(lock_pid, "w") as f:
                    f.write(str(os.getpid()))
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
                pid = _pid()
                if pid <= 0:
                    # Holder is alive or pid unknown: wait and retry
                    time.sleep(0.4)
                    continue
                # Read the pid a second time; only remove the lock if the
                # same stale pid is still recorded
                if pid == _pid():
                    os.remove(lock_pid)
                    os.rmdir(lock_d)
        else:
            # NOTE(review): also reached when a stale lock is removed on
            # the fifth attempt, since there is no further retry
            raise ValueError("{}: unable to create lock".format(lock_d))
        yield lock_d
    finally:
        # Only clean up a lock this call created
        if is_locked:
            os.remove(lock_pid)
            os.rmdir(lock_d)
| 302 | + |
| 303 | + |
169 | 304 | def _fix_index(d, old, new):
|
170 | 305 | i = d.join("index.txt")
|
171 | 306 | if not i.exists():
|
|
0 commit comments