From 4e77d020c137be84ed6778f3052004361f23ad49 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Mon, 26 Feb 2024 11:26:37 +0800 Subject: [PATCH 01/21] =?UTF-8?q?=E6=B7=BB=E5=8A=A0MongoDB=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=20=E4=B8=8D=E5=86=8D=E6=94=AF=E6=8C=81Alist=203.29.1?= =?UTF-8?q?=20--=20alist-sdk=20=E5=B7=B2=E7=BB=8F=E4=B8=8D=E5=86=8D?= =?UTF-8?q?=E6=94=AF=E6=8C=81=20others?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yaml | 2 +- .gitignore | 2 ++ config.yaml => config-template.yaml | 20 ++++++++++++++++++-- pyproject.toml | 1 + 4 files changed, 22 insertions(+), 3 deletions(-) rename config.yaml => config-template.yaml (73%) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 42d1780..a046e0a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - alist-version: [ '3.29.1', '3.30.0' ] + alist-version: [ '3.30.0' ] python-version: [ '3.10', '3.11', '3.12' ] fail-fast: false diff --git a/.gitignore b/.gitignore index f567da1..d636786 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ __pycache__/ tests/alist/ local_test* tmp/ +config.json +config.yaml # C extensions *.so diff --git a/config.yaml b/config-template.yaml similarity index 73% rename from config.yaml rename to config-template.yaml index 2872313..4ebd479 100644 --- a/config.yaml +++ b/config-template.yaml @@ -5,16 +5,32 @@ daemon: false alist_servers: - url: http://localhost:5244/ username: "admin" - password: "admin" + password: "123456" verify_ssl: false - url: http://localhost:5245/ username: "admin" password: "admin" +notify: # 通知服务,当触发一些异常后,将会发送通知。 + email: # 邮件通知 + enable: false + sender: + smtp: "" + email: "" + password: "" + recipients: + - "" + + feishu: # 飞书web Hook机器人 + enable: false + webhook_url: "" + headers: + K: V + sync_groups: - name: "sync1" - # 同步类型,一共5种: + # 同步类型,一共4种: # 1 copy:如果目标目录中已经存在该文件,则跳过 # 忽略存在与目标目录中但不存在于源目录中的文件 # 2 mirror: 如果目标目录中已经存在该文件,则跳过 diff --git a/pyproject.toml b/pyproject.toml index 5ea4e07..0d2abfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "async-lru", "pydantic", "pyyaml", + "pymongo" ] dynamic = ["version"] From 3be054e399d8525cc98a138d4e5f76a530ef8580 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Mon, 26 Feb 2024 19:24:25 +0800 Subject: [PATCH 02/21] 0.1.0-a0 --- alist_sync/alist_client.py | 16 ++++- alist_sync/config.py | 43 ++++++++++++- alist_sync/d_checker.py | 90 +++++++++++++++++++++++++++ alist_sync/d_main.py | 66 ++++++++++++++++++++ alist_sync/{worker.py => d_worker.py} | 51 ++++++++------- alist_sync/downloader.py | 1 + alist_sync/notice/__init__.py | 0 alist_sync/notice/email.py | 0 alist_sync/notice/webhook.py | 0 alist_sync/scanner.py | 80 +++++++++++++----------- alist_sync/thread_pool.py | 9 +++ alist_sync/version.py | 11 +++- config-template.yaml | 21 ++++--- tests/test_worker.py | 6 +- 14 files changed, 317 insertions(+), 77 deletions(-) create mode 100644 alist_sync/d_main.py rename alist_sync/{worker.py => d_worker.py} (78%) create mode 100644 alist_sync/notice/__init__.py create mode 100644 alist_sync/notice/email.py create mode 100644 alist_sync/notice/webhook.py create mode 100644 alist_sync/thread_pool.py diff --git a/alist_sync/alist_client.py b/alist_sync/alist_client.py index 4e128a4..be862a6 100644 --- a/alist_sync/alist_client.py +++ b/alist_sync/alist_client.py @@ -4,12 +4,14 @@ import time from typing import Literal -from alist_sdk import AsyncClient as _AsyncClient, Task +from alist_sdk import AsyncClient as _AsyncClient, Task, Client from async_lru import alru_cache as lru_cache from alist_sync.common import get_alist_client +from alist_sync.config import create_config logger = logging.getLogger("alist-sync.client") +sync_config = create_config() __all__ = ["AlistClient", "get_status"] @@ -111,6 +113,18 @@ async def get_status( raise ValueError(f"任务不存在: {task_name}") +def create_async_client(client: Client) -> AlistClient: + """创建AsyncClient""" + + _server = sync_config.get_server(client.base_url) + _server.token = client.headers.get("authorization") + + _ac = AlistClient(**_server) + _ac.headers = client.headers + _ac.cookies = client.cookies + return _ac + + if __name__ == "__main__": _c = AlistClient( base_url="http://localhost:5244", diff --git a/alist_sync/config.py b/alist_sync/config.py index 1fc7290..3dfac6d 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -3,8 +3,8 @@ import os from datetime import datetime from pathlib import Path -from functools import cached_property -from typing import Optional +from functools import cached_property, lru_cache +from typing import Optional, Literal from alist_sdk import AlistPathType from pydantic import Field, BaseModel @@ -41,9 +41,12 @@ class AlistServer(BaseModel): verify: Optional[bool] = True headers: Optional[dict] = None - def dump_for_sdk(self): + def dump_for_alist_client(self): return self.model_dump(exclude={"storage_config"}) + def sump_for_sdk(self): + return self.model_dump(exclude={"storage_config", "max_connect"}) + def storages(self) -> list[dict]: """返回给定的 storage_config 中包含的storages""" @@ -82,11 +85,32 @@ def is_storage(_st): class SyncGroup(BaseModel): + enable: bool = True name: str type: str group: list[AlistPathType] = Field(min_length=2) +NotifyType = Literal["email", "webhook"] + + +class EMailNotify(BaseModel): + enable: bool = True + type: NotifyType = "email" + smtp_host: str + smtp_port: int = 25 + sender: str + password: str + recipients: list[str] + + +class WebHookNotify(BaseModel): + enable: bool = True + type: NotifyType = "webhook" + webhook_url: str + headers: dict[str, str] + + class Config(BaseModel): """配置""" @@ -107,8 +131,12 @@ class Config(BaseModel): "1", ) + runner_name: str = "test" + mongodb_uri: str | None = os.getenv("ALIST_SYNC_MONGODB_URI", None) + notify: list[EMailNotify | WebHookNotify] = [] + alist_servers: list[AlistServer] = [] sync_groups: list[SyncGroup] = [] @@ -119,6 +147,14 @@ def cache_dir(self) -> Path: self.cache__dir.mkdir(exist_ok=True, parents=True) return self.cache__dir + @lru_cache(10) + def get_server(self, base_url) -> AlistServer: + """找到AlistServer""" + for server in self.alist_servers: + if base_url == server.base_url: + return server + raise ModuleNotFoundError() + @cached_property def mongodb(self) -> "Database": from pymongo import MongoClient @@ -163,3 +199,4 @@ def dump_to_mongodb(self): print(config) print(config.cache_dir) print(config.mongodb) + print(config.notify) diff --git a/alist_sync/d_checker.py b/alist_sync/d_checker.py index 5344e82..ac67bf2 100644 --- a/alist_sync/d_checker.py +++ b/alist_sync/d_checker.py @@ -6,3 +6,93 @@ @Date-Time : 2024/2/25 21:17 """ +from queue import Queue +from pymongo.collection import Collection + +from alist_sdk import AlistPath + +from alist_sync.config import create_config, SyncGroup +from alist_sync.d_worker import Worker +from alist_sync.thread_pool import MyThreadPoolExecutor + +sync_config = create_config() + + +class Checker: + def __init__(self, sync_group: SyncGroup, scaner_queue: Queue, worker_queue: Queue): + self.sync_group: SyncGroup = sync_group + self.worker_queue = worker_queue + self.scaner_queue: Queue[AlistPath] = scaner_queue + + self.locker: set = set() + self.load_locker() + + self.conflict: set = set() + self.pool = MyThreadPoolExecutor(10) + + def split_path(self, path) -> tuple[AlistPath, str]: + """将Path切割为sync_dir和相对路径""" + for sr in self.sync_group.group: + try: + return sr, path.relative_to(sr) + except: + pass + raise ValueError() + + def release_lock(self, *items: AlistPath): + for p in items: + self.locker.remove(p) + + def load_locker(self): + col: Collection = sync_config.mongodb.workers + for doc in col.find({}, {"source_path": True, "target_path": True}): + for p in doc.values(): + if not None: + self.locker.add(AlistPath(p)) + + def checker(self, path) -> "Worker|None": + """检查器""" + raise NotImplemented + + def _t_checker(self, path): + if path in self.locker: + return + if _c := self.checker(path): + self.worker_queue.put(_c) + + def mian(self): + """""" + while True: + path = self.scaner_queue.get() + self._t_checker(path) + + +class CheckerCopy(Checker): + """""" + + def checker(self, path) -> "Worker|None": + _sg = self.sync_group.group.copy() + _sync_dir, _relative_path = self.split_path(path) + _sg.remove(_sync_dir) + + for _sd in _sg: + _sd: AlistPath + target_path = _sd.joinpath(_relative_path) + + if not target_path.exists() and target_path not in self.locker: + self.locker.add(target_path) + self.locker.add(path) + return Worker( + type="copy", + need_backup=False, + source_path=path, + target_path=target_path, + ) + + +class CheckerMirror(Checker): + """""" + + +class CheckerSync(Checker): + """""" diff --git a/alist_sync/d_main.py b/alist_sync/d_main.py new file mode 100644 index 0000000..aadc94d --- /dev/null +++ b/alist_sync/d_main.py @@ -0,0 +1,66 @@ +#!/bin/env python3 +""" + +""" +import queue +import threading +from queue import Queue +from alist_sync.thread_pool import MyThreadPoolExecutor + +from alist_sdk import AlistPath, login_server +from alist_sdk.path_lib import ALIST_SERVER_INFO + +from alist_sync.config import SyncGroup, create_config, AlistServer +from alist_sync.d_checker import Checker + +sync_config = create_config() + + +queue_checker = Queue() +queue_worker = Queue() + + +def login_alist(server: AlistServer): + """""" + login_server(**server.dump_for_sdk()) + server.token = ALIST_SERVER_INFO.get(server.base_url) + + +def scaner(url: AlistPath, _queue): + def _scaner(_url: AlistPath): + """""" + for item in _url.iterdir(): + if item.is_file(): + _queue.put(item) + elif item.is_dir(): + pool.submit(_scaner, item) + + pool = MyThreadPoolExecutor(5) + pool.submit(_scaner, url) + pool.wait() + + +def checker(sync_group: SyncGroup, _queue: Queue): + """""" + if sync_group.enable is False: + return + + for uri in sync_group.group: + login_alist(sync_config.get_server(uri)) + + _queue_scaner = Queue(30) + _scaner_pool = MyThreadPoolExecutor(5, "scaner_") + + _sign = ["copy", "mirror"] + + if sync_group.type in _sign: + scaner(sync_group.group[0], _queue_scaner) + else: + for uri in sync_group.group: + scaner(uri, _queue_scaner) + + return Checker(sync_group, _queue_scaner, _queue).mian() # main()是死循环 + + +def mian(): + """""" diff --git a/alist_sync/worker.py b/alist_sync/d_worker.py similarity index 78% rename from alist_sync/worker.py rename to alist_sync/d_worker.py index 674180f..b28b3bd 100644 --- a/alist_sync/worker.py +++ b/alist_sync/d_worker.py @@ -1,7 +1,9 @@ import atexit import datetime import logging +import threading from pathlib import Path +from queue import Queue from typing import Literal, Any from pydantic import BaseModel, computed_field, Field @@ -9,8 +11,11 @@ from pymongo.database import Database from alist_sdk.path_lib import AlistPathType -from alist_sync.config import cache_dir +from alist_sync.config import create_config from alist_sync.common import sha1 +from alist_sync.thread_pool import MyThreadPoolExecutor + +sync_config = create_config() WorkerType = Literal["delete", "copy"] WorkerStatus = Literal[ @@ -28,7 +33,7 @@ class Worker(BaseModel): - owner: str + owner: str = sync_config.runner_name created_at: datetime.datetime = datetime.datetime.now() type: WorkerType need_backup: bool @@ -55,7 +60,7 @@ def _id(self) -> str: @property def tmp_file(self) -> Path: - return cache_dir.joinpath(f"download_tmp_{sha1(self.source_path)}") + return sync_config.cache_dir.joinpath(f"download_tmp_{sha1(self.source_path)}") def update(self, *field: Any): if self.status == "done" and self.workers is not None: @@ -109,37 +114,41 @@ def run(self): self.delete_type() -class Workers(BaseModel): - workers: list[Worker] = [] - mongodb: Database - - model_config = {"arbitrary_types_allowed": True} +class Workers: + # workers: list[Worker] = [] - def __init__(self, **data: Any): - super().__init__(**data) + def __init__(self, mongodb: Database = None): + self.mongodb: Database = mongodb or sync_config.mongodb + self.thread_pool = MyThreadPoolExecutor( + 5, + "worker_", + ) atexit.register(self.__del__) def __del__(self): - for i in cache_dir.iterdir(): + for i in sync_config.cache_dir.iterdir(): if i.name.startswith("download_tmp_"): i.unlink(missing_ok=True) def load_from_mongo(self): """从MongoDB加载Worker""" for i in self.mongodb.workers.find(): - self.workers.append(Worker(**i)) + self.add_worker(Worker(**i)) def add_worker(self, worker: Worker): - self.workers.append(worker) - - def del_worker(self, _id: str): - """删除Worker""" - pass - - def run(self): - for worker in self.workers: - worker.run() + worker.workers = self + worker.collection = self.mongodb.workers + self.thread_pool.submit(worker.run) + + def run(self, queue: Queue): + while True: + self.add_worker(queue.get()) + + def start(self, queue: Queue) -> threading.Thread: + _t = threading.Thread(target=self.run, args=(queue,)) + _t.start() + return _t if __name__ == "__main__": diff --git a/alist_sync/downloader.py b/alist_sync/downloader.py index 553a5d5..4db63f5 100644 --- a/alist_sync/downloader.py +++ b/alist_sync/downloader.py @@ -5,4 +5,5 @@ @Author : LeeCQ @Date-Time : 2024/2/25 21:17 +下载器使用一个单独线程启动,它创建一个事件循环并在其内部保持同步。 """ diff --git a/alist_sync/notice/__init__.py b/alist_sync/notice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alist_sync/notice/email.py b/alist_sync/notice/email.py new file mode 100644 index 0000000..e69de29 diff --git a/alist_sync/notice/webhook.py b/alist_sync/notice/webhook.py new file mode 100644 index 0000000..e69de29 diff --git a/alist_sync/scanner.py b/alist_sync/scanner.py index bda492c..5ca5d1d 100644 --- a/alist_sync/scanner.py +++ b/alist_sync/scanner.py @@ -3,12 +3,13 @@ import logging from pathlib import PurePosixPath -from alist_sdk import Item +from alist_sdk import Item, AlistPath from pydantic import BaseModel from alist_sync.alist_client import AlistClient from alist_sync.common import get_alist_client + logger = logging.getLogger("alist-sync.scan-dir") __all__ = ["scan_dirs", "Scanner"] @@ -16,6 +17,7 @@ class Scanner(BaseModel): # scan_path: list[item] + queue: asyncio.Queue = asyncio.Queue(30) items: dict[str | PurePosixPath, list[Item]] @classmethod @@ -31,48 +33,54 @@ async def scans(cls, *scan_path, client: AlistClient = None): } ) + @classmethod + async def retry(cls, client, _path, _rt=5) -> list[Item]: + __res = await client.list_files(_path, refresh=True) + if __res.code != 200: + logger.warning(f"扫描目录异常: {_path=} {__res.code=} {__res.message=}") + if _rt: + return await cls.retry(client, _path, _rt=_rt - 1) + exit(1) + return __res.data.content or [] + + @classmethod + async def get_files(cls, client, _path, output): + _path = PurePosixPath(_path) + if _path.name == ".alist-sync-data": + logger.debug("跳过 .alist-sync-data ...") + return + + __res = await cls.retry(client, _path) + for _item in __res: + _item: Item + _item.parent = _path + if _item.is_dir: + # noinspection PyAsyncCall + asyncio.create_task( + cls.get_files(client, _item.full_name, output), + name=f"scan_{_item.full_name}", + ) + else: + # _list.append(_item) + await cls.queue.put(_item) + logger.debug("find: %s", _item.full_name) + return _item + @classmethod async def scan( - cls, - path: str | PurePosixPath, - client: AlistClient, + cls, + path: str | PurePosixPath, + client: AlistClient, + output: list | asyncio.Queue = None, ) -> tuple[str, list[Item]]: """扫描目录""" - _list: list[Item] = [] - - async def _retry(_path, _rt=5) -> list[Item]: - __res = await client.list_files(_path, refresh=True) - if __res.code != 200: - logger.warning(f"扫描目录异常: {_path=} {__res.code=} {__res.message=}") - if _rt: - return await _retry(_path, _rt=_rt - 1) - exit(1) - return __res.data.content or [] - - async def get_files(_path): - _path = PurePosixPath(_path) - if _path.name == '.alist-sync-data': - logger.debug("跳过 .alist-sync-data ...") - return - - __res = await _retry(_path) - for _item in __res: - _item: Item - _item.parent = _path - if _item.is_dir: - # noinspection PyAsyncCall - asyncio.create_task( - get_files(_item.full_name), - name=f"scan_{_item.full_name}", - ) - else: - _list.append(_item) - logger.debug("find: %s", _item.full_name) + if output is None: + output: list[Item] = [] logger.info("扫描目录 %s 中的文件.", path) - await get_files(path) + await cls.get_files(client, path, output) await cls.lock() - return path, _list + return path, output @staticmethod async def lock(): diff --git a/alist_sync/thread_pool.py b/alist_sync/thread_pool.py new file mode 100644 index 0000000..fada8f1 --- /dev/null +++ b/alist_sync/thread_pool.py @@ -0,0 +1,9 @@ +from concurrent.futures import ThreadPoolExecutor +from copy import copy + + +class MyThreadPoolExecutor(ThreadPoolExecutor): + def wait(self): + while True: + for t in copy(self._threads): + t.join() diff --git a/alist_sync/version.py b/alist_sync/version.py index f102a9c..bfb94fb 100644 --- a/alist_sync/version.py +++ b/alist_sync/version.py @@ -1 +1,10 @@ -__version__ = "0.0.1" +__version__ = "0.1.0-a0" + + +""" +0.1.0-a0: + [] 使用Workers架构重构,使用多线程,不再使用协程 + [] New 通知 - email + [] New 通知 - webhook + +""" \ No newline at end of file diff --git a/config-template.yaml b/config-template.yaml index 4ebd479..d458ca5 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -3,26 +3,26 @@ cache_dir: ./.cache daemon: false alist_servers: - - url: http://localhost:5244/ + - base_url: http://localhost:5244/ username: "admin" password: "123456" verify_ssl: false - - url: http://localhost:5245/ + - base_url: http://localhost:5245/ username: "admin" password: "admin" notify: # 通知服务,当触发一些异常后,将会发送通知。 - email: # 邮件通知 - enable: false - sender: - smtp: "" - email: "" - password: "" + - enable: false + type: email + smtp_host: "" + smtp_port: 25 + sender: "" + password: "" recipients: - "" - feishu: # 飞书web Hook机器人 - enable: false + - enable: false + type: webhook webhook_url: "" headers: K: V @@ -30,6 +30,7 @@ notify: # 通知服务,当触发一些异常后,将会发送通知。 sync_groups: - name: "sync1" + enable: true # 默认值: True # 同步类型,一共4种: # 1 copy:如果目标目录中已经存在该文件,则跳过 # 忽略存在与目标目录中但不存在于源目录中的文件 diff --git a/tests/test_worker.py b/tests/test_worker.py index af4db78..e491dc2 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -11,11 +11,7 @@ from alist_sdk.path_lib import PureAlistPath, AlistPath, login_server -from alist_sync.worker import Worker, Workers - -# 如果Python版本是3.12跳过模块 -if sys.version_info >= (3, 12): - pytest.skip("Skip this module on Python 3.12", allow_module_level=True) +from alist_sync.d_worker import Worker, Workers def test_worker(): From df0c5def184cac3138d74423eece9ae5f2440cc4 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Mon, 26 Feb 2024 21:36:04 +0800 Subject: [PATCH 03/21] 0.1.0-a2 --- alist_sync/__main__.py | 2 +- alist_sync/config.py | 13 +++--- alist_sync/d_checker.py | 97 +++++++++++++++++++++++++++++++---------- alist_sync/d_main.py | 23 +++++----- alist_sync/d_worker.py | 13 +----- config-template.yaml | 45 ++++++++++++------- 6 files changed, 126 insertions(+), 67 deletions(-) diff --git a/alist_sync/__main__.py b/alist_sync/__main__.py index f0dc2dd..507796c 100644 --- a/alist_sync/__main__.py +++ b/alist_sync/__main__.py @@ -9,7 +9,7 @@ from alist_sync.base_sync import SyncBase from alist_sync.checker import check_dir -from alist_sync.models import AlistServer +from alist_sync.config import AlistServer from alist_sync.run_copy import Copy from alist_sync.run_mirror import Mirror from alist_sync.run_sync import Sync diff --git a/alist_sync/config.py b/alist_sync/config.py index 3dfac6d..08914d5 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -88,6 +88,8 @@ class SyncGroup(BaseModel): enable: bool = True name: str type: str + need_backup: bool = False + backup_dir: str = ".alist-sync-backup" group: list[AlistPathType] = Field(min_length=2) @@ -195,8 +197,9 @@ def dump_to_mongodb(self): if __name__ == "__main__": - config = create_config() - print(config) - print(config.cache_dir) - print(config.mongodb) - print(config.notify) + # config = create_config() + # print(config) + # print(config.cache_dir) + # print(config.mongodb) + # print(config.notify) + print(Config.model_json_schema()) diff --git a/alist_sync/d_checker.py b/alist_sync/d_checker.py index ac67bf2..18e105e 100644 --- a/alist_sync/d_checker.py +++ b/alist_sync/d_checker.py @@ -7,8 +7,9 @@ """ from queue import Queue -from pymongo.collection import Collection +from typing import Iterator +from pymongo.collection import Collection from alist_sdk import AlistPath from alist_sync.config import create_config, SyncGroup @@ -35,7 +36,7 @@ def split_path(self, path) -> tuple[AlistPath, str]: for sr in self.sync_group.group: try: return sr, path.relative_to(sr) - except: + except ValueError as e: pass raise ValueError() @@ -50,15 +51,29 @@ def load_locker(self): if not None: self.locker.add(AlistPath(p)) - def checker(self, path) -> "Worker|None": - """检查器""" - raise NotImplemented + def get_backup_dir(self, path) -> AlistPath: + return self.split_path(path)[0].joinpath(self.sync_group.backup_dir) + + def _checker(self, source_path: AlistPath, target_path: AlistPath) -> "Worker|None": + raise NotImplementedError + + def checker(self, path) -> Iterator[Worker | None]: + # _sg = self.sync_group.group.copy() + _sync_dir, _relative_path = self.split_path(path) + + for _sd in self.sync_group.group: + _sd: AlistPath + if _sd == _sync_dir: + continue + target_path = _sd.joinpath(_relative_path) + yield self._checker(target_path, path) def _t_checker(self, path): if path in self.locker: return - if _c := self.checker(path): - self.worker_queue.put(_c) + for _c in self.checker(path): + if _c: + self.worker_queue.put(_c) def mian(self): """""" @@ -70,29 +85,63 @@ def mian(self): class CheckerCopy(Checker): """""" - def checker(self, path) -> "Worker|None": - _sg = self.sync_group.group.copy() - _sync_dir, _relative_path = self.split_path(path) - _sg.remove(_sync_dir) - - for _sd in _sg: - _sd: AlistPath - target_path = _sd.joinpath(_relative_path) + def _checker( + self, + source_path: AlistPath, + target_path: AlistPath, + ) -> "Worker|None": - if not target_path.exists() and target_path not in self.locker: - self.locker.add(target_path) - self.locker.add(path) - return Worker( - type="copy", - need_backup=False, - source_path=path, - target_path=target_path, - ) + if not target_path.exists() and target_path not in self.locker: + self.locker.add(target_path) + self.locker.add(source_path) + return Worker( + type="copy", + need_backup=False, + source_path=source_path, + target_path=target_path, + ) + return None class CheckerMirror(Checker): """""" + def _checker(self, source_path: AlistPath, target_path: AlistPath) -> "Worker|None": + _main = self.sync_group.group[0] + # target如果是主存储器 - 且target不存在,source存在,删除source + if target_path == _main and not target_path.exists() and source_path.exists(): + self.locker.add(target_path) + self.locker.add(source_path) + return Worker( + type="delete", + need_backup=self.sync_group.need_backup, + backup_dir=self.get_backup_dir(source_path), + target_path=source_path, + ) + if not target_path.exists() and target_path not in self.locker: + self.locker.add(target_path) + self.locker.add(source_path) + return Worker( + type="copy", + need_backup=False, + source_path=source_path, + target_path=target_path, + ) + return None + class CheckerSync(Checker): """""" + + +class CheckerSyncIncr(Checker): + """""" + + +def get_checker(type_: str) -> type(Checker): + return { + "copy": CheckerCopy, + "mirror": CheckerMirror, + "sync": CheckerSync, + "sync-incr": CheckerSyncIncr, + }[type_] diff --git a/alist_sync/d_main.py b/alist_sync/d_main.py index aadc94d..35b2e45 100644 --- a/alist_sync/d_main.py +++ b/alist_sync/d_main.py @@ -2,24 +2,20 @@ """ """ -import queue -import threading + from queue import Queue -from alist_sync.thread_pool import MyThreadPoolExecutor from alist_sdk import AlistPath, login_server from alist_sdk.path_lib import ALIST_SERVER_INFO +from alist_sync.d_worker import Workers +from alist_sync.thread_pool import MyThreadPoolExecutor from alist_sync.config import SyncGroup, create_config, AlistServer -from alist_sync.d_checker import Checker +from alist_sync.d_checker import get_checker sync_config = create_config() -queue_checker = Queue() -queue_worker = Queue() - - def login_alist(server: AlistServer): """""" login_server(**server.dump_for_sdk()) @@ -40,7 +36,7 @@ def _scaner(_url: AlistPath): pool.wait() -def checker(sync_group: SyncGroup, _queue: Queue): +def checker(sync_group: SyncGroup, _queue_worker: Queue): """""" if sync_group.enable is False: return @@ -59,8 +55,15 @@ def checker(sync_group: SyncGroup, _queue: Queue): for uri in sync_group.group: scaner(uri, _queue_scaner) - return Checker(sync_group, _queue_scaner, _queue).mian() # main()是死循环 + _t_workers = Workers().start(_queue_worker) + + _c = get_checker(sync_group.type) + return _c(sync_group, _queue_scaner, _queue_worker).mian() # main()是死循环 def mian(): """""" + _queue_worker = Queue(30) + + for sync_group in sync_config.sync_groups: + checker(sync_group, _queue_worker) diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index b28b3bd..4ad8990 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -39,8 +39,8 @@ class Worker(BaseModel): need_backup: bool backup_dir: AlistPathType | None = None - source_path: AlistPathType - target_path: AlistPathType | None = None + source_path: AlistPathType | None = None + target_path: AlistPathType # 永远只操作Target文件,删除也是作为Target status: WorkerStatus = "init" error_info: BaseException | None = None @@ -161,15 +161,6 @@ def start(self, queue: Queue) -> threading.Thread: uri = os.environ["MONGODB_URI"] client = MongoClient(uri, server_api=ServerApi("1")) - # w = Worker( - # owner="admin", - # type="delete", - # need_backup=True, - # backer_dir=AlistPath("http://localhost:5244/local/.history"), - # source_path="http://localhost:5244/local/test.txt", - # collection=client.get_default_database().get_collection("workers"), - # ) - # print(w.update()) ws = Workers(mongodb=client.get_default_database()) ws.load_from_mongo() diff --git a/config-template.yaml b/config-template.yaml index d458ca5..d0b9afb 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -11,22 +11,6 @@ alist_servers: username: "admin" password: "admin" -notify: # 通知服务,当触发一些异常后,将会发送通知。 - - enable: false - type: email - smtp_host: "" - smtp_port: 25 - sender: "" - password: "" - recipients: - - "" - - - enable: false - type: webhook - webhook_url: "" - headers: - K: V - sync_groups: - name: "sync1" @@ -40,7 +24,36 @@ sync_groups: # 删除存在于目录目录但不存在于源目录中的文件 # 4 sync-incr: 基于文件的修改时间,只同步源目录中修改时间晚于目标目录的文件 type: "copy" + + # 是否需要备份,如果为True,则会在同步之前备份目标目录 + # 对于copy,该值无效 + need_backup: false # 默认值: False + + # 备份目录,当need_backup为True时,该值有效 + # 一个相对目录,最终为每一个group中的每一个server创建一个备份目录 + backup_dir: "./.alist-sync-backup" # 默认值: ./.alist-sync-backup + + # 同步目录,一个完整的AList URL, + # 对于copy, mirror 第一个为源目录,其他个为目标目录 + # Alist服务器信息需要提前在alist_servers中配置 + # 支持在不同的Alist服务器之间同步 + # 例子:http://localhost:5244/test1 group: - "http://localhost:5244/test1" - "http://localhost:5245/test2" +notify: # 通知服务,当触发一些异常后,将会发送通知。 + - enable: true + type: email + smtp_host: "" + smtp_port: 25 + sender: "" + password: "" + recipients: + - "" + + - enable: true + type: webhook + webhook_url: "" + headers: + K: V From b269918509a2d1f2b7e9c9cdae8a9f9d6eb4bf3c Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Mon, 26 Feb 2024 23:51:57 +0800 Subject: [PATCH 04/21] 0.1.0-a3 --- alist_sync/common.py | 4 +++- alist_sync/config.py | 30 +++++++++++++++++++++++++----- alist_sync/d_checker.py | 4 ++-- alist_sync/d_main.py | 28 +++++++++++++++++++++++----- alist_sync/d_worker.py | 8 ++++++-- alist_sync/thread_pool.py | 1 + 6 files changed, 60 insertions(+), 15 deletions(-) diff --git a/alist_sync/common.py b/alist_sync/common.py index 579c3be..b6c9b73 100644 --- a/alist_sync/common.py +++ b/alist_sync/common.py @@ -7,7 +7,9 @@ from pathlib import Path from typing import Iterable -from alist_sync.config import cache_dir +from alist_sync.config import create_config + +cache_dir = create_config().cache_dir logger = logging.getLogger("alist-sync.common") diff --git a/alist_sync/config.py b/alist_sync/config.py index 08914d5..43b0621 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -6,7 +6,8 @@ from functools import cached_property, lru_cache from typing import Optional, Literal -from alist_sdk import AlistPathType +from alist_sdk import AlistPathType, AlistPath +from httpx import URL from pydantic import Field, BaseModel from pymongo.database import Database @@ -44,8 +45,13 @@ class AlistServer(BaseModel): def dump_for_alist_client(self): return self.model_dump(exclude={"storage_config"}) - def sump_for_sdk(self): - return self.model_dump(exclude={"storage_config", "max_connect"}) + def dump_for_alist_path(self): + _data = self.model_dump( + exclude={"storage_config", "max_connect"}, + by_alias=True, + ) + _data["server"] = _data.pop("base_url") + return _data def storages(self) -> list[dict]: """返回给定的 storage_config 中包含的storages""" @@ -88,6 +94,7 @@ class SyncGroup(BaseModel): enable: bool = True name: str type: str + interval: int = 300 need_backup: bool = False backup_dir: str = ".alist-sync-backup" group: list[AlistPathType] = Field(min_length=2) @@ -116,10 +123,16 @@ class WebHookNotify(BaseModel): class Config(BaseModel): """配置""" + def __hash__(self): + return hash(self._id) + _id: str = "alist-sync-config" cache__dir: Path = Field( - default=os.getenv("ALIST_SYNC_CACHE_DIR") or Path(__file__).parent / ".cache", + default=os.getenv( + "ALIST_SYNC_CACHE_DIR", + Path(__file__).parent / ".alist-sync-cache", + ), alias="cache_dir", ) @@ -152,8 +165,12 @@ def cache_dir(self) -> Path: @lru_cache(10) def get_server(self, base_url) -> AlistServer: """找到AlistServer""" + if isinstance(base_url, AlistPath): + base_url = base_url.as_uri() + find_server = URL(base_url) for server in self.alist_servers: - if base_url == server.base_url: + server_ = URL(server.base_url) + if find_server.host == server_.host and find_server.port == server_.port: return server raise ModuleNotFoundError() @@ -162,6 +179,9 @@ def mongodb(self) -> "Database": from pymongo import MongoClient from pymongo.server_api import ServerApi + if self.mongodb_uri is None: + return None + db = MongoClient( self.mongodb_uri, server_api=ServerApi("1") ).get_default_database() diff --git a/alist_sync/d_checker.py b/alist_sync/d_checker.py index 18e105e..f9af923 100644 --- a/alist_sync/d_checker.py +++ b/alist_sync/d_checker.py @@ -36,7 +36,7 @@ def split_path(self, path) -> tuple[AlistPath, str]: for sr in self.sync_group.group: try: return sr, path.relative_to(sr) - except ValueError as e: + except ValueError: pass raise ValueError() @@ -75,7 +75,7 @@ def _t_checker(self, path): if _c: self.worker_queue.put(_c) - def mian(self): + def main(self): """""" while True: path = self.scaner_queue.get() diff --git a/alist_sync/d_main.py b/alist_sync/d_main.py index 35b2e45..2926f39 100644 --- a/alist_sync/d_main.py +++ b/alist_sync/d_main.py @@ -2,7 +2,7 @@ """ """ - +import logging from queue import Queue from alist_sdk import AlistPath, login_server @@ -14,12 +14,16 @@ from alist_sync.d_checker import get_checker sync_config = create_config() +logger = logging.getLogger("alist-sync.main") def login_alist(server: AlistServer): """""" - login_server(**server.dump_for_sdk()) + if server.base_url in ALIST_SERVER_INFO: + return + login_server(**server.dump_for_alist_path()) server.token = ALIST_SERVER_INFO.get(server.base_url) + logger.info("Login: %s Success.", server.base_url) def scaner(url: AlistPath, _queue): @@ -39,10 +43,13 @@ def _scaner(_url: AlistPath): def checker(sync_group: SyncGroup, _queue_worker: Queue): """""" if sync_group.enable is False: + logger.warning("Checker: %s is disable", sync_group.name) return + logger.info("Checker: %s", sync_group.name) + for uri in sync_group.group: - login_alist(sync_config.get_server(uri)) + login_alist(sync_config.get_server(uri.as_uri())) _queue_scaner = Queue(30) _scaner_pool = MyThreadPoolExecutor(5, "scaner_") @@ -58,12 +65,23 @@ def checker(sync_group: SyncGroup, _queue_worker: Queue): _t_workers = Workers().start(_queue_worker) _c = get_checker(sync_group.type) - return _c(sync_group, _queue_scaner, _queue_worker).mian() # main()是死循环 + return _c(sync_group, _queue_scaner, _queue_worker).main() # main()是死循环 -def mian(): +def main(): """""" _queue_worker = Queue(30) + _tw = Workers().start(_queue_worker) for sync_group in sync_config.sync_groups: checker(sync_group, _queue_worker) + + _tw.join() + + +if __name__ == "__main__": + logger_alist_sync = logging.getLogger("alist-sync") + logger_alist_sync.setLevel(logging.DEBUG) + logger_alist_sync.addHandler(logging.StreamHandler()) + logger.info("Begin...") + main() diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 4ad8990..4a18cc8 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -63,8 +63,8 @@ def tmp_file(self) -> Path: return sync_config.cache_dir.joinpath(f"download_tmp_{sha1(self.source_path)}") def update(self, *field: Any): - if self.status == "done" and self.workers is not None: - return self.workers.del_worker(self._id) + if self.status == "done" and self.collection is not None: + return self.collection.delete_one({"_id": self._id}) return self.update_mongo(*field) def update_mongo(self, *field): @@ -133,6 +133,8 @@ def __del__(self): def load_from_mongo(self): """从MongoDB加载Worker""" + if self.mongodb is None: + return for i in self.mongodb.workers.find(): self.add_worker(Worker(**i)) @@ -146,8 +148,10 @@ def run(self, queue: Queue): self.add_worker(queue.get()) def start(self, queue: Queue) -> threading.Thread: + self.load_from_mongo() _t = threading.Thread(target=self.run, args=(queue,)) _t.start() + logger.info("Worker Thread Start...") return _t diff --git a/alist_sync/thread_pool.py b/alist_sync/thread_pool.py index fada8f1..3fcbbc4 100644 --- a/alist_sync/thread_pool.py +++ b/alist_sync/thread_pool.py @@ -4,6 +4,7 @@ class MyThreadPoolExecutor(ThreadPoolExecutor): def wait(self): + # FIXBUG: 修复线程池无法等待所有线程结束的问题 while True: for t in copy(self._threads): t.join() From 3da4a1db3387ae05456c20e4d3ad68c607669419 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Tue, 27 Feb 2024 14:51:10 +0800 Subject: [PATCH 05/21] 0.1.0-a4 --- alist_sync/config.py | 7 +++++- alist_sync/d_checker.py | 28 ++++++++++++++------- alist_sync/d_main.py | 40 ++++++++++++++++++------------ alist_sync/d_worker.py | 55 +++++++++++++++++++++++------------------ alist_sync/version.py | 5 +++- config-template.yaml | 4 +-- tests/common.py | 9 ++++++- tests/debugger.py | 4 ++- tests/init_alist.sh | 32 ++++++++++++++---------- 9 files changed, 116 insertions(+), 68 deletions(-) diff --git a/alist_sync/config.py b/alist_sync/config.py index 43b0621..4434a88 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -1,5 +1,6 @@ import builtins import json +import logging import os from datetime import datetime from pathlib import Path @@ -11,6 +12,8 @@ from pydantic import Field, BaseModel from pymongo.database import Database +logger = logging.getLogger("alist-sync.config") + def create_config(): """创建配置文件""" @@ -175,16 +178,18 @@ def get_server(self, base_url) -> AlistServer: raise ModuleNotFoundError() @cached_property - def mongodb(self) -> "Database": + def mongodb(self) -> "Database|None": from pymongo import MongoClient from pymongo.server_api import ServerApi if self.mongodb_uri is None: return None + logger.info("Contenting MongoDB ...") db = MongoClient( self.mongodb_uri, server_api=ServerApi("1") ).get_default_database() + logger.info(f"Contented MongoDB: {db.client.HOST}/{db.name}") if db is None: raise ValueError("连接数据库失败") diff --git a/alist_sync/d_checker.py b/alist_sync/d_checker.py index f9af923..32e775d 100644 --- a/alist_sync/d_checker.py +++ b/alist_sync/d_checker.py @@ -6,6 +6,8 @@ @Date-Time : 2024/2/25 21:17 """ +import logging +import threading from queue import Queue from typing import Iterator @@ -16,6 +18,7 @@ from alist_sync.d_worker import Worker from alist_sync.thread_pool import MyThreadPoolExecutor +logger = logging.getLogger("alist-sync.d_checker") sync_config = create_config() @@ -30,6 +33,10 @@ def __init__(self, sync_group: SyncGroup, scaner_queue: Queue, worker_queue: Que self.conflict: set = set() self.pool = MyThreadPoolExecutor(10) + self.main_thread = threading.Thread( + target=self.main, + name=f"checker_main[{self.sync_group.name}-{self.__class__.__name__}]", + ) def split_path(self, path) -> tuple[AlistPath, str]: """将Path切割为sync_dir和相对路径""" @@ -54,43 +61,46 @@ def load_locker(self): def get_backup_dir(self, path) -> AlistPath: return self.split_path(path)[0].joinpath(self.sync_group.backup_dir) - def _checker(self, source_path: AlistPath, target_path: AlistPath) -> "Worker|None": + def checker(self, source_path: AlistPath, target_path: AlistPath) -> "Worker|None": raise NotImplementedError - def checker(self, path) -> Iterator[Worker | None]: - # _sg = self.sync_group.group.copy() + def checker_every_dir(self, path) -> Iterator[Worker | None]: _sync_dir, _relative_path = self.split_path(path) - for _sd in self.sync_group.group: _sd: AlistPath if _sd == _sync_dir: continue target_path = _sd.joinpath(_relative_path) - yield self._checker(target_path, path) + logger.debug(f"Check: {target_path} -> {path}") + yield self.checker(path, target_path) def _t_checker(self, path): if path in self.locker: return - for _c in self.checker(path): + for _c in self.checker_every_dir(path): if _c: self.worker_queue.put(_c) def main(self): """""" + logger.info(f"Checker Started - name: {self.main_thread.name}") while True: path = self.scaner_queue.get() self._t_checker(path) + def start(self) -> threading.Thread: + self.main_thread.start() + return self.main_thread + class CheckerCopy(Checker): """""" - def _checker( + def checker( self, source_path: AlistPath, target_path: AlistPath, ) -> "Worker|None": - if not target_path.exists() and target_path not in self.locker: self.locker.add(target_path) self.locker.add(source_path) @@ -106,7 +116,7 @@ def _checker( class CheckerMirror(Checker): """""" - def _checker(self, source_path: AlistPath, target_path: AlistPath) -> "Worker|None": + def checker(self, source_path: AlistPath, target_path: AlistPath) -> "Worker|None": _main = self.sync_group.group[0] # target如果是主存储器 - 且target不存在,source存在,删除source if target_path == _main and not target_path.exists() and source_path.exists(): diff --git a/alist_sync/d_main.py b/alist_sync/d_main.py index 2926f39..478ef42 100644 --- a/alist_sync/d_main.py +++ b/alist_sync/d_main.py @@ -3,6 +3,8 @@ """ import logging +import threading +import time from queue import Queue from alist_sdk import AlistPath, login_server @@ -27,20 +29,28 @@ def login_alist(server: AlistServer): def scaner(url: AlistPath, _queue): - def _scaner(_url: AlistPath): - """""" - for item in _url.iterdir(): - if item.is_file(): - _queue.put(item) - elif item.is_dir(): - pool.submit(_scaner, item) - + def _scaner(_url: AlistPath, _s_num): + """ """ + _s_num.append(1) + logger.debug(f"Scaner: {_url}") + try: + for item in _url.iterdir(): + if item.is_file(): + logger.debug(f"find file: {item}") + _queue.put(item) + elif item.is_dir(): + pool.submit(_scaner, item, _s_num) + finally: + _s_num.pop() + + s_sum = [] pool = MyThreadPoolExecutor(5) - pool.submit(_scaner, url) - pool.wait() + pool.submit(_scaner, url, s_sum) + while s_sum: + time.sleep(2) -def checker(sync_group: SyncGroup, _queue_worker: Queue): +def checker(sync_group: SyncGroup, _queue_worker: Queue) -> threading.Thread | None: """""" if sync_group.enable is False: logger.warning("Checker: %s is disable", sync_group.name) @@ -54,18 +64,16 @@ def checker(sync_group: SyncGroup, _queue_worker: Queue): _queue_scaner = Queue(30) _scaner_pool = MyThreadPoolExecutor(5, "scaner_") - _sign = ["copy", "mirror"] + _ct = get_checker(sync_group.type)(sync_group, _queue_scaner, _queue_worker).start() + _sign = ["copy"] if sync_group.type in _sign: scaner(sync_group.group[0], _queue_scaner) else: for uri in sync_group.group: scaner(uri, _queue_scaner) - _t_workers = Workers().start(_queue_worker) - - _c = get_checker(sync_group.type) - return _c(sync_group, _queue_scaner, _queue_worker).main() # main()是死循环 + return _ct def main(): diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 4a18cc8..d00a539 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -9,7 +9,7 @@ from pydantic import BaseModel, computed_field, Field from pymongo.collection import Collection from pymongo.database import Database -from alist_sdk.path_lib import AlistPathType +from alist_sdk.path_lib import AbsAlistPathType from alist_sync.config import create_config from alist_sync.common import sha1 @@ -37,10 +37,10 @@ class Worker(BaseModel): created_at: datetime.datetime = datetime.datetime.now() type: WorkerType need_backup: bool - backup_dir: AlistPathType | None = None + backup_dir: AbsAlistPathType | None = None - source_path: AlistPathType | None = None - target_path: AlistPathType # 永远只操作Target文件,删除也是作为Target + source_path: AbsAlistPathType | None = None + target_path: AbsAlistPathType # 永远只操作Target文件,删除也是作为Target status: WorkerStatus = "init" error_info: BaseException | None = None @@ -50,9 +50,19 @@ class Worker(BaseModel): model_config = { "arbitrary_types_allowed": True, - "excludes": {"workers", "collection", "tmp_file"}, + "excludes": { + "workers", + "collection", + }, } + def __init__(self, **data: Any): + super().__init__(**data) + logger.info(f"Worker Created: {self.__repr__()}") + + def __repr__(self): + return f"" + @computed_field(return_type=str) @property def _id(self) -> str: @@ -82,9 +92,6 @@ def update_mongo(self, *field): True if field == () else False, ) - def __del__(self): - self.tmp_file.unlink(missing_ok=True) - def backup(self): """备份""" if self.backup_dir is None: @@ -105,6 +112,7 @@ def delete_type(self): def run(self): """启动Worker""" + logger.info(f"worker[{self._id}] 已经安排启动.") if self.need_backup: self.backup() @@ -142,31 +150,30 @@ def add_worker(self, worker: Worker): worker.workers = self worker.collection = self.mongodb.workers self.thread_pool.submit(worker.run) + logger.info("Worker added to Pool") def run(self, queue: Queue): + self.load_from_mongo() while True: self.add_worker(queue.get()) def start(self, queue: Queue) -> threading.Thread: - self.load_from_mongo() _t = threading.Thread(target=self.run, args=(queue,)) _t.start() - logger.info("Worker Thread Start...") + logger.info("Worker Main Thread Start...") return _t if __name__ == "__main__": - import os - from pymongo import MongoClient - from pymongo.server_api import ServerApi - - logging.basicConfig(level=logging.DEBUG) - - uri = os.environ["MONGODB_URI"] - - client = MongoClient(uri, server_api=ServerApi("1")) - - ws = Workers(mongodb=client.get_default_database()) - ws.load_from_mongo() - for w in ws.workers: - print(w) + from alist_sdk import AlistPath + + _w = Worker( + type="copy", + need_backup=False, + source_path=AlistPath("http://local:/sc"), + target_path="http://target_path", + ) + print(_w.tmp_file, type(_w.source_path), _w.target_path) + print() + print(_w.model_dump()) + print(_w.model_dump(mode="json")) diff --git a/alist_sync/version.py b/alist_sync/version.py index bfb94fb..950b881 100644 --- a/alist_sync/version.py +++ b/alist_sync/version.py @@ -2,9 +2,12 @@ """ -0.1.0-a0: +0.1.0-a*: [] 使用Workers架构重构,使用多线程,不再使用协程 [] New 通知 - email [] New 通知 - webhook + + 支持到alist 3.31.0 + """ \ No newline at end of file diff --git a/config-template.yaml b/config-template.yaml index d0b9afb..3c32bf5 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -1,5 +1,5 @@ mongodb_uri: "mongodb+srv://${username}:${password}@${host}/alist_sync?retryWrites=true&w=majority&appName=A1" -cache_dir: ./.cache +cache_dir: ./.alist-sync-cache daemon: false alist_servers: @@ -40,7 +40,7 @@ sync_groups: # 例子:http://localhost:5244/test1 group: - "http://localhost:5244/test1" - - "http://localhost:5245/test2" + - "http://localhost:5244/test2" notify: # 通知服务,当触发一些异常后,将会发送通知。 - enable: true diff --git a/tests/common.py b/tests/common.py index 38e79f9..efd5a68 100644 --- a/tests/common.py +++ b/tests/common.py @@ -5,7 +5,9 @@ from alist_sdk import Client -from alist_sync.config import cache_dir +from alist_sync.config import create_config + +cache_dir = create_config().cache_dir class StorageInfo(NamedTuple): @@ -90,3 +92,8 @@ def setup_function(): DATA_DIR_DST2.fs_path.mkdir(parents=True, exist_ok=True) clear_dir(cache_dir) cache_dir.mkdir(parents=True, exist_ok=True) + + +if __name__ == '__main__': + setup_module() + setup_function() \ No newline at end of file diff --git a/tests/debugger.py b/tests/debugger.py index 1300e5b..601a129 100755 --- a/tests/debugger.py +++ b/tests/debugger.py @@ -9,9 +9,11 @@ from alist_sdk import Client from alist_sync.alist_client import AlistClient -from alist_sync.config import cache_dir +from alist_sync.config import create_config from common import create_storage_local, clear_dir +cache_dir = create_config().cache_dir + WORKDIR = Path(__file__).parent DATA_DIR = WORKDIR / "alist/test_dir" diff --git a/tests/init_alist.sh b/tests/init_alist.sh index 653c764..bab4d5b 100755 --- a/tests/init_alist.sh +++ b/tests/init_alist.sh @@ -5,29 +5,35 @@ cd "$(dirname "$0")" || exit mkdir -p alist cd alist || exit -VERSION=${ALIST_VERSION:-"3.30.0"} - +VERSION=${ALIST_VERSION:-"3.31.0"} platform=$(uname -s | tr '[:upper:]' '[:lower:]') case $platform in - linux | darwin) fix=".tar.gz" ;; - windows*) fix=".zip" ;; +linux | darwin) fix=".tar.gz" ;; +win* | mingw64*) + fix=".zip" + platform="windows" + ;; esac case $(uname -m) in - x86_64) cpu="amd64" ;; - i386 | i686) cpu="386" ;; - aarch64 | arm64) cpu="arm64" ;; +x86_64) cpu="amd64" ;; +i386 | i686) cpu="386" ;; +aarch64 | arm64) cpu="arm64" ;; esac filename="alist-${platform}-${cpu}${fix}" export download_url="https://github.com/alist-org/alist/releases/download/v${VERSION}/${filename}" -if [ ! -f alist ]; then - set -e - echo "Will Install ${download_url}" - wget -q "$download_url" - tar xzvf "$filename" - set +e +if [[ ! -f alist && ! -f "alist.exe" ]]; then + set -e + echo "Will Install ${download_url}" + curl -SLkO "$download_url" + if [ "${fix}" == ".zip" ]; then + unzip "$filename" + else + tar xzvf "$filename" + fi + set +e fi rm -rf data/ test_dir/ From d75789f60bf3b3724dbd027d8bb4d840918e51c6 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Wed, 28 Feb 2024 00:01:36 +0800 Subject: [PATCH 06/21] 0.1.0-a5 --- alist_sync/__main__.py | 13 ++- alist_sync/config.py | 20 +++- alist_sync/d_checker.py | 27 +----- alist_sync/d_worker.py | 80 ++++++++-------- alist_sync/data_handle.py | 190 ++++++++++++++++++++++++++++++++++++++ alist_sync/thread_pool.py | 8 +- config-template.yaml | 3 + 7 files changed, 267 insertions(+), 74 deletions(-) create mode 100644 alist_sync/data_handle.py diff --git a/alist_sync/__main__.py b/alist_sync/__main__.py index 507796c..6f0435a 100644 --- a/alist_sync/__main__.py +++ b/alist_sync/__main__.py @@ -39,7 +39,10 @@ ) _backup: bool = Option( - False, "--backup", "-b", help="删除或覆盖目标文件时备份文件到.alist-sync-data/history" + False, + "--backup", + "-b", + help="删除或覆盖目标文件时备份文件到.alist-sync-data/history", ) _stores_config: str | None = Option( @@ -195,6 +198,14 @@ def sync_incr( return SyncIncr(alist_info, config_dir, cache_dir, sync_group).run() +@app.command("test-config") +def t_config(): + """测试配置""" + from alist_sync.config import create_config + + echo(create_config().dump_to_yaml()) + + if __name__ == "__main__": from rich.logging import RichHandler diff --git a/alist_sync/config.py b/alist_sync/config.py index 4434a88..148a9ed 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -5,13 +5,16 @@ from datetime import datetime from pathlib import Path from functools import cached_property, lru_cache -from typing import Optional, Literal +from typing import Optional, Literal, TYPE_CHECKING from alist_sdk import AlistPathType, AlistPath from httpx import URL from pydantic import Field, BaseModel from pymongo.database import Database +if TYPE_CHECKING: + from alist_sync.data_handle import ShelveHandle, MongoHandle + logger = logging.getLogger("alist-sync.config") @@ -195,16 +198,27 @@ def mongodb(self) -> "Database|None": return db + @cached_property + def handle(self) -> "ShelveHandle|MongoHandle": + from alist_sync.data_handle import ShelveHandle, MongoHandle + + if self.mongodb is None: + return ShelveHandle(self.cache_dir) + return MongoHandle(self.mongodb) + @classmethod def load_from_yaml(cls, file: Path) -> "Config": from yaml import safe_load return cls.model_validate(safe_load(file.open("rb"))) - def dump_to_yaml(self, file: Path): + def dump_to_yaml(self, file: Path = None): from yaml import safe_dump - return safe_dump(self.model_dump(mode="json"), file.open("wb")) + return safe_dump( + self.model_dump(mode="json"), + file.open("wb") if file else None, + ) def load_from_mongo(self, uri: str = None): from pymongo import MongoClient diff --git a/alist_sync/d_checker.py b/alist_sync/d_checker.py index 32e775d..f197f88 100644 --- a/alist_sync/d_checker.py +++ b/alist_sync/d_checker.py @@ -11,7 +11,6 @@ from queue import Queue from typing import Iterator -from pymongo.collection import Collection from alist_sdk import AlistPath from alist_sync.config import create_config, SyncGroup @@ -28,9 +27,6 @@ def __init__(self, sync_group: SyncGroup, scaner_queue: Queue, worker_queue: Que self.worker_queue = worker_queue self.scaner_queue: Queue[AlistPath] = scaner_queue - self.locker: set = set() - self.load_locker() - self.conflict: set = set() self.pool = MyThreadPoolExecutor(10) self.main_thread = threading.Thread( @@ -47,17 +43,6 @@ def split_path(self, path) -> tuple[AlistPath, str]: pass raise ValueError() - def release_lock(self, *items: AlistPath): - for p in items: - self.locker.remove(p) - - def load_locker(self): - col: Collection = sync_config.mongodb.workers - for doc in col.find({}, {"source_path": True, "target_path": True}): - for p in doc.values(): - if not None: - self.locker.add(AlistPath(p)) - def get_backup_dir(self, path) -> AlistPath: return self.split_path(path)[0].joinpath(self.sync_group.backup_dir) @@ -75,8 +60,6 @@ def checker_every_dir(self, path) -> Iterator[Worker | None]: yield self.checker(path, target_path) def _t_checker(self, path): - if path in self.locker: - return for _c in self.checker_every_dir(path): if _c: self.worker_queue.put(_c) @@ -101,9 +84,7 @@ def checker( source_path: AlistPath, target_path: AlistPath, ) -> "Worker|None": - if not target_path.exists() and target_path not in self.locker: - self.locker.add(target_path) - self.locker.add(source_path) + if not target_path.exists(): return Worker( type="copy", need_backup=False, @@ -120,17 +101,13 @@ def checker(self, source_path: AlistPath, target_path: AlistPath) -> "Worker|Non _main = self.sync_group.group[0] # target如果是主存储器 - 且target不存在,source存在,删除source if target_path == _main and not target_path.exists() and source_path.exists(): - self.locker.add(target_path) - self.locker.add(source_path) return Worker( type="delete", need_backup=self.sync_group.need_backup, backup_dir=self.get_backup_dir(source_path), target_path=source_path, ) - if not target_path.exists() and target_path not in self.locker: - self.locker.add(target_path) - self.locker.add(source_path) + if not target_path.exists(): return Worker( type="copy", need_backup=False, diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index d00a539..2827846 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -8,8 +8,7 @@ from pydantic import BaseModel, computed_field, Field from pymongo.collection import Collection -from pymongo.database import Database -from alist_sdk.path_lib import AbsAlistPathType +from alist_sdk.path_lib import AbsAlistPathType, AlistPath from alist_sync.config import create_config from alist_sync.common import sha1 @@ -27,6 +26,7 @@ "uploading", "copied", "done", + "failed", ] logger = logging.getLogger("alist-sync.worker") @@ -58,14 +58,14 @@ class Worker(BaseModel): def __init__(self, **data: Any): super().__init__(**data) - logger.info(f"Worker Created: {self.__repr__()}") + logger.info(f"Worker[{self.id}] Created: {self.__repr__()}") def __repr__(self): - return f"" + return f" {self.target_path}>" - @computed_field(return_type=str) + @computed_field(return_type=str, alias="_id") @property - def _id(self) -> str: + def id(self) -> str: return sha1(f"{self.type}{self.source_path}{self.created_at}") @property @@ -75,28 +75,13 @@ def tmp_file(self) -> Path: def update(self, *field: Any): if self.status == "done" and self.collection is not None: return self.collection.delete_one({"_id": self._id}) - return self.update_mongo(*field) - - def update_mongo(self, *field): - """""" - - if field == (): - data = self.model_dump(mode="json") - else: - data = {k: self.__getattr__(k) for k in field} - - logger.debug("更新Worker: %s", data) - return self.collection.update_one( - {"_id": self._id}, - {"$set": data}, - True if field == () else False, - ) + return sync_config.handle.update_worker(self, *field) def backup(self): """备份""" if self.backup_dir is None: raise ValueError("Need Backup, But no Dir.") - backup_file = self.source_path if self.type == "delete" else self.target_path + _backup_file = self.source_path if self.type == "delete" else self.target_path def downloader(self): """HTTP多线程下载""" @@ -113,25 +98,31 @@ def delete_type(self): def run(self): """启动Worker""" logger.info(f"worker[{self._id}] 已经安排启动.") - if self.need_backup: - self.backup() + try: + if self.need_backup: + self.backup() - if self.type == "copy": - self.copy_type() - elif self.type == "delete": - self.delete_type() + if self.type == "copy": + self.copy_type() + elif self.type == "delete": + self.delete_type() + except Exception as _e: + self.error_info = _e + self.status = "failed" + self.update() + logger.error(f"worker[{self._id}] 出现错误: {_e}") class Workers: - # workers: list[Worker] = [] - def __init__(self, mongodb: Database = None): - self.mongodb: Database = mongodb or sync_config.mongodb + def __init__(self): self.thread_pool = MyThreadPoolExecutor( 5, "worker_", ) + self.lockers: set[AlistPath] = set() + atexit.register(self.__del__) def __del__(self): @@ -139,21 +130,28 @@ def __del__(self): if i.name.startswith("download_tmp_"): i.unlink(missing_ok=True) - def load_from_mongo(self): - """从MongoDB加载Worker""" - if self.mongodb is None: - return - for i in self.mongodb.workers.find(): - self.add_worker(Worker(**i)) + def release_lock(self, *items: AlistPath): + for p in items: + self.lockers.remove(p) def add_worker(self, worker: Worker): + if worker.source_path in self.lockers or worker.target_path in self.lockers: + logger.warning(f"Worker {worker} 有路径被锁定.") + return + + self.lockers.add(worker.source_path) + self.lockers.add(worker.target_path) + worker.workers = self - worker.collection = self.mongodb.workers self.thread_pool.submit(worker.run) - logger.info("Worker added to Pool") + logger.info(f"Worker[{worker.id}] added to ThreadPool.") def run(self, queue: Queue): - self.load_from_mongo() + """""" + self.lockers |= sync_config.handle.load_locker() + for i in sync_config.handle.get_workers(): + self.add_worker(Worker(**i)) + while True: self.add_worker(queue.get()) diff --git a/alist_sync/data_handle.py b/alist_sync/data_handle.py new file mode 100644 index 0000000..6e1c542 --- /dev/null +++ b/alist_sync/data_handle.py @@ -0,0 +1,190 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +""" +@File Name : data_handle.py +@Author : LeeCQ +@Date-Time : 2024/2/27 22:15 + +""" +import abc +import datetime +import logging +import shelve +from pathlib import Path +from typing import TYPE_CHECKING, Iterable + +from alist_sdk import AlistPath + +if TYPE_CHECKING: + from alist_sync.d_worker import Worker + from pymongo.database import Database + from pymongo.collection import Collection + +logger = logging.getLogger("alist-sync.data_handle") + + +class HandleBase(metaclass=abc.ABCMeta): + + @abc.abstractmethod + def update_worker(self, worker: "Worker", *field): + """更新或创建Worker""" + raise NotImplementedError + + @abc.abstractmethod + def delete_worker(self, worker_id: str): + """删除Worker""" + raise NotImplementedError + + @abc.abstractmethod + def get_worker(self, worker_id: str): + """获取Worker""" + raise NotImplementedError + + @abc.abstractmethod + def get_workers(self, query=None) -> Iterable["Worker"]: + """获取Workers""" + raise NotImplementedError + + @abc.abstractmethod + def load_locker(self) -> set["AlistPath"]: + """加载锁""" + raise NotImplementedError + + @abc.abstractmethod + def path_in_workers(self, path: "AlistPath") -> bool: + """判断路径是否在Worker中""" + raise NotImplementedError + + def get_locker(self, path: "AlistPath"): + """获取锁""" + return self.path_in_workers(path) + + @abc.abstractmethod + def update_file_item(self, path: "AlistPath", item, *field): + """更新或创建FileItem""" + raise NotImplementedError + + @abc.abstractmethod + def get_file_item(self, item_id: "AlistPath"): + """获取FileItem""" + raise NotImplementedError + + +class MongoHandle(HandleBase): + + def __init__(self, mongodb: "Database"): + self._workers: "Collection" = mongodb.workers + self._items: "Collection" = mongodb.items + + def update_worker(self, worker: "Worker", *field): + if field == (): + data = worker.model_dump(mode="json") + else: + data = {k: worker.__getattr__(k) for k in field} + + logger.debug("更新Worker: %s", data) + return self._workers.update_one( + {"_id": worker.id}, + {"$set": data}, + True if field == () else False, + ) + + def delete_worker(self, worker_id: str): + logger.debug("删除Worker: %s", worker_id) + return self._workers.delete_one({"_id": worker_id}) + + def get_worker(self, worker_id: str): + return self._workers.find_one({"_id": worker_id}) + + def get_workers(self, query=None) -> Iterable: + if query is None: + query = {} + return self._workers.find(query) + + def load_locker(self) -> set["AlistPath"]: + logger.info("正在加载MongoDB中保存的锁。") + return { + AlistPath(p) + for doc in self._workers.find( + {}, + {"source_path": True, "target_path": True}, + ) + for p in doc.values() + if not None + } + + def path_in_workers(self, path: "AlistPath") -> bool: + return bool( + self._workers.find_one( + {"$or": [{"source_path": str(path)}, {"target_path": str(path)}]}, + {"_id": True}, + ) + ) + + def update_file_item(self, path: "AlistPath", item, *field): + if field == (): + data = item.model_dump(mode="json") + else: + data = {k: item.__getattr__(k) for k in field} + + logger.debug("更新FileItem: %s", data) + return self._items.update_one( + {"_id": path.as_uri()}, + { + "$set": { + "_id": item.id, + "update_time": datetime.datetime.now(), + "item": data, + }, + }, + True if field == () else False, + ) + + def get_file_item(self, item_id: AlistPath): + return self._items.find_one({"_id": item_id.as_uri()})["item"] + + +class ShelveHandle(HandleBase): + + def __init__(self, save_dir: Path): + self._workers = shelve.open( + str(save_dir.joinpath("alist_cache_workers.shelve")), writeback=True + ) + self._items = shelve.open( + str(save_dir.joinpath("alist_cache_items.shelve")), writeback=True + ) + + def update_worker(self, worker: "Worker", *field): + self._workers[worker.id] = worker + + def delete_worker(self, worker_id: str): + self._workers.pop(worker_id) + + def get_worker(self, worker_id: str): + return self._workers.get(worker_id) + + def get_workers(self, query=None) -> Iterable["Worker"]: + for _w in self._workers.values(): + yield _w + + def load_locker(self) -> set[AlistPath]: + logger.info("正在加载Shelve中保存的锁。") + return { + AlistPath(p) + for _w in self._workers.values() + for p in (_w.source_path, _w.target_path) + if p is not None + } + + def path_in_workers(self, path: AlistPath) -> bool: + return path in self.load_locker() + + def update_file_item(self, path: AlistPath, item, *field): + self._items[path.as_uri()] = { + "id": path, + "update_time": datetime.datetime.now(), + "item": item, + } + + def get_file_item(self, item_id: AlistPath): + return self._items.get(item_id.as_uri(), {}).get("item") diff --git a/alist_sync/thread_pool.py b/alist_sync/thread_pool.py index 3fcbbc4..6f4f87b 100644 --- a/alist_sync/thread_pool.py +++ b/alist_sync/thread_pool.py @@ -1,10 +1,10 @@ +import time from concurrent.futures import ThreadPoolExecutor from copy import copy class MyThreadPoolExecutor(ThreadPoolExecutor): def wait(self): - # FIXBUG: 修复线程池无法等待所有线程结束的问题 - while True: - for t in copy(self._threads): - t.join() + while self._work_queue.qsize(): + time.sleep(3) + diff --git a/config-template.yaml b/config-template.yaml index 3c32bf5..8d965c6 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -25,6 +25,9 @@ sync_groups: # 4 sync-incr: 基于文件的修改时间,只同步源目录中修改时间晚于目标目录的文件 type: "copy" + # 检查间隔,单位为秒,如果daemon为False,则该值无效 + interval: 300 # 默认值: 300 秒, 5 分钟 + # 是否需要备份,如果为True,则会在同步之前备份目标目录 # 对于copy,该值无效 need_backup: false # 默认值: False From caba34c82cc39de45e398d84f7b5ee5dd450f1ee Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Wed, 28 Feb 2024 17:23:38 +0800 Subject: [PATCH 07/21] 0.1.0-a6 --- alist_sync/d_main.py | 13 ++--- alist_sync/d_worker.py | 113 ++++++++++++++++++++++++++++++-------- alist_sync/data_handle.py | 27 +++++++-- alist_sync/scanner.py | 2 + alist_sync/thread_pool.py | 4 ++ config-template.yaml | 14 ++++- tests/test_worker.py | 55 ++++++++----------- 7 files changed, 162 insertions(+), 66 deletions(-) diff --git a/alist_sync/d_main.py b/alist_sync/d_main.py index 478ef42..9da87df 100644 --- a/alist_sync/d_main.py +++ b/alist_sync/d_main.py @@ -8,7 +8,6 @@ from queue import Queue from alist_sdk import AlistPath, login_server -from alist_sdk.path_lib import ALIST_SERVER_INFO from alist_sync.d_worker import Workers from alist_sync.thread_pool import MyThreadPoolExecutor @@ -21,11 +20,8 @@ def login_alist(server: AlistServer): """""" - if server.base_url in ALIST_SERVER_INFO: - return - login_server(**server.dump_for_alist_path()) - server.token = ALIST_SERVER_INFO.get(server.base_url) - logger.info("Login: %s Success.", server.base_url) + _c = login_server(**server.dump_for_alist_path()) + logger.info("Login: %s[%s] Success.", _c.base_url, _c.login_username) def scaner(url: AlistPath, _queue): @@ -43,6 +39,8 @@ def _scaner(_url: AlistPath, _s_num): finally: _s_num.pop() + assert url.exists(), f"目录不存在{url.as_uri()}" + s_sum = [] pool = MyThreadPoolExecutor(5) pool.submit(_scaner, url, s_sum) @@ -68,6 +66,7 @@ def checker(sync_group: SyncGroup, _queue_worker: Queue) -> threading.Thread | N _sign = ["copy"] if sync_group.type in _sign: + logger.debug(f"Copy 只需要扫描 {sync_group.group[0].as_uri() = }") scaner(sync_group.group[0], _queue_scaner) else: for uri in sync_group.group: @@ -89,7 +88,7 @@ def main(): if __name__ == "__main__": logger_alist_sync = logging.getLogger("alist-sync") - logger_alist_sync.setLevel(logging.DEBUG) + logger_alist_sync.setLevel(logging.INFO) logger_alist_sync.addHandler(logging.StreamHandler()) logger.info("Begin...") main() diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 2827846..3d772c9 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -4,9 +4,9 @@ import threading from pathlib import Path from queue import Queue -from typing import Literal, Any +from typing import Literal, Any, Annotated -from pydantic import BaseModel, computed_field, Field +from pydantic import BaseModel, computed_field, Field, PlainSerializer from pymongo.collection import Collection from alist_sdk.path_lib import AbsAlistPathType, AlistPath @@ -32,9 +32,11 @@ logger = logging.getLogger("alist-sync.worker") +# noinspection PyTypeHints class Worker(BaseModel): owner: str = sync_config.runner_name created_at: datetime.datetime = datetime.datetime.now() + done_at: datetime.datetime | None = None type: WorkerType need_backup: bool backup_dir: AbsAlistPathType | None = None @@ -42,7 +44,7 @@ class Worker(BaseModel): source_path: AbsAlistPathType | None = None target_path: AbsAlistPathType # 永远只操作Target文件,删除也是作为Target status: WorkerStatus = "init" - error_info: BaseException | None = None + error_info: str | None = None # 私有属性 workers: "Workers | None" = Field(None, exclude=True) @@ -72,49 +74,114 @@ def id(self) -> str: def tmp_file(self) -> Path: return sync_config.cache_dir.joinpath(f"download_tmp_{sha1(self.source_path)}") - def update(self, *field: Any): - if self.status == "done" and self.collection is not None: - return self.collection.delete_one({"_id": self._id}) - return sync_config.handle.update_worker(self, *field) + def update(self, **field: Any): + if field: + if field.keys() | self.__dict__.keys() != self.__dict__.keys(): + raise KeyError() + self.__dict__.update(field) + + if self.status in ["done", "failed"]: + logger.info(f"Worker[{self.id}] is {self.status}.") + self.done_at = datetime.datetime.now() + sync_config.handle.create_log(self) + return sync_config.handle.delete_worker(self.id) + return sync_config.handle.update_worker(self, *field.keys()) def backup(self): """备份""" if self.backup_dir is None: raise ValueError("Need Backup, But no Dir.") - _backup_file = self.source_path if self.type == "delete" else self.target_path + _backup_file = self.target_path + _target_name = ( + f"{sha1(_backup_file.as_posix())}_" + f"{int(_backup_file.stat().modified.timestamp())}.history" + ) + _backup_target = self.backup_dir.joinpath(_target_name) + _backup_target_json = self.backup_dir.joinpath(_target_name + ".json") + _old_info = _backup_file.stat().model_dump_json() + + self.update(status="back-upping") + + assert ( + not _backup_target.exists() and not _backup_target_json.exists() + ), "备份目标冲突" + + _backup_file.rename(_backup_target) + assert _backup_target.exists() + _backup_target_json.write_text(_old_info) + assert _backup_target_json.re_stat() is not None + + self.update(status="back-upped") + logger.info(f"Worker[{self.id}] Backup Success.") def downloader(self): """HTTP多线程下载""" def upload(self): """上传到alist""" + if self.source_path.stat().size != self.tmp_file.stat().st_size: + raise + self.update(status="uploading") + return self.target_path.write_bytes(self.tmp_file) def copy_type(self): """复制任务""" + logger.debug(f"Worker[{self.id}] Start Copping") + + if self.source_path.stat().size < 10 * 1024 * 1024: + self.target_path.unlink(missing_ok=True) + self.target_path.parent.mkdir(parents=True, exist_ok=True) + _res = self.target_path.write_bytes(self.source_path.read_bytes()) + assert _res.size == self.source_path.stat().size + + return self.update(status="copied") + + logger.error( + f"Worker[{self.id}]大于10M的文件尚未实现。file: {self.source_path.as_uri()} " + f"size:{self.source_path.stat().size}" + ) + raise NotImplementedError( + f"大于10M的文件尚未实现。file: {self.source_path.as_uri()}:{self.source_path.stat().size}" + ) def delete_type(self): """删除任务""" + self.target_path.unlink(missing_ok=True) + assert not self.target_path.exists() + self.update(status="deleted") + + def recheck(self): + """再次检查当前Worker的结果是否符合预期。""" + return True def run(self): """启动Worker""" - logger.info(f"worker[{self._id}] 已经安排启动.") + logger.info(f"worker[{self.id}] 已经安排启动.") + self.update() + logger.debug(f"Worker[{self.id}] Updated to DB.") try: - if self.need_backup: + if self.status in ["done", "failed"]: + return + if self.need_backup and self.status in [ + "init", + ]: self.backup() - if self.type == "copy": + if self.type == "copy" and self.status in ["init", "back-upped"]: self.copy_type() - elif self.type == "delete": + + elif self.type == "delete" and self.status in ["init", "back-upped"]: self.delete_type() + + assert self.recheck() + self.update(status="done") except Exception as _e: - self.error_info = _e - self.status = "failed" - self.update() - logger.error(f"worker[{self._id}] 出现错误: {_e}") + self.error_info = str(_e) + self.update(status="failed") + logger.error(f"worker[{self.id}] 出现错误: {_e}") class Workers: - def __init__(self): self.thread_pool = MyThreadPoolExecutor( 5, @@ -134,23 +201,25 @@ def release_lock(self, *items: AlistPath): for p in items: self.lockers.remove(p) - def add_worker(self, worker: Worker): - if worker.source_path in self.lockers or worker.target_path in self.lockers: - logger.warning(f"Worker {worker} 有路径被锁定.") + def add_worker(self, worker: Worker, is_loader=False): + if not is_loader and ( + worker.source_path in self.lockers or worker.target_path in self.lockers + ): + logger.warning(f"Worker[{worker.id}]中有路径被锁定.") return self.lockers.add(worker.source_path) self.lockers.add(worker.target_path) worker.workers = self - self.thread_pool.submit(worker.run) + self.thread_pool.submit_wait(worker.run) logger.info(f"Worker[{worker.id}] added to ThreadPool.") def run(self, queue: Queue): """""" self.lockers |= sync_config.handle.load_locker() for i in sync_config.handle.get_workers(): - self.add_worker(Worker(**i)) + self.add_worker(Worker(**i), is_loader=True) while True: self.add_worker(queue.get()) diff --git a/alist_sync/data_handle.py b/alist_sync/data_handle.py index 6e1c542..6e90eba 100644 --- a/alist_sync/data_handle.py +++ b/alist_sync/data_handle.py @@ -24,7 +24,6 @@ class HandleBase(metaclass=abc.ABCMeta): - @abc.abstractmethod def update_worker(self, worker: "Worker", *field): """更新或创建Worker""" @@ -69,20 +68,28 @@ def get_file_item(self, item_id: "AlistPath"): """获取FileItem""" raise NotImplementedError + @abc.abstractmethod + def create_log(self, worker: "Worker"): + """""" + class MongoHandle(HandleBase): - def __init__(self, mongodb: "Database"): self._workers: "Collection" = mongodb.workers self._items: "Collection" = mongodb.items + self._logs: "Collection" = mongodb.create_collection() + + def create_log(self, worker: "Worker"): + logger.info(f"create log {worker.id} {worker.status}") + self._logs.insert_one(worker.model_dump(mode="json")) def update_worker(self, worker: "Worker", *field): if field == (): data = worker.model_dump(mode="json") else: - data = {k: worker.__getattr__(k) for k in field} + data = {k: worker.__dict__.get(k) for k in field} - logger.debug("更新Worker: %s", data) + logger.debug(f"更新Worker[{worker.id}]: {data}") return self._workers.update_one( {"_id": worker.id}, {"$set": data}, @@ -145,7 +152,6 @@ def get_file_item(self, item_id: AlistPath): class ShelveHandle(HandleBase): - def __init__(self, save_dir: Path): self._workers = shelve.open( str(save_dir.joinpath("alist_cache_workers.shelve")), writeback=True @@ -153,11 +159,22 @@ def __init__(self, save_dir: Path): self._items = shelve.open( str(save_dir.joinpath("alist_cache_items.shelve")), writeback=True ) + self._logs = open(str(save_dir.joinpath("alist-sync-files.log", "a+"))) + + def __del__(self): + self._workers.close() + self._items.close() + self._logs.close() + + def create_log(self, worker: "Worker"): + logger.info(f"create log for: {worker.id}") + self._logs.write(worker.model_dump_json()) def update_worker(self, worker: "Worker", *field): self._workers[worker.id] = worker def delete_worker(self, worker_id: str): + logger.info(f"Worker[{worker_id}] remove from workers") self._workers.pop(worker_id) def get_worker(self, worker_id: str): diff --git a/alist_sync/scanner.py b/alist_sync/scanner.py index 5ca5d1d..11ba050 100644 --- a/alist_sync/scanner.py +++ b/alist_sync/scanner.py @@ -20,6 +20,8 @@ class Scanner(BaseModel): queue: asyncio.Queue = asyncio.Queue(30) items: dict[str | PurePosixPath, list[Item]] + model_config = {"arbitrary_types_allowed": True} + @classmethod async def scans(cls, *scan_path, client: AlistClient = None): """扫描目录""" diff --git a/alist_sync/thread_pool.py b/alist_sync/thread_pool.py index 6f4f87b..5eb6bf6 100644 --- a/alist_sync/thread_pool.py +++ b/alist_sync/thread_pool.py @@ -8,3 +8,7 @@ def wait(self): while self._work_queue.qsize(): time.sleep(3) + def submit_wait(self, __fn, *args, **kwargs): + while self._work_queue.qsize() > 10: + time.sleep(5) + return self.submit(__fn, *args, **kwargs) diff --git a/config-template.yaml b/config-template.yaml index 8d965c6..687c4b8 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -1,13 +1,19 @@ +# 如果没有配置MongoDB,文档将会存储至本地缓存 mongodb_uri: "mongodb+srv://${username}:${password}@${host}/alist_sync?retryWrites=true&w=majority&appName=A1" + +# 缓存文件夹 cache_dir: ./.alist-sync-cache + +# 是否以Daemon模式运行 daemon: false +# Alist 服务器信息 type: list alist_servers: - base_url: http://localhost:5244/ username: "admin" password: "123456" verify_ssl: false - - base_url: http://localhost:5245/ + - base_url: http://remote_alist_server/ username: "admin" password: "admin" @@ -41,6 +47,12 @@ sync_groups: # Alist服务器信息需要提前在alist_servers中配置 # 支持在不同的Alist服务器之间同步 # 例子:http://localhost:5244/test1 + + ignore: + dir: + - .alist-sync + match: + - "**/*.py" group: - "http://localhost:5244/test1" - "http://localhost:5244/test2" diff --git a/tests/test_worker.py b/tests/test_worker.py index e491dc2..5b998ca 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -12,36 +12,29 @@ from alist_sdk.path_lib import PureAlistPath, AlistPath, login_server from alist_sync.d_worker import Worker, Workers - - -def test_worker(): - from pymongo import MongoClient - from pymongo.server_api import ServerApi - - uri = ( - "mongodb+srv://alist-sync:alist-sync-p@a1.guggt7c.mongodb.net/alist_sync?" - "retryWrites=true&w=majority&appName=A1" +from alist_sync.config import create_config + +sync_config = create_config() + + +def test_worker_copy(): + docs = { + # "_id": "013ac712314196a73bc97baba0e0cb97f769140b", + "backup_dir": None, + "created_at": "2024-02-27T15:32:54.287104", + "error_info": None, + "need_backup": False, + "owner": "test", + "source_path": "http://localhost:5244/local/test_dir/test/t1/test1.txt", + "status": "init", + "target_path": "http://localhost:5244/local_dst/test_dir/test/t1/test1.txt", + "type": "copy", + } + login_server( + **sync_config.get_server("http://localhost:5244/").dump_for_alist_path() ) - - client = MongoClient(uri, server_api=ServerApi("1")) - w = Worker( - owner="admin", - type="delete", - need_backup=True, - backer_dir=AlistPath("http://localhost:5244/local/.history"), - source_path="http://localhost:5244/local/test.txt", - collection=client.get_default_database().get_collection("workers"), + login_server( + **sync_config.get_server("http://localhost:5244/").dump_for_alist_path() ) - print(w.update()) - - -def test_workers(): - import os - from pymongo import MongoClient - from pymongo.server_api import ServerApi - - uri = os.environ["MONGODB_URI"] - - client = MongoClient(uri, server_api=ServerApi("1")) - ws = Workers() - ws.load_from_mongo() + worker = Worker(**docs) + worker.run() From 6ebd390967a5f93c6494b662ac982454c8ffd524 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Wed, 28 Feb 2024 19:30:52 +0800 Subject: [PATCH 08/21] 0.1.0-a7 --- alist_sync/alist_client.py | 2 +- alist_sync/d_worker.py | 8 ++++++-- alist_sync/downloader.py | 31 +++++++++++++++++++++++++++++++ alist_sync/thread_pool.py | 1 - config-template.yaml | 6 ++++++ 5 files changed, 44 insertions(+), 4 deletions(-) diff --git a/alist_sync/alist_client.py b/alist_sync/alist_client.py index be862a6..2e73a8b 100644 --- a/alist_sync/alist_client.py +++ b/alist_sync/alist_client.py @@ -13,7 +13,7 @@ logger = logging.getLogger("alist-sync.client") sync_config = create_config() -__all__ = ["AlistClient", "get_status"] +__all__ = ["AlistClient", "get_status", "create_async_client"] CopyStatusModify = Literal[ "init", diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 3d772c9..8569a60 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -4,9 +4,9 @@ import threading from pathlib import Path from queue import Queue -from typing import Literal, Any, Annotated +from typing import Literal, Any -from pydantic import BaseModel, computed_field, Field, PlainSerializer +from pydantic import BaseModel, computed_field, Field from pymongo.collection import Collection from alist_sdk.path_lib import AbsAlistPathType, AlistPath @@ -70,6 +70,10 @@ def __repr__(self): def id(self) -> str: return sha1(f"{self.type}{self.source_path}{self.created_at}") + @property + def short_id(self) -> str: + return self.id[:8] + @property def tmp_file(self) -> Path: return sync_config.cache_dir.joinpath(f"download_tmp_{sha1(self.source_path)}") diff --git a/alist_sync/downloader.py b/alist_sync/downloader.py index 4db63f5..a275318 100644 --- a/alist_sync/downloader.py +++ b/alist_sync/downloader.py @@ -7,3 +7,34 @@ 下载器使用一个单独线程启动,它创建一个事件循环并在其内部保持同步。 """ +import time +import urllib.parse +from typing import TYPE_CHECKING + +from alist_sdk import AlistPath + +from alist_sync.alist_client import create_async_client + +if TYPE_CHECKING: + ... + + +def upload_stream(source_path: AlistPath, target_path: AlistPath): + async def main(): + async with create_async_client(source_path.client).stream( + "GET", source_path.as_download_uri(), follow_redirects=True + ) as stream_resp: + return await create_async_client(target_path.client).put( + "/api/fs/put", + headers={ + "As-Task": "false", + "Content-Type": "application/octet-stream", + "Last-Modified": str(int(time.time() * 1000)), + "File-Path": urllib.parse.quote_plus(str(target_path.as_posix())), + }, + content=stream_resp.aiter_bytes(), + ) + + import asyncio + + return asyncio.run(main()) diff --git a/alist_sync/thread_pool.py b/alist_sync/thread_pool.py index 5eb6bf6..e70646c 100644 --- a/alist_sync/thread_pool.py +++ b/alist_sync/thread_pool.py @@ -1,6 +1,5 @@ import time from concurrent.futures import ThreadPoolExecutor -from copy import copy class MyThreadPoolExecutor(ThreadPoolExecutor): diff --git a/config-template.yaml b/config-template.yaml index 687c4b8..ae0685c 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -7,12 +7,18 @@ cache_dir: ./.alist-sync-cache # 是否以Daemon模式运行 daemon: false +thread_pool_max_size: + workers: 5 + scanner: 5 + checker: 5 + # Alist 服务器信息 type: list alist_servers: - base_url: http://localhost:5244/ username: "admin" password: "123456" verify_ssl: false + - base_url: http://remote_alist_server/ username: "admin" password: "admin" From d1df3fda2e4231615e518400eba07dbeb589bc4c Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Wed, 28 Feb 2024 23:35:08 +0800 Subject: [PATCH 09/21] 0.1.0-a8 --- alist_sync/alist_client.py | 2 +- alist_sync/d_worker.py | 47 ++++++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/alist_sync/alist_client.py b/alist_sync/alist_client.py index 2e73a8b..7fce01a 100644 --- a/alist_sync/alist_client.py +++ b/alist_sync/alist_client.py @@ -119,7 +119,7 @@ def create_async_client(client: Client) -> AlistClient: _server = sync_config.get_server(client.base_url) _server.token = client.headers.get("authorization") - _ac = AlistClient(**_server) + _ac = AlistClient(**_server.dump_for_alist_client()) _ac.headers = client.headers _ac.cookies = client.cookies return _ac diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 8569a60..29f9776 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -121,12 +121,34 @@ def backup(self): def downloader(self): """HTTP多线程下载""" - def upload(self): - """上传到alist""" - if self.source_path.stat().size != self.tmp_file.stat().st_size: - raise - self.update(status="uploading") - return self.target_path.write_bytes(self.tmp_file) + def copy_single_stream(self): + import urllib.parse + + # download + _tmp = self.tmp_file.open("wb") + with self.source_path.client.stream( + self.source_path.get_download_uri() + ) as _res: + for i in _res.iter_by(chunk_size=1024 * 1024): + _tmp.write(i) + _tmp.seek(0) + self.update(status="downloaded") + # upload + self.target_path.client.verify_request( + "PUT", + "/api/fs/put", + headers={ + "As-Task": "false", + "Content-Type": "application/octet-stream", + "Last-Modified": str( + int(self.source_path.stat().modified.timestamp() * 1000) + ), + "File-Path": urllib.parse.quote_plus(str(self.target_path.as_posix())), + }, + content=_tmp, + ) + + self.update(status="uploaded") def copy_type(self): """复制任务""" @@ -136,17 +158,12 @@ def copy_type(self): self.target_path.unlink(missing_ok=True) self.target_path.parent.mkdir(parents=True, exist_ok=True) _res = self.target_path.write_bytes(self.source_path.read_bytes()) - assert _res.size == self.source_path.stat().size - return self.update(status="copied") + else: + self.copy_single_stream() - logger.error( - f"Worker[{self.id}]大于10M的文件尚未实现。file: {self.source_path.as_uri()} " - f"size:{self.source_path.stat().size}" - ) - raise NotImplementedError( - f"大于10M的文件尚未实现。file: {self.source_path.as_uri()}:{self.source_path.stat().size}" - ) + assert self.target_path.re_stat().size == self.source_path.stat().size + return self.update(status="copied") def delete_type(self): """删除任务""" From bd5008a1a27c1b7f8b003f6ec65b55e374e2d19a Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Thu, 29 Feb 2024 15:03:56 +0800 Subject: [PATCH 10/21] 0.1.0-a9 --- alist_sync/common.py | 16 +++++++++ alist_sync/config.py | 7 ++++ alist_sync/d_checker.py | 26 +++++++++++--- alist_sync/d_main.py | 10 +++--- alist_sync/d_worker.py | 71 +++++++++++++++++++++++++-------------- alist_sync/data_handle.py | 3 +- alist_sync/thread_pool.py | 3 ++ 7 files changed, 101 insertions(+), 35 deletions(-) diff --git a/alist_sync/common.py b/alist_sync/common.py index b6c9b73..60ec40f 100644 --- a/alist_sync/common.py +++ b/alist_sync/common.py @@ -4,6 +4,7 @@ import logging import selectors import sys +import threading from pathlib import Path from typing import Iterable @@ -22,6 +23,8 @@ "timeout_input", "clear_cache", "clear_path", + "all_thread_name", + "prefix_in_threads", ] @@ -71,6 +74,19 @@ def is_task_all_success(tasks: Iterable | dict) -> bool: return all(1 if i.status == "success" else 0 for i in tasks) +def all_thread_name() -> set: + """返回全部的线程名字""" + return {t.name for t in threading.enumerate()} + + +def prefix_in_threads(prefix) -> bool: + """在活动的线程,是否存指定的线程名前缀""" + for name in all_thread_name(): + if prefix in name: + return True + return False + + def timeout_input(msg, default, timeout=3): sys.stdout.write(msg) sys.stdout.flush() diff --git a/alist_sync/config.py b/alist_sync/config.py index 148a9ed..4a4cf40 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -2,6 +2,7 @@ import json import logging import os +import time from datetime import datetime from pathlib import Path from functools import cached_property, lru_cache @@ -142,6 +143,8 @@ def __hash__(self): alias="cache_dir", ) + timeout: int = Field(10) + daemon: bool = os.getenv("ALIST_SYNC_DAEMON", "false").lower() in ( "true", "1", @@ -163,6 +166,10 @@ def __hash__(self): create_time: datetime = datetime.now() + @cached_property + def start_time(self) -> int: + return int(time.time()) + @cached_property def cache_dir(self) -> Path: self.cache__dir.mkdir(exist_ok=True, parents=True) diff --git a/alist_sync/d_checker.py b/alist_sync/d_checker.py index f197f88..62056c0 100644 --- a/alist_sync/d_checker.py +++ b/alist_sync/d_checker.py @@ -8,7 +8,8 @@ """ import logging import threading -from queue import Queue +import time +from queue import Queue, Empty from typing import Iterator from alist_sdk import AlistPath @@ -16,6 +17,7 @@ from alist_sync.config import create_config, SyncGroup from alist_sync.d_worker import Worker from alist_sync.thread_pool import MyThreadPoolExecutor +from common import prefix_in_threads logger = logging.getLogger("alist-sync.d_checker") sync_config = create_config() @@ -68,8 +70,20 @@ def main(self): """""" logger.info(f"Checker Started - name: {self.main_thread.name}") while True: - path = self.scaner_queue.get() - self._t_checker(path) + if ( + self.scaner_queue.empty() + and sync_config.daemon is False + and not prefix_in_threads("scaner_") + and time.time() - sync_config.start_time > sync_config.timeout + ): + logger.info(f"循环线程退出 - {self.main_thread.name}") + break + + try: + self._t_checker(self.scaner_queue.get(timeout=3)) + except Empty: + logger.debug("Checkers: 空 Scaner 队列.") + pass def start(self) -> threading.Thread: self.main_thread.start() @@ -85,12 +99,16 @@ def checker( target_path: AlistPath, ) -> "Worker|None": if not target_path.exists(): + logger.debug( + f"Checked: [COPY] {source_path.as_uri()} -> {target_path.as_uri()}" + ) return Worker( type="copy", need_backup=False, source_path=source_path, target_path=target_path, ) + logger.debug(f"Checked: [JUMP] {source_path.as_uri()}") return None @@ -125,7 +143,7 @@ class CheckerSyncIncr(Checker): """""" -def get_checker(type_: str) -> type(Checker): +def get_checker(type_: str) -> type[Checker]: return { "copy": CheckerCopy, "mirror": CheckerMirror, diff --git a/alist_sync/d_main.py b/alist_sync/d_main.py index 9da87df..05443b3 100644 --- a/alist_sync/d_main.py +++ b/alist_sync/d_main.py @@ -42,10 +42,10 @@ def _scaner(_url: AlistPath, _s_num): assert url.exists(), f"目录不存在{url.as_uri()}" s_sum = [] - pool = MyThreadPoolExecutor(5) - pool.submit(_scaner, url, s_sum) - while s_sum: - time.sleep(2) + with MyThreadPoolExecutor(5, thread_name_prefix=f"scaner_{url.as_uri()}") as pool: + pool.submit(_scaner, url, s_sum) + while s_sum: + time.sleep(2) def checker(sync_group: SyncGroup, _queue_worker: Queue) -> threading.Thread | None: @@ -88,7 +88,7 @@ def main(): if __name__ == "__main__": logger_alist_sync = logging.getLogger("alist-sync") - logger_alist_sync.setLevel(logging.INFO) + logger_alist_sync.setLevel(logging.DEBUG) logger_alist_sync.addHandler(logging.StreamHandler()) logger.info("Begin...") main() diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 29f9776..fab0c1e 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -2,8 +2,9 @@ import datetime import logging import threading +import time from pathlib import Path -from queue import Queue +from queue import Queue, Empty from typing import Literal, Any from pydantic import BaseModel, computed_field, Field @@ -11,7 +12,7 @@ from alist_sdk.path_lib import AbsAlistPathType, AlistPath from alist_sync.config import create_config -from alist_sync.common import sha1 +from alist_sync.common import sha1, prefix_in_threads from alist_sync.thread_pool import MyThreadPoolExecutor sync_config = create_config() @@ -127,27 +128,33 @@ def copy_single_stream(self): # download _tmp = self.tmp_file.open("wb") with self.source_path.client.stream( - self.source_path.get_download_uri() + "GET", + self.source_path.get_download_uri(), + follow_redirects=True, ) as _res: - for i in _res.iter_by(chunk_size=1024 * 1024): + for i in _res.iter_bytes(chunk_size=1024 * 1024): _tmp.write(i) - _tmp.seek(0) + _tmp.close() self.update(status="downloaded") # upload - self.target_path.client.verify_request( - "PUT", - "/api/fs/put", - headers={ - "As-Task": "false", - "Content-Type": "application/octet-stream", - "Last-Modified": str( - int(self.source_path.stat().modified.timestamp() * 1000) - ), - "File-Path": urllib.parse.quote_plus(str(self.target_path.as_posix())), - }, - content=_tmp, - ) - + with self.tmp_file.open("rb") as fs: + res = self.target_path.client.verify_request( + "PUT", + "/api/fs/put", + headers={ + "As-Task": "false", + "Content-Type": "application/octet-stream", + "Last-Modified": str( + int(self.source_path.stat().modified.timestamp() * 1000) + ), + "File-Path": urllib.parse.quote_plus( + str(self.target_path.as_posix()) + ), + }, + content=fs, + ) + + assert res.code == 200 self.update(status="uploaded") def copy_type(self): @@ -238,15 +245,29 @@ def add_worker(self, worker: Worker, is_loader=False): def run(self, queue: Queue): """""" - self.lockers |= sync_config.handle.load_locker() - for i in sync_config.handle.get_workers(): - self.add_worker(Worker(**i), is_loader=True) - + # self.lockers |= sync_config.handle.load_locker() + # for i in sync_config.handle.get_workers(): + # self.add_worker(Worker(**i), is_loader=True) while True: - self.add_worker(queue.get()) + if ( + queue.empty() + and sync_config.daemon is False + and not prefix_in_threads("checker_") + and time.time() - sync_config.start_time > sync_config.timeout + ): + logger.info(f"等待Worker执行完成, 排队中的数量: {self.thread_pool.work_qsize()}") + self.thread_pool.shutdown(wait=True) + logger.info(f"循环线程退出 - {threading.current_thread().name}") + break + + try: + self.add_worker(queue.get(timeout=3)) + except Empty: + logger.debug("Workers: 空Worker Queue.") + pass def start(self, queue: Queue) -> threading.Thread: - _t = threading.Thread(target=self.run, args=(queue,)) + _t = threading.Thread(target=self.run, args=(queue,), name="workers_main") _t.start() logger.info("Worker Main Thread Start...") return _t diff --git a/alist_sync/data_handle.py b/alist_sync/data_handle.py index 6e90eba..a418e1f 100644 --- a/alist_sync/data_handle.py +++ b/alist_sync/data_handle.py @@ -77,7 +77,7 @@ class MongoHandle(HandleBase): def __init__(self, mongodb: "Database"): self._workers: "Collection" = mongodb.workers self._items: "Collection" = mongodb.items - self._logs: "Collection" = mongodb.create_collection() + self._logs: "Collection" = mongodb.logs def create_log(self, worker: "Worker"): logger.info(f"create log {worker.id} {worker.status}") @@ -104,6 +104,7 @@ def get_worker(self, worker_id: str): return self._workers.find_one({"_id": worker_id}) def get_workers(self, query=None) -> Iterable: + logger.debug("查询未完成的Worker.") if query is None: query = {} return self._workers.find(query) diff --git a/alist_sync/thread_pool.py b/alist_sync/thread_pool.py index e70646c..945e1c4 100644 --- a/alist_sync/thread_pool.py +++ b/alist_sync/thread_pool.py @@ -3,6 +3,9 @@ class MyThreadPoolExecutor(ThreadPoolExecutor): + def work_qsize(self): + return self._work_queue.qsize() + def wait(self): while self._work_queue.qsize(): time.sleep(3) From 83a9ab0642d03f3b27bbdd53fd810e6c416355a9 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Thu, 29 Feb 2024 18:20:38 +0800 Subject: [PATCH 11/21] 0.1.0-a10 --- .github/workflows/alist-sync.yaml | 55 +++++++++++++++++++++++++++++++ README.md | 22 +++++++++++++ bootstarp.sh | 32 ++++-------------- tools/create_storage.py | 38 +++++++++++++++++++++ {tests => tools}/init_alist.sh | 8 +++-- 5 files changed, 127 insertions(+), 28 deletions(-) create mode 100644 .github/workflows/alist-sync.yaml create mode 100644 tools/create_storage.py rename {tests => tools}/init_alist.sh (82%) mode change 100755 => 100644 diff --git a/.github/workflows/alist-sync.yaml b/.github/workflows/alist-sync.yaml new file mode 100644 index 0000000..003e938 --- /dev/null +++ b/.github/workflows/alist-sync.yaml @@ -0,0 +1,55 @@ +name: Alist Sync + +on: + workflow_dispatch: + +jobs: + run: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Setup Python v4 + uses: actions/setup-python@v4 + with: + python-version: 3.11 + + - name: Install alist-sync + run: | + pip install --upgrade pip + pip install -e . + + - name: Load Cache + uses: actions/cache@v4 + with: + a: b + + - name: Install and Init Alist Server + env: + _ALIST_CONFIG: ${{}} + _ALIST_ADMIN_PASSWORD: ${{}} + run: | + cat > "alist-config.json" << EOF + ${{ secrets.ALIST_CONFIG }} + EOF + bash -ex bootstrap.sh init-alist + + - name: Create Storage for Alist + env: + _ALIST_BACKUP: ${{}} + _ALIST_ADMIN_PASSWORD: ${} + _ALIST_BACKUP_URL: ${{}} + _ALIST_BACKUP_USERNAME: ${{}} + _ALIST_BACKUP_PASSWORD: ${{}} + run: | + cat > alist-backup-config.json < EOF + ${{ secrets.ALIST_CONFIG }} + EOF + python3 tools/create_storage.py + + - name: RUN + env: + SYNC_CONFIG: ${{}} + run: | + ... \ No newline at end of file diff --git a/README.md b/README.md index ce83ba8..e824e30 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,28 @@ Alist 同步工具 终极目标:利用GitHub Actions实现使用Github的计算和网络资源定期同步各个网盘之间的数据。 +## 本地运行 + +```bash +python -m alist_sync -c "" +``` + +## Actions 运行 + +1. FORK存储库 + +2. 创建存储库机密(Action): + 1. ALIST_CONFIG: (可选) [JSON] 指定Alist静态配置(data/config.json) + 2. ALIST_BACKUP: 可选 [JSON] 现有ALIST网页端导出的备份配置 + 3. ALIST_BACKUP_URL: 可选 [URL] 可以从远程导出BACKUP的URL + 4. ALIST_BACKUP_USERNAME: 可选 [string] + 5. ALIST_BACKUP_PASSWORD: 可选 [string] + 6. ALIST_USERNAME: + 7. ALIST_PASSWORD: + 8. SYNC_CONFIG: [YAML|JSON] Alist-sync的配置 + +3. 启用Action + ## 预期同步模式: ### 1. copy 简单复制 (已实现) diff --git a/bootstarp.sh b/bootstarp.sh index bd36c6e..09e3430 100755 --- a/bootstarp.sh +++ b/bootstarp.sh @@ -8,9 +8,9 @@ all_clear() { echo "__pycache__" find . -type d -name "__pycache__" -exec rm -rf {} \; 2>/dev/null echo ".cache" - rm -rf alist_sync/.cache alist_sync.egg-info + rm -rf alist_sync/.alist-sync-cache alist_sync.egg-info echo "alist-test-dir" - rm -rf tests/alist/test_* tests/alist/data tests/alist/daemon + rm -rf alist/test_* alist/data alist/daemon } case $1 in @@ -21,7 +21,7 @@ install) ;; alist) - cd tests/alist || { + cd alist || { echo "Error: tests/alist not find " exit 1 } @@ -31,28 +31,8 @@ alist) alist-init) pkill alist - rm -rf tests/alist - tests/init_alist.sh - ;; - -alist-version) - cd tests/alist || { - echo "未初始化 - 执行 alist-init" - exit 2 - } - ./alist version - ;; - -alist-run) - cd tests/alist || { - echo "未初始化 - 执行 alist-init" - exit 2 - } - ./alist restart - ;; - -alist-stop) - ./tests/alist/alist stop || pkill alist + rm -rf alist/* + sh tools/init_alist.sh alist/ ;; clear) @@ -75,7 +55,7 @@ debugger) ;; *) - echo "Usage: $0 {install|alist-init|alist-version|alist-run|alist-stop|clear|debugger|test}" + echo "Usage: $0 {install|alist-init|clear|debugger|test}" exit 1 ;; esac diff --git a/tools/create_storage.py b/tools/create_storage.py new file mode 100644 index 0000000..980b8c3 --- /dev/null +++ b/tools/create_storage.py @@ -0,0 +1,38 @@ +#!/bin/env python3 +"""存储器 + +1. 从缓存读取 +2. 从URL读取 +3. 从文件读取 +""" +import json +import os +from pathlib import Path + +from alist_sdk.tools.client import ExtraClient + +PROJECT_ROOT = Path(__file__).parent.parent.absolute() + +alist_config = json.loads(PROJECT_ROOT.joinpath("alist/data/config.json").read_text()) + +alist_port = alist_config["scheme"]["http_port"] +admin_password = os.getenv("_ALIST_ADMIN_PASSWORD", "123456") + +remote_url = os.getenv("_ALIST_BACKUP_URL") +remote_username = os.getenv("_ALIST_BACKUP_USERNAME") +remote_password = os.getenv("_ALIST_BACKUP_PASSWORD") + +local_client = ExtraClient( + base_url=f"http://localhost:{alist_port}", + username="admin", + password=admin_password, +) + +remote_client = None +if remote_url: + remote_client = ExtraClient( + base_url=remote_url, + username=remote_username, + password=remote_password, + verify=False, + ) diff --git a/tests/init_alist.sh b/tools/init_alist.sh old mode 100755 new mode 100644 similarity index 82% rename from tests/init_alist.sh rename to tools/init_alist.sh index bab4d5b..2bea880 --- a/tests/init_alist.sh +++ b/tools/init_alist.sh @@ -1,6 +1,10 @@ #!/usr/bin/env bash -cd "$(dirname "$0")" || exit +cd "${1:-$(dirname "$0")}" || exit + +echo "INSTALL ALIST. PWD: $(pwd)" + +__AIST_ADMIN_PASSWORD="${_ALIST_ADMIN_PASSWORD:-123456}" mkdir -p alist cd alist || exit @@ -37,5 +41,5 @@ if [[ ! -f alist && ! -f "alist.exe" ]]; then fi rm -rf data/ test_dir/ -./alist admin set 123456 +./alist admin set "${__AIST_ADMIN_PASSWORD}" ./alist restart From 2e4ffefb94887e7122a5f80b163797ee64dd11f8 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Fri, 1 Mar 2024 00:14:50 +0800 Subject: [PATCH 12/21] 0.1.0-a11 --- .github/workflows/alist-sync.yaml | 54 ++++++++---- .github/workflows/test.yaml | 2 +- alist_sync/__main__.py | 22 ++++- alist_sync/base_sync.py | 2 +- alist_sync/config.py | 21 ++++- alist_sync/d_checker.py | 14 +++- alist_sync/d_main.py | 8 +- alist_sync/d_worker.py | 77 ++++++++++------- alist_sync/data_handle.py | 26 ++++-- alist_sync/downloader.py | 132 ++++++++++++++++++++++++++---- alist_sync/run_copy.py | 2 +- alist_sync/run_sync.py | 2 +- alist_sync/run_sync_incr.py | 2 +- config-template.yaml | 35 ++++++++ tools/create_storage.py | 41 +++++++++- 15 files changed, 355 insertions(+), 85 deletions(-) diff --git a/.github/workflows/alist-sync.yaml b/.github/workflows/alist-sync.yaml index 003e938..d28639a 100644 --- a/.github/workflows/alist-sync.yaml +++ b/.github/workflows/alist-sync.yaml @@ -2,9 +2,19 @@ name: Alist Sync on: workflow_dispatch: + inputs: + reload_storage: + description: | + 是否重新创建存储: true or false + 默认情况下,如果存储已经存在,则不会重新创建 + 如果指定了true,则会删除全部的存储器,并从新载入 + required: false + default: false + type: boolean jobs: run: + name: alist-sync runs-on: ubuntu-latest steps: - name: Checkout @@ -21,35 +31,47 @@ jobs: pip install -e . - name: Load Cache + id: alist-data-cache uses: actions/cache@v4 with: - a: b + key: ${{ runner.name }}-alist-sync-data + path: | + ./.alist-sync-cache/*.db + ./.alist-sync-cache/*.json + ./alist/data - name: Install and Init Alist Server env: - _ALIST_CONFIG: ${{}} - _ALIST_ADMIN_PASSWORD: ${{}} + _ALIST_ADMIN_PASSWORD: ${{ secrets.ALIST_ADMIN_PASSWORD }} run: | - cat > "alist-config.json" << EOF - ${{ secrets.ALIST_CONFIG }} - EOF - bash -ex bootstrap.sh init-alist + # init-alist 总是会重置管理员密码为环境变量。 + bash -ex bootstrap.sh init-alist.sh + - - name: Create Storage for Alist + - name: Create Storage for Alist if load cache failed + if: ${{ github.event.inputs.reload_storage == 'true' }} or ${{ steps.alist-data-cache.outputs.cache-hit != 'true'}} env: - _ALIST_BACKUP: ${{}} - _ALIST_ADMIN_PASSWORD: ${} - _ALIST_BACKUP_URL: ${{}} - _ALIST_BACKUP_USERNAME: ${{}} - _ALIST_BACKUP_PASSWORD: ${{}} + _ALIST_ADMIN_PASSWORD: ${{ secrets.ALIST_ADMIN_PASSWORD }} + + _ALIST_BACKUP: ${{secrets.ALIST_BACKUP}} + _ALIST_BACKUP_URL: ${{secrets.ALIST_BACKUP_URL}} + _ALIST_BACKUP_USERNAME: ${{secrets.ALIST_BACKUP_USERNAME}} + _ALIST_BACKUP_PASSWORD: ${{secrets.ALIST_BACKUP_PASSWORD}} + + _RELOAD_STORAGE: ${{ github.event.inputs.reload_storage }} run: | + # 这将会导入全部的内容包括:设置,元数据,用户,存储器。 + cat > alist-backup-config.json < EOF - ${{ secrets.ALIST_CONFIG }} + ${{ secrets.ALIST_BACKUP_CONFIG }} EOF python3 tools/create_storage.py - name: RUN env: - SYNC_CONFIG: ${{}} + SYNC_CONFIG: ${{secrets.SYNC_CONFIG}} run: | - ... \ No newline at end of file + cat > alist-backup-config.json < EOF + ${{ secrets.ALIST_BACKUP_CONFIG }} + EOF + python3 -m alist_sync sync -c $SYNC_CONFIG \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a046e0a..cc8e55e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - alist-version: [ '3.30.0' ] + alist-version: [ '3.31.0' ] python-version: [ '3.10', '3.11', '3.12' ] fail-fast: false diff --git a/alist_sync/__main__.py b/alist_sync/__main__.py index 6f0435a..9d1776d 100644 --- a/alist_sync/__main__.py +++ b/alist_sync/__main__.py @@ -199,11 +199,29 @@ def sync_incr( @app.command("test-config") -def t_config(): +def t_config(config_file: str = Option(None, "--config", "-c", help="配置文件路径")): """测试配置""" from alist_sync.config import create_config - echo(create_config().dump_to_yaml()) + if config_file and Path(config_file).exists(): + os.environ["ALIST_SYNC_CONFIG"] = str(Path(config_file).resolve().absolute()) + + _c = create_config() + echo(_c.dump_to_yaml()) + logger.info("Done.") + + +@app.command("sync") +def sync(config_file: str = Option(None, "--config", "-c", help="配置文件路径")): + """同步任务""" + from alist_sync.config import create_config + from alist_sync.d_main import main + + if config_file and Path(config_file).exists(): + os.environ["ALIST_SYNC_CONFIG"] = str(Path(config_file).resolve().absolute()) + + create_config() + return main() if __name__ == "__main__": diff --git a/alist_sync/base_sync.py b/alist_sync/base_sync.py index aee705f..4414ede 100644 --- a/alist_sync/base_sync.py +++ b/alist_sync/base_sync.py @@ -6,7 +6,7 @@ from alist_sync.alist_client import AlistClient from alist_sync.common import timeout_input -from alist_sync.models import AlistServer +from alist_sync.config import AlistServer logger = logging.getLogger("alist-sync.base") diff --git a/alist_sync/config.py b/alist_sync/config.py index 4a4cf40..c74d0ea 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -6,7 +6,7 @@ from datetime import datetime from pathlib import Path from functools import cached_property, lru_cache -from typing import Optional, Literal, TYPE_CHECKING +from typing import Optional, Literal, TYPE_CHECKING, Any from alist_sdk import AlistPathType, AlistPath from httpx import URL @@ -28,8 +28,8 @@ def create_config(): "ALIST_SYNC_CONFIG", Path(__file__).parent.parent / "config.yaml" ) - sync_config = Config.load_from_yaml(config_file) - setattr(builtins, "sync_config", sync_config) + _sync_config = Config.load_from_yaml(config_file) + setattr(builtins, "sync_config", _sync_config) return sync_config @@ -165,6 +165,16 @@ def __hash__(self): sync_groups: list[SyncGroup] = [] create_time: datetime = datetime.now() + logs: dict = None + + def __init__(self, **data: Any): + super().__init__(**data) + import logging.config + + _ = self.start_time + if self.logs: + Path("logs").mkdir(exist_ok=True, parents=True) + logging.config.dictConfig(self.logs) @cached_property def start_time(self) -> int: @@ -225,6 +235,8 @@ def dump_to_yaml(self, file: Path = None): return safe_dump( self.model_dump(mode="json"), file.open("wb") if file else None, + sort_keys=False, + allow_unicode=True, ) def load_from_mongo(self, uri: str = None): @@ -242,6 +254,9 @@ def dump_to_mongodb(self): ) +sync_config = create_config() + + if __name__ == "__main__": # config = create_config() # print(config) diff --git a/alist_sync/d_checker.py b/alist_sync/d_checker.py index 62056c0..72fa7f8 100644 --- a/alist_sync/d_checker.py +++ b/alist_sync/d_checker.py @@ -17,7 +17,7 @@ from alist_sync.config import create_config, SyncGroup from alist_sync.d_worker import Worker from alist_sync.thread_pool import MyThreadPoolExecutor -from common import prefix_in_threads +from alist_sync.common import prefix_in_threads logger = logging.getLogger("alist-sync.d_checker") sync_config = create_config() @@ -58,7 +58,6 @@ def checker_every_dir(self, path) -> Iterator[Worker | None]: if _sd == _sync_dir: continue target_path = _sd.joinpath(_relative_path) - logger.debug(f"Check: {target_path} -> {path}") yield self.checker(path, target_path) def _t_checker(self, path): @@ -69,6 +68,7 @@ def _t_checker(self, path): def main(self): """""" logger.info(f"Checker Started - name: {self.main_thread.name}") + _started = False while True: if ( self.scaner_queue.empty() @@ -80,10 +80,16 @@ def main(self): break try: + _started = True self._t_checker(self.scaner_queue.get(timeout=3)) except Empty: - logger.debug("Checkers: 空 Scaner 队列.") - pass + if _started: + continue + logger.info( + f"Checkers: 空 Scaner 队列, 如果没有新的任务, " + f"{sync_config.timeout - (time.time() - sync_config.start_time):d}" + f"秒后退出" + ) def start(self) -> threading.Thread: self.main_thread.start() diff --git a/alist_sync/d_main.py b/alist_sync/d_main.py index 05443b3..eb456ce 100644 --- a/alist_sync/d_main.py +++ b/alist_sync/d_main.py @@ -87,8 +87,8 @@ def main(): if __name__ == "__main__": - logger_alist_sync = logging.getLogger("alist-sync") - logger_alist_sync.setLevel(logging.DEBUG) - logger_alist_sync.addHandler(logging.StreamHandler()) - logger.info("Begin...") + # logger_alist_sync = logging.getLogger("alist-sync") + # logger_alist_sync.setLevel(logging.DEBUG) + # logger_alist_sync.addHandler(logging.StreamHandler()) + # logger.info("Begin...") main() diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index fab0c1e..7442956 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -61,7 +61,7 @@ class Worker(BaseModel): def __init__(self, **data: Any): super().__init__(**data) - logger.info(f"Worker[{self.id}] Created: {self.__repr__()}") + logger.info(f"Worker[{self.short_id}] Created: {self.__repr__()}") def __repr__(self): return f" {self.target_path}>" @@ -86,10 +86,11 @@ def update(self, **field: Any): self.__dict__.update(field) if self.status in ["done", "failed"]: - logger.info(f"Worker[{self.id}] is {self.status}.") + logger.info(f"Worker[{self.short_id}] is {self.status}.") self.done_at = datetime.datetime.now() sync_config.handle.create_log(self) return sync_config.handle.delete_worker(self.id) + return sync_config.handle.update_worker(self, *field.keys()) def backup(self): @@ -117,7 +118,7 @@ def backup(self): assert _backup_target_json.re_stat() is not None self.update(status="back-upped") - logger.info(f"Worker[{self.id}] Backup Success.") + logger.info(f"Worker[{self.short_id}] Backup Success.") def downloader(self): """HTTP多线程下载""" @@ -155,19 +156,20 @@ def copy_single_stream(self): ) assert res.code == 200 + logger.info(f"Worker[{self.short_id}] Upload File [{res.code}] {res.message}.") self.update(status="uploaded") def copy_type(self): """复制任务""" - logger.debug(f"Worker[{self.id}] Start Copping") - - if self.source_path.stat().size < 10 * 1024 * 1024: - self.target_path.unlink(missing_ok=True) - self.target_path.parent.mkdir(parents=True, exist_ok=True) - _res = self.target_path.write_bytes(self.source_path.read_bytes()) + logger.debug(f"Worker[{self.short_id}] Start Copping") - else: - self.copy_single_stream() + # if self.source_path.stat().size < 10 * 1024 * 1024: + # self.target_path.unlink(missing_ok=True) + # self.target_path.parent.mkdir(parents=True, exist_ok=True) + # _res = self.target_path.write_bytes(self.source_path.read_bytes()) + # + # else: + self.copy_single_stream() assert self.target_path.re_stat().size == self.source_path.stat().size return self.update(status="copied") @@ -184,9 +186,9 @@ def recheck(self): def run(self): """启动Worker""" - logger.info(f"worker[{self.id}] 已经安排启动.") + logger.info(f"worker[{self.short_id}] 已经开始工作.") self.update() - logger.debug(f"Worker[{self.id}] Updated to DB.") + logger.debug(f"Worker[{self.short_id}] Updated to DB.") try: if self.status in ["done", "failed"]: return @@ -202,11 +204,11 @@ def run(self): self.delete_type() assert self.recheck() - self.update(status="done") + self.update(status=f"Worker[{self.short_id}] Done.") except Exception as _e: + logger.error(f"worker[{self.short_id}] 出现错误: {_e}") self.error_info = str(_e) self.update(status="failed") - logger.error(f"worker[{self.id}] 出现错误: {_e}") class Workers: @@ -248,6 +250,7 @@ def run(self, queue: Queue): # self.lockers |= sync_config.handle.load_locker() # for i in sync_config.handle.get_workers(): # self.add_worker(Worker(**i), is_loader=True) + _started = False while True: if ( queue.empty() @@ -255,16 +258,24 @@ def run(self, queue: Queue): and not prefix_in_threads("checker_") and time.time() - sync_config.start_time > sync_config.timeout ): - logger.info(f"等待Worker执行完成, 排队中的数量: {self.thread_pool.work_qsize()}") - self.thread_pool.shutdown(wait=True) + logger.info( + f"等待Worker执行完成, 排队中的数量: {self.thread_pool.work_qsize()}" + ) + self.thread_pool.shutdown(wait=True, cancel_futures=False) logger.info(f"循环线程退出 - {threading.current_thread().name}") break try: + _started = True self.add_worker(queue.get(timeout=3)) except Empty: - logger.debug("Workers: 空Worker Queue.") - pass + if _started: + continue + logger.info( + f"Checkers: 空 Scaner 队列, 如果没有新的任务, " + f"{sync_config.timeout - (time.time() - sync_config.start_time):d}" + f"秒后退出" + ) def start(self, queue: Queue) -> threading.Thread: _t = threading.Thread(target=self.run, args=(queue,), name="workers_main") @@ -274,15 +285,23 @@ def start(self, queue: Queue) -> threading.Thread: if __name__ == "__main__": - from alist_sdk import AlistPath - - _w = Worker( - type="copy", - need_backup=False, - source_path=AlistPath("http://local:/sc"), - target_path="http://target_path", + from alist_sdk import AlistPath, login_server + + _w = Worker.model_validate( + { + "owner": "test", + "created_at": "2024-02-29T23:17:46.992805", + "type": "copy", + "need_backup": False, + "backup_dir": None, + "source_path": "http://localhost:5244/local/config.json", + "target_path": "http://localhost:5244/ftp/config.json", + "id": "6b19d34f229c22de4073db8b4feff8932d773ac2", + } ) + + for s in sync_config.alist_servers: + login_server(**s.dump_for_alist_path()) + print(_w.tmp_file, type(_w.source_path), _w.target_path) - print() - print(_w.model_dump()) - print(_w.model_dump(mode="json")) + _w.run() diff --git a/alist_sync/data_handle.py b/alist_sync/data_handle.py index a418e1f..2770dd4 100644 --- a/alist_sync/data_handle.py +++ b/alist_sync/data_handle.py @@ -89,7 +89,7 @@ def update_worker(self, worker: "Worker", *field): else: data = {k: worker.__dict__.get(k) for k in field} - logger.debug(f"更新Worker[{worker.id}]: {data}") + logger.debug(f"Worker[{worker.id}]: Update: {data}") return self._workers.update_one( {"_id": worker.id}, {"$set": data}, @@ -101,6 +101,7 @@ def delete_worker(self, worker_id: str): return self._workers.delete_one({"_id": worker_id}) def get_worker(self, worker_id: str): + logger.debug("获取Worker: %s", worker_id) return self._workers.find_one({"_id": worker_id}) def get_workers(self, query=None) -> Iterable: @@ -160,7 +161,9 @@ def __init__(self, save_dir: Path): self._items = shelve.open( str(save_dir.joinpath("alist_cache_items.shelve")), writeback=True ) - self._logs = open(str(save_dir.joinpath("alist-sync-files.log", "a+"))) + self._logs = save_dir.joinpath( + "alist-sync-files.log", + ).open("a+") def __del__(self): self._workers.close() @@ -168,25 +171,32 @@ def __del__(self): self._logs.close() def create_log(self, worker: "Worker"): - logger.info(f"create log for: {worker.id}") + logger.debug(f"create log for: {worker.id}") self._logs.write(worker.model_dump_json()) def update_worker(self, worker: "Worker", *field): - self._workers[worker.id] = worker + logger.debug(f"Shelve[{worker.id}] update to workers") + self._workers[worker.id] = worker.model_dump(mode="json") + self._workers.sync() def delete_worker(self, worker_id: str): - logger.info(f"Worker[{worker_id}] remove from workers") - self._workers.pop(worker_id) + logger.debug(f"Worker[{worker_id}] remove from workers") + try: + self._workers.pop(worker_id) + except KeyError: + pass def get_worker(self, worker_id: str): + logger.debug(f"get Worker[{worker_id}] from workers") return self._workers.get(worker_id) def get_workers(self, query=None) -> Iterable["Worker"]: + logger.debug(f"get Workers from workers") for _w in self._workers.values(): yield _w def load_locker(self) -> set[AlistPath]: - logger.info("正在加载Shelve中保存的锁。") + logger.debug("正在加载Shelve中保存的锁。") return { AlistPath(p) for _w in self._workers.values() @@ -198,6 +208,7 @@ def path_in_workers(self, path: AlistPath) -> bool: return path in self.load_locker() def update_file_item(self, path: AlistPath, item, *field): + logger.debug(f"FileItem[{path}] update to items") self._items[path.as_uri()] = { "id": path, "update_time": datetime.datetime.now(), @@ -205,4 +216,5 @@ def update_file_item(self, path: AlistPath, item, *field): } def get_file_item(self, item_id: AlistPath): + logger.debug(f"get FileItem[{item_id}] from items") return self._items.get(item_id.as_uri(), {}).get("item") diff --git a/alist_sync/downloader.py b/alist_sync/downloader.py index a275318..780d38c 100644 --- a/alist_sync/downloader.py +++ b/alist_sync/downloader.py @@ -7,34 +7,138 @@ 下载器使用一个单独线程启动,它创建一个事件循环并在其内部保持同步。 """ +import queue import time import urllib.parse from typing import TYPE_CHECKING from alist_sdk import AlistPath -from alist_sync.alist_client import create_async_client +from alist_sync.alist_client import create_async_client, AlistClient if TYPE_CHECKING: ... def upload_stream(source_path: AlistPath, target_path: AlistPath): + import asyncio + + _queue = asyncio.Queue() + + # noinspection PyArgumentList + async def put_stream(): + _c: AlistClient = create_async_client(source_path.client) + async with _c.stream( + "GET", + source_path.as_download_uri(), + follow_redirects=True, + ) as stream_resp: + async for chunk in stream_resp.aiter_bytes(): + print("Put Chunk size:", len(chunk), "bytes.") + await _queue.put(chunk) + else: + await _queue.put(None) + + async def put_test(): + a = open("/Users/lcq/Downloads/drunbility.pdf", "rb") + while True: + chunk = a.read(1024) + if not chunk: + await _queue.put(None) + break + await _queue.put(chunk) + + async def get_chunk(): + while True: + chunk = await _queue.get() + if chunk is None: + break + yield chunk + + # noinspection PyAsyncCall async def main(): - async with create_async_client(source_path.client).stream( + + asyncio.create_task(put_test()) + return await create_async_client(target_path.client).put( + "/api/fs/put", + headers={ + "As-Task": "false", + "Content-Type": "application/octet-stream", + "Last-Modified": str(int(time.time() * 1000)), + "File-Path": urllib.parse.quote_plus(str(target_path.as_posix())), + }, + content=get_chunk(), + ) + + return asyncio.run(main()) + + +def upload_stream_thread(source_path: AlistPath, target_path: AlistPath): + """""" + + def put_stream(): + with source_path.client.stream( "GET", source_path.as_download_uri(), follow_redirects=True ) as stream_resp: - return await create_async_client(target_path.client).put( - "/api/fs/put", - headers={ - "As-Task": "false", - "Content-Type": "application/octet-stream", - "Last-Modified": str(int(time.time() * 1000)), - "File-Path": urllib.parse.quote_plus(str(target_path.as_posix())), - }, - content=stream_resp.aiter_bytes(), - ) + total = 0 + for i in stream_resp.iter_bytes(): + total += len(i) + print("Download Chunk size:", total, "bytes.") + _queue.put(i) - import asyncio + print("None Putted") + _queue.put(None) - return asyncio.run(main()) + def get_chunk(): + total = 0 + while True: + chunk = _queue.get() + if chunk is None: + print("None Got") + break + total += len(chunk) + print(f"\rupload Chunk size: {total} bytes. [{time.time()}]", end="") + yield chunk + + import threading + + _queue = queue.Queue() + + threading.Thread(target=put_stream).start() + print("启动线程") + return target_path.client.put( + "/api/fs/put", + timeout=0, + headers={ + "As-Task": "false", + "Content-Type": "application/octet-stream", + "Last-Modified": str(int(time.time() * 1000)), + "File-Path": urllib.parse.quote_plus(str(target_path.as_posix())), + }, + # content=open("/Users/lcq/Movies/志愿军-雄军出击-含广告.mp4", "rb"), + content=get_chunk(), + ) + + + + +if __name__ == "__main__": + from alist_sync.config import create_config, AlistServer + from alist_sdk import login_server + + import logging + + logging.basicConfig(level=logging.DEBUG) + + sync_config = create_config() + for _ in sync_config.alist_servers: + _: AlistServer + login_server(**_.dump_for_alist_path()) + + source = AlistPath("https://alist.leecq.cn/local/Country.mmdb") + target = AlistPath("http://localhost:5244/local/Country.mmdb") + + print(source.stat()) + # a = upload_stream_thread(source, target) + a = upload_big_file(target) + print(a.text) diff --git a/alist_sync/run_copy.py b/alist_sync/run_copy.py index 94e4e53..eb1b185 100644 --- a/alist_sync/run_copy.py +++ b/alist_sync/run_copy.py @@ -4,7 +4,7 @@ from alist_sync.checker import check_dir from alist_sync.common import async_all_task_names from alist_sync.job_copy import CopyJob -from alist_sync.models import AlistServer +from alist_sync.config import AlistServer logger = logging.getLogger("alist-sync.copy-to-target") diff --git a/alist_sync/run_sync.py b/alist_sync/run_sync.py index dd5c9b1..ee7d8dd 100644 --- a/alist_sync/run_sync.py +++ b/alist_sync/run_sync.py @@ -10,7 +10,7 @@ from alist_sync.base_sync import SyncBase from alist_sync.checker import check_dir from alist_sync.job_copy import CopyJob -from alist_sync.models import AlistServer +from alist_sync.config import AlistServer logger = logging.getLogger("alist-sync.run-sync") diff --git a/alist_sync/run_sync_incr.py b/alist_sync/run_sync_incr.py index 8c52aa1..2a7d501 100644 --- a/alist_sync/run_sync_incr.py +++ b/alist_sync/run_sync_incr.py @@ -12,7 +12,7 @@ from pathlib import PurePosixPath from alist_sync.base_sync import SyncBase -from alist_sync.models import AlistServer +from alist_sync.config import AlistServer class SyncIncr(SyncBase): diff --git a/config-template.yaml b/config-template.yaml index ae0685c..8323b16 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -78,3 +78,38 @@ notify: # 通知服务,当触发一些异常后,将会发送通知。 webhook_url: "" headers: K: V + +logs: + version: 1 + disable_existing_loggers: true + formatters: + simple: + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + handlers: + console: + class: logging.StreamHandler + level: DEBUG + formatter: simple + stream: ext://sys.stdout + + file: + class: logging.handlers.RotatingFileHandler + level: DEBUG + formatter: simple + filename: ./logs/alist-sync.log + maxBytes: 10485760 # 10MB + backupCount: 5 + encoding: utf8 + + loggers: + alist-sync: + level: DEBUG + handlers: + - console + - file + + alist-sdk: + level: INFO + handlers: + - console + - file \ No newline at end of file diff --git a/tools/create_storage.py b/tools/create_storage.py index 980b8c3..f8e3c4e 100644 --- a/tools/create_storage.py +++ b/tools/create_storage.py @@ -9,6 +9,7 @@ import os from pathlib import Path +from alist_sdk import Storage from alist_sdk.tools.client import ExtraClient PROJECT_ROOT = Path(__file__).parent.parent.absolute() @@ -28,11 +29,49 @@ password=admin_password, ) +# 如果--, 删除全部存储器 +if os.getenv("_RELOAD_STORAGE"): + for i in local_client.admin_storage_list().data.content: + local_client.admin_storage_delete(i.id) + +# 创建本地存储器 +local_client.admin_storage_create( + Storage.model_validate( + { + "mount_path": "/alist-sync", + "order": 0, + "driver": "Local", + "cache_expiration": 0, + "status": "work", + "addition": json.dumps( + { + "root_folder_path": str(PROJECT_ROOT.joinpath("alist/alist-sync")), + "thumbnail": False, + "thumb_cache_folder": "", + "show_hidden": True, + "mkdir_perm": "777", + } + ), + "remark": "", + "disabled": False, + "enable_sign": False, + "order_by": "name", + "order_direction": "asc", + "extract_folder": "", + "web_proxy": False, + "webdav_policy": "native_proxy", + "down_proxy_url": "", + } + ) +) + + remote_client = None if remote_url: - remote_client = ExtraClient( + local_client.import_config_from_other_client( base_url=remote_url, username=remote_username, password=remote_password, verify=False, ) + exit(0) From eaa4785ce5e463111333befb88064abed29081ab Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Fri, 1 Mar 2024 17:49:43 +0800 Subject: [PATCH 13/21] 0.1.0-a12 --- .github/workflows/alist-sync.yaml | 5 ++-- .gitignore | 5 ++++ alist_sync/d_worker.py | 42 +++++++++++++++++++++---------- bootstarp.sh => bootstrap.sh | 2 +- tools/create_storage.py | 19 +++++++++++--- 5 files changed, 54 insertions(+), 19 deletions(-) rename bootstarp.sh => bootstrap.sh (96%) mode change 100755 => 100644 diff --git a/.github/workflows/alist-sync.yaml b/.github/workflows/alist-sync.yaml index d28639a..633ac32 100644 --- a/.github/workflows/alist-sync.yaml +++ b/.github/workflows/alist-sync.yaml @@ -1,4 +1,5 @@ name: Alist Sync +run-name: ${{}} on: workflow_dispatch: @@ -14,7 +15,7 @@ on: jobs: run: - name: alist-sync + name: "sync-actions" runs-on: ubuntu-latest steps: - name: Checkout @@ -74,4 +75,4 @@ jobs: cat > alist-backup-config.json < EOF ${{ secrets.ALIST_BACKUP_CONFIG }} EOF - python3 -m alist_sync sync -c $SYNC_CONFIG \ No newline at end of file + python3 -m alist_sync sync -c $SYNC_CONFIG diff --git a/.gitignore b/.gitignore index d636786..ceb6214 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,11 @@ tmp/ config.json config.yaml +.alist-sync-cache/ +alist_sync/.alist-sync-cache/ +alist/ +tools/alist/ + # C extensions *.so diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 7442956..4ad7ff7 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -1,6 +1,7 @@ import atexit import datetime import logging +import os import threading import time from pathlib import Path @@ -9,11 +10,13 @@ from pydantic import BaseModel, computed_field, Field from pymongo.collection import Collection +from httpx import Client from alist_sdk.path_lib import AbsAlistPathType, AlistPath from alist_sync.config import create_config from alist_sync.common import sha1, prefix_in_threads from alist_sync.thread_pool import MyThreadPoolExecutor +from alist_sync.version import __version__ sync_config = create_config() @@ -32,6 +35,10 @@ logger = logging.getLogger("alist-sync.worker") +downloader_client = Client( + headers={"User-Agent": sync_config.ua or f"alist-sync/{__version__}"} +) + # noinspection PyTypeHints class Worker(BaseModel): @@ -128,7 +135,7 @@ def copy_single_stream(self): # download _tmp = self.tmp_file.open("wb") - with self.source_path.client.stream( + with downloader_client.stream( "GET", self.source_path.get_download_uri(), follow_redirects=True, @@ -137,6 +144,7 @@ def copy_single_stream(self): _tmp.write(i) _tmp.close() self.update(status="downloaded") + # upload with self.tmp_file.open("rb") as fs: res = self.target_path.client.verify_request( @@ -163,15 +171,14 @@ def copy_type(self): """复制任务""" logger.debug(f"Worker[{self.short_id}] Start Copping") - # if self.source_path.stat().size < 10 * 1024 * 1024: - # self.target_path.unlink(missing_ok=True) - # self.target_path.parent.mkdir(parents=True, exist_ok=True) - # _res = self.target_path.write_bytes(self.source_path.read_bytes()) - # - # else: + self.target_path.unlink(missing_ok=True) + self.target_path.parent.mkdir(parents=True, exist_ok=True) self.copy_single_stream() - assert self.target_path.re_stat().size == self.source_path.stat().size + assert ( + self.target_path.re_stat(retry=5, timeout=2).size + == self.source_path.stat().size + ) return self.update(status="copied") def delete_type(self): @@ -209,6 +216,8 @@ def run(self): logger.error(f"worker[{self.short_id}] 出现错误: {_e}") self.error_info = str(_e) self.update(status="failed") + if os.getenv("ALIST_SYNC_DEBUG"): + raise _e class Workers: @@ -278,7 +287,11 @@ def run(self, queue: Queue): ) def start(self, queue: Queue) -> threading.Thread: - _t = threading.Thread(target=self.run, args=(queue,), name="workers_main") + _t = threading.Thread( + target=self.run, + args=(queue,), + name="workers_main", + ) _t.start() logger.info("Worker Main Thread Start...") return _t @@ -290,13 +303,16 @@ def start(self, queue: Queue) -> threading.Thread: _w = Worker.model_validate( { "owner": "test", - "created_at": "2024-02-29T23:17:46.992805", + "created_at": "2024-03-01T15:59:37.222074", + "done_at": "2024-03-01T15:59:42.568337", "type": "copy", "need_backup": False, "backup_dir": None, - "source_path": "http://localhost:5244/local/config.json", - "target_path": "http://localhost:5244/ftp/config.json", - "id": "6b19d34f229c22de4073db8b4feff8932d773ac2", + "source_path": "http://localhost:5244/onedrive/HuaZhang.sqlite", + "target_path": "http://localhost:5244/Drive-New/HuaZhang.sqlite", + "status": "init", + "error_info": "", + "id": "228e0fa2906875ea18c83f4aa4c40aaa84d1d47e", } ) diff --git a/bootstarp.sh b/bootstrap.sh old mode 100755 new mode 100644 similarity index 96% rename from bootstarp.sh rename to bootstrap.sh index 09e3430..fa2c856 --- a/bootstarp.sh +++ b/bootstrap.sh @@ -42,7 +42,7 @@ clear) test) whereis pytest || pip install pytest clear - ./tests/alist/alist stop || pkill alist + ./alist/alist stop || pkill alist all_clear shift 1 pytest -v "$@" diff --git a/tools/create_storage.py b/tools/create_storage.py index f8e3c4e..d968f99 100644 --- a/tools/create_storage.py +++ b/tools/create_storage.py @@ -5,6 +5,7 @@ 2. 从URL读取 3. 从文件读取 """ +import datetime import json import os from pathlib import Path @@ -14,7 +15,8 @@ PROJECT_ROOT = Path(__file__).parent.parent.absolute() -alist_config = json.loads(PROJECT_ROOT.joinpath("alist/data/config.json").read_text()) +alist_config = json.loads(PROJECT_ROOT.joinpath( + "alist/data/config.json").read_text()) alist_port = alist_config["scheme"]["http_port"] admin_password = os.getenv("_ALIST_ADMIN_PASSWORD", "123456") @@ -35,7 +37,9 @@ local_client.admin_storage_delete(i.id) # 创建本地存储器 -local_client.admin_storage_create( + +PROJECT_ROOT.joinpath("alist/alist-sync").mkdir(exist_ok=True, parents=True) +res = local_client.admin_storage_create( Storage.model_validate( { "mount_path": "/alist-sync", @@ -43,6 +47,7 @@ "driver": "Local", "cache_expiration": 0, "status": "work", + "modified": datetime.datetime.now(), "addition": json.dumps( { "root_folder_path": str(PROJECT_ROOT.joinpath("alist/alist-sync")), @@ -64,9 +69,9 @@ } ) ) +print(res) -remote_client = None if remote_url: local_client.import_config_from_other_client( base_url=remote_url, @@ -75,3 +80,11 @@ verify=False, ) exit(0) + +_bk_file = PROJECT_ROOT.joinpath("alist-backup-config.json") +if _bk_file.exists(): + local_client.import_configs( + json.loads( + _bk_file.read_text() + ) + ) From 92bf31555ddd11171f20963e10180f3a1f5be30d Mon Sep 17 00:00:00 2001 From: Lee CQ Date: Fri, 1 Mar 2024 18:41:14 +0800 Subject: [PATCH 14/21] 0.1.0a12 --- .gitignore | 1 + .vscode/extensions.json | 5 +++-- alist_sync/__main__.py | 15 ++++++++++++++- alist_sync/config.py | 1 + bootstrap.sh | 5 +++++ 5 files changed, 24 insertions(+), 3 deletions(-) mode change 100644 => 100755 bootstrap.sh diff --git a/.gitignore b/.gitignore index ceb6214..0a8f42b 100644 --- a/.gitignore +++ b/.gitignore @@ -168,3 +168,4 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ +alist-backup-config.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 1c87cdf..d907e44 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -7,10 +7,11 @@ "ms-python.python", "ms-python.vscode-pylance", "ms-python.isort", - "ms-python.autopep8", + "ms-python.black-formatter", "ms-python.debugpy", "github.github-vscode-theme", - "github.copilot" + "github.copilot", + "mongodb.mongodb-vscode" ] } \ No newline at end of file diff --git a/alist_sync/__main__.py b/alist_sync/__main__.py index 9d1776d..f19ea6a 100644 --- a/alist_sync/__main__.py +++ b/alist_sync/__main__.py @@ -34,7 +34,6 @@ False, "--verify", "-v", - # is_flag=True, help="Verify SSL Certificates", ) @@ -224,6 +223,20 @@ def sync(config_file: str = Option(None, "--config", "-c", help="配置文件路 return main() +@app.command("get-info") +def cli_get(path :str): + """""" + from alist_sdk import login_server, AlistPath + from alist_sync.config import sync_config + + for s in sync_config.alist_servers: + login_server(**s.dump_for_alist_path()) + + echo( + AlistPath(path).re_stat(retry=5, timeout=3).model_dump_json(indent=2) + ) + + if __name__ == "__main__": from rich.logging import RichHandler diff --git a/alist_sync/config.py b/alist_sync/config.py index c74d0ea..f9f36c1 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -144,6 +144,7 @@ def __hash__(self): ) timeout: int = Field(10) + ua: str = None daemon: bool = os.getenv("ALIST_SYNC_DAEMON", "false").lower() in ( "true", diff --git a/bootstrap.sh b/bootstrap.sh old mode 100644 new mode 100755 index fa2c856..815c548 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -48,6 +48,11 @@ test) pytest -v "$@" ;; +main ) + shift + python -m alist_sync $@ + ;; + debugger) all_clear clear From 8ba853d55ea4513f6ad233e25e5aee7513197ba3 Mon Sep 17 00:00:00 2001 From: Lee CQ Date: Fri, 1 Mar 2024 23:11:34 +0800 Subject: [PATCH 15/21] 0.1.0a13 --- .vscode/settings.json | 2 +- alist_sync/d_worker.py | 9 ++++++++- bootstrap.sh | 2 +- tools/init_alist.sh | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 1f534d3..b889cdd 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,6 +7,6 @@ "editor.formatOnPaste": true, "diffEditor.ignoreTrimWhitespace": false, "editor.formatOnType": true, - "editor.wordBasedSuggestions": false + "editor.wordBasedSuggestions": "off" } } \ No newline at end of file diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 4ad7ff7..c0ae347 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -73,6 +73,13 @@ def __init__(self, **data: Any): def __repr__(self): return f" {self.target_path}>" + + def __del__(self): + try: + self.tmp_file.unlink(missing_ok=True) + finally: + pass + @computed_field(return_type=str, alias="_id") @property def id(self) -> str: @@ -211,7 +218,7 @@ def run(self): self.delete_type() assert self.recheck() - self.update(status=f"Worker[{self.short_id}] Done.") + self.update(status=f"done") except Exception as _e: logger.error(f"worker[{self.short_id}] 出现错误: {_e}") self.error_info = str(_e) diff --git a/bootstrap.sh b/bootstrap.sh index 815c548..60fee96 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -32,7 +32,7 @@ alist) alist-init) pkill alist rm -rf alist/* - sh tools/init_alist.sh alist/ + bash tools/init_alist.sh . ;; clear) diff --git a/tools/init_alist.sh b/tools/init_alist.sh index 2bea880..37128d4 100644 --- a/tools/init_alist.sh +++ b/tools/init_alist.sh @@ -28,7 +28,7 @@ esac filename="alist-${platform}-${cpu}${fix}" export download_url="https://github.com/alist-org/alist/releases/download/v${VERSION}/${filename}" -if [[ ! -f alist && ! -f "alist.exe" ]]; then +if [[ ! -f "alist" && ! -f "alist.exe" ]]; then set -e echo "Will Install ${download_url}" curl -SLkO "$download_url" From b1d1c55289ad9785b9091e35266a88fc658bb7df Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Sat, 2 Mar 2024 12:33:25 +0800 Subject: [PATCH 16/21] =?UTF-8?q?0.1.0a14-=E6=B8=85=E7=90=86=E4=BB=A5?= =?UTF-8?q?=E5=89=8D=E7=9A=84=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- alist_sync/__main__.py | 192 +----------------------------------- alist_sync/base_sync.py | 54 ---------- alist_sync/checker.py | 60 ----------- alist_sync/d_worker.py | 3 +- alist_sync/job_copy.py | 133 ------------------------- alist_sync/job_remove.py | 89 ----------------- alist_sync/job_sync.py | 23 ----- alist_sync/jobs.py | 163 ------------------------------ alist_sync/models.py | 35 ------- alist_sync/run_copy.py | 33 ------- alist_sync/run_mirror.py | 48 --------- alist_sync/run_sync.py | 33 ------- alist_sync/run_sync_incr.py | 65 ------------ tests/common.py | 1 + tests/test_jobs.py | 114 --------------------- tests/test_scanner.py | 65 ------------ tests/test_sync.py | 136 ------------------------- tools/create_storage.py | 9 +- 18 files changed, 8 insertions(+), 1248 deletions(-) delete mode 100644 alist_sync/base_sync.py delete mode 100644 alist_sync/checker.py delete mode 100644 alist_sync/job_copy.py delete mode 100644 alist_sync/job_remove.py delete mode 100644 alist_sync/job_sync.py delete mode 100644 alist_sync/jobs.py delete mode 100644 alist_sync/models.py delete mode 100644 alist_sync/run_copy.py delete mode 100644 alist_sync/run_mirror.py delete mode 100644 alist_sync/run_sync.py delete mode 100644 alist_sync/run_sync_incr.py delete mode 100644 tests/test_jobs.py delete mode 100644 tests/test_scanner.py delete mode 100644 tests/test_sync.py diff --git a/alist_sync/__main__.py b/alist_sync/__main__.py index f19ea6a..37752fe 100644 --- a/alist_sync/__main__.py +++ b/alist_sync/__main__.py @@ -1,202 +1,20 @@ #!/bin/env python3 # coding: utf8 -import asyncio import logging import os from pathlib import Path -from typer import Typer, Option, echo, style +from typer import Typer, Option, echo -from alist_sync.base_sync import SyncBase -from alist_sync.checker import check_dir -from alist_sync.config import AlistServer -from alist_sync.run_copy import Copy -from alist_sync.run_mirror import Mirror -from alist_sync.run_sync import Sync -from alist_sync.run_sync_incr import SyncIncr app = Typer() -_base_url: str = Option( - "http://localhost:5244", - "--host", - "-h", - help="Base URL for Alist Host", -) - -_username: str = Option("", "--username", "-u", help="Alist Admin Username") - -_password: str = Option("", "--password", "-p", help="Alist Admin Password") - -_token: str = Option("", "--token", "-t", help="Alist Admin Token") - -_verify: bool = Option( - False, - "--verify", - "-v", - help="Verify SSL Certificates", -) - -_backup: bool = Option( - False, - "--backup", - "-b", - help="删除或覆盖目标文件时备份文件到.alist-sync-data/history", -) _stores_config: str | None = Option( None, "-c", "--store-path", help="一个包含存储配置的JSON文件,可以是AList的备份文件" ) -@app.command(name="check") -def check( - base_url: str = _base_url, - username: str = _username, - password: str = _password, - token: str = _token, - verify: bool = _verify, - storage_config: str = _stores_config, - target: list[str] = Option(..., "--target", "-t", help="Check Path"), -): - """检查各个存储中的文件状态""" - alist_info = AlistServer( - base_url=base_url, - username=username, - password=password, - token=token, - verify=verify, - storage_config=Path(storage_config) if storage_config else None, - ) - echo( - f"Will Be Check '{target} " - f"on {alist_info.base_url} [{alist_info.username}] \n" - f"将会从 {alist_info.storage_config} 存储库获取存储库信息。" - ) - c = SyncBase(alist_info=alist_info, sync_dirs=target) - - async def checker(): - await c.async_run() - _checker = await check_dir(*c.sync_dirs, client=c.client) - _checker.model_dump_table() - - asyncio.run(checker()) - - -@app.command(name="copy") -def copy( - base_url: str = _base_url, - username: str = _username, - password: str = _password, - token: str = _token, - verify: bool = _verify, - storage_config: str = _stores_config, - source: str = Option(..., "--source", "-s", help="Source Path"), - target: list[str] = Option(..., "--target", "-t", help="Target Path"), - backup: bool = _backup, -): - """复制任务""" - os.environ["ALIST_SYNC_BACKUP"] = str(backup) - - alist_info = AlistServer( - base_url=base_url, - username=username, - password=password, - token=token, - verify=verify, - storage_config=storage_config, - ) - echo( - f"Will Be Copy '{source}' -> {target} " - f"on {alist_info.base_url} [{alist_info.username}]" - f"将会从 {alist_info.storage_config} 存储库获取存储库信息。" - ) - return Copy(alist_info, source_path=source, targets_path=target).run() - - -@app.command("mirror") -def mirror( - base_url: str = _base_url, - username: str = _username, - password: str = _password, - token: str = _token, - verify: bool = _verify, - storage_config: str = _stores_config, - source: str = Option(..., "--source", "-s", help="Source Path"), - target: list[str] = Option(..., "--target", "-t", help="Target Path"), - backup: bool = _backup, -): - """镜像""" - os.environ["ALIST_SYNC_BACKUP"] = str(backup) - - alist_info = AlistServer( - base_url=base_url, - username=username, - password=password, - token=token, - verify=verify, - storage_config=storage_config, - ) - echo( - f"Will Be Mirror '{source}' -> {target} " - f"on {alist_info.base_url} [{alist_info.username}]" - ) - return Mirror(alist_info, source_path=source, targets_path=target).run() - - -@app.command() -def sync( - base_url: str = _base_url, - username: str = _username, - password: str = _password, - token: str = _token, - verify: bool = _verify, - storage_config: str = _stores_config, - sync_group: list[str] = Option(..., "--sync", "-s", help="Sync Group"), - backup: bool = _backup, -): - """同步任务""" - os.environ["ALIST_SYNC_BACKUP"] = str(backup) - - alist_info = AlistServer( - base_url=base_url, - username=username, - password=password, - token=token, - verify=verify, - storage_config=storage_config, - ) - echo(style("Hello Sync", fg="green", bg="black", bold=True)) - return Sync(alist_info, sync_group) - - -@app.command(name="sync-incr") -def sync_incr( - config_dir: str = Option(..., "--name", "-n", help="在Alist上存储的配置目录"), - cache_dir: str = Option(Path(), "--cache-dir", "-c", help="配置缓存目录"), - base_url: str = _base_url, - username: str = _username, - password: str = _password, - token: str = _token, - verify: bool = _verify, - storage_config: str = _stores_config, - sync_group: list[str] = Option(..., "--sync", "-s", help="Sync Group"), - backup: bool = _backup, -): - """增量同步""" - os.environ["ALIST_SYNC_BACKUP"] = str(backup) - alist_info = AlistServer( - base_url=base_url, - username=username, - password=password, - token=token, - verify=verify, - storage_config=storage_config, - ) - echo(f"增量同步:{sync_group}") - return SyncIncr(alist_info, config_dir, cache_dir, sync_group).run() - - @app.command("test-config") def t_config(config_file: str = Option(None, "--config", "-c", help="配置文件路径")): """测试配置""" @@ -224,17 +42,15 @@ def sync(config_file: str = Option(None, "--config", "-c", help="配置文件路 @app.command("get-info") -def cli_get(path :str): +def cli_get(path: str): """""" from alist_sdk import login_server, AlistPath from alist_sync.config import sync_config for s in sync_config.alist_servers: login_server(**s.dump_for_alist_path()) - - echo( - AlistPath(path).re_stat(retry=5, timeout=3).model_dump_json(indent=2) - ) + + echo(AlistPath(path).re_stat(retry=5, timeout=3).model_dump_json(indent=2)) if __name__ == "__main__": diff --git a/alist_sync/base_sync.py b/alist_sync/base_sync.py deleted file mode 100644 index 4414ede..0000000 --- a/alist_sync/base_sync.py +++ /dev/null @@ -1,54 +0,0 @@ -import asyncio -import logging -import os - -from alist_sdk import Client - -from alist_sync.alist_client import AlistClient -from alist_sync.common import timeout_input -from alist_sync.config import AlistServer - -logger = logging.getLogger("alist-sync.base") - - -class SyncBase: - def __init__(self, alist_info: AlistServer, sync_dirs: list[str | os.PathLike]): - self.client = AlistClient(timeout=30, **alist_info.dump_for_sdk()) - # self._client = Client(timeout=30, **alist_info.dump_for_sdk()) - - self.sync_dirs = sync_dirs - self.sync_dirs.sort() - self.alist_info = alist_info - - async def create_storages(self, storages): - """创建后端存储""" - mounted_storages = [ - s.mount_path for s in (await self.client.admin_storage_list()).data.content - ] - for st in storages: - if st["mount_path"] in mounted_storages: - logger.error("Mount_Path重复,可能会造成错误:%s", st["mount_path"]) - if timeout_input("3s内觉得是否继续 (y/N):", default="N").upper() != "Y": - raise - continue - res = await self.client.admin_storage_create(st) - if res.code == 200: - logger.info("创建存储成功: %s, id=%s", st["mount_path"], res.data.id) - else: - raise Exception( - f"创建存储失败:%s, message: %s", st["mount_path"], res.message - ) - - async def verify_sync_dir(self): - """验证同步的目录是存在的,因为增量同步有可能会在scanner之前操作数据。""" - for s_dir in self.sync_dirs: - res = await self.client.get_item_info(s_dir) - if res.code == 404: - raise FileNotFoundError(f"{s_dir} 不存在于 Alist 中。") - - def run(self): - asyncio.run(self.async_run()) - - async def async_run(self): - await self.create_storages(self.alist_info.storages()) - await self.verify_sync_dir() diff --git a/alist_sync/checker.py b/alist_sync/checker.py deleted file mode 100644 index f32e57b..0000000 --- a/alist_sync/checker.py +++ /dev/null @@ -1,60 +0,0 @@ -from pathlib import PurePosixPath - -from alist_sdk import Item -from pydantic import BaseModel - -from alist_sync.alist_client import AlistClient -from alist_sync.scanner import Scanner, scan_dirs - - -class Checker(BaseModel): - # matrix: 相对文件路径 -> {同步目录: Item} - matrix: dict[PurePosixPath, dict[PurePosixPath, Item]] - # cols: 同步目录 - base_path - cols: list[PurePosixPath] - - @classmethod - def checker(cls, scanner: Scanner): - _result = {} - for base_path, items in scanner.items.items(): - items: list[Item] - base_path: str | PurePosixPath - for item in items: - r_path = item.full_name.relative_to(base_path) - try: - _result[PurePosixPath(r_path)].setdefault( - PurePosixPath(base_path), item - ) - except KeyError: - _result[PurePosixPath(r_path)] = {PurePosixPath(base_path): item} - - return cls( - matrix=_result, cols=[PurePosixPath(t) for t in scanner.items.keys()] - ) - - def model_dump_table(self): - """Table 打印""" - from rich.console import Console - from rich.table import Table - - console = Console() - table = Table(show_header=True, header_style="bold magenta") - table.add_column( - "r_path", - style="dim red", - ) - for col in self.cols: - table.add_column(str(col), justify="center", vertical="middle") - - for r_path, raw in self.matrix.items(): - table.add_row( - str(r_path), *["True" if raw.get(tt) else "False" for tt in self.cols] - ) - console.print(table) - - -async def check_dir(*dirs, client: AlistClient) -> Checker: - """""" - return Checker.checker( - await Scanner.scans(*dirs, client=client) - ) diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index c0ae347..2fa9b34 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -73,13 +73,12 @@ def __init__(self, **data: Any): def __repr__(self): return f" {self.target_path}>" - def __del__(self): try: self.tmp_file.unlink(missing_ok=True) finally: pass - + @computed_field(return_type=str, alias="_id") @property def id(self) -> str: diff --git a/alist_sync/job_copy.py b/alist_sync/job_copy.py deleted file mode 100644 index a9fa4b6..0000000 --- a/alist_sync/job_copy.py +++ /dev/null @@ -1,133 +0,0 @@ -import logging -from pathlib import PurePosixPath -from typing import Literal, Optional, Iterator - -from pydantic import computed_field - -from alist_sync.alist_client import get_status -from alist_sync.checker import Checker -from alist_sync.jobs import JobBase, TaskBase - -logger = logging.getLogger("alist-sync.job_copy") - -CopyStatusModify = Literal[ - "init", - "created", - "waiting", - "getting src object", - "", - "running", - "success", - "failed", - "checked_done", -] - - -class CopyTask(TaskBase): - """复制任务""" - - id: Optional[str] = None # 创建任务后,Alist返回的任务ID # 一个复制任务,对应对应了2个Alist任务 - copy_name: str # 需要复制到文件名 - copy_source: PurePosixPath # 需要复制的源文件夹 - copy_target: PurePosixPath # 需要复制到的目标文件夹 - # 任务状态 init: 初始化,created: 已创建,"getting src object": 运行中,"": 已完成,"failed": 失败 - status: CopyStatusModify = "init" - message: Optional[str] = "" - - @computed_field() - @property - def name(self) -> str: - """Alist 任务名称""" - source_full_path = self.copy_source.joinpath(self.copy_name) - source_provider = source_full_path.parents[-2] - source_path = source_full_path.relative_to(source_provider) - - target_full_path = self.copy_target.joinpath(self.copy_name) - target_provider = target_full_path.parents[-2] - target_path = target_full_path.relative_to(target_provider) - _t = ( - "" - if target_path.parent.as_posix() == "." - else target_path.parent.as_posix() - ) - - return f"copy [{source_provider}](/{source_path}) to [{target_provider}](/{_t})" - - async def create(self): - """创建复制任务""" - if self.status != "init": - raise ValueError(f"任务状态错误: {self.status}") - _res = await self.client.copy( - files=[ - self.copy_name, - ], - src_dir=self.copy_source, - dst_dir=self.copy_target, - ) - if _res.code == 200: - self.status = "created" - - async def recheck_done(self) -> bool: - """检查任务复制到文件是否已经存在与目标位置""" - _res = await self.client.get_item_info( - str(self.copy_target.joinpath(self.copy_name)) - ) - if _res.code == 200 and _res.data.name == self.copy_name: - self.status = "checked_done" - return True - else: - logger.error(f"复查任务失败: [{_res.code}]{_res.message}: {self.name}") - self.status = "failed" - return False - - async def check_status(self) -> bool: - """异步运行类 - 检查该Task的状态 - - 在异步循环中反复检查 - - 1. 在alist中创建 复制任务 :: init -> created - 2. 检查复制任务已经存在于 undone_list :: created -> running - 3. 检查任务已经失败 (重试3次) :: running -> failed - 4. 检查任务已经完成 :: running -> success - 5. 复查任务是否已经完成 :: success -> checked_done - """ - - if self.status == "init": - logger.info("创建任务: %s", self.name) - if await self.backup(self.copy_target.joinpath(self.copy_name)): - return await self.create() - elif self.status == "checked_done": - return True - elif self.status == "success": - return await self.recheck_done() - try: - _status, _p = await get_status(self.name) - self.status = _status - return False - except ValueError as _e: - logger.error("获取任务状态失败: %s", self.name, exc_info=_e) - self.status = "failed" - return False - - -class CopyJob(JobBase): - """复制工作 - 从Checker中找出需要被复制的任务并创建""" - - # tasks: task_name -> task - tasks: dict[str, CopyTask] - done_tasks: dict[str, CopyTask] = {} - - @staticmethod - def create_task(_s, _t, checker: Checker) -> Iterator[CopyTask]: - """创建复制任务""" - _s, _t = PurePosixPath(_s), PurePosixPath(_t) - if _s not in checker.cols and _t not in checker.cols: - raise ValueError(f"Source: {_s} or Target: {_t} not in Checker") - for r_path, pp in checker.matrix.items(): - if pp.get(_s) is not None and pp.get(_t) is None: - yield CopyTask( - copy_name=pp.get(_s).name, - copy_source=PurePosixPath(_s).joinpath(r_path.parent), - copy_target=PurePosixPath(_t).joinpath(r_path.parent), - backup_dir=_t.joinpath(".alist-sync-data/history"), - ) diff --git a/alist_sync/job_remove.py b/alist_sync/job_remove.py deleted file mode 100644 index fc05fd5..0000000 --- a/alist_sync/job_remove.py +++ /dev/null @@ -1,89 +0,0 @@ -import logging -from pathlib import PurePosixPath -from typing import Iterator, Literal - -from alist_sync.alist_client import AlistClient -from alist_sync.checker import Checker -from alist_sync.common import get_alist_client -from alist_sync.jobs import JobBase, TaskBase - -logger = logging.getLogger("alist-sync.job_remove") -CopyStatusModify = Literal[ - "init", - "removed", - "checked_done", -] - - -class BackupTask(TaskBase): - """备份文件""" - - -class RemoveTask(TaskBase): - """删除文件的任务""" - - full_path: PurePosixPath | str - status: str = "init" - error_times: int = 0 - - @property - def name(self) -> str | PurePosixPath: - return self.full_path - - async def remove(self): - """删除文件""" - assert await self.backup(self.full_path), "Backup Error" - - res = await self.client.remove(self.full_path.parent, self.full_path.name) - if res.code == 200: - logger.info("删除文件 %s 成功", self.full_path) - self.status = "removed" - return True - logger.error(f"删除文件 {self.full_path} 失败: [{res.code=}]{res.message}") - self.error_times += 1 - return False - - async def recheck(self, client: AlistClient = None) -> bool: - """重新检查""" - client: AlistClient = client or get_alist_client() - res = await client.get_item_info(self.name) - if res == 404: - self.status = "rechecked_done" - return True - self.error_times += 1 - return False - - async def check_status(self) -> bool: - """检查任务状态""" - if self.status == "init": - if await self.backup(self.full_path): - return await self.remove() - elif self.status == "removed": - return await self.recheck() - elif self.status == "checked_done": - return True - else: - raise ValueError(f"未知的状态:{self.status}") - - -class RemoveJob(JobBase): - """""" - - tasks: dict[PurePosixPath | str, RemoveTask] - done_tasks: dict[PurePosixPath | str, RemoveTask] = {} - - @staticmethod - def create_task(_s, _t, checker: Checker) -> Iterator[RemoveTask]: - """创建任务 - _s 无 _t 有:删除 _t - """ - _s, _t = PurePosixPath(_s), PurePosixPath(_t) - if _s not in checker.cols and _t not in checker.cols: - raise ValueError(f"Source: {_s} or Target: {_t} not in Checker") - - for r_path, pp in checker.matrix.items(): - if pp.get(_s) is None and pp.get(_t) is not None: - yield RemoveTask( - full_path=pp.get(_t).full_name, - backup_dir=_t.joinpath(".alist-sync-data/history"), - ) diff --git a/alist_sync/job_sync.py b/alist_sync/job_sync.py deleted file mode 100644 index acff1f2..0000000 --- a/alist_sync/job_sync.py +++ /dev/null @@ -1,23 +0,0 @@ -# coding: utf8 -"""增量同步JOB - - -""" -import time - -from pydantic import computed_field - -from jobs import JobBase - - -class JobSync(JobBase): - sync_group: list - status: str = "init" - - @computed_field() - def update_time(self) -> int: - return int(time.time()) - - @computed_field() - def job_id(self) -> str: - return "" # FIXME 修复 diff --git a/alist_sync/jobs.py b/alist_sync/jobs.py deleted file mode 100644 index 72ce03c..0000000 --- a/alist_sync/jobs.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- -""" -@File Name : jobs.py -@Author : LeeCQ -@Date-Time : 2023/12/17 18:02 - -""" -import asyncio -import logging -import os -from pathlib import PurePosixPath -from typing import Iterator - - -from alist_sync.models import BaseModel -from alist_sync.alist_client import AlistClient -from alist_sync.checker import Checker -from alist_sync.common import get_alist_client, sha1 - -logger = logging.getLogger("alist-sync.jobs") - - -def get_need_backup_from_env() -> bool: - _b = os.environ.get("ALIST_SYNC_BACKUP", "F").upper() - if _b in ["Y", "YES", "TRUE", "OK"]: - return True - return False - - -class TaskBase(BaseModel): - need_backup: bool = None - backup_status: str = "init" - backup_dir: PurePosixPath - - @property - def client(self) -> AlistClient: - return get_alist_client() - - async def backup(self, file: PurePosixPath): - """备份, move应该不会创建Task,会等待完成后返回。 - 包含创建元数据 sha1(file_path)_int(time.time()).history - """ - if self.need_backup is None: - self.need_backup = get_need_backup_from_env() - - if not self.need_backup: - logger.info("Not Need Backup!") - return True - - file_info = await self.client.get_item_info(file) - if file_info.code == 404: - logger.debug("目标文件不存在,无需备份") - return True - if file_info.code != 200: - raise FileNotFoundError( - f"BackupFileError: {file} -> {file_info.code}:{file_info.message}" - ) - - history_filename = ( - f"{sha1(str(file))}_{int(file_info.data.modified.timestamp())}.history" - ) - if await self.backup_file_exist(history_filename) is True: - raise FileExistsError(f"{history_filename}已经存在") - - logger.info("Backup File: {file}") - res_bak = await self.client.move( - src_dir=file.parent, - dst_dir=self.backup_dir, - files=file.name, - ) - assert res_bak.code == 200, f"移动文件错误: {res_bak.message}" - res_rename = await self.client.rename( - history_filename, full_path=self.backup_dir.joinpath(file.name) - ) - assert res_rename.code == 200, f"重命名错误: {res_rename.message}" - res_meta = await self.client.upload_file_put( - file_info.model_dump_json().encode(), - path=self.backup_dir.joinpath(history_filename + ".json"), - ) - assert res_meta.code == 200 - if (await self.backup_file_exist(history_filename)) is False: - raise FileNotFoundError(f"{history_filename}") - return True - - async def backup_file_exist(self, history_filename) -> bool: - """验证Backup file 是否已经存在""" - h_info = await self.client.get_item_info( - self.backup_dir.joinpath(history_filename) - ) - hj_info = await self.client.get_item_info( - self.backup_dir.joinpath(history_filename + ".json") - ) - if hj_info.code == 200 and h_info.code == 200: - return True - return False - - -class JobBase(BaseModel): - """""" - - @staticmethod - def create_task(_s, _t, checker: Checker) -> Iterator[BaseModel]: - raise NotImplementedError - - @classmethod - def from_checker(cls, source, target, checker: Checker): - """从Checker中创建Task""" - _tasks = {} - - def _1_1(sp: PurePosixPath, tp: PurePosixPath): - for _task in cls.create_task(sp, tp, checker): - _tasks[_task.name] = _task - - def _1_n(sp, tps): - sp = PurePosixPath(sp) - _tps = [PurePosixPath(tp) for tp in tps] - [_1_1(sp, tp) for tp in _tps if sp != tp] - - def _n_n(sps, tps): - [_1_n(sp, tps) for sp in sps] - - if isinstance(source, str | PurePosixPath) and isinstance( - target, str | PurePosixPath - ): - _1_1(source, target) - elif isinstance(source, str | PurePosixPath) and isinstance( - target, list | tuple - ): - _1_n(source, target) - elif isinstance(source, list | tuple) and isinstance(target, list | tuple): - _n_n(source, target) - else: - raise ValueError(f"source: {source} or target: {target} not support") - - self = cls(tasks=_tasks) - self.save_to_cache() - return cls(tasks=_tasks) - - async def start(self, client: AlistClient = None): - """开始任务""" - logger.info(f"[{self.__class__.__name__}] " f"任务开始。") - client = client or get_alist_client() - while self.tasks: - _keys = [k for k in self.tasks.keys()] - for task_name in _keys: - task = self.tasks[task_name] - _task_status = await task.check_status() - - if _task_status: - logger.info( - f"[{self.__class__.__name__}] 任务完成: {task.name = }", - ) - self.done_tasks[task.name] = task - self.tasks.pop(task.name) - else: - logger.debug( - f"[{self.__class__.__name__}] " - f"任务状态: {task.name=} -> {task.status=}" - ) - self.save_to_cache() - await asyncio.sleep(1) - logger.info(f"[{self.__class__.__name__}] " f"All Done.") diff --git a/alist_sync/models.py b/alist_sync/models.py deleted file mode 100644 index 2b07496..0000000 --- a/alist_sync/models.py +++ /dev/null @@ -1,35 +0,0 @@ -# 用于定义模型 -from pathlib import Path - -from pydantic import BaseModel as _BaseModel - -from alist_sync.config import create_config - -config = create_config() - - -__all__ = [ - "BaseModel", -] - - -class BaseModel(_BaseModel): - """基础模型""" - - @classmethod - def from_json_file(cls, file: Path): - """从文件中读取json""" - if not file.exists(): - raise FileNotFoundError(f"找不到文件:{file}") - return cls.model_validate_json(Path(file).read_text(encoding="utf-8")) - - @classmethod - def from_cache(cls): - class_name = cls.__name__ - file = config.cache_dir.joinpath(f"{class_name}.json") - return cls.from_json_file(file) - - def save_to_cache(self): - class_name = self.__class__.__name__ - file = config.cache_dir.joinpath(f"{class_name}.json") - file.write_text(self.model_dump_json(indent=2), encoding="utf-8") diff --git a/alist_sync/run_copy.py b/alist_sync/run_copy.py deleted file mode 100644 index eb1b185..0000000 --- a/alist_sync/run_copy.py +++ /dev/null @@ -1,33 +0,0 @@ -import logging - -from alist_sync.base_sync import SyncBase -from alist_sync.checker import check_dir -from alist_sync.common import async_all_task_names -from alist_sync.job_copy import CopyJob -from alist_sync.config import AlistServer - -logger = logging.getLogger("alist-sync.copy-to-target") - - -class Copy(SyncBase): - def __init__( - self, alist_info: AlistServer, source_path: str, targets_path: list[str] - ): - self.mode = "copy" - self.source_path = source_path - self.targets_path = targets_path - - super().__init__(alist_info, [source_path, *targets_path]) - - async def async_run(self): - """异步运行""" - await super().async_run() - checker = await check_dir(*self.sync_dirs, client=self.client) - copy_job = CopyJob.from_checker( - self.source_path, - self.targets_path, - checker, - ) - await copy_job.start(self.client) - logger.info("当前全部的Task %s", async_all_task_names()) - logger.info("复制完成。") \ No newline at end of file diff --git a/alist_sync/run_mirror.py b/alist_sync/run_mirror.py deleted file mode 100644 index 1c19823..0000000 --- a/alist_sync/run_mirror.py +++ /dev/null @@ -1,48 +0,0 @@ -# coding: utf-8 -"""镜像复制 - -source -> [target, ...] - -1. Source 复制到 Target,若Target中没有,复制 -2. 若Target已经存在,忽略。 -3. 若Target有source无,删除。 - -""" -import asyncio -import logging - -from alist_sync.base_sync import SyncBase -from alist_sync.checker import check_dir -from alist_sync.common import async_all_task_names -from alist_sync.job_copy import CopyJob -from alist_sync.job_remove import RemoveJob - -logger = logging.getLogger("alist-sync.mirror") - - -class Mirror(SyncBase): - def __init__( - self, alist_info, source_path: str = None, targets_path: list[str] = None - ): - self.mode = "mirrors" - self.source_path = source_path - self.targets_path = [] if not targets_path else targets_path - super().__init__(alist_info, [source_path, *targets_path]) - - async def async_run(self): - # 创建复制列表 - await super().async_run() - - checker = await check_dir(*self.sync_dirs, client=self.client) - copy_job = CopyJob.from_checker(self.source_path, self.targets_path, checker) - delete_job = RemoveJob.from_checker( - self.source_path, self.targets_path, checker - ) - - await asyncio.gather( - copy_job.start(self.client), - delete_job.start(self.client), - ) - logger.info("当前全部的Task %s", async_all_task_names()) - - logger.info(f"{self.__class__.__name__}完成。") diff --git a/alist_sync/run_sync.py b/alist_sync/run_sync.py deleted file mode 100644 index ee7d8dd..0000000 --- a/alist_sync/run_sync.py +++ /dev/null @@ -1,33 +0,0 @@ -# coding: utf8 -"""基础同步 -[dir1, dir2, dir3] - -各个Dir分别成为源目录,并Copy至其他目录 - -""" -import logging - -from alist_sync.base_sync import SyncBase -from alist_sync.checker import check_dir -from alist_sync.job_copy import CopyJob -from alist_sync.config import AlistServer - -logger = logging.getLogger("alist-sync.run-sync") - - -class Sync(SyncBase): - def __init__(self, alist_info: AlistServer, dirs: list[str] = None): - super().__init__(alist_info, dirs) - - async def async_run(self): - """异步运行""" - await super().async_run() - checker = await check_dir(*self.sync_dirs, client=self.client) - copy_job = CopyJob.from_checker( - self.sync_dirs, - self.sync_dirs, - checker, - ) - - await copy_job.start(self.client) - logger.info("同步完成。") diff --git a/alist_sync/run_sync_incr.py b/alist_sync/run_sync_incr.py deleted file mode 100644 index 2a7d501..0000000 --- a/alist_sync/run_sync_incr.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding: utf8 -"""增量的复制 - -基于配置文件的复制 - -""" -import asyncio -import atexit -import json -import time -from threading import Thread -from pathlib import PurePosixPath - -from alist_sync.base_sync import SyncBase -from alist_sync.config import AlistServer - - -class SyncIncr(SyncBase): - path_data = '.alist-sync-data/' - history_path = '.alist-sync-data/history/' - locker_name = "locker.json" - last_status_name = "last_status.json" - - def __init__(self, alist_info: AlistServer, config_dir, cache_dir, sync_group): - super().__init__(alist_info, sync_group) - - self.sync_group = sync_group - self.cache_dir = cache_dir - self.config_dir = config_dir - atexit.register(self.on_exit) - - def on_exit(self): - """onexit""" - for _ in self.sync_group: - Thread(target=self._client.remove, args=( - PurePosixPath(_).joinpath(self.path_data), - self.locker_name, - )).start() - - async def create_locker(self): - """""" - - async def _locker(_dir): - while True: - if self.client.close(): - break - await self.client.upload_file_put( - json.dumps({ - "sync_group": self.sync_group, - "update_time": int(time.time()) - }).encode(), - PurePosixPath(_dir).joinpath(self.path_data).joinpath(self.locker_name), - as_task=False - ) - await asyncio.sleep(60) - - for _ in self.sync_group: - asyncio.create_task( - _locker(_), - name=f'alist-sync-locker_{_}' - ) - - async def async_run(self): - await super().async_run() - await self.create_locker() diff --git a/tests/common.py b/tests/common.py index efd5a68..0416860 100644 --- a/tests/common.py +++ b/tests/common.py @@ -16,6 +16,7 @@ class StorageInfo(NamedTuple): WORKDIR = Path(__file__).parent +WORKDIR.mkdir(exist_ok=True, parents=True) DATA_DIR = StorageInfo("/local", WORKDIR / "alist/test_dir") DATA_DIR_DST = StorageInfo("/local_dst", WORKDIR / "alist/test_dir_dst") DATA_DIR_DST2 = StorageInfo("/local_dst2", WORKDIR / "alist/test_dir_dst2") diff --git a/tests/test_jobs.py b/tests/test_jobs.py deleted file mode 100644 index 5b52cf2..0000000 --- a/tests/test_jobs.py +++ /dev/null @@ -1,114 +0,0 @@ -import asyncio - -import pytest - -from alist_sync.alist_client import AlistClient -from alist_sync.checker import Checker - -from alist_sync.job_copy import CopyTask, CopyJob -from alist_sync.job_remove import RemoveTask, RemoveJob -from alist_sync.scanner import Scanner - -from .test_check import SUP_DIR -from .common import setup_module as _sm, setup_function as _sf, DATA_DIR, DATA_DIR_DST - - -setup_module = _sm - - -def setup_function(): - _sf() - AlistClient( - base_url="http://localhost:5244", - verify=False, - username="admin", - password="123456", - ) - - -def test_base_task(): - from alist_sync.jobs import TaskBase - - TaskBase( - need_backup=False, - backup_status="init", - backup_dir="", - ) - - -@pytest.mark.parametrize("need_backup", [True, False]) -def test_copy_task(need_backup): - """""" - DATA_DIR.fs_path.joinpath("a.txt").write_text("123") - DATA_DIR_DST.fs_path.joinpath("a.txt").write_text("21") - DATA_DIR_DST.fs_path.joinpath(".alist-sync-data/history").mkdir( - parents=True, exist_ok=True - ) - - task = CopyTask( - copy_name="a.txt", - copy_source=DATA_DIR.mount_path, - copy_target=DATA_DIR_DST.mount_path, - backup_dir=DATA_DIR_DST.mount_path + "/.alist-sync-data/history", - need_backup=need_backup, - ) - - async def start(): - while True: - if await task.check_status(): - break - - asyncio.run(start()) - assert DATA_DIR_DST.fs_path.joinpath("a.txt").read_text() == "123" - if need_backup: - assert DATA_DIR_DST.fs_path.joinpath(".alist-sync-data").is_dir() - - -@pytest.mark.parametrize( - "scanner", - [ - Scanner.model_validate_json(SUP_DIR.joinpath("Scanner.json").read_text()), - Scanner.model_validate_json(SUP_DIR.joinpath("Scanner-m.json").read_text()), - ], -) -def test_job_copy_1_1(scanner): - checker: Checker = Checker.checker(scanner) - keys = list(scanner.items.keys()) - source = keys[0] - target = keys[1] - source_files = [_.full_name for _ in scanner.items.get(source)] - target_files = [_.full_name for _ in scanner.items.get(target)] - job = CopyJob.from_checker( - source=source, - target=target, - checker=checker, - ) - assert job.tasks - for task in job.tasks.values(): - assert task.copy_source.joinpath(task.copy_name) in source_files - assert task.copy_target.joinpath(task.copy_name) not in target_files - - -@pytest.mark.parametrize( - "scanner", - [ - Scanner.model_validate_json(SUP_DIR.joinpath("Scanner.json").read_text()), - Scanner.model_validate_json(SUP_DIR.joinpath("Scanner-m.json").read_text()), - ], -) -def test_job_delete_1_1(scanner): - checker: Checker = Checker.checker(scanner) - keys = list(scanner.items.keys()) - source = keys[1] - target = keys[0] - source_files = [_.full_name for _ in scanner.items.get(source)] - target_files = [_.full_name for _ in scanner.items.get(target)] - job = RemoveJob.from_checker( - source=source, - target=target, - checker=checker, - ) - assert job.tasks - for task in job.tasks.values(): - assert task.full_path not in source_files - assert task.full_path in target_files diff --git a/tests/test_scanner.py b/tests/test_scanner.py deleted file mode 100644 index 7a0afb9..0000000 --- a/tests/test_scanner.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- -""" -@File Name : test_scanner.py -@Author : LeeCQ -@Date-Time : 2023/12/17 22:26 - -""" -from pathlib import Path - -import asyncio - -from alist_sync.alist_client import AlistClient -from .common import ( - DATA_DIR, - DATA_DIR_DST, - DATA_DIR_DST2, - setup_module as _sm, - setup_function as _sf, -) - -from alist_sync.scanner import scan_dirs, Scanner - -setup_module = _sm -setup_function = _sf - - -def test_scan_dir(): - items = [ - "test_scan_dir/a/a.txt", - "test_scan_dir/b/b.txt", - "test_scan_dir/c/c.txt", - "test_scan_dir/d.txt", - "e.txt", - ".alist-sync-data/sync-locker.json", - ".alist-sync-data/history/MD5.history", - ".alist-sync-data/history/MD5.history.json" - ] - for i in items: - Path(DATA_DIR.fs_path / i).parent.mkdir(parents=True, exist_ok=True) - Path(DATA_DIR.fs_path / i).touch() - Path(DATA_DIR_DST.fs_path / i).parent.mkdir(parents=True, exist_ok=True) - Path(DATA_DIR_DST.fs_path / i).touch() - Path(DATA_DIR_DST2.fs_path / i).parent.mkdir(parents=True, exist_ok=True) - Path(DATA_DIR_DST2.fs_path / i).touch() - - assert DATA_DIR_DST2.fs_path.joinpath(".alist-sync-data/sync-locker.json").exists() - - res = asyncio.run( - scan_dirs( - DATA_DIR.mount_path, - DATA_DIR_DST.mount_path, - DATA_DIR_DST2.mount_path, - client=AlistClient( - base_url="http://localhost:5244", - verify=False, - username="admin", - password="123456", - ), - ) - ) - - assert isinstance(res, Scanner) - assert 'sync-locker.json' not in [_.name for _ in res.items.get(DATA_DIR_DST2.mount_path)] - assert 'MD5.history' not in [_.name for _ in res.items.get(DATA_DIR_DST2.mount_path)] diff --git a/tests/test_sync.py b/tests/test_sync.py deleted file mode 100644 index 816e5d3..0000000 --- a/tests/test_sync.py +++ /dev/null @@ -1,136 +0,0 @@ -from pathlib import Path - -import asyncio - -from alist_sdk import Item - -from alist_sync.alist_client import AlistClient -from alist_sync.models import AlistServer -from alist_sync.scanner import scan_dirs, Scanner -from alist_sync.run_copy import Copy -from alist_sync.run_mirror import Mirror - -from .common import ( - DATA_DIR, - DATA_DIR_DST, - DATA_DIR_DST2, - setup_module, - setup_function, -) - - -def test_scan_dir(): - items = [ - "test_scan_dir/a/a.txt", - "test_scan_dir/b/b.txt", - "test_scan_dir/c/c.txt", - "test_scan_dir/d.txt", - "e.txt", - ] - for i in items: - Path(DATA_DIR.fs_path / i).parent.mkdir(parents=True, exist_ok=True) - Path(DATA_DIR.fs_path / i).touch() - - res = asyncio.run( - scan_dirs("/local", client=AlistClient( - base_url="http://localhost:5244", - verify=False, - username="admin", - password="123456", - )) - ) - - assert isinstance(res, Scanner) - assert DATA_DIR.mount_path in res.items - assert isinstance(res.items, dict) - assert isinstance(res.items[DATA_DIR.mount_path], list) - assert res.items[DATA_DIR.mount_path].__len__() == len(items) - assert isinstance(res.items[DATA_DIR.mount_path][0], Item) - assert {i.full_name.as_posix() for vs in res.items.values() for i in vs} == { - f"{DATA_DIR.mount_path}/{i}" for i in items - } - - -def test_run_copy(): - items = [ - "test_scan_dir/a/a.txt", - "test_scan_dir/b/b.txt", - "test_scan_dir/c/c.txt", - "test_scan_dir/d.txt", - "e.txt", - ] - for i in items: - Path(DATA_DIR.fs_path / i).parent.mkdir(parents=True, exist_ok=True) - Path(DATA_DIR.fs_path / i).touch() - - asyncio.run( - Copy( - AlistServer( - base_url="http://localhost:5244", - verify=False, - username="admin", - password="123456", - ), - source_path="/local", - targets_path=["/local_dst", "/local_dst2"], - ).async_run() - ) - assert DATA_DIR_DST.fs_path.joinpath("test_scan_dir/a/a.txt").exists() - assert DATA_DIR_DST2.fs_path.joinpath("test_scan_dir/a/a.txt").exists() - - -def test_mirror(): - """测试镜像复制""" - items_sou = { - "test_scan_dir/a/a.txt", - "test_scan_dir/b/b.txt", - "test_scan_dir/c/c.txt", - "test_scan_dir/d.txt", - "e.txt", - } - items_tar1 = {"test_scan_dir/a/a.txt", "tar1/b.txt", "f.txt"} - items_tar2 = { - "tar2/g.txt", - "g.txt", - } - - for i in items_sou: - Path(DATA_DIR.fs_path / i).parent.mkdir(parents=True, exist_ok=True) - Path(DATA_DIR.fs_path / i).touch() - for i in items_tar1: - Path(DATA_DIR_DST.fs_path / i).parent.mkdir(parents=True, exist_ok=True) - Path(DATA_DIR_DST.fs_path / i).touch() - for i in items_tar2: - Path(DATA_DIR_DST2.fs_path / i).parent.mkdir(parents=True, exist_ok=True) - Path(DATA_DIR_DST2.fs_path / i).touch() - - assert not DATA_DIR_DST.fs_path.joinpath("test_scan_dir/b/b.txt").exists() - assert not DATA_DIR_DST2.fs_path.joinpath("test_scan_dir/b/b.txt").exists() - - assert DATA_DIR_DST.fs_path.joinpath("tar1/b.txt").exists() - assert DATA_DIR_DST.fs_path.joinpath("f.txt").exists() - - assert DATA_DIR_DST2.fs_path.joinpath("tar2/g.txt").exists() - assert DATA_DIR_DST2.fs_path.joinpath("g.txt").exists() - - asyncio.run( - Mirror( - AlistServer( - base_url="http://localhost:5244", - verify=False, - username="admin", - password="123456", - ), - source_path=DATA_DIR.mount_path, - targets_path=[DATA_DIR_DST.mount_path, DATA_DIR_DST2.mount_path], - ).async_run() - ) - - assert DATA_DIR_DST.fs_path.joinpath("test_scan_dir/a/a.txt").exists() - assert DATA_DIR_DST2.fs_path.joinpath("test_scan_dir/a/a.txt").exists() - - assert not DATA_DIR_DST.fs_path.joinpath("tar1/b.txt").exists() - assert not DATA_DIR_DST.fs_path.joinpath("f.txt").exists() - - assert not DATA_DIR_DST2.fs_path.joinpath("tar2/g.txt").exists() - assert not DATA_DIR_DST2.fs_path.joinpath("g.txt").exists() diff --git a/tools/create_storage.py b/tools/create_storage.py index d968f99..411234d 100644 --- a/tools/create_storage.py +++ b/tools/create_storage.py @@ -15,8 +15,7 @@ PROJECT_ROOT = Path(__file__).parent.parent.absolute() -alist_config = json.loads(PROJECT_ROOT.joinpath( - "alist/data/config.json").read_text()) +alist_config = json.loads(PROJECT_ROOT.joinpath("alist/data/config.json").read_text()) alist_port = alist_config["scheme"]["http_port"] admin_password = os.getenv("_ALIST_ADMIN_PASSWORD", "123456") @@ -83,8 +82,4 @@ _bk_file = PROJECT_ROOT.joinpath("alist-backup-config.json") if _bk_file.exists(): - local_client.import_configs( - json.loads( - _bk_file.read_text() - ) - ) + local_client.import_configs(json.loads(_bk_file.read_text())) From 156a23623b66d7e54950cce5c14360f128644dcf Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Sat, 2 Mar 2024 19:57:34 +0800 Subject: [PATCH 17/21] 0.1.0b1 --- alist_sync/__main__.py | 26 ++++++++++------ alist_sync/config.py | 5 +++ alist_sync/d_checker.py | 17 ++++++++-- alist_sync/d_main.py | 69 ++++++++++++++++++++++++++++++++++++++--- alist_sync/d_worker.py | 32 ++++++++++++++++--- alist_sync/err.py | 36 +++++++++++++++++++++ config-template.yaml | 18 +++++++---- tests/common.py | 4 +-- tests/test_check.py | 34 +++++++++++--------- 9 files changed, 199 insertions(+), 42 deletions(-) create mode 100644 alist_sync/err.py diff --git a/alist_sync/__main__.py b/alist_sync/__main__.py index 37752fe..73672f3 100644 --- a/alist_sync/__main__.py +++ b/alist_sync/__main__.py @@ -7,6 +7,7 @@ from typer import Typer, Option, echo +logger = logging.getLogger("alist-sync.__main__") app = Typer() @@ -28,16 +29,30 @@ def t_config(config_file: str = Option(None, "--config", "-c", help="配置文 logger.info("Done.") +@app.command("test-ignore") +def t_ignore(path, match): + """测试ignore""" + from fnmatch import fnmatchcase + + echo(fnmatchcase(path, match)) + + @app.command("sync") -def sync(config_file: str = Option(None, "--config", "-c", help="配置文件路径")): +def sync( + config_file: str = Option(None, "--config", "-c", help="配置文件路径"), + debug: bool = Option(False, "--debug", help="调试模式, 将以单线程启动"), +): """同步任务""" from alist_sync.config import create_config - from alist_sync.d_main import main + from alist_sync.d_main import main, main_debug if config_file and Path(config_file).exists(): os.environ["ALIST_SYNC_CONFIG"] = str(Path(config_file).resolve().absolute()) create_config() + if debug: + echo("调试模式启动") + return main_debug() return main() @@ -54,11 +69,4 @@ def cli_get(path: str): if __name__ == "__main__": - from rich.logging import RichHandler - - logger = logging.getLogger("alist-sync") - handler = RichHandler() - handler.setLevel("DEBUG") - logger.addHandler(handler) - logger.setLevel("DEBUG") app() diff --git a/alist_sync/config.py b/alist_sync/config.py index f9f36c1..0620fc7 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -98,12 +98,17 @@ def is_storage(_st): class SyncGroup(BaseModel): + def __hash__(self): + return hash(self.name + self.type) + enable: bool = True name: str type: str interval: int = 300 need_backup: bool = False backup_dir: str = ".alist-sync-backup" + blacklist: list[str] = [] + whitelist: list[str] = [] group: list[AlistPathType] = Field(min_length=2) diff --git a/alist_sync/d_checker.py b/alist_sync/d_checker.py index 72fa7f8..c92e158 100644 --- a/alist_sync/d_checker.py +++ b/alist_sync/d_checker.py @@ -6,11 +6,13 @@ @Date-Time : 2024/2/25 21:17 """ +import fnmatch import logging import threading import time from queue import Queue, Empty from typing import Iterator +from functools import lru_cache from alist_sdk import AlistPath @@ -19,6 +21,7 @@ from alist_sync.thread_pool import MyThreadPoolExecutor from alist_sync.common import prefix_in_threads + logger = logging.getLogger("alist-sync.d_checker") sync_config = create_config() @@ -36,7 +39,8 @@ def __init__(self, sync_group: SyncGroup, scaner_queue: Queue, worker_queue: Que name=f"checker_main[{self.sync_group.name}-{self.__class__.__name__}]", ) - def split_path(self, path) -> tuple[AlistPath, str]: + @lru_cache(64) + def split_path(self, path: AlistPath) -> tuple[AlistPath, str]: """将Path切割为sync_dir和相对路径""" for sr in self.sync_group.group: try: @@ -51,8 +55,17 @@ def get_backup_dir(self, path) -> AlistPath: def checker(self, source_path: AlistPath, target_path: AlistPath) -> "Worker|None": raise NotImplementedError + def ignore(self, relative_path) -> bool: + for _i in self.sync_group.blacklist: + if fnmatch.fnmatchcase(relative_path, _i): + logger.debug("Ignore: %s, [matched: %s]", relative_path, _i) + return True + return False + def checker_every_dir(self, path) -> Iterator[Worker | None]: _sync_dir, _relative_path = self.split_path(path) + # if self.ignore(_relative_path): + # return for _sd in self.sync_group.group: _sd: AlistPath if _sd == _sync_dir: @@ -105,7 +118,7 @@ def checker( target_path: AlistPath, ) -> "Worker|None": if not target_path.exists(): - logger.debug( + logger.info( f"Checked: [COPY] {source_path.as_uri()} -> {target_path.as_uri()}" ) return Worker( diff --git a/alist_sync/d_main.py b/alist_sync/d_main.py index eb456ce..8c6a755 100644 --- a/alist_sync/d_main.py +++ b/alist_sync/d_main.py @@ -5,7 +5,11 @@ import logging import threading import time +from functools import lru_cache from queue import Queue +from typing import Callable + +import alist_sdk from alist_sdk import AlistPath, login_server @@ -36,7 +40,9 @@ def _scaner(_url: AlistPath, _s_num): _queue.put(item) elif item.is_dir(): pool.submit(_scaner, item, _s_num) - finally: + except alist_sdk.AlistError: + pass + except Exception: _s_num.pop() assert url.exists(), f"目录不存在{url.as_uri()}" @@ -86,9 +92,62 @@ def main(): _tw.join() +def main_debug(): + """""" + import fnmatch + + _tw = Workers() + + def _make_ignore(_sync_group): + @lru_cache(64) + def split_path(_sync_group, path: AlistPath) -> tuple[AlistPath, str]: + """将Path切割为sync_dir和相对路径""" + for sr in _sync_group.group: + try: + return sr, path.relative_to(sr) + except ValueError: + pass + raise ValueError() + + def __ignore(relative_path) -> bool: + _, relative_path = split_path(_sync_group, relative_path) + for _i in _sync_group.blacklist: + if fnmatch.fnmatchcase(relative_path, _i): + logger.debug("Ignore: %s, [matched: %s]", relative_path, _i) + return True + return False + + return __ignore + + def iter_file(url, i_func: Callable[[str], bool] | None = None): + if i_func is not None and i_func(url): + return + + if url.is_file(): + yield url + elif url.is_dir(): + for item in url.iterdir(): + yield from iter_file(item, i_func) + else: + logger.warning("未知的文件类型: %s", url) + + for sync_group in sync_config.sync_groups: + _check = get_checker(sync_group.type)(sync_group, Queue(1), Queue(1)) + _ignore = _make_ignore(sync_group) + if sync_group.enable is False: + logger.warning("Checker: %s is disable", sync_group.name) + continue + for uri in sync_group.group: + login_alist(sync_config.get_server(uri.as_uri())) + + for _file in iter_file(sync_group.group[0], _ignore): + logger.debug(f"find file: {_file}") + for _worker in _check.checker_every_dir(_file): + if _worker is None: + continue + logger.debug(f"Worker[{_worker.short_id}]:") + _worker.run() + + if __name__ == "__main__": - # logger_alist_sync = logging.getLogger("alist-sync") - # logger_alist_sync.setLevel(logging.DEBUG) - # logger_alist_sync.addHandler(logging.StreamHandler()) - # logger.info("Begin...") main() diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 2fa9b34..9c4dec7 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -162,7 +162,7 @@ def copy_single_stream(self): "Last-Modified": str( int(self.source_path.stat().modified.timestamp() * 1000) ), - "File-Path": urllib.parse.quote_plus( + "File-Path": urllib.parse.quote( str(self.target_path.as_posix()) ), }, @@ -170,7 +170,10 @@ def copy_single_stream(self): ) assert res.code == 200 - logger.info(f"Worker[{self.short_id}] Upload File [{res.code}] {res.message}.") + logger.info( + f"Worker[{self.short_id}] Upload File " + f"[{self.target_path}] [{res.code}]{res.message}." + ) self.update(status="uploaded") def copy_type(self): @@ -193,9 +196,30 @@ def delete_type(self): assert not self.target_path.exists() self.update(status="deleted") - def recheck(self): + def recheck_copy(self, retry=5, re_time=2): + """再次检查当前Worker的结果是否符合预期。""" + try: + return ( + self.target_path.re_stat(retry=retry, timeout=re_time).size + == self.source_path.re_stat().size + ) + except FileNotFoundError: + if retry > 0: + return self.recheck_copy(retry=retry - 1, re_time=re_time) + logger.error( + f"Worker[{self.short_id}] Recheck Error: 文件不存在.({retry=})" + ) + return False + + def recheck(self) -> bool: """再次检查当前Worker的结果是否符合预期。""" - return True + if self.type == "copy": + return self.recheck_copy(retry=3, re_time=3) + elif self.type == "delete": + self.target_path.re_stat(retry=5, timeout=2) + return not self.target_path.exists() + else: + raise ValueError(f"Unknown Worker Type {self.type}.") def run(self): """启动Worker""" diff --git a/alist_sync/err.py b/alist_sync/err.py new file mode 100644 index 0000000..c27a156 --- /dev/null +++ b/alist_sync/err.py @@ -0,0 +1,36 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +""" +@File Name : err.py +@Author : LeeCQ +@Date-Time : 2024/3/2 11:55 + +""" + + +class AlistSyncError(Exception): + pass + + +class ScanerError(AlistSyncError): + pass + + +class CheckerError(AlistSyncError): + pass + + +class WorkerError(AlistSyncError): + pass + + +class DownloaderError(WorkerError): + pass + + +class UploadError(WorkerError): + pass + + +class RecheckError(WorkerError): + pass diff --git a/config-template.yaml b/config-template.yaml index 8323b16..932b41a 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -48,17 +48,23 @@ sync_groups: # 一个相对目录,最终为每一个group中的每一个server创建一个备份目录 backup_dir: "./.alist-sync-backup" # 默认值: ./.alist-sync-backup + # 黑名单,支持通配符, 使用 fnmatch.fnmatchcase 函数进行匹配 + # 详情参考标准库文档 https://docs.python.org/3/library/fnmatch.html + # 后面可能会重构,以支持 Linux Glob 模式。 + # 其路径必须相对与Group中定义的目录,或者使用*开头 + # 例子: + # 忽略 http://localhost:5244/test1/base/ 目录下的所有目录及文件: "base/*" + # 忽略所有bfstm文件: "*.bfstm" + blacklist: + - "*.bfstm" + - "base/*" + - "testa/b/*" + # 同步目录,一个完整的AList URL, # 对于copy, mirror 第一个为源目录,其他个为目标目录 # Alist服务器信息需要提前在alist_servers中配置 # 支持在不同的Alist服务器之间同步 # 例子:http://localhost:5244/test1 - - ignore: - dir: - - .alist-sync - match: - - "**/*.py" group: - "http://localhost:5244/test1" - "http://localhost:5244/test2" diff --git a/tests/common.py b/tests/common.py index 0416860..3d501b9 100644 --- a/tests/common.py +++ b/tests/common.py @@ -95,6 +95,6 @@ def setup_function(): cache_dir.mkdir(parents=True, exist_ok=True) -if __name__ == '__main__': +if __name__ == "__main__": setup_module() - setup_function() \ No newline at end of file + setup_function() diff --git a/tests/test_check.py b/tests/test_check.py index 1b9b79e..3986105 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -1,22 +1,28 @@ -from pathlib import Path, PurePosixPath +from alist_sdk.py312_pathlib import PurePosixPath +import fnmatch import pytest -from alist_sync.checker import Checker -from alist_sync.scanner import Scanner - -SUP_DIR = Path(__file__).parent.joinpath("resource") - @pytest.mark.parametrize( - "scanner", + "path, match, result", [ - Scanner.model_validate_json(SUP_DIR.joinpath("Scanner.json").read_text()), - Scanner.model_validate_json(SUP_DIR.joinpath("Scanner-m.json").read_text()), + ["a/b/c/d", "a/*/*/d", True], + ["a/b/c/d", "a/*/*/e", False], + ["a/test.txt", "a/*.txt", True], + ["a/b/test.txt", "a/*.txt", True], + ["a/b/c/test.txt", "*.txt", True], + [ + "电子书整理/睡眠革命/睡眠革命【微信公众号:冒犯经典】.mobi/Demo118_0_Text001.bfstm", + "电子书整理/*", + True, + ], + [ + "电子书整理/睡眠革命/睡眠革命【微信公众号:冒犯经典】.mobi/Demo118_0_Text001.bfstm", + "*微信公众号:冒犯经典*", + True, + ], ], ) -def test_check(scanner: Scanner): - cols = [PurePosixPath(base_path) for base_path, i in scanner.items.items()] - checker: Checker = Checker.checker(scanner) - assert checker.matrix - assert checker.cols == cols +def test_check(path, match, result): + assert fnmatch.fnmatchcase(path, match) == result From 623f22106b230db24c5a96ba849f647c93a7ed64 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Sat, 2 Mar 2024 21:08:01 +0800 Subject: [PATCH 18/21] 0.1.0b2 --- .github/workflows/alist-sync.yaml | 5 ++++- alist_sync/common.py | 10 +-------- alist_sync/config.py | 37 +++++++++++++++++++++++-------- alist_sync/d_worker.py | 2 +- 4 files changed, 34 insertions(+), 20 deletions(-) diff --git a/.github/workflows/alist-sync.yaml b/.github/workflows/alist-sync.yaml index 633ac32..549b66c 100644 --- a/.github/workflows/alist-sync.yaml +++ b/.github/workflows/alist-sync.yaml @@ -1,5 +1,7 @@ name: Alist Sync -run-name: ${{}} + +env: + _ALIST_SYNC_NAME: "action-${{github.actor}}-${{github.run_id}}-${{github.run_number}}" on: workflow_dispatch: @@ -71,6 +73,7 @@ jobs: - name: RUN env: SYNC_CONFIG: ${{secrets.SYNC_CONFIG}} + _ALIST_ADMIN_PASSWORD: ${{ secrets.ALIST_ADMIN_PASSWORD }} run: | cat > alist-backup-config.json < EOF ${{ secrets.ALIST_BACKUP_CONFIG }} diff --git a/alist_sync/common.py b/alist_sync/common.py index 60ec40f..31df0b0 100644 --- a/alist_sync/common.py +++ b/alist_sync/common.py @@ -2,15 +2,13 @@ import builtins import hashlib import logging +import os import selectors import sys import threading from pathlib import Path from typing import Iterable -from alist_sync.config import create_config - -cache_dir = create_config().cache_dir logger = logging.getLogger("alist-sync.common") @@ -21,7 +19,6 @@ "async_all_task_names", "is_task_all_success", "timeout_input", - "clear_cache", "clear_path", "all_thread_name", "prefix_in_threads", @@ -46,11 +43,6 @@ def clear_path(path: Path): i.rmdir() -def clear_cache(): - """清除缓存""" - clear_path(cache_dir) - - def sha1(s) -> str: return hashlib.sha1(str(s).encode()).hexdigest() diff --git a/alist_sync/config.py b/alist_sync/config.py index 0620fc7..d027a3d 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -6,25 +6,44 @@ from datetime import datetime from pathlib import Path from functools import cached_property, lru_cache -from typing import Optional, Literal, TYPE_CHECKING, Any +from typing import Optional, Literal, TYPE_CHECKING, Any, Annotated from alist_sdk import AlistPathType, AlistPath +from alist_sdk.path_lib import AlistPathPydanticAnnotation from httpx import URL -from pydantic import Field, BaseModel +from pydantic import Field, BaseModel, BeforeValidator from pymongo.database import Database + if TYPE_CHECKING: from alist_sync.data_handle import ShelveHandle, MongoHandle logger = logging.getLogger("alist-sync.config") +PAlistPathType = Annotated[ + AlistPathType, + AlistPathPydanticAnnotation, + BeforeValidator( + lambda x: ( + URL("http://localhost:5244").join(x).__str__() + if not URL(x).is_absolute_url + else x + ) + ), +] + + +def getenv(name, default=None): + """获取环境变量""" + return os.getenv(name, os.getenv("_" + name, default)) + def create_config(): """创建配置文件""" if hasattr(builtins, "sync_config"): return builtins.sync_config - config_file = os.getenv( + config_file = getenv( "ALIST_SYNC_CONFIG", Path(__file__).parent.parent / "config.yaml" ) @@ -109,7 +128,7 @@ def __hash__(self): backup_dir: str = ".alist-sync-backup" blacklist: list[str] = [] whitelist: list[str] = [] - group: list[AlistPathType] = Field(min_length=2) + group: list[PAlistPathType] = Field(min_length=2) NotifyType = Literal["email", "webhook"] @@ -138,10 +157,10 @@ class Config(BaseModel): def __hash__(self): return hash(self._id) - _id: str = "alist-sync-config" + _id: str = getenv("ALIST_SYNC_NAME", "alist-sync") cache__dir: Path = Field( - default=os.getenv( + default=getenv( "ALIST_SYNC_CACHE_DIR", Path(__file__).parent / ".alist-sync-cache", ), @@ -151,7 +170,7 @@ def __hash__(self): timeout: int = Field(10) ua: str = None - daemon: bool = os.getenv("ALIST_SYNC_DAEMON", "false").lower() in ( + daemon: bool = getenv("ALIST_SYNC_DAEMON", "false").lower() in ( "true", "1", "yes", @@ -161,9 +180,9 @@ def __hash__(self): "1", ) - runner_name: str = "test" + name: str = getenv("ALIST_SYNC_NAME", "alist-sync") - mongodb_uri: str | None = os.getenv("ALIST_SYNC_MONGODB_URI", None) + mongodb_uri: str | None = getenv("ALIST_SYNC_MONGODB_URI", None) notify: list[EMailNotify | WebHookNotify] = [] diff --git a/alist_sync/d_worker.py b/alist_sync/d_worker.py index 9c4dec7..f8b0101 100644 --- a/alist_sync/d_worker.py +++ b/alist_sync/d_worker.py @@ -42,7 +42,7 @@ # noinspection PyTypeHints class Worker(BaseModel): - owner: str = sync_config.runner_name + owner: str = sync_config.name created_at: datetime.datetime = datetime.datetime.now() done_at: datetime.datetime | None = None type: WorkerType From 32575f4ee7b0e6cef5ee8464818f13f9e219c140 Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Sat, 2 Mar 2024 21:29:01 +0800 Subject: [PATCH 19/21] 0.1.0b3 --- README.md | 29 +++++++++++------------------ alist_sync/config.py | 44 +++++++------------------------------------- config-template.yaml | 1 + 3 files changed, 19 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index e824e30..eb4c839 100644 --- a/README.md +++ b/README.md @@ -14,23 +14,22 @@ python -m alist_sync -c "" 1. FORK存储库 -2. 创建存储库机密(Action): - 1. ALIST_CONFIG: (可选) [JSON] 指定Alist静态配置(data/config.json) - 2. ALIST_BACKUP: 可选 [JSON] 现有ALIST网页端导出的备份配置 - 3. ALIST_BACKUP_URL: 可选 [URL] 可以从远程导出BACKUP的URL - 4. ALIST_BACKUP_USERNAME: 可选 [string] - 5. ALIST_BACKUP_PASSWORD: 可选 [string] - 6. ALIST_USERNAME: - 7. ALIST_PASSWORD: - 8. SYNC_CONFIG: [YAML|JSON] Alist-sync的配置 - -3. 启用Action +2. 创建SYNC_CONFIG文件,内容参见:[config-template.yaml](./config-template.yaml) + +3. 创建存储库机密(Action): + 1. ALIST_BACKUP: `可选 [JSON]` 现有ALIST网页端导出的备份配置, 用于初始化Alist + 2. ALIST_BACKUP_URL: `可选 [URL]` 可以从远程导出BACKUP的URL, 与ALIST_BACKUP二选一 + 3. ALIST_BACKUP_USERNAME: `可选 [string]` 当ALIST_BACKUP存在时,需要提供用户名 + 4. ALIST_BACKUP_PASSWORD: `可选 [string]` 当ALIST_BACKUP存在时,需要提供密码 + 5. ALIST_ADMIN_PASSWORD: `可选[string]` 管理员密码,默认值: `123456` + 6. SYNC_CONFIG: `必选[YAML|JSON]` Alist-sync的配置 + +4. 启用Action ## 预期同步模式: ### 1. copy 简单复制 (已实现) -*命令:* `alist-sync copy -h http://localhost:5244 -u admin -p 123456 -s /源目录 -t /目标目录1 -t /目标目录2 ...` #### 工作原理: @@ -41,8 +40,6 @@ python -m alist_sync -c "" ### 2. mirror 镜像复制 (已实现) -*命令:* `alist-sync mirror -h http://localhost:5244 -u admin -p 123456 -s /源目录 -t /目标目录1 -t /目标目录2 ...` - #### 工作原理: 将源目录中的文件分别镜像同步到全部的目标目录中, @@ -52,8 +49,6 @@ python -m alist_sync -c "" ### 3. sync 多源双向复制(实现中) -*命令:* `alist-sync sync -h http://localhost:5244 -u admin -p 123456 -s /目录1 -s /目录2 -s /目录3 ...` - #### 工作原理: -s 指定的目录分别作为源目录,想其他的目录中发起copy命令 @@ -64,8 +59,6 @@ python -m alist_sync -c "" ### 4. sync-incr 多源增量复制(设计中) -*命令:* `alist-sync sync_incr -h http://localhost:5244 -u admin -p 123456 -s /目录1 -s /目录2 -s /目录3 ...` - #### 工作原理: -s 指定的目录分别作为源目录,想其他的目录中发起copy命令, 首次运行后会生成配置文件,持久化保存,作为增量的识别文件。 diff --git a/alist_sync/config.py b/alist_sync/config.py index d027a3d..5e56088 100644 --- a/alist_sync/config.py +++ b/alist_sync/config.py @@ -79,41 +79,11 @@ def dump_for_alist_path(self): _data["server"] = _data.pop("base_url") return _data - def storages(self) -> list[dict]: - """返回给定的 storage_config 中包含的storages""" - - def is_storage(_st): - if not isinstance(_st, dict): - return False - if "mount_path" in _st and "driver" in _st: - return True - return False - - if not self.storage_config or self.storage_config == Path(): - return [] - if not self.storage_config.exists(): - raise FileNotFoundError(f"找不到文件:{self.storage_config}") - - _load_storages = json.load(self.storage_config.open()) - if isinstance(_load_storages, list): - _load_storages = [_s for _s in _load_storages if is_storage(_s)] - if _load_storages: - return _load_storages - raise KeyError() - - if isinstance(_load_storages, dict): - if "storages" in _load_storages: - _load_storages = [ - _s for _s in _load_storages["storages"] if is_storage(_s) - ] - if _load_storages: - return _load_storages - raise KeyError() - if is_storage(_load_storages): - return [ - _load_storages, - ] - raise KeyError("给定的") + +def set_add(x) -> set: + x = set(x) + x.add(".alist-sync*") + return x class SyncGroup(BaseModel): @@ -126,8 +96,8 @@ def __hash__(self): interval: int = 300 need_backup: bool = False backup_dir: str = ".alist-sync-backup" - blacklist: list[str] = [] - whitelist: list[str] = [] + blacklist: Annotated[list[str], BeforeValidator(lambda x: set_add(x))] = [] + whitelist: Annotated[list[str], BeforeValidator(lambda x: set_add(x))] = [] group: list[PAlistPathType] = Field(min_length=2) diff --git a/config-template.yaml b/config-template.yaml index 932b41a..404f316 100644 --- a/config-template.yaml +++ b/config-template.yaml @@ -52,6 +52,7 @@ sync_groups: # 详情参考标准库文档 https://docs.python.org/3/library/fnmatch.html # 后面可能会重构,以支持 Linux Glob 模式。 # 其路径必须相对与Group中定义的目录,或者使用*开头 + # .alist-sync* 将自动添加到黑名单中 # 例子: # 忽略 http://localhost:5244/test1/base/ 目录下的所有目录及文件: "base/*" # 忽略所有bfstm文件: "*.bfstm" From c50918dbc4f9b1d7bd585e9675a0491fa7ba8fbb Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Sat, 2 Mar 2024 21:31:55 +0800 Subject: [PATCH 20/21] 0.1.0b4 --- tests/test_worker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_worker.py b/tests/test_worker.py index 5b998ca..494f281 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -17,6 +17,7 @@ sync_config = create_config() +@pytest.mark.skip() def test_worker_copy(): docs = { # "_id": "013ac712314196a73bc97baba0e0cb97f769140b", From 0eccdbaf3329c0138bb271f6942b4a95cc16ff4f Mon Sep 17 00:00:00 2001 From: LeeCQ Date: Sat, 2 Mar 2024 21:34:27 +0800 Subject: [PATCH 21/21] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_worker.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test_worker.py b/tests/test_worker.py index 494f281..9da38d0 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -9,16 +9,15 @@ import sys import pytest -from alist_sdk.path_lib import PureAlistPath, AlistPath, login_server - -from alist_sync.d_worker import Worker, Workers -from alist_sync.config import create_config - -sync_config = create_config() +from alist_sdk.path_lib import login_server @pytest.mark.skip() def test_worker_copy(): + from alist_sync.d_worker import Worker + from alist_sync.config import create_config + + sync_config = create_config() docs = { # "_id": "013ac712314196a73bc97baba0e0cb97f769140b", "backup_dir": None,