From 61c63d0be971b3226ca2bf8dd2521b45c42d6c05 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 8 Dec 2022 18:19:58 -0800
Subject: [PATCH] Remove Code and Configs for Swarm/podman support (#407)

- remove swarm / podman support
- remove docker-compose.yml, btrixcloud.swarm package, and podman/swarm
  scripts from scripts/ dir
- remove python-on-whales
- add error if not running in k8s
---
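[Note, not part of the commit message: with this change the backend exits at
startup unless it is running inside Kubernetes. The check keys off
KUBERNETES_SERVICE_HOST, which the kubelet injects into every pod's
environment. A minimal sketch of the pattern the new code in
backend/btrixcloud/main.py follows — the require_k8s name is illustrative,
not part of this patch; note the real code relies on main.py importing sys:

    import os
    import sys

    def require_k8s() -> None:
        # KUBERNETES_SERVICE_HOST is set automatically inside every k8s pod
        if not os.environ.get("KUBERNETES_SERVICE_HOST"):
            print("Browsertrix Cloud backend requires Kubernetes, exiting")
            sys.exit(1)
]
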
 backend/Dockerfile                            |   8 -
 backend/btrixcloud/main.py                    |  20 +-
 backend/btrixcloud/swarm/__init__.py          |   0
 backend/btrixcloud/swarm/base_job.py          | 102 --------
 backend/btrixcloud/swarm/crawl_job.py         | 113 ---------
 backend/btrixcloud/swarm/profile_job.py       |  15 --
 backend/btrixcloud/swarm/swarmmanager.py      | 195 --------------
 .../btrixcloud/swarm/templates/crawl_job.yaml |  80 ------
 .../btrixcloud/swarm/templates/crawler.yaml   | 104 --------
 .../swarm/templates/profile_job.yaml          |  46 ----
 .../swarm/templates/profilebrowser.yaml       |  42 ---
 backend/btrixcloud/swarm/utils.py             | 240 ------------------
 backend/requirements.txt                      |   2 -
 configs/config.sample.env                     |  96 -------
 configs/docker-compose.debug-ports.yml        |  24 --
 configs/docker-compose.podman.yml             |  38 ---
 configs/docker-compose.signing.yml            |  37 ---
 configs/docker-compose.swarm.yml              |  49 ----
 docker-compose.yml                            |  67 -----
 scripts/build-podman.sh                       |  14 -
 scripts/build-swarm.sh                        |  15 --
 scripts/init-configs.sh                       |   9 -
 scripts/run-podman.sh                         |  27 --
 scripts/run-swarm.sh                          |  22 --
 scripts/stop-podman.sh                        |  13 -
 scripts/stop-swarm.sh                         |   2 -
 26 files changed, 10 insertions(+), 1370 deletions(-)
 delete mode 100644 backend/btrixcloud/swarm/__init__.py
 delete mode 100644 backend/btrixcloud/swarm/base_job.py
 delete mode 100644 backend/btrixcloud/swarm/crawl_job.py
 delete mode 100644 backend/btrixcloud/swarm/profile_job.py
 delete mode 100644 backend/btrixcloud/swarm/swarmmanager.py
 delete mode 100644 backend/btrixcloud/swarm/templates/crawl_job.yaml
 delete mode 100644 backend/btrixcloud/swarm/templates/crawler.yaml
 delete mode 100644 backend/btrixcloud/swarm/templates/profile_job.yaml
 delete mode 100644 backend/btrixcloud/swarm/templates/profilebrowser.yaml
 delete mode 100644 backend/btrixcloud/swarm/utils.py
 delete mode 100644 configs/config.sample.env
 delete mode 100644 configs/docker-compose.debug-ports.yml
 delete mode 100644 configs/docker-compose.podman.yml
 delete mode 100644 configs/docker-compose.signing.yml
 delete mode 100644 configs/docker-compose.swarm.yml
 delete mode 100644 docker-compose.yml
 delete mode 100755 scripts/build-podman.sh
 delete mode 100755 scripts/build-swarm.sh
 delete mode 100755 scripts/init-configs.sh
 delete mode 100755 scripts/run-podman.sh
 delete mode 100755 scripts/run-swarm.sh
 delete mode 100755 scripts/stop-podman.sh
 delete mode 100755 scripts/stop-swarm.sh

diff --git a/backend/Dockerfile b/backend/Dockerfile
index b84fed8754..2eeaeb709f 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,7 +1,3 @@
-ARG PODMAN_VERSION=4
-
-FROM docker.io/mgoltzsche/podman:${PODMAN_VERSION}-remote as podmanremote
-
 FROM docker.io/library/python:3.10-slim
 
 WORKDIR /app
@@ -10,12 +6,8 @@ ADD requirements.txt /app
 
 RUN pip install -r requirements.txt
 
-RUN python-on-whales download-cli
-
 ADD btrixcloud/ /app/btrixcloud/
 
-COPY --from=podmanremote /usr/local/bin/podman-remote /usr/bin/podman
-
 CMD uvicorn btrixcloud.main:app_root --host 0.0.0.0 --access-log --log-level info
 
 EXPOSE 8000
diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py
index 4c0ca26036..da0a2056a1 100644
--- a/backend/btrixcloud/main.py
+++ b/backend/btrixcloud/main.py
@@ -26,6 +26,8 @@
 from .colls import init_collections_api
 from .crawls import init_crawls_api
 
+from .k8s.k8sman import K8SManager
+
 API_PREFIX = "/api"
 app_root = FastAPI(
@@ -67,16 +69,14 @@ def main():
     user_manager.set_archive_ops(archive_ops)
 
     # pylint: disable=import-outside-toplevel
-    if os.environ.get("KUBERNETES_SERVICE_HOST"):
-        from .k8s.k8sman import K8SManager
-
-        crawl_manager = K8SManager()
-    else:
-        # from .docker.dockerman import DockerManager
-        # crawl_manager = DockerManager(archive_ops)
-        from .swarm.swarmmanager import SwarmManager
-
-        crawl_manager = SwarmManager()
+    if not os.environ.get("KUBERNETES_SERVICE_HOST"):
+        print(
+            "Sorry, the Browsertrix Cloud Backend must be run inside a Kubernetes environment.\
+            Kubernetes not detected (KUBERNETES_SERVICE_HOST is not set), Exiting"
+        )
+        sys.exit(1)
+
+    crawl_manager = K8SManager()
 
     init_storages_api(archive_ops, crawl_manager, current_active_user)
 
diff --git a/backend/btrixcloud/swarm/__init__.py b/backend/btrixcloud/swarm/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/backend/btrixcloud/swarm/base_job.py b/backend/btrixcloud/swarm/base_job.py
deleted file mode 100644
index 3b19508278..0000000000
--- a/backend/btrixcloud/swarm/base_job.py
+++ /dev/null
@@ -1,102 +0,0 @@
-""" base k8s job driver """
-
-import os
-import asyncio
-import signal
-
-import sys
-import yaml
-
-from fastapi.templating import Jinja2Templates
-
-from .utils import get_templates_dir, get_runner
-from ..utils import random_suffix
-
-runner = get_runner()
-
-
-# =============================================================================
-# pylint: disable=too-many-instance-attributes,bare-except,broad-except
-class SwarmJobMixin:
-    """Crawl Job State"""
-
-    def __init__(self):
-        self.secrets_prefix = "/var/run/secrets/"
-        self.custom_config_file = os.environ.get("CUSTOM_JOB_CONFIG")
-
-        self.curr_storage = {}
-
-        self.job_id = os.environ.get("JOB_ID")
-
-        # in case id is modified below, should be able to delete self
-        self.orig_job_id = self.job_id
-
-        self.remove_schedule = False
-        self.is_scheduled = os.environ.get("RUN_MANUAL") == "0"
-
-        if self.is_scheduled:
-            self.job_id += "-" + random_suffix()
-
-        self.prefix = os.environ.get("STACK_PREFIX", "stack-")
-
-        if self.custom_config_file:
-            self._populate_env(self.secrets_prefix + self.custom_config_file)
-
-        self.templates = Jinja2Templates(directory=get_templates_dir())
-
-        super().__init__()
-
-    def _populate_env(self, filename):
-        with open(filename, encoding="utf-8") as fh_config:
-            params = yaml.safe_load(fh_config)
-
-        for key in params:
-            val = params[key]
-            if isinstance(val, str):
-                os.environ[key] = val
-
-    async def init_job_objects(self, template, extra_params=None):
-        """init swarm objects from specified template with given extra_params"""
-        loop = asyncio.get_running_loop()
-        loop.add_signal_handler(signal.SIGUSR1, self.unschedule_job)
-
-        params = {"id": self.job_id}
-
-        if extra_params:
-            params.update(extra_params)
-
-        params["storage_name"] = os.environ.get("STORAGE_NAME", "default")
-        params["env"] = os.environ
-
-        await self._do_create(loop, template, params)
-
-    async def delete_job_objects(self, _):
-        """remove swarm service stack"""
-        loop = asyncio.get_running_loop()
-        await self._do_delete(loop)
-
-        if not self.is_scheduled or self.remove_schedule:
-            print("Removed other objects, removing ourselves", flush=True)
-            await loop.run_in_executor(
-                None, runner.delete_service_stack, f"job-{self.orig_job_id}"
-            )
-        else:
-            sys.exit(0)
-
-        return True
-
-    def unschedule_job(self):
-        """mark job as unscheduled"""
-        print("Unscheduled, will delete when finished", flush=True)
-        self.remove_schedule = True
-
-    async def _do_create(self, loop, template, params):
-        data = self.templates.env.get_template(template).render(params)
-        return await loop.run_in_executor(
-            None, runner.run_service_stack, self.prefix + self.job_id, data
-        )
-
-    async def _do_delete(self, loop):
-        await loop.run_in_executor(
-            None, runner.delete_service_stack, self.prefix + self.job_id
-        )
diff --git a/backend/btrixcloud/swarm/crawl_job.py b/backend/btrixcloud/swarm/crawl_job.py
deleted file mode 100644
index 19d82e8700..0000000000
--- a/backend/btrixcloud/swarm/crawl_job.py
+++ /dev/null
@@ -1,113 +0,0 @@
-""" entry point for K8s crawl job which manages the stateful crawl """
-
-import asyncio
-
-from fastapi import FastAPI
-
-from .utils import get_runner
-
-from .base_job import SwarmJobMixin
-from ..crawl_job import CrawlJob
-
-
-app = FastAPI()
-
-runner = get_runner()
-
-
-# =============================================================================
-class SwarmCrawlJob(SwarmJobMixin, CrawlJob):
-    """Crawl Job"""
-
-    async def _do_scale(self, new_scale):
-        loop = asyncio.get_running_loop()
-
-        # if making scale smaller, ensure existing crawlers saved their data
-        if new_scale < self.scale:
-            # ping for final exit
-            for num in range(self.scale, new_scale, -1):
-                num = num - 1
-                service_id = f"crawl-{self.job_id}-{num}_crawler"
-                await loop.run_in_executor(
-                    None, runner.ping_containers, service_id, "SIGUSR2"
-                )
-
-            # delete
-            await self._do_delete_replicas(loop, new_scale, self.scale)
-
-        if new_scale > self.scale:
-            # create new stacks
-            params = {}
-            params.update(self._cached_params)
-
-            for num in range(self.scale, new_scale):
-                stack_id = f"{self.prefix}{self.job_id}-{num}"
-                params["index"] = num
-                data = self.templates.env.get_template("crawler.yaml").render(params)
-                await loop.run_in_executor(
-                    None, runner.run_service_stack, stack_id, data
-                )
-
-    async def _get_crawl(self):
-        loop = asyncio.get_running_loop()
-        return await loop.run_in_executor(
-            None, runner.get_service, f"crawl-{self.job_id}-0_crawler"
-        )
-
-    async def _send_shutdown_signal(self, signame):
-        loop = asyncio.get_running_loop()
-        count = 0
-
-        for num in range(0, self.scale):
-            name = f"crawl-{self.job_id}-{num}_crawler"
-            print(f"Sending {signame} to {name}", flush=True)
-            count += await loop.run_in_executor(
-                None, runner.ping_containers, name, signame
-            )
-
-        # for now, assume success if at least 1 container is signaled
-        # count may not equal scale as not all containers may have launched yet
-        return count >= 1
-
-    # pylint: disable=line-too-long
-    @property
-    def redis_url(self):
-        return f"redis://crawl-{self.job_id}-0_redis/0"
-
-    async def _do_create(self, loop, template, params):
-        scale = params.get("scale", 1)
-
-        self._cached_params = params
-
-        for num in range(0, scale):
-            stack_id = f"{self.prefix}{self.job_id}-{num}"
-            params["index"] = num
-            data = self.templates.env.get_template(template).render(params)
-            await loop.run_in_executor(None, runner.run_service_stack, stack_id, data)
-
-    async def _do_delete(self, loop):
-        await self._do_delete_replicas(loop, 0, self.scale)
-
-    async def _do_delete_replicas(self, loop, start, end):
-        # volumes = []
-
-        for num in range(end, start, -1):
-            num = num - 1
-            stack_id = f"{self.prefix}{self.job_id}-{num}"
-            await loop.run_in_executor(None, runner.delete_service_stack, stack_id)
-
-            # volumes.append(f"crawl-{self.job_id}-{num}")
-
-        # likely fails as containers still shutting down
-        # await loop.run_in_executor(None, delete_volumes, volumes)
-
-    async def _change_crawl_config(self, cid):
-        raise NotImplementedError("Not Supported")
-
-
-# ============================================================================
-@app.on_event("startup")
-async def startup():
-    """init on startup"""
-    job = SwarmCrawlJob()
-    job.register_handlers(app)
diff --git a/backend/btrixcloud/swarm/profile_job.py b/backend/btrixcloud/swarm/profile_job.py
deleted file mode 100644
index 7659485794..0000000000
--- a/backend/btrixcloud/swarm/profile_job.py
+++ /dev/null
@@ -1,15 +0,0 @@
-""" entry point for K8S browser job (eg. for profile creation) """
-
-from .base_job import SwarmJobMixin
-from ..profile_job import ProfileJob
-
-
-# =============================================================================
-class SwarmProfileJob(SwarmJobMixin, ProfileJob):
-    # class SwarmProfileJob(ProfileJob, SwarmBaseJob):
-    """Browser run job"""
-
-
-if __name__ == "__main__":
-    job = SwarmProfileJob()
-    job.loop.run_forever()
diff --git a/backend/btrixcloud/swarm/swarmmanager.py b/backend/btrixcloud/swarm/swarmmanager.py
deleted file mode 100644
index 1e9c887081..0000000000
--- a/backend/btrixcloud/swarm/swarmmanager.py
+++ /dev/null
@@ -1,195 +0,0 @@
-""" Swarn Runner """
-import os
-import json
-import asyncio
-
-import aiohttp
-
-from ..archives import S3Storage
-
-from .utils import (
-    get_templates_dir,
-    get_runner,
-)
-
-from ..crawlmanager import BaseCrawlManager
-
-
-# ============================================================================
-class SwarmManager(BaseCrawlManager):
-    """Docker Crawl Manager Interface"""
-
-    # pylint: disable=too-many-instance-attributes,too-many-public-methods
-    def __init__(self):
-        super().__init__(get_templates_dir())
-
-        self.storages = {
-            "default": S3Storage(
-                name="default",
-                access_key=os.environ.get("STORE_ACCESS_KEY"),
-                secret_key=os.environ.get("STORE_SECRET_KEY"),
-                endpoint_url=os.environ.get("STORE_ENDPOINT_URL"),
-                access_endpoint_url=os.environ.get("STORE_ACCESS_ENDPOINT_URL", ""),
-            )
-        }
-
-        self.runner = get_runner()
-
-    async def check_storage(self, storage_name, is_default=False):
-        """check if storage_name is valid storage"""
-        # if not default, don't validate
-        if not is_default:
-            return True
-
-        # if default, ensure name is in default storages list
-        return self.storages[storage_name]
-
-    async def get_default_storage(self, name):
-        """return default storage by name"""
-        return self.storages[name]
-
-    async def _create_from_yaml(self, id_, yaml_data):
-        await self.loop.run_in_executor(
-            None, self.runner.run_service_stack, id_, yaml_data
-        )
-
-    async def ping_profile_browser(self, browserid):
-        """return ping profile browser"""
-        return await self.loop.run_in_executor(
-            None,
-            self.runner.ping_containers,
-            f"job-{browserid}_job",
-            "SIGUSR1",
-        )
-
-    async def get_profile_browser_metadata(self, browserid):
-        """get browser profile labels"""
-        return await self.loop.run_in_executor(
-            None, self.runner.get_service_labels, f"job-{browserid}_job"
-        )
-
-    async def delete_profile_browser(self, browserid):
-        """delete browser job, if it is a profile browser job"""
-        return await self.loop.run_in_executor(
-            None, self.runner.delete_service_stack, f"job-{browserid}"
-        )
-
-    async def delete_crawl_config_by_id(self, cid):
-        """delete crawl configs for crawlconfig id"""
-
-        cid = str(cid)
-
-        # delete scheduled crawl job, if any
-        await self._delete_scheduled_job(f"sched-{cid[:12]}")
-
-        await asyncio.gather(
-            self.loop.run_in_executor(
-                None, self.runner.delete_secret, f"crawl-config-{cid}"
-            ),
-            self.loop.run_in_executor(
-                None, self.runner.delete_secret, f"crawl-opts-{cid}"
-            ),
-        )
-
-    # internal methods
-    # ----------------------------------------------
-    def _add_extra_crawl_job_params(self, params):
-        """add extra crawl job params"""
-        params["env"] = os.environ
-
-    async def _create_config_map(self, crawlconfig, **kwargs):
-        """create config map for config"""
-
-        data = json.dumps(crawlconfig.get_raw_config())
-
-        labels = {
-            "btrix.crawlconfig": str(crawlconfig.id),
-            "btrix.archive": str(crawlconfig.aid),
-        }
-
-        await self.loop.run_in_executor(
-            None,
-            self.runner.create_secret,
-            f"crawl-config-{crawlconfig.id}",
-            data,
-            labels,
-        )
-
-        data = json.dumps(kwargs)
-
-        await self.loop.run_in_executor(
-            None,
-            self.runner.create_secret,
-            f"crawl-opts-{crawlconfig.id}",
-            data,
-            labels,
-        )
-
-    async def _update_scheduled_job(self, crawlconfig):
-        """update schedule on crawl job"""
-
-        cid = str(crawlconfig.id)
-
-        crawl_id = f"sched-{cid[:12]}"
-        stack_name = f"job-{crawl_id}"
-        service_name = f"{stack_name}_job"
-
-        label_name = "swarm.cronjob.schedule"
-
-        cron_job = await self.loop.run_in_executor(
-            None, self.runner.get_service, service_name
-        )
-
-        if cron_job:
-            curr_schedule = cron_job.spec.labels.get(label_name)
-
-            if crawlconfig.schedule and crawlconfig.schedule != curr_schedule:
-                await self.loop.run_in_executor(
-                    None,
-                    self.runner.set_service_label,
-                    service_name,
-                    f"{label_name}={crawlconfig.schedule}",
-                )
-
-            if not crawlconfig.schedule:
-                await self._delete_scheduled_job(crawl_id)
-
-            return
-
-        if not crawlconfig.schedule:
-            return
-
-        data = await self._load_job_template(
-            crawlconfig, crawl_id, manual=False, schedule=crawlconfig.schedule
-        )
-
-        await self._create_from_yaml(stack_name, data)
-
-    async def _delete_scheduled_job(self, crawl_id):
-        # if currently running, ping container to exit on current job
-        # otherwise, delete!
-        if not await self.loop.run_in_executor(
-            None,
-            self.runner.ping_containers,
-            f"job-{crawl_id}_job",
-            "SIGUSR1",
-        ):
-            await self.loop.run_in_executor(
-                None, self.runner.delete_service_stack, f"job-{crawl_id}"
-            )
-
-    async def _post_to_job(self, crawl_id, aid, path, data=None):
-        """make a POST request to the container for specified crawl job"""
-        async with aiohttp.ClientSession() as session:
-            async with session.request(
-                "POST", f"http://job-{crawl_id}_job:8000{path}", json=data
-            ) as resp:
-                try:
-                    return await resp.json()
-                # pylint: disable=bare-except
-                except:
-                    return {"error": "post_failed"}
-
-    async def _delete_crawl_configs(self, label):
-        """delete crawl configs by specified label"""
-        await self.loop.run_in_executor(None, self.runner.delete_secrets, label)
diff --git a/backend/btrixcloud/swarm/templates/crawl_job.yaml b/backend/btrixcloud/swarm/templates/crawl_job.yaml
deleted file mode 100644
index 92ed9cd349..0000000000
--- a/backend/btrixcloud/swarm/templates/crawl_job.yaml
+++ /dev/null
@@ -1,80 +0,0 @@
-version: '3.9'
-
-services:
-  job:
-    image: {{ job_image }}
-    command: ["uvicorn", "btrixcloud.swarm.crawl_job:app", "--host", "0.0.0.0", "--access-log", "--log-level", "info"]
-    container_name: job-{{ id }}_job
-
-    security_opt:
-      - "label=disable"
-
-    secrets:
-      - crawl-opts-{{ cid }}
-
-    volumes:
-      - {{ env.SOCKET_SRC | default("/var/run/docker.sock", true) }}:{{ env.SOCKET_DEST | default("/var/run/docker.sock", true) }}:z
-
-    networks:
-      - btrix
-
-    deploy:
-      replicas: {{ 1 if not schedule else 0 }}
-      labels:
-        btrix.run.manual: "{{ manual }}"
-        btrix.user: {{ userid }}
-        btrix.archive: {{ aid }}
-        btrix.crawlconfig: {{ cid }}
-
-        {% if schedule %}
-        swarm.cronjob.enable: "true"
-        swarm.cronjob.skip-running: "true"
-        swarm.cronjob.schedule: "{{ schedule }}"
-        {% endif %}
-
-      mode: replicated
-      restart_policy:
-        condition: none
-
-    environment:
-      CUSTOM_JOB_CONFIG: crawl-opts-{{ cid }}
-
-      JOB_ID: "{{ id }}"
-      STACK_PREFIX: "crawl-"
-
-      CRAWLER_IMAGE: "{{ env.CRAWLER_IMAGE }}"
-      REDIS_IMAGE: "{{ env.REDIS_IMAGE }}"
-
-      CRAWL_ARGS: "{{ env.CRAWL_ARGS }}"
-
-      CRAWLER_REQUESTS_CPU: "{{ env.CRAWLER_REQUESTS_CPU }}"
-      CRAWLER_REQUESTS_MEMORY: "{{ env.CRAWLER_REQUESTS_MEMORY }}"
-      CRAWLER_LIMITS_CPU: "{{ env.CRAWLER_LIMITS_CPU }}"
-      CRAWLER_LIMITS_MEMORY: "{{ env.CRAWLER_LIMITS_MEMORY }}"
-
-      STORE_ENDPOINT_URL: "{{ env.STORE_ENDPOINT_URL }}"
-      STORE_ACCESS_KEY: "{{ env.STORE_ACCESS_KEY }}"
-      STORE_SECRET_KEY: "{{ env.STORE_SECRET_KEY }}"
-
-      STORE_PATH: "{{ storage_path }}"
-      STORAGE_NAME: "{{ storage_name }}"
-      PROFILE_PATH: "{{ profile_path }}"
-
-      MONGO_DB_URL: "{{ mongo_db_url }}"
-
-      RUN_MANUAL: "{{ manual }}"
-
-      RUNTIME: "{{ env.RUNTIME }}"
-
-      WACZ_SIGN_URL: "{{ env.WACZ_SIGN_URL }}"
-      WACZ_SIGN_TOKEN: "{{ env.WACZ_SIGN_TOKEN }}"
-
-networks:
-  btrix:
-    external:
-      name: btrix-net
-
-secrets:
-  crawl-opts-{{ cid }}:
-    external: true
-
diff --git a/backend/btrixcloud/swarm/templates/crawler.yaml b/backend/btrixcloud/swarm/templates/crawler.yaml
deleted file mode 100644
index 3f62e67ba8..0000000000
--- a/backend/btrixcloud/swarm/templates/crawler.yaml
+++ /dev/null
@@ -1,104 +0,0 @@
-version: '3.9'
-
-services:
-  crawler:
-    image: {{ env.CRAWLER_IMAGE }}
-    command:
-      - crawl
-      - --config
-      - /var/run/secrets/crawl-config-{{ cid }}
-      - --redisStoreUrl
-      - {{ redis_url }}
-    {%- if env.PROFILE_FILENAME %}
-      - --profile
-      - "@profiles/{{ env.PROFILE_FILENAME }}"
-    {%- endif %}
-
-    hostname: "crawl-{{ id }}-{{ index }}_crawler"
-    container_name: "crawl-{{ id }}-{{ index }}_crawler"
-
-    networks:
-      - btrix
-
-    secrets:
-      - crawl-config-{{ cid }}
-
-    volumes:
-      - crawl-data:/crawls
-
-    stop_grace_period: 1000s
-
-    restart: always
-
-    deploy:
-      endpoint_mode: dnsrr
-      replicas: 1
-      labels:
-        crawl: {{ id }}
-        role: crawler
-
-      {% if env.CRAWLER_LIMITS_CPU and env.CRAWLER_LIMITS_MEMORY and env.CRAWLER_REQUESTS_CPU and env.CRAWLER_REQUESTS_MEMORY %}
-      resources:
-        limits:
-          cpus: "{{ env.CRAWLER_LIMITS_CPU }}"
-          memory: "{{ env.CRAWLER_LIMITS_MEMORY }}"
-        reservations:
-          cpus: "{{ env.CRAWLER_REQUESTS_CPU }}"
-          memory: "{{ env.CRAWLER_REQUESTS_MEMORY }}"
-      {% endif %}
-
-    environment:
-      CRAWL_ID: {{ id }}
-
-      STORE_ENDPOINT_URL: "{{ env.STORE_ENDPOINT_URL }}"
-      STORE_ACCESS_KEY: "{{ env.STORE_ACCESS_KEY }}"
-      STORE_SECRET_KEY: "{{ env.STORE_SECRET_KEY }}"
-
-      STORE_PATH: "{{ env.STORE_PATH }}"
-      STORE_FILENAME: "{{ env.STORE_FILENAME }}"
-      STORE_USER: "{{ env.USER_ID }}"
-
-    {%- if env.WACZ_SIGN_URL %}
-      WACZ_SIGN_TOKEN: "{{ env.WACZ_SIGN_TOKEN }}"
-      WACZ_SIGN_URL: "{{ env.WACZ_SIGN_URL }}"
-    {%- endif %}
-
-      WEBHOOK_URL: "{{ redis_url }}/crawls-done"
-      CRAWL_ARGS: "{{ env.CRAWL_ARGS }}"
-
-{% if index == 0 %}
-  redis:
-    image: {{ env.REDIS_IMAGE }}
-    command: ["redis-server", "--appendonly", "yes"]
-    container_name: "crawl-{{ id }}-{{ index }}_redis"
-    restart: always
-
-    deploy:
-      endpoint_mode: dnsrr
-      replicas: 1
-      labels:
-        crawl: {{ id }}
-        role: redis
-
-    networks:
-      - btrix
-
-{% endif %}
-
-networks:
-  btrix:
-    external:
-      name: btrix-net
-
-secrets:
-  crawl-config-{{ cid }}:
-    external: true
-
-volumes:
-  crawl-data:
-    name: "crawl-{{ id }}-{{ index }}"
-    labels:
-      btrix.crawl: {{ id }}
-
-
diff --git a/backend/btrixcloud/swarm/templates/profile_job.yaml b/backend/btrixcloud/swarm/templates/profile_job.yaml
deleted file mode 100644
index 44ecac642a..0000000000
--- a/backend/btrixcloud/swarm/templates/profile_job.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-version: '3.9'
-
-services:
-  job:
-    image: {{ job_image }}
-    command: ["python", "-m", "btrixcloud.swarm.profile_job"]
-
-    volumes:
-      - {{ env.SOCKET_SRC | default("/var/run/docker.sock", true) }}:{{ env.SOCKET_DEST | default("/var/run/docker.sock", true) }}:z
-
-    networks:
-      - btrix
-
-    deploy:
-      labels:
-        btrix.profile: "1"
-        btrix.archive: {{ aid }}
-        btrix.user: {{ userid }}
-      {%- if baseprofile %}
-        btrix.baseprofile: "{{ baseprofile }}"
-      {%- endif %}
-
-      #mode: replicated-job
-      replicas: 1
-
-    environment:
-      JOB_ID: "{{ id }}"
-      STACK_PREFIX: "browser-"
-
-      CRAWLER_IMAGE: "{{ env.CRAWLER_IMAGE }}"
-
-      STORE_ENDPOINT_URL: "{{ env.STORE_ENDPOINT_URL }}"
-      STORE_ACCESS_KEY: "{{ env.STORE_ACCESS_KEY }}"
-      STORE_SECRET_KEY: "{{ env.STORE_SECRET_KEY }}"
-
-      STORE_PATH: "{{ storage_path }}"
-      STORAGE_NAME: "{{ storage_name }}"
-      IDLE_TIMEOUT: "60"
-      START_URL: "{{ url }}"
-      PROFILE_PATH: "{{ profile_path }}"
-
-networks:
-  btrix:
-    external:
-      name: btrix-net
-
diff --git a/backend/btrixcloud/swarm/templates/profilebrowser.yaml b/backend/btrixcloud/swarm/templates/profilebrowser.yaml
deleted file mode 100644
index 8d6af01d78..0000000000
--- a/backend/btrixcloud/swarm/templates/profilebrowser.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-version: '3.9'
-
-services:
-  browser:
-    image: {{ env.CRAWLER_IMAGE }}
-    command:
-      - create-login-profile
-      - --interactive
-      - --filename
-      - /tmp/profile.tar.gz
-      - --url
-      - "{{ url }}"
-    {%- if env.PROFILE_FILENAME %}
-      - --profile
-      - "@{{ env.PROFILE_FILENAME }}"
-    {%- endif %}
-
-    hostname: "browser-{{ id }}-0.browser-{{ id }}"
-    networks:
-      - btrix
-
-    deploy:
-      endpoint_mode: dnsrr
-      replicas: 1
-      labels:
-        browser: {{ id }}
-        role: browser
-
-    environment:
-      STORE_ENDPOINT_URL: "{{ env.STORE_ENDPOINT_URL }}"
-      STORE_ACCESS_KEY: "{{ env.STORE_ACCESS_KEY }}"
-      STORE_SECRET_KEY: "{{ env.STORE_SECRET_KEY }}"
-
-      STORE_PATH: "{{ env.STORE_PATH }}"
-
-networks:
-  btrix:
-    external:
-      name: btrix-net
-
diff --git a/backend/btrixcloud/swarm/utils.py b/backend/btrixcloud/swarm/utils.py
deleted file mode 100644
index aab802e131..0000000000
--- a/backend/btrixcloud/swarm/utils.py
+++ /dev/null
@@ -1,240 +0,0 @@
-""" swarm util functions """
-
-import tempfile
-import os
-import subprocess
-
-from python_on_whales import client_config, DockerClient
-from python_on_whales.exceptions import DockerException
-
-
-# ============================================================================
-def get_templates_dir():
-    """return directory containing templates for loading"""
-    return os.path.join(os.path.dirname(__file__), "templates")
-
-
-# ============================================================================
-def get_runner(runtime=None):
-    """return either Swarm or Podman Runner based on env setting"""
-    if runtime is None:
-        runtime = os.environ.get("RUNTIME", "")
-
-    if runtime == "podman":
-        return PodmanComposeRunner()
-
-    return SwarmRunner()
-
-
-# ============================================================================
-class SwarmRunner:
-    """Run in Swarm"""
-
-    def __init__(self):
-        self.client = DockerClient()
-
-    def run_service_stack(self, name, data):
-        """run compose/swarm stack via interpolated file"""
-        with tempfile.NamedTemporaryFile("wt") as fh_io:
-            fh_io.write(data)
-            fh_io.flush()
-
-            try:
-                self.client.stack.deploy(
-                    name,
-                    compose_files=[fh_io.name],
-                    orchestrator="swarm",
-                    resolve_image="never",
-                )
-            except DockerException as exc:
-                print(exc, flush=True)
-
-        return name
-
-    def delete_service_stack(self, name):
-        """remove stack"""
-        try:
-            self.client.stack.remove(name)
-            return True
-        except DockerException as exc:
-            print(exc, flush=True)
-            return False
-
-    def delete_volumes(self, names):
-        """remove stack"""
-        try:
-            self.client.volume.remove(names)
-            return True
-        except DockerException as exc:
-            print(exc, flush=True)
-            return False
-
-    def create_secret(self, name, data, labels=None):
-        """create secret from specified data"""
-        with tempfile.NamedTemporaryFile("wt") as fh_io:
-            fh_io.write(data)
-            fh_io.flush()
-
-            try:
-                self.client.secret.create(name, fh_io.name, labels=labels)
-            except DockerException as exc:
-                print(exc, flush=True)
-
-    def delete_secret(self, name):
-        """remove secret by name"""
-        try:
-            self.client.secret.remove(name)
-            return True
-        except DockerException as exc:
-            print(exc, flush=True)
-            return False
-
-    def delete_secrets(self, label):
-        """delete secret with specified label"""
-        try:
-            configs = self.client.secret.list(filters={"label": label})
-            for config in configs:
-                config.remove()
-
-            return True
-        except DockerException as exc:
-            print(exc, flush=True)
-            return False
-
-    def get_service(self, service_name):
-        """get a swarm service"""
-        try:
-            res = self.client.service.inspect(service_name)
-            return res
-        except DockerException:
-            return None
-
-    def get_service_labels(self, service_name):
-        """get labels from a swarm service"""
-        service = self.get_service(service_name)
-        return service.spec.labels if service else {}
-
-    def set_service_label(self, service_name, label):
-        """update label"""
-        exe_file = client_config.get_docker_binary_path_in_cache()
-
-        try:
-            subprocess.run(
-                [
-                    exe_file,
-                    "service",
-                    "update",
-                    service_name,
-                    "--label-add",
-                    label,
-                ],
-                capture_output=True,
-                check=True,
-            )
-        # pylint: disable=broad-except
-        except Exception as exc:
-            print(exc, flush=True)
-
-    def ping_containers(self, value, signal_="SIGTERM"):
-        """ping running containers with given service name with signal"""
-        try:
-            count = 0
-            conts = self.client.container.list(filters={"name": value})
-            for cont in conts:
-                print("Sending Signal: " + signal_, flush=True)
-                cont.kill(signal_)
-                count += 1
-            return count
-        except DockerException as exc:
-            print(exc, flush=True)
-            return 0
-
-
-# ============================================================================
-class PodmanComposeRunner(SwarmRunner):
-    """Run via Docker Compose"""
-
-    def __init__(self):
-        # pylint: disable=super-init-not-called
-        self.podman_exe = "podman"
-        # self.podman_exe = client_config.get_docker_binary_path_in_cache()
-
-        self.client = DockerClient(client_call=[self.podman_exe])
-
-    def run_service_stack(self, name, data):
-        """run compose/swarm stack via interpolated file"""
-        with tempfile.NamedTemporaryFile("wt") as fh_io:
-            fh_io.write(data)
-            fh_io.flush()
-
-            try:
-                result = subprocess.run(
-                    [
-                        "podman-compose",
-                        "--podman-path",
-                        self.podman_exe,
-                        "-f",
-                        fh_io.name,
-                        "-p",
-                        name,
-                        "up",
-                        "-d",
-                    ],
-                    capture_output=True,
-                    check=False,
-                )
-                print("stdout")
-                print("------")
-                print(result.stdout.decode("utf-8"))
-                print("stderr")
-                print("------")
-                print(result.stderr.decode("utf-8"), flush=True)
-            # pylint: disable=broad-except
-            except Exception as exc:
-                print(exc, flush=True)
-
-    def delete_service_stack(self, name):
-        """delete compose stack"""
-        print("Deleting Stack: " + name, flush=True)
-
-        for container in self.client.container.list(
-            filters={"label": f"com.docker.compose.project={name}"}
-        ):
-            container.kill()
-            container.remove(volumes=True, force=True)
-
-        for volume in self.client.volume.list(
-            filters={"label": f"com.docker.compose.project={name}"}
-        ):
-            volume.remove()
-
-    def create_secret(self, name, data, labels=None):
-        """create secret from specified data"""
-        with tempfile.NamedTemporaryFile("wt") as fh_io:
-            fh_io.write(data)
-            fh_io.flush()
-
-            try:
-                # labels not supported
-                self.client.secret.create(name, fh_io.name)
-            except DockerException as exc:
-                print(exc, flush=True)
-
-    def delete_secret(self, name):
-        """remove secret by name"""
-        # python-on-whale calls 'remove' but podman only supports 'rm', so call directly
-        try:
-            subprocess.run([self.podman_exe, "secret", "rm", name], check=True)
-            return True
-        # pylint: disable=broad-except
-        except Exception as exc:
-            print(exc, flush=True)
-            return False
-
-    def get_service(self, service_name):
-        """get a swarm service"""
-        try:
-            res = self.client.container.inspect(service_name)
-            return res
-        except DockerException:
-            return None
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 4c4256b16f..53e407bfbe 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -8,5 +8,3 @@ aiobotocore
 redis>=4.2.0rc1
 pyyaml
 jinja2
-python_on_whales
-podman-compose
diff --git a/configs/config.sample.env b/configs/config.sample.env
deleted file mode 100644
index 3eaee62a24..0000000000
--- a/configs/config.sample.env
+++ /dev/null
@@ -1,96 +0,0 @@
-# Env Settings (for Docker Swarm and Podman Deployment)
-
-# Crawl Args
-# =================
-
-CRAWL_ARGS="--timeout 90 --logging stats,behaviors,debug --generateWACZ --text --workers 2 --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone"
-
-# Images
-# =================
-
-CRAWLER_IMAGE=docker.io/webrecorder/browsertrix-crawler:latest
-
-# can set custom registry if not using public images, or local registry
-# REGISTRY=localhost:5000/
-
-BACKEND_TAG=latest
-
-FRONTEND_TAG=latest
-
-REDIS_IMAGE=docker.io/library/redis:latest
-
-# mongo
-# =================
-MONGO_HOST=mongo
-PASSWORD_SECRET=change_me
-
-MONGO_INITDB_ROOT_USERNAME=root
-MONGO_INITDB_ROOT_PASSWORD=example
-
-
-# minio
-# =================
-MINIO_ROOT_USER=ADMIN
-MINIO_ROOT_PASSWORD=PASSW0RD
-
-MINIO_BUCKET=btrix-data
-
-# enable to use custom volume
-# MINIO_DATA_VOLUME=/minio-data
-
-MC_HOST_local=http://ADMIN:PASSW0RD@minio:9000
-
-
-# Super Admin
-# =================
-SUPERUSER_EMAIL=admin@example.com
-
-# if blank, a password is generated automatically
-SUPERUSER_PASSWORD=PASSW0RD0
-
-# Resource Limits
-# =================
-
-# all required to use limits -- set any resource to empty to disable
-# cpu/memory limits
-
-CRAWLER_REQUESTS_CPU=0.8
-CRAWLER_LIMITS_CPU=1.0
-
-CRAWLER_REQUESTS_MEMORY=800M
-CRAWLER_LIMITS_MEMORY=1G
-
-# Storage Config
-# =================
-STORE_ENDPOINT_URL=http://minio:9000/btrix-data/
-STORE_ACCESS_ENDPOINT_URL=/data/
-STORE_ACCESS_KEY=ADMIN
-STORE_SECRET_KEY=PASSW0RD
-
-
-# enable to send verification emails
-#EMAIL_SMTP_HOST=smtp.gmail.com
-#EMAIL_SMTP_PORT=587
-#EMAIL_SENDER=user@example.com
-#EMAIL_PASSWORD=password
-
-# auth sign -- uncomment WACZ_SIGN_URL to enable signing
-# ==================
-AUTHSIGN_TAG=0.5.0
-
-AUTHSIGN_PORT=80
-
-# WACZ_SIGN_URL="http://authsign:8080/sign"
-
-# optional token for signing (useful if using remote signing server)
-# WACZ_SIGN_TOKEN=""
-
-# misc
-# =================
-REGISTRATION_ENABLED=1
-
-# number of workers to run for backend
-WEB_CONCURRENCY=1
-
-JWT_TOKEN_LIFETIME_MINUTES=240
-
diff --git a/configs/docker-compose.debug-ports.yml b/configs/docker-compose.debug-ports.yml
deleted file mode 100644
index ef5436b98e..0000000000
--- a/configs/docker-compose.debug-ports.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-# enable additional ports for debugging
-
-version: '3.8'
-
-services:
-  # enable backend directly
-  backend:
-    ports:
-      - 8000:8000
-
-  # enable minio data and console access directly
-  minio:
-    ports:
-      - 9001:9001
-      - 9000:9000
-
-    environment:
-      - MINIO_SERVER_URL=http://127.0.0.1:9000
-
-  # enable direct access to signing api
-  authsign:
-    ports:
-      - 8080:8080
-
diff --git a/configs/docker-compose.podman.yml b/configs/docker-compose.podman.yml
deleted file mode 100644
index 553e9a7679..0000000000
--- a/configs/docker-compose.podman.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-# config for running with podman as the container engine
-
-version: '3.8'
-
-services:
-  backend:
-    #hostname: backend
-    restart: always
-    volumes:
-      - ${SOCKET_SRC}:${SOCKET_DEST}:z
-
-    security_opt:
-      - "label=disable"
-
-    environment:
-      - RUNTIME=podman
-      - SOCKET_SRC=${SOCKET_SRC}
-      - SOCKET_DEST=${SOCKET_DEST}
-      - CRAWLER_FQDN_SUFFIX=.dns.podman
-
-  frontend:
-    #hostname: frontend
-    restart: always
-    environment:
-      - CRAWLER_FQDN_SUFFIX=.dns.podman
-
-  mongo:
-    image: docker.io/library/mongo
-    #hostname: mongo
-    restart: always
-
-
-  minio:
-    image: docker.io/minio/minio
-    #hostname: minio
-    restart: always
-
-
diff --git a/configs/docker-compose.signing.yml b/configs/docker-compose.signing.yml
deleted file mode 100644
index ed1c3c42e4..0000000000
--- a/configs/docker-compose.signing.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-# add to support signing of wacz files
-
-version: "3.8"
-
-services:
-  authsign:
-    image: webrecorder/authsign:${AUTHSIGN_TAG:-latest}
-
-    volumes:
-      - btrix-sign-data:/data
-      - ./configs/signing.yaml:/app-config/signing.yaml:z
-
-    environment:
-      - CONFIG=/app-config/signing.yaml
-
-      # optional token for authenticating access to authsign
-      - AUTH_TOKEN=${WACZ_SIGN_TOKEN}
-
-    env_file:
-      - ./configs/config.env
-
-    # port 80 must be open to automatically generate cert via LetsEncrypt
-    # or set AUTHSIGN_PORT to use different port
-    ports:
-      - "${AUTHSIGN_PORT:-80}:80"
-
-  backend:
-    environment:
-      - WACZ_SIGN_URL=${WACZ_SIGN_URL}
-
-      # optional token for authenticating access to authsign
-      - WACZ_SIGN_TOKEN=${WACZ_SIGN_TOKEN}
-
-volumes:
-  btrix-sign-data:
-
-
diff --git a/configs/docker-compose.swarm.yml b/configs/docker-compose.swarm.yml
deleted file mode 100644
index 61bdb94359..0000000000
--- a/configs/docker-compose.swarm.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-# Docker Swarm Override
-#
-# Use for deploying on docker swarm:
-# docker stack deploy -c docker-compose.yml -c docker-compose.swarm.yml btrix
-
-
-version: '3.8'
-
-services:
-  backend:
-    volumes:
-      - "/var/run/docker.sock:/var/run/docker.sock"
-
-    deploy:
-      placement:
-        constraints:
-          - node.role == manager
-
-    environment:
-      - "RUNTIME=swarm"
-
-  swarm-cronjob:
-    image: crazymax/swarm-cronjob
-    volumes:
-      - "/var/run/docker.sock:/var/run/docker.sock"
-
-    environment:
-      - "TZ=UTC"
-      - "LOG_LEVEL=debug"
-      - "LOG_JSON=false"
-
-    deploy:
-      placement:
-        constraints:
-          - node.role == manager
-
-  prune-resources:
-    image: docker
-    command: ["docker", "volumes", "prune", "-f", "--filter", "label=crawl"]
-    volumes:
-      - "/var/run/docker.sock:/var/run/docker.sock"
-    deploy:
-      mode: global
-      labels:
-        - "swarm.cronjob.enable=true"
-        - "swarm.cronjob.schedule=0 */5 * * *"
-        - "swarm.cronjob.skip-running=false"
-      restart_policy:
-        condition: none
diff --git a/docker-compose.yml b/docker-compose.yml
deleted file mode 100644
index 46cb482f0f..0000000000
--- a/docker-compose.yml
+++ /dev/null
@@ -1,67 +0,0 @@
-version: '3.8'
-
-services:
-  backend:
-    build: ./backend
-    image: ${REGISTRY}webrecorder/browsertrix-backend:${BACKEND_TAG:-latest}
-
-    env_file:
-      - ./configs/config.env
-
-    depends_on:
-      - minio
-      - mongo
-
-    environment:
-      - CRAWLER_FQDN_SUFFIX=
-      - JOB_IMAGE=${REGISTRY}webrecorder/browsertrix-backend:${BACKEND_TAG:-latest}
-
-  frontend:
-    build: ./frontend
-    image: ${REGISTRY}webrecorder/browsertrix-frontend:${FRONTEND_TAG:-latest}
-    ports:
-      - 9871:80
-
-    depends_on:
-      - backend
-      - minio
-
-    environment:
-      - BACKEND_HOST=backend
-      - CRAWLER_FQDN_SUFFIX=
-      - CRAWLER_SVC_SUFFIX=_crawler
-
-  mongo:
-    image: mongo
-    volumes:
-      - btrix-mongo-data:/data/db
-
-    env_file:
-      - ./configs/config.env
-
-
-  minio:
-    image: minio/minio
-    #command: ["server", "/data", "--console-address", ":9001"]
-
-    entrypoint: "/bin/sh"
-    # auto-create bucket via existing dir (only for single-node mode)
-    command: ["-c", "mkdir -p /data/$$MINIO_BUCKET; /usr/bin/docker-entrypoint.sh server /data --console-address :9001"]
-
-    volumes:
-      - ${MINIO_DATA_VOLUME:-btrix-minio-data}:/data
-
-    env_file:
-      - ./configs/config.env
-
-    restart: always
-
-volumes:
-  btrix-redis-data:
-  btrix-mongo-data:
-  btrix-minio-data:
-
-networks:
-  default:
-    name: btrix-net
-
diff --git a/scripts/build-podman.sh b/scripts/build-podman.sh
deleted file mode 100755
index caf2bc0cf2..0000000000
--- a/scripts/build-podman.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-compose=docker-compose
-
-# can optionally be used with podman-compose
-# compose=podman-compose
-
-CURR=$(dirname "${BASH_SOURCE[0]}")
-
-# get current podman version
-version=$(podman --version | grep -P '([\d]\.[\d])' -o)
-
-# build
-$compose build --build-arg PODMAN_VERSION=$version backend frontend
-
diff --git a/scripts/build-swarm.sh b/scripts/build-swarm.sh
deleted file mode 100755
index 432398b766..0000000000
--- a/scripts/build-swarm.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-CURR=$(dirname "${BASH_SOURCE[0]}")
-
-set -o allexport
-source $CURR/../config.env
-
-if [ -n $REGISTRY ]; then
-    echo "using registry $REGISTRY"
-    docker run -d -p 5000:5000 --restart=always --name registry registry:2
-fi
-
-docker-compose build backend frontend
-
-
diff --git a/scripts/init-configs.sh b/scripts/init-configs.sh
deleted file mode 100755
index 5c0dbee78f..0000000000
--- a/scripts/init-configs.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-set -e 0
-
-# copy shared env config (if needed)
-cp -n ./configs/config.sample.env ./configs/config.env
-
-# copy signing (if needed)
-cp -n ./configs/signing.sample.yaml ./configs/signing.yaml
diff --git a/scripts/run-podman.sh b/scripts/run-podman.sh
deleted file mode 100755
index 1a4e94831c..0000000000
--- a/scripts/run-podman.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-compose=docker-compose
-# can optionally be used with podman-compose
-#compose=podman-compose
-
-CURR=$(dirname "${BASH_SOURCE[0]}")
-
-set -o allexport
-source $CURR/../configs/config.env
-
-export SOCKET_SRC=${XDG_RUNTIME_DIR}/podman/podman.sock
-export SOCKET_DEST=/run/user/0/podman/podman.sock
-export DOCKER_HOST=unix://${XDG_RUNTIME_DIR}/podman/podman.sock
-
-echo $SOCKET_SRC:$SOCKET_DEST
-
-if [ -z "$WACZ_SIGN_URL" ]; then
-    echo "running w/o authsign"
-    $compose -f $CURR/../docker-compose.yml -f $CURR/../configs/docker-compose.podman.yml up -d
-
-else
-    echo "running with authsign"
-    $compose -f $CURR/../docker-compose.yml -f $CURR/../configs/docker-compose.podman.yml -f $CURR/../configs/docker-compose.signing.yml up -d
-
-fi
-
diff --git a/scripts/run-swarm.sh b/scripts/run-swarm.sh
deleted file mode 100755
index db417c6180..0000000000
--- a/scripts/run-swarm.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-# enable to build with local registry
-# export REGISTRY=localhost:5000/
-
-CURR=$(dirname "${BASH_SOURCE[0]}")
-
-set -o allexport
-source $CURR/../configs/config.env
-
-docker swarm init
-
-if [ -z "$WACZ_SIGN_URL" ]; then
-    echo "running w/o authsign"
-    docker stack deploy -c docker-compose.yml -c $CURR/../configs/docker-compose.swarm.yml btrix
-
-else
-    echo "running with authsign"
-    docker stack deploy -c docker-compose.yml -c $CURR/../configs/docker-compose.swarm.yml -c $CURR/../configs/docker-compose.signing.yml btrix
-
-fi
-
diff --git a/scripts/stop-podman.sh b/scripts/stop-podman.sh
deleted file mode 100755
index 7b4f0aed67..0000000000
--- a/scripts/stop-podman.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-compose=docker-compose
-
-# can optionally be used with podman-compose
-#compose=podman-compose
-
-CURR=$(dirname "${BASH_SOURCE[0]}")
-
-export DOCKER_HOST=unix://${XDG_RUNTIME_DIR}/podman/podman.sock
-
-$compose -f $CURR/../docker-compose.yml -f $CURR/../configs/docker-compose.podman.yml down -t 0
-
diff --git a/scripts/stop-swarm.sh b/scripts/stop-swarm.sh
deleted file mode 100755
index 3015f189f3..0000000000
--- a/scripts/stop-swarm.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-docker stack rm btrix
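
[Note, not part of the patch: the new startup guard is easy to exercise
outside a cluster. A pytest sketch, assuming the check is factored into a
require_k8s() helper like the one sketched in the note above — the module
name example_guard is hypothetical:

    import pytest

    from example_guard import require_k8s  # hypothetical module

    def test_exits_without_k8s(monkeypatch):
        # simulate running outside a pod: the kubelet-injected var is absent
        monkeypatch.delenv("KUBERNETES_SERVICE_HOST", raising=False)
        with pytest.raises(SystemExit):
            require_k8s()

    def test_runs_inside_k8s(monkeypatch):
        # simulate a pod environment
        monkeypatch.setenv("KUBERNETES_SERVICE_HOST", "10.96.0.1")
        require_k8s()  # returns without exiting
]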