Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(db-postgres): data migration script from mongo to postgres #1772

Merged
merged 43 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
f737511
initial migration
aakrem Jun 7, 2024
73e6677
fix evaluations
aakrem Jun 7, 2024
c120d1f
Merge branch 'postgres' into postgres-migration
aakrem Jun 11, 2024
e3aba63
small improvements
aakrem Jun 14, 2024
182be7f
Merge branch 'postgres' into postgres-migration
aakrem Jun 19, 2024
04075eb
fix human evaluations
aakrem Jun 21, 2024
f93ba1d
add human evaluations variant
aakrem Jun 21, 2024
2819cde
Merge branch 'postgres' into postgres-migration
aakrem Jun 21, 2024
405a1f6
add db name and create db on postgres startup if doesn't exist
aakrem Jun 23, 2024
c96d25a
remove comment from chatgpt
aakrem Jun 24, 2024
1417aca
add separated messages
aakrem Jun 24, 2024
808439c
add printing traceback
aakrem Jun 24, 2024
316fa20
improve migration output
aakrem Jun 24, 2024
92145d3
add filtering by table
aakrem Jun 24, 2024
74ba503
replace id with _id
aakrem Jun 24, 2024
4c48c40
fix base
aakrem Jun 24, 2024
a950c40
add assertion to check length of variants and variants_revisions
aakrem Jun 25, 2024
1d201ab
add migration steps
aakrem Jun 25, 2024
92fd40d
fix deployed_app_variant_revision
aakrem Jun 25, 2024
7d016c3
fix(tool): [bug] Incorrect Configuration for Variants fix migration
mmabrouk Jun 25, 2024
582b60d
fix human evaluation variants results
aakrem Jun 25, 2024
b396da1
fix tqdm progress length
aakrem Jun 25, 2024
0c6520f
move all previous migrations to a separate folder
aakrem Jun 26, 2024
1002e64
add backup
aakrem Jun 26, 2024
49c8f9d
add assertion to prevent duplicated values
aakrem Jun 27, 2024
fca96ea
support skipping documents in case of duplicated values
aakrem Jun 30, 2024
831711b
Merge pull request #1828 from Agenta-AI/cloud-migration/beanie-to-sql…
aakrem Jun 30, 2024
3fa794b
separate db engine for migration
aakrem Jun 30, 2024
339c036
Merge pull request #1830 from Agenta-AI/cloud-migration/beanie-to-sql…
aybruhm Jul 1, 2024
abc39ab
small refactoring
aakrem Jul 1, 2024
e374973
refactor (docs): moved and renamed migration.mdx to migration folder …
aybruhm Jul 1, 2024
f114e42
feat (docs): created documentation for Postgres migration
aybruhm Jul 1, 2024
9d4b31c
minor refactor (docs): replace <version> tag with 'version'
aybruhm Jul 1, 2024
573ca2a
refactor (backend): replace deprecated legacy query api .one_or_none(…
aybruhm Jul 2, 2024
0b965ea
Merge pull request #1836 from Agenta-AI/refactor/replace-one_or_none-…
aakrem Jul 2, 2024
03ff817
Merge pull request #1831 from Agenta-AI/docs/postgres-migration-docum…
mmabrouk Jul 2, 2024
747b5be
refactor (tools): set db name in postgres_uri to backend and celery_w…
aybruhm Jul 2, 2024
3a05bcd
refactor (backend): move initialization of async sqlalchemy engine to…
aybruhm Jul 3, 2024
8dd92a2
minor refactor (tests): initialize db_engine and ensure that database…
aybruhm Jul 3, 2024
3124725
minor refactor (tests): ensure that test db is dropped before closing…
aybruhm Jul 3, 2024
a8c0551
minor refactor (tools): revert back to using default database created
aybruhm Jul 3, 2024
2e3ee66
Merge pull request #1838 from Agenta-AI/resolve-failing-backend-tests
aakrem Jul 3, 2024
f518b23
Merge pull request #1839 from Agenta-AI/postgres
aakrem Jul 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
```bash
docker ps
docker exec -it {backend-container-id} bash
cd /app/agenta_backend/migrations/mongo_to_postgres
python3 migration.py
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import os
import logging
from asyncio import current_task
from typing import AsyncGenerator
from contextlib import asynccontextmanager

from sqlalchemy.ext.asyncio import (
AsyncSession,
create_async_engine,
async_sessionmaker,
async_scoped_session,
)

from agenta_backend.utils.common import isCloudEE

if isCloudEE():
from agenta_backend.commons.observability.models.db import SpanDB
from agenta_backend.commons.models.db_models import (
APIKeyDB,
WorkspaceDB,
OrganizationDB,
AppDB_ as AppDB,
UserDB_ as UserDB,
ImageDB_ as ImageDB,
TestSetDB_ as TestSetDB,
AppVariantDB_ as AppVariantDB,
EvaluationDB_ as EvaluationDB,
DeploymentDB_ as DeploymentDB,
VariantBaseDB_ as VariantBaseDB,
AppEnvironmentDB_ as AppEnvironmentDB,
AppEnvironmentRevisionDB_ as AppEnvironmentRevisionDB,
EvaluatorConfigDB_ as EvaluatorConfigDB,
HumanEvaluationDB_ as HumanEvaluationDB,
EvaluationScenarioDB_ as EvaluationScenarioDB,
HumanEvaluationScenarioDB_ as HumanEvaluationScenarioDB,
)
else:
from agenta_backend.models.db_models import (
AppDB,
UserDB,
ImageDB,
TestSetDB,
EvaluationDB,
DeploymentDB,
AppVariantDB,
VariantBaseDB,
AppEnvironmentDB,
AppEnvironmentRevisionDB,
EvaluatorConfigDB,
HumanEvaluationDB,
EvaluationScenarioDB,
HumanEvaluationScenarioDB,
)

from agenta_backend.models.db_models import (
TemplateDB,
AppVariantRevisionsDB,
)

# Models whose metadata is created by DBEngine.init_db and dropped by
# DBEngine.remove_db. The list order is kept exactly as declared.
models = [
    AppDB,
    UserDB,
    ImageDB,
    TestSetDB,
    TemplateDB,
    AppVariantDB,
    DeploymentDB,
    EvaluationDB,
    VariantBaseDB,
    AppEnvironmentDB,
    AppEnvironmentRevisionDB,
    EvaluatorConfigDB,
    HumanEvaluationDB,
    EvaluationScenarioDB,
    AppVariantRevisionsDB,
    HumanEvaluationScenarioDB,
]

# These three models are only imported on the isCloudEE() branch above, so
# they are appended under the same condition.
if isCloudEE():
    models += [OrganizationDB, WorkspaceDB, APIKeyDB]  # type: ignore


# Module-scoped logger; its own threshold is INFO, so DEBUG records are dropped.
logger: logging.Logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class DBEngine:
    """
    Own the async SQLAlchemy engine and the task-scoped session registry.

    The connection URL is read from the ``POSTGRES_URI`` environment variable
    and the mode label from ``DATABASE_MODE`` (default ``"v2"``) when the
    instance is constructed.
    """

    def __init__(self) -> None:
        self.mode = os.environ.get("DATABASE_MODE", "v2")
        # NOTE: if POSTGRES_URI is unset, the f-string turns ``None`` into the
        # literal string "None", which is passed to create_async_engine as-is.
        self.db_url = f"{os.environ.get('POSTGRES_URI')}"
        self.engine = create_async_engine(url=self.db_url)
        self.async_session_maker = async_sessionmaker(
            bind=self.engine, class_=AsyncSession, expire_on_commit=False
        )
        # Scoped registry keyed by the running asyncio task: calls made from
        # the same task receive the same AsyncSession.
        self.async_session = async_scoped_session(
            session_factory=self.async_session_maker, scopefunc=current_task
        )

    async def init_db(self) -> None:
        """
        Create the tables for every model listed in the module-level ``models``.
        """
        async with self.engine.begin() as conn:
            for model in models:
                await conn.run_sync(model.metadata.create_all)
        logger.info("Using %s database...", self.mode)

    async def remove_db(self) -> None:
        """
        Drop the tables for every model listed in the module-level ``models``.
        """
        async with self.engine.begin() as conn:
            for model in models:
                await conn.run_sync(model.metadata.drop_all)

    @asynccontextmanager
    async def get_session(self) -> AsyncGenerator[AsyncSession, None]:
        """
        Yield the session bound to the current asyncio task.

        On an exception raised inside the ``async with`` body the session is
        rolled back and the exception is re-raised. In every case the session
        is closed and its entry is removed from the scoped registry.

        :raises Exception: whatever the ``async with`` body raised
        """
        session = self.async_session()
        try:
            yield session
        except Exception:
            await session.rollback()
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            # Close the session we handed out, then drop the registry entry.
            # Closing alone leaves the task -> session mapping in the
            # registry, which grows with every task that asks for a session.
            await session.close()
            await self.async_session.remove()

    async def close(self) -> None:
        """
        Dispose of all connections held by the engine and reset the instance.

        :raises Exception: if the engine is not initialized (already closed)
        """

        if self.engine is None:
            raise Exception("DBEngine is not initialized")

        await self.engine.dispose()

        # Clear references so later use fails fast instead of touching a
        # disposed engine.
        self.engine = None
        self.async_session_maker = None
        self.async_session = None


# Shared module-level instance. It is constructed at import time, so
# DBEngine.__init__ reads the environment and builds the async engine as soon
# as this module is imported.
db_engine = DBEngine()
Loading
Loading