Skip to content

Commit a318a2e

Browse files
HebruwuDaniel Sabanovpre-commit-ci[bot]simonhkswanDaniel Sabanov
authored
Database addition (#74)
* Add to_dict method to metrics. * Add to_dict function to metrics. * Add comments to new methods. * Remove leftover TODO * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add from_dict to metrics and corresponding tests. * Add from_dict to metrics and corresponding tests. * Remove redundant function. * Add metrics_from_yaml and metrics_yaml_dump and corresponding tests. * Add missing yaml dependency. * Add types-pyyaml to test requirements. * Add types-pyyaml to requirements for mypy purposes. * Add types-pyyaml to requirements for mypy purposes. * Corrected typing error. * Add pyyaml stubs to precommit mypy + ignore pandas * Improve YAML related tests. * Add adapter classes for metrics. * Add adapter classes for metrics. * Add plotting functions * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixed metrics_usage tests. * Fixed metrics_usage tests. * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Remove pkg_resources dependency. * Fix code smells. * Rewrite continuous and discrete plotting functions. * Fix code smells. * Add docstrings. * Update pyproject.toml * Add sqlalchemy schema * Add the first tables to the database * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update pyproject.toml * Update requirements.txt * Update package requirements * Remove postgres-data * Update .gitignore * Update pyproject.toml * Update pyproject.toml * Update Dockerfile and requirements * Make metrics write their results into the database. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Make the database fill in all of its fields. * Extract tests from seperate folder * fix circular import * Add metric tests to a seperate folder * Introduce a version constant * Add tests for the databse. 
* Make sure the database is cleaned after each run. * Update pyproject.toml Co-authored-by: Simon Swan <[email protected]> * Update alembic.ini Co-authored-by: Simon Swan <[email protected]> * Update docker-compose.yaml Co-authored-by: Simon Swan <[email protected]> * Update pyproject.toml Co-authored-by: Simon Swan <[email protected]> * Update dock-compose.yaml * Update requirements.txt * Update src/insight/database/db_connection.py Co-authored-by: Simon Swan <[email protected]> * Update src/insight/database/utils.py Co-authored-by: Simon Swan <[email protected]> * Update src/insight/database/utils.py Co-authored-by: Simon Swan <[email protected]> * Update src/insight/database/utils.py Co-authored-by: Simon Swan <[email protected]> * Update utils.py * Update src/insight/database/utils.py Co-authored-by: Simon Swan <[email protected]> * Update base.py * Fix tests (hopefully) * fix code smells * Set version as environment variable instead of metric parameter * Metrics now upload to database automatically. * Update eval.py * Use df.attrs and name series results with DF name Co-authored-by: Daniel Sabanov <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Simon Swan <[email protected]> Co-authored-by: Daniel Sabanov <[email protected]>
1 parent 86567c1 commit a318a2e

26 files changed

+954
-48
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ env.bak/
111111
venv.bak/
112112
version.py
113113
setup.cfg
114+
postgres-data
114115

115116
# Spyder project settings
116117
.spyderproject

.pre-commit-config.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ repos:
1919
hooks:
2020
- id: mypy
2121
files: src
22-
additional_dependencies: [numpy>=1.21, types-PyYAML]
22+
additional_dependencies:
23+
- numpy>=1.21
24+
- sqlalchemy[mypy]
2325
args: [--install-types, --non-interactive]
2426
# Note that using the --install-types is problematic if running in
2527
# parallel as mutating the pre-commit env at runtime breaks cache.

Dockerfile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Shared base stage: Python 3.8 with current packaging tools.
FROM python:3.8 AS base
RUN pip install -U pip setuptools wheel

# Builder stage: build the project wheel into /dist.
FROM base AS builder
WORKDIR /build
RUN pip install build setuptools_scm
COPY pyproject.toml README.md /build/
# The git metadata is copied in alongside setuptools_scm
# (presumably used to derive the package version -- confirm in pyproject.toml).
COPY .git /build/.git
COPY src /build/src
RUN python -m build -nw --outdir /dist

# Runtime stage: system libraries, pinned requirements, migrations, then the wheel.
FROM base
WORKDIR /code
# Refresh the package index in the same layer that installs from it; a
# separately cached `apt-get update` layer can point at packages that no
# longer exist on the mirrors.
RUN apt-get update \
    && apt-get install -y libpq-dev postgresql-client \
    && rm -rf /var/lib/apt/lists/*
COPY requirements.txt requirements.txt
RUN pip install -U pip wheel
RUN pip install -r requirements.txt
COPY alembic.ini /code/
COPY migrations /code/migrations
COPY scripts /code/scripts
COPY --from=builder /dist/*.whl /dist/
RUN pip install /dist/*.whl
# Exec form: bash runs as PID 1 instead of as a child of `/bin/sh -c`.
CMD ["/bin/bash"]

alembic.ini

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# A generic, single database configuration.
2+
3+
[alembic]
4+
# path to migration scripts
5+
script_location = migrations
6+
7+
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
8+
# Uncomment the line below if you want the files to be prepended with date and time
9+
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
10+
# for all available tokens
11+
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
12+
13+
# sys.path path, will be prepended to sys.path if present.
14+
# defaults to the current working directory.
15+
prepend_sys_path = .
16+
17+
# timezone to use when rendering the date within the migration file
18+
# as well as the filename.
19+
# If specified, requires the python-dateutil library that can be
20+
# installed by adding `alembic[tz]` to the pip requirements
21+
# string value is passed to dateutil.tz.gettz()
22+
# leave blank for localtime
23+
# timezone =
24+
25+
# max length of characters to apply to the
26+
# "slug" field
27+
# truncate_slug_length = 40
28+
29+
# set to 'true' to run the environment during
30+
# the 'revision' command, regardless of autogenerate
31+
# revision_environment = false
32+
33+
# set to 'true' to allow .pyc and .pyo files without
34+
# a source .py file to be detected as revisions in the
35+
# versions/ directory
36+
# sourceless = false
37+
38+
# version location specification; This defaults
39+
# to migrations/versions. When using multiple version
40+
# directories, initial revisions must be specified with --version-path.
41+
# The path separator used here should be the separator specified by "version_path_separator" below.
42+
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
43+
44+
# version path separator; As mentioned above, this is the character used to split
45+
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46+
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47+
# Valid values for version_path_separator are:
48+
#
49+
# version_path_separator = :
50+
# version_path_separator = ;
51+
# version_path_separator = space
52+
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
53+
54+
# the output encoding used when revision files
55+
# are written from script.py.mako
56+
# output_encoding = utf-8
57+
58+
sqlalchemy.url = postgresql+psycopg2://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}:{POSTGRES_PORT}
59+
60+
61+
[post_write_hooks]
62+
# post_write_hooks defines scripts or Python functions that are run
63+
# on newly generated revision scripts. See the documentation for further
64+
# detail and examples
65+
66+
# format using "black" - use the console_scripts runner, against the "black" entrypoint
67+
# hooks = black
68+
# black.type = console_scripts
69+
# black.entrypoint = black
70+
# black.options = -l 79 REVISION_SCRIPT_FILENAME
71+
72+
# Logging configuration
73+
[loggers]
74+
keys = root,sqlalchemy,alembic
75+
76+
[handlers]
77+
keys = console
78+
79+
[formatters]
80+
keys = generic
81+
82+
[logger_root]
83+
level = WARN
84+
handlers = console
85+
qualname =
86+
87+
[logger_sqlalchemy]
88+
level = WARN
89+
handlers =
90+
qualname = sqlalchemy.engine
91+
92+
[logger_alembic]
93+
level = INFO
94+
handlers =
95+
qualname = alembic
96+
97+
[handler_console]
98+
class = StreamHandler
99+
args = (sys.stderr,)
100+
level = NOTSET
101+
formatter = generic
102+
103+
[formatter_generic]
104+
format = %(levelname)-5.5s [%(name)s] %(message)s
105+
datefmt = %H:%M:%S

docker-compose.yaml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
---
version: '3.7'
services:
  # Container that runs the evaluation code against the database.
  eval:
    build: .
    depends_on:
      - postgres
    tty: true
    # Values are passed through from the invoking shell / .env file.
    environment:
      - POSTGRES_USER
      - POSTGRES_PASSWORD
      - POSTGRES_HOST
      - POSTGRES_PORT
  postgres:
    image: postgres:latest
    restart: always
    environment:
      - POSTGRES_USER
      - POSTGRES_PASSWORD
      - POSTGRES_HOST
      - POSTGRES_PORT
    logging:
      options:
        max-size: 10m
        max-file: "3"
    ports:
      # Short syntax is HOST:CONTAINER. Nothing in this file changes the
      # server's listening port, so the container side stays at the image
      # default (5432) and POSTGRES_PORT selects the published host port.
      - "${POSTGRES_PORT}:5432"
    volumes:
      - "./postgres-data:/var/lib/postgresql/data"
...

migrations/README

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Generic single-database configuration.

migrations/env.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import os
2+
from logging.config import fileConfig
3+
4+
from alembic import context
5+
from sqlalchemy import engine_from_config, pool
6+
7+
from insight.database.schema import Base
8+
9+
# this is the Alembic Config object, which provides
10+
# access to the values within the .ini file in use.
11+
config = context.config
12+
13+
# Interpret the config file for Python logging.
14+
# This line sets up loggers basically.
15+
if config.config_file_name is not None:
16+
fileConfig(config.config_file_name)
17+
18+
target_metadata = Base.metadata
19+
20+
# other values from the config, defined by the needs of env.py,
21+
# can be acquired:
22+
# my_important_option = config.get_main_option("my_important_option")
23+
# ... etc.
24+
25+
26+
def run_migrations_offline() -> None:
    """Run the migrations without connecting to a database ('offline' mode).

    The Alembic context is configured from a URL alone, so no Engine is
    built here. Statements issued through ``context.execute()`` are written
    to the script output.
    """
    url_template = config.get_main_option("sqlalchemy.url")
    # Substitute the ``{NAME}`` placeholders in the configured URL with the
    # environment variables of the same name.
    database_url = url_template.format(**os.environ)

    offline_options = {
        "url": database_url,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
49+
50+
def run_migrations_online() -> None:
    """Run the migrations against a live database connection ('online' mode).

    An Engine is created from the main config section, with the
    ``sqlalchemy.url`` placeholders filled in from environment variables,
    and a connection from it is associated with the Alembic context.

    Raises:
        KeyError: if the URL template names an environment variable that
            is not set.
    """
    url = config.get_main_option("sqlalchemy.url").format(**os.environ)

    # Override the URL on a copy of the section rather than writing it back
    # with config.set_main_option(): that setter applies ConfigParser
    # interpolation and rejects any raw '%', which URL-encoded credentials
    # (e.g. '%40' for '@') necessarily contain.
    engine_options = dict(config.get_section(config.config_ini_section) or {})
    engine_options["sqlalchemy.url"] = url

    connectable = engine_from_config(
        engine_options,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()
72+
73+
# Choose the runner that matches the mode reported by the Alembic context.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

migrations/script.py.mako

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""${message}
2+
3+
Revision ID: ${up_revision}
4+
Revises: ${down_revision | comma,n}
5+
Create Date: ${create_date}
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
${imports if imports else ""}
11+
12+
# revision identifiers, used by Alembic.
13+
revision = ${repr(up_revision)}
14+
down_revision = ${repr(down_revision)}
15+
branch_labels = ${repr(branch_labels)}
16+
depends_on = ${repr(depends_on)}
17+
18+
19+
def upgrade() -> None:
20+
${upgrades if upgrades else "pass"}
21+
22+
23+
def downgrade() -> None:
24+
${downgrades if downgrades else "pass"}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""Add initial tables
2+
3+
Revision ID: 9aca5ae68ff5
4+
Revises:
5+
Create Date: 2022-07-27 12:30:49.163648
6+
7+
"""
8+
import sqlalchemy as sa
9+
from alembic import op
10+
11+
# revision identifiers, used by Alembic.
12+
revision = '9aca5ae68ff5'
13+
down_revision = None
14+
branch_labels = None
15+
depends_on = None
16+
17+
18+
def upgrade() -> None:
    """Create the initial ``dataset``, ``metric``, ``version`` and ``result`` tables."""

    def id_column():
        # Build a new Column on each call so no Column object is shared
        # between tables.
        return sa.Column('id', sa.INTEGER(), nullable=False)

    def name_column(nullable):
        return sa.Column('name', sa.VARCHAR(length=50), nullable=nullable)

    def created_at_column():
        return sa.Column('created_at', sa.TIMESTAMP(), nullable=True)

    def optional_int(column_name):
        return sa.Column(column_name, sa.INTEGER(), nullable=True)

    op.create_table(
        'dataset',
        id_column(),
        name_column(False),
        optional_int('num_rows'),
        optional_int('num_columns'),
        created_at_column(),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_table(
        'metric',
        id_column(),
        name_column(False),
        sa.Column('category', sa.VARCHAR(length=50), nullable=True),
        created_at_column(),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_table(
        'version',
        id_column(),
        name_column(True),
        created_at_column(),
        sa.PrimaryKeyConstraint('id'),
    )
    # ``result`` is created last because it references the three tables above.
    op.create_table(
        'result',
        id_column(),
        optional_int('metric_id'),
        optional_int('dataset_id'),
        optional_int('version_id'),
        sa.Column('value', sa.FLOAT(), nullable=True),
        created_at_column(),
        sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id']),
        sa.ForeignKeyConstraint(['metric_id'], ['metric.id']),
        sa.ForeignKeyConstraint(['version_id'], ['version.id']),
        sa.PrimaryKeyConstraint('id'),
    )
54+
55+
56+
def downgrade() -> None:
    """Drop the four tables created by this revision's ``upgrade``."""
    # ``result`` is dropped first: it carries the foreign keys that point
    # at the other three tables.
    for table_name in ('result', 'version', 'metric', 'dataset'):
        op.drop_table(table_name)

0 commit comments

Comments
 (0)