
feat(robot-server): add a new endpoint to upload csv data files #15481

Merged
1 change: 1 addition & 0 deletions api/src/opentrons/protocol_reader/file_hasher.py
@@ -7,6 +7,7 @@
from .file_reader_writer import BufferedFile


# TODO (spp: 2024-06-17): move file hasher to utils
class FileHasher:
"""Hashing utility class that hashes a combination of protocol and labware files."""

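The TODO above anticipates reusing FileHasher outside protocol handling. For context, a minimal sketch of the read-then-hash deduplication flow that the new /dataFiles endpoint (further down in this diff) builds on; the helper name is illustrative and not part of this PR:

```python
from pathlib import Path

from opentrons.protocol_reader import FileHasher, FileReaderWriter

from robot_server.data_files.data_files_store import DataFilesStore


async def is_duplicate_upload(store: DataFilesStore, csv_path: Path) -> bool:
    """Return True if a file with identical contents was already uploaded.

    Illustrative helper: mirrors the read -> hash -> lookup sequence that
    upload_data_file performs in the router below.
    """
    [buffered_file] = await FileReaderWriter().read(files=[csv_path])
    file_hash = await FileHasher().hash([buffered_file])
    return store.get_file_info_by_hash(file_hash) is not None
```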
1 change: 1 addition & 0 deletions robot-server/robot_server/data_files/__init__.py
@@ -0,0 +1 @@
"""Upload and management of data files."""
65 changes: 65 additions & 0 deletions robot-server/robot_server/data_files/data_files_store.py
@@ -0,0 +1,65 @@
"""Store and retrieve information about uploaded data files from the database."""
from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from typing import Optional, List

import sqlalchemy.engine

from robot_server.persistence.database import sqlite_rowid
from robot_server.persistence.tables import data_files_table


@dataclass(frozen=True)
class DataFileInfo:
"""Metadata info of a saved data file."""

id: str
name: str
file_hash: str
created_at: datetime


class DataFilesStore:
"""Store and retrieve info about uploaded data files."""

def __init__(
self,
sql_engine: sqlalchemy.engine.Engine,
) -> None:
"""Create a new DataFilesStore."""
self._sql_engine = sql_engine

def get_file_info_by_hash(self, file_hash: str) -> Optional[DataFileInfo]:
"""Get the ID of data file having the provided hash."""
for file in self._sql_get_all_from_engine():
if file.file_hash == file_hash:
return file
return None

async def insert(self, file_info: DataFileInfo) -> None:
"""Insert data file info in the database."""
file_info_dict = {
"id": file_info.id,
"name": file_info.name,
"created_at": file_info.created_at,
"file_hash": file_info.file_hash,
}
statement = sqlalchemy.insert(data_files_table).values(file_info_dict)
with self._sql_engine.begin() as transaction:
transaction.execute(statement)

def _sql_get_all_from_engine(self) -> List[DataFileInfo]:
statement = sqlalchemy.select(data_files_table).order_by(sqlite_rowid)
with self._sql_engine.begin() as transaction:
all_rows = transaction.execute(statement).all()
return [
DataFileInfo(
id=sql_row.id,
name=sql_row.name,
created_at=sql_row.created_at,
file_hash=sql_row.file_hash,
)
for sql_row in all_rows
]
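Note that get_file_info_by_hash loads every row and filters in Python, which is fine at the current scale. For comparison, a sketch of the same lookup pushed down into SQL, assuming only the column names that insert already writes; this is a drop-in method for the class above, not part of the PR:

```python
    def get_file_info_by_hash_sql(self, file_hash: str) -> Optional[DataFileInfo]:
        """Illustrative alternative: filter by hash in the query itself."""
        statement = sqlalchemy.select(data_files_table).where(
            data_files_table.c.file_hash == file_hash
        )
        with self._sql_engine.begin() as transaction:
            # one_or_none() assumes file hashes are unique in the table
            row = transaction.execute(statement).one_or_none()
        if row is None:
            return None
        return DataFileInfo(
            id=row.id,
            name=row.name,
            created_at=row.created_at,
            file_hash=row.file_hash,
        )
```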
56 changes: 56 additions & 0 deletions robot-server/robot_server/data_files/dependencies.py
@@ -0,0 +1,56 @@
"""FastAPI dependencies for data files endpoints."""
from pathlib import Path
from asyncio import Lock as AsyncLock
from typing import Final
from anyio import Path as AsyncPath

from fastapi import Depends
from sqlalchemy.engine import Engine as SQLEngine

from server_utils.fastapi_utils.app_state import (
AppState,
get_app_state,
AppStateAccessor,
)
from robot_server.persistence.fastapi_dependencies import (
get_active_persistence_directory,
get_sql_engine,
)

from .data_files_store import DataFilesStore

_DATA_FILES_SUBDIRECTORY: Final = "data_files"

_data_files_directory_init_lock = AsyncLock()
_data_files_directory_accessor = AppStateAccessor[Path]("data_files_directory")

_data_files_store_init_lock = AsyncLock()
_data_files_store_accessor = AppStateAccessor[DataFilesStore]("data_files_store")


async def get_data_files_directory(
app_state: AppState = Depends(get_app_state),
persistent_directory: Path = Depends(get_active_persistence_directory),
) -> Path:
"""Get the directory to save the protocol files, creating it if needed."""
async with _data_files_directory_init_lock:
data_files_dir = _data_files_directory_accessor.get_from(app_state)
if data_files_dir is None:
data_files_dir = persistent_directory / _DATA_FILES_SUBDIRECTORY
await AsyncPath(data_files_dir).mkdir(exist_ok=True)
_data_files_directory_accessor.set_on(app_state, data_files_dir)

return data_files_dir


async def get_data_files_store(
app_state: AppState = Depends(get_app_state),
sql_engine: SQLEngine = Depends(get_sql_engine),
) -> DataFilesStore:
"""Get a singleton DataFilesStore to keep track of uploaded data files."""
async with _data_files_store_init_lock:
data_files_store = _data_files_store_accessor.get_from(app_state)
if data_files_store is None:
data_files_store = DataFilesStore(sql_engine)
_data_files_store_accessor.set_on(app_state, data_files_store)
return data_files_store
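Both dependencies follow the same double-checked initialization pattern: take an async lock, re-read app state, and construct the object only if no concurrent request already did. The lock matters because FastAPI can resolve the same dependency for several requests at once. The pattern in isolation, with illustrative names (Widget, get_widget) that are not part of this PR:

```python
from asyncio import Lock

from server_utils.fastapi_utils.app_state import AppState, AppStateAccessor


class Widget:
    """Stand-in for any shared resource that must be created exactly once."""


_widget_init_lock = Lock()
_widget_accessor = AppStateAccessor[Widget]("widget")


async def get_widget(app_state: AppState) -> Widget:
    """Create the Widget on first call; return the cached instance afterwards."""
    async with _widget_init_lock:
        widget = _widget_accessor.get_from(app_state)
        if widget is None:  # nobody initialized it while we waited on the lock
            widget = Widget()
            _widget_accessor.set_on(app_state, widget)
    return widget
```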
14 changes: 14 additions & 0 deletions robot-server/robot_server/data_files/models.py
@@ -0,0 +1,14 @@
"""Data files models."""
from datetime import datetime

from pydantic import Field

from robot_server.service.json_api import ResourceModel


class DataFile(ResourceModel):
"""A model representing an uploaded data file."""

id: str = Field(..., description="A unique identifier for this file.")
name: str = Field(..., description="Name of the uploaded file.")
createdAt: datetime = Field(..., description="When this data file was *uploaded*.")
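A quick sketch of this resource on the wire, assuming the pydantic v1 API that the surrounding code (e.g. SimpleBody.construct) also uses; the field values are illustrative:

```python
from datetime import datetime, timezone

from robot_server.data_files.models import DataFile

data_file = DataFile(
    id="a1b2c3",
    name="samples.csv",
    createdAt=datetime(2024, 6, 20, tzinfo=timezone.utc),
)
# .json() yields roughly:
# {"id": "a1b2c3", "name": "samples.csv", "createdAt": "2024-06-20T00:00:00+00:00"}
print(data_file.json())
```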
143 changes: 143 additions & 0 deletions robot-server/robot_server/data_files/router.py
@@ -0,0 +1,143 @@
"""Router for /dataFiles endpoints."""
from datetime import datetime
from pathlib import Path
from textwrap import dedent
from typing import Optional, Literal, Union

from fastapi import APIRouter, UploadFile, File, Form, Depends, status
from opentrons.protocol_reader import FileHasher, FileReaderWriter

from robot_server.service.json_api import (
SimpleBody,
PydanticResponse,
)
from robot_server.errors.error_responses import ErrorDetails, ErrorBody
from .dependencies import get_data_files_directory, get_data_files_store
from .data_files_store import DataFilesStore, DataFileInfo
from .models import DataFile
from ..protocols.dependencies import get_file_hasher, get_file_reader_writer
from ..service.dependencies import get_current_time, get_unique_id

datafiles_router = APIRouter()


class MultipleDataFileSources(ErrorDetails):
"""An error returned when multiple data file sources are specified in one request."""

id: Literal["MultipleDataFileSources"] = "MultipleDataFileSources"
title: str = "Multiple sources found for data files"


class NoDataFileSourceProvided(ErrorDetails):
"""An error returned when no data file sources are specified in the request."""

id: Literal["NoDataFileSourceProvided"] = "NoDataFileSourceProvided"
title: str = "No data file source provided"


class FileNotFound(ErrorDetails):
"""An error returned when specified file path was not found on the robot."""

id: Literal["FileNotFound"] = "FileNotFound"
title: str = "Specified file path not found on the robot"


class UnexpectedFileFormat(ErrorDetails):
"""An error returned when specified file is not in expected format."""

id: Literal["UnexpectedFileFormat"] = "UnexpectedFileFormat"
title: str = "Unexpected file format"


@PydanticResponse.wrap_route(
datafiles_router.post,
path="/dataFiles",
summary="Upload a data file",
description=dedent(
"""
Upload data file(s) to your device.
"""
),
status_code=status.HTTP_201_CREATED,
responses={
status.HTTP_200_OK: {"model": SimpleBody[DataFile]},
status.HTTP_201_CREATED: {"model": SimpleBody[DataFile]},
status.HTTP_422_UNPROCESSABLE_ENTITY: {
"model": ErrorBody[
Union[
MultipleDataFileSources,
NoDataFileSourceProvided,
UnexpectedFileFormat,
]
]
},
status.HTTP_404_NOT_FOUND: {"model": ErrorBody[FileNotFound]},
},
)
async def upload_data_file(
file: Optional[UploadFile] = File(default=None, description="Data file to upload"),
file_path: Optional[str] = Form(
default=None,
description="Absolute path to a file on the robot.",
alias="filePath",
),
data_files_directory: Path = Depends(get_data_files_directory),
data_files_store: DataFilesStore = Depends(get_data_files_store),
file_reader_writer: FileReaderWriter = Depends(get_file_reader_writer),
file_hasher: FileHasher = Depends(get_file_hasher),
file_id: str = Depends(get_unique_id, use_cache=False),
created_at: datetime = Depends(get_current_time),
) -> PydanticResponse[SimpleBody[DataFile]]:
"""Save the uploaded data file to persistent storage and update database."""
if all([file, file_path]):
raise MultipleDataFileSources(
detail="Can accept either a file or a file path, not both."
).as_error(status.HTTP_422_UNPROCESSABLE_ENTITY)
if file is None and file_path is None:
raise NoDataFileSourceProvided(
detail="You must provide either a file or a file_path in the request."
).as_error(status.HTTP_422_UNPROCESSABLE_ENTITY)
try:
[buffered_file] = await file_reader_writer.read(files=[file or Path(file_path)]) # type: ignore[arg-type, list-item]
except FileNotFoundError as e:
raise FileNotFound(detail=str(e)).as_error(status.HTTP_404_NOT_FOUND) from e
# TODO (spp, 2024-06-18): probably also validate CSV file *contents*
if not buffered_file.name.endswith(".csv"):
Contributor commented:

I agree that we should be doing some validation on files before storing them on the robot and adding them to the dataFiles database, but only allowing CSVs through this endpoint seems limiting for now. We can of course make this more permissive later, but instead of restricting the file types, we could do something like:

  • blacklist certain file types
  • limit the size of files
  • whitelist file paths on the robot to prevent any security risks

Member Author replied:

I'd prefer a whitelist of select allowed file types over a blacklist of disallowed ones, so that we can be absolutely sure of the file types that get added, but I like these suggestions. Let's talk to the team and finalize the allowed types, sizes, and file paths.

raise UnexpectedFileFormat(detail="Only CSV file format is accepted.").as_error(
status.HTTP_422_UNPROCESSABLE_ENTITY
)
file_hash = await file_hasher.hash([buffered_file])
existing_file_info = data_files_store.get_file_info_by_hash(file_hash)
if existing_file_info:
return await PydanticResponse.create(
content=SimpleBody.construct(
data=DataFile.construct(
id=existing_file_info.id,
name=existing_file_info.name,
createdAt=existing_file_info.created_at,
)
),
status_code=status.HTTP_200_OK,
)

# TODO (spp, 2024-06-18): auto delete data files if max exceeded
await file_reader_writer.write(
directory=data_files_directory / file_id, files=[buffered_file]
)
file_info = DataFileInfo(
id=file_id,
name=buffered_file.name,
file_hash=file_hash,
created_at=created_at,
)
await data_files_store.insert(file_info)
return await PydanticResponse.create(
content=SimpleBody.construct(
data=DataFile.construct(
id=file_info.id,
name=file_info.name,
createdAt=created_at,
)
),
status_code=status.HTTP_201_CREATED,
)
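The review thread above leaves file-type validation open. To make the author's preferred direction concrete, a hypothetical whitelist check; ALLOWED_DATA_FILE_EXTENSIONS and is_allowed_data_file are assumptions, not part of this PR:

```python
from pathlib import PurePath

# Hypothetical whitelist; this PR currently accepts only ".csv".
ALLOWED_DATA_FILE_EXTENSIONS = {".csv"}


def is_allowed_data_file(filename: str) -> bool:
    """Return True if the file's extension is on the whitelist."""
    return PurePath(filename).suffix.lower() in ALLOWED_DATA_FILE_EXTENSIONS
```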
2 changes: 2 additions & 0 deletions robot-server/robot_server/persistence/tables/__init__.py
@@ -8,6 +8,7 @@
run_table,
run_command_table,
action_table,
data_files_table,
)


@@ -18,4 +19,5 @@
"run_table",
"run_command_table",
"action_table",
"data_files_table",
]
2 changes: 1 addition & 1 deletion robot-server/robot_server/protocols/protocol_store.py
@@ -217,7 +217,7 @@ def get_all_ids(self) -> List[str]:
return protocol_ids

def get_id_by_hash(self, hash: str) -> Optional[str]:
"""Get all protocol hashes keyed by protocol id."""
"""Get ID of protocol corresponding to the provided hash."""
for p in self.get_all():
if p.source.content_hash == hash:
return p.protocol_id
6 changes: 6 additions & 0 deletions robot-server/robot_server/router.py
@@ -12,6 +12,7 @@
from .maintenance_runs.router import maintenance_runs_router
from .modules.router import modules_router
from .protocols.router import protocols_router
from .data_files.router import datafiles_router
from .robot.router import robot_router
from .runs.router import runs_router
from .service.labware.router import router as labware_router
@@ -64,6 +65,11 @@
dependencies=[Depends(check_version_header)],
)

router.include_router(
router=datafiles_router,
tags=["Data files Management"],
dependencies=[Depends(check_version_header)],
)
router.include_router(
router=commands_router,
tags=["Simple Commands"],
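With the router wired into the app, the endpoint can be exercised as sketched below. This is a hypothetical client example: the address placeholder, port 31950, and the Opentrons-Version header follow general robot-server conventions and are not taken from this diff.

```python
import requests

BASE_URL = "http://<robot-ip>:31950"  # placeholder robot address
HEADERS = {"Opentrons-Version": "*"}

# Multipart upload of a local CSV. The first upload of given contents
# returns 201 Created; re-uploading identical contents returns 200 OK
# with the previously stored record.
with open("samples.csv", "rb") as f:
    response = requests.post(
        f"{BASE_URL}/dataFiles",
        headers=HEADERS,
        files={"file": ("samples.csv", f, "text/csv")},
    )
print(response.status_code, response.json()["data"]["id"])

# Alternatively, reference a file already on the robot's filesystem
# via the filePath form field (path shown is hypothetical).
response = requests.post(
    f"{BASE_URL}/dataFiles",
    headers=HEADERS,
    data={"filePath": "/data/samples.csv"},
)
```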
1 change: 1 addition & 0 deletions robot-server/tests/data_files/__init__.py
@@ -0,0 +1 @@
"""Tests for the robot_server.data_files module."""
48 changes: 48 additions & 0 deletions robot-server/tests/data_files/test_data_files_store.py
@@ -0,0 +1,48 @@
"""Tests for the DataFilesStore interface."""
import pytest
from datetime import datetime, timezone
from sqlalchemy.engine import Engine as SQLEngine

from robot_server.data_files.data_files_store import DataFilesStore, DataFileInfo


@pytest.fixture
def subject(sql_engine: SQLEngine) -> DataFilesStore:
"""Get a DataFilesStore test subject."""
return DataFilesStore(sql_engine=sql_engine)


async def test_insert_data_file_info_and_fetch_by_hash(
subject: DataFilesStore,
) -> None:
"""It should add the data file info to database."""
data_file_info = DataFileInfo(
id="file-id",
name="file-name",
file_hash="abc123",
created_at=datetime(year=2024, month=6, day=20, tzinfo=timezone.utc),
)
assert subject.get_file_info_by_hash("abc123") is None
await subject.insert(data_file_info)
assert subject.get_file_info_by_hash("abc123") == data_file_info


async def test_insert_file_info_with_existing_id(
subject: DataFilesStore,
) -> None:
"""It should raise an error when trying to add the same file ID to database."""
data_file_info1 = DataFileInfo(
id="file-id",
name="file-name",
file_hash="abc123",
created_at=datetime(year=2024, month=6, day=20, tzinfo=timezone.utc),
)
data_file_info2 = DataFileInfo(
id="file-id",
name="file-name2",
file_hash="abc1234",
created_at=datetime(year=2024, month=6, day=20, tzinfo=timezone.utc),
)
await subject.insert(data_file_info1)
with pytest.raises(Exception):
await subject.insert(data_file_info2)