feat(robot-server): add a new endpoint to upload csv data files (#15481)
Closes AUTH-426

# Overview

Adds a new router for uploading and managing data files. The new
endpoint can accept a single data file, either as a file upload or a
path to the file on the robot.

The endpoint saves the uploaded file to the persistent directory on the
robot, adds the file's info to the data_files database, and returns the
file ID and other details in the response.

Duplicate file uploads use the previously saved file & database entry
instead of creating a new one.
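
For illustration, here is a minimal client-side sketch of the upload flow. The robot address is a placeholder, and the port and `Opentrons-Version` header follow existing robot-server conventions:

```python
# Hedged sketch of uploading a CSV via the new endpoint; ROBOT_IP is a
# placeholder and samples.csv is a hypothetical local file.
import requests

ROBOT_IP = "192.168.1.42"  # placeholder robot address

with open("samples.csv", "rb") as f:
    response = requests.post(
        f"http://{ROBOT_IP}:31950/dataFiles",
        headers={"Opentrons-Version": "*"},
        files={"file": f},
    )

print(response.status_code)           # 201 on first upload, 200 for a duplicate
print(response.json()["data"]["id"])  # the assigned file ID
```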

# Test Plan

- On a robot, upload a data file using the `POST /dataFiles` endpoint,
providing a file in the request (like the sketch above). Verify that:
    - [ ] the file is uploaded to persistent storage at
`../opentrons-robot-server/data_files/<file_id>/`
    - [ ] the file's info is added to the database
    - [ ] the file's ID is returned in the response
- [ ] On a robot, send a file to `POST /dataFiles` by providing a
`file_path` to a data file on the robot (the file should be outside the
persistent directory) and verify all of the above (see the sketch after
this list)
- [ ] Send an already existing file to the server and verify that no new
file is saved and no new entry is added to the database. Verify that the
response contains the previously assigned file ID
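
A hedged sketch of the `file_path` variant; the on-robot path is hypothetical, and the form field name `filePath` matches the endpoint's alias:

```python
# Register a file that already exists on the robot by sending its path as a
# form field; /data/user_storage/samples.csv is a hypothetical on-robot path.
import requests

ROBOT_IP = "192.168.1.42"  # placeholder robot address

response = requests.post(
    f"http://{ROBOT_IP}:31950/dataFiles",
    headers={"Opentrons-Version": "*"},
    data={"filePath": "/data/user_storage/samples.csv"},
)
assert response.status_code in (200, 201)
```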

# Changelog

- added new router, models and data files store for the new endpoint
- added unit & integration tests

# Review requests

- Check that the implementation meets the client's expectations.
- Any suggestions on the placement of the file_reader_writer and
file_hasher classes/functions? So far they have been associated only with
protocols and hence were part of the protocols/ api packages, but I think
it's time to move them to a common utility.
- Any other code suggestions.

# Risk assessment

None to the existing system. New, isolated feature.
sanni-t authored Jun 26, 2024
1 parent 6eedd35 commit 3d99b54
Showing 18 changed files with 648 additions and 4 deletions.
1 change: 1 addition & 0 deletions api/src/opentrons/protocol_reader/file_hasher.py
@@ -7,6 +7,7 @@
from .file_reader_writer import BufferedFile


# TODO (spp: 2024-06-17): move file hasher to utils
class FileHasher:
"""Hashing utility class that hashes a combination of protocol and labware files."""

1 change: 1 addition & 0 deletions robot-server/robot_server/data_files/__init__.py
@@ -0,0 +1 @@
"""Upload and management of data files."""
65 changes: 65 additions & 0 deletions robot-server/robot_server/data_files/data_files_store.py
@@ -0,0 +1,65 @@
"""Store and retrieve information about uploaded data files from the database."""
from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from typing import Optional, List

import sqlalchemy.engine

from robot_server.persistence.database import sqlite_rowid
from robot_server.persistence.tables import data_files_table


@dataclass(frozen=True)
class DataFileInfo:
"""Metadata info of a saved data file."""

id: str
name: str
file_hash: str
created_at: datetime


class DataFilesStore:
"""Store and retrieve info about uploaded data files."""

def __init__(
self,
sql_engine: sqlalchemy.engine.Engine,
) -> None:
"""Create a new DataFilesStore."""
self._sql_engine = sql_engine

def get_file_info_by_hash(self, file_hash: str) -> Optional[DataFileInfo]:
"""Get the ID of data file having the provided hash."""
for file in self._sql_get_all_from_engine():
if file.file_hash == file_hash:
return file
return None

async def insert(self, file_info: DataFileInfo) -> None:
"""Insert data file info in the database."""
file_info_dict = {
"id": file_info.id,
"name": file_info.name,
"created_at": file_info.created_at,
"file_hash": file_info.file_hash,
}
statement = sqlalchemy.insert(data_files_table).values(file_info_dict)
with self._sql_engine.begin() as transaction:
transaction.execute(statement)

def _sql_get_all_from_engine(self) -> List[DataFileInfo]:
statement = sqlalchemy.select(data_files_table).order_by(sqlite_rowid)
with self._sql_engine.begin() as transaction:
all_rows = transaction.execute(statement).all()
return [
DataFileInfo(
id=sql_row.id,
name=sql_row.name,
created_at=sql_row.created_at,
file_hash=sql_row.file_hash,
)
for sql_row in all_rows
]
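
For reviewers, a hedged usage sketch of the store above; it mirrors the unit tests later in this commit and assumes `engine` is a SQLAlchemy Engine whose database already contains the data_files table:

```python
# Insert a file record and look it up by content hash; deduplication keys on
# the hash, not the file name. `engine` is assumed to be set up elsewhere.
from datetime import datetime, timezone

from robot_server.data_files.data_files_store import DataFilesStore, DataFileInfo


async def demo(engine) -> None:
    store = DataFilesStore(sql_engine=engine)
    info = DataFileInfo(
        id="file-id",
        name="samples.csv",  # hypothetical file name
        file_hash="abc123",
        created_at=datetime.now(timezone.utc),
    )
    await store.insert(info)  # writes a row to the data_files table
    assert store.get_file_info_by_hash("abc123") == info
```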
56 changes: 56 additions & 0 deletions robot-server/robot_server/data_files/dependencies.py
@@ -0,0 +1,56 @@
"""FastAPI dependencies for data files endpoints."""
from pathlib import Path
from asyncio import Lock as AsyncLock
from typing import Final
from anyio import Path as AsyncPath

from fastapi import Depends
from sqlalchemy.engine import Engine as SQLEngine

from server_utils.fastapi_utils.app_state import (
AppState,
get_app_state,
AppStateAccessor,
)
from robot_server.persistence.fastapi_dependencies import (
get_active_persistence_directory,
get_sql_engine,
)

from .data_files_store import DataFilesStore

_DATA_FILES_SUBDIRECTORY: Final = "data_files"

_data_files_directory_init_lock = AsyncLock()
_data_files_directory_accessor = AppStateAccessor[Path]("data_files_directory")

_data_files_store_init_lock = AsyncLock()
_data_files_store_accessor = AppStateAccessor[DataFilesStore]("data_files_store")


async def get_data_files_directory(
app_state: AppState = Depends(get_app_state),
persistent_directory: Path = Depends(get_active_persistence_directory),
) -> Path:
"""Get the directory to save the protocol files, creating it if needed."""
async with _data_files_directory_init_lock:
data_files_dir = _data_files_directory_accessor.get_from(app_state)
if data_files_dir is None:
data_files_dir = persistent_directory / _DATA_FILES_SUBDIRECTORY
await AsyncPath(data_files_dir).mkdir(exist_ok=True)
_data_files_directory_accessor.set_on(app_state, data_files_dir)

return data_files_dir


async def get_data_files_store(
app_state: AppState = Depends(get_app_state),
sql_engine: SQLEngine = Depends(get_sql_engine),
) -> DataFilesStore:
"""Get a singleton DataFilesStore to keep track of uploaded data files."""
async with _data_files_store_init_lock:
data_files_store = _data_files_store_accessor.get_from(app_state)
if data_files_store is None:
data_files_store = DataFilesStore(sql_engine)
_data_files_store_accessor.set_on(app_state, data_files_store)
return data_files_store
14 changes: 14 additions & 0 deletions robot-server/robot_server/data_files/models.py
@@ -0,0 +1,14 @@
"""Data files models."""
from datetime import datetime

from pydantic import Field

from robot_server.service.json_api import ResourceModel


class DataFile(ResourceModel):
"""A model representing an uploaded data file."""

id: str = Field(..., description="A unique identifier for this file.")
name: str = Field(..., description="Name of the uploaded file.")
createdAt: datetime = Field(..., description="When this data file was *uploaded*.")
143 changes: 143 additions & 0 deletions robot-server/robot_server/data_files/router.py
@@ -0,0 +1,143 @@
"""Router for /dataFiles endpoints."""
from datetime import datetime
from pathlib import Path
from textwrap import dedent
from typing import Optional, Literal, Union

from fastapi import APIRouter, UploadFile, File, Form, Depends, status
from opentrons.protocol_reader import FileHasher, FileReaderWriter

from robot_server.service.json_api import (
SimpleBody,
PydanticResponse,
)
from robot_server.errors.error_responses import ErrorDetails, ErrorBody
from .dependencies import get_data_files_directory, get_data_files_store
from .data_files_store import DataFilesStore, DataFileInfo
from .models import DataFile
from ..protocols.dependencies import get_file_hasher, get_file_reader_writer
from ..service.dependencies import get_current_time, get_unique_id

datafiles_router = APIRouter()


class MultipleDataFileSources(ErrorDetails):
"""An error returned when multiple data file sources are specified in one request."""

id: Literal["MultipleDataFileSources"] = "MultipleDataFileSources"
title: str = "Multiple sources found for data files"


class NoDataFileSourceProvided(ErrorDetails):
"""An error returned when no data file sources are specified in the request."""

id: Literal["NoDataFileSourceProvided"] = "NoDataFileSourceProvided"
title: str = "No data file source provided"


class FileNotFound(ErrorDetails):
"""An error returned when specified file path was not found on the robot."""

id: Literal["FileNotFound"] = "FileNotFound"
title: str = "Specified file path not found on the robot"


class UnexpectedFileFormat(ErrorDetails):
"""An error returned when specified file is not in expected format."""

id: Literal["UnexpectedFileFormat"] = "UnexpectedFileFormat"
title: str = "Unexpected file format"


@PydanticResponse.wrap_route(
datafiles_router.post,
path="/dataFiles",
summary="Upload a data file",
description=dedent(
"""
Upload data file(s) to your device.
"""
),
status_code=status.HTTP_201_CREATED,
responses={
status.HTTP_200_OK: {"model": SimpleBody[DataFile]},
status.HTTP_201_CREATED: {"model": SimpleBody[DataFile]},
status.HTTP_422_UNPROCESSABLE_ENTITY: {
"model": ErrorBody[
Union[
MultipleDataFileSources,
NoDataFileSourceProvided,
UnexpectedFileFormat,
]
]
},
status.HTTP_404_NOT_FOUND: {"model": ErrorBody[FileNotFound]},
},
)
async def upload_data_file(
file: Optional[UploadFile] = File(default=None, description="Data file to upload"),
file_path: Optional[str] = Form(
default=None,
description="Absolute path to a file on the robot.",
alias="filePath",
),
data_files_directory: Path = Depends(get_data_files_directory),
data_files_store: DataFilesStore = Depends(get_data_files_store),
file_reader_writer: FileReaderWriter = Depends(get_file_reader_writer),
file_hasher: FileHasher = Depends(get_file_hasher),
file_id: str = Depends(get_unique_id, use_cache=False),
created_at: datetime = Depends(get_current_time),
) -> PydanticResponse[SimpleBody[DataFile]]:
"""Save the uploaded data file to persistent storage and update database."""
if all([file, file_path]):
raise MultipleDataFileSources(
detail="Can accept either a file or a file path, not both."
).as_error(status.HTTP_422_UNPROCESSABLE_ENTITY)
if file is None and file_path is None:
raise NoDataFileSourceProvided(
detail="You must provide either a file or a file_path in the request."
).as_error(status.HTTP_422_UNPROCESSABLE_ENTITY)
try:
[buffered_file] = await file_reader_writer.read(files=[file or Path(file_path)]) # type: ignore[arg-type, list-item]
except FileNotFoundError as e:
raise FileNotFound(detail=str(e)).as_error(status.HTTP_404_NOT_FOUND) from e
# TODO (spp, 2024-06-18): probably also validate CSV file *contents*
if not buffered_file.name.endswith(".csv"):
raise UnexpectedFileFormat(detail="Only CSV file format is accepted.").as_error(
status.HTTP_422_UNPROCESSABLE_ENTITY
)
file_hash = await file_hasher.hash([buffered_file])
existing_file_info = data_files_store.get_file_info_by_hash(file_hash)
if existing_file_info:
return await PydanticResponse.create(
content=SimpleBody.construct(
data=DataFile.construct(
id=existing_file_info.id,
name=existing_file_info.name,
createdAt=existing_file_info.created_at,
)
),
status_code=status.HTTP_200_OK,
)

# TODO (spp, 2024-06-18): auto delete data files if max exceeded
await file_reader_writer.write(
directory=data_files_directory / file_id, files=[buffered_file]
)
file_info = DataFileInfo(
id=file_id,
name=buffered_file.name,
file_hash=file_hash,
created_at=created_at,
)
await data_files_store.insert(file_info)
return await PydanticResponse.create(
content=SimpleBody.construct(
data=DataFile.construct(
id=file_info.id,
name=file_info.name,
createdAt=created_at,
)
),
status_code=status.HTTP_201_CREATED,
)
2 changes: 2 additions & 0 deletions robot-server/robot_server/persistence/tables/__init__.py
@@ -8,6 +8,7 @@
    run_table,
    run_command_table,
    action_table,
    data_files_table,
)


@@ -18,4 +19,5 @@
    "run_table",
    "run_command_table",
    "action_table",
    "data_files_table",
]
2 changes: 1 addition & 1 deletion robot-server/robot_server/protocols/protocol_store.py
@@ -217,7 +217,7 @@ def get_all_ids(self) -> List[str]:
         return protocol_ids
 
     def get_id_by_hash(self, hash: str) -> Optional[str]:
-        """Get all protocol hashes keyed by protocol id."""
+        """Get ID of protocol corresponding to the provided hash."""
         for p in self.get_all():
             if p.source.content_hash == hash:
                 return p.protocol_id
6 changes: 6 additions & 0 deletions robot-server/robot_server/router.py
@@ -12,6 +12,7 @@
from .maintenance_runs.router import maintenance_runs_router
from .modules.router import modules_router
from .protocols.router import protocols_router
from .data_files.router import datafiles_router
from .robot.router import robot_router
from .runs.router import runs_router
from .service.labware.router import router as labware_router
@@ -64,6 +65,11 @@
    dependencies=[Depends(check_version_header)],
)

router.include_router(
    router=datafiles_router,
    tags=["Data files Management"],
    dependencies=[Depends(check_version_header)],
)
router.include_router(
    router=commands_router,
    tags=["Simple Commands"],
1 change: 1 addition & 0 deletions robot-server/tests/data_files/__init__.py
@@ -0,0 +1 @@
"""Tests for the robot_server.data_files module."""
48 changes: 48 additions & 0 deletions robot-server/tests/data_files/test_data_files_store.py
@@ -0,0 +1,48 @@
"""Tests for the DataFilesStore interface."""
import pytest
from datetime import datetime, timezone
from sqlalchemy.engine import Engine as SQLEngine

from robot_server.data_files.data_files_store import DataFilesStore, DataFileInfo


@pytest.fixture
def subject(sql_engine: SQLEngine) -> DataFilesStore:
"""Get a DataFilesStore test subject."""
return DataFilesStore(sql_engine=sql_engine)


async def test_insert_data_file_info_and_fetch_by_hash(
subject: DataFilesStore,
) -> None:
"""It should add the data file info to database."""
data_file_info = DataFileInfo(
id="file-id",
name="file-name",
file_hash="abc123",
created_at=datetime(year=2024, month=6, day=20, tzinfo=timezone.utc),
)
assert subject.get_file_info_by_hash("abc123") is None
await subject.insert(data_file_info)
assert subject.get_file_info_by_hash("abc123") == data_file_info


async def test_insert_file_info_with_existing_id(
subject: DataFilesStore,
) -> None:
"""It should raise an error when trying to add the same file ID to database."""
data_file_info1 = DataFileInfo(
id="file-id",
name="file-name",
file_hash="abc123",
created_at=datetime(year=2024, month=6, day=20, tzinfo=timezone.utc),
)
data_file_info2 = DataFileInfo(
id="file-id",
name="file-name2",
file_hash="abc1234",
created_at=datetime(year=2024, month=6, day=20, tzinfo=timezone.utc),
)
await subject.insert(data_file_info1)
with pytest.raises(Exception):
await subject.insert(data_file_info2)