add Fmeval support for python 3.11 and 3.12 (#339)
* add support for python 3.11 and 3.12

* add cleanup disk space and test matrix strategy

* bump up version number to 1.2.2
---------

Co-authored-by: satish gollaprolu <[email protected]>
Satish615 and satish gollaprolu authored Jan 11, 2025
1 parent 8d1897e commit 2ef90f7
Showing 8 changed files with 3,153 additions and 2,910 deletions.
25 changes: 23 additions & 2 deletions .github/workflows/unit_test_coverage.yml
@@ -9,16 +9,37 @@ env:
   AWS_DEFAULT_REGION: us-west-2
 
 jobs:
-  test_coverage_python_310:
+  test_coverage_python:
     runs-on: ubuntu-latest
     continue-on-error: false
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.10', '3.11', '3.12']
     env:
       PYTHONWARNINGS: ignore
     steps:
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/[email protected]
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: false
+
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: true
+          swap-storage: true
+
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
-          python-version: '3.10'
+          python-version: ${{ matrix.python-version }}
 
       - name: Setup Environment
         run: |
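The matrix strategy above fans the single Python 3.10 coverage job out into one run per interpreter, and the free-disk-space step reclaims roughly 6 GB before tests start. For a rough local approximation of what CI now exercises, a minimal sketch (assuming python3.10 through python3.12 interpreters are on PATH and the test dependencies are installed; this script is not part of the commit):

# Hypothetical local analogue of the CI matrix above.
# Mirrors fail-fast: false by not aborting when one version fails.
import subprocess

for version in ("3.10", "3.11", "3.12"):
    subprocess.run([f"python{version}", "-m", "pytest", "test/unit"], check=False)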
5,959 changes: 3,088 additions & 2,871 deletions poetry.lock

Large diffs are not rendered by default.

38 changes: 20 additions & 18 deletions pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fmeval"
-version = "1.2.1"
+version = "1.2.2"
 description = "Amazon Foundation Model Evaluations"
 license = "Apache License 2.0"
 authors = ["Amazon FMEval Team <[email protected]>"]
@@ -14,40 +14,42 @@ classifiers=[
     "Natural Language :: English",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
 ]
 
 
 [tool.poetry.dependencies]
 python = "^3.10"
-urllib3 = ">=2.2.2"
-ray = "2.23.0"
+urllib3 = ">=2.3.0"
+ray = "2.40.0"
 semantic-version = "2.10.0"
 pyarrow = "*"
-pyfunctional = "1.4.3"
-torch = ">=2.0.0, !=2.0.1, !=2.1.0"
-matplotlib = "^3.8.3"
+pyfunctional = "1.5.0"
+torch = ">=2.5.0"
+matplotlib = "^3.10.0"
 # https://discuss.ray.io/t/pandas-importerror-with-ray-data-dataset-show/13486
-pandas = "2.1.4"
+pandas = "2.2.3"
 nltk = "^3.9.0"
 markdown = "*"
 IPython = "*"
 evaluate = "^0.4.0"
 rouge-score = "^0.1.2"
 bert-score = "^0.3.13"
-scikit-learn = "^1.3.1"
-jiwer = "^3.0.3"
-transformers = "^4.36.0"
-sagemaker = "^2.225.0"
+scikit-learn = "^1.6.0"
+jiwer = "^3.0.5"
+transformers = "^4.47.0"
+sagemaker = "^2.237.1"
 testbook = "^0.4.2"
-ipykernel = "^6.26.0"
-mypy-boto3-bedrock = "^1.33.2"
-grpcio = "^1.60.0"
-aiohttp = "^3.9.2"
+ipykernel = "^6.29.5"
+mypy-boto3-bedrock = "^1.35.75"
+grpcio = "^1.68.1"
+aiohttp = "^3.11.11"
 
 [tool.poetry.group.dev.dependencies]
 fire = "*"
-black = "24.3.0"
-pre-commit = "3.3.3"
+black = "24.10.0"
+pre-commit = "^4.0.0"
 pytest = "*"
 pytest-pspec = "*"
 flake8 = "*"
@@ -61,7 +63,7 @@ conventional-pre-commit = "*"
 optional = true
 
 [tool.poetry.group.docs.dependencies]
-pdoc = "^14.4.0"
+pdoc = "^15.0.1"
 
 [build-system]
 requires = ["poetry-core"]
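Several dependency floors move at once here (ray 2.40.0, pandas 2.2.3, torch >=2.5.0, transformers ^4.47.0, among others). A hedged sanity check of an existing environment against the new pins might look like the sketch below; the versions are copied from the diff above, importlib.metadata is standard library, and the script itself is illustrative rather than part of the commit:

# Sketch: report whether an installed environment matches the new minimums.
import sys
from importlib.metadata import version, PackageNotFoundError

assert sys.version_info >= (3, 10), "fmeval 1.2.2 targets Python ^3.10"

for pkg, minimum in [("ray", "2.40.0"), ("pandas", "2.2.3"), ("transformers", "4.47.0")]:
    try:
        print(f"{pkg}: installed {version(pkg)}, pinned minimum {minimum}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")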
14 changes: 8 additions & 6 deletions test/unit/eval_algorithms/test_classification_accuracy.py
@@ -60,24 +60,26 @@
 
 
 CLASSIFICATION_DATASET_WITHOUT_MODEL_OUTPUT = CLASSIFICATION_DATASET.drop_columns(
-    DatasetColumns.MODEL_OUTPUT.value.name
+    cols=[DatasetColumns.MODEL_OUTPUT.value.name]
 )
 
-CLASSIFICATION_DATASET_WITHOUT_MODEL_INPUT = CLASSIFICATION_DATASET.drop_columns(DatasetColumns.MODEL_INPUT.value.name)
+CLASSIFICATION_DATASET_WITHOUT_MODEL_INPUT = CLASSIFICATION_DATASET.drop_columns(
+    cols=[DatasetColumns.MODEL_INPUT.value.name]
+)
 
 CLASSIFICATION_DATASET_WITHOUT_MODEL_INPUT_OR_MODEL_OUTPUT = CLASSIFICATION_DATASET_WITHOUT_MODEL_INPUT.drop_columns(
-    DatasetColumns.MODEL_OUTPUT.value.name
+    cols=[DatasetColumns.MODEL_OUTPUT.value.name]
 )
 
 CLASSIFICATION_DATASET_WITHOUT_TARGET_OUTPUT = CLASSIFICATION_DATASET.drop_columns(
-    DatasetColumns.TARGET_OUTPUT.value.name
+    cols=[DatasetColumns.TARGET_OUTPUT.value.name]
 )
 
 CLASSIFICATION_DATASET_WITHOUT_CATEGORY_WITHOUT_MODEL_OUTPUT = CLASSIFICATION_DATASET_WITHOUT_MODEL_OUTPUT.drop_columns(
-    DatasetColumns.CATEGORY.value.name
+    cols=[DatasetColumns.CATEGORY.value.name]
 )
 
-CLASSIFICATION_DATASET_WITHOUT_CATEGORY = CLASSIFICATION_DATASET.drop_columns(DatasetColumns.CATEGORY.value.name)
+CLASSIFICATION_DATASET_WITHOUT_CATEGORY = CLASSIFICATION_DATASET.drop_columns(cols=[DatasetColumns.CATEGORY.value.name])
 
 DATASET_SCORES = [
     EvalScore(name=CLASSIFICATION_ACCURACY_SCORE, value=2 / 3),
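The edits in this test file, and in the two test files below, all make the same mechanical change: drop_columns is now called with the cols keyword holding a list of column names rather than a bare string, matching the list-typed signature in the Ray release this commit pins. A toy sketch of the new call shape (made-up data, not the fmeval fixtures; assumes ray 2.40.0 is installed):

# Toy illustration of the drop_columns call shape used in these tests.
import ray

ds = ray.data.from_items([{"model_input": "q1", "model_output": "a1"}])
ds = ds.drop_columns(cols=["model_output"])  # a list under the cols keyword
print(ds.schema())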
@@ -60,7 +60,6 @@
 
 DATASET_WITHOUT_MODEL_INPUT = DATASET.drop_columns(cols=[DatasetColumns.MODEL_INPUT.value.name])
 
-
 CATEGORY_SCORES = [
     CategoryScore(
         name="brownie",
12 changes: 7 additions & 5 deletions test/unit/eval_algorithms/test_general_semantic_robustness.py
@@ -59,15 +59,17 @@
     ]
 )
 
-DATASET_WITH_ONLY_BERT_SCORE = DATASET_WITH_SCORES.drop_columns(cols=WER_SCORE)
+DATASET_WITH_ONLY_BERT_SCORE = DATASET_WITH_SCORES.drop_columns(cols=[WER_SCORE])
 
-DATASET_WITH_MODEL_OUTPUT = DATASET_WITH_ONLY_BERT_SCORE.drop_columns(cols=BERT_SCORE_DISSIMILARITY)
+DATASET_WITH_MODEL_OUTPUT = DATASET_WITH_ONLY_BERT_SCORE.drop_columns(cols=[BERT_SCORE_DISSIMILARITY])
 
-DATASET = DATASET_WITH_MODEL_OUTPUT.drop_columns(cols=DatasetColumns.MODEL_OUTPUT.value.name)
+DATASET = DATASET_WITH_MODEL_OUTPUT.drop_columns(cols=[DatasetColumns.MODEL_OUTPUT.value.name])
 
-DATASET_NO_CATEGORY = DATASET.drop_columns(cols=DatasetColumns.CATEGORY.value.name)
+DATASET_NO_CATEGORY = DATASET.drop_columns(cols=[DatasetColumns.CATEGORY.value.name])
 
-DATASET_WITH_MODEL_OUTPUT_NO_CATEGORY = DATASET_WITH_MODEL_OUTPUT.drop_columns(cols=DatasetColumns.CATEGORY.value.name)
+DATASET_WITH_MODEL_OUTPUT_NO_CATEGORY = DATASET_WITH_MODEL_OUTPUT.drop_columns(
+    cols=[DatasetColumns.CATEGORY.value.name]
+)
 
 
 class ConstantModel(ModelRunner):
10 changes: 5 additions & 5 deletions test/unit/eval_algorithms/test_qa_accuracy.py
@@ -96,16 +96,16 @@
     ]
 )
 
-QA_DATASET_WITHOUT_MODEL_OUTPUT = QA_DATASET.drop_columns(DatasetColumns.MODEL_OUTPUT.value.name)
+QA_DATASET_WITHOUT_MODEL_OUTPUT = QA_DATASET.drop_columns(cols=[DatasetColumns.MODEL_OUTPUT.value.name])
 
-QA_DATASET_WITHOUT_MODEL_INPUT = QA_DATASET.drop_columns(DatasetColumns.MODEL_INPUT.value.name)
+QA_DATASET_WITHOUT_MODEL_INPUT = QA_DATASET.drop_columns(cols=[DatasetColumns.MODEL_INPUT.value.name])
 
-QA_DATASET_WITHOUT_TARGET_OUTPUT = QA_DATASET.drop_columns(DatasetColumns.TARGET_OUTPUT.value.name)
+QA_DATASET_WITHOUT_TARGET_OUTPUT = QA_DATASET.drop_columns(cols=[DatasetColumns.TARGET_OUTPUT.value.name])
 
-QA_DATASET_WITHOUT_CATEGORY = QA_DATASET.drop_columns(DatasetColumns.CATEGORY.value.name)
+QA_DATASET_WITHOUT_CATEGORY = QA_DATASET.drop_columns(cols=[DatasetColumns.CATEGORY.value.name])
 
 QA_DATASET_WITHOUT_CATEGORY_WITHOUT_MODEL_OUTPUT = QA_DATASET_WITHOUT_CATEGORY.drop_columns(
-    DatasetColumns.MODEL_OUTPUT.value.name
+    cols=[DatasetColumns.MODEL_OUTPUT.value.name]
 )
 
 CATEGORY_SCORES = [
