Skip to content

Commit

Permalink
Upgrade to tensorflow==2.7.x (#199)
Browse files Browse the repository at this point in the history
* Adding rank as default required field (#153) (#155)

* Upgrading to tensorflow==2.7.0 (#156)

- Upgrading to tensorflow==2.7.0 - update interaction layer #156
- Update to tensorflow==2.7.0 - update model #157
- Update to tensorflow==2.7.0 - update requirements and build #158

* Fixing issues after merging with master

* Updating circleci config

* Updating circleci config

* Increasing coverage threshold
  • Loading branch information
lastmansleeping committed Jan 20, 2023
1 parent af7c3ee commit 26905a9
Show file tree
Hide file tree
Showing 69 changed files with 2,834 additions and 3,740 deletions.
13 changes: 9 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
command: |
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install -r python/build-requirements.txt
- run:
name: Build
Expand Down Expand Up @@ -49,6 +50,7 @@ jobs:
python3 -m venv venv
. venv/bin/activate
cd python
pip install --upgrade pip
pip install -r build-requirements.txt
- save_cache:
key: python-deps-{{ checksum "python/build-requirements.txt" }}
Expand Down Expand Up @@ -82,6 +84,7 @@ jobs:
python3 -m venv venv
. venv/bin/activate
cd python
pip install --upgrade pip
pip install -r build-requirements.txt
- save_cache:
key: python-deps-{{ checksum "python/build-requirements.txt" }}
Expand All @@ -92,12 +95,12 @@ jobs:
command: |
. venv/bin/activate
cd python
COVERAGE_FILE=reports/.coverage.classif coverage run --include "ml4ir/*" -m pytest ml4ir/applications/classification/tests/ --junitxml=test_results/results.xml
COVERAGE_FILE=reports/.coverage.classification coverage run --include "ml4ir/*" -m pytest ml4ir/applications/classification/tests/ --junitxml=test_results/results.xml
no_output_timeout: 45m # Time for running the tests before we timeout
- persist_to_workspace:
root: python/reports
paths:
- .coverage.classif
- .coverage.classification
- store_test_results:
path: python/test_results

Expand All @@ -115,6 +118,7 @@ jobs:
python3 -m venv venv
. venv/bin/activate
cd python
pip install --upgrade pip
pip install -r build-requirements.txt
- save_cache:
key: python-deps-{{ checksum "python/build-requirements.txt" }}
Expand Down Expand Up @@ -148,6 +152,7 @@ jobs:
python3 -m venv venv
. venv/bin/activate
cd python
pip install --upgrade pip
pip install -r build-requirements.txt
- save_cache:
key: python-deps-{{ checksum "python/build-requirements.txt" }}
Expand All @@ -160,9 +165,9 @@ jobs:
name: Run Python Coverage
command: |
. venv/bin/activate
coverage combine reports/.coverage.ranking reports/.coverage.classif
coverage combine reports/
coverage report
coverage html --fail-under=75
coverage html --fail-under=80
no_output_timeout: 45m # Time for running the tests before we timeout
- store_artifacts:
path: /home/circleci/project/htmlcov
Expand Down
12 changes: 12 additions & 0 deletions docs/source/misc/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.15] - 2023-01-20

### Changed

- Upgrading from tensorflow 2.0.x to 2.7.x
- Moving from Keras Functional API to Model Subclassing API for more customization capabilities

### Removed

- RankMatchFailure
- Auxiliary loss support

## [0.1.14] - 2022-11-18

### Changed
Expand Down
6 changes: 3 additions & 3 deletions python/build-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ pre-commit
mypy

# ml4ir dependencies
tensorflow==2.0.4
numpy==1.18.5
tensorflow==2.7.4
numpy==1.21.6
pandas==1.2.1
scipy==1.5.4

Expand All @@ -24,4 +24,4 @@ testfixtures==6.17.1
pyspark==3.0.1

#calibration
tensorflow_probability==0.8.0
tensorflow-probability==0.15.0
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ class ClassificationModel(RelevanceModel):
methods."""

def evaluate(
self,
test_dataset: data.TFRecordDataset,
inference_signature: str = None,
additional_features: dict = {},
group_metrics_min_queries: int = 50,
logs_dir: Optional[str] = None,
logging_frequency: int = 25,
compute_intermediate_stats: bool = True,
self,
test_dataset: data.TFRecordDataset,
inference_signature: str = None,
additional_features: dict = {},
group_metrics_min_queries: int = 50,
logs_dir: Optional[str] = None,
logging_frequency: int = 25,
compute_intermediate_stats: bool = True,
):
"""
Evaluate the Classification Model
Expand Down Expand Up @@ -238,14 +238,16 @@ def predict(
self.file_io.rm_file(outfile)
predictions_df = self._create_prediction_dataframe(logging_frequency,
test_dataset)
predictions_ = np.squeeze(self.model.predict(test_dataset))
predictions_ = np.squeeze(self.model.predict(test_dataset)[self.output_name])
# Below, avoid calling predictions_.tolist() as it explodes the memory:
# tolist() will create a list of lists, which consumes more memory
# than a list of numpy arrays
predictions_df[self.output_name] = [x for x in predictions_]
if logs_dir:
np.set_printoptions(formatter={'all':lambda x: str(x.decode('utf-8')) if isinstance(x, bytes) else str(x)},
linewidth=sys.maxsize, threshold=sys.maxsize) # write the full vector in the csv not ...
linewidth=sys.maxsize,
threshold=sys.maxsize, # write the full vector in the csv not a truncated version
legacy="1.13") # enables 1.13 legacy printing mode
for col in predictions_df.columns:
if isinstance(predictions_df[col].values[0], bytes):
predictions_df[col] = predictions_df[col].str.decode('utf8')
Expand Down Expand Up @@ -288,4 +290,4 @@ def _create_prediction_dataframe(self, logging_frequency, test_dataset):
val)
predictions_df = pd.DataFrame({key: val if len(val.shape) == 1 else [inner for inner in val]
for key, val in predictions.items()})
return predictions_df
return predictions_df
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,94 @@
from tensorflow.keras import layers
from tensorflow.keras import losses

from ml4ir.base.config.keys import FeatureTypeKey
from ml4ir.base.model.losses.loss_base import RelevanceLossBase
from ml4ir.applications.ranking.config.keys import LossKey


def get_loss(loss_key) -> RelevanceLossBase:
def get_loss(loss_key, output_name) -> RelevanceLossBase:
"""
Factory to get relevance loss related to classification use-case.
Parameters
----------
loss_key : str
LossKey name
output_name: str
Name of the output node after final activation op
Returns
-------
RelevanceLossBase
Corresponding loss object
"""
if loss_key == LossKey.CATEGORICAL_CROSS_ENTROPY:
return CategoricalCrossEntropy()
return CategoricalCrossEntropy(output_name=output_name)
else:
raise NotImplementedError


class CategoricalCrossEntropy(RelevanceLossBase):
def get_loss_fn(self, **kwargs):

def __init__(self, output_name, **kwargs):
"""
Initialize categorical cross entropy loss
Parameters
----------
output_name: str
Name of the output node after final activation op
"""
super().__init__(**kwargs)

self.output_name = output_name
self.final_activation_fn = layers.Activation("softmax", name=self.output_name)

self.loss_fn = losses.CategoricalCrossentropy(reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

def call(self, inputs, y_true, y_pred, training=None):
"""
Define a categorical cross entropy loss
Parameters
----------
inputs: dict of dict of tensors
Dictionary of input feature tensors
y_true: tensor
True labels
y_pred: tensor
Predicted scores
training: boolean
Boolean indicating whether the layer is being used in training mode
Returns
-------
function
Categorical cross entropy loss function
Categorical cross entropy loss
"""
cce = losses.CategoricalCrossentropy(reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

def _loss_fn(y_true, y_pred):
return cce(y_true, y_pred)
return self.loss_fn(y_true, y_pred)

return _loss_fn

def get_final_activation_op(self, output_name):
def final_activation_op(self, inputs, training=None):
"""
Define softmax activation function
Get softmax activated scores on logits
Parameters
----------
output_name : str
Name of the output to use for final activation layer
inputs: dict of dict of tensors
Dictionary of input feature tensors
Returns
-------
function
Softmax activation function
tensor
Softmax activated scores
"""
return lambda logits, mask: layers.Activation("softmax", name=output_name)(logits)
return self.final_activation_fn(inputs[FeatureTypeKey.LOGITS])

def get_config(self):
"""Return layer config that is used while serialization"""
config = super().get_config()
config.update({
"loss_fn": "categorical_cross_entropy",
"output_name": self.output_name
})
return config
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from tensorflow.keras.metrics import Metric

from ml4ir.applications.classification.config.keys import MetricKey
from ml4ir.applications.classification.model.metrics.metrics_impl import (
CategoricalAccuracy,
Top5CategoricalAccuracy,
)
from ml4ir.applications.classification.model.metrics.metrics_impl import Top5CategoricalAccuracy


def get_metric(metric_key: str) -> Metric:
Expand All @@ -21,9 +18,7 @@ def get_metric(metric_key: str) -> Metric:
Metric class
Class defining the metric computation logic
"""
if metric_key == MetricKey.CATEGORICAL_ACCURACY:
return CategoricalAccuracy
elif metric_key == MetricKey.TOP_5_CATEGORICAL_ACCURACY:
return Top5CategoricalAccuracy
if metric_key == MetricKey.TOP_5_CATEGORICAL_ACCURACY:
return Top5CategoricalAccuracy(name="top_5_categorical_accuracy")
else:
return metric_key
Original file line number Diff line number Diff line change
@@ -1,46 +1,6 @@
import tensorflow as tf
from tensorflow.keras import metrics

from ml4ir.base.model.metrics.metrics_impl import MetricState
from ml4ir.base.features.feature_config import FeatureConfig

from typing import Optional, Dict


class CategoricalAccuracy(metrics.CategoricalAccuracy):
"""
Custom metric class to compute the Categorical Accuracy.
Currently just a wrapper around tf.keras.metrics.CategoricalAccuracy
to maintain consistency of arguments to __init__
"""

def __init__(
self,
feature_config: FeatureConfig,
metadata_features: Dict,
name="categorical_accuracy",
state=MetricState.NEW,
**kwargs
):
"""
Creates a CategoricalAccuracy instance
Parameters
----------
feature_config : FeatureConfig object
FeatureConfig object that defines the configuration for each model
feature
metadata_features : dict
Dictionary of metadata feature tensors that can be used to compute
custom metrics
name : str
Name of the metric
state : {"new", "old"}
State of the metric
"""
super(CategoricalAccuracy, self).__init__(name=name)


class Top5CategoricalAccuracy(metrics.TopKCategoricalAccuracy):
"""
Expand All @@ -53,27 +13,16 @@ class Top5CategoricalAccuracy(metrics.TopKCategoricalAccuracy):

def __init__(
self,
feature_config: Optional[FeatureConfig] = None,
metadata_features: Dict = {},
name="top_5_categorical_accuracy",
state=MetricState.NEW,
**kwargs
):
"""
Creates a Top5CategoricalAccuracy instance
Parameters
----------
feature_config : FeatureConfig object
FeatureConfig object that defines the configuration for each model
feature
metadata_features : dict
Dictionary of metadata feature tensors that can be used to compute
custom metrics
name : str
Name of the metric
state : {"new", "old"}
State of the metric
"""
super(Top5CategoricalAccuracy, self).__init__(name=name, k=5)

Expand Down
Loading

0 comments on commit 26905a9

Please sign in to comment.