From 7d41b84af268017cd8aaa37102ba44feae710d3e Mon Sep 17 00:00:00 2001 From: jay90099 Date: Fri, 23 Jul 2021 12:39:43 -0700 Subject: [PATCH] Update TFDV 1.1.0 Release --- RELEASE.md | 2 ++ tensorflow_data_validation/utils/slicing_util.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index d83b63c1..4ab90dbd 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -8,6 +8,8 @@ * Depends on `google-cloud-bigquery>=1.28.0,<2.21`. * Depends on `tfx-bsl>=1.1.1,<1.2`. +* Fixes error when using tfdv.experimental_get_feature_value_slicer with +  pandas==1.3.0. ## Known Issues diff --git a/tensorflow_data_validation/utils/slicing_util.py b/tensorflow_data_validation/utils/slicing_util.py index 1fa845d1..7c11b6ea 100644 --- a/tensorflow_data_validation/utils/slicing_util.py +++ b/tensorflow_data_validation/utils/slicing_util.py @@ -157,8 +157,10 @@ def feature_value_slicer(record_batch: pa.RecordBatch) -> Iterable[ for col_name in sorted(merged_df.columns): if col_name in [_PARENT_INDEX_COLUMN, _SLICE_KEY_COLUMN]: continue - slice_key_col = (_to_slice_key(col_name) + '_' + - merged_df[col_name].apply(_to_slice_key)) + feature_value_part = merged_df[col_name].apply(_to_slice_key) + if feature_value_part.empty: + feature_value_part = feature_value_part.astype(pd.StringDtype()) + slice_key_col = _to_slice_key(col_name) + '_' + feature_value_part if index == 0: merged_df[_SLICE_KEY_COLUMN] = slice_key_col index += 1