
tensorflow_model_analysis

Init module for TensorFlow Model Analysis.

Attributes

ANALYSIS_KEY module-attribute

ANALYSIS_KEY = 'analysis'

ARROW_INPUT_COLUMN module-attribute

ARROW_INPUT_COLUMN = '__raw_record__'

ARROW_RECORD_BATCH_KEY module-attribute

ARROW_RECORD_BATCH_KEY = 'arrow_record_batch'

ATTRIBUTIONS_KEY module-attribute

ATTRIBUTIONS_KEY = 'attributions'

AddMetricsCallbackType module-attribute

AttributionsForSlice module-attribute

AttributionsForSlice = AttributionsForSlice

BASELINE_KEY module-attribute

BASELINE_KEY = 'baseline'

BASELINE_SCORE_KEY module-attribute

BASELINE_SCORE_KEY = 'baseline_score'

CANDIDATE_KEY module-attribute

CANDIDATE_KEY = 'candidate'

DATA_CENTRIC_MODE module-attribute

DATA_CENTRIC_MODE = 'data_centric_mode'

EXAMPLE_SCORE_KEY module-attribute

EXAMPLE_SCORE_KEY = 'example_score'

EXAMPLE_WEIGHTS_KEY module-attribute

EXAMPLE_WEIGHTS_KEY = 'example_weights'

Extracts module-attribute

Extracts = MutableMapping[str, Any]
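
Illustrative sketch of a single extract: a plain mutable mapping from well-known keys to values, with downstream extractors adding further keys (features, predictions, labels, ...). The serialized bytes below are placeholders.

import tensorflow_model_analysis as tfma

# One extract; the keys present depend on which extractors have run.
extract: tfma.Extracts = {
    tfma.INPUT_KEY: b'<serialized tf.Example bytes>',  # placeholder raw input
}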

FEATURES_KEY module-attribute

FEATURES_KEY = 'features'

FEATURES_PREDICTIONS_LABELS_KEY module-attribute

FEATURES_PREDICTIONS_LABELS_KEY = '_fpl'

FeaturesPredictionsLabels module-attribute

FeaturesPredictionsLabels = NamedTuple(
    "FeaturesPredictionsLabels",
    [
        ("input_ref", int),
        ("features", DictOfFetchedTensorValues),
        ("predictions", DictOfFetchedTensorValues),
        ("labels", DictOfFetchedTensorValues),
    ],
)

INPUT_KEY module-attribute

INPUT_KEY = 'input'

LABELS_KEY module-attribute

LABELS_KEY = 'labels'

METRICS_KEY module-attribute

METRICS_KEY = 'metrics'

MODEL_CENTRIC_MODE module-attribute

MODEL_CENTRIC_MODE = 'model_centric_mode'

MaterializedColumn module-attribute

MaterializedColumn = NamedTuple(
    "MaterializedColumn",
    [
        ("name", str),
        (
            "value",
            Union[
                List[bytes],
                List[int],
                List[float],
                bytes,
                int,
                float,
            ],
        ),
    ],
)

MaybeMultipleEvalSharedModels module-attribute

MaybeMultipleEvalSharedModels = Union[
    EvalSharedModel,
    List[EvalSharedModel],
    Dict[str, EvalSharedModel],
]

MetricsForSlice module-attribute

MetricsForSlice = MetricsForSlice

PLOTS_KEY module-attribute

PLOTS_KEY = 'plots'

PREDICTIONS_KEY module-attribute

PREDICTIONS_KEY = 'predictions'

PlotsForSlice module-attribute

PlotsForSlice = PlotsForSlice

SLICE_KEY_TYPES_KEY module-attribute

SLICE_KEY_TYPES_KEY = '_slice_key_types'

TFMA_EVAL module-attribute

TFMA_EVAL = 'tfma_eval'

TF_ESTIMATOR module-attribute

TF_ESTIMATOR = 'tf_estimator'

TF_GENERIC module-attribute

TF_GENERIC = 'tf_generic'

TF_JS module-attribute

TF_JS = 'tf_js'

TF_KERAS module-attribute

TF_KERAS = 'tf_keras'

TF_LITE module-attribute

TF_LITE = 'tf_lite'

TensorType module-attribute

TensorType = Union[Tensor, SparseTensor, RaggedTensor]

TensorTypeMaybeDict module-attribute

TensorTypeMaybeDict = Union[TensorType, DictOfTensorType]

TensorValue module-attribute

TensorValue = Union[
    ndarray,
    SparseTensorValue,
    RaggedTensorValue,
    SparseTensorValue,
]

VALIDATIONS_KEY module-attribute

VALIDATIONS_KEY = 'validations'

VERSION_STRING module-attribute

VERSION_STRING = '0.48.0.dev'

ValidationResult module-attribute

ValidationResult = ValidationResult

Classes

EvalResult

Bases: NamedTuple('EvalResult', [('slicing_metrics', List[SlicedMetrics]), ('plots', List[SlicedPlots]), ('attributions', List[SlicedAttributions]), ('config', EvalConfig), ('data_location', str), ('file_format', str), ('model_location', str)])

The result of a single model analysis run.

ATTRIBUTE DESCRIPTION
slicing_metrics

A list of tfma.SlicedMetrics containing metric values for each slice.

plots

List of slice-plot pairs.

attributions

List of SlicedAttributions containing attribution values for each slice.

config

The config containing slicing and metrics specification.

data_location

Optional location for data used with config.

file_format

Optional format for data used with config.

model_location

Optional location(s) for model(s) used with config.
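
Illustrative usage (the output path is a placeholder for an existing TFMA output directory):

import tensorflow_model_analysis as tfma

result = tfma.load_eval_result(output_path='/path/to/tfma/output')
print(result.model_location)
print(result.get_slice_names())
print(result.get_metric_names())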

Functions
get_attributions_for_all_slices
get_attributions_for_all_slices(
    metric_name: str = "",
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Dict[str, AttributionsByFeatureKey]

Get attribution feature keys and values for every slice.

PARAMETER DESCRIPTION
metric_name

Name of metric to get attributions for. Optional if only one metric used.

TYPE: str DEFAULT: ''

output_name

The name of the output (optional, only used for multi-output models).

TYPE: str DEFAULT: ''

class_id

Used with multi-class metrics to identify a specific class ID.

TYPE: Optional[int] DEFAULT: None

k

Used with multi-class metrics to identify the kth predicted value.

TYPE: Optional[int] DEFAULT: None

top_k

Used with multi-class and ranking metrics to identify top-k predicted values.

TYPE: Optional[int] DEFAULT: None

RETURNS DESCRIPTION
Dict[str, AttributionsByFeatureKey]

Dictionary mapping slices to attribution feature keys and values.

Source code in tensorflow_model_analysis/view/view_types.py
def get_attributions_for_all_slices(
    self,
    metric_name: str = '',
    output_name: str = '',
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None) -> Dict[str, AttributionsByFeatureKey]:
  """Get attribution feature keys and values for every slice.

  Args:
    metric_name: Name of metric to get attributions for. Optional if only one
      metric used.
    output_name: The name of the output (optional, only used for multi-output
      models).
    class_id: Used with multi-class metrics to identify a specific class ID.
    k: Used with multi-class metrics to identify the kth predicted value.
    top_k: Used with multi-class and ranking metrics to identify top-k
      predicted values.

  Returns:
    Dictionary mapping slices to attribution feature keys and values.
  """

  if class_id or k or top_k:
    sub_key = str(metric_types.SubKey(class_id, k, top_k))
  else:
    sub_key = ''

  all_sliced_attributions = {}
  for sliced_attributions in self.attributions:
    slice_name = sliced_attributions[0]
    attributions = sliced_attributions[1][output_name][sub_key]
    if metric_name:
      attributions = attributions[metric_name]
    elif len(attributions) == 1:
      attributions = list(attributions.values())[0]
    else:
      raise ValueError('metric_name must be one of the following: {}'.format(
          attributions.keys()))
    all_sliced_attributions[slice_name] = copy.copy(attributions)
  return all_sliced_attributions  # pytype: disable=bad-return-type
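
Illustrative usage on a loaded EvalResult (the metric name is hypothetical and depends on the configured attribution metrics):

attributions = result.get_attributions_for_all_slices(
    metric_name='total_attributions')  # hypothetical metric name
for slice_key, attributions_by_feature in attributions.items():
    print(slice_key, attributions_by_feature)
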
get_attributions_for_slice
get_attributions_for_slice(
    slice_name: SliceKeyType = (),
    metric_name: str = "",
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Union[AttributionsByFeatureKey, None]

Get attribution features names and values for a slice.

PARAMETER DESCRIPTION
slice_name

A tuple of the form (column, value), indicating which slice to get attributions from. Optional; if excluded, use overall slice.

TYPE: SliceKeyType DEFAULT: ()

metric_name

Name of metric to get attributions for. Optional if only one metric used.

TYPE: str DEFAULT: ''

output_name

The name of the output. Optional, only used for multi-output models.

TYPE: str DEFAULT: ''

class_id

Used with multi-class models to identify a specific class ID.

TYPE: Optional[int] DEFAULT: None

k

Used with multi-class models to identify the kth predicted value.

TYPE: Optional[int] DEFAULT: None

top_k

Used with multi-class models to identify top-k attribution values.

TYPE: Optional[int] DEFAULT: None

RETURNS DESCRIPTION
Union[AttributionsByFeatureKey, None]

Dictionary containing feature keys and values for the specified slice.

RAISES DESCRIPTION
ValueError

If metric_name is required (more than one metric is present) but was not provided.

Source code in tensorflow_model_analysis/view/view_types.py
def get_attributions_for_slice(
    self,
    slice_name: slicer.SliceKeyType = (),
    metric_name: str = '',
    output_name: str = '',
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None) -> Union[AttributionsByFeatureKey, None]:
  """Get attribution features names and values for a slice.

  Args:
    slice_name: A tuple of the form (column, value), indicating which slice to
      get attributions from. Optional; if excluded, use overall slice.
    metric_name: Name of metric to get attributions for. Optional if only one
      metric used.
    output_name: The name of the output. Optional, only used for multi-output
      models.
    class_id: Used with multi-class models to identify a specific class ID.
    k: Used with multi-class models to identify the kth predicted value.
    top_k: Used with multi-class models to identify top-k attribution values.

  Returns:
    Dictionary containing feature keys and values for the specified slice.

  Raises:
    ValueError: If metric_name is required.
  """

  if class_id or k or top_k:
    sub_key = str(metric_types.SubKey(class_id, k, top_k))
  else:
    sub_key = ''

  def equals_slice_name(slice_key):
    if not slice_key:
      return not slice_name
    else:
      return slice_key == slice_name

  for sliced_attributions in self.attributions:
    slice_key = sliced_attributions[0]
    slice_val = sliced_attributions[1]
    if equals_slice_name(slice_key):
      if metric_name:
        return slice_val[output_name][sub_key][metric_name]
      elif len(slice_val[output_name][sub_key]) == 1:
        return list(slice_val[output_name][sub_key].values())[0]
      else:
        raise ValueError(
            'metric_name must be one of the following: {}'.format(
                slice_val[output_name][sub_key].keys()))

  # if slice could not be found, return None
  return None
get_metric_names
get_metric_names() -> Sequence[str]

Get names of metrics.

RETURNS DESCRIPTION
Sequence[str]

List of metric names.

Source code in tensorflow_model_analysis/view/view_types.py
def get_metric_names(self) -> Sequence[str]:
  """Get names of metrics.

  Returns:
    List of metric names.
  """

  metric_names = set()
  for slicing_metric in self.slicing_metrics:
    for output_name in slicing_metric[1]:
      for metrics in slicing_metric[1][output_name].values():
        metric_names.update(metrics)
  return list(metric_names)
get_metrics_for_all_slices
get_metrics_for_all_slices(
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Dict[str, MetricsByTextKey]

Get metric names and values for every slice.

PARAMETER DESCRIPTION
output_name

The name of the output (optional, only used for multi-output models).

TYPE: str DEFAULT: ''

class_id

Used with multi-class metrics to identify a specific class ID.

TYPE: Optional[int] DEFAULT: None

k

Used with multi-class metrics to identify the kth predicted value.

TYPE: Optional[int] DEFAULT: None

top_k

Used with multi-class and ranking metrics to identify top-k predicted values.

TYPE: Optional[int] DEFAULT: None

RETURNS DESCRIPTION
Dict[str, MetricsByTextKey]

Dictionary mapping slices to metric names and values.

Source code in tensorflow_model_analysis/view/view_types.py
def get_metrics_for_all_slices(
    self,
    output_name: str = '',
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None) -> Dict[str, MetricsByTextKey]:
  """Get metric names and values for every slice.

  Args:
    output_name: The name of the output (optional, only used for multi-output
      models).
    class_id: Used with multi-class metrics to identify a specific class ID.
    k: Used with multi-class metrics to identify the kth predicted value.
    top_k: Used with multi-class and ranking metrics to identify top-k
      predicted values.

  Returns:
    Dictionary mapping slices to metric names and values.
  """

  if all(v is None for v in [class_id, k, top_k]):
    sub_key = ''
  else:
    sub_key = str(metric_types.SubKey(class_id, k, top_k))

  sliced_metrics = {}
  for slicing_metric in self.slicing_metrics:
    slice_name = slicing_metric[0]
    metrics = slicing_metric[1][output_name][sub_key]
    sliced_metrics[slice_name] = {
        metric_name: metric_value
        for metric_name, metric_value in metrics.items()
    }
  return sliced_metrics  # pytype: disable=bad-return-type
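
Illustrative usage on a loaded EvalResult:

all_metrics = result.get_metrics_for_all_slices()
# Maps each slice key to a {metric_name: metric_value} dict.
for slice_key, metrics in all_metrics.items():
    print(slice_key, metrics)
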
get_metrics_for_slice
get_metrics_for_slice(
    slice_name: SliceKeyType = (),
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Union[MetricsByTextKey, None]

Get metric names and values for a slice.

PARAMETER DESCRIPTION
slice_name

A tuple of the form (column, value), indicating which slice to get metrics from. Optional; if excluded, return overall metrics.

TYPE: SliceKeyType DEFAULT: ()

output_name

The name of the output. Optional, only used for multi-output models.

TYPE: str DEFAULT: ''

class_id

Used with multi-class metrics to identify a specific class ID.

TYPE: Optional[int] DEFAULT: None

k

Used with multi-class metrics to identify the kth predicted value.

TYPE: Optional[int] DEFAULT: None

top_k

Used with multi-class and ranking metrics to identify top-k predicted values.

TYPE: Optional[int] DEFAULT: None

RETURNS DESCRIPTION
Union[MetricsByTextKey, None]

Dictionary containing metric names and values for the specified slice.

Source code in tensorflow_model_analysis/view/view_types.py
def get_metrics_for_slice(
    self,
    slice_name: slicer.SliceKeyType = (),
    output_name: str = '',
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None) -> Union[MetricsByTextKey, None]:
  """Get metric names and values for a slice.

  Args:
    slice_name: A tuple of the form (column, value), indicating which slice to
      get metrics from. Optional; if excluded, return overall metrics.
    output_name: The name of the output. Optional, only used for multi-output
      models.
    class_id: Used with multi-class metrics to identify a specific class ID.
    k: Used with multi-class metrics to identify the kth predicted value.
    top_k: Used with multi-class and ranking metrics to identify top-k
      predicted values.

  Returns:
    Dictionary containing metric names and values for the specified slice.
  """
  if all(v is None for v in [class_id, k, top_k]):
    sub_key = ''
  else:
    sub_key = str(metric_types.SubKey(class_id, k, top_k))

  def equals_slice_name(slice_key):
    if not slice_key:
      return not slice_name
    else:
      return slice_key == slice_name

  for slicing_metric in self.slicing_metrics:
    slice_key = slicing_metric[0]
    slice_val = slicing_metric[1]
    if equals_slice_name(slice_key):
      return slice_val[output_name][sub_key]

  # if slice could not be found, return None
  return None
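
Illustrative usage on a loaded EvalResult; the slice key follows the (column, value) tuple form described above, and the feature name and value are hypothetical:

overall_metrics = result.get_metrics_for_slice()  # the overall (empty) slice
en_metrics = result.get_metrics_for_slice(slice_name=(('language', 'en'),))
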
get_slice_names
get_slice_names() -> Sequence[str]

Get names of slices.

RETURNS DESCRIPTION
Sequence[str]

List of slice names.

Source code in tensorflow_model_analysis/view/view_types.py
def get_slice_names(self) -> Sequence[str]:
  """Get names of slices.

  Returns:
    List of slice names.
  """

  return [slicing_metric[0] for slicing_metric in self.slicing_metrics]  # pytype: disable=bad-return-type

EvalSharedModel

Bases: NamedTuple('EvalSharedModel', [('model_path', str), ('add_metrics_callbacks', List[Callable]), ('include_default_metrics', bool), ('example_weight_key', Union[str, Dict[str, str]]), ('additional_fetches', List[str]), ('model_loader', ModelLoader), ('model_name', str), ('model_type', str), ('rubber_stamp', bool), ('is_baseline', bool), ('resource_hints', Optional[Dict[str, Any]]), ('backend_config', Optional[Any])])

Shared model used during extraction and evaluation.

ATTRIBUTE DESCRIPTION
model_path

Path to EvalSavedModel (containing the saved_model.pb file).

add_metrics_callbacks

Optional list of callbacks for adding additional metrics to the graph. The names of the metrics added by the callbacks should not conflict with existing metrics. See below for more details about what each callback should do. The callbacks are only used during evaluation.

include_default_metrics

True to include the default metrics that are part of the saved model graph during evaluation.

example_weight_key

Example weight key (single-output model) or dict of example weight keys (multi-output model) keyed by output_name.

additional_fetches

Prefixes of additional tensors stored in signature_def.inputs that should be fetched at prediction time. The "features" and "labels" tensors are handled automatically and should not be included in this list.

model_loader

Model loader.

model_name

Model name (should align with ModelSpecs.name).

model_type

Model type (tfma.TF_KERAS, tfma.TF_LITE, tfma.TF_ESTIMATOR, ..).

rubber_stamp

True if this model is being rubber stamped. When a model is rubber stamped, diff thresholds will be ignored if an associated baseline model is not passed.

is_baseline

Whether this model is the baseline for comparison.

resource_hints

The beam resource hints to apply to the PTransform which runs inference for this model.

backend_config

The backend config for running model inference.

More details on add_metrics_callbacks:

Each add_metrics_callback should have the following prototype:

def add_metrics_callback(features_dict, predictions_dict, labels_dict)

Note that features_dict, predictions_dict and labels_dict are not necessarily dictionaries - they might also be Tensors, depending on what the model's eval_input_receiver_fn returns.

It should create and return a metric_ops dictionary, such that metric_ops['metric_name'] = (value_op, update_op), just as in the Trainer.

Short example:

def add_metrics_callback(features_dict, predictions_dict, labels_dict):
  metric_ops = {}
  metric_ops['mean_label'] = tf.metrics.mean(labels_dict)
  metric_ops['mean_probability'] = tf.metrics.mean(tf.slice(
      predictions_dict['probabilities'], [0, 1], [2, 1]))
  return metric_ops

ModelLoader

ModelLoader(
    construct_fn: Callable[[], Any],
    tags: Optional[List[str]] = None,
)

Model loader is responsible for loading shared model types.

ATTRIBUTE DESCRIPTION
construct_fn

A callable which creates the model instance. The callable should take no args as input (typically a closure is used to capture necessary parameters).

tags

Optional model tags (e.g. 'serve' for serving or 'eval' for EvalSavedModel).

Source code in tensorflow_model_analysis/api/types.py
def __init__(
    self, construct_fn: Callable[[], Any], tags: Optional[List[str]] = None
):
  self.construct_fn = construct_fn
  self.tags = tags
  self._shared_handle = shared.Shared()
Attributes
construct_fn instance-attribute
construct_fn = construct_fn
tags instance-attribute
tags = tags
Functions
load
load(
    model_load_time_callback: Optional[
        Callable[[int], None]
    ] = None,
) -> Any

Returns loaded model.

PARAMETER DESCRIPTION
model_load_time_callback

Optional callback to track load time.

TYPE: Optional[Callable[[int], None]] DEFAULT: None

Source code in tensorflow_model_analysis/api/types.py
def load(
    self, model_load_time_callback: Optional[Callable[[int], None]] = None
) -> Any:
  """Returns loaded model.

  Args:
    model_load_time_callback: Optional callback to track load time.
  """
  if model_load_time_callback:
    construct_fn = self._construct_fn_with_load_time(model_load_time_callback)
  else:
    construct_fn = self.construct_fn
  return self._shared_handle.acquire(construct_fn)
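
Illustrative construction of a loader for a generic SavedModel; the path is a placeholder and the construct_fn simply closes over it:

import tensorflow as tf
import tensorflow_model_analysis as tfma

model_path = '/path/to/saved_model'  # placeholder
model_loader = tfma.ModelLoader(
    construct_fn=lambda: tf.saved_model.load(model_path, tags=['serve']),
    tags=['serve'])
model = model_loader.load()  # loads and shares the model instance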

RaggedTensorValue

Bases: NamedTuple('RaggedTensorValue', [('values', ndarray), ('nested_row_splits', List[ndarray])])

RaggedTensorValue encapsulates a batch of ragged tensor values.

ATTRIBUTE DESCRIPTION
values

A np.ndarray of values.

nested_row_splits

A list of np.ndarray values representing the row splits (one per dimension including the batch dimension).

SparseTensorValue

Bases: NamedTuple('SparseTensorValue', [('values', ndarray), ('indices', ndarray), ('dense_shape', ndarray)])

SparseTensorValue encapsulates a batch of sparse tensor values.

ATTRIBUTE DESCRIPTION
values

A np.ndarray of values.

indices

A np.ndarray of indices.

dense_shape

A np.ndarray representing the dense shape.

VarLenTensorValue

Bases: NamedTuple('VarLenTensorValue', [('values', ndarray), ('indices', ndarray), ('dense_shape', ndarray)])

VarLenTensorValue encapsulates a batch of varlen dense tensor values.

ATTRIBUTE DESCRIPTION
values

A np.ndarray of values.

indices

A np.ndarray of indices.

dense_shape

A np.ndarray representing the dense shape of the entire tensor. Note that each row (i.e. set of values sharing the same value for the first / batch dimension) is considered to have its own shape based on the presence of values.

Classes
DenseRowIterator
DenseRowIterator(tensor)

An Iterator over rows of a VarLenTensorValue as dense np.arrays.

Because the VarLenTensorValue was created from a set of variable length (dense) arrays, we can invert this process to turn a VarLenTensorValue back into the original dense arrays.

Source code in tensorflow_model_analysis/api/types.py
def __init__(self, tensor):
  self._tensor = tensor
  self._offset = 0
Functions
dense_rows
dense_rows()

Returns an iterator over this tensor's rows as dense np.ndarrays.

Source code in tensorflow_model_analysis/api/types.py
def dense_rows(self):
  return self.DenseRowIterator(self)
from_dense_rows classmethod
from_dense_rows(
    dense_rows: Iterable[ndarray],
) -> VarLenTensorValue

Converts a collection of variable length dense arrays into a tensor.

PARAMETER DESCRIPTION
dense_rows

A sequence of possibly variable length 1D arrays.

TYPE: Iterable[ndarray]

RETURNS DESCRIPTION
VarLenTensorValue

A new VarLenTensorValue containing the sparse representation of the vertically stacked dense rows. The dense_shape attribute on the result will be (num_rows, max_row_len).

Source code in tensorflow_model_analysis/api/types.py
@classmethod
def from_dense_rows(
    cls, dense_rows: Iterable[np.ndarray]
) -> 'VarLenTensorValue':
  """Converts a collection of variable length dense arrays into a tensor.

  Args:
    dense_rows: A sequence of possibly variable length 1D arrays.

  Returns:
    A new VarLenTensorValue containing the sparse representation of the
    vertically stacked dense rows. The dense_shape attribute on the result
    will be (num_rows, max_row_len).
  """
  rows = []
  index_arrays = []
  max_row_len = 0
  num_rows = 0
  for i, row in enumerate(dense_rows):
    num_rows += 1
    if row.size:
      if row.ndim <= 1:
        # Add a dimension for unsized numpy array. This will solve the problem
        # where scalar numpy arrays like np.array(None), np.array(0) can not
        # be merged with other numpy arrays.
        row = row.reshape(-1)
        rows.append(row)
      else:
        raise ValueError(
            'Each non-empty dense row should be 1D or scalar but'
            f' found row with shape {row.shape}.'
        )
      index_arrays.append(np.array([[i, j] for j in range(len(row))]))
    max_row_len = max(max_row_len, row.size)
  if index_arrays:
    values = np.concatenate(rows, axis=0)
    indices = np.concatenate(index_arrays, axis=0)
  else:
    # empty case
    values = np.array([])
    indices = np.empty((0, 2))
  dense_shape = np.array([num_rows, max_row_len])
  return cls.__new__(
      cls, values=values, indices=indices, dense_shape=dense_shape
  )
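
Illustrative usage with a few made-up rows:

import numpy as np
import tensorflow_model_analysis as tfma

rows = [np.array([1, 2, 3]), np.array([4]), np.array([])]
t = tfma.VarLenTensorValue.from_dense_rows(rows)
print(t.dense_shape)        # (num_rows, max_row_len) -> [3 3]
for row in t.dense_rows():  # iterate back over the rows as dense arrays
    print(row)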

Functions

BatchedInputsToExtracts

BatchedInputsToExtracts(
    batched_inputs: PCollection,
) -> PCollection

Converts Arrow RecordBatch inputs to Extracts.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
@beam.typehints.with_input_types(Union[bytes, pa.RecordBatch, types.Extracts])
@beam.typehints.with_output_types(types.Extracts)
def BatchedInputsToExtracts(  # pylint: disable=invalid-name
    batched_inputs: beam.pvalue.PCollection,
) -> beam.pvalue.PCollection:
  """Converts Arrow RecordBatch inputs to Extracts."""

  def to_extracts(
      x: Union[bytes, types.Extracts, pa.RecordBatch],
  ) -> types.Extracts:
    result = {}
    if isinstance(x, dict):
      result.update(x)
    else:
      result[constants.ARROW_RECORD_BATCH_KEY] = x
    return result

  return batched_inputs | 'AddArrowRecordBatchKey' >> beam.Map(to_extracts)

ExtractAndEvaluate

ExtractAndEvaluate(
    extracts: PCollection,
    extractors: List[Extractor],
    evaluators: List[Evaluator],
) -> Evaluation

Performs Extractions and Evaluations in provided order.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
@beam.typehints.with_input_types(types.Extracts)
@beam.typehints.with_output_types(Any)
def ExtractAndEvaluate(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    extractors: List[extractor.Extractor],
    evaluators: List[evaluator.Evaluator],
) -> evaluator.Evaluation:
  """Performs Extractions and Evaluations in provided order."""
  # evaluation[k] = list of values for k
  evaluation = {}

  def update(evaluation: Dict[str, Any], new_evaluation: Dict[str, Any]):
    for k, v in new_evaluation.items():
      if k not in evaluation:
        evaluation[k] = []
      evaluation[k].append(v)
    return evaluation

  _ = extracts | 'TrackInputBytes' >> _TrackBytesProcessed()  # pylint: disable=no-value-for-parameter
  # Run evaluators that run before extraction (i.e. that only require
  # the incoming input extract added by ReadInputs)
  for v in evaluators:
    if not v.run_after:
      update(evaluation, extracts | v.stage_name >> v.ptransform)
  for x in extractors:
    extracts = extracts | x.stage_name >> x.ptransform
    for v in evaluators:
      if v.run_after == x.stage_name:
        update(evaluation, extracts | v.stage_name >> v.ptransform)
  for v in evaluators:
    if v.run_after == extractor.LAST_EXTRACTOR_STAGE_NAME:
      update(evaluation, extracts | v.stage_name >> v.ptransform)

  # Merge multi-valued keys if necessary.
  result = {}
  for k, v in evaluation.items():
    if len(v) == 1:
      result[k] = v[0]
      continue

    # Note that we assume that if a key is multivalued, its values are
    # dictionaries with disjoint keys. The combined value will simply be the
    # disjoint union of all the dictionaries.
    result[k] = (
        v
        | 'FlattenEvaluationOutput(%s)' % k >> beam.Flatten()
        | 'CombineEvaluationOutput(%s)' % k
        >> beam.CombinePerKey(_CombineEvaluationDictionariesFn())
    )

  return result
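
Illustrative wiring with the default extractors and evaluators (eval_config and eval_shared_model are assumed to be constructed as in the ExtractEvaluateAndWriteResults example below):

evaluation = (
    extracts
    | 'ExtractAndEvaluate' >> tfma.ExtractAndEvaluate(
        extractors=tfma.default_extractors(
            eval_config=eval_config, eval_shared_model=eval_shared_model),
        evaluators=tfma.default_evaluators(
            eval_config=eval_config, eval_shared_model=eval_shared_model)))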

ExtractEvaluateAndWriteResults

ExtractEvaluateAndWriteResults(
    examples: PCollection,
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    extractors: Optional[List[Extractor]] = None,
    evaluators: Optional[List[Evaluator]] = None,
    writers: Optional[List[Writer]] = None,
    output_path: Optional[str] = None,
    display_only_data_location: Optional[str] = None,
    display_only_file_format: Optional[str] = None,
    slice_spec: Optional[List[SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    random_seed_for_testing: Optional[int] = None,
    tensor_adapter_config: Optional[
        TensorAdapterConfig
    ] = None,
    schema: Optional[Schema] = None,
    config_version: Optional[int] = None,
) -> Dict[str, PCollection]

PTransform for performing extraction, evaluation, and writing results.

Users who want to construct their own Beam pipelines instead of using the lightweight run_model_analysis functions should use this PTransform.

Example usage:

eval_config = tfma.EvalConfig(model_specs=[...], metrics_specs=[...],
                              slicing_specs=[...])
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path=model_location, eval_config=eval_config)
tfx_io = tf_example_record.TFExampleRecord(
    file_pattern=data_location,
    raw_record_column_name=tfma.ARROW_INPUT_COLUMN)
with beam.Pipeline(runner=...) as p:
  _ = (p
       | 'ReadData' >> tfx_io.BeamSource()
       | 'ExtractEvaluateAndWriteResults' >>
       tfma.ExtractEvaluateAndWriteResults(
           eval_shared_model=eval_shared_model,
           eval_config=eval_config,
           ...))
result = tfma.load_eval_result(output_path=output_path)
tfma.view.render_slicing_metrics(result)

NOTE: If running with an EvalSavedModel (i.e. the ModelSpec has signature_name "eval"), read the input with beam.io.ReadFromTFRecord(data_location) instead of the tfxio.BeamSource() step shown above.

Note that the exact serialization format is an internal implementation detail and subject to change. Users should only use the TFMA functions to write and read the results.

PARAMETER DESCRIPTION
examples

PCollection of input examples or Arrow Record batches. Examples can be any format the model accepts (e.g. string containing CSV row, TensorFlow.Example, etc). If the examples are in the form of a dict it will be assumed that input is already in the form of tfma.Extracts with examples stored under tfma.INPUT_KEY (any other keys will be passed along unchanged to downstream extractors and evaluators).

TYPE: PCollection

eval_shared_model

Optional shared model (single-model evaluation) or list of shared models (multi-model evaluation). Only required if needed by default extractors, evaluators, or writers and for display purposes of the model path.

TYPE: Optional[MaybeMultipleEvalSharedModels] DEFAULT: None

eval_config

Eval config.

TYPE: Optional[EvalConfig] DEFAULT: None

extractors

Optional list of Extractors to apply to Extracts. Typically these will be added by calling the default_extractors function. If no extractors are provided, default_extractors (non-materialized) will be used.

TYPE: Optional[List[Extractor]] DEFAULT: None

evaluators

Optional list of Evaluators for evaluating Extracts. Typically these will be added by calling the default_evaluators function. If no evaluators are provided, default_evaluators will be used.

TYPE: Optional[List[Evaluator]] DEFAULT: None

writers

Optional list of Writers for writing Evaluation output. Typically these will be added by calling the default_writers function. If no writers are provided, default_writers will be used.

TYPE: Optional[List[Writer]] DEFAULT: None

output_path

Path to output results to (config file, metrics, plots, etc).

TYPE: Optional[str] DEFAULT: None

display_only_data_location

Optional path indicating where the examples were read from. This is used only for display purposes - data will not actually be read from this path.

TYPE: Optional[str] DEFAULT: None

display_only_file_format

Optional format of the examples. This is used only for display purposes.

TYPE: Optional[str] DEFAULT: None

slice_spec

Deprecated (use EvalConfig).

TYPE: Optional[List[SingleSliceSpec]] DEFAULT: None

write_config

Deprecated (use EvalConfig).

TYPE: Optional[bool] DEFAULT: True

compute_confidence_intervals

Deprecated (use EvalConfig).

TYPE: Optional[bool] DEFAULT: False

min_slice_size

Deprecated (use EvalConfig).

TYPE: int DEFAULT: 1

random_seed_for_testing

Provide for deterministic tests only.

TYPE: Optional[int] DEFAULT: None

tensor_adapter_config

Tensor adapter config which specifies how to obtain tensors from the Arrow RecordBatch. If None, an attempt will be made to create the tensors using default TensorRepresentations.

TYPE: Optional[TensorAdapterConfig] DEFAULT: None

schema

A schema to use for customizing evaluators.

TYPE: Optional[Schema] DEFAULT: None

config_version

Optional config version for this evaluation. This should not be explicitly set by users. It is only intended to be used in cases where the provided eval_config was generated internally, and thus not a reliable indicator of user intent.

TYPE: Optional[int] DEFAULT: None

RAISES DESCRIPTION
ValueError

If EvalConfig invalid or matching Extractor not found for an Evaluator.

RETURNS DESCRIPTION
Dict[str, PCollection]

A dict of writer results keyed by the writer stage name.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples: beam.PCollection,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    output_path: Optional[str] = None,
    display_only_data_location: Optional[str] = None,
    display_only_file_format: Optional[str] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    random_seed_for_testing: Optional[int] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
    schema: Optional[schema_pb2.Schema] = None,
    config_version: Optional[int] = None,
) -> Dict[str, beam.PCollection]:
  """PTransform for performing extraction, evaluation, and writing results.

  Users who want to construct their own Beam pipelines instead of using the
  lightweight run_model_analysis functions should use this PTransform.

  Example usage:

  ```python
  eval_config = tfma.EvalConfig(model_specs=[...], metrics_specs=[...],
                                slicing_specs=[...])
  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=model_location, eval_config=eval_config)
  tfx_io = tf_example_record.TFExampleRecord(
      file_pattern=data_location,
      raw_record_column_name=tfma.ARROW_INPUT_COLUMN)
  with beam.Pipeline(runner=...) as p:
    _ = (p
         | 'ReadData' >> tfx_io.BeamSource()
         | 'ExtractEvaluateAndWriteResults' >>
         tfma.ExtractEvaluateAndWriteResults(
             eval_shared_model=eval_shared_model,
             eval_config=eval_config,
             ...))
  result = tfma.load_eval_result(output_path=output_path)
  tfma.view.render_slicing_metrics(result)

  NOTE: If running with an EvalSavedModel (i.e. the ModelSpec has signature_name
  "eval"), then instead of using the tfxio.BeamSource() code use the following
  beam.io.ReadFromTFRecord(data_location)
  ```

  Note that the exact serialization format is an internal implementation detail
  and subject to change. Users should only use the TFMA functions to write and
  read the results.

  Args:
    examples: PCollection of input examples or Arrow Record batches. Examples
      can be any format the model accepts (e.g. string containing CSV row,
      TensorFlow.Example, etc). If the examples are in the form of a dict it
      will be assumed that input is already in the form of tfma.Extracts with
      examples stored under tfma.INPUT_KEY (any other keys will be passed along
      unchanged to downstream extractors and evaluators).
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if needed by
      default extractors, evaluators, or writers and for display purposes of the
      model path.
    eval_config: Eval config.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    output_path: Path to output results to (config file, metrics, plots, etc).
    display_only_data_location: Optional path indicating where the examples were
      read from. This is used only for display purposes - data will not actually
      be read from this path.
    display_only_file_format: Optional format of the examples. This is used only
      for display purposes.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    min_slice_size: Deprecated (use EvalConfig).
    random_seed_for_testing: Provide for deterministic tests only.
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch. If None, an attempt will be made to
      create the tensors using default TensorRepresentations.
    schema: A schema to use for customizing evaluators.
    config_version: Optional config version for this evaluation. This should not
      be explicitly set by users. It is only intended to be used in cases where
      the provided eval_config was generated internally, and thus not a reliable
      indicator of user intent.

  Raises:
    ValueError: If EvalConfig invalid or matching Extractor not found for an
      Evaluator.

  Returns:
    A dict of writer results keyed by the writer stage name.
  """
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model
  )

  if eval_config is None:
    config_version = 1 if config_version is None else config_version
    eval_config = _default_eval_config(
        eval_shared_models,
        slice_spec,
        write_config,
        compute_confidence_intervals,
        min_slice_size,
    )
  else:
    config_version = 2 if config_version is None else config_version
    eval_config = _update_eval_config_with_defaults(
        eval_config, eval_shared_model
    )
  config_util.verify_eval_config(eval_config)

  if not extractors:
    extractors = default_extractors(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config,
        config_version=config_version,
    )

  if not evaluators:
    evaluators = default_evaluators(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        random_seed_for_testing=random_seed_for_testing,
        schema=schema,
        config_version=config_version,
    )

  for v in evaluators:
    evaluator.verify_evaluator(v, extractors)

  if not writers:
    writers = default_writers(
        output_path=output_path,
        eval_shared_model=eval_shared_model,
        eval_config=eval_config,
        display_only_data_location=display_only_data_location,
        display_only_data_file_format=display_only_file_format,
    )

  # pylint: disable=no-value-for-parameter
  if is_batched_input(eval_shared_model, eval_config, config_version):
    extracts = examples | 'BatchedInputsToExtracts' >> BatchedInputsToExtracts()
  else:
    extracts = examples | 'InputsToExtracts' >> InputsToExtracts()

  return (
      extracts
      | 'ExtractAndEvaluate'
      >> ExtractAndEvaluate(extractors=extractors, evaluators=evaluators)
      | 'WriteResults' >> WriteResults(writers=writers)
  )

InputsToExtracts

InputsToExtracts(inputs: PCollection) -> PCollection

Converts serialized inputs (e.g. examples) to Extracts if not already.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
# TODO(b/156538355): Find out why str is also required instead of just bytes
#   after adding types.Extracts.
@beam.typehints.with_input_types(Union[bytes, str, types.Extracts])
@beam.typehints.with_output_types(types.Extracts)
def InputsToExtracts(  # pylint: disable=invalid-name
    inputs: beam.pvalue.PCollection,
) -> beam.pvalue.PCollection:
  """Converts serialized inputs (e.g. examples) to Extracts if not already."""

  def to_extracts(x: Union[bytes, str, types.Extracts]) -> types.Extracts:
    result = {}
    if isinstance(x, dict):
      result.update(x)
    else:
      result[constants.INPUT_KEY] = x
    return result

  return inputs | 'AddInputKey' >> beam.Map(to_extracts)
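
Illustrative usage with serialized examples (the bytes are placeholders):

import apache_beam as beam
import tensorflow_model_analysis as tfma

with beam.Pipeline() as p:
  extracts = (
      p
      | 'CreateExamples' >> beam.Create([b'<serialized tf.Example>'])
      | 'InputsToExtracts' >> tfma.InputsToExtracts())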

Validate

Validate(
    extracts: PCollection,
    alternatives: Dict[str, PTransform],
    validators: List[Validator],
) -> Validation

Performs validation of alternative evaluations.

PARAMETER DESCRIPTION
extracts

PCollection of extracts.

TYPE: PCollection

alternatives

Dict of PTransforms (Extracts -> Evaluation) whose output will be compared for validation purposes (e.g. 'baseline' vs 'candidate').

TYPE: Dict[str, PTransform]

validators

List of validators for validating the output from running the alternatives. The Validation outputs produced by the validators will be merged into a single output. If there are overlapping output keys, later outputs will replace earlier outputs sharing the same key.

TYPE: List[Validator]

RETURNS DESCRIPTION
Validation

Validation dict.

Source code in tensorflow_model_analysis/api/verifier_lib.py
@beam.ptransform_fn
@beam.typehints.with_input_types(types.Extracts)
@beam.typehints.with_output_types(Any)
def Validate(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    alternatives: Dict[str, beam.PTransform],
    validators: List[validator.Validator],
) -> validator.Validation:
  """Performs validation of alternative evaluations.

  Args:
    extracts: PCollection of extracts.
    alternatives: Dict of PTransforms (Extracts -> Evaluation) whose output will
      be compared for validation purposes (e.g. 'baseline' vs 'candidate').
    validators: List of validators for validating the output from running the
      alternatives. The Validation outputs produced by the validators will be
      merged into a single output. If there are overlapping output keys, later
      outputs will replace earlier outputs sharing the same key.

  Returns:
    Validation dict.
  """
  evaluations = {}
  for key in alternatives:
    evaluations[key] = extracts | 'Evaluate(%s)' % key >> alternatives[key]

  validation = {}
  for v in validators:
    validation.update(evaluations | v.stage_name >> v.ptransform)
  return validation

WriteResults

WriteResults(
    evaluation_or_validation: Union[Evaluation, Validation],
    writers: List[Writer],
) -> Dict[str, PCollection]

Writes Evaluation or Validation results using given writers.

PARAMETER DESCRIPTION
evaluation_or_validation

Evaluation or Validation output.

TYPE: Union[Evaluation, Validation]

writers

Writers to use for writing out the output.

TYPE: List[Writer]

RAISES DESCRIPTION
ValueError

If Evaluation or Validation is empty.

RETURNS DESCRIPTION
Dict[str, PCollection]

A dict of writer results keyed by the writer stage name.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
# TODO(b/157600974): Add input typehint.
def WriteResults(  # pylint: disable=invalid-name
    evaluation_or_validation: Union[evaluator.Evaluation, validator.Validation],
    writers: List[writer.Writer],
) -> Dict[str, beam.PCollection]:
  """Writes Evaluation or Validation results using given writers.

  Args:
    evaluation_or_validation: Evaluation or Validation output.
    writers: Writes to use for writing out output.

  Raises:
    ValueError: If Evaluation or Validation is empty.

  Returns:
    A dict of writer results keyed by the writer stage name.
  """
  if not evaluation_or_validation:
    raise ValueError('Evaluations and Validations cannot be empty')
  result = {}
  for w in writers:
    result[w.stage_name] = (
        evaluation_or_validation | w.stage_name >> w.ptransform
    )
  return result
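
Illustrative usage with the default writers (the output path is a placeholder; eval_config and eval_shared_model are assumed to be defined as in the examples above):

writers = tfma.default_writers(
    output_path='/path/to/output',  # placeholder
    eval_shared_model=eval_shared_model,
    eval_config=eval_config)
_ = evaluation | 'WriteResults' >> tfma.WriteResults(writers=writers)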

analyze_raw_data

analyze_raw_data(
    data: DataFrame,
    eval_config: Optional[EvalConfig] = None,
    output_path: Optional[str] = None,
    extractors: Optional[List[Extractor]] = None,
    evaluators: Optional[List[Evaluator]] = None,
    writers: Optional[List[Writer]] = None,
    add_metric_callbacks: Optional[
        List[AddMetricsCallbackType]
    ] = None,
) -> EvalResult

Runs TensorFlow model analysis on a pandas.DataFrame.

This function allows you to use TFMA with pandas DataFrames. The DataFrame must include a column of predicted values and a column of actual labels ('prediction' and 'label' by default; the column names can be configured via the ModelSpec's prediction_key and label_key).

In addition to a DataFrame, this function requires an eval_config, a tfma.EvalConfig object containing various configuration parameters (see config.proto for a comprehensive list)...

  • the metrics to compute
  • the slices to compute metrics on
  • the DataFrame's column names for example labels and predictions ('label' and 'prediction' by default)
  • confidence interval options

This function returns a tfma.EvalResult, which contains TFMA's computed metrics and can be used to generate plots with tfma.view.render_slicing_metrics.

Example usage:

model_specs = [
  tfma.ModelSpec(
      prediction_key='prediction',
      label_key='label')
]
metrics_specs = [
    tfma.MetricsSpec(metrics=[
      tfma.MetricConfig(class_name='Accuracy'),
      tfma.MetricConfig(class_name='ExampleCount')
    ])
]
slicing_specs = [
    tfma.SlicingSpec(),  # the empty slice represents overall dataset
    tfma.SlicingSpec(feature_keys=['language'])
]
eval_config = tfma.EvalConfig(
    model_specs=model_specs,
    metrics_specs=metrics_specs,
    slicing_specs=slicing_specs)
result = tfma.analyze_raw_data(df, eval_config)
tfma.view.render_slicing_metrics(result)

# Example with Fairness Indicators
from tensorflow_model_analysis.addons.fairness.post_export_metrics import (
    fairness_indicators)
from tensorflow_model_analysis.addons.fairness.view import widget_view
add_metrics_callbacks = [
    tfma.post_export_metrics.fairness_indicators(thresholds=[0.25, 0.5, 0.75])
]
result = tfma.analyze_raw_data(
    data=df,
    eval_config=eval_config,
    add_metric_callbacks=add_metrics_callbacks
)
widget_view.render_fairness_indicator(result)

PARAMETER DESCRIPTION
data

A pandas.DataFrame, where rows correspond to examples and columns correspond to features. One column must indicate a row's predicted label, and one column must indicate a row's actual label.

TYPE: DataFrame

eval_config

A tfma.EvalConfig, which contains various configuration parameters including metrics, slices, and label/prediction column names.

TYPE: Optional[EvalConfig] DEFAULT: None

output_path

Path to write EvalResult to.

TYPE: Optional[str] DEFAULT: None

extractors

Optional list of Extractors to apply to Extracts. Typically these will be added by calling the default_extractors function. If no extractors are provided, default_extractors (non-materialized) will be used.

TYPE: Optional[List[Extractor]] DEFAULT: None

evaluators

Optional list of Evaluators for evaluating Extracts. Typically these will be added by calling the default_evaluators function. If no evaluators are provided, default_evaluators will be used.

TYPE: Optional[List[Evaluator]] DEFAULT: None

writers

Optional list of Writers for writing Evaluation output. Typically these will be added by calling the default_writers function. If no writers are provided, default_writers with add_metric_callbacks will be used.

TYPE: Optional[List[Writer]] DEFAULT: None

add_metric_callbacks

Optional list of metric callbacks (if used).

TYPE: Optional[List[AddMetricsCallbackType]] DEFAULT: None

RETURNS DESCRIPTION
EvalResult

A tfma.EvalResult to extract metrics or generate visualizations from.

RAISES DESCRIPTION
KeyError

If the prediction or label columns are not found within the DataFrame.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def analyze_raw_data(
    data: pd.DataFrame,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    output_path: Optional[str] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    add_metric_callbacks: Optional[List[types.AddMetricsCallbackType]] = None,
) -> view_types.EvalResult:
  """Runs TensorFlow model analysis on a pandas.DataFrame.

  This function allows you to use TFMA with Pandas DataFrames. The dataframe
  must include a 'predicted' column for the predicted label and a 'label' column
  for the actual label.

  In addition to a DataFrame, this function requires an eval_config, a
  `tfma.EvalConfig` object containing various configuration parameters (see
  [config.proto](https://github.com/tensorflow/model-analysis/blob/master/tensorflow_model_analysis/proto/config.proto)
  for a comprehensive list)...

  * the metrics to compute
  * the slices to compute metrics on
  * the DataFrame's column names for example labels and predictions ('label'
    and 'prediction' by default)
  * confidence interval options

  This function returns a `tfma.EvalResult`, which contains TFMA's computed
  metrics and can be used to generate plots with
  `tfma.view.render_slicing_metrics`.

  Example usage:

  ```python
  model_specs = [
    tfma.ModelSpec(
        prediction_key='prediction',
        label_key='label')
  ]
  metrics_specs = [
      tfma.MetricsSpec(metrics=[
        tfma.MetricConfig(class_name='Accuracy'),
        tfma.MetricConfig(class_name='ExampleCount')
      ])
  ]
  slicing_specs = [
      tfma.SlicingSpec(),  # the empty slice represents overall dataset
      tfma.SlicingSpec(feature_keys=['language'])
  ]
  eval_config = tfma.EvalConfig(
      model_specs=model_specs,
      metrics_specs=metrics_specs,
      slicing_specs=slicing_specs)
  result = tfma.analyze_raw_data(df, eval_config)
  tfma.view.render_slicing_metrics(result)

  # Example with Fairness Indicators
  from tensorflow_model_analysis.addons.fairness.post_export_metrics import
  fairness_indicators
  from tensorflow_model_analysis.addons.fairness.view import widget_view
  add_metrics_callbacks = [
      tfma.post_export_metrics.fairness_indicators(thresholds=[0.25, 0.5, 0.75])
  ]
  result = tfma.analyze_raw_data(
      data=df,
      metrics_specs=metrics_specs,
      slicing_specs=slicing_specs,
      add_metric_callbacks=add_metrics_callbacks
  )
  widget_view.render_fairness_indicator(result)
  ```

  Args:
    data: A pandas.DataFrame, where rows correspond to examples and columns
      correspond to features. One column must indicate a row's predicted label,
      and one column must indicate a row's actual label.
    eval_config: A `tfma.EvalConfig`, which contains various configuration
      parameters including metrics, slices, and label/prediction column names.
    output_path: Path to write EvalResult to.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers with `add_metric_callbacks` will be used.
    add_metric_callbacks: Optional list of metric callbacks (if used).

  Returns:
    A tfma.EvalResult to extract metrics or generate visualizations from.

  Raises:
    KeyError: If the prediction or label columns are not found within the
      DataFrame.
  """
  for model_spec in eval_config.model_specs:  # pytype: disable=attribute-error
    model_spec.prediction_key = model_spec.prediction_key or 'prediction'
    model_spec.label_key = model_spec.label_key or 'label'
    if model_spec.prediction_key not in data.columns:
      raise KeyError(
          'The prediction_key column was not found. Looked for %s but found: %s'
          % (model_spec.prediction_key, list(data.columns))
      )
    if model_spec.label_key not in data.columns:
      raise KeyError(
          'The label_key column was not found. Looked for %s but found: %s'
          % (model_spec.label_key, list(data.columns))
      )

  # TODO(b/153570803): Validity check / assertions for dataframe structure
  if eval_config.slicing_specs is None:  # pytype: disable=attribute-error
    eval_config.slicing_specs = [config_pb2.SlicingSpec(feature_keys=[''])]
  if output_path is None:
    output_path = tempfile.mkdtemp()

  arrow_data = table_util.CanonicalizeRecordBatch(
      pa.RecordBatch.from_pandas(data)
  )
  beam_data = beam.Create([arrow_data])

  if not writers:
    writers = default_writers(
        output_path,
        eval_config=eval_config,
        add_metric_callbacks=add_metric_callbacks,
    )

  with beam.Pipeline() as p:
    _ = (
        p
        | beam_data
        | 'ExtractEvaluateAndWriteResults'
        >> ExtractEvaluateAndWriteResults(  # pylint: disable=no-value-for-parameter
            extractors=extractors,
            evaluators=evaluators,
            writers=writers,
            eval_config=eval_config,
            output_path=output_path,
        )
    )

  return load_eval_result(output_path)
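
For a concrete starting point, a tiny DataFrame that satisfies the default 'prediction'/'label' column expectations (all values are made up):

import pandas as pd
import tensorflow_model_analysis as tfma

df = pd.DataFrame({
    'prediction': [0.9, 0.2, 0.7],    # made-up model scores
    'label': [1, 0, 1],               # made-up ground truth
    'language': ['en', 'fr', 'en'],   # feature used for slicing
})
result = tfma.analyze_raw_data(df, eval_config)  # eval_config as defined above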

default_eval_shared_model

default_eval_shared_model(
    eval_saved_model_path: str,
    add_metrics_callbacks: Optional[
        List[AddMetricsCallbackType]
    ] = None,
    include_default_metrics: Optional[bool] = True,
    example_weight_key: Optional[
        Union[str, Dict[str, str]]
    ] = None,
    additional_fetches: Optional[List[str]] = None,
    blacklist_feature_fetches: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    model_name: str = "",
    eval_config: Optional[EvalConfig] = None,
    custom_model_loader: Optional[ModelLoader] = None,
    rubber_stamp: Optional[bool] = False,
    resource_hints: Optional[Dict[str, Any]] = None,
    backend_config: Optional[Any] = None,
) -> EvalSharedModel

Returns default EvalSharedModel.

PARAMETER DESCRIPTION
eval_saved_model_path

Path to EvalSavedModel.

TYPE: str

add_metrics_callbacks

Optional list of callbacks for adding additional metrics to the graph (see EvalSharedModel for more information on how to configure additional metrics). Metrics for example count and example weights will be added automatically. Only used if an EvalSavedModel is used.

TYPE: Optional[List[AddMetricsCallbackType]] DEFAULT: None

include_default_metrics

DEPRECATED. Use eval_config.options.include_default_metrics.

TYPE: Optional[bool] DEFAULT: True

example_weight_key

DEPRECATED. Use eval_config.model_specs.example_weight_key or eval_config.model_specs.example_weight_keys.

TYPE: Optional[Union[str, Dict[str, str]]] DEFAULT: None

additional_fetches

Optional prefixes of additional tensors stored in signature_def.inputs that should be fetched at prediction time. The "features" and "labels" tensors are handled automatically and should not be included. Only used if an EvalSavedModel is used.

TYPE: Optional[List[str]] DEFAULT: None

blacklist_feature_fetches

Optional list of tensor names in the features dictionary which should be excluded from the fetches request. This is useful in scenarios where features are large (e.g. images) and can lead to excessive memory use if stored. Only used if an EvalSavedModel is used.

TYPE: Optional[List[str]] DEFAULT: None

tags

Optional model tags (e.g. 'serve' for serving or 'eval' for EvalSavedModel).

TYPE: Optional[List[str]] DEFAULT: None

model_name

Optional name of the model being created (should match ModelSpecs.name). The name should only be provided if multiple models are being evaluated.

TYPE: str DEFAULT: ''

eval_config

Eval config.

TYPE: Optional[EvalConfig] DEFAULT: None

custom_model_loader

Optional custom model loader for non-TF models.

TYPE: Optional[ModelLoader] DEFAULT: None

rubber_stamp

True when this run is a first run without a baseline model even though a baseline is configured; in that case the diff thresholds will be ignored.

TYPE: Optional[bool] DEFAULT: False

resource_hints

The beam resource hints to apply to the PTransform which runs inference for this model.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

backend_config

Optional configuration for the backend that runs model inference with some prediction extractors.

TYPE: Optional[Any] DEFAULT: None
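
Illustrative call for a serving SavedModel evaluated against an EvalConfig (the model path is a placeholder):

eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/saved_model',  # placeholder
    eval_config=eval_config,
    tags=['serve'])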

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def default_eval_shared_model(
    eval_saved_model_path: str,
    add_metrics_callbacks: Optional[List[types.AddMetricsCallbackType]] = None,
    include_default_metrics: Optional[bool] = True,
    example_weight_key: Optional[Union[str, Dict[str, str]]] = None,
    additional_fetches: Optional[List[str]] = None,
    blacklist_feature_fetches: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    model_name: str = '',
    eval_config: Optional[config_pb2.EvalConfig] = None,
    custom_model_loader: Optional[types.ModelLoader] = None,
    rubber_stamp: Optional[bool] = False,
    resource_hints: Optional[Dict[str, Any]] = None,
    backend_config: Optional[Any] = None,
) -> types.EvalSharedModel:
  """Returns default EvalSharedModel.

  Args:
    eval_saved_model_path: Path to EvalSavedModel.
    add_metrics_callbacks: Optional list of callbacks for adding additional
      metrics to the graph (see EvalSharedModel for more information on how to
      configure additional metrics). Metrics for example count and example
      weights will be added automatically. Only used if EvalSavedModel used.
    include_default_metrics: DEPRECATED. Use
      eval_config.options.include_default_metrics.
    example_weight_key: DEPRECATED. Use
      eval_config.model_specs.example_weight_key or
      eval_config.model_specs.example_weight_keys.
    additional_fetches: Optional prefixes of additional tensors stored in
      signature_def.inputs that should be fetched at prediction time. The
      "features" and "labels" tensors are handled automatically and should not
      be included. Only used if EvalSavedModel used.
    blacklist_feature_fetches: Optional list of tensor names in the features
      dictionary which should be excluded from the fetches request. This is
      useful in scenarios where features are large (e.g. images) and can lead to
      excessive memory use if stored. Only used if EvalSavedModel used.
    tags: Optional model tags (e.g. 'serve' for serving or 'eval' for
      EvalSavedModel).
    model_name: Optional name of the model being created (should match
      ModelSpecs.name). The name should only be provided if multiple models are
      being evaluated.
    eval_config: Eval config.
    custom_model_loader: Optional custom model loader for non-TF models.
    rubber_stamp: True when this run is a first run without a baseline model
      while a baseline is configured, the diff thresholds will be ignored.
    resource_hints: The beam resource hints to apply to the PTransform which
      runs inference for this model.
    backend_config: Optional configuration of backend running model inference
      with *some* prediction extractors.
  """
  if not eval_config:
    # Default to tfma eval model unless the serving tag is present.
    is_baseline = False
    if tags and _LEGACY_EVAL_TAG in tags:
      model_type = constants.TFMA_EVAL
    elif tags and tf.saved_model.SERVING in tags:
      model_type = constants.TF_ESTIMATOR
    else:
      model_type = constants.TFMA_EVAL
    if tags is None:
      tags = [_LEGACY_EVAL_TAG]
  else:
    model_spec = model_util.get_model_spec(eval_config, model_name)
    if not model_spec:
      raise ValueError(
          'ModelSpec for model name {} not found in EvalConfig: '
          'config={}'.format(model_name, eval_config)
      )
    is_baseline = model_spec.is_baseline
    model_type = model_util.get_model_type(
        model_spec, eval_saved_model_path, tags
    )
    if tags is None:
      # Default to serving unless tfma_eval is used.
      if model_type == constants.TFMA_EVAL:
        tags = [_LEGACY_EVAL_TAG]
      else:
        tags = [tf.saved_model.SERVING]
    if model_spec.example_weight_key or model_spec.example_weight_keys:
      example_weight_key = (
          model_spec.example_weight_key or model_spec.example_weight_keys
      )
    if eval_config.options.HasField('include_default_metrics'):
      include_default_metrics = (
          eval_config.options.include_default_metrics.value
      )

  model_loader = custom_model_loader
  if not model_loader and model_type in constants.VALID_TF_MODEL_TYPES:
    model_loader = types.ModelLoader(
        construct_fn=model_util.model_construct_fn(
            eval_saved_model_path=eval_saved_model_path,
            add_metrics_callbacks=add_metrics_callbacks,
            include_default_metrics=include_default_metrics,
            additional_fetches=additional_fetches,
            blacklist_feature_fetches=blacklist_feature_fetches,
            model_type=model_type,
            tags=tags,
        ),
        tags=tags,
    )

  return types.EvalSharedModel(
      model_name=model_name,
      model_type=model_type,
      model_path=eval_saved_model_path,
      add_metrics_callbacks=add_metrics_callbacks,
      include_default_metrics=include_default_metrics,
      example_weight_key=example_weight_key,
      additional_fetches=additional_fetches,
      model_loader=model_loader,
      rubber_stamp=rubber_stamp,
      is_baseline=is_baseline,
      resource_hints=resource_hints,
      backend_config=backend_config,
  )
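
For example, a minimal sketch of building an EvalSharedModel for a serving SavedModel; the path, label key, and tags below are illustrative placeholders rather than values taken from this library's documentation:

import tensorflow_model_analysis as tfma

# Hypothetical single-model config; adjust model_specs/slicing_specs as needed.
eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label')],
    slicing_specs=[tfma.SlicingSpec()],  # empty spec = overall slice
)
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/saved_model',  # hypothetical path
    eval_config=eval_config,
    tags=['serve'],
)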

default_evaluators

default_evaluators(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    schema: Optional[Schema] = None,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    serialize: bool = False,
    random_seed_for_testing: Optional[int] = None,
    config_version: Optional[int] = None,
) -> List[Evaluator]

Returns the default evaluators for use in ExtractAndEvaluate.

PARAMETER DESCRIPTION
eval_shared_model

Optional shared model (single-model evaluation) or list of shared models (multi-model evaluation). Only required if there are metrics to be computed in-graph using the model.

TYPE: Optional[MaybeMultipleEvalSharedModels] DEFAULT: None

eval_config

Eval config.

TYPE: Optional[EvalConfig] DEFAULT: None

schema

A schema to use for customizing default evaluators.

TYPE: Optional[Schema] DEFAULT: None

compute_confidence_intervals

Deprecated (use eval_config).

TYPE: Optional[bool] DEFAULT: False

min_slice_size

Deprecated (use eval_config).

TYPE: int DEFAULT: 1

serialize

Deprecated.

TYPE: bool DEFAULT: False

random_seed_for_testing

Provide for deterministic tests only.

TYPE: Optional[int] DEFAULT: None

config_version

Optional config version for this evaluation. This should not be explicitly set by users. It is only intended to be used in cases where the provided eval_config was generated internally, and thus not a reliable indicator of user intent.

TYPE: Optional[int] DEFAULT: None

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def default_evaluators(  # pylint: disable=invalid-name
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    schema: Optional[schema_pb2.Schema] = None,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    serialize: bool = False,
    random_seed_for_testing: Optional[int] = None,
    config_version: Optional[int] = None,
) -> List[evaluator.Evaluator]:
  """Returns the default evaluators for use in ExtractAndEvaluate.

  Args:
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if there are
      metrics to be computed in-graph using the model.
    eval_config: Eval config.
    schema: A schema to use for customizing default evaluators.
    compute_confidence_intervals: Deprecated (use eval_config).
    min_slice_size: Deprecated (use eval_config).
    serialize: Deprecated.
    random_seed_for_testing: Provide for deterministic tests only.
    config_version: Optional config version for this evaluation. This should not
      be explicitly set by users. It is only intended to be used in cases where
      the provided eval_config was generated internally, and thus not a reliable
      indicator of user intent.
  """
  disabled_outputs = []
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model
  )
  if eval_config:
    eval_config = _update_eval_config_with_defaults(
        eval_config, eval_shared_model
    )
    disabled_outputs = eval_config.options.disabled_outputs.values
    if _model_types(eval_shared_models) == {constants.TF_LITE} or _model_types(
        eval_shared_models
    ) == {constants.TF_JS}:
      # no in-graph metrics present when tflite or tfjs is used.
      if eval_shared_models:
        eval_shared_models = [
            v._replace(include_default_metrics=False)
            for v in eval_shared_models
        ]
  if (
      constants.METRICS_KEY in disabled_outputs
      and constants.PLOTS_KEY in disabled_outputs
      and constants.ATTRIBUTIONS_KEY in disabled_outputs
  ):
    return []

  return [
      metrics_plots_and_validations_evaluator.MetricsPlotsAndValidationsEvaluator(
          eval_config=eval_config,
          eval_shared_model=eval_shared_model,
          schema=schema,
          random_seed_for_testing=random_seed_for_testing,
      )
  ]
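
Continuing the sketch above (reusing the same eval_config and eval_shared_model), the default evaluators can be obtained as follows:

evaluators = tfma.default_evaluators(
    eval_shared_model=eval_shared_model,
    eval_config=eval_config,
)
# As shown in the source above, this currently yields a single
# MetricsPlotsAndValidationsEvaluator, or an empty list when metrics,
# plots, and attributions are all disabled in eval_config.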

default_extractors

default_extractors(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    slice_spec: Optional[List[SingleSliceSpec]] = None,
    materialize: Optional[bool] = None,
    tensor_adapter_config: Optional[
        TensorAdapterConfig
    ] = None,
    custom_predict_extractor: Optional[Extractor] = None,
    config_version: Optional[int] = None,
) -> List[Extractor]

Returns the default extractors for use in ExtractAndEvaluate.

PARAMETER DESCRIPTION
eval_shared_model

Shared model (single-model evaluation) or list of shared models (multi-model evaluation). Required unless the predictions are provided alongside the features (i.e. model-agnostic evaluations).

TYPE: Optional[MaybeMultipleEvalSharedModels] DEFAULT: None

eval_config

Eval config.

TYPE: Optional[EvalConfig] DEFAULT: None

slice_spec

Deprecated (use EvalConfig).

TYPE: Optional[List[SingleSliceSpec]] DEFAULT: None

materialize

True to have extractors create materialized output.

TYPE: Optional[bool] DEFAULT: None

tensor_adapter_config

Tensor adapter config which specifies how to obtain tensors from the Arrow RecordBatch. If None, an attempt will be made to create the tensors using default TensorRepresentations.

TYPE: Optional[TensorAdapterConfig] DEFAULT: None

custom_predict_extractor

Optional custom predict extractor for non-TF models.

TYPE: Optional[Extractor] DEFAULT: None

config_version

Optional config version for this evaluation. This should not be explicitly set by users. It is only intended to be used in cases where the provided eval_config was generated internally, and thus not a reliable indicator of user intent.

TYPE: Optional[int] DEFAULT: None

RAISES DESCRIPTION
NotImplementedError

If eval_config contains mixed serving and eval models.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def default_extractors(  # pylint: disable=invalid-name
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    materialize: Optional[bool] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
    custom_predict_extractor: Optional[extractor.Extractor] = None,
    config_version: Optional[int] = None,
) -> List[extractor.Extractor]:
  """Returns the default extractors for use in ExtractAndEvaluate.

  Args:
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation). Required unless the predictions are
      provided alongside of the features (i.e. model-agnostic evaluations).
    eval_config: Eval config.
    slice_spec: Deprecated (use EvalConfig).
    materialize: True to have extractors create materialized output.
    tensor_adapter_config: Tensor adapter config which specifies how to obtain
      tensors from the Arrow RecordBatch. If None, an attempt will be made to
      create the tensors using default TensorRepresentations.
    custom_predict_extractor: Optional custom predict extractor for non-TF
      models.
    config_version: Optional config version for this evaluation. This should not
      be explicitly set by users. It is only intended to be used in cases where
      the provided eval_config was generated internally, and thus not a reliable
      indicator of user intent.

  Raises:
    NotImplementedError: If eval_config contains mixed serving and eval models.
  """
  if materialize is None:
    # TODO(b/172969312): Once analysis table is supported, remove defaulting
    #  to false unless 'analysis' is in disabled_outputs.
    materialize = False
  if slice_spec and eval_config:
    raise ValueError('slice_spec is deprecated, only use eval_config')

  if eval_config is not None:
    eval_config = _update_eval_config_with_defaults(
        eval_config, eval_shared_model
    )
  tensor_representations = None
  if tensor_adapter_config:
    tensor_representations = tensor_adapter_config.tensor_representations

  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model
  )
  slicing_extractors = []
  if _has_sql_slices(eval_config):
    slicing_extractors.append(
        sql_slice_key_extractor.SqlSliceKeyExtractor(eval_config)
    )
  slicing_extractors.extend([
      unbatch_extractor.UnbatchExtractor(),
      slice_key_extractor.SliceKeyExtractor(
          eval_config=eval_config, materialize=materialize
      ),
  ])

  extract_features = features_extractor.FeaturesExtractor(
      eval_config=eval_config, tensor_representations=tensor_representations
  )
  extract_labels = labels_extractor.LabelsExtractor(eval_config=eval_config)
  extract_example_weights = example_weights_extractor.ExampleWeightsExtractor(
      eval_config=eval_config
  )
  extract_materialized_predictions = (
      materialized_predictions_extractor.MaterializedPredictionsExtractor(
          eval_config=eval_config
      )
  )
  if eval_shared_model:
    model_types = _model_types(eval_shared_models)
    logging.info('eval_shared_models have model_types: %s', model_types)
    assert model_types is not None
    if (
        not model_types.issubset(constants.VALID_TF_MODEL_TYPES)
        and not custom_predict_extractor
    ):
      raise NotImplementedError(
          'either a custom_predict_extractor must be used or model type must '
          'be one of: {}. evalconfig={}'.format(
              str(constants.VALID_TF_MODEL_TYPES), eval_config
          )
      )

    if model_types == {constants.MATERIALIZED_PREDICTION}:
      return [
          extract_features,
          extract_labels,
          extract_example_weights,
          extract_materialized_predictions,
      ] + slicing_extractors
    elif model_types == {constants.TF_LITE}:
      # TODO(b/163889779): Convert TFLite extractor to operate on batched
      # extracts. Then we can remove the input extractor.
      return [
          extract_features,
          transformed_features_extractor.TransformedFeaturesExtractor(
              eval_config=eval_config, eval_shared_model=eval_shared_model
          ),
          extract_labels,
          extract_example_weights,
          (
              custom_predict_extractor
              or tflite_predict_extractor.TFLitePredictExtractor(
                  eval_config=eval_config, eval_shared_model=eval_shared_model
              )
          ),
      ] + slicing_extractors
    elif constants.TF_LITE in model_types:
      raise NotImplementedError(
          'support for mixing tf_lite and non-tf_lite models is not '
          'implemented: eval_config={}'.format(eval_config)
      )
    elif model_types == {constants.TF_JS}:
      return [
          extract_features,
          extract_labels,
          extract_example_weights,
          (
              custom_predict_extractor
              or tfjs_predict_extractor.TFJSPredictExtractor(
                  eval_config=eval_config, eval_shared_model=eval_shared_model
              )
          ),
      ] + slicing_extractors
    elif constants.TF_JS in model_types:
      raise NotImplementedError(
          'support for mixing tf_js and non-tf_js models is not '
          'implemented: eval_config={}'.format(eval_config)
      )
    else:
      extractors = [extract_features]
      if not custom_predict_extractor:
        extractors.append(
            transformed_features_extractor.TransformedFeaturesExtractor(
                eval_config=eval_config, eval_shared_model=eval_shared_model
            )
        )
      extractors.extend([
          extract_labels,
          extract_example_weights,
          (
              custom_predict_extractor
              or predictions_extractor.PredictionsExtractor(
                  eval_config=eval_config, eval_shared_model=eval_shared_model
              )
          ),
      ])
      extractors.extend(slicing_extractors)
      return extractors
  else:
    return [
        extract_features,
        extract_labels,
        extract_example_weights,
        extract_materialized_predictions,
    ] + slicing_extractors
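
A short sketch of obtaining the default extractors, again reusing the eval_config and eval_shared_model from the earlier example; the stage_name attribute printed below is the Extractor tuple field used for the Beam stage label:

extractors = tfma.default_extractors(
    eval_shared_model=eval_shared_model,
    eval_config=eval_config,
)
# The exact list depends on the model type (e.g. tf_keras vs tf_lite).
for e in extractors:
  print(e.stage_name)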

default_writers

default_writers(
    output_path: Optional[str],
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    display_only_data_location: Optional[str] = None,
    display_only_data_file_format: Optional[str] = None,
    output_file_format: str = "tfrecord",
    add_metric_callbacks: Optional[
        List[AddMetricsCallbackType]
    ] = None,
) -> List[Writer]

Returns the default writers for use in WriteResults.

Note, sharding will be enabled by default if an output_file_format is provided. Filenames will be <output_path>-SSSSS-of-NNNNN.<output_file_format> where SSSSS is the shard number and NNNNN is the number of shards.

PARAMETER DESCRIPTION
output_path

Output path.

TYPE: Optional[str]

eval_shared_model

Optional shared model (single-model evaluation) or list of shared models (multi-model evaluation). Required unless the predictions are provided alongside the features (i.e. model-agnostic evaluations).

TYPE: Optional[MaybeMultipleEvalSharedModels] DEFAULT: None

eval_config

Eval config for writing out the config along with the results. Also used to check for missing slices.

TYPE: Optional[EvalConfig] DEFAULT: None

display_only_data_location

Optional path indicating where the examples were read from. This is used only for display purposes - data will not actually be read from this path.

TYPE: Optional[str] DEFAULT: None

display_only_data_file_format

Optional format of the input examples. This is used only for display purposes.

TYPE: Optional[str] DEFAULT: None

output_file_format

File format to use when saving files. Currently only 'tfrecord' is supported.

TYPE: str DEFAULT: 'tfrecord'

add_metric_callbacks

Optional list of metric callbacks (if used).

TYPE: Optional[List[AddMetricsCallbackType]] DEFAULT: None

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def default_writers(
    output_path: Optional[str],
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    display_only_data_location: Optional[str] = None,
    display_only_data_file_format: Optional[str] = None,
    output_file_format: str = 'tfrecord',
    add_metric_callbacks: Optional[List[types.AddMetricsCallbackType]] = None,
) -> List[writer.Writer]:  # pylint: disable=invalid-name
  """Returns the default writers for use in WriteResults.

  Note, sharding will be enabled by default if an output_file_format is
  provided. Filenames will be <output_path>-SSSSS-of-NNNNN.<output_file_format>
  where SSSSS is the shard number and NNNNN is the number of shards.

  Args:
    output_path: Output path.
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Required unless the predictions
      are provided alongside of the features (i.e. model-agnostic evaluations).
    eval_config: Eval config for writing out config along with results. Also
      used for to check for missing slices.
    display_only_data_location: Optional path indicating where the examples were
      read from. This is used only for display purposes - data will not actually
      be read from this path.
    display_only_data_file_format: Optional format of the input examples. This
      is used only for display purposes.
    output_file_format: File format to use when saving files. Currently only
      'tfrecord' is supported.
    add_metric_callbacks: Optional list of metric callbacks (if used).
  """
  writers = []

  if not add_metric_callbacks:
    add_metric_callbacks = []
  # The add_metric_callbacks are used in the metrics and plots serialization
  # code to post process the metric data by calling populate_stats_and_pop.
  # While both the legacy (V1) and new (V2) evaluation implementations support
  # EvalSavedModels using add_metric_callbacks, this particular code is only
  # required for the legacy evaluation based on the MetricsAndPlotsEvaluator.
  # The V2 MetricsAndPlotsEvaluator output requires no additional processing.
  # Since the V1 code only supports a single EvalSharedModel, we only set the
  # add_metrics_callbacks if a dict is not passed.
  if (
      eval_shared_model
      and not isinstance(eval_shared_model, dict)
      and not isinstance(eval_shared_model, list)
  ):
    add_metric_callbacks = eval_shared_model.add_metrics_callbacks

  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model
  )

  if eval_config:
    model_locations = {}
    for v in eval_shared_models or [None]:
      k = '' if v is None else v.model_name
      model_locations[k] = (
          '<unknown>' if v is None or v.model_path is None else v.model_path
      )
    writers.append(
        eval_config_writer.EvalConfigWriter(
            output_path,
            eval_config=eval_config,
            data_location=display_only_data_location,
            data_file_format=display_only_data_file_format,
            model_locations=model_locations,
        )
    )

  output_paths = {
      constants.METRICS_KEY: os.path.join(output_path, constants.METRICS_KEY),
      constants.PLOTS_KEY: os.path.join(output_path, constants.PLOTS_KEY),
      constants.ATTRIBUTIONS_KEY: os.path.join(
          output_path, constants.ATTRIBUTIONS_KEY
      ),
      constants.VALIDATIONS_KEY: os.path.join(
          output_path, constants.VALIDATIONS_KEY
      ),
  }
  writers.append(
      metrics_plots_and_validations_writer.MetricsPlotsAndValidationsWriter(
          output_paths=output_paths,
          # Empty EvalConfig supported for backwards compatibility.
          eval_config=eval_config or config_pb2.EvalConfig(),
          add_metrics_callbacks=add_metric_callbacks,
          output_file_format=output_file_format,
          rubber_stamp=model_util.has_rubber_stamp(eval_shared_models),
      )
  )
  return writers
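
Putting the default_* helpers together, here is a sketch of a hand-built Beam pipeline that mirrors what run_model_analysis does internally; the data and output paths are hypothetical, and eval_config/eval_shared_model come from the earlier sketches:

import apache_beam as beam
import tensorflow_model_analysis as tfma
from tfx_bsl.tfxio import tf_example_record

data_location = '/path/to/examples*'  # hypothetical TFRecord pattern
output_path = '/path/to/output'       # hypothetical output directory

# Read serialized tf.Examples into Arrow RecordBatches (batched input path).
tfxio = tf_example_record.TFExampleRecord(
    file_pattern=data_location,
    raw_record_column_name=tfma.ARROW_INPUT_COLUMN,
    telemetry_descriptors=['StandaloneTFMA'],
)

with beam.Pipeline() as p:
  _ = (
      p
      | 'ReadFromTFRecordToArrow' >> tfxio.BeamSource()
      | 'ExtractEvaluateAndWriteResults'
      >> tfma.ExtractEvaluateAndWriteResults(
          eval_config=eval_config,
          eval_shared_model=eval_shared_model,
          extractors=tfma.default_extractors(
              eval_shared_model=eval_shared_model, eval_config=eval_config),
          evaluators=tfma.default_evaluators(
              eval_shared_model=eval_shared_model, eval_config=eval_config),
          writers=tfma.default_writers(
              output_path,
              eval_shared_model=eval_shared_model,
              eval_config=eval_config),
          output_path=output_path,
      )
  )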

is_batched_input

is_batched_input(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    config_version: Optional[int] = None,
) -> bool

Returns true if batched input should be used.

We will keep supporting the legacy unbatched V1 PredictExtractor as it parses the features and labels, and is the only solution currently that allows for slicing on transformed features. Eventually we should have support for transformed features via keras preprocessing layers.

PARAMETER DESCRIPTION
eval_shared_model

Shared model (single-model evaluation) or list of shared models (multi-model evaluation). Required unless the predictions are provided alongside the features (i.e. model-agnostic evaluations).

TYPE: Optional[MaybeMultipleEvalSharedModels] DEFAULT: None

eval_config

Eval config.

TYPE: Optional[EvalConfig] DEFAULT: None

config_version

Optional config version for this evaluation. This should not be explicitly set by users. It is only intended to be used in cases where the provided eval_config was generated internally, and thus not a reliable indicator of user intent.

TYPE: Optional[int] DEFAULT: None

RETURNS DESCRIPTION
bool

A boolean indicating if batched extractors should be used.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def is_batched_input(
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    config_version: Optional[int] = None,
) -> bool:
  """Returns true if batched input should be used.

   We will keep supporting the legacy unbatched V1 PredictExtractor as it parses
   the features and labels, and is the only solution currently that allows for
   slicing on transformed features. Eventually we should have support for
   transformed features via keras preprocessing layers.

  Args:
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation). Required unless the predictions are
      provided alongside of the features (i.e. model-agnostic evaluations).
    eval_config: Eval config.
    config_version: Optional config version for this evaluation. This should not
      be explicitly set by users. It is only intended to be used in cases where
      the provided eval_config was generated internally, and thus not a reliable
      indicator of user intent.

  Returns:
    A boolean indicating if batched extractors should be used.
  """
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model
  )
  return not _is_legacy_eval(config_version, eval_shared_models, eval_config)
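
A minimal sketch of using this predicate when wiring up inputs by hand (the same branching that run_model_analysis applies internally); eval_shared_model and eval_config are from the earlier sketches:

if tfma.is_batched_input(eval_shared_model, eval_config):
  pass  # feed Arrow RecordBatches, e.g. from a TFXIO BeamSource
else:
  pass  # feed raw serialized tf.Examples to the legacy unbatched path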

is_legacy_estimator

is_legacy_estimator(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
) -> bool

Returns true if there is a legacy estimator.

PARAMETER DESCRIPTION
eval_shared_model

Shared model (single-model evaluation) or list of shared models (multi-model evaluation). Required unless the predictions are provided alongside the features (i.e. model-agnostic evaluations).

TYPE: Optional[MaybeMultipleEvalSharedModels] DEFAULT: None

RETURNS DESCRIPTION
bool

A boolean indicating if legacy predict extractor will be used.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def is_legacy_estimator(
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
) -> bool:
  """Returns true if there is a legacy estimator.

  Args:
    eval_shared_model: Shared model (single-model evaluation) or list of shared
      models (multi-model evaluation). Required unless the predictions are
      provided alongside of the features (i.e. model-agnostic evaluations).

  Returns:
    A boolean indicating if legacy predict extractor will be used.
  """
  eval_shared_models = model_util.verify_and_update_eval_shared_models(
      eval_shared_model
  )
  model_types = _model_types(eval_shared_models)
  return (
      model_types is not None
      and model_types == {constants.TFMA_EVAL}
      and all(
          _LEGACY_EVAL_TAG in m.model_loader.tags for m in eval_shared_models
      )
  )

load_attributions

load_attributions(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[AttributionsForSlice]

Read and deserialize the AttributionsForSlice records.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_attributions(
    output_path: str, output_file_format: str = 'tfrecord'
) -> Iterator[AttributionsForSlice]:
  """Read and deserialize the AttributionsForSlice records."""
  for (
      a
  ) in metrics_plots_and_validations_writer.load_and_deserialize_attributions(
      output_path, output_file_format
  ):
    yield a

load_eval_result

load_eval_result(
    output_path: str,
    output_file_format: Optional[str] = "tfrecord",
    model_name: Optional[str] = None,
) -> EvalResult

Loads EvalResult object for use with the visualization functions.

PARAMETER DESCRIPTION
output_path

Output directory containing config, metrics, plots, etc.

TYPE: str

output_file_format

Optional file extension to filter files by.

TYPE: Optional[str] DEFAULT: 'tfrecord'

model_name

Optional model name. Required if multi-model evaluation was run.

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
EvalResult

EvalResult object for use with the visualization functions.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_eval_result(
    output_path: str,
    output_file_format: Optional[str] = 'tfrecord',
    model_name: Optional[str] = None,
) -> view_types.EvalResult:
  """Loads EvalResult object for use with the visualization functions.

  Args:
    output_path: Output directory containing config, metrics, plots, etc.
    output_file_format: Optional file extension to filter files by.
    model_name: Optional model name. Required if multi-model evaluation was run.

  Returns:
    EvalResult object for use with the visualization functions.
  """
  # Config, metrics, and plots files should all exist under the given output
  # directory, but fairness plugin has a use-case where only the metrics are
  # provided so we support all files as being optional (the EvalResult will have
  # corresponding None values for files that are not present).
  eval_config, data_location, file_format, model_locations = (
      eval_config_writer.load_eval_run(output_path)
  )
  metrics_list = []
  for p in metrics_plots_and_validations_writer.load_and_deserialize_metrics(
      output_path, output_file_format
  ):
    metrics = view_util.convert_metrics_proto_to_dict(p, model_name=model_name)
    if metrics is not None:
      metrics_list.append(metrics)
  plots_list = []
  for p in metrics_plots_and_validations_writer.load_and_deserialize_plots(
      output_path, output_file_format
  ):
    plots = view_util.convert_plots_proto_to_dict(p, model_name=model_name)
    if plots is not None:
      plots_list.append(plots)
  attributions_list = []
  for (
      a
  ) in metrics_plots_and_validations_writer.load_and_deserialize_attributions(
      output_path, output_file_format
  ):
    attributions = view_util.convert_attributions_proto_to_dict(
        a, model_name=model_name
    )
    if attributions is not None:
      attributions_list.append(attributions)
  if not model_locations:
    model_location = ''
  elif model_name is None:
    model_location = list(model_locations.values())[0]
  else:
    model_location = model_locations[model_name]
  return view_types.EvalResult(  # pytype: disable=wrong-arg-types
      slicing_metrics=metrics_list,
      plots=plots_list,
      attributions=attributions_list,
      config=eval_config,
      data_location=data_location,
      file_format=file_format,
      model_location=model_location,
  )
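
For example, a sketch of loading and inspecting a finished run; the output path is hypothetical, and each slicing_metrics entry is assumed to be a (slice_key, metrics) pair:

import tensorflow_model_analysis as tfma

result = tfma.load_eval_result('/path/to/output')  # hypothetical path
print(result.model_location, result.data_location)
for slice_key, metrics in result.slicing_metrics:
  print(slice_key)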

load_eval_results

load_eval_results(
    output_paths: Union[str, List[str]],
    output_file_format: Optional[str] = "tfrecord",
    mode: str = MODEL_CENTRIC_MODE,
    model_name: Optional[str] = None,
) -> EvalResults

Loads results for multiple models or multiple data sets.

PARAMETER DESCRIPTION
output_paths

A single path or list of output paths of completed tfma runs.

TYPE: Union[str, List[str]]

output_file_format

Optional file extension to filter files by.

TYPE: Optional[str] DEFAULT: 'tfrecord'

mode

The mode of the evaluation. Currently, tfma.DATA_CENTRIC_MODE and tfma.MODEL_CENTRIC_MODE are supported.

TYPE: str DEFAULT: MODEL_CENTRIC_MODE

model_name

Filters to only return results for given model. If unset all models are returned.

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
EvalResults

An EvalResults containing the evaluation results serialized at output_paths. This can be used to construct a time series view.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_eval_results(
    output_paths: Union[str, List[str]],
    output_file_format: Optional[str] = 'tfrecord',
    mode: str = constants.MODEL_CENTRIC_MODE,
    model_name: Optional[str] = None,
) -> view_types.EvalResults:
  """Loads results for multiple models or multiple data sets.

  Args:
    output_paths: A single path or list of output paths of completed tfma runs.
    output_file_format: Optional file extension to filter files by.
    mode: The mode of the evaluation. Currently, tfma.DATA_CENTRIC_MODE and
      tfma.MODEL_CENTRIC_MODE are supported.
    model_name: Filters to only return results for given model. If unset all
      models are returned.

  Returns:
    An EvalResults containing the evaluation results serialized at output_paths.
    This can be used to construct a time series view.
  """
  results = []
  if not isinstance(output_paths, list):
    output_paths = [output_paths]
  for output_path in output_paths:
    if model_name is None:
      _, _, _, model_locations = eval_config_writer.load_eval_run(output_path)
      model_names = list(model_locations)
    else:
      model_names = [model_name]
    for model_name in model_names:
      results.append(
          load_eval_result(
              output_path, output_file_format, model_name=model_name
          )
      )
  return make_eval_results(results, mode)
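
A sketch of loading several runs of the same model as a time series; the output paths are hypothetical:

import tensorflow_model_analysis as tfma

eval_results = tfma.load_eval_results(
    ['/path/to/run1/output', '/path/to/run2/output'],
    mode=tfma.MODEL_CENTRIC_MODE,
)
# The resulting EvalResults can be rendered in a notebook, e.g. with
# tfma.view.render_time_series(eval_results).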

load_metrics

load_metrics(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[MetricsForSlice]

Read and deserialize the MetricsForSlice records.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_metrics(
    output_path: str, output_file_format: str = 'tfrecord'
) -> Iterator[MetricsForSlice]:
  """Read and deserialize the MetricsForSlice records."""
  for m in metrics_plots_and_validations_writer.load_and_deserialize_metrics(
      output_path, output_file_format
  ):
    yield m
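
A sketch of iterating over the serialized per-slice metrics from a completed run (output path hypothetical); load_plots and load_attributions follow the same pattern for their respective record types:

import tensorflow_model_analysis as tfma

for metrics_for_slice in tfma.load_metrics('/path/to/output'):
  print(metrics_for_slice.slice_key)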

load_plots

load_plots(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[PlotsForSlice]

Read and deserialize the PlotsForSlice records.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_plots(
    output_path: str, output_file_format: str = 'tfrecord'
) -> Iterator[PlotsForSlice]:
  """Read and deserialize the PlotsForSlice records."""
  for p in metrics_plots_and_validations_writer.load_and_deserialize_plots(
      output_path, output_file_format
  ):
    yield p

load_validation_result

load_validation_result(
    output_path: str, output_file_format: str = ""
) -> ValidationResult

Read and deserialize the ValidationResult.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_validation_result(
    output_path: str, output_file_format: str = ''
) -> ValidationResult:
  """Read and deserialize the ValidationResult."""
  return metrics_plots_and_validations_writer.load_and_deserialize_validation_result(
      output_path, output_file_format
  )
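
A sketch of checking a validation outcome after a run configured with thresholds (output path hypothetical; validation_ok is a field of the ValidationResult proto):

import tensorflow_model_analysis as tfma

validation = tfma.load_validation_result('/path/to/output')
if not validation.validation_ok:
  print(validation)  # inspect which thresholds failed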

make_eval_results

make_eval_results(
    results: List[EvalResult], mode: str
) -> EvalResults

Constructs an EvalResults object from a list of TFMA evaluation results.

PARAMETER DESCRIPTION
results

A list of TFMA evaluation results.

TYPE: List[EvalResult]

mode

The mode of the evaluation. Currently, tfma.DATA_CENTRIC_MODE and tfma.MODEL_CENTRIC_MODE are supported.

TYPE: str

RETURNS DESCRIPTION
EvalResults

A tfma.view.EvalResults object containing all evaluation results. This can be used to construct a time series view.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def make_eval_results(
    results: List[view_types.EvalResult], mode: str
) -> view_types.EvalResults:
  """Run model analysis for a single model on multiple data sets.

  Args:
    results: A list of TFMA evaluation results.
    mode: The mode of the evaluation. Currently, tfma.DATA_CENTRIC_MODE and
      tfma.MODEL_CENTRIC_MODE are supported.

  Returns:
    An `tfma.view.EvalResults` object containing all evaluation results. This
    can be used to construct a time series view.
  """
  return view_types.EvalResults(results, mode)

multiple_data_analysis

multiple_data_analysis(
    model_location: str, data_locations: List[str], **kwargs
) -> EvalResults

Run model analysis for a single model on multiple data sets.

PARAMETER DESCRIPTION
model_location

The location of the exported eval saved model.

TYPE: str

data_locations

A list of data set locations.

TYPE: List[str]

**kwargs

The args used for evaluation. See tfma.run_model_analysis() for details.

DEFAULT: {}

RETURNS DESCRIPTION
EvalResults

A tfma.EvalResults containing all the evaluation results with the same order as data_locations.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def multiple_data_analysis(
    model_location: str, data_locations: List[str], **kwargs
) -> view_types.EvalResults:
  """Run model analysis for a single model on multiple data sets.

  Args:
    model_location: The location of the exported eval saved model.
    data_locations: A list of data set locations.
    **kwargs: The args used for evaluation. See tfma.run_model_analysis() for
      details.

  Returns:
    A tfma.EvalResults containing all the evaluation results with the same order
    as data_locations.
  """
  results = []
  for d in data_locations:
    results.append(single_model_analysis(model_location, d, **kwargs))
  return view_types.EvalResults(results, constants.DATA_CENTRIC_MODE)

multiple_model_analysis

multiple_model_analysis(
    model_locations: List[str], data_location: str, **kwargs
) -> EvalResults

Run model analysis for multiple models on the same data set.

PARAMETER DESCRIPTION
model_locations

A list of paths to the export eval saved model.

TYPE: List[str]

data_location

The location of the data files.

TYPE: str

**kwargs

The args used for evaluation. See tfma.single_model_analysis() for details.

DEFAULT: {}

RETURNS DESCRIPTION
EvalResults

A tfma.EvalResults containing all the evaluation results with the same order as model_locations.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def multiple_model_analysis(
    model_locations: List[str], data_location: str, **kwargs
) -> view_types.EvalResults:
  """Run model analysis for multiple models on the same data set.

  Args:
    model_locations: A list of paths to the export eval saved model.
    data_location: The location of the data files.
    **kwargs: The args used for evaluation. See tfma.single_model_analysis() for
      details.

  Returns:
    A tfma.EvalResults containing all the evaluation results with the same order
    as model_locations.
  """
  results = []
  for m in model_locations:
    results.append(single_model_analysis(m, data_location, **kwargs))
  return view_types.EvalResults(results, constants.MODEL_CENTRIC_MODE)
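
A sketch comparing two exported models on the same dataset; the model and data paths are hypothetical, and multiple_data_analysis is the symmetric helper for one model over several datasets:

import tensorflow_model_analysis as tfma

eval_results = tfma.multiple_model_analysis(
    ['/path/to/model_a', '/path/to/model_b'],
    '/path/to/examples*',
)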

run_model_analysis

run_model_analysis(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    data_location: str = "",
    file_format: str = "tfrecords",
    output_path: Optional[str] = None,
    extractors: Optional[List[Extractor]] = None,
    evaluators: Optional[List[Evaluator]] = None,
    writers: Optional[List[Writer]] = None,
    pipeline_options: Optional[Any] = None,
    slice_spec: Optional[List[SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    random_seed_for_testing: Optional[int] = None,
    schema: Optional[Schema] = None,
) -> Union[EvalResult, EvalResults]

Runs TensorFlow model analysis.

It runs a Beam pipeline to compute the slicing metrics exported in TensorFlow Eval SavedModel and returns the results.

This is a simplified API for users who want to quickly get something running locally. Users who wish to create their own Beam pipelines can use the Evaluate PTransform instead.

PARAMETER DESCRIPTION
eval_shared_model

Optional shared model (single-model evaluation) or list of shared models (multi-model evaluation). Only required if needed by default extractors, evaluators, or writers.

TYPE: Optional[MaybeMultipleEvalSharedModels] DEFAULT: None

eval_config

Eval config.

TYPE: Optional[EvalConfig] DEFAULT: None

data_location

The location of the data files.

TYPE: str DEFAULT: ''

file_format

The file format of the data. Currently either 'text' or 'tfrecords' is supported; 'tfrecords' is the default.

TYPE: str DEFAULT: 'tfrecords'

output_path

The directory to output metrics and results to. If None, we use a temporary directory.

TYPE: Optional[str] DEFAULT: None

extractors

Optional list of Extractors to apply to Extracts. Typically these will be added by calling the default_extractors function. If no extractors are provided, default_extractors (non-materialized) will be used.

TYPE: Optional[List[Extractor]] DEFAULT: None

evaluators

Optional list of Evaluators for evaluating Extracts. Typically these will be added by calling the default_evaluators function. If no evaluators are provided, default_evaluators will be used.

TYPE: Optional[List[Evaluator]] DEFAULT: None

writers

Optional list of Writers for writing Evaluation output. Typically these will be added by calling the default_writers function. If no writers are provided, default_writers will be used.

TYPE: Optional[List[Writer]] DEFAULT: None

pipeline_options

Optional arguments to run the Pipeline, for instance whether to run directly.

TYPE: Optional[Any] DEFAULT: None

slice_spec

Deprecated (use EvalConfig).

TYPE: Optional[List[SingleSliceSpec]] DEFAULT: None

write_config

Deprecated (use EvalConfig).

TYPE: Optional[bool] DEFAULT: True

compute_confidence_intervals

Deprecated (use EvalConfig).

TYPE: Optional[bool] DEFAULT: False

min_slice_size

Deprecated (use EvalConfig).

TYPE: int DEFAULT: 1

random_seed_for_testing

Provide for deterministic tests only.

TYPE: Optional[int] DEFAULT: None

schema

Optional tf.Metadata schema of the input data.

TYPE: Optional[Schema] DEFAULT: None

RETURNS DESCRIPTION
Union[EvalResult, EvalResults]

An EvalResult that can be used with the TFMA visualization functions.

RAISES DESCRIPTION
ValueError

If the file_format is unknown to us.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def run_model_analysis(
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    data_location: str = '',
    file_format: str = 'tfrecords',
    output_path: Optional[str] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    pipeline_options: Optional[Any] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    random_seed_for_testing: Optional[int] = None,
    schema: Optional[schema_pb2.Schema] = None,
) -> Union[view_types.EvalResult, view_types.EvalResults]:
  """Runs TensorFlow model analysis.

  It runs a Beam pipeline to compute the slicing metrics exported in TensorFlow
  Eval SavedModel and returns the results.

  This is a simplified API for users who want to quickly get something running
  locally. Users who wish to create their own Beam pipelines can use the
  Evaluate PTransform instead.

  Args:
    eval_shared_model: Optional shared model (single-model evaluation) or list
      of shared models (multi-model evaluation). Only required if needed by
      default extractors, evaluators, or writers.
    eval_config: Eval config.
    data_location: The location of the data files.
    file_format: The file format of the data, can be either 'text' or
      'tfrecords' for now. By default, 'tfrecords' will be used.
    output_path: The directory to output metrics and results to. If None, we use
      a temporary directory.
    extractors: Optional list of Extractors to apply to Extracts. Typically
      these will be added by calling the default_extractors function. If no
      extractors are provided, default_extractors (non-materialized) will be
      used.
    evaluators: Optional list of Evaluators for evaluating Extracts. Typically
      these will be added by calling the default_evaluators function. If no
      evaluators are provided, default_evaluators will be used.
    writers: Optional list of Writers for writing Evaluation output. Typically
      these will be added by calling the default_writers function. If no writers
      are provided, default_writers will be used.
    pipeline_options: Optional arguments to run the Pipeline, for instance
      whether to run directly.
    slice_spec: Deprecated (use EvalConfig).
    write_config: Deprecated (use EvalConfig).
    compute_confidence_intervals: Deprecated (use EvalConfig).
    min_slice_size: Deprecated (use EvalConfig).
    random_seed_for_testing: Provide for deterministic tests only.
    schema: Optional tf.Metadata schema of the input data.

  Returns:
    An EvalResult that can be used with the TFMA visualization functions.

  Raises:
    ValueError: If the file_format is unknown to us.
  """
  _assert_tensorflow_version()

  if output_path is None:
    output_path = tempfile.mkdtemp()
  if not tf.io.gfile.exists(output_path):
    tf.io.gfile.makedirs(output_path)

  if eval_config is None:
    config_version = 1
    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model
    )
    eval_config = _default_eval_config(
        eval_shared_models,
        slice_spec,
        write_config,
        compute_confidence_intervals,
        min_slice_size,
    )
  else:
    config_version = 2
    eval_config = _update_eval_config_with_defaults(
        eval_config, eval_shared_model
    )

  tensor_adapter_config = None
  with beam.Pipeline(options=pipeline_options) as p:
    if file_format == 'tfrecords':
      if is_batched_input(eval_shared_model, eval_config, config_version):
        if is_legacy_estimator(eval_shared_model):
          tfxio = raw_tf_record.RawTfRecordTFXIO(
              file_pattern=data_location,
              raw_record_column_name=constants.ARROW_INPUT_COLUMN,
              telemetry_descriptors=['StandaloneTFMA'],
          )
        else:
          tfxio = tf_example_record.TFExampleRecord(
              file_pattern=data_location,
              schema=schema,
              raw_record_column_name=constants.ARROW_INPUT_COLUMN,
              telemetry_descriptors=['StandaloneTFMA'],
          )
          if schema is not None:
            tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                arrow_schema=tfxio.ArrowSchema(),
                tensor_representations=tfxio.TensorRepresentations(),
            )
        data = p | 'ReadFromTFRecordToArrow' >> tfxio.BeamSource()
      else:
        data = p | 'ReadFromTFRecord' >> beam.io.ReadFromTFRecord(
            file_pattern=data_location,
            compression_type=beam.io.filesystem.CompressionTypes.AUTO,
        )
    elif file_format == 'text':
      tfxio = raw_tf_record.RawBeamRecordTFXIO(
          physical_format='csv',
          raw_record_column_name=constants.ARROW_INPUT_COLUMN,
          telemetry_descriptors=['StandaloneTFMA'],
      )
      data = (
          p
          | 'ReadFromText'
          >> beam.io.textio.ReadFromText(
              data_location, coder=beam.coders.BytesCoder()
          )
          | 'ConvertToArrow' >> tfxio.BeamSource()
      )
    else:
      raise ValueError('unknown file_format: {}'.format(file_format))

    # pylint: disable=no-value-for-parameter
    _ = (
        data
        | 'ExtractEvaluateAndWriteResults'
        >> ExtractEvaluateAndWriteResults(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            display_only_data_location=data_location,
            display_only_file_format=file_format,
            output_path=output_path,
            extractors=extractors,
            evaluators=evaluators,
            writers=writers,
            random_seed_for_testing=random_seed_for_testing,
            tensor_adapter_config=tensor_adapter_config,
            schema=schema,
            config_version=config_version,
        )
    )
    # pylint: enable=no-value-for-parameter

  if len(eval_config.model_specs) <= 1:
    return load_eval_result(output_path)
  else:
    results = []
    for spec in eval_config.model_specs:
      results.append(load_eval_result(output_path, model_name=spec.name))
    return view_types.EvalResults(results, constants.MODEL_CENTRIC_MODE)
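
Finally, a minimal end-to-end sketch of this simplified API; the paths, label key, metric names, and slicing feature below are illustrative placeholders:

import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label')],
    slicing_specs=[
        tfma.SlicingSpec(),                          # overall slice
        tfma.SlicingSpec(feature_keys=['country']),  # hypothetical feature
    ],
    metrics_specs=[
        tfma.MetricsSpec(metrics=[
            tfma.MetricConfig(class_name='ExampleCount'),
            tfma.MetricConfig(class_name='BinaryAccuracy'),
        ])
    ],
)
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/path/to/saved_model',  # hypothetical path
    eval_config=eval_config,
)
eval_result = tfma.run_model_analysis(
    eval_shared_model=eval_shared_model,
    eval_config=eval_config,
    data_location='/path/to/examples*',  # hypothetical TFRecord pattern
    output_path='/path/to/output',       # hypothetical output directory
)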