unitxt.test_utils.metrics module

unitxt.test_utils.metrics.apply_metric(metric: Metric, predictions: List[Any], references: List[List[Any]], task_data: List[dict] | None = None, perform_validations_in_apply_metric=True)
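
Applies the metric to the given predictions and references and returns the
scored instances. A minimal usage sketch, assuming the built-in Accuracy
metric and the usual nested "score" layout of unitxt outputs::

    from unitxt.metrics import Accuracy
    from unitxt.test_utils.metrics import apply_metric

    # Score two instances; each inner reference list holds the accepted
    # answers for the corresponding prediction.
    outputs = apply_metric(
        metric=Accuracy(),
        predictions=["Paris", "london"],
        references=[["Paris"], ["London"]],
    )

    # Each returned instance carries a nested "score" dict with both the
    # per-instance scores and the aggregated global scores.
    print(outputs[0]["score"]["instance"])  # e.g. {"accuracy": 1.0, ...}
    print(outputs[0]["score"]["global"])    # e.g. {"accuracy": 0.5, ...}
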
unitxt.test_utils.metrics.check_scores(global_target: dict, instance_targets: List[dict], global_outputs: dict, instance_outputs: List[dict], score_keys_to_ignore: List[str] | None = None)
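
Compares expected score dictionaries (targets) against the dictionaries a
metric run actually produced, failing on mismatch. A hedged sketch: the exact
comparison semantics are an assumption, and score_keys_to_ignore is used here
to skip keys whose values vary between runs::

    from unitxt.test_utils.metrics import check_scores

    check_scores(
        global_target={"accuracy": 0.5, "score": 0.5},
        instance_targets=[{"accuracy": 1.0}, {"accuracy": 0.0}],
        # Illustrative outputs, as if produced by a metric run.
        global_outputs={"accuracy": 0.5, "score": 0.5, "score_ci_low": 0.17},
        instance_outputs=[{"accuracy": 1.0}, {"accuracy": 0.0}],
        # Bootstrapped confidence intervals differ between runs, so skip them.
        score_keys_to_ignore=["score_ci_low", "score_ci_high"],
    )
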
unitxt.test_utils.metrics.dict_equal(dict1, dict2)
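
Order-insensitive equality check between two dictionaries, used by the score
comparisons above; a small sketch assuming plain equality semantics::

    from unitxt.test_utils.metrics import dict_equal

    assert dict_equal({"a": 1, "b": 2}, {"b": 2, "a": 1})  # key order is irrelevant
    assert not dict_equal({"a": 1}, {"a": 1, "b": 2})      # a missing key fails
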
unitxt.test_utils.metrics.round_floats(obj, precision=2, recursive=True)
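
Rounds every float in obj to the given precision; with recursive=True the
rounding also descends into nested containers. Which container types are
traversed is an assumption of this sketch::

    from unitxt.test_utils.metrics import round_floats

    scores = {"f1": 2 / 3, "per_class": [0.12345, 0.98765]}
    print(round_floats(scores))              # {'f1': 0.67, 'per_class': [0.12, 0.99]}
    print(round_floats(2 / 3, precision=4))  # 0.6667
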
unitxt.test_utils.metrics.test_evaluate(global_target: dict, instance_targets: List[dict], task_data: List[dict] | None, metric_name: str)
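
Evaluates the catalog metric identified by metric_name over task_data and
asserts that the resulting instance and global scores match the given
targets. A hedged sketch: the field names inside task_data and the exact
target keys below are illustrative assumptions, not the required schema::

    from unitxt.test_utils.metrics import test_evaluate

    # Hypothetical task_data for a two-instance yes/no check.
    task_data = [
        {"prediction": "yes", "references": ["yes"]},
        {"prediction": "no", "references": ["yes"]},
    ]

    test_evaluate(
        global_target={"accuracy": 0.5, "score": 0.5, "score_name": "accuracy"},
        instance_targets=[{"accuracy": 1.0}, {"accuracy": 0.0}],
        task_data=task_data,
        metric_name="metrics.accuracy",  # catalog name of the metric under test
    )
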
unitxt.test_utils.metrics.test_metric(metric: Metric, predictions: List[Any], references: List[List[Any]], instance_targets: List[dict], global_target: dict, task_data: List[dict] | None = None, score_keys_to_ignore: List[str] | None = None)
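
The main entry point for unit-testing a metric: it scores the instances
(presumably via apply_metric) and verifies both per-instance and global
scores against the targets (presumably via check_scores). A sketch under the
same assumptions as above; the confidence-interval keys are skipped because
their bootstrapped values vary between runs::

    from unitxt.metrics import Accuracy
    from unitxt.test_utils.metrics import test_metric

    test_metric(
        metric=Accuracy(),
        predictions=["A", "B"],
        references=[["A"], ["A"]],
        instance_targets=[
            {"accuracy": 1.0, "score": 1.0, "score_name": "accuracy"},
            {"accuracy": 0.0, "score": 0.0, "score_name": "accuracy"},
        ],
        global_target={"accuracy": 0.5, "score": 0.5, "score_name": "accuracy"},
        # Confidence-interval keys are resampled per run, so ignore them here.
        score_keys_to_ignore=[
            "accuracy_ci_low", "accuracy_ci_high",
            "score_ci_low", "score_ci_high",
        ],
    )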