unitxt.benchmark module

class unitxt.benchmark.BaseBenchmark(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, format: unitxt.formats.Format = None, num_demos: int = None, system_prompt: unitxt.system_prompts.SystemPrompt = None, loader_limit: int = None, splits: List[str] = ['train', 'validation', 'test'], subset: str | None = None)[source]

Bases: SourceOperator

class unitxt.benchmark.Benchmark(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, format: unitxt.formats.Format = None, num_demos: int = None, system_prompt: unitxt.system_prompts.SystemPrompt = None, loader_limit: int = None, splits: List[str] = ['train', 'validation', 'test'], subset: str | None = None, subsets: Dict[str, unitxt.standard.DatasetRecipe | unitxt.benchmark.BaseBenchmark] = __required__, max_total_samples: int = None, max_samples_per_subset: int = None)[source]

Bases: BaseBenchmark