unitxt.processors module¶
- class unitxt.processors.Capitalize(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.ConvertToBoolean(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.DictOfListsToPairs(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, position_key_before_value: bool = True)¶
Bases:
FieldOperator
- class unitxt.processors.ExtractArenaHardNumericalJudgment(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.ExtractMtBenchLabelJudgment(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.ExtractMtBenchRatingJudgment(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.ExtractSafeUnsafeJudgment(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.ExtractWithRegex(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, regex: str, termination_regex: str = None)¶
Bases:
RegexParser
- class unitxt.processors.FirstCharacter(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.GetStringAfter(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, substring: str)¶
Bases:
FieldOperator
- class unitxt.processors.ListToEmptyEntitiesTuples(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.LiteralEval(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.LowerCase(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.LowerCaseTillPunc(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.MatchClosestOption(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, options_field: str = 'options')¶
Bases:
InstanceFieldOperator
- class unitxt.processors.RegexParser(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, regex: str, termination_regex: str = None)¶
Bases:
FieldOperator
A processor that uses regex in order to parse a string.
- class unitxt.processors.SplitStrip(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, delimiter: str = ' ', strip_every_element: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.StanceToProCon(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.StrToFloatFormat(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.StringOrNotString(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, string: str)¶
Bases:
FieldOperator
- class unitxt.processors.Substring(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, begin: int = 0, end: int = None)¶
Bases:
FieldOperator
- class unitxt.processors.TakeFirstNonEmptyLine(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.TakeFirstWord(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.ToListByComma(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, delimiter: str = ', ', strip_every_element: bool = True)¶
Bases:
SplitStrip
- class unitxt.processors.ToListByCommaSpace(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False, delimiter: str = ', ', strip_every_element: bool = True)¶
Bases:
SplitStrip
- class unitxt.processors.ToString(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.ToStringStripped(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.ToYesOrNone(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.YesNoToInt(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- class unitxt.processors.YesToOneElseZero(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, caching: bool = None, apply_to_streams: ~typing.List[str] = None, dont_apply_to_streams: ~typing.List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: ~typing.List[~typing.List[str]] | ~typing.Dict[str, str] | None = None, use_query: bool, process_every_value: bool = False, get_default: ~typing.Any = None, not_exist_ok: bool = False)¶
Bases:
FieldOperator
- unitxt.processors.process_instance_value(self, value, instance)¶