unitxt.processors module

class unitxt.processors.Capitalize(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.ConvertToBoolean(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.DictOfListsToPairs(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False, position_key_before_value: bool = True)

Bases: FieldOperator

class unitxt.processors.FirstCharacter(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.GetStringAfter(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False, substring: str)

Bases: FieldOperator

class unitxt.processors.ListToEmptyEntitiesTuples(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.LoadJson(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.LowerCase(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.LowerCaseTillPunc(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.MatchClosestOption(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False, options_field: str = 'options')

Bases: InstanceFieldOperator

class unitxt.processors.RegexParser(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False, regex: str, termination_regex: str = None)

Bases: FieldOperator

A processor that parses a string using a regular expression.

class unitxt.processors.SplitStrip(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False, delimiter: str = ' ', strip_every_element: bool = False)

Bases: FieldOperator

class unitxt.processors.StanceToProCon(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.StrToFloatFormat(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.StringOrNotString(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False, string: str)

Bases: FieldOperator

class unitxt.processors.Substring(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False, begin: int = 0, end: int = None)

Bases: FieldOperator

class unitxt.processors.TakeFirstNonEmptyLine(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.TakeFirstWord(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.ToListByComma(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False, delimiter: str = ',', strip_every_element: bool = True)

Bases: SplitStrip

class unitxt.processors.ToString(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.ToStringStripped(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.ToYesOrNone(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.YesNoToInt(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

class unitxt.processors.YesToOneElseZero(apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | None = None, to_field: str | None = None, field_to_field: List[List[str]] | Dict[str, str] | None = None, process_every_value: bool = False, use_query: bool = False, get_default: Any = None, not_exist_ok: bool = False)

Bases: FieldOperator

unitxt.processors.process_instance_value(self, value, instance)