unitxt.string_operators module

class unitxt.string_operators.FormatText(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, to_field: str = __required__, text: str = __required__)[source]

Bases: InstanceOperator

class unitxt.string_operators.Join(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, by: str = __required__)[source]

Bases: FieldOperator

class unitxt.string_operators.MapReplace(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, mapping: Dict[str, str] = __required__)[source]

Bases: FieldOperator

class unitxt.string_operators.RegexReplace(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, pattern: str = __required__, replacement: str = __required__)[source]

Bases: FieldOperator

class unitxt.string_operators.RegexSplit(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, by: str = __required__)[source]

Bases: FieldOperator

class unitxt.string_operators.Replace(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, old: str = __required__, new: str = __required__)[source]

Bases: FieldOperator

class unitxt.string_operators.Split(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, by: str = __required__)[source]

Bases: FieldOperator

class unitxt.string_operators.Strip(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False)[source]

Bases: FieldOperator

class unitxt.string_operators.TokensSlice(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = ['transformers'], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, model: str = __required__, start: int | NoneType = None, stop: int | NoneType = None, step: int | NoneType = None)[source]

Bases: FieldOperator

class unitxt.string_operators.TokensSplit(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = ['transformers'], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, model: str = __required__)[source]

Bases: FieldOperator