unitxt.collections_operators module¶
- class unitxt.collections_operators.Chunk(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, size: int = __required__)[source]¶
Bases:
FieldOperator
- class unitxt.collections_operators.DictToTuplesList(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False)[source]¶
Bases:
FieldOperator
- class unitxt.collections_operators.Dictify(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, with_keys: List[str] = __required__)[source]¶
Bases:
FieldOperator
- class unitxt.collections_operators.DuplicateByList(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str = __required__, to_field: str | NoneType = None, use_deep_copy: bool = False)[source]¶
Bases:
StreamOperator
- class unitxt.collections_operators.DuplicateBySubLists(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str = __required__, to_field: str | NoneType = None, use_deep_copy: bool = False)[source]¶
Bases:
StreamOperator
- class unitxt.collections_operators.Explode(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str = __required__, to_field: str | NoneType = None, use_deep_copy: bool = False)[source]¶
Bases:
DuplicateByList
- class unitxt.collections_operators.Filter(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, values: List[Any] = __required__)[source]¶
Bases:
FieldOperator
- class unitxt.collections_operators.Get(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, item: Any = __required__)[source]¶
Bases:
FieldOperator
- class unitxt.collections_operators.GetLength(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False)[source]¶
Bases:
FieldOperator
- class unitxt.collections_operators.Slice(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, start: int | NoneType = None, stop: int | NoneType = None, step: int | NoneType = None)[source]¶
Bases:
FieldOperator
- class unitxt.collections_operators.Wrap(data_classification_policy: List[str] = None, _requirements_list: List[str] | Dict[str, str] = [], requirements: List[str] | Dict[str, str] = [], caching: bool = None, apply_to_streams: List[str] = None, dont_apply_to_streams: List[str] = None, field: str | NoneType = None, to_field: str | NoneType = None, field_to_field: List[List[str]] | Dict[str, str] | NoneType = None, use_query: bool | NoneType = None, process_every_value: bool = False, get_default: Any = None, not_exist_ok: bool = False, not_exist_do_nothing: bool = False, inside: str = __required__)[source]¶
Bases:
FieldOperator