unitxt.inference module

class unitxt.inference.HFPipelineBasedInferenceEngine(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, _requirements_list: ~typing.List[str] | ~typing.Dict[str, str] = {'transformers': "Install huggingface package using 'pip install --upgrade transformers"}, model_name: str, max_new_tokens: int, use_fp16: bool = True, lazy_load: bool = False)

Bases: InferenceEngine, PackageRequirementsMixin

class unitxt.inference.IbmGenAiInferenceEngine(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = ['public', 'proprietary'], _requirements_list: ~typing.List[str] | ~typing.Dict[str, str] = {'genai': "Install ibm-genai package using 'pip install --upgrade ibm-generative-ai"}, beam_width: int | None = None, decoding_method: ~typing.Literal['greedy', 'sample'] | None = None, include_stop_sequence: bool | None = None, length_penalty: ~typing.Any = None, max_new_tokens: int | None = None, min_new_tokens: int | None = None, random_seed: int | None = None, repetition_penalty: float | None = None, return_options: ~typing.Any = None, stop_sequences: ~typing.List[str] | None = None, temperature: float | None = None, time_limit: int | None = None, top_k: int | None = None, top_p: float | None = None, truncate_input_tokens: int | None = None, typical_p: float | None = None, label: str = 'ibm_genai', model_name: str, parameters: ~unitxt.inference.IbmGenAiInferenceEngineParams | None = None)

Bases: InferenceEngine, IbmGenAiInferenceEngineParamsMixin, PackageRequirementsMixin

data_classification_policy: List[str] = ['public', 'proprietary']
class unitxt.inference.IbmGenAiInferenceEngineParamsMixin(__tags__: Dict[str, str] = {}, data_classification_policy: List[str] = None, beam_width: int | None = None, decoding_method: Literal['greedy', 'sample'] | None = None, include_stop_sequence: bool | None = None, length_penalty: Any = None, max_new_tokens: int | None = None, min_new_tokens: int | None = None, random_seed: int | None = None, repetition_penalty: float | None = None, return_options: Any = None, stop_sequences: List[str] | None = None, temperature: float | None = None, time_limit: int | None = None, top_k: int | None = None, top_p: float | None = None, truncate_input_tokens: int | None = None, typical_p: float | None = None)

Bases: Artifact

class unitxt.inference.InferenceEngine(__tags__: Dict[str, str] = {}, data_classification_policy: List[str] = None)

Bases: ABC, Artifact

Abstract base class for inference.

class unitxt.inference.LogProbInferenceEngine(__tags__: Dict[str, str] = {}, data_classification_policy: List[str] = None)

Bases: ABC, Artifact

Abstract base class for inference with log probs.

class unitxt.inference.MockInferenceEngine(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = None, model_name: str)

Bases: InferenceEngine

class unitxt.inference.OpenAiInferenceEngine(__tags__: ~typing.Dict[str, str] = {}, data_classification_policy: ~typing.List[str] = ['public'], _requirements_list: ~typing.List[str] | ~typing.Dict[str, str] = {'openai': "Install openai package using 'pip install --upgrade openai"}, frequency_penalty: float | None = None, presence_penalty: float | None = None, max_tokens: int | None = None, seed: int | None = None, stop: str | None | ~typing.List[str] = None, temperature: float | None = None, top_p: float | None = None, top_logprobs: int | None = 20, logit_bias: ~typing.Dict[str, int] | None = None, logprobs: bool | None = None, n: int | None = None, parallel_tool_calls: bool = None, service_tier: ~typing.Literal['auto', 'default'] | None = None, label: str = 'openai', model_name: str, parameters: ~unitxt.inference.OpenAiInferenceEngineParams | None = None)

Bases: InferenceEngine, LogProbInferenceEngine, OpenAiInferenceEngineParamsMixin, PackageRequirementsMixin

data_classification_policy: List[str] = ['public']
class unitxt.inference.OpenAiInferenceEngineParamsMixin(__tags__: Dict[str, str] = {}, data_classification_policy: List[str] = None, frequency_penalty: float | None = None, presence_penalty: float | None = None, max_tokens: int | None = None, seed: int | None = None, stop: str | None | List[str] = None, temperature: float | None = None, top_p: float | None = None, top_logprobs: int | None = 20, logit_bias: Dict[str, int] | None = None, logprobs: bool | None = None, n: int | None = None, parallel_tool_calls: bool = None, service_tier: Literal['auto', 'default'] | None = None)

Bases: Artifact

class unitxt.inference.WMLInferenceEngine(__tags__: Dict[str, str] = {}, data_classification_policy: List[str] = ['proprietary'], _requirements_list: List[str] | Dict[str, str] = {'ibm_watsonx_ai': "Install ibm-watsonx-ai package using 'pip install --upgrade ibm-watsonx-ai'. It is advised to have Python version >=3.10 installed, as at lower version this package may cause conflicts with other installed packages."}, decoding_method: Literal['greedy', 'sample'] | None = None, length_penalty: Dict[str, float | int] | None = None, temperature: float | None = None, top_p: float | None = None, top_k: int | None = None, random_seed: int | None = None, repetition_penalty: float | None = None, min_new_tokens: int | None = None, max_new_tokens: int | None = None, stop_sequences: List[str] | None = None, time_limit: int | None = None, truncate_input_tokens: int | None = None, prompt_variables: Dict[str, Any] | None = None, return_options: Dict[str, bool] | None = None, client: Any = None, credentials: Any = None, model_name: str | None = None, deployment_id: str | None = None, label: str = 'wml', parameters: WMLInferenceEngineParams | None = None)

Bases: InferenceEngine, WMLInferenceEngineParamsMixin, PackageRequirementsMixin

Runs inference using ibm-watsonx-ai.

client

By default, the client is created by the class instance itself. Alternatively, an existing instance of ‘ibm_watsonx_ai.client.APIClient’ can be provided directly.

Type:

Any

credentials

By default, the credentials are created by the class instance, which tries to read them from the environment variables “WML_URL”, “WML_PROJECT_ID” and “WML_APIKEY”. Alternatively, either a dictionary with the keys “url”, “apikey” and “project_id”, or an instance of ‘ibm_watsonx_ai.credentials.Credentials’, can be provided directly.

Type:

Any

model_name

ID of a model to be used for inference. Mutually exclusive with ‘deployment_id’.

Type:

str, optional

deployment_id

Deployment ID of a tuned model to be used for inference. Mutually exclusive with ‘model_name’.

Type:

str, optional

parameters

Instance of WMLInferenceEngineParams which defines inference parameters and their values. This attribute is deprecated; please pass the respective parameters directly to the WMLInferenceEngine class instead.

Type:

WMLInferenceEngineParams, optional

Examples

from .api import load_dataset

wml_credentials = {
    "url": "some_url",
    "project_id": "some_id",
    "apikey": "some_key",
}
model_name = "google/flan-t5-xxl"
wml_inference = WMLInferenceEngine(
    credentials=wml_credentials,
    model_name=model_name,
    data_classification_policy=["public"],
    top_p=0.5,
    random_seed=123,
)

dataset = load_dataset(
    dataset_query="card=cards.argument_topic,template_card_index=0,loader_limit=5"
)
results = wml_inference.infer(dataset["test"])

data_classification_policy: List[str] = ['proprietary']
class unitxt.inference.WMLInferenceEngineParamsMixin(__tags__: Dict[str, str] = {}, data_classification_policy: List[str] = None, decoding_method: Literal['greedy', 'sample'] | None = None, length_penalty: Dict[str, float | int] | None = None, temperature: float | None = None, top_p: float | None = None, top_k: int | None = None, random_seed: int | None = None, repetition_penalty: float | None = None, min_new_tokens: int | None = None, max_new_tokens: int | None = None, stop_sequences: List[str] | None = None, time_limit: int | None = None, truncate_input_tokens: int | None = None, prompt_variables: Dict[str, Any] | None = None, return_options: Dict[str, bool] | None = None)

Bases: Artifact