Class Evals (1.130.0)

    Evals(api_client_: google.genai._api_client.BaseApiClient)

API documentation for the Evals class.

Methods

batch_evaluate

    batch_evaluate(
        *,
        dataset: typing.Union[vertexai._genai.types.common.EvaluationDataset, vertexai._genai.types.common.EvaluationDatasetDict],
        metrics: list[typing.Union[vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict]],
        dest: str,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.EvaluateDatasetConfig, vertexai._genai.types.common.EvaluateDatasetConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluateDatasetOperation

Evaluates a dataset against a set of given metrics.
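
For illustration, a minimal sketch of a batch evaluation over a dataset stored in Cloud Storage. It assumes an initialized vertexai.Client (whose evals attribute exposes this class) and that types is the re-exported vertexai types module; the project, bucket paths, and prebuilt metric are placeholders:

    import vertexai
    from vertexai import types

    # Placeholder project and location; assumes the Vertex AI GenAI SDK client.
    client = vertexai.Client(project="my-project", location="us-central1")

    # Dataset rows are read from GCS; results are written under dest.
    operation = client.evals.batch_evaluate(
        dataset=types.EvaluationDataset(
            gcs_source=types.GcsSource(uris=["gs://my-bucket/eval_data.jsonl"])
        ),
        metrics=[types.PrebuiltMetric.TEXT_QUALITY],
        dest="gs://my-bucket/eval_results",
    )

Note that batch_evaluate returns a long-running EvaluateDatasetOperation rather than a finished result.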

create_evaluation_item

    create_evaluation_item(
        *,
        evaluation_item_type: vertexai._genai.types.common.EvaluationItemType,
        gcs_uri: str,
        display_name: typing.Optional[str] = None,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.CreateEvaluationItemConfig, vertexai._genai.types.common.CreateEvaluationItemConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluationItem

Creates an EvaluationItem.
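
A hedged sketch of creating an item; the EvaluationItemType member and the GCS payload path are assumptions, not confirmed values:

    import vertexai
    from vertexai import types

    client = vertexai.Client(project="my-project", location="us-central1")

    # The enum member below is an assumption; inspect types.EvaluationItemType
    # for the actual members. The URI points at a placeholder item payload.
    item = client.evals.create_evaluation_item(
        evaluation_item_type=types.EvaluationItemType.REQUEST,  # assumed member
        gcs_uri="gs://my-bucket/items/item_001.json",
        display_name="my-eval-item",
    )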

create_evaluation_run

    create_evaluation_run(
        *,
        dataset: typing.Union[vertexai._genai.types.common.EvaluationRunDataSource, vertexai._genai.types.common.EvaluationDataset],
        dest: str,
        metrics: list[typing.Union[vertexai._genai.types.common.EvaluationRunMetric, vertexai._genai.types.common.EvaluationRunMetricDict]],
        name: typing.Optional[str] = None,
        display_name: typing.Optional[str] = None,
        agent_info: typing.Optional[typing.Union[vertexai._genai.types.evals.AgentInfo, vertexai._genai.types.evals.AgentInfoDict]] = None,
        labels: typing.Optional[dict[str, str]] = None,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.CreateEvaluationRunConfig, vertexai._genai.types.common.CreateEvaluationRunConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluationRun

Creates an EvaluationRun.
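
A sketch under stated assumptions: the dataset uses the EvaluationDataset branch of the union with a GCS source, and the metric construction is left schematic because EvaluationRunMetric's fields are not shown on this page:

    import vertexai
    from vertexai import types

    client = vertexai.Client(project="my-project", location="us-central1")

    # Schematic metric: generated models in this SDK typically permit empty
    # construction (an assumption); fill in fields per types.EvaluationRunMetric.
    run_metric = types.EvaluationRunMetric()

    run = client.evals.create_evaluation_run(
        dataset=types.EvaluationDataset(
            gcs_source=types.GcsSource(uris=["gs://my-bucket/eval_data.jsonl"])
        ),
        dest="gs://my-bucket/eval_run_results",
        metrics=[run_metric],
        display_name="nightly-eval-run",
        labels={"team": "quality"},
    )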

create_evaluation_set

    create_evaluation_set(
        *,
        evaluation_items: list[str],
        display_name: typing.Optional[str] = None,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.CreateEvaluationSetConfig, vertexai._genai.types.common.CreateEvaluationSetConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluationSet

Creates an EvaluationSet.
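
For example (the resource-name format is an assumption based on typical Vertex AI naming):

    import vertexai

    client = vertexai.Client(project="my-project", location="us-central1")

    # evaluation_items takes the resource names of existing EvaluationItems.
    eval_set = client.evals.create_evaluation_set(
        evaluation_items=[
            "projects/my-project/locations/us-central1/evaluationItems/123",
            "projects/my-project/locations/us-central1/evaluationItems/456",
        ],
        display_name="my-eval-set",
    )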

evaluate

    evaluate(
        *,
        dataset: typing.Union[pandas.core.frame.DataFrame, vertexai._genai.types.common.EvaluationDataset, vertexai._genai.types.common.EvaluationDatasetDict, list[typing.Union[vertexai._genai.types.common.EvaluationDataset, vertexai._genai.types.common.EvaluationDatasetDict]]],
        metrics: typing.Optional[list[typing.Union[vertexai._genai.types.common.Metric, vertexai._genai.types.common.MetricDict]]] = None,
        location: typing.Optional[str] = None,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.EvaluateMethodConfig, vertexai._genai.types.common.EvaluateMethodConfigDict]] = None,
        **kwargs
    ) -> vertexai._genai.types.common.EvaluationResult

Evaluates candidate responses in the provided dataset(s) using the specified metrics.
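
A minimal sketch using the pandas branch of the dataset union; the column names follow the commonly documented prompt/response schema, and the prebuilt metric is a placeholder:

    import pandas as pd
    import vertexai
    from vertexai import types

    client = vertexai.Client(project="my-project", location="us-central1")

    # One candidate response per prompt; add rows for a larger dataset.
    df = pd.DataFrame(
        {
            "prompt": ["Explain quantum computing in one sentence."],
            "response": ["Quantum computers use qubits to explore many states at once."],
        }
    )

    result = client.evals.evaluate(
        dataset=df,
        metrics=[types.PrebuiltMetric.TEXT_QUALITY],
    )

Per the union above, a list of EvaluationDataset objects can also be passed, for example to score several candidates' responses in a single call.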

evaluate_instances

    evaluate_instances(
        *,
        metric_config: vertexai._genai.types.common._EvaluateInstancesRequestParameters
    ) -> vertexai._genai.types.common.EvaluateInstancesResponse

Evaluates an instance of a model.
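
This is a low-level entry point built around a private request type; most callers go through evaluate instead. A schematic sketch only, since the fields of _EvaluateInstancesRequestParameters are not shown on this page:

    import vertexai
    from vertexai import types

    client = vertexai.Client(project="my-project", location="us-central1")

    # Schematic: empty construction is assumed to be permitted by the
    # generated model; populate the metric-input fields per the type definition.
    params = types._EvaluateInstancesRequestParameters()
    response = client.evals.evaluate_instances(metric_config=params)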

generate_rubrics

    generate_rubrics(
        *,
        src: typing.Union[str, pd.DataFrame, vertexai._genai.types.common.EvaluationDataset],
        rubric_group_name: str,
        prompt_template: typing.Optional[str] = None,
        generator_model_config: typing.Optional[genai_types.AutoraterConfigOrDict] = None,
        rubric_content_type: typing.Optional[types.RubricContentType] = None,
        rubric_type_ontology: typing.Optional[list[str]] = None,
        predefined_spec_name: typing.Optional[typing.Union[str, types.PrebuiltMetric]] = None,
        metric_spec_parameters: typing.Optional[dict[str, typing.Any]] = None,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.RubricGenerationConfig, vertexai._genai.types.common.RubricGenerationConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluationDataset

Generates rubrics for each prompt in the source and adds them as a new column structured as a dictionary.

You can generate rubrics by providing either:

  1. A predefined_spec_name to use a Vertex AI backend recipe.
  2. A prompt_template along with other configuration parameters (generator_model_config, rubric_content_type, rubric_type_ontology) for custom rubric generation.

These two modes are mutually exclusive.
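
A sketch of the first mode; the spec name, rubric group name, and GCS path are placeholders:

    import vertexai

    client = vertexai.Client(project="my-project", location="us-central1")

    # Mode 1: delegate rubric generation to a backend recipe.
    dataset_with_rubrics = client.evals.generate_rubrics(
        src="gs://my-bucket/prompts.jsonl",
        rubric_group_name="text_quality_rubrics",
        predefined_spec_name="general_quality",  # placeholder spec name
    )

For the second mode, pass prompt_template (optionally with generator_model_config, rubric_content_type, and rubric_type_ontology) and omit predefined_spec_name.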

get_evaluation_item

    get_evaluation_item(
        *,
        name: str,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.GetEvaluationItemConfig, vertexai._genai.types.common.GetEvaluationItemConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluationItem

Retrieves an EvaluationItem by its resource name.
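
For example (the resource-name format is an assumption based on typical Vertex AI naming):

    import vertexai

    client = vertexai.Client(project="my-project", location="us-central1")

    item = client.evals.get_evaluation_item(
        name="projects/my-project/locations/us-central1/evaluationItems/123"
    )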

get_evaluation_run

    get_evaluation_run(
        *,
        name: str,
        include_evaluation_items: bool = False,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.GetEvaluationRunConfig, vertexai._genai.types.common.GetEvaluationRunConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluationRun

Retrieves an EvaluationRun by its resource name.

Exceptions

Type          Description
ValueError    If the name is empty or invalid.
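
For example (the resource-name format again an assumption):

    import vertexai

    client = vertexai.Client(project="my-project", location="us-central1")

    # include_evaluation_items=True also fetches the run's individual items.
    run = client.evals.get_evaluation_run(
        name="projects/my-project/locations/us-central1/evaluationRuns/456",
        include_evaluation_items=True,
    )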

get_evaluation_set

    get_evaluation_set(
        *,
        name: str,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.GetEvaluationSetConfig, vertexai._genai.types.common.GetEvaluationSetConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluationSet

Retrieves an EvaluationSet by its resource name.
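
For example:

    import vertexai

    client = vertexai.Client(project="my-project", location="us-central1")

    eval_set = client.evals.get_evaluation_set(
        name="projects/my-project/locations/us-central1/evaluationSets/789"  # placeholder name
    )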

run_inference

    run_inference(
        *,
        src: typing.Union[str, pandas.core.frame.DataFrame, vertexai._genai.types.common.EvaluationDataset],
        model: typing.Optional[typing.Union[str, typing.Callable[[typing.Any], typing.Any]]] = None,
        agent: typing.Optional[typing.Union[str, vertexai._genai.types.common.AgentEngine]] = None,
        location: typing.Optional[str] = None,
        config: typing.Optional[typing.Union[vertexai._genai.types.common.EvalRunInferenceConfig, vertexai._genai.types.common.EvalRunInferenceConfigDict]] = None
    ) -> vertexai._genai.types.common.EvaluationDataset

Runs inference on a dataset for evaluation.
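
A minimal sketch: run a model over a small prompt table, producing a dataset that can then be passed to evaluate. The model ID is a placeholder:

    import pandas as pd
    import vertexai

    client = vertexai.Client(project="my-project", location="us-central1")

    prompts = pd.DataFrame({"prompt": ["Write a haiku about autumn."]})

    # Returns an EvaluationDataset whose table gains a response column.
    inference_results = client.evals.run_inference(
        src=prompts,
        model="gemini-2.0-flash",  # placeholder model ID
    )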
