Class Evals (1.153.1)

    Evals(api_client_: google.genai._api_client.BaseApiClient)
 

API documentation for Evals class.

Methods

batch_evaluate

    batch_evaluate(
        *,
        dataset: typing.Union[
            vertexai._genai.types.common.EvaluationDataset,
            vertexai._genai.types.common.EvaluationDatasetDict,
        ],
        metrics: list[
            typing.Union[
                vertexai._genai.types.common.Metric,
                vertexai._genai.types.common.MetricDict
            ]
        ],
        dest: str,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.EvaluateDatasetConfig,
                vertexai._genai.types.common.EvaluateDatasetConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluateDatasetOperation
 

Evaluates a dataset based on a set of given metrics.

create_evaluation_item

    create_evaluation_item(
        *,
        evaluation_item_type: vertexai._genai.types.common.EvaluationItemType,
        gcs_uri: str,
        display_name: typing.Optional[str] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.CreateEvaluationItemConfig,
                vertexai._genai.types.common.CreateEvaluationItemConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationItem
 

Creates an EvaluationItem.

create_evaluation_metric

    create_evaluation_metric(
        *,
        display_name: typing.Optional[str] = None,
        description: typing.Optional[str] = None,
        metric: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.Metric,
                vertexai._genai.types.common.MetricDict
            ]
        ] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.CreateEvaluationMetricConfig,
                vertexai._genai.types.common.CreateEvaluationMetricConfigDict,
            ]
        ] = None
    ) -> str
 

Creates an EvaluationMetric.

create_evaluation_run

    create_evaluation_run(
        *,
        dataset: typing.Union[
            vertexai._genai.types.common.EvaluationRunDataSource,
            vertexai._genai.types.common.EvaluationDataset,
        ],
        dest: str,
        metrics: list[
            typing.Union[
                vertexai._genai.types.common.EvaluationRunMetric,
                vertexai._genai.types.common.EvaluationRunMetricDict,
            ]
        ],
        name: typing.Optional[str] = None,
        display_name: typing.Optional[str] = None,
        agent_info: typing.Optional[
            typing.Union[
                vertexai._genai.types.evals.AgentInfo,
                vertexai._genai.types.evals.AgentInfoDict,
            ]
        ] = None,
        agent: typing.Optional[str] = None,
        user_simulator_config: typing.Optional[
            typing.Union[
                vertexai._genai.types.evals.UserSimulatorConfig,
                vertexai._genai.types.evals.UserSimulatorConfigDict,
            ]
        ] = None,
        inference_configs: typing.Optional[
            dict[
                str,
                typing.Union[
                    vertexai._genai.types.common.EvaluationRunInferenceConfig,
                    vertexai._genai.types.common.EvaluationRunInferenceConfigDict,
                ],
            ]
        ] = None,
        labels: typing.Optional[dict[str, str]] = None,
        loss_analysis_metrics: typing.Optional[
            list[
                typing.Union[
                    str,
                    vertexai._genai.types.common.Metric,
                    vertexai._genai.types.common.MetricDict,
                ]
            ]
        ] = None,
        loss_analysis_configs: typing.Optional[
            list[
                typing.Union[
                    vertexai._genai.types.common.LossAnalysisConfig,
                    vertexai._genai.types.common.LossAnalysisConfigDict,
                ]
            ]
        ] = None,
        red_teaming_config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.RedTeamingAnalysisConfig,
                vertexai._genai.types.common.RedTeamingAnalysisConfigDict,
            ]
        ] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.CreateEvaluationRunConfig,
                vertexai._genai.types.common.CreateEvaluationRunConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationRun
 

Creates an EvaluationRun.

create_evaluation_set

    create_evaluation_set(
        *,
        evaluation_items: list[str],
        display_name: typing.Optional[str] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.CreateEvaluationSetConfig,
                vertexai._genai.types.common.CreateEvaluationSetConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationSet
 

Creates an EvaluationSet.

delete_evaluation_metric

    delete_evaluation_metric(
        *,
        metric_resource_name: str,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.DeleteEvaluationMetricConfig,
                vertexai._genai.types.common.DeleteEvaluationMetricConfigDict,
            ]
        ] = None
    ) -> None
 

Deletes an EvaluationMetric.

evaluate

    evaluate(
        *,
        dataset: typing.Union[
            pandas.core.frame.DataFrame,
            vertexai._genai.types.common.EvaluationDataset,
            vertexai._genai.types.common.EvaluationDatasetDict,
            list[
                typing.Union[
                    vertexai._genai.types.common.EvaluationDataset,
                    vertexai._genai.types.common.EvaluationDatasetDict,
                ]
            ],
        ],
        metrics: typing.Optional[
            list[
                typing.Union[
                    vertexai._genai.types.common.Metric,
                    vertexai._genai.types.common.MetricDict,
                ]
            ]
        ] = None,
        location: typing.Optional[str] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.EvaluateMethodConfig,
                vertexai._genai.types.common.EvaluateMethodConfigDict,
            ]
        ] = None,
        **kwargs: typing.Any
    ) -> vertexai._genai.types.common.EvaluationResult
 

Evaluates candidate responses in the provided dataset(s) using the specified metrics.

evaluate_instances

    evaluate_instances(
        *,
        metric_config: vertexai._genai.types.common._EvaluateInstancesRequestParameters
    ) -> vertexai._genai.types.common.EvaluateInstancesResponse
 

Evaluates an instance of a model.

generate_conversation_scenarios

    generate_conversation_scenarios(
        *,
        agent_info: typing.Union[
            vertexai._genai.types.evals.AgentInfo,
            vertexai._genai.types.evals.AgentInfoDict
        ],
        config: typing.Union[
            vertexai._genai.types.evals.UserScenarioGenerationConfig,
            vertexai._genai.types.evals.UserScenarioGenerationConfigDict,
        ],
        allow_cross_region_model: typing.Optional[bool] = None
    ) -> vertexai._genai.types.common.EvaluationDataset
 

Generates an evaluation dataset with user scenarios, which helps to generate conversations between a simulated user and the agent under test.

generate_loss_clusters

    generate_loss_clusters(
        *,
        eval_result: vertexai._genai.types.common.EvaluationResult,
        metric: typing.Optional[
            typing.Union[
                str,
                vertexai._genai.types.common.Metric,
                vertexai._genai.types.common.MetricDict,
            ]
        ] = None,
        candidate: typing.Optional[str] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.LossAnalysisConfig,
                vertexai._genai.types.common.LossAnalysisConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.GenerateLossClustersResponse
 

Generates loss clusters from evaluation results.

Analyzes "Pass/Fail" signals from rubric-based autoraters and groups them into semantic "Loss Patterns" (e.g., "Hallucination of Action").

This method calls the GenerateLossClusters LRO and polls until completion, returning the results directly.

If metric or candidate are not provided, they will be auto-inferred from eval_result when unambiguous (i.e., when the eval result contains exactly one metric or one candidate). For multi-metric or multi-candidate evaluations, provide them explicitly.

Available candidate names can be found in eval_result.metadata.candidate_names.

Note: This API is only available in the global region.

generate_rubrics

    generate_rubrics(
        *,
        src: typing.Union[
            str,
            pd.DataFrame,
            vertexai._genai.types.common.EvaluationDataset
        ],
        rubric_group_name: str,
        prompt_template: typing.Optional[str] = None,
        generator_model_config: typing.Optional[
            genai_types.AutoraterConfigOrDict
        ] = None,
        rubric_content_type: typing.Optional[types.RubricContentType] = None,
        rubric_type_ontology: typing.Optional[list[str]] = None,
        predefined_spec_name: typing.Optional[
            typing.Union[str, types.PrebuiltMetric]
        ] = None,
        metric_spec_parameters: typing.Optional[dict[str, typing.Any]] = None,
        metric: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.Metric,
                vertexai._genai.types.common.MetricDict
            ]
        ] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.RubricGenerationConfig,
                vertexai._genai.types.common.RubricGenerationConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationDataset
 

Generates rubrics for each prompt in the source and adds them as a new column structured as a dictionary.

You can generate rubrics by providing either:

  1. A metric to use a pre-registered metric resource.
  2. A predefined_spec_name to use a Vertex AI backend recipe.
  3. A prompt_template along with other configuration parameters (generator_model_config, rubric_content_type, rubric_type_ontology) for custom rubric generation.

These modes are mutually exclusive. If more than one is supplied, metric takes precedence over predefined_spec_name, and predefined_spec_name takes precedence over prompt_template.

get_evaluation_item

    get_evaluation_item(
        *,
        name: str,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.GetEvaluationItemConfig,
                vertexai._genai.types.common.GetEvaluationItemConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationItem
 

Retrieves an EvaluationItem from the resource name.

get_evaluation_metric

    get_evaluation_metric(
        *,
        metric_resource_name: str,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.GetEvaluationMetricConfig,
                vertexai._genai.types.common.GetEvaluationMetricConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationMetric
 

Retrieves an EvaluationMetric from the resource name.

get_evaluation_run

    get_evaluation_run(
        *,
        name: str,
        include_evaluation_items: bool = False,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.GetEvaluationRunConfig,
                vertexai._genai.types.common.GetEvaluationRunConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationRun
 

Retrieves an EvaluationRun from the resource name.

Exceptions

| Type       | Description                      |
|------------|----------------------------------|
| ValueError | If the name is empty or invalid. |

get_evaluation_set

    get_evaluation_set(
        *,
        name: str,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.GetEvaluationSetConfig,
                vertexai._genai.types.common.GetEvaluationSetConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationSet
 

Retrieves an EvaluationSet from the resource name.

list_evaluation_metrics

    list_evaluation_metrics(
        *,
        filter: typing.Optional[str] = None,
        order_by: typing.Optional[str] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.ListEvaluationMetricsConfig,
                vertexai._genai.types.common.ListEvaluationMetricsConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.ListEvaluationMetricsResponse
 

Lists EvaluationMetrics.

run_inference

    run_inference(
        *,
        src: typing.Union[
            str,
            pandas.core.frame.DataFrame,
            vertexai._genai.types.common.EvaluationDataset
        ],
        model: typing.Optional[
            typing.Union[str, typing.Callable[[typing.Any], typing.Any]]
        ] = None,
        agent: typing.Optional[
            typing.Union[str, vertexai._genai.types.common.AgentEngine]
        ] = None,
        location: typing.Optional[str] = None,
        config: typing.Optional[
            typing.Union[
                vertexai._genai.types.common.EvalRunInferenceConfig,
                vertexai._genai.types.common.EvalRunInferenceConfigDict,
            ]
        ] = None
    ) -> vertexai._genai.types.common.EvaluationDataset
 

Runs inference on a dataset for evaluation.

Design a Mobile Site
View Site in Mobile | Classic
Share by: