Starting April 29, 2025, Gemini 1.5 Pro and Gemini 1.5 Flash models are not available in projects that have no prior usage of these models, including new projects. For details, see
Model versions and lifecycle.
Evaluate a model response against a reference (ground truth) using the ROUGE metric
Stay organized with collections
Save and categorize content based on your preferences.
This code sample demonstrates how to use Vertex AI to compute ROUGE metrics for evaluating text summarization models. It shows how to define an evaluation task and calculate ROUGE scores for multiple generated summaries against a reference summary.
Explore further
For detailed documentation that includes this code sample, see the following: Gen AI evaluation service API.
Code sample
Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.
[[["Easy to understand","easyToUnderstand","thumb-up"],["Solved my problem","solvedMyProblem","thumb-up"],["Other","otherUp","thumb-up"]],[["Hard to understand","hardToUnderstand","thumb-down"],["Incorrect information or sample code","incorrectInformationOrSampleCode","thumb-down"],["Missing the information/samples I need","missingTheInformationSamplesINeed","thumb-down"],["Other","otherDown","thumb-down"]],[],[],[],null,["# Evaluate a model response against a reference (ground truth) using the ROUGE metric\n\nThis code sample demonstrates how to use Vertex AI to compute ROUGE metrics for evaluating text summarization models. It shows how to define an evaluation task and calculate ROUGE scores for multiple generated summaries against a reference summary.\n\nExplore further\n---------------\n\n\nFor detailed documentation that includes this code sample, see the following:\n\n- [Gen AI evaluation service API](/vertex-ai/generative-ai/docs/model-reference/evaluation)\n\nCode sample\n-----------\n\n### Go\n\n\nBefore trying this sample, follow the Go setup instructions in the\n[Vertex AI quickstart using\nclient libraries](/vertex-ai/docs/start/client-libraries).\n\n\nFor more information, see the\n[Vertex AI Go API\nreference documentation](/go/docs/reference/cloud.google.com/go/aiplatform/latest/apiv1).\n\n\nTo authenticate to Vertex AI, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import (\n \t\"context\"\n \t\"fmt\"\n \t\"io\"\n\n \taiplatform \"cloud.google.com/go/aiplatform/apiv1beta1\"\n \taiplatformpb \"cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb\"\n \t\"google.golang.org/api/option\"\n )\n\n // getROUGEScore evaluates a model response against a reference (ground truth) using the ROUGE metric\n func getROUGEScore(w io.Writer, projectID, location string) error {\n \t// location = \"us-central1\"\n \tctx := 
context.Background()\n \tapiEndpoint := fmt.Sprintf(\"%s-aiplatform.googleapis.com:443\", location)\n \tclient, err := aiplatform.NewEvaluationClient(ctx, option.WithEndpoint(apiEndpoint))\n\n \tif err != nil {\n \t\treturn fmt.Errorf(\"unable to create aiplatform client: %w\", err)\n \t}\n \tdefer client.Close()\n\n \tmodelResponse := `\n The Great Barrier Reef, the world's largest coral reef system located in Australia,\n is a vast and diverse ecosystem. However, it faces serious threats from climate change,\n ocean acidification, and coral bleaching, endangering its rich marine life.\n `\n \treference := `\n The Great Barrier Reef, the world's largest coral reef system, is\n located off the coast of Queensland, Australia. It's a vast\n ecosystem spanning over 2,300 kilometers with thousands of reefs\n and islands. While it harbors an incredible diversity of marine\n life, including endangered species, it faces serious threats from\n climate change, ocean acidification, and coral bleaching.\n `\n \treq := aiplatformpb.EvaluateInstancesRequest{\n \t\tLocation: fmt.Sprintf(\"projects/%s/locations/%s\", projectID, location),\n \t\tMetricInputs: &aiplatformpb.EvaluateInstancesRequest_RougeInput{\n \t\t\tRougeInput: &aiplatformpb.RougeInput{\n \t\t\t\t// Check the API reference for the list of supported ROUGE metric types:\n \t\t\t\t// https://cloud.google.com/vertex-ai/docs/reference/rpc/google.cloud.aiplatform.v1beta1#rougespec\n \t\t\t\tMetricSpec: &aiplatformpb.RougeSpec{\n \t\t\t\t\tRougeType: \"rouge1\",\n \t\t\t\t},\n \t\t\t\tInstances: []*aiplatformpb.RougeInstance{\n \t\t\t\t\t{\n \t\t\t\t\t\tPrediction: &modelResponse,\n \t\t\t\t\t\tReference: &reference,\n \t\t\t\t\t},\n \t\t\t\t},\n \t\t\t},\n \t\t},\n \t}\n\n \tresp, err := client.EvaluateInstances(ctx, &req)\n \tif err != nil {\n \t\treturn 
fmt.Errorf(\"evaluateInstances failed: %v\", err)\n \t}\n\n \tfmt.Fprintln(w, \"evaluation results:\")\n \tfmt.Fprintln(w, resp.GetRougeResults().GetRougeMetricValues())\n \t// Example response:\n \t// [score:0.6597938]\n\n \treturn nil\n }\n\n### Python\n\n\nBefore trying this sample, follow the Python setup instructions in the\n[Vertex AI quickstart using\nclient libraries](/vertex-ai/docs/start/client-libraries).\n\n\nFor more information, see the\n[Vertex AI Python API\nreference documentation](/python/docs/reference/aiplatform/latest).\n\n\nTo authenticate to Vertex AI, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import pandas as pd\n\n import vertexai\n from vertexai.preview.evaluation import EvalTask\n\n # TODO(developer): Update & uncomment line below\n # PROJECT_ID = \"your-project-id\"\n vertexai.init(project=PROJECT_ID, location=\"us-central1\")\n\n reference_summarization = \"\"\"\n The Great Barrier Reef, the world's largest coral reef system, is\n located off the coast of Queensland, Australia. It's a vast\n ecosystem spanning over 2,300 kilometers with thousands of reefs\n and islands. While it harbors an incredible diversity of marine\n life, including endangered species, it faces serious threats from\n climate change, ocean acidification, and coral bleaching.\"\"\"\n\n # Compare pre-generated model responses against the reference (ground truth).\n eval_dataset = pd.DataFrame(\n {\n \"response\": [\n \"\"\"The Great Barrier Reef, the world's largest coral reef system located\n in Australia, is a vast and diverse ecosystem. 
However, it faces serious\n threats from climate change, ocean acidification, and coral bleaching,\n endangering its rich marine life.\"\"\",\n \"\"\"The Great Barrier Reef, a vast coral reef system off the coast of\n Queensland, Australia, is the world's largest. It's a complex ecosystem\n supporting diverse marine life, including endangered species. However,\n climate change, ocean acidification, and coral bleaching are serious\n threats to its survival.\"\"\",\n \"\"\"The Great Barrier Reef, the world's largest coral reef system off the\n coast of Australia, is a vast and diverse ecosystem with thousands of\n reefs and islands. It is home to a multitude of marine life, including\n endangered species, but faces serious threats from climate change, ocean\n acidification, and coral bleaching.\"\"\",\n ],\n \"reference\": [reference_summarization] * 3,\n }\n )\n eval_task = EvalTask(\n dataset=eval_dataset,\n metrics=[\n \"rouge_1\",\n \"rouge_2\",\n \"rouge_l\",\n \"rouge_l_sum\",\n ],\n )\n result = eval_task.evaluate()\n\n print(\"Summary Metrics:\\n\")\n for key, value in result.summary_metrics.items():\n print(f\"{key}: \\t{value}\")\n\n print(\"\\n\\nMetrics Table:\\n\")\n print(result.metrics_table)\n # Example response:\n #\n # Summary Metrics:\n #\n # row_count: 3\n # rouge_1/mean: 0.7191161666666667\n # rouge_1/std: 0.06765143922270488\n # rouge_2/mean: 0.5441118566666666\n # ...\n # Metrics Table:\n #\n # response reference ... rouge_l/score rouge_l_sum/score\n # 0 The Great Barrier Reef, the world's ... \\n The Great Barrier Reef, the ... ... 0.577320 0.639175\n # 1 The Great Barrier Reef, a vast coral... \\n The Great Barrier Reef, the ... ... 0.552381 0.666667\n # 2 The Great Barrier Reef, the world's ... \\n The Great Barrier Reef, the ... ... 0.774775 0.774775\n\nWhat's next\n-----------\n\n\nTo search and filter code samples for other Google Cloud products, see the\n[Google Cloud sample browser](/docs/samples?product=generativeaionvertexai)."]]