Vertex AI SDK migration guide

The Generative AI module in the Vertex AI SDK is deprecated and will no longer be available after June 24, 2026. The Google Gen AI SDK contains all the capabilities of the Vertex AI SDK, and supports many additional capabilities.

Use this migration guide to convert Python, Java, JavaScript, and Go code using the Vertex AI SDK to the Google Gen AI SDK.

Code migration

Use the following sections to migrate specific code snippets from the Vertex AI SDK to the Google Gen AI SDK.

Installation

Replace the Vertex AI SDK dependency with the Google Gen AI SDK dependency.

Before

Python

pip install -U -q "google-cloud-aiplatform"

Java

implementation 'com.google.cloud:google-cloud-vertexai:1.24.0'

JavaScript

npm install @google-cloud/vertexai

Go

go get cloud.google.com/go/vertexai/genai

After

Python

pip install -U -q "google-genai"

Java

implementation 'com.google.genai:1.3.0'

JavaScript

npm install @google/genai

Go

go get google.golang.org/genai

Context caching

Context caching involves storing and reusing frequently used portions of model prompts for similar requests. Replace the Vertex AI SDK implementation with the Google Gen AI SDK dependency.

Before

Python

Imports

from google.cloud import aiplatform
import vertexai
import datetime

Create

vertexai.init(project=GOOGLE_CLOUD_PROJECT, location=GOOGLE_CLOUD_LOCATION)

cache_content = vertexai.caching.CachedContent.create(
  model_name=MODEL_NAME,
  system_instruction='Please answer my question formally',
  contents=['user content'],
  ttl=datetime.timedelta(days=1),
)

Get

vertexai.init(project=GOOGLE_CLOUD_PROJECT, location=GOOGLE_CLOUD_LOCATION)
cache_content = vertexai.caching.CachedContent.get(cached_content_name="projects/{project}/locations/{location}/cachedContents/{cached_content}")

Delete

cache_content.delete()

Update

cache_content.update(ttl=datetime.timedelta(days=2))

List

cache_contents = vertexai.caching.CachedContent.list()

Java

Context caching is not supported by the Java Vertex AI SDK, but is supported by the Google Gen AI SDK.

JavaScript

Context caching is not supported by the JavaScript Vertex AI SDK, but is supported by the Google Gen AI SDK.

Go

Imports

package contextcaching

// [START generativeaionvertexai_gemini_create_context_cache]
import (
  "context"
  "fmt"
  "io"
  "time"

  "cloud.google.com/go/vertexai/genai"
)

Create

content := &genai.CachedContent{
  Model: modelName,
  SystemInstruction: &genai.Content{
    Parts: []genai.Part{genai.Text(systemInstruction)},
  },
  Expiration: genai.ExpireTimeOrTTL{TTL: 60 * time.Minute},
  Contents: []*genai.Content{
    {
      Role:  "user",
      Parts: []genai.Part{part1, part2},
    },
  },
}

result, err := client.CreateCachedContent(context, content)

Get

cachedContent, err := client.GetCachedContent(context, contentName)

Delete

err = client.DeleteCachedContent(context, contentName)

Update

newExpireTime := cc.Expiration.ExpireTime.Add(15 * time.Minute)
ccUpdated := client.UpdateCachedContent(context, cc, &genai.CachedContentToUpdate{
        Expiration: &genai.ExpireTimeOrTTL{ExpireTime: newExpireTime},
})

List

iter, err := client.ListCachedContents(context, contentName)

After

Python

Imports

from google import genai
from google.genai.types import Content, CreateCachedContentConfig, HttpOptions, Part

Create

client = genai.Client(http_options=HttpOptions(api_version="v1"))

content_cache = client.caches.create(
    model="gemini-2.5-flash-preview-05-20",
    config=CreateCachedContentConfig(
        contents=contents,
        system_instruction=system_instruction,
        display_name="example-cache",
        ttl="86400s",
    ),
)

Get

content_cache_list = client.caches.list()

# Access individual properties of a ContentCache object(s)
for content_cache in content_cache_list:
    print(f"Cache `{content_cache.name}` for model `{content_cache.model}`")
    print(f"Last updated at: {content_cache.update_time}")
    print(f"Expires at: {content_cache.expire_time}")

Delete

client.caches.delete(name=cache_name)

Update

content_cache = client.caches.update(
    name=cache_name, config=UpdateCachedContentConfig(ttl="36000s")
)

List

cache_contents = client.caches.list(config={'page_size': 2})

Java

Imports

import com.google.genai.types.CachedContent;
import com.google.genai.types.Content;
import com.google.genai.types.CreateCachedContentConfig;
import com.google.genai.types.DeleteCachedContentResponse;
import com.google.genai.types.ListCachedContentsConfig;

Create

Content content =
  Content.fromParts(
    fetchPdfPart(
      "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf"));

CreateCachedContentConfig config =
  CreateCachedContentConfig.builder()
    .systemInstruction(Content.fromParts(Part.fromText("summarize the pdf")))
    .expireTime(Instant.now().plus(Duration.ofHours(1)))
    .contents(content)
    .build();

CachedContent cachedContent1 = client.caches.create("gemini-2.0-flash-001", config);

Get

CachedContent cachedContent2 = client.caches.get(cachedContent1.name().get(), null);
System.out.println("get cached content: " + cachedContent2);

Delete

DeleteCachedContentResponse unused = client.caches.delete(cachedContent1.name().get(), null);
System.out.println("Deleted cached content: " + cachedContent1.name().get());

Update

CachedContent cachedContentUpdate =
    client.caches.update(
        cachedContent.name().get(),
        UpdateCachedContentConfig.builder().ttl(Duration.ofMinutes(10)).build());
System.out.println("Update cached content: " + cachedContentUpdate);

List

System.out.println("List cached contents resrouce names: ");
for (CachedContent cachedContent :
    client.caches.list(ListCachedContentsConfig.builder().pageSize(5).build())) {
  System.out.println(cachedContent.name().get());
}

JavaScript

Imports

import {GoogleGenAI, Part} from '@google/genai';

Create

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});

const cachedContent1: Part = {
  fileData: {
    fileUri: 'gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf',
    mimeType: 'application/pdf',
  },
};

const cachedContent2: Part = {
  fileData: {
    fileUri: 'gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf',
    mimeType: 'application/pdf',
  },
};

const cache = await ai.caches.create({
  model: 'gemini-1.5-pro-002',
  config: {contents: [cachedContent1, cachedContent2]},
});

Get

const getResponse = await ai.caches.get({name: cacheName});

Delete

await ai.caches.delete({name: cacheName});

Update

const updateResponse = await ai.caches.update({
  name: cacheName,
  config: {ttl: '86400s'},
});

List

const listResponse = await ai.caches.list();
let i = 1;
for await (const cachedContent of listResponse) {
  console.debug(`List response ${i++}: `, JSON.stringify(cachedContent));
}

Go

Imports

import (
  "context"
  "encoding/json"
  "fmt"
  "io"

  genai "google.golang.org/genai"
)

Create

cacheContents := []*genai.Content{
  {
    Parts: []*genai.Part{
      {FileData: &genai.FileData{
        FileURI:  "gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf",
        MIMEType: "application/pdf",
      }},
      {FileData: &genai.FileData{
        FileURI:  "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
        MIMEType: "application/pdf",
      }},
    },
    Role: "user",
  },
}
config := &genai.CreateCachedContentConfig{
  Contents: cacheContents,
  SystemInstruction: &genai.Content{
    Parts: []*genai.Part{
      {Text: systemInstruction},
    },
  },
  DisplayName: "example-cache",
  TTL:         "86400s",
}

res, err := client.Caches.Create(ctx, modelName, config)

Get

cachedContent, err := client.GetCachedContent(ctx, contentName)

Delete

_, err = client.Caches.Delete(ctx, result.Name, &genai.DeleteCachedContentConfig{})

Update

result, err = client.Caches.Update(ctx, result.Name, &genai.UpdateCachedContentConfig{
  ExpireTime: time.Now().Add(time.Hour),
})

List

// List the first page.
page, err := client.Caches.List(ctx, &genai.ListCachedContentsConfig{PageSize: 2})

// Continue to the next page.
page, err = page.Next(ctx)

// Resume the page iteration using the next page token.
page, err = client.Caches.List(ctx, &genai.ListCachedContentsConfig{PageSize: 2, PageToken: page.NextPageToken})

Configuration and system instructions

Configuration defines parameters that control model behavior, and system instructions provide guiding directives to steer model responses towards a specific persona, style, or task. Replace the configuration and system instructions from the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

model = generative_models.GenerativeModel(
  GEMINI_MODEL_NAME,
  system_instruction=[
    "Talk like a pirate.",
    "Don't use rude words.",
  ],
)
response = model.generate_content(
  contents="Why is sky blue?",
  generation_config=generative_models.GenerationConfig(
    temperature=0,
    top_p=0.95,
    top_k=20,
    candidate_count=1,
    max_output_tokens=100,
    stop_sequences=["STOP!"],
    response_logprobs=True,
    logprobs=3,
  ),
  safety_settings={
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_NONE,
  },
)

Java

import com.google.cloud.vertexai.api.GenerationConfig;

GenerationConfig generationConfig =
  GenerationConfig.newBuilder().setMaxOutputTokens(50).build();

// Use the builder to instantialize the model with the configuration.
GenerativeModel model =
  new GenerativeModel.Builder()
    .setModelName("gemino-pro")
    .setVertexAi(vertexAi)
    .setGenerationConfig(generationConfig)
    .build();

JavaScript

const {VertexAI} = require('@google-cloud/vertexai');

const generativeModel = vertexAI.getGenerativeModel({
  model: 'gemini-2.0-flash-001',
  systemInstruction: {
    parts: [
      {text: 'You are a helpful language translator.'},
      {text: 'Your mission is to translate text in English to French.'},
    ],
  },
});

const textPart = {
  text: `
  User input: I like bagels.
  Answer:`,
};

const request = {
  contents: [{role: 'user', parts: [textPart]}],
};

const resp = await generativeModel.generateContent(request);
const contentResponse = await resp.response;
console.log(JSON.stringify(contentResponse));

Go

import (
  "context"
  "cloud.google.com/go/vertexai/genai"
)

model := client.GenerativeModel(modelName)

model.GenerationConfig = genai.GenerationConfig{
  TopP:            proto.Float32(1),
  TopK:            proto.Int32(32),
  Temperature:     proto.Float32(0.4),
  MaxOutputTokens: proto.Int32(2048),
}

systemInstruction := fmt.Sprintf("Your mission is to translate text from %xs to %s", sourceLanguageCode, targetLanguageCode)

model.SystemInstruction = &genai.Content{
  Role:  "user",
  Parts: []genai.Part{genai.Text(systemInstruction)},
}

After

Python

from google.genai import types

response = client.models.generate_content(
  model='gemini-2.0-flash-001',
  contents='high',
  config=types.GenerateContentConfig(
    system_instruction='I say high, you say low',
    max_output_tokens=3,
    temperature=0.3,
    response_logprobs=True,
    logprobs=3,
  ),
)

Java

Import GenerateContentConfig:

import com.google.genai.types.GenerateContentConfig;

Create the system instruction:

Content systemInstruction = Content.fromParts(Part.fromText("You are a history teacher."));

Add the system instructions to the content config:

GenerateContentConfig config =
  GenerateContentConfig.builder()
    ...
    .systemInstruction(systemInstruction)
    .build();

For the full implementation, see GenerateContentWithConfigs.java.

JavaScript

import {GoogleGenAI} from '@google/genai';

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});
const response = await ai.models.generateContent({
  model: 'gemini-2.0-flash',
  contents: 'high',
  config: {systemInstruction: 'I say high you say low.'},
});
console.debug(response.text);

await generateContentFromVertexAI().catch((e) =>
  console.error('got error', e),
);

Go

import (
  "context"
  genai "google.golang.org/genai"
)

config := &genai.GenerateContentConfig{
  SystemInstruction: &genai.Content{
    Parts: []*genai.Part{
      {Text: "You're a language translator. Your mission is to translate text in English to French."},
    },
  },
}

resp, err := client.Models.GenerateContent(ctx, modelName, contents, config)

Embeddings

Embeddings are numerical vector representations of text, images, or video that capture their semantic or visual meaning and relationships in a high-dimensional space. Replace the embedding implementation from the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

model = vertexai.vision_models.MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001")
image = Image.load_from_file("image.png")
video = Video.load_from_file("video.mp4")

embeddings = model.get_embeddings(
  # One of image, video or contextual_text is required.
  image=image,
  video=video,
  contextual_text="Hello world",
)
image_embedding = embeddings.image_embedding
video_embeddings = embeddings.video_embeddings
text_embedding = embeddings.text_embedding

Java

Embeddings are not supported by the Java Vertex AI SDK, but are supported by the Google Gen AI SDK.

JavaScript

Embeddings are not supported by the JavaScript Vertex AI SDK, but are supported by the Google Gen AI SDK.

Go

Embeddings are not supported by the Go Vertex AI SDK, but are supported by the Google Gen AI SDK.

After

Python

from google.genai.types import EmbedContentConfig

client = genai.Client()
response = client.models.embed_content(
  model="gemini-embedding-001",
  contents="How do I get a driver's license/learner's permit?",
  config=EmbedContentConfig(
    task_type="RETRIEVAL_DOCUMENT",  # Optional
    output_dimensionality=3072,  # Optional
    title="Driver's License",  # Optional
  ),
)

Java

import com.google.genai.Client;
import com.google.genai.types.EmbedContentResponse;

EmbedContentResponse response =
    client.models.embedContent("text-embedding-004", "why is the sky blue?", null);

JavaScript

import {GoogleGenAI} from '@google/genai';

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});

const response = await ai.models.embedContent({
  model: 'text-embedding-004',
  contents: 'Hello world!',
});

console.debug(JSON.stringify(response));

await embedContentFromVertexAI().catch((e) =>
  console.error('got error', e),
);

Go

import (
  "context"
  "fmt"
  "google.golang.org/genai"
)

result, err := client.Models.EmbedContent(ctx, *model, genai.Text("What is your name?"), &genai.EmbedContentConfig{TaskType: "RETRIEVAL_QUERY"})
fmt.Printf("%#v\n", result.Embeddings[0])

fmt.Println("Embed content RETRIEVAL_DOCUMENT task type example.")
result, err = client.Models.EmbedContent(ctx, *model, genai.Text("What is your name?"), &genai.EmbedContentConfig{TaskType: "RETRIEVAL_DOCUMENT"})
fmt.Printf("%#v\n", result.Embeddings[0])

Function calling

Function calling enables a model to identify when to invoke an external tool or API and then generate structured data containing the necessary function and arguments for execution. Replace the function calling implementation with the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

get_current_weather_func = generative_models.FunctionDeclaration(
  name="get_current_weather",
  description="Get the current weather in a given location",
  parameters=_REQUEST_FUNCTION_PARAMETER_SCHEMA_STRUCT,
)

weather_tool = generative_models.Tool(
  function_declarations=[get_current_weather_func],
)

model = generative_models.GenerativeModel(
  GEMINI_MODEL_NAME,
  tools=[weather_tool],
)

chat = model.start_chat()

response1 = chat.send_message("What is the weather like in Boston?")
assert (
  response1.candidates[0].content.parts[0].function_call.name
  == "get_current_weather"
)
response2 = chat.send_message(
  generative_models.Part.from_function_response(
    name="get_current_weather",
    response={
      "content": {"weather": "super nice"},
    },
  ),
)
assert response2.text

Java

Tool tool =
    Tool.newBuilder()
        .addFunctionDeclarations(
          FunctionDeclarationMaker.fromJsonString(jsonString)
        )
        .build();

// Start a chat session from a model, with the use of the declared
// function.
GenerativeModel model =
    new GenerativeModel.Builder()
        .setModelName(MODEL_NAME)
        .setVertexAi(vertexAi)
        .setTools(Arrays.asList(tool))
        .build();
ChatSession chat = model.startChat();

System.out.println(String.format("Ask the question: %s", TEXT));
GenerateContentResponse response = chat.sendMessage(TEXT);

// Provide an answer to the model so that it knows what the result of a
// "function call" is.
Content content =
    ContentMaker.fromMultiModalData(
        PartMaker.fromFunctionResponse(
            "getCurrentWeather", Collections.singletonMap("currentWeather", "snowing")));
response = chat.sendMessage(content);

JavaScript

const {
  VertexAI,
  FunctionDeclarationSchemaType,
} = require('@google-cloud/vertexai');

const functionDeclarations = [
  {
    function_declarations: [
      {
        name: 'get_current_weather',
        description: 'get weather in a given location',
        parameters: {
          type: FunctionDeclarationSchemaType.OBJECT,
          properties: {
            location: {type: FunctionDeclarationSchemaType.STRING},
            unit: {
              type: FunctionDeclarationSchemaType.STRING,
              enum: ['celsius', 'fahrenheit'],
            },
          },
          required: ['location'],
        },
      },
    ],
  },
];

async function functionCallingBasic(
  projectId = 'PROJECT_ID',
  location = 'us-central1',
  model = 'gemini-2.0-flash-001'
) {
  // Initialize Vertex with your Cloud project and location
  const vertexAI = new VertexAI({project: projectId, location: location});

  // Instantiate the model
  const generativeModel = vertexAI.preview.getGenerativeModel({
    model: model,
  });

  const request = {
    contents: [
      {role: 'user', parts: [{text: 'What is the weather in Boston?'}]},
    ],
    tools: functionDeclarations,
  };
  const result = await generativeModel.generateContent(request);
  console.log(JSON.stringify(result.response.candidates[0].content));
}

Go

package functioncalling

import (
  "context"
  "encoding/json"
  "errors"
  "fmt"
  "io"

  "cloud.google.com/go/vertexai/genai"
)

funcName := "getCurrentWeather"
funcDecl := &genai.FunctionDeclaration{
  Name:        funcName,
  Description: "Get the current weather in a given location",
  Parameters: &genai.Schema{
    Type: genai.TypeObject,
    Properties: map[string]*genai.Schema{
      "location": {
        Type:        genai.TypeString,
        Description: "location",
      },
    },
    Required: []string{"location"},
  },
}

// Add the weather function to our model toolbox.
model.Tools = []*genai.Tool{
  {
    FunctionDeclarations: []*genai.FunctionDeclaration{funcDecl},
  },
}

prompt := genai.Text("What's the weather like in Boston?")
resp, err := model.GenerateContent(ctx, prompt)

if len(resp.Candidates) == 0 {
  return errors.New("got empty response from model")
} else if len(resp.Candidates[0].FunctionCalls()) == 0 {
  return errors.New("got no function call suggestions from model")
}

funcResp := &genai.FunctionResponse{
  Name: funcName,
  Response: map[string]any{
    "content": mockAPIResp,
  },
}

// Return the API response to the model allowing it to complete its response.
resp, err = model.GenerateContent(ctx, prompt, funcResp)
if err != nil {
  return fmt.Errorf("failed to generate content: %w", err)
}
if len(resp.Candidates) == 0 || len(resp.Candidates[0].Content.Parts) == 0 {
  return errors.New("got empty response from model")
}

After

Python

from google.genai import types

def get_current_weather(location: str) -> str:
  """Returns the current weather.

  Args:
    location: The city and state, e.g. San Francisco, CA
  """
  return 'sunny'

response = client.models.generate_content(
  model='gemini-2.0-flash-001',
  contents='What is the weather like in Boston?',
  config=types.GenerateContentConfig(tools=[get_current_weather]),
)

Java

Use either the Chat or GenerateContent methods to implement function calling.

Using Chat

Declare the methods that will become callable functions:

Method method1 =
    ChatWithFunctionCall.class.getDeclaredMethod("getCurrentWeather", String.class);
Method method2 =
    ChatWithFunctionCall.class.getDeclaredMethod("divideTwoIntegers", int.class, int.class);

Add the two methods as callable functions to the tool within the content config:

GenerateContentConfig config =
    GenerateContentConfig.builder().tools(Tool.builder().functions(method1, method2)).build();

Create a chat session with the config:

Chat chatSession = client.chats.create("gemini-2.0-flash-001", config);

GenerateContentResponse response1 =
    chatSession.sendMessage("what is the weather in San Francisco?");

For the full implementation, see ChatWithFunctionCall.java.

Using GenerateContent

Declare the methods that will become callable functions:

Method method1 =
  GenerateContentWithFunctionCall.class.getMethod(
    "getCurrentWeather", String.class, String.class);
Method method2 =
  GenerateContentWithFunctionCall.class.getMethod(
    "divideTwoIntegers", Integer.class, Integer.class);

Add the two methods as callable functions to the tool within the content config:

GenerateContentConfig config =
    GenerateContentConfig.builder().tools(Tool.builder().functions(method1, method2)).build();

Use generateContent with the config:

GenerateContentResponse response =
    client.models.generateContent(
        "gemini-2.0-flash-001",
        "What is the weather in Vancouver? And can you divide 10 by 0?",
        config);

For the full implementation, see GenerateContentWithFunctionCall.java.

JavaScript

import {
  FunctionCall,
  FunctionCallingConfigMode,
  FunctionDeclaration,
  GoogleGenAI,
  Type,
} from '@google/genai';

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});

const controlLightFunctionDeclaration: FunctionDeclaration = {
  name: 'controlLight',
  parameters: {
    type: Type.OBJECT,
    description: 'Set the brightness and color temperature of a room light.',
    properties: {
      brightness: {
        type: Type.NUMBER,
        description:
          'Light level from 0 to 100. Zero is off and 100 is full brightness.',
      },
      colorTemperature: {
        type: Type.STRING,
        description:
          'Color temperature of the light fixture which can be `daylight`, `cool` or `warm`.',
      },
    },
    required: ['brightness', 'colorTemperature'],
  },
};
const response = await ai.models.generateContent({
  model: 'gemini-2.0-flash',
  contents: 'Dim the lights so the room feels cozy and warm.',
  config: {
    tools: [{functionDeclarations: [controlLightFunctionDeclaration]}],
    toolConfig: {
      functionCallingConfig: {
        mode: FunctionCallingConfigMode.ANY,
        allowedFunctionNames: ['controlLight'],
      },
    },
  },
});

console.debug(response.functionCalls);

Go

package main

import (
  "context"
  "encoding/json"
  "flag"
  "fmt"
  "log"

  "google.golang.org/genai"
)

var model = flag.String("model", "gemini-2.0-flash", "the model name, e.g. gemini-2.0-flash")

func run(ctx context.Context) {
  client, err := genai.NewClient(ctx, nil)
  if err != nil {
    log.Fatal(err)
  }

  funcName := "getCurrentWeather"
  funcDecl := &genai.FunctionDeclaration{
    Name:        funcName,
    Description: "Get the current weather in a given location",
    Parameters: &genai.Schema{
      Type: genai.TypeObject,
      Properties: map[string]*genai.Schema{
        "location": {
          Type:        genai.TypeString,
          Description: "location",
        },
      },
      Required: []string{"location"},
    },
  }
  // Add the weather function to our model toolbox.
  var config *genai.GenerateContentConfig = &genai.GenerateContentConfig{
    Tools: []*genai.Tool{
      {
        FunctionDeclarations: []*genai.FunctionDeclaration{funcDecl},
      },
    },
  }
  // Call the GenerateContent method.
  result, err := client.Models.GenerateContent(ctx, *model, genai.Text("What's the weather like in Boston?"), config)
  if err != nil {
    log.Fatal(err)
  }
  fmt.Println(result.Candidates[0].Content.Parts[0].FunctionCall.Name)

  // Use synthetic data to simulate a response from the external API.
  // In a real application, this would come from an actual weather API.
  mockAPIResp, err := json.Marshal(map[string]string{
    "location":         "Boston",
    "temperature":      "38",
    "temperature_unit": "F",
    "description":      "Cold and cloudy",
    "humidity":         "65",
    "wind":             `{"speed": "10", "direction": "NW"}`,
  })
  if err != nil {
    log.Fatal(err)
  }

  funcResp := &genai.FunctionResponse{
    Name: funcName,
    Response: map[string]any{
      "content": mockAPIResp,
    },
  }

  // Return the API response to the model allowing it to complete its response.
  mockedFunctionResponse := []*genai.Content{
    &genai.Content{
      Role: "user",
      Parts: []*genai.Part{
        &genai.Part{Text: "What's the weather like in Boston?"},
      },
    },
    result.Candidates[0].Content,
    &genai.Content{
      Role: "tool",
      Parts: []*genai.Part{
        &genai.Part{FunctionResponse: funcResp},
      },
    },
  }
  result, err = client.Models.GenerateContent(ctx, *model, mockedFunctionResponse, config)
  if err != nil {
    log.Fatal(err)
  }
  fmt.Println(result.Text())
}

func main() {
  ctx := context.Background()
  flag.Parse()
  run(ctx)
}

Grounding

Grounding is the process of providing a model with external, domain-specific information to improve response accuracy, relevance, and consistency. Replace the grounding implementation with the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

model = generative_models.GenerativeModel(GEMINI_MODEL_NAME)
google_search_retriever_tool = (
  generative_models.Tool.from_google_search_retrieval(
    generative_models.grounding.GoogleSearchRetrieval()
  )
)
response = model.generate_content(
  "Why is sky blue?",
  tools=[google_search_retriever_tool],
  generation_config=generative_models.GenerationConfig(temperature=0),
)

Java

import com.google.cloud.vertexai.api.GroundingMetadata;

Tool googleSearchTool =
  Tool.newBuilder()
    .setGoogleSearch(GoogleSearch.newBuilder())
    .build();

GenerativeModel model =
  new GenerativeModel(modelName, vertexAI)
    .withTools(Collections.singletonList(googleSearchTool));

GenerateContentResponse response = model.generateContent("Why is the sky blue?");

GroundingMetadata groundingMetadata = response.getCandidates(0).getGroundingMetadata();
String answer = ResponseHandler.getText(response);

JavaScript

const {VertexAI} = require('@google-cloud/vertexai');

const vertexAI = new VertexAI({project: projectId, location: location});

const generativeModelPreview = vertexAI.preview.getGenerativeModel({
  model: model,
  generationConfig: {maxOutputTokens: 256},
});

const googleSearchTool = {
  googleSearch: {},
};

const request = {
  contents: [{role: 'user', parts: [{text: 'Why is the sky blue?'}]}],
  tools: [googleSearchTool],
};

const result = await generativeModelPreview.generateContent(request);
const response = await result.response;
const groundingMetadata = response.candidates[0].groundingMetadata;
console.log(
  'Response: ',
  JSON.stringify(response.candidates[0].content.parts[0].text)
);
console.log('GroundingMetadata is: ', JSON.stringify(groundingMetadata));

Go

Grounding is not supported by the Go Vertex AI SDK, but is supported by the Google Gen AI SDK.

After

Python

from google.genai import types
from google.genai import Client

client = Client(
  vertexai=True,
  project=GOOGLE_CLOUD_PROJECT,
  location=GOOGLE_CLOUD_LOCATION
)

response = client.models.generate_content(
  model='gemini-2.0-flash-exp',
  contents='Why is the sky blue?',
  config=types.GenerateContentConfig(
  tools=[types.Tool(google_search=types.GoogleSearch())]),
)

Java

Import the Tool module:

import com.google.genai.types.Tool;

Set the Google Search tool in the config:

Tool googleSearchTool = Tool.builder().googleSearch(GoogleSearch.builder()).build();

Add the tool to the content config:

GenerateContentConfig config =
    GenerateContentConfig.builder()
        ...
        .tools(googleSearchTool)
        .build();

For the full implementation, see GenerateContentWithConfigs.java.

JavaScript

import {GoogleGenAI} from '@google/genai';

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});
const response = await ai.models.generateContent({
  model: 'gemini-2.0-flash',
  contents:
    'What is the sum of the first 50 prime numbers? Generate and run code for the calculation, and make sure you get all 50.',
  config: {
    tools: [{googleSearch: {}}],
  },
});
console.debug(JSON.stringify(response?.candidates?.[0]?.groundingMetadata));

Go

package main

import (
  "context"
  "flag"
  "fmt"
  "log"

  "google.golang.org/genai"
)

var model = flag.String("model", "gemini-2.0-flash", "the model name, e.g. gemini-2.0-flash")

func run(ctx context.Context) {
  client, err := genai.NewClient(ctx, nil)
  if err != nil {
    log.Fatal(err)
  }

  // Add the Google Search grounding tool to the GenerateContentConfig.
  var config *genai.GenerateContentConfig = &genai.GenerateContentConfig{
    Tools: []*genai.Tool{
      {
        GoogleSearch: &genai.GoogleSearch{},
      },
    },
  }
  // Call the GenerateContent method.
  result, err := client.Models.GenerateContent(ctx, *model, genai.Text("Why is the sky blue?"), config)
  if err != nil {
    log.Fatal(err)
  }
  fmt.Println(result.Text())
}

func main() {
  ctx := context.Background()
  flag.Parse()
  run(ctx)
}

Safety settings

Safety settings are configurable parameters that allow users to manage model responses by filtering or blocking content related to specific harmful categories, such as hate speech, sexual content, or violence. Replace the safety settings implementation with the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

model = generative_models.GenerativeModel(
  GEMINI_MODEL_NAME,
  system_instruction=[
    "Talk like a pirate.",
    "Don't use rude words.",
  ],
)
response = model.generate_content(
  contents="Why is sky blue?",
  generation_config=generative_models.GenerationConfig(
    temperature=0,
    top_p=0.95,
    top_k=20,
    candidate_count=1,
    max_output_tokens=100,
    stop_sequences=["STOP!"],
    response_logprobs=True,
    logprobs=3,
  ),
  safety_settings={
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_NONE,
  },
)

Java

import com.google.cloud.vertexai.api.SafetySetting;
import com.google.cloud.vertexai.api.SafetySetting.HarmBlockThreshold;

SafetySetting safetySetting =
  SafetySetting.newBuilder()
    .setCategory(HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT)
    .setThreshold(HarmBlockThreshold.BLOCK_LOW_AND_ABOVE)
    .build();

GenerateContentResponse response =
  model
    .withSafetySetting(Arrays.asList(SafetySetting))
    .generateContent("Please explain LLM?");

JavaScript

const {
  VertexAI,
  HarmCategory,
  HarmBlockThreshold,
} = require('@google-cloud/vertexai');

// Initialize Vertex with your Cloud project and location
const vertexAI = new VertexAI({project: PROJECT_ID, location: LOCATION});

// Instantiate the model
const generativeModel = vertexAI.getGenerativeModel({
  model: MODEL,
  safetySettings: [
    {
      category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
      threshold: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    },
    {
      category: HarmCategory.HARM_CATEGORY_HARASSMENT,
      threshold: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    },
  ],
});

const request = {
  contents: [{role: 'user', parts: [{text: 'Tell me something dangerous.'}]}],
};

console.log('Prompt:');
console.log(request.contents[0].parts[0].text);
console.log('Streaming Response Text:');

// Create the response stream
const responseStream = await generativeModel.generateContentStream(request);

// Log the text response as it streams
for await (const item of responseStream.stream) {
  if (item.candidates[0].finishReason === 'SAFETY') {
    console.log('This response stream terminated due to safety concerns.');
    break;
  } else {
    process.stdout.write(item.candidates[0].content.parts[0].text);
  }
}
console.log('This response stream terminated due to safety concerns.');

Go

package safetysettings

import (
  "context"
  "fmt"
  "io"

  "cloud.google.com/go/vertexai/genai"
)

// generateContent generates text from prompt and configurations provided.
func generateContent(w io.Writer, projectID, location, modelName string) error {
  // location := "us-central1"
  // model := "gemini-2.0-flash-001"
  ctx := context.Background()

  client, err := genai.NewClient(ctx, projectID, location)
  if err != nil {
    return err
  }
  defer client.Close()

  model := client.GenerativeModel(modelName)
  model.SetTemperature(0.8)

  // configure the safety settings thresholds
  model.SafetySettings = []*genai.SafetySetting{
    {
      Category:  genai.HarmCategoryHarassment,
      Threshold: genai.HarmBlockLowAndAbove,
    },
    {
      Category:  genai.HarmCategoryDangerousContent,
      Threshold: genai.HarmBlockLowAndAbove,
    },
  }

  res, err := model.GenerateContent(ctx, genai.Text("Hello, say something mean to me."))
  if err != nil {
    return fmt.Errorf("unable to generate content: %v", err)
  }
  fmt.Fprintf(w, "generate-content response: %v\n", res.Candidates[0].Content.Parts[0])

  fmt.Fprintf(w, "safety ratings:\n")
  for _, r := range res.Candidates[0].SafetyRatings {
    fmt.Fprintf(w, "\t%+v\n", r)
  }

  return nil
}

After

Python

from google.genai import types

response = client.models.generate_content(
  model='gemini-2.0-flash-001',
  contents='Say something bad.',
  config=types.GenerateContentConfig(
    safety_settings=[
      types.SafetySetting(
        category='HARM_CATEGORY_HATE_SPEECH',
        threshold='BLOCK_ONLY_HIGH',
      )
    ]
  ),
)

Java

Import the HarmBlockThreshold, HarmCategory, and SafetySetting modules:

import com.google.genai.types.HarmBlockThreshold;
import com.google.genai.types.HarmCategory;
import com.google.genai.types.SafetySetting;

Set the safety settings in the config:

ImmutableList<SafetySetting> safetySettings =
  ImmutableList.of(
    SafetySetting.builder()
      .category(HarmCategory.Known.HARM_CATEGORY_HATE_SPEECH)
      .threshold(HarmBlockThreshold.Known.BLOCK_ONLY_HIGH)
      .build(),
    SafetySetting.builder()
      .category(HarmCategory.Known.HARM_CATEGORY_DANGEROUS_CONTENT)
      .threshold(HarmBlockThreshold.Known.BLOCK_LOW_AND_ABOVE)
      .build());

Add the safety settings to the content config:

GenerateContentConfig config =
GenerateContentConfig.builder()
    ...
    .safetySettings(safetySettings)
    .build();

For the full implementation, see GenerateContentWithConfigs.java.

JavaScript

import {
  GoogleGenAI,
  HarmBlockMethod,
  HarmBlockThreshold,
  HarmCategory,
} from '@google/genai';

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});

const response = await ai.models.generateContent({
  model: 'gemini-2.0-flash',
  contents: 'say something bad',
  config: {
    safetySettings: [
      {
        method: HarmBlockMethod.SEVERITY,
        category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        threshold: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
      },
      {
        method: HarmBlockMethod.SEVERITY,
        category: HarmCategory.HARM_CATEGORY_HARASSMENT,
        threshold: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
      },
    ],
  },
});

console.debug(JSON.stringify(response?.candidates?.[0]?.safetyRatings));

Go

package main

import (
  "context"
  "flag"
  "fmt"
  "log"

  "google.golang.org/genai"
)

var model = flag.String("model", "gemini-2.0-flash", "the model name, e.g. gemini-2.0-flash")

func run(ctx context.Context) {
  client, err := genai.NewClient(ctx, nil)
  if err != nil {
    log.Fatal(err)
  }

  var safetySettings []*genai.SafetySetting = []*genai.SafetySetting{
    {
      Category:  genai.HarmCategoryHarassment,
      Threshold: genai.HarmBlockThresholdBlockMediumAndAbove,
    },
    {
      Category:  genai.HarmCategoryDangerousContent,
      Threshold: genai.HarmBlockThresholdBlockMediumAndAbove,
    },
  }
  var config *genai.GenerateContentConfig = &genai.GenerateContentConfig{
    SafetySettings: safetySettings,
  }
  // Call the GenerateContent method.
  result, err := client.Models.GenerateContent(ctx, *model, genai.Text("What is your name?"), config)
  if err != nil {
    log.Fatal(err)
  }
  fmt.Println(result.Text())
}

func main() {
  ctx := context.Background()
  flag.Parse()
  run(ctx)
}

Chat sessions

Chat sessions are conversational interactions where the model maintains context over multiple turns by recalling previous messages and using them to inform current responses. Replace the implementation from the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

model = GenerativeModel(
  "gemini-pro",
  # You can specify tools when creating a model to avoid having to send them with every request.
  tools=[weather_tool],
  tool_config=tool_config,
)
chat = model.start_chat()
print(chat.send_message("What is the weather like in Boston?"))
print(chat.send_message(
  Part.from_function_response(
    name="get_current_weather",
    response={
      "content": {"weather_there": "super nice"},
      }
  ),
))

Java

import com.google.cloud.vertexai.generativeai.ChatSession;

GenerativeModel model = new GenerativeModel("gemini-pro", vertexAi);
ChatSession chat = model.startChat();

ResponseStream<GenerateContentResponse> response = chat
  .sendMessageStream("Can you tell me a story about cheese in 100 words?");
ResponseStream<GenerateContentResponse> anotherResponse = chat
  .sendMessageStream("Can you modify the story to be written for a 5 year old?");

JavaScript

const {VertexAI} = require('@google-cloud/vertexai');

const chat = generativeModel.startChat({});

const result1 = await chat.sendMessage('Hello');
const response1 = await result1.response;
console.log('Chat response 1: ', JSON.stringify(response1));

const result2 = await chat.sendMessage(
  'Can you tell me a scientific fun fact?'
);
const response2 = await result2.response;
console.log('Chat response 2: ', JSON.stringify(response2));

Go

import (
  "context"
  "errors"
  "fmt"

  "cloud.google.com/go/vertexai/genai"
)

prompt := "Do you have the Pixel 8 Pro in stock?"
fmt.Fprintf(w, "Question: %s\n", prompt)
resp, err := chat.SendMessage(ctx, genai.Text(prompt))

After

Python

Synchronous

chat = client.chats.create(model='gemini-2.0-flash-001')
response = chat.send_message('tell me a story')
print(response.text)
response = chat.send_message('summarize the story you told me in 1 sentence')
print(response.text)

Asynchronous

chat = client.aio.chats.create(model='gemini-2.0-flash-001')
response = await chat.send_message('tell me a story')
print(response.text)

Synchronous streaming

chat = client.chats.create(model='gemini-2.0-flash-001')
for chunk in chat.send_message_stream('tell me a story'):
    print(chunk.text, end='')

Asynchronous streaming

chat = client.aio.chats.create(model='gemini-2.0-flash-001')
async for chunk in await chat.send_message_stream('tell me a story'):
    print(chunk.text, end='') # end='' is optional, for demo purposes.

Java

Import the Chat and GenerateContentResponse modules:

import com.google.genai.Chat;
import com.google.genai.types.GenerateContentResponse;

Create a chat session:

Chat chatSession = client.chats.create("gemini-2.0-flash-001");

Use GenerateContentResponse to provide prompts:

GenerateContentResponse response =
    chatSession
      .sendMessage("Can you tell me a story about cheese in 100 words?");
// Gets the text string from the response by the quick accessor method `text()`.
System.out.println("Unary response: " + response.text());

GenerateContentResponse response2 =
    chatSession
      .sendMessage("Can you modify the story to be written for a 5 year old?");
// Gets the text string from the second response.
System.out.println("Unary response: " + response2.text());

For the full implementation, see ChatWithHistory.java.

JavaScript

import {GoogleGenAI} from '@google/genai';
const chat = ai.chats.create({model: 'gemini-2.0-flash'});

const response = await chat.sendMessage({message: 'Why is the sky blue?'});
console.debug('chat response 1: ', response.text);
const response2 = await chat.sendMessage({message: 'Why is the sunset red?'});
console.debug('chat response 2: ', response2.text);

const history = chat.getHistory();
for (const content of history) {
  console.debug('chat history: ', JSON.stringify(content, null, 2));
}

Go

package main

import (
  "context"
  "flag"
  "fmt"
  "log"

  "google.golang.org/genai"
)

var model = flag.String("model", "gemini-2.0-flash", "the model name, e.g. gemini-2.0-flash")

var config *genai.GenerateContentConfig = &genai.GenerateContentConfig{Temperature: genai.Ptr[float32](0.5)}

// Create a new Chat.
chat, err := client.Chats.Create(ctx, *model, config, nil)

// Send first chat message.
result, err := chat.SendMessage(ctx, genai.Part{Text: "What's the weather in San Francisco?"})
if err != nil {
  log.Fatal(err)
}
fmt.Println(result.Text())

// Send second chat message.
result, err = chat.SendMessage(ctx, genai.Part{Text: "How about New York?"})
if err != nil {
  log.Fatal(err)
}
fmt.Println(result.Text())

Multimodal inputs

Multimodal inputs refers to the ability of a model to process and understand information from data types beyond text, such as images, audio, and video. Replace the implementation with the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

from vertexai.generative_models import GenerativeModel, Image
vision_model = GenerativeModel("gemini-pro-vision")

# Local image
image = Image.load_from_file("image.jpg")
print(vision_model.generate_content(["What is shown in this image?", image]))

# Image from Cloud Storage
image_part = generative_models.Part.from_uri("gs://download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg", mime_type="image/jpeg")
print(vision_model.generate_content([image_part, "Describe this image?"]))

# Text and video
video_part = Part.from_uri("gs://cloud-samples-data/video/animals.mp4", mime_type="video/mp4")
print(vision_model.generate_content(["What is in the video? ", video_part]))

Java

import com.google.cloud.vertexai.generativeai.ContentMaker;

GenerativeModel model = new GenerativeModel("gemini-pro-vision", vertexAi);

ResponseStream<GenerateContentResponse> stream =
  model.generateContentStream(ContentMaker.fromMultiModalData(
    "Please describe this image",
    PartMaker.fromMimeTypeAndData("image/jpeg", IMAGE_URI)
  ));

JavaScript

const {VertexAI, HarmBlockThreshold, HarmCategory} = require('@google-cloud/vertexai');

// Initialize Vertex with your Cloud project and location
const vertex_ai = new VertexAI({project: project, location: location});

// Instantiate the model
const generativeVisionModel = vertex_ai.getGenerativeModel({
    model: 'gemini-ultra-vision',
});

async function multiPartContent() {
    const filePart = {file_data: {file_uri: "gs://sararob_imagegeneration_test/kitten.jpeg", mime_type: "image/jpeg"}};
    const textPart = {text: 'What is this picture about?'};

    const request = {
        contents: [{role: 'user', parts: [textPart, filePart]}],
      };

    const resp = await generativeVisionModel.generateContentStream(request);
    const contentResponse = await resp.response;
    console.log(JSON.stringify(contentResponse));
}

multiPartContent();

Go

Images

import (
  "context"
  "encoding/json"
  "fmt"
  "io"

  "cloud.google.com/go/vertexai/genai"
)

img := genai.FileData{
  MIMEType: "image/jpeg",
  FileURI:  "gs://generativeai-downloads/images/scones.jpg",
}
prompt := genai.Text("What is in this image?")

resp, err := gemini.GenerateContent(ctx, img, prompt)
if err != nil {
  return fmt.Errorf("error generating content: %w", err)
}

Video

package multimodalvideoaudio

import (
  "context"
  "errors"
  "fmt"
  "io"
  "mime"
  "path/filepath"

  "cloud.google.com/go/vertexai/genai"
)

part := genai.FileData{
  MIMEType: mime.TypeByExtension(filepath.Ext("pixel8.mp4")),
  FileURI:  "gs://cloud-samples-data/generative-ai/video/pixel8.mp4",
}

res, err := model.GenerateContent(ctx, part, genai.Text(`
    Provide a description of the video.
    The description should also contain anything important which people say in the video.
`))

After

Python

from google import genai
from google.genai.types import HttpOptions, Part

client = genai.Client(http_options=HttpOptions(api_version="v1"))
response = client.models.generate_content(
    model="gemini-2.5-flash-preview-05-20",
    contents=[
        Part.from_uri(
            file_uri="gs://cloud-samples-data/generative-ai/video/ad_copy_from_video.mp4",
            mime_type="video/mp4",
        ),
        "What is in the video?",
    ],
)
print(response.text)

Java

Import the GenerateContentResponse module:

import com.google.genai.types.GenerateContentResponse;

Provide a combination of text, image, and video for multimodal prompting:

Content content =
  Content.fromParts(
    Part.fromText("describe the image"),
    Part.fromUri("gs://cloud-samples-data/generative-ai/image/scones.jpg", "image/jpeg"));

Provide the combined prompt to the model:

GenerateContentResponse response =
  client.models.generateContent("gemini-2.0-flash-001", content, null);

For the full implementation, see GenerateContentWithImageInput.java.

JavaScript

const filePart = {file_data: {file_uri: "gs://sararob_imagegeneration_test/kitten.jpeg", mime_type: "image/jpeg"}};
const textPart = {text: 'What is this picture about?'};
const contents = [{role: 'user', parts: [textPart, filePart]}];
const response = await ai.models.generateContentStream({
  model: 'gemini-2.0-flash-exp',
  contents: contents,
});
let i = 0;
for await (const chunk of response) {
  const text = chunk.text;
  if (text) {
    console.debug(text);
  }
}

Go

Images

package main

import (
  "context"
  "encoding/json"
  "flag"
  "fmt"
  "log"

  "google.golang.org/genai"
)

config := &genai.GenerateContentConfig{}
config.ResponseModalities = []string{"IMAGE", "TEXT"}
// Call the GenerateContent method.
result, err := client.Models.GenerateContent(ctx, *model, genai.Text("Generate a story about a cute baby turtle in a 3d digital art style. For each scene, generate an image."), config)
if err != nil {
  log.Fatal(err)
}

Video and Audio

package multimodalvideoaudio

import (
  "context"
  "errors"
  "fmt"
  "io"
  "mime"
  "path/filepath"

  "cloud.google.com/go/vertexai/genai"
)

part := genai.FileData{
  MIMEType: mime.TypeByExtension(filepath.Ext("pixel8.mp4")),
  FileURI:  "gs://cloud-samples-data/generative-ai/video/pixel8.mp4",
}

res, err := model.GenerateContent(ctx, part, genai.Text(`
    Provide a description of the video.
    The description should also contain anything important which people say in the video.
`))

Text generation

Text generation is the process by which a model produces human-like written content based on a given prompt. Replace the implementation with the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Synchronous generation

Before

Python

response = model.generate_content(
  "Why is sky blue?",
  generation_config=generative_models.GenerationConfig(temperature=0),
)
assert response.text

Java

import com.google.cloud.vertexai.api.GenerateContentResponse;
GenerativeModel model = new GenerativeModel("gemini-pro", vertexAi);
GenerateContentResponse response = model.generateContent("How are you?");

JavaScript

Both the Vertex AI SDK and Google Gen AI SDK only support asynchronous text generation for JavaScript.

Go

gemini := client.GenerativeModel(modelName)
prompt := genai.Text(
  "What's a good name for a flower shop that specializes in selling bouquets of dried flowers?")

resp, err := gemini.GenerateContent(ctx, prompt)

After

Python

response = client.models.generate_content(
  model='gemini-2.0-flash-001', contents='Why is the sky blue?'
)
print(response.text)

Java

Import the GenerateContentResponse module:

import com.google.genai.types.GenerateContentResponse;

Generate text with generateContent:

GenerateContentResponse response =
  client.models.generateContent("gemini-2.0-flash-001", "What is your name?", null);

For the full implementation, see GenerateContent.java.

JavaScript

Both the Vertex AI SDK and Google Gen AI SDK only support asynchronous text generation for JavaScript.

Go

var config *genai.GenerateContentConfig = &genai.GenerateContentConfig{Temperature: genai.Ptr[float32](0)}
// Call the GenerateContent method.
result, err := client.Models.GenerateContent(ctx, *model, genai.Text("What is your name?"), config)

Asynchronous generation

Before

Python

response = await model.generate_content_async(
  "Why is sky blue?",
  generation_config=generative_models.GenerationConfig(temperature=0),
)

Java

import com.google.cloud.vertexai.api.GenerateContentResponse;

GenerativeModel model = new GenerativeModel("gemini-pro", vertexAi);
ApiFuture<GenerateContentResponse> future = model.generateContentAsync("How are you?");
GenerateContentResponse response = future.get();

JavaScript

const {VertexAI} = require('@google-cloud/vertexai');

// Initialize Vertex with your Cloud project and location
const vertexAI = new VertexAI({project: projectId, location: location});

// Instantiate the model
const generativeModel = vertexAI.getGenerativeModel({
  model: model,
});

const request = {
  contents: [
    {
      role: 'user',
      parts: [
        {
          text: 'Write a story about a magic backpack.',
        },
      ],
    },
  ],
};

console.log(JSON.stringify(request));
const result = await generativeModel.generateContent(request);
console.log(result.response.text);

Go

Not applicable: Go manages concurrent tasks without asynchronous operations.

After

Python

response = await client.aio.models.generate_content(
  model='gemini-2.0-flash-001', contents='Tell me a story in 300 words.'
)

print(response.text)

Java

Import the GenerateContentResponse module:

import com.google.genai.types.GenerateContentResponse;

Generate text asynchronously:

CompletableFuture<GenerateContentResponse> responseFuture =
  client.async.models.generateContent(
    "gemini-2.0-flash-001", "Introduce Google AI Studio.", null);

responseFuture
  .thenAccept(
    response -> {
      System.out.println("Async response: " + response.text());
    })
  .join();

For the full implementation, see GenerateContentAsync.java.

JavaScript

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});

const response = await ai.models.generateContent({
  model: 'gemini-2.0-flash',
  contents: 'why is the sky blue?',
});

console.debug(response.text);

Go

Not applicable: Go manages concurrent tasks without asynchronous operations.

Streaming

Before

Python

Synchronous streaming

stream = model.generate_content(
  "Why is sky blue?",
  stream=True,
  generation_config=generative_models.GenerationConfig(temperature=0),
)
for chunk in stream:
  assert (
    chunk.text
    or chunk.candidates[0].finish_reason
    is generative_models.FinishReason.STOP
  )

Asynchronous streaming

async_stream = await model.generate_content_async(
  "Why is sky blue?",
  stream=True,
  generation_config=generative_models.GenerationConfig(temperature=0),
)
async for chunk in async_stream:
  assert (
    chunk.text
    or chunk.candidates[0].finish_reason
    is generative_models.FinishReason.STOP
  )

Java

import com.google.cloud.vertexai.generativeai.ResponseStream;
import com.google.cloud.vertexai.api.GenerateContentResponse;

GenerativeModel model = new GenerativeModel("gemini-pro", vertexAi);
ResponseStream<GenerateContentResponse> responseStream = model.generateContentStream("How are you?");

JavaScript

// Initialize Vertex with your Cloud project and location
const vertexAI = new VertexAI({project: projectId, location: location});

// Instantiate the model
const generativeModel = vertexAI.getGenerativeModel({
  model: model,
});

const request = {
  contents: [{role: 'user', parts: [{text: 'What is Node.js?'}]}],
};

console.log('Prompt:');
console.log(request.contents[0].parts[0].text);
console.log('Streaming Response Text:');

// Create the response stream
const responseStream = await generativeModel.generateContentStream(request);

// Log the text response as it streams
for await (const item of responseStream.stream) {
  process.stdout.write(item.candidates[0].content.parts[0].text);
}

Go

package streamtextbasic

import (
  "context"
  "errors"
  "fmt"
  "io"

  "cloud.google.com/go/vertexai/genai"
  "google.golang.org/api/iterator"
)

model := client.GenerativeModel(modelName)

iter := model.GenerateContentStream(
  ctx,
  genai.Text("Write a story about a magic backpack."),
)
for {
  resp, err := iter.Next()
  fmt.Fprint(w, "generated response: ")
  for _, c := range resp.Candidates {
    for _, p := range c.Content.Parts {
      fmt.Fprintf(w, "%s ", p)
    }
  }
}

After

Python

Synchronous streaming

for chunk in client.models.generate_content_stream(
  model='gemini-2.0-flash-001', contents='Tell me a story in 300 words.'
):
  print(chunk.text, end='')

Asynchronous streaming

async for chunk in await client.aio.models.generate_content_stream(
  model='gemini-2.0-flash-001', contents='Tell me a story in 300 words.'
):
  print(chunk.text, end='')

Java

Import the ResponseStream and GenerateContentResponse modules:

import com.google.genai.ResponseStream;
import com.google.genai.types.GenerateContentResponse;

Provide the model a prompt and stream the results:

ResponseStream<GenerateContentResponse> responseStream =
  client.models.generateContentStream(
    "gemini-2.0-flash-001", "Tell me a story in 300 words.", null);

System.out.println("Streaming response: ");
for (GenerateContentResponse res : responseStream) {
  System.out.print(res.text());
}

For the full implementation, see GenerateContentAsync.java.

JavaScript

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});

const response = await ai.models.generateContentStream({
  model: 'gemini-2.0-flash-exp',
  contents:
    'Generate a story about a cute baby turtle in a 3d digital art style. For each scene, generate an image.',
  config: {
    responseModalities: [Modality.IMAGE, Modality.TEXT],
  },
});

let i = 0;
for await (const chunk of response) {
  const text = chunk.text;
  const data = chunk.data;
  if (text) {
    console.debug(text);
  } else if (data) {
    const fileName = `generate_content_streaming_image_${i++}.png`;
    console.debug(`Writing response image to file: ${fileName}.`);
    fs.writeFileSync(fileName, data);
  }
}

Go

client, err := genai.NewClient(ctx, nil)
var config *genai.GenerateContentConfig = &genai.GenerateContentConfig{SystemInstruction: &genai.Content{Parts: []*genai.Part{&genai.Part{Text: "You are a story writer."}}}}
// Call the GenerateContent method.
for result, err := range client.Models.GenerateContentStream(ctx, *model, genai.Text("Tell me a story in 300 words."), config) {
  if err != nil {
    log.Fatal(err)
  }
  fmt.Print(result.Text())
}

Image generation

Image generation is the process by which a models creates images from textual descriptions or other input modalities. Replace the implementation with the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

model = ImageGenerationModel.from_pretrained("imagegeneration@002")
response = model.generate_images(
    prompt="Astronaut riding a horse",
    # Optional:
    number_of_images=1,
    seed=0,
)
response[0].show()
response[0].save("image1.png")

Java

Image generation is not supported by the Java Vertex AI SDK, but is supported by the Google Gen AI SDK.

JavaScript

Image generation is not supported by the JavaScript Vertex AI SDK, but is supported by the Google Gen AI SDK.

Go

Image generation is not supported by the Go Vertex AI SDK, but is supported by the Google Gen AI SDK.

After

Python

from google.genai import types

# Generate Image
response1 = client.models.generate_images(
    model='imagen-3.0-generate-002',
    prompt='An umbrella in the foreground, and a rainy night sky in the background',
    config=types.GenerateImagesConfig(
        number_of_images=1,
        include_rai_reason=True,
        output_mime_type='image/jpeg',
    ),
)
response1.generated_images[0].image.show()

Java

import com.google.genai.types.GenerateImagesConfig;
import com.google.genai.types.GenerateImagesResponse;
import com.google.genai.types.Image;

GenerateImagesConfig generateImagesConfig =
    GenerateImagesConfig.builder()
        .numberOfImages(1)
        .outputMimeType("image/jpeg")
        .includeSafetyAttributes(true)
        .build();

GenerateImagesResponse generatedImagesResponse =
    client.models.generateImages(
        "imagen-3.0-generate-002", "Robot holding a red skateboard", generateImagesConfig);

Image generatedImage = generatedImagesResponse.generatedImages().get().get(0).image().get();

JavaScript

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});
const response = await ai.models.generateImages({
  model: 'imagen-3.0-generate-002',
  prompt: 'Robot holding a red skateboard',
  config: {
    numberOfImages: 1,
    includeRaiReason: true,
  },
});

console.debug(response?.generatedImages?.[0]?.image?.imageBytes);

Go

import (
  "encoding/json"
  "google.golang.org/genai"
)

fmt.Println("Generate image example.")
response1, err := client.Models.GenerateImages(
  ctx, "imagen-3.0-generate-002",
  /*prompt=*/ "An umbrella in the foreground, and a rainy night sky in the background",
  &genai.GenerateImagesConfig{
    IncludeRAIReason:        true,
    IncludeSafetyAttributes: true,
    OutputMIMEType:          "image/jpeg",
  },
)

Controlled generation

controlled generation refers to the process of guiding model output to adhere to specific constraints, formats, styles, or attributes, rather than generating free-form text. Replace the implementation with the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

_RESPONSE_SCHEMA_STRUCT = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
        },
    },
    "required": ["location"],
}

response = model.generate_content(
    contents="Why is sky blue? Respond in JSON Format.",
    generation_config=generative_models.GenerationConfig(
        ...
        response_schema=_RESPONSE_SCHEMA_STRUCT,
    ),
)

Java

import com.google.cloud.vertexai.api.Schema;
import com.google.cloud.vertexai.api.Type;
import com.google.cloud.vertexai.generativeai.ContentMaker;
import com.google.cloud.vertexai.generativeai.PartMaker;

GenerationConfig generationConfig = GenerationConfig.newBuilder()
  .setResponseMimeType("application/json")
  .setResponseSchema(Schema.newBuilder()
    .setType(Type.ARRAY)
    .setItems(Schema.newBuilder()
      .setType(Type.OBJECT)
      .putProperties("object", Schema.newBuilder().setType(Type.STRING).build())
      .build())
    .build())
  .build();

GenerativeModel model = new GenerativeModel(modelName, vertexAI)
  .withGenerationConfig(generationConfig);

GenerateContentResponse response = model.generateContent(
  ContentMaker.fromMultiModalData(
    PartMaker.fromMimeTypeAndData("image/jpeg",
      "gs://cloud-samples-data/generative-ai/image/office-desk.jpeg"),
    PartMaker.fromMimeTypeAndData("image/jpeg",
      "gs://cloud-samples-data/generative-ai/image/gardening-tools.jpeg"),
    "Generate a list of objects in the images."
  )
);

JavaScript

// Initialize Vertex with your Cloud project and location
const vertex_ai = new VertexAI({project: project, location: location});

// Instantiate the model
const responseSchema = {
  type: 'ARRAY',
  items: {
    type: 'OBJECT',
    properties: {
      'recipeName': {
        type: 'STRING',
        description: 'Name of the recipe',
        nullable: false,
      },
    },
    required: ['recipeName'],
  },
};

const generativeModel = vertex_ai.getGenerativeModel({
    model: 'gemini-2.0-flash',
    generationConfig: {
      responseSchema: responseSchema,
      responseMimeType: 'application/json',
    }
  });

async function generateContentControlledOutput() {

  const req = {
    contents: [{role: 'user', parts: [{text: 'list 3 popular cookie recipe'}]}],
  };

  const resp = await generativeModel.generateContent(req);

  console.log('aggregated response: ', JSON.stringify(resp.response));
};

generateContentControlledOutput();

Go

import (
  "context"
  "cloud.google.com/go/vertexai/genai"
)

model.GenerationConfig.ResponseMIMEType = "application/json"

// Build an OpenAPI schema, in memory
model.GenerationConfig.ResponseSchema = &genai.Schema{
  Type: genai.TypeArray,
  Items: &genai.Schema{
    Type: genai.TypeArray,
    Items: &genai.Schema{
      Type: genai.TypeObject,
      Properties: map[string]*genai.Schema{
        "object": {
          Type: genai.TypeString,
        },
      },
    },
  },
}

img1 := genai.FileData{
  MIMEType: "image/jpeg",
  FileURI:  "gs://cloud-samples-data/generative-ai/image/office-desk.jpeg",
}

img2 := genai.FileData{
  MIMEType: "image/jpeg",
  FileURI:  "gs://cloud-samples-data/generative-ai/image/gardening-tools.jpeg",
}

prompt := "Generate a list of objects in the images."

res, err := model.GenerateContent(ctx, img1, img2, genai.Text(prompt))

After

Python

response_schema = {
  "type": "ARRAY",
  "items": {
    "type": "OBJECT",
    "properties": {
      "recipe_name": {"type": "STRING"},
      "ingredients": {"type": "ARRAY", "items": {"type": "STRING"}},
    },
    "required": ["recipe_name", "ingredients"],
  },
}

prompt = """
  List a few popular cookie recipes.
"""

response = client.models.generate_content(
  model="gemini-2.5-flash-preview-05-20",
  contents=prompt,
  config={
    "response_mime_type": "application/json",
    "response_schema": response_schema,
  },
)

Java

Import the Schema and Type modules:

import com.google.genai.types.Schema;
import com.google.genai.types.Type;

Create the response schema:

Schema schema =
  Schema.builder()
    .type(Type.Known.ARRAY)
    .items(
      Schema.builder()
        .type(Type.Known.OBJECT)
        .properties(
          ImmutableMap.of(
            "recipe_name",
            Schema.builder().type(Type.Known.STRING).build(),
            "ingredients",
            Schema.builder()
              .type(Type.Known.ARRAY)
              .items(Schema.builder().type(Type.Known.STRING))
              .build()))
        .required("recipe_name", "ingredients"))
    .build();

Add the schema to the content config:

GenerateContentConfig config =
  GenerateContentConfig.builder()
    .responseMimeType("application/json")
    .candidateCount(1)
    .responseSchema(schema)
    .build();

Generate responses with the config:

GenerateContentResponse response =
client.models.generateContent(
    "gemini-2.0-flash-001", "List a few popular cookie recipes.", config);

For the full implementation, see GenerateContentWithResponseSchema.java.

JavaScript

const ai = new GoogleGenAI({
  vertexai: true,
  project: GOOGLE_CLOUD_PROJECT,
  location: GOOGLE_CLOUD_LOCATION,
});

const response = await ai.models.generateContent({
  model: 'gemini-2.0-flash',
  contents: 'List 3 popular cookie recipes.',
  config: {
    responseMimeType: 'application/json',
    responseSchema: {
      type: Type.ARRAY,
      items: {
        type: Type.OBJECT,
        properties: {
          'recipeName': {
            type: Type.STRING,
            description: 'Name of the recipe',
            nullable: false,
          },
        },
        required: ['recipeName'],
      },
    },
  },
});

console.debug(response.text);

Go

import (
  "context"
  "encoding/json"

  genai "google.golang.org/genai"
)

cacheContents := []*genai.Content{
  {
    Parts: []*genai.Part{
      {FileData: &genai.FileData{
        FileURI:  "gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf",
        MIMEType: "application/pdf",
      }},
      {FileData: &genai.FileData{
        FileURI:  "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
        MIMEType: "application/pdf",
      }},
    },
    Role: "user",
  },
}

config := &genai.CreateCachedContentConfig{
  Contents: cacheContents,
  SystemInstruction: &genai.Content{
    Parts: []*genai.Part{
      {Text: systemInstruction},
    },
  },
  DisplayName: "example-cache",
  TTL:         "86400s",
}

res, err := client.Caches.Create(ctx, modelName, config)
if err != nil {
  return "", fmt.Errorf("failed to create content cache: %w", err)
}

cachedContent, err := json.MarshalIndent(res, "", "  ")
if err != nil {
  return "", fmt.Errorf("failed to marshal cache info: %w", err)
}

Count tokens

Tokens are the fundamental units of text (letters, words, phrases) that models process, analyze, and generate. To count or compute tokens in a response, replace the implementation with the Vertex AI SDK with the following code that uses the Google Gen AI SDK.

Before

Python

content = ["Why is sky blue?", "Explain it like I'm 5."]

response = model.count_tokens(content)

Java

import com.google.cloud.vertexai.api.CountTokensResponse;

CountTokensResponse response = model.countTokens(textPrompt);

int promptTokenCount = response.getTotalTokens();
int promptCharCount = response.getTotalBillableCharacters();

GenerateContentResponse contentResponse = model.generateContent(textPrompt);

int tokenCount = contentResponse.getUsageMetadata().getPromptTokenCount();
int candidateTokenCount = contentResponse.getUsageMetadata().getCandidatesTokenCount();
int totalTokenCount = contentResponse.getUsageMetadata().getTotalTokenCount();

JavaScript

const request = {
    contents: [{role: 'user', parts: [{text: 'How are you doing today?'}]}],
  };
const response = await generativeModel.countTokens(request);
console.log('count tokens response: ', JSON.stringify(response));

Go

package tokencount

import (
  "context"
  "fmt"
  "cloud.google.com/go/vertexai/genai"
)

resp, err := model.CountTokens(ctx, prompt)

fmt.Fprintf(w, "Number of tokens for the prompt: %d\n", resp.TotalTokens)

resp2, err := model.GenerateContent(ctx, prompt)

fmt.Fprintf(w, "Number of tokens for the prompt: %d\n", resp2.UsageMetadata.PromptTokenCount)
fmt.Fprintf(w, "Number of tokens for the candidates: %d\n", resp2.UsageMetadata.CandidatesTokenCount)
fmt.Fprintf(w, "Total number of tokens: %d\n", resp2.UsageMetadata.TotalTokenCount)

After

Python

Count Tokens

response = client.models.count_tokens(
    model='gemini-2.0-flash-001',
    contents='why is the sky blue?',
)
print(response)

Compute tokens

response = client.models.compute_tokens(
    model='gemini-2.0-flash-001',
    contents='why is the sky blue?',
)
print(response)

Java

Import the CountTokensResponse and ComputeTokensResponse modules:

import com.google.genai.types.CountTokensResponse;
import com.google.genai.types.ComputeTokensResponse;

Use countTokens to count the number of tokens used for a prompt:

CountTokensResponse response =
  client.models.countTokens("gemini-2.0-flash-001", "What is your name?", null);

Use computeTokens for a more granular analysis of how the prompt is tokenized:

ComputeTokensResponse response =
  client.models.computeTokens("gemini-2.0-flash-001", "What is your name?", null);

For the full implementation, see CountTokens.java.

JavaScript

const response = await ai.models.countTokens({
  model: 'gemini-2.0-flash',
  contents: 'The quick brown fox jumps over the lazy dog.',
});

Go

import (
  "context"
  "flag"
  "fmt"
  "log"

  "google.golang.org/genai"
)

client, err := genai.NewClient(ctx, &genai.ClientConfig{Backend: genai.BackendVertexAI})

fmt.Println("Count tokens example.")
countTokensResult, err := client.Models.CountTokens(ctx, *model, genai.Text("What is your name?"), nil)

fmt.Println(countTokensResult.TotalTokens)