OpenAI 兼容性

您可以使用 OpenAI 库(Python 和 TypeScript/JavaScript)以及 REST API 访问 Gemini 模型。在 Vertex AI 中使用 OpenAI 库时,仅支持 Google Cloud 身份验证。如果您尚未使用 OpenAI 库,我们建议您直接调用 Gemini API。

Python

import openai
from google.auth import default
import google.auth.transport.requests

# TODO(developer): Set your Google Cloud project ID and un-comment the line below.
# project_id = "PROJECT_ID"
location = "us-central1"

# Fetch Google Cloud credentials and refresh them to obtain an access token.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

# Send requests to the Vertex AI OpenAI-compatible endpoint instead of the
# library's default URL, authenticating with the Google Cloud access token.
endpoint = (
    f"https://{location}-aiplatform.googleapis.com/v1/"
    f"projects/{project_id}/locations/{location}/endpoints/openapi"
)
client = openai.OpenAI(base_url=endpoint, api_key=credentials.token)

# Conversation: a system instruction followed by the user's question.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain to me how AI works"},
]

response = client.chat.completions.create(
    model="google/gemini-2.0-flash-001",
    messages=conversation,
)

# Print the message of the first returned choice.
print(response.choices[0].message)

具体变化

  • api_key=credentials.token:如需使用 Google Cloud 身份验证,请使用示例代码获取 Google Cloud 身份验证令牌。

  • base_url:这会指示 OpenAI 库向 Google Cloud 发送请求,而不是向默认网址发送请求。

  • model="google/gemini-2.0-flash-001":从 Vertex 托管的模型中选择兼容的 Gemini 模型。

思考型

Gemini 2.5 模型经过训练,能够深入思考复杂问题,从而显著提升推理能力。Gemini API 附带一个“思考预算”参数,可让您精细控制模型的思考量。

与 Gemini API 不同,OpenAI API 提供三种思考控制级别:“低”“中”和“高”(即 reasoning_effort 参数的取值 "low"、"medium" 和 "high"),这些级别会在后台分别映射到 1,000、8,000 和 24,000 个思考令牌预算。

如需停用思考功能,请将推理努力程度(reasoning_effort)设为“无”(即 "none")。

Python

import openai
from google.auth import default
import google.auth.transport.requests

# TODO(developer): Set your Google Cloud project ID and un-comment the line below.
# project_id = "PROJECT_ID"
location = "us-central1"

# Fetch Google Cloud credentials and refresh them to obtain an access token.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

# OpenAI client pointed at the Vertex AI OpenAI-compatible endpoint.
endpoint = (
    f"https://{location}-aiplatform.googleapis.com/v1/"
    f"projects/{project_id}/locations/{location}/endpoints/openapi"
)
client = openai.OpenAI(base_url=endpoint, api_key=credentials.token)

conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain to me how AI works"},
]

# `reasoning_effort` is the thinking-control level sent with this request.
response = client.chat.completions.create(
    model="google/gemini-2.5-flash-preview-04-17",
    reasoning_effort="low",
    messages=conversation,
)
print(response.choices[0].message)

流式

Gemini API 支持流式回答。

Python

import openai
from google.auth import default
import google.auth.transport.requests

# TODO(developer): Set your Google Cloud project ID and un-comment the line below.
# project_id = "PROJECT_ID"
location = "us-central1"

# Fetch Google Cloud credentials and refresh them to obtain an access token.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

# OpenAI client pointed at the Vertex AI OpenAI-compatible endpoint.
endpoint = (
    f"https://{location}-aiplatform.googleapis.com/v1/"
    f"projects/{project_id}/locations/{location}/endpoints/openapi"
)
client = openai.OpenAI(base_url=endpoint, api_key=credentials.token)

conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# stream=True makes the call return an iterable of chunks rather than a
# single completed response.
response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=conversation,
    stream=True,
)

# Print the delta carried by the first choice of every streamed chunk.
for chunk in response:
    print(chunk.choices[0].delta)

函数调用

函数调用可让您更轻松地从生成式模型获取结构化数据输出,Gemini API 支持此功能。

Python

import openai
from google.auth import default
import google.auth.transport.requests

# TODO(developer): Set your Google Cloud project ID and un-comment the line below.
# project_id = "PROJECT_ID"
location = "us-central1"

# Fetch Google Cloud credentials and refresh them to obtain an access token.
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
auth_request = google.auth.transport.requests.Request()
credentials.refresh(auth_request)

# OpenAI client pointed at the Vertex AI OpenAI-compatible endpoint.
endpoint = (
    f"https://{location}-aiplatform.googleapis.com/v1/"
    f"projects/{project_id}/locations/{location}/endpoints/openapi"
)
client = openai.OpenAI(base_url=endpoint, api_key=credentials.token)

# JSON-schema description of the arguments accepted by `get_weather`:
# a required free-form location and an optional temperature unit.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g. Chicago, IL",
        },
        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
    },
    "required": ["location"],
}

# Declaration of the single function offered to the model.
get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the weather in a given location",
        "parameters": weather_parameters,
    },
}
tools = [get_weather_tool]

messages = [{"role": "user", "content": "What's the weather like in Chicago today?"}]

# tool_choice="auto" is passed so the choice of calling the tool is left to
# the model.
response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)

print(response)

图片理解

Gemini 模型是原生多模态模型,在许多常见的视觉任务中都能提供出色的性能。

Python

from google.auth import default
import google.auth.transport.requests

import base64
from openai import OpenAI

# Image understanding sample: send a local JPEG, base64-encoded as a data
# URL, together with a text question to a Gemini model through the OpenAI
# client.

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Programmatically get an access token
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI Client
# The sample imports the class with `from openai import OpenAI`, so it must be
# referenced as `OpenAI`; `openai.OpenAI` would raise NameError here.
client = OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path of the image file to read in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


# Getting the base64 string
# TODO(developer): Replace the placeholder with the path to your JPEG image.
base64_image = encode_image("Path/to/image.jpeg")

response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What is in this image?",
                },
                {
                    "type": "image_url",
                    # The image is passed inline as a base64 data URL.
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    },
                },
            ],
        }
    ],
)

print(response.choices[0])

生成图片

Python

from google.auth import default
import google.auth.transport.requests

import base64
from openai import OpenAI

# NOTE(review): this section is titled as image generation, but the request
# below sends an existing image and asks the model to describe it — confirm
# which sample was intended here.

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Programmatically get an access token
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI Client
# The sample imports the class with `from openai import OpenAI`, so it must be
# referenced as `OpenAI`; `openai.OpenAI` would raise NameError here.
client = OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path of the image file to read in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


# Getting the base64 string
# TODO(developer): Replace this path with the location of your own JPEG image.
base64_image = encode_image("/content/wayfairsofa.jpg")

response = client.chat.completions.create(
    model="google/gemini-2.0-flash",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What is in this image?",
                },
                {
                    "type": "image_url",
                    # The image is passed inline as a base64 data URL.
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    },
                },
            ],
        }
    ],
)

print(response.choices[0])

音频理解

分析音频输入:

Python

from google.auth import default
import google.auth.transport.requests

import base64
from openai import OpenAI

# Audio understanding sample: send a local WAV file, base64-encoded, together
# with a text instruction to a Gemini model through the OpenAI client.

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Programmatically get an access token
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI Client
# The sample imports the class with `from openai import OpenAI`, so it must be
# referenced as `OpenAI`; `openai.OpenAI` would raise NameError here.
client = OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)

# TODO(developer): Replace the placeholder with the path to your WAV file.
# Read the audio in binary mode and base64-encode it for the request payload.
with open("/path/to/your/audio/file.wav", "rb") as audio_file:
    base64_audio = base64.b64encode(audio_file.read()).decode("utf-8")

response = client.chat.completions.create(
    # Model IDs in these samples carry the "google/" publisher prefix.
    model="google/gemini-2.0-flash",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Transcribe this audio",
                },
                {
                    "type": "input_audio",
                    "input_audio": {
                        "data": base64_audio,
                        "format": "wav",
                    },
                },
            ],
        }
    ],
)

print(response.choices[0].message.content)

结构化输出

Gemini 模型可以以您定义的任何结构输出 JSON 对象。

Python

from google.auth import default
import google.auth.transport.requests

from pydantic import BaseModel
from openai import OpenAI

# Structured output sample: ask a Gemini model to extract event details and
# have the OpenAI client parse the reply into a Pydantic model.

# TODO(developer): Update and un-comment below lines
# project_id = "PROJECT_ID"
location = "us-central1"

# Programmatically get an access token
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
credentials.refresh(google.auth.transport.requests.Request())

# OpenAI Client
# The sample imports the class with `from openai import OpenAI`, so it must be
# referenced as `OpenAI`; `openai.OpenAI` would raise NameError here.
client = OpenAI(
    base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
    api_key=credentials.token,
)


class CalendarEvent(BaseModel):
    """Schema passed as ``response_format`` for the extracted event."""

    name: str
    date: str
    participants: list[str]


completion = client.beta.chat.completions.parse(
    model="google/gemini-2.0-flash",
    messages=[
        {"role": "system", "content": "Extract the event information."},
        {"role": "user", "content": "John and Susan are going to an AI conference on Friday."},
    ],
    response_format=CalendarEvent,
)

# Print the `parsed` attribute of the first choice's message.
print(completion.choices[0].message.parsed)

当前限制

后续步骤