# Replace the `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` values
# with appropriate values for your project.
export GOOGLE_CLOUD_PROJECT=GOOGLE_CLOUD_PROJECT
export GOOGLE_CLOUD_LOCATION=global
export GOOGLE_GENAI_USE_VERTEXAI=True
import io

import requests
from google import genai
from google.genai.types import (
    GenerateContentConfig,
    HarmBlockThreshold,
    HarmCategory,
    HttpOptions,
    Part,
    SafetySetting,
)
from PIL import Image, ImageColor, ImageDraw
from pydantic import BaseModel


# Helper class to represent a bounding box
class BoundingBox(BaseModel):
    """
    Represents a bounding box with its 2D coordinates and associated label.

    Attributes:
        box_2d (list[int]): A list of integers representing the 2D coordinates of the bounding box,
            typically in the format [y_min, x_min, y_max, x_max].
        label (str): A string representing the label or class associated with the object within the bounding box.
    """

    box_2d: list[int]
    label: str


# Helper function to plot bounding boxes on an image
def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -> None:
    """
    Plots bounding boxes on an image with labels, using PIL and normalized coordinates.

    The image is downloaded over HTTP, each box is outlined in a distinct
    named color, its label is written just inside the top-left corner, and
    the annotated image is opened in the default viewer.

    Args:
        image_uri: The URI of the image file.
        bounding_boxes: A list of BoundingBox objects. Each box's coordinates are in
            normalized [y_min, x_min, y_max, x_max] format, on a 0-1000 scale.

    Raises:
        requests.HTTPError: If the image download returns an error status.
    """
    # Download the whole body up front instead of handing PIL the raw socket
    # stream: the connection is released immediately, transfer encodings are
    # decoded by requests, and an HTTP error is reported as such rather than
    # as an image-decoding failure.
    http_response = requests.get(image_uri, timeout=10)
    http_response.raise_for_status()

    with Image.open(io.BytesIO(http_response.content)) as im:
        width, height = im.size
        draw = ImageDraw.Draw(im)

        colors = list(ImageColor.colormap.keys())

        for i, bbox in enumerate(bounding_boxes):
            # Scale normalized coordinates to image dimensions
            abs_y_min = int(bbox.box_2d[0] / 1000 * height)
            abs_x_min = int(bbox.box_2d[1] / 1000 * width)
            abs_y_max = int(bbox.box_2d[2] / 1000 * height)
            abs_x_max = int(bbox.box_2d[3] / 1000 * width)

            # Cycle through the named colors so neighbouring boxes differ
            color = colors[i % len(colors)]

            # Draw the rectangle using the correct (x, y) pairs
            draw.rectangle(
                ((abs_x_min, abs_y_min), (abs_x_max, abs_y_max)),
                outline=color,
                width=4,
            )
            if bbox.label:
                # Position the text at the top-left corner of the box
                draw.text((abs_x_min + 8, abs_y_min + 6), bbox.label, fill=color)

        im.show()


client = genai.Client(http_options=HttpOptions(api_version="v1"))

config = GenerateContentConfig(
    system_instruction="""
    Return bounding boxes as an array with labels.
    Never return masks. Limit to 25 objects.
    If an object is present multiple times, give each object a unique label
    according to its distinct characteristics (colors, size, position, etc..).
    """,
    temperature=0.5,
    safety_settings=[
        SafetySetting(
            category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            threshold=HarmBlockThreshold.BLOCK_ONLY_HIGH,
        ),
    ],
    response_mime_type="application/json",
    response_schema=list[BoundingBox],
)

image_uri = "https://storage.googleapis.com/generativeai-downloads/images/socks.jpg"

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[
        Part.from_uri(
            file_uri=image_uri,
            mime_type="image/jpeg",
        ),
        "Output the positions of the socks with a face. Label according to position in the image.",
    ],
    config=config,
)
print(response.text)
# NOTE(review): `response.parsed` is presumably None when the SDK cannot map
# the reply onto `response_schema` - confirm against the SDK docs. Fall back
# to an empty list so the image is still shown instead of raising TypeError.
plot_bounding_boxes(image_uri, response.parsed or [])

# Example response:
# [
#     {"box_2d": [6, 246, 386, 526], "label": "top-left light blue sock with cat face"},
#     {"box_2d": [234, 649, 650, 863], "label": "top-right light blue sock with cat face"},
# ]
[[["易于理解","easyToUnderstand","thumb-up"],["解决了我的问题","solvedMyProblem","thumb-up"],["其他","otherUp","thumb-up"]],[["很难理解","hardToUnderstand","thumb-down"],["信息或示例代码不正确","incorrectInformationOrSampleCode","thumb-down"],["没有我需要的信息/示例","missingTheInformationSamplesINeed","thumb-down"],["翻译问题","translationIssue","thumb-down"],["其他","otherDown","thumb-down"]],["最后更新时间 (UTC):2025-08-25。"],[],[],null,["# Bounding box detection\n\n| **Experimental**\n|\n|\n| This feature is subject to the \"Pre-GA Offerings Terms\" in the General Service Terms section\n| of the [Service Specific Terms](/terms/service-terms#1).\n|\n| Pre-GA features are available \"as is\" and might have limited support.\n|\n| For more information, see the\n[launch stage descriptions](/products#product-launch-stages). \n| To see an example of bounding box detection,\n| run the \"Spatial understanding with Gemini 2.0\" notebook in one of the following\n| environments:\n|\n| [Open in Colab](https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/spatial-understanding/spatial_understanding.ipynb)\n|\n|\n| \\|\n|\n| [Open in Colab Enterprise](https://console.cloud.google.com/vertex-ai/colab/import/https%3A%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fuse-cases%2Fspatial-understanding%2Fspatial_understanding.ipynb)\n|\n|\n| \\|\n|\n| [Open\n| in Vertex AI Workbench](https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https%3A%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fuse-cases%2Fspatial-understanding%2Fspatial_understanding.ipynb)\n|\n|\n| \\|\n|\n| [View on GitHub](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/spatial-understanding/spatial_understanding.ipynb)\n\nIn this experimental launch, we are providing developers with a powerful tool\nfor object detection and localization within images and video. 
By accurately\nidentifying and delineating objects with bounding boxes, developers can unlock a\nwide range of applications and enhance the intelligence of their projects.\n\n**Key Benefits:**\n\n- **Simple:** Integrate object detection capabilities into your applications with ease, regardless of your computer vision expertise.\n- **Customizable:** Produce bounding boxes based on custom instructions (e.g. \"I want to see bounding boxes of all the green objects in this image\"), without having to train a custom model.\n\n**Technical Details:**\n\n- **Input:** Your prompt and associated images or video frames.\n- **Output:** Bounding boxes in the `[y_min, x_min, y_max, x_max]` format. The top-left corner is the origin. The `x` and `y` axes run horizontally and vertically, respectively. Coordinate values are normalized to 0-1000 for every image.\n- **Visualization:** AI Studio users will see bounding boxes plotted within the UI. Vertex AI users should visualize their bounding boxes through custom visualization code.\n\n### Python\n\n#### Install\n\n```\npip install --upgrade google-genai\n```\n\n\nTo learn more, see the\n[SDK reference documentation](https://googleapis.github.io/python-genai/).\n\n\nSet environment variables to use the Gen AI SDK with Vertex AI:\n\n```bash\n# Replace the `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` values\n# with appropriate values for your project.\nexport GOOGLE_CLOUD_PROJECT=GOOGLE_CLOUD_PROJECT\nexport GOOGLE_CLOUD_LOCATION=global\nexport GOOGLE_GENAI_USE_VERTEXAI=True\n```\n\n\u003cbr /\u003e\n\n import requests\n from google import genai\n from google.genai.types import (\n GenerateContentConfig,\n HarmBlockThreshold,\n HarmCategory,\n HttpOptions,\n Part,\n SafetySetting,\n )\n from PIL import Image, ImageColor, ImageDraw\n from pydantic import BaseModel\n\n # Helper class to represent a bounding box\n class BoundingBox(BaseModel):\n \"\"\"\n Represents a bounding box with its 2D coordinates and associated label.\n\n 
Attributes:\n box_2d (list[int]): A list of integers representing the 2D coordinates of the bounding box,\n typically in the format [y_min, x_min, y_max, x_max].\n label (str): A string representing the label or class associated with the object within the bounding box.\n \"\"\"\n\n box_2d: list[int]\n label: str\n\n # Helper function to plot bounding boxes on an image\n def plot_bounding_boxes(image_uri: str, bounding_boxes: list[BoundingBox]) -\u003e None:\n \"\"\"\n Plots bounding boxes on an image with labels, using PIL and normalized coordinates.\n\n Args:\n image_uri: The URI of the image file.\n bounding_boxes: A list of BoundingBox objects. Each box's coordinates are in\n normalized [y_min, x_min, y_max, x_max] format.\n \"\"\"\n with Image.open(requests.get(image_uri, stream=True, timeout=10).raw) as im:\n width, height = im.size\n draw = ImageDraw.Draw(im)\n\n colors = list(ImageColor.colormap.keys())\n\n for i, bbox in enumerate(bounding_boxes):\n # Scale normalized coordinates to image dimensions\n abs_y_min = int(bbox.box_2d[0] / 1000 * height)\n abs_x_min = int(bbox.box_2d[1] / 1000 * width)\n abs_y_max = int(bbox.box_2d[2] / 1000 * height)\n abs_x_max = int(bbox.box_2d[3] / 1000 * width)\n\n color = colors[i % len(colors)]\n\n # Draw the rectangle using the correct (x, y) pairs\n draw.rectangle(\n ((abs_x_min, abs_y_min), (abs_x_max, abs_y_max)),\n outline=color,\n width=4,\n )\n if bbox.label:\n # Position the text at the top-left corner of the box\n draw.text((abs_x_min + 8, abs_y_min + 6), bbox.label, fill=color)\n\n im.show()\n\n client = genai.Client(http_options=HttpOptions(api_version=\"v1\"))\n\n config = GenerateContentConfig(\n system_instruction=\"\"\"\n Return bounding boxes as an array with labels.\n Never return masks. 
Limit to 25 objects.\n If an object is present multiple times, give each object a unique label\n according to its distinct characteristics (colors, size, position, etc..).\n \"\"\",\n temperature=0.5,\n safety_settings=[\n SafetySetting(\n category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,\n threshold=HarmBlockThreshold.BLOCK_ONLY_HIGH,\n ),\n ],\n response_mime_type=\"application/json\",\n response_schema=list[BoundingBox],\n )\n\n image_uri = \"https://storage.googleapis.com/generativeai-downloads/images/socks.jpg\"\n\n response = client.models.generate_content(\n model=\"gemini-2.5-flash\",\n contents=[\n Part.from_uri(\n file_uri=image_uri,\n mime_type=\"image/jpeg\",\n ),\n \"Output the positions of the socks with a face. Label according to position in the image.\",\n ],\n config=config,\n )\n print(response.text)\n plot_bounding_boxes(image_uri, response.parsed)\n\n # Example response:\n # [\n # {\"box_2d\": [6, 246, 386, 526], \"label\": \"top-left light blue sock with cat face\"},\n # {\"box_2d\": [234, 649, 650, 863], \"label\": \"top-right light blue sock with cat face\"},\n # ]\n\n\u003cbr /\u003e"]]