使用 OpenTelemetry 擷取自訂用戶端指標

本文將說明如何使用 OpenTelemetry 擷取自訂用戶端指標。您可以使用 JavaGo 用戶端程式庫,取得自訂用戶端指標。

自訂用戶端指標可協助您找出系統中延遲時間的來源。詳情請參閱「Spanner 要求中的延遲點」。

Spanner 用戶端程式庫也會使用 OpenTelemetry 可觀察性架構提供統計資料和追蹤記錄。詳情請參閱「使用 OpenTelemetry 設定追蹤記錄收集」。

OpenTelemetry 是開放原始碼觀測能力架構和工具包,可讓您建立及管理追蹤記錄、指標和記錄等遙測資料。

事前準備

您必須設定 OpenTelemetry SDK,並使用適當的選項匯出遙測資料。建議您使用 OpenTelemetry Protocol (OTLP) 匯出工具。

如要使用 OpenTelemetry 設定自訂用戶端指標,您需要設定 OpenTelemetry SDK 和 OTLP 匯出工具:

  1. 使用下列程式碼,將必要依附元件新增至應用程式:

    Java

    <dependencyManagement>
      <dependencies>
        <dependency>
          <groupId>com.google.cloud</groupId>
          <artifactId>libraries-bom</artifactId>
          <version>26.32.0</version>
          <type>pom</type>
          <scope>import</scope>
        </dependency>
        <dependency>
          <groupId>io.opentelemetry</groupId>
          <artifactId>opentelemetry-bom</artifactId>
          <version>1.35.0</version>
          <type>pom</type>
          <scope>import</scope>
        </dependency>
      </dependencies>
    </dependencyManagement>
    
    <dependencies>
      <dependency>
        <groupId>com.google.cloud</groupId>
        <artifactId>google-cloud-spanner</artifactId>
      </dependency>
      <dependency>
        <groupId>io.opentelemetry</groupId>
        <artifactId>opentelemetry-sdk</artifactId>
      </dependency>
      <dependency>
        <groupId>io.opentelemetry</groupId>
        <artifactId>opentelemetry-sdk-metrics</artifactId>
      </dependency>
      <dependency>
        <groupId>io.opentelemetry</groupId>
        <artifactId>opentelemetry-sdk-trace</artifactId>
      </dependency>
      <dependency>
        <groupId>io.opentelemetry</groupId>
        <artifactId>opentelemetry-exporter-otlp</artifactId>
      </dependency>
    </dependencies>

    Go

    go.opentelemetry.io/otel v1.34.0
    go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0
    go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0
    go.opentelemetry.io/otel/metric v1.34.0
    go.opentelemetry.io/otel/sdk v1.34.0
    go.opentelemetry.io/otel/sdk/metric v1.34.0
  2. 使用 OTLP 匯出工具建立 OpenTelemetry 物件,然後使用 SpannerOptions 將其插入 Spanner:

    Java

    // Enable OpenTelemetry metrics and traces before Injecting OpenTelemetry
    SpannerOptions.enableOpenTelemetryMetrics();
    SpannerOptions.enableOpenTelemetryTraces();
    
    // Create a new meter provider
    SdkMeterProvider sdkMeterProvider = SdkMeterProvider.builder()
        // Use Otlp exporter or any other exporter of your choice.
        .registerMetricReader(
            PeriodicMetricReader.builder(OtlpGrpcMetricExporter.builder().build()).build())
        .build();
    
    // Create a new tracer provider
    SdkTracerProvider sdkTracerProvider = SdkTracerProvider.builder()
        // Use Otlp exporter or any other exporter of your choice.
        .addSpanProcessor(SimpleSpanProcessor.builder(OtlpGrpcSpanExporter
            .builder().build()).build())
            .build();
    
    // Configure OpenTelemetry object using Meter Provider and Tracer Provider
    OpenTelemetry openTelemetry = OpenTelemetrySdk.builder()
        .setMeterProvider(sdkMeterProvider)
        .setTracerProvider(sdkTracerProvider)
        .build();
    
    // Inject OpenTelemetry object via Spanner options or register as GlobalOpenTelemetry.
    SpannerOptions options = SpannerOptions.newBuilder()
        .setOpenTelemetry(openTelemetry)
        .build();
    Spanner spanner = options.getService();
    
    DatabaseClient dbClient = spanner
        .getDatabaseClient(DatabaseId.of(projectId, instanceId, databaseId));
    
    captureGfeMetric(dbClient);
    captureQueryStatsMetric(openTelemetry, dbClient);
    
    // Close the providers to free up the resources and export the data. */
    sdkMeterProvider.close();
    sdkTracerProvider.close();

    Go

    // Ensure that your Go runtime version is supported by the OpenTelemetry-Go compatibility policy before enabling OpenTelemetry instrumentation.
    // Refer to compatibility here https://github.com/googleapis/google-cloud-go/blob/main/debug.md#opentelemetry
    
    import (
    	"context"
    	"fmt"
    	"io"
    	"log"
    	"strconv"
    	"strings"
    
    	"cloud.google.com/go/spanner"
    	"go.opentelemetry.io/otel"
    	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
    	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
    	"go.opentelemetry.io/otel/metric"
    	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
    	"go.opentelemetry.io/otel/sdk/resource"
    	sdktrace "go.opentelemetry.io/otel/sdk/trace"
    	semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
    	"google.golang.org/api/iterator"
    )
    
    func enableOpenTelemetryMetricsAndTraces(w io.Writer, db string) error {
    	// db = `projects/<project>/instances/<instance-id>/database/<database-id>`
    	ctx := context.Background()
    
    	// Create a new resource to uniquely identify the application
    	res, err := newResource()
    	if err != nil {
    		log.Fatal(err)
    	}
    
    	// Enable OpenTelemetry traces by setting environment variable GOOGLE_API_GO_EXPERIMENTAL_TELEMETRY_PLATFORM_TRACING to the case-insensitive value "opentelemetry" before loading the client library.
    	// Enable OpenTelemetry metrics before injecting meter provider.
    	spanner.EnableOpenTelemetryMetrics()
    
    	// Create a new tracer provider
    	tracerProvider, err := getOtlpTracerProvider(ctx, res)
    	defer tracerProvider.ForceFlush(ctx)
    	if err != nil {
    		log.Fatal(err)
    	}
    	// Register tracer provider as global
    	otel.SetTracerProvider(tracerProvider)
    
    	// Create a new meter provider
    	meterProvider := getOtlpMeterProvider(ctx, res)
    	defer meterProvider.ForceFlush(ctx)
    
    	// Inject meter provider locally via ClientConfig when creating a spanner client or set globally via setMeterProvider.
    	client, err := spanner.NewClientWithConfig(ctx, db, spanner.ClientConfig{OpenTelemetryMeterProvider: meterProvider})
    	if err != nil {
    		return err
    	}
    	defer client.Close()
    	return nil
    }
    
    func getOtlpMeterProvider(ctx context.Context, res *resource.Resource) *sdkmetric.MeterProvider {
    	otlpExporter, err := otlpmetricgrpc.New(ctx)
    	if err != nil {
    		log.Fatal(err)
    	}
    	meterProvider := sdkmetric.NewMeterProvider(
    		sdkmetric.WithResource(res),
    		sdkmetric.WithReader(sdkmetric.NewPeriodicReader(otlpExporter)),
    	)
    	return meterProvider
    }
    
    func getOtlpTracerProvider(ctx context.Context, res *resource.Resource) (*sdktrace.TracerProvider, error) {
    	traceExporter, err := otlptracegrpc.New(ctx)
    	if err != nil {
    		return nil, err
    	}
    
    	tracerProvider := sdktrace.NewTracerProvider(
    		sdktrace.WithResource(res),
    		sdktrace.WithBatcher(traceExporter),
    		sdktrace.WithSampler(sdktrace.AlwaysSample()),
    	)
    
    	return tracerProvider, nil
    }
    
    func newResource() (*resource.Resource, error) {
    	return resource.Merge(resource.Default(),
    		resource.NewWithAttributes(semconv.SchemaURL,
    			semconv.ServiceName("otlp-service"),
    			semconv.ServiceVersion("0.1.0"),
    		))
    }
    

擷取 GFE 延遲時間

Google Front End (GFE) 延遲時間是指 Google 網路從用戶端接收遠端程序呼叫,到 GFE 收到回應的第一個位元組之間的時間長度 (以毫秒為單位)。

您可以使用下列程式碼擷取 GFE 延遲時間:

Java

static void captureGfeMetric(DatabaseClient dbClient) {
  // GFE_latency and other Spanner metrics are automatically collected
  // when OpenTelemetry metrics are enabled.

  try (ResultSet resultSet =
      dbClient
          .singleUse() // Execute a single read or query against Cloud Spanner.
          .executeQuery(Statement.of("SELECT SingerId, AlbumId, AlbumTitle FROM Albums"))) {
    while (resultSet.next()) {
      System.out.printf(
          "%d %d %s", resultSet.getLong(0), resultSet.getLong(1), resultSet.getString(2));
    }
  }
}

Go

// GFE_Latency and other Spanner metrics are automatically collected
// when OpenTelemetry metrics are enabled.
func captureGFELatencyMetric(ctx context.Context, client spanner.Client) error {
	stmt := spanner.Statement{SQL: `SELECT SingerId, AlbumId, AlbumTitle FROM Albums`}
	iter := client.Single().Query(ctx, stmt)
	defer iter.Stop()
	for {
		row, err := iter.Next()
		if err == iterator.Done {
			return nil
		}
		if err != nil {
			return err
		}
		var singerID, albumID int64
		var albumTitle string
		if err := row.Columns(&singerID, &albumID, &albumTitle); err != nil {
			return err
		}
	}
}

程式碼範例會在將指標名稱匯出至 Cloud Monitoring 時,在名稱後方加上字串 spanner/gfe_latency。您可以使用附加的字串,在 Cloud Monitoring 中搜尋這個指標。

擷取 Cloud Spanner API 要求延遲時間

Cloud Spanner API 要求延遲時間是指 Cloud Spanner API 前端收到用戶端要求的第一個位元組,以及 Cloud Spanner API 前端傳送回應的最後一個位元組之間的時間 (以秒為單位)。

這個延遲指標可做為 Cloud Monitoring 指標的一部分。

擷取用戶端往返延遲時間

用戶端來回延遲時間是指,從用戶端傳送至資料庫的 Cloud Spanner API 要求 (透過 GFE 和 Cloud Spanner API 前端) 第一個位元組,到用戶端從資料庫收到的回應最後一個位元組之間的時間長度 (以毫秒為單位)。

使用 OpenTelemetry 時,系統不支援 Spanner 用戶端的來回延遲指標。您可以改為查看用戶端作業延遲指標。詳情請參閱「用戶端指標說明」。

您也可以使用 OpenCensus 和橋接器設定指標,並將資料遷移至 OpenTelemetry。

擷取查詢延遲時間

查詢延遲時間是指在 Spanner 資料庫中執行 SQL 查詢所需的時間長度 (以毫秒為單位)。

您可以使用下列程式碼擷取查詢延遲時間:

Java

static void captureQueryStatsMetric(OpenTelemetry openTelemetry, DatabaseClient dbClient) {
  // Register query stats metric.
  // This should be done once before start recording the data.
  Meter meter = openTelemetry.getMeter("cloud.google.com/java");
  DoubleHistogram queryStatsMetricLatencies =
      meter
          .histogramBuilder("spanner/query_stats_elapsed")
          .setDescription("The execution of the query")
          .setUnit("ms")
          .build();

  // Capture query stats metric data.
  try (ResultSet resultSet = dbClient.singleUse()
      .analyzeQuery(Statement.of("SELECT SingerId, AlbumId, AlbumTitle FROM Albums"),
          QueryAnalyzeMode.PROFILE)) {

    while (resultSet.next()) {
      System.out.printf(
          "%d %d %s", resultSet.getLong(0), resultSet.getLong(1), resultSet.getString(2));
    }

    String value = resultSet.getStats().getQueryStats()
        .getFieldsOrDefault("elapsed_time", Value.newBuilder().setStringValue("0 msecs").build())
        .getStringValue();
    double elapsedTime = value.contains("msecs")
        ? Double.parseDouble(value.replaceAll(" msecs", ""))
        : Double.parseDouble(value.replaceAll(" secs", "")) * 1000;
    queryStatsMetricLatencies.record(elapsedTime);
  }
}

Go

func captureQueryStatsMetric(ctx context.Context, mp metric.MeterProvider, client spanner.Client) error {
	meter := mp.Meter(spanner.OtInstrumentationScope)
	// Register query stats metric with OpenTelemetry to record the data.
	// This should be done once before start recording the data.
	queryStats, err := meter.Float64Histogram(
		"spanner/query_stats_elapsed",
		metric.WithDescription("The execution of the query"),
		metric.WithUnit("ms"),
		metric.WithExplicitBucketBoundaries(0.0, 0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 13.0,
			16.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0, 160.0, 200.0, 250.0,
			300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0,
			100000.0),
	)
	if err != nil {
		fmt.Print(err)
	}

	stmt := spanner.Statement{SQL: `SELECT SingerId, AlbumId, AlbumTitle FROM Albums`}
	iter := client.Single().QueryWithStats(ctx, stmt)
	defer iter.Stop()
	for {
		row, err := iter.Next()
		if err == iterator.Done {
			// Record query execution time with OpenTelemetry.
			elapasedTime := iter.QueryStats["elapsed_time"].(string)
			elapasedTimeMs, err := strconv.ParseFloat(strings.TrimSuffix(elapasedTime, " msecs"), 64)
			if err != nil {
				return err
			}
			queryStats.Record(ctx, elapasedTimeMs)
			return nil
		}
		if err != nil {
			return err
		}
		var singerID, albumID int64
		var albumTitle string
		if err := row.Columns(&singerID, &albumID, &albumTitle); err != nil {
			return err
		}
	}
}

程式碼範例會在將指標名稱匯出至 Cloud Monitoring 時,在名稱後方加上字串 spanner/query_stats_elapsed。您可以使用附加的字串,在 Cloud Monitoring 中搜尋這個指標。

在 Metrics Explorer 中查看指標

  1. 前往 Google Cloud 控制台的「Metrics Explorer」頁面。

    前往 Metrics Explorer

  2. 選取專案。

  3. 按一下「Select a metric」(選取指標)

  4. 使用下列字串搜尋延遲指標:

    • roundtrip_latency:用於用戶端來回延遲時間指標。
    • spanner/gfe_latency:適用於 GFE 延遲時間指標。
    • spanner/query_stats_elapsed:查詢延遲時間指標。
  5. 選取指標,然後按一下「套用」

如要進一步瞭解如何分組或匯總指標,請參閱「使用選單建立查詢」。

後續步驟