Files
antigravity-skills-reference/skills/grpc-golang/resources/implementation-playbook.md
2026-02-25 15:57:09 +01:00

15 KiB

gRPC Golang Implementation Playbook

This file contains detailed patterns, checklists, and code samples referenced by the skill.

Schema Design Standards

Protobuf Definition

  • Syntax: Use proto3 only.
  • Versioning: Use package versioning (e.g., api.v1).
  • Pagination: Use page_token and page_size for list operations.
  • Timezone: Always use google.protobuf.Timestamp with UTC values at the server level.
  • Idempotency: Use idempotency keys or design side-effect-free methods to allow safe retries.
  • Validation: Adopt a schema-level validation approach (e.g., Buf validation rules or protoc-gen-validate) and ensure generated code is enforced server-side.
syntax = "proto3";
package api.v1;
option go_package = "github.com/org/repo/gen/api/v1;apiv1";

import "google/protobuf/timestamp.proto";

service UserService {
  rpc GetUser(GetUserRequest) returns (GetUserResponse);
  rpc ListUsers(ListUsersRequest) returns (ListUsersResponse);
  rpc WatchUsers(WatchUsersRequest) returns (stream UserEvent);
}

message User {
  string id = 1;
  string name = 2;
  string email = 3;
  google.protobuf.Timestamp created_at = 4;
}

message GetUserRequest {
  string id = 1;
}

message GetUserResponse {
  User user = 1;
}

message ListUsersRequest {
  int32 page_size = 1;
  string page_token = 2;
}

message ListUsersResponse {
  repeated User users = 1;
  string next_page_token = 2;
}

message WatchUsersRequest {
  // Empty; streams all user events from the current point.
}

message UserEvent {
  enum EventType {
    EVENT_TYPE_UNSPECIFIED = 0;
    EVENT_TYPE_CREATED = 1;
    EVENT_TYPE_UPDATED = 2;
    EVENT_TYPE_DELETED = 3;
  }
  EventType type = 1;
  User user = 2;
  google.protobuf.Timestamp occurred_at = 3;
}

Code Generation

  • Toolchain: Use google.golang.org/protobuf/cmd/protoc-gen-go and protoc-gen-go-grpc.
  • Management: Use buf.gen.yaml to manage plugin versions and generation parameters.
  • Compatibility: Ensure plugins use Protobuf Go v2 API (google.golang.org/protobuf). Do not mix with the deprecated v1 API (github.com/golang/protobuf).

buf.gen.yaml Example

version: v2
plugins:
  - remote: buf.build/protocolbuffers/go
    out: gen
    opt: paths=source_relative
  - remote: buf.build/grpc/go
    out: gen
    opt: paths=source_relative

Server Implementation

Full Server Setup with Graceful Shutdown

package main

import (
	"context"
	"log"
	"net"
	"os"
	"os/signal"
	"syscall"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/health"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
	"google.golang.org/grpc/keepalive"

	apiv1 "github.com/org/repo/gen/api/v1"
)

func main() {
	srv := grpc.NewServer(
		grpc.ChainUnaryInterceptor(
			recoveryInterceptor,
			loggingInterceptor,
			otelUnaryInterceptor,
		),
		grpc.KeepaliveParams(keepalive.ServerParameters{
			MaxConnectionIdle: 5 * time.Minute,
			Time:              1 * time.Minute,
			Timeout:           20 * time.Second,
		}),
		grpc.MaxRecvMsgSize(4<<20), // 4 MB
		grpc.MaxSendMsgSize(4<<20), // 4 MB
	)

	// Register application services.
	apiv1.RegisterUserServiceServer(srv, newUserService())

	// Register health check with fully-qualified service name.
	healthSrv := health.NewServer()
	healthpb.RegisterHealthServer(srv, healthSrv)
	healthSrv.SetServingStatus(
		"api.v1.UserService",
		healthpb.HealthCheckResponse_SERVING,
	)

	lis, err := net.Listen("tcp", ":50051")
	if err != nil {
		log.Fatalf("listen: %v", err)
	}

	// Graceful shutdown: GracefulStop with a fallback timeout to Stop.
	go func() {
		sigCh := make(chan os.Signal, 1)
		signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
		<-sigCh

		log.Println("shutting down gRPC server...")
		healthSrv.SetServingStatus(
			"api.v1.UserService",
			healthpb.HealthCheckResponse_NOT_SERVING,
		)

		ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
		defer cancel()

		stopped := make(chan struct{})
		go func() {
			srv.GracefulStop()
			close(stopped)
		}()

		select {
		case <-stopped:
			log.Println("server stopped gracefully")
		case <-ctx.Done():
			log.Println("graceful stop timed out, forcing stop")
			srv.Stop()
		}
	}()

	log.Printf("gRPC server listening on %s", lis.Addr())
	if err := srv.Serve(lis); err != nil {
		log.Fatalf("serve: %v", err)
	}
}

mTLS Setup

package main

import (
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"log"
	"os"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
)

// loadServerTLS configures mTLS for the server side.
func loadServerTLS() grpc.ServerOption {
	tlsCert, err := tls.LoadX509KeyPair("server.crt", "server.key")
	if err != nil {
		log.Fatalf("load server cert: %v", err)
	}

	caCert, err := os.ReadFile("ca.crt")
	if err != nil {
		log.Fatalf("read CA cert: %v", err)
	}

	caPool := x509.NewCertPool()
	if !caPool.AppendCertsFromPEM(caCert) {
		log.Fatal("failed to append CA cert")
	}

	tlsCfg := &tls.Config{
		Certificates: []tls.Certificate{tlsCert},
		ClientCAs:    caPool,
		ClientAuth:   tls.RequireAndVerifyClientCert,
		MinVersion:   tls.VersionTLS13,
	}
	return grpc.Creds(credentials.NewTLS(tlsCfg))
}

// dialWithMTLS creates a client connection using mTLS.
func dialWithMTLS(target string) (*grpc.ClientConn, error) {
	clientCert, err := tls.LoadX509KeyPair("client.crt", "client.key")
	if err != nil {
		return nil, fmt.Errorf("load client cert: %w", err)
	}

	caCert, err := os.ReadFile("ca.crt")
	if err != nil {
		return nil, fmt.Errorf("read CA cert: %w", err)
	}

	caPool := x509.NewCertPool()
	if !caPool.AppendCertsFromPEM(caCert) {
		return nil, fmt.Errorf("failed to append CA cert")
	}

	creds := credentials.NewTLS(&tls.Config{
		Certificates: []tls.Certificate{clientCert},
		RootCAs:      caPool,
		MinVersion:   tls.VersionTLS13,
	})

	// Note: for gRPC-Go v1.63+, grpc.NewClient is the recommended replacement.
	conn, err := grpc.Dial(target, grpc.WithTransportCredentials(creds))
	if err != nil {
		return nil, fmt.Errorf("dial %s: %w", target, err)
	}
	return conn, nil
}

Client Best Practices

Connection Reuse

package main

import (
	"context"
	"fmt"
	"log"
	"os"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"

	apiv1 "github.com/org/repo/gen/api/v1"
)

// Initialize once at startup; reuse across the application lifetime.
var userConn *grpc.ClientConn

func initClients(creds credentials.TransportCredentials) {
	var err error
	// Note: for gRPC-Go v1.63+, use grpc.NewClient instead.
	userConn, err = grpc.Dial(
		os.Getenv("USER_SVC_ADDR"),
		grpc.WithTransportCredentials(creds),
	)
	if err != nil {
		log.Fatalf("dial user-svc: %v", err)
	}
}

func callListUsers(ctx context.Context) (*apiv1.ListUsersResponse, error) {
	// Always set a deadline per call, not per connection.
	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	client := apiv1.NewUserServiceClient(userConn)
	resp, err := client.ListUsers(ctx, &apiv1.ListUsersRequest{PageSize: 20})
	if err != nil {
		return nil, fmt.Errorf("list users: %w", err)
	}
	return resp, nil
}

Retry Policy

Only enable retries for idempotent calls. Use exponential backoff.

import "google.golang.org/grpc"

// Service config with retry policy for idempotent methods.
const retryPolicy = `{
  "methodConfig": [{
    "name": [{"service": "api.v1.UserService", "method": "GetUser"}],
    "retryPolicy": {
      "maxAttempts": 3,
      "initialBackoff": "0.1s",
      "maxBackoff": "1s",
      "backoffMultiplier": 2,
      "retryableStatusCodes": ["UNAVAILABLE", "DEADLINE_EXCEEDED"]
    }
  }]
}`

// Note: for gRPC-Go v1.63+, use grpc.NewClient instead of grpc.Dial.
conn, err := grpc.Dial(
	target,
	grpc.WithTransportCredentials(creds),
	grpc.WithDefaultServiceConfig(retryPolicy),
)

Observability

Interceptor Labels

  • Logging: Include grpc.method, grpc.service, grpc.code, latency_ms, and trace_id.
  • Metrics: Export request count, latency histogram, and in-flight stream count.

OpenTelemetry Integration

import (
	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
	"google.golang.org/grpc"
)

srv := grpc.NewServer(
	grpc.StatsHandler(otelgrpc.NewServerHandler()),
)

// Note: for gRPC-Go v1.63+, use grpc.NewClient instead of grpc.Dial.
conn, err := grpc.Dial(
	target,
	grpc.WithStatsHandler(otelgrpc.NewClientHandler()),
)

Testing

bufconn In-Process Test

package service_test

import (
	"context"
	"net"
	"testing"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/status"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/test/bufconn"

	apiv1 "github.com/org/repo/gen/api/v1"
)

func TestListUsers(t *testing.T) {
	lis := bufconn.Listen(1 << 20)
	srv := grpc.NewServer()
	apiv1.RegisterUserServiceServer(srv, &fakeUserSvc{})
	go func() {
		if err := srv.Serve(lis); err != nil {
			t.Logf("server exited: %v", err)
		}
	}()
	t.Cleanup(srv.GracefulStop)

	// Note: for gRPC-Go v1.63+, use grpc.NewClient instead of grpc.DialContext.
	conn, err := grpc.DialContext(context.Background(),
		"bufnet",
		grpc.WithContextDialer(func(ctx context.Context, _ string) (net.Conn, error) {
			return lis.DialContext(ctx)
		}),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	)
	if err != nil {
		t.Fatalf("dial bufnet: %v", err)
	}
	t.Cleanup(func() { conn.Close() })

	client := apiv1.NewUserServiceClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	resp, err := client.ListUsers(ctx, &apiv1.ListUsersRequest{PageSize: 10})
	if code := status.Code(err); code != codes.OK {
		t.Fatalf("expected OK, got %v: %v", code, err)
	}
	if resp == nil {
		t.Fatal("expected non-nil response")
	}
}

Streaming Handler Pattern

Always check ctx.Done() in streaming loops. Never expose raw internal errors to clients.

func (s *userService) WatchUsers(
	req *apiv1.WatchUsersRequest,
	stream apiv1.UserService_WatchUsersServer,
) error {
	ctx := stream.Context()

	events := s.subscribeUserEvents()
	defer s.unsubscribe(events)

	for {
		select {
		case <-ctx.Done():
			// Client disconnected or deadline exceeded; exit cleanly.
			return status.Error(codes.Canceled, "client disconnected")

		case event, ok := <-events:
			if !ok {
				// Channel closed; server is shutting down.
				return status.Error(codes.Unavailable, "service shutting down")
			}

			if err := stream.Send(event); err != nil {
				// Log the raw error server-side for diagnostics.
				log.Printf("stream send failed: %v", err)
				// Return a generic message to the client; never leak raw err.
				return status.Error(codes.Internal, "failed to send event")
			}
		}
	}
}

Error Mapping

Map domain errors to gRPC status codes consistently:

Only return err.Error() to clients when it is a safe, user-facing domain message (not an internal error string).

package service

import (
	"errors"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

var (
	ErrNotFound       = errors.New("resource not found")
	ErrAlreadyExists  = errors.New("resource already exists")
	ErrInvalidInput   = errors.New("invalid input")
	ErrPermission     = errors.New("permission denied")
)

// toGRPCError maps a domain error to a gRPC status error.
func toGRPCError(err error) error {
	if err == nil {
		return nil
	}
	switch {
	case errors.Is(err, ErrNotFound):
		return status.Error(codes.NotFound, err.Error())
	case errors.Is(err, ErrAlreadyExists):
		return status.Error(codes.AlreadyExists, err.Error())
	case errors.Is(err, ErrInvalidInput):
		return status.Error(codes.InvalidArgument, err.Error())
	case errors.Is(err, ErrPermission):
		return status.Error(codes.PermissionDenied, err.Error())
	default:
		return status.Error(codes.Internal, "internal error")
	}
}

Project Layout

project/
  buf.gen.yaml
  buf.yaml
  proto/
    api/
      v1/
        user_service.proto
  gen/                          # Generated code (committed or gitignored)
    api/
      v1/
        user_service.pb.go
        user_service_grpc.pb.go
  internal/
    service/
      user.go                  # Service implementation
      user_test.go             # bufconn tests
    domain/
      errors.go                # Domain error definitions
  cmd/
    server/
      main.go                  # Server entrypoint with graceful shutdown
  config/
    config.go                  # Env-based config (timeouts, TLS paths, limits)

Safety Checklist

  • Default to TLS/mTLS for all production traffic.
  • Enforce limits (MaxRecvMsgSize, MaxSendMsgSize, metadata size) to reduce resource exhaustion.
  • Treat client-sent metadata as untrusted; validate and allowlist keys used for auth or tenant routing.
  • Disable gRPC reflection in production to avoid exposing internal service schemas.
  • Check context.Done() in every iteration of a streaming handler to prevent goroutine leaks.

Anti-Patterns

Anti-Pattern Why It Hurts Fix
Create new grpc.ClientConn per request Exhausts OS sockets and disables HTTP/2 multiplexing, causing high latency and resource leaks Initialize once, reuse globally
Mix Protobuf v1 and v2 libraries Causes silent marshaling bugs; proto.Marshal from v1 and v2 are NOT interchangeable Pin to google.golang.org/protobuf (v2) throughout
Expose raw internal error strings to clients Leaks stack traces and internal service names; a security and UX risk Map errors with status.Errorf using appropriate gRPC codes
Ignore context.Done() in streaming handlers Goroutine and connection leak when client disconnects Check ctx.Err() in every iteration of a streaming loop
Skip error handling with _ = Hides failures silently; production outages become undiagnosable Always check and handle errors explicitly
Use grpc.Dial without health checks Connection failures are deferred and may surface as runtime errors Use health checks and monitor connection state

Migration note: For gRPC-Go v1.63+ (Jan 2024), grpc.NewClient is the newer API recommended by the gRPC-Go project for new code. For older versions (or when following existing codebases and official grpc.io examples), using grpc.Dial / grpc.DialContext is still common.