Commit d0d80a8

mo khan <mo@mokhan.ca>
2026-01-07 04:34:10
feat: add vertex AI client
1 parent 55e2913
Changed files (8)
lib
spec
fixtures
Net_Llm_VertexAI
net
support
lib/net/llm/vertex_ai.rb
@@ -0,0 +1,106 @@
# frozen_string_literal: true

module Net
  module Llm
    # Client for Anthropic Claude models hosted on Google Vertex AI via the
    # `:rawPredict` publisher endpoint. Authenticates with a gcloud
    # application-default access token obtained from the local CLI.
    class VertexAI
      attr_reader :project_id, :region, :model, :http

      # @param project_id [String] GCP project hosting the model
      # @param region [String] Vertex AI region, e.g. "us-east5"
      # @param model [String] publisher model id (defaults to Claude Opus 4.5)
      # @param http [#post] HTTP adapter; defaults to the shared Net::Llm client
      def initialize(project_id:, region:, model: "claude-opus-4-5@20251101", http: Net::Llm.http)
        @project_id = project_id
        @region = region
        @model = model
        @http = http
      end

      # Send a conversation to the model.
      #
      # Without a block: performs a blocking request and returns the parsed
      # JSON response on 2xx, or an error hash (see #handle_response).
      # With a block: streams server-sent events, yielding each parsed event
      # hash to the block until "message_stop".
      def messages(messages, system: nil, max_tokens: 1024, tools: nil, &block)
        payload = build_payload(messages, system, max_tokens, tools, block_given?)

        if block_given?
          stream_request(payload, &block)
        else
          post_request(payload)
        end
      end

      private

      # Assemble the Anthropic-on-Vertex request body. Optional keys are
      # omitted entirely rather than serialized as null.
      def build_payload(messages, system, max_tokens, tools, stream)
        payload = {
          anthropic_version: "vertex-2023-10-16",
          max_tokens: max_tokens,
          messages: messages,
          stream: stream
        }
        payload[:system] = system if system
        payload[:tools] = tools if tools
        payload
      end

      def endpoint
        "https://#{region}-aiplatform.googleapis.com/v1/projects/#{project_id}/locations/#{region}/publishers/anthropic/models/#{model}:rawPredict"
      end

      def headers
        { "Authorization" => "Bearer #{access_token}" }
      end

      # Fetch (and memoize) an access token from the gcloud CLI.
      #
      # Fails loudly when the CLI exits non-zero or prints nothing — the
      # previous behavior memoized an empty token and surfaced only as an
      # opaque 401 later. NOTE(review): access tokens expire (~1h);
      # memoization assumes short-lived client instances — confirm.
      def access_token
        @access_token ||= begin
          token = `gcloud auth application-default print-access-token`.strip
          raise "unable to obtain access token via gcloud" unless $?.success? && !token.empty?

          token
        end
      end

      def post_request(payload)
        handle_response(http.post(endpoint, headers: headers, body: payload))
      end

      # 2xx -> parsed JSON body; anything else -> {"code" =>, "body" =>}
      # so callers can branch without rescuing.
      def handle_response(response)
        if response.is_a?(Net::HTTPSuccess)
          JSON.parse(response.body)
        else
          { "code" => response.code, "body" => response.body }
        end
      end

      # Stream the response body, parsing complete SSE frames as chunks
      # arrive and yielding each JSON event. Stops once "message_stop" is
      # seen. Raises on non-2xx since there is no return value to carry an
      # error hash through the block interface.
      def stream_request(payload, &block)
        http.post(endpoint, headers: headers, body: payload) do |response|
          raise "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

          buffer = +"" # unary + => mutable despite frozen_string_literal
          response.read_body do |chunk|
            buffer << chunk # mutate in place instead of reallocating with +=

            while (event = extract_sse_event(buffer))
              next if event[:data].nil? || event[:data].empty?
              next if event[:data] == "[DONE]"

              json = JSON.parse(event[:data])
              block.call(json)

              break if json["type"] == "message_stop"
            end
          end
        end
      end

      # Pop one complete SSE frame (terminated by a blank line) off the
      # front of +buffer+, mutating it in place. Returns nil when no
      # complete frame is buffered yet, and {:event, :data} otherwise.
      #
      # Per the SSE spec, multiple "data:" lines in one frame are joined
      # with newlines (the previous version kept only the last line).
      def extract_sse_event(buffer)
        frame_end = buffer.index("\n\n")
        return nil unless frame_end

        frame = buffer[0...frame_end]
        buffer.replace(buffer[(frame_end + 2)..] || "")

        event = {}
        frame.split("\n").each do |line|
          if line.start_with?("event: ")
            event[:event] = line[7..]
          elsif line.start_with?("data: ")
            data = line[6..]
            event[:data] = event[:data] ? "#{event[:data]}\n#{data}" : data
          elsif line == "data:"
            event[:data] ||= ""
          end
        end

        event
      end
    end
  end
end
lib/net/llm.rb
@@ -4,6 +4,7 @@ require_relative "llm/version"
 require_relative "llm/openai"
 require_relative "llm/ollama"
 require_relative "llm/anthropic"
+require_relative "llm/vertex_ai"
 require "net/hippie"
 require "json"
 
spec/fixtures/Net_Llm_VertexAI/_messages/with_streaming/yields_SSE_events_to_the_block.yml
@@ -0,0 +1,70 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://us-east5-aiplatform.googleapis.com/v1/projects/<GCP_PROJECT>/locations/us-east5/publishers/anthropic/models/claude-opus-4-5@20251101:rawPredict
+    body:
+      encoding: UTF-8
+      string: '{"anthropic_version":"vertex-2023-10-16","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}],"stream":true}'
+    headers:
+      Accept:
+      - application/json
+      Content-Type:
+      - application/json
+      Authorization:
+      - "<BEARER_TOKEN>"
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Cache-Control:
+      - no-cache
+      Transfer-Encoding:
+      - chunked
+    body:
+      encoding: ASCII-8BIT
+      string: |+
+        event: vertex_event
+        data: {"type":"vertex_event","usage":{"input_tokens":8,"output_tokens":5,"cache_creation_input_tokens":null,"cache_read_input_tokens":null,"web_search_requests":null,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"tiers":{"0-to-200000":{"input_tokens":8,"output_tokens":5,"cache_creation_input_tokens":null,"cache_read_input_tokens":null,"web_search_requests":null,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0}}}}       }
+
+        event: message_start
+        data: {"type":"message_start","message":{"model":"claude-opus-4-5-20251101","id":"msg_vrtx_019UxNsxrHHYtDf3UqfU5Mum","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":8,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":5}}              }
+
+        event: ping
+        data: {"type": "ping"}
+
+        event: content_block_start
+        data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}        }
+
+        event: content_block_delta
+        data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello! How are you"}    }
+
+        event: content_block_delta
+        data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" doing today? Is there"}      }
+
+        event: content_block_delta
+        data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" something"}     }
+
+        event: content_block_delta
+        data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" I can help you with"}       }
+
+        event: content_block_delta
+        data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"?"}      }
+
+        event: content_block_stop
+        data: {"type":"content_block_stop","index":0           }
+
+        event: vertex_event
+        data: {"type":"vertex_event","usage":{"input_tokens":0,"output_tokens":15,"cache_creation_input_tokens":null,"cache_read_input_tokens":null,"web_search_requests":null,"cache_creation":null,"tiers":{"0-to-200000":{"input_tokens":0,"output_tokens":15,"cache_creation_input_tokens":null,"cache_read_input_tokens":null,"web_search_requests":null,"cache_creation":null}}}    }
+
+        event: message_delta
+        data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":20}    }
+
+        event: message_stop
+        data: {"type":"message_stop" }
+
+  recorded_at: Wed, 07 Jan 2026 00:22:06 GMT
+recorded_with: VCR 6.3.1
spec/fixtures/Net_Llm_VertexAI/_messages/with_system_prompt/includes_system_in_request.yml
@@ -0,0 +1,31 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://us-east5-aiplatform.googleapis.com/v1/projects/<GCP_PROJECT>/locations/us-east5/publishers/anthropic/models/claude-opus-4-5@20251101:rawPredict
+    body:
+      encoding: UTF-8
+      string: '{"anthropic_version":"vertex-2023-10-16","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}],"stream":false,"system":"You are a helpful assistant."}'
+    headers:
+      Accept:
+      - application/json
+      Content-Type:
+      - application/json
+      Authorization:
+      - "<BEARER_TOKEN>"
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+    body:
+      encoding: ASCII-8BIT
+      string: '{"model":"claude-opus-4-5-20251101","id":"msg_vrtx_01MPXti9xKtoAy1rwCBsviG5","type":"message","role":"assistant","content":[{"type":"text","text":"Hello! How are you doing today? Is there something I can help you with?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":14,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":20}}'
+  recorded_at: Wed, 07 Jan 2026 00:22:12 GMT
+recorded_with: VCR 6.3.1
spec/fixtures/Net_Llm_VertexAI/_messages/with_tools/POST_rawPredict_with_tools.yml
@@ -0,0 +1,31 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://us-east5-aiplatform.googleapis.com/v1/projects/<GCP_PROJECT>/locations/us-east5/publishers/anthropic/models/claude-opus-4-5@20251101:rawPredict
+    body:
+      encoding: UTF-8
+      string: '{"anthropic_version":"vertex-2023-10-16","max_tokens":1024,"messages":[{"role":"user","content":"What is the weather in Tokyo?"}],"stream":false,"tools":[{"name":"get_weather","description":"Get weather for a city","input_schema":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}}]}'
+    headers:
+      Accept:
+      - application/json
+      Content-Type:
+      - application/json
+      Authorization:
+      - "<BEARER_TOKEN>"
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      Content-Type:
+      - application/json
+      Transfer-Encoding:
+      - chunked
+    body:
+      encoding: ASCII-8BIT
+      string: '{"model":"claude-opus-4-5-20251101","id":"msg_vrtx_01VcfLZpYpcbAEMtciQnyuJ2","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_vrtx_01WXhEuzc8gXtZpkhh9XKNvU","name":"get_weather","input":{"city":"Tokyo"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":563,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":53}}'
+  recorded_at: Wed, 07 Jan 2026 00:22:10 GMT
+recorded_with: VCR 6.3.1
spec/fixtures/Net_Llm_VertexAI/_messages/without_streaming/POST_rawPredict.yml
@@ -0,0 +1,56 @@
+---
+http_interactions:
+- request:
+    method: post
+    uri: https://us-east5-aiplatform.googleapis.com/v1/projects/<GCP_PROJECT>/locations/us-east5/publishers/anthropic/models/claude-opus-4-5@20251101:rawPredict
+    body:
+      encoding: UTF-8
+      string: '{"anthropic_version":"vertex-2023-10-16","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}],"stream":false}'
+    headers:
+      Accept:
+      - application/json
+      Content-Type:
+      - application/json
+      User-Agent:
+      - net/hippie 1.4.0
+      Authorization:
+      - "<BEARER_TOKEN>"
+      Accept-Encoding:
+      - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+      Host:
+      - us-east5-aiplatform.googleapis.com
+  response:
+    status:
+      code: 200
+      message: OK
+    headers:
+      X-Vertex-Ai-Internal-Prediction-Backend:
+      - harpoon
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 07 Jan 2026 00:22:03 GMT
+      Request-Id:
+      - req_vrtx_011CWs4bARQpr2gfBaUuLy2V
+      Vary:
+      - Origin
+      - Referer
+      - X-Origin
+      Server:
+      - scaffolding on HTTPServer2
+      X-Xss-Protection:
+      - '0'
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-Content-Type-Options:
+      - nosniff
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Transfer-Encoding:
+      - chunked
+    body:
+      encoding: ASCII-8BIT
+      string: '{"model":"claude-opus-4-5-20251101","id":"msg_vrtx_01E76LkjuR5s5LW7iMNvsfuQ","type":"message","role":"assistant","content":[{"type":"text","text":"Hello!
+        How are you doing today? Is there something I can help you with?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":8,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":20}}'
+  recorded_at: Wed, 07 Jan 2026 00:22:04 GMT
+recorded_with: VCR 6.3.1
spec/net/llm/vertex_ai_spec.rb
@@ -0,0 +1,89 @@
# frozen_string_literal: true

# Specs for Net::Llm::VertexAI. HTTP traffic is replayed from VCR cassettes
# under spec/fixtures; with configure_rspec_metadata!, cassette paths are
# derived from the describe/context/it description strings, so renaming any
# description requires renaming (or re-recording) its cassette. Request
# bodies must also match the recorded cassettes byte-for-byte.
RSpec.describe Net::Llm::VertexAI do
  # Real project/region come from the environment when re-recording; the
  # fallbacks match the placeholders filtered into the fixtures
  # (see spec/support/vcr.rb).
  subject(:client) do
    described_class.new(
      project_id: ENV.fetch("GOOGLE_CLOUD_PROJECT", "test-project"),
      region: ENV.fetch("GOOGLE_CLOUD_REGION", "us-east5")
    )
  end

  describe "#initialize" do
    it "sets default model" do
      expect(client.model).to eq("claude-opus-4-5@20251101")
    end

    it "allows custom model" do
      custom = described_class.new(
        project_id: "test-project",
        region: "us-east5",
        model: "claude-haiku-4-5@20251022"
      )
      expect(custom.model).to eq("claude-haiku-4-5@20251022")
    end
  end

  describe "#messages" do
    let(:messages) { [{ role: "user", content: "Hello" }] }

    # No block given: single blocking request, parsed JSON hash returned.
    context "without streaming", :vcr do
      it "POST rawPredict" do
        result = client.messages(messages)

        expect(result["content"]).not_to be_empty
        expect(result["role"]).to eq("assistant")
      end
    end

    # Block given: client sets "stream":true and yields each SSE event;
    # the recorded stream ends with a "message_stop" event.
    context "with streaming", :vcr do
      it "yields SSE events to the block" do
        results = []
        client.messages(messages) { |event| results << event }

        expect(results).not_to be_empty
        expect(results.last["type"]).to eq("message_stop")
      end
    end

    context "with tools", :vcr do
      # Anthropic tool schema; must match the tools array recorded in the
      # cassette request body exactly.
      let(:tools) do
        [{
          name: "get_weather",
          description: "Get weather for a city",
          input_schema: {
            type: "object",
            properties: { city: { type: "string" } },
            required: ["city"]
          }
        }]
      end

      it "POST rawPredict with tools" do
        messages = [{ role: "user", content: "What is the weather in Tokyo?" }]
        result = client.messages(messages, tools: tools)

        expect(result["content"]).not_to be_empty
        expect(result["stop_reason"]).to eq("tool_use")
      end
    end

    context "with system prompt", :vcr do
      it "includes system in request" do
        result = client.messages(messages, system: "You are a helpful assistant.")

        expect(result["content"]).not_to be_empty
      end
    end

    # No cassette here: WebMock stubs the request directly to exercise the
    # non-2xx path, which returns an error hash instead of raising.
    # Net::HTTP exposes status codes as strings, hence eq("401").
    context "error handling" do
      it "returns error hash on HTTP failure" do
        stub_request(:post, %r{aiplatform\.googleapis\.com})
          .to_return(status: 401, body: "Unauthorized")

        result = client.messages(messages)
        expect(result["code"]).to eq("401")
        expect(result["body"]).to eq("Unauthorized")
      end
    end
  end
end
spec/support/vcr.rb
@@ -6,4 +6,7 @@ VCR.configure do |config|
   config.cassette_library_dir = "spec/fixtures"
   config.hook_into :webmock
   config.configure_rspec_metadata!
+  config.filter_sensitive_data("<ANTHROPIC_API_KEY>") { |i| i.request.headers["X-Api-Key"]&.first }
+  config.filter_sensitive_data("<BEARER_TOKEN>") { |i| i.request.headers["Authorization"]&.first }
+  config.filter_sensitive_data("<GCP_PROJECT>") { ENV.fetch("GOOGLE_CLOUD_PROJECT", "test-project") }
 end