Commit d0d80a8
Changed files (8)
lib
net
llm
spec
fixtures
Net_Llm_VertexAI
_messages
with_streaming
with_system_prompt
with_tools
without_streaming
net
support
lib/net/llm/vertex_ai.rb
@@ -0,0 +1,106 @@
+# frozen_string_literal: true
+
+module Net
+ module Llm
+ class VertexAI
+ attr_reader :project_id, :region, :model, :http
+
+ def initialize(project_id:, region:, model: "claude-opus-4-5@20251101", http: Net::Llm.http)
+ @project_id = project_id
+ @region = region
+ @model = model
+ @http = http
+ end
+
+ def messages(messages, system: nil, max_tokens: 1024, tools: nil, &block)
+ payload = build_payload(messages, system, max_tokens, tools, block_given?)
+
+ if block_given?
+ stream_request(payload, &block)
+ else
+ post_request(payload)
+ end
+ end
+
+ private
+
+ def build_payload(messages, system, max_tokens, tools, stream)
+ payload = {
+ anthropic_version: "vertex-2023-10-16",
+ max_tokens: max_tokens,
+ messages: messages,
+ stream: stream
+ }
+ payload[:system] = system if system
+ payload[:tools] = tools if tools
+ payload
+ end
+
+ def endpoint
+ "https://#{region}-aiplatform.googleapis.com/v1/projects/#{project_id}/locations/#{region}/publishers/anthropic/models/#{model}:rawPredict"
+ end
+
+ def headers
+ { "Authorization" => "Bearer #{access_token}" }
+ end
+
+ def access_token
+ @access_token ||= `gcloud auth application-default print-access-token`.strip
+ end
+
+ def post_request(payload)
+ handle_response(http.post(endpoint, headers: headers, body: payload))
+ end
+
+ def handle_response(response)
+ if response.is_a?(Net::HTTPSuccess)
+ JSON.parse(response.body)
+ else
+ { "code" => response.code, "body" => response.body }
+ end
+ end
+
+ def stream_request(payload, &block)
+ http.post(endpoint, headers: headers, body: payload) do |response|
+ raise "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)
+
+ buffer = ""
+ response.read_body do |chunk|
+ buffer += chunk
+
+ while (event = extract_sse_event(buffer))
+ next if event[:data].nil? || event[:data].empty?
+ next if event[:data] == "[DONE]"
+
+ json = JSON.parse(event[:data])
+ block.call(json)
+
+ break if json["type"] == "message_stop"
+ end
+ end
+ end
+ end
+
+ def extract_sse_event(buffer)
+ event_end = buffer.index("\n\n")
+ return nil unless event_end
+
+ event_data = buffer[0...event_end]
+ buffer.replace(buffer[(event_end + 2)..] || "")
+
+ event = {}
+ event_data.split("\n").each do |line|
+ if line.start_with?("event: ")
+ event[:event] = line[7..]
+ elsif line.start_with?("data: ")
+ event[:data] = line[6..]
+ elsif line == "data:"
+ event[:data] = ""
+ end
+ end
+
+ event
+ end
+ end
+ end
+end
lib/net/llm.rb
@@ -4,6 +4,7 @@ require_relative "llm/version"
require_relative "llm/openai"
require_relative "llm/ollama"
require_relative "llm/anthropic"
+require_relative "llm/vertex_ai"
require "net/hippie"
require "json"
spec/fixtures/Net_Llm_VertexAI/_messages/with_streaming/yields_SSE_events_to_the_block.yml
@@ -0,0 +1,70 @@
+---
+http_interactions:
+- request:
+ method: post
+ uri: https://us-east5-aiplatform.googleapis.com/v1/projects/<GCP_PROJECT>/locations/us-east5/publishers/anthropic/models/claude-opus-4-5@20251101:rawPredict
+ body:
+ encoding: UTF-8
+ string: '{"anthropic_version":"vertex-2023-10-16","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}],"stream":true}'
+ headers:
+ Accept:
+ - application/json
+ Content-Type:
+ - application/json
+ Authorization:
+ - "<BEARER_TOKEN>"
+ response:
+ status:
+ code: 200
+ message: OK
+ headers:
+ Content-Type:
+ - text/event-stream; charset=utf-8
+ Cache-Control:
+ - no-cache
+ Transfer-Encoding:
+ - chunked
+ body:
+ encoding: ASCII-8BIT
+ string: |+
+ event: vertex_event
+ data: {"type":"vertex_event","usage":{"input_tokens":8,"output_tokens":5,"cache_creation_input_tokens":null,"cache_read_input_tokens":null,"web_search_requests":null,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"tiers":{"0-to-200000":{"input_tokens":8,"output_tokens":5,"cache_creation_input_tokens":null,"cache_read_input_tokens":null,"web_search_requests":null,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0}}}} }
+
+ event: message_start
+ data: {"type":"message_start","message":{"model":"claude-opus-4-5-20251101","id":"msg_vrtx_019UxNsxrHHYtDf3UqfU5Mum","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":8,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":5}} }
+
+ event: ping
+ data: {"type": "ping"}
+
+ event: content_block_start
+ data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello! How are you"} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" doing today? Is there"} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" something"} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" I can help you with"} }
+
+ event: content_block_delta
+ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"?"} }
+
+ event: content_block_stop
+ data: {"type":"content_block_stop","index":0 }
+
+ event: vertex_event
+ data: {"type":"vertex_event","usage":{"input_tokens":0,"output_tokens":15,"cache_creation_input_tokens":null,"cache_read_input_tokens":null,"web_search_requests":null,"cache_creation":null,"tiers":{"0-to-200000":{"input_tokens":0,"output_tokens":15,"cache_creation_input_tokens":null,"cache_read_input_tokens":null,"web_search_requests":null,"cache_creation":null}}} }
+
+ event: message_delta
+ data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":20} }
+
+ event: message_stop
+ data: {"type":"message_stop" }
+
+ recorded_at: Wed, 07 Jan 2026 00:22:06 GMT
+recorded_with: VCR 6.3.1
spec/fixtures/Net_Llm_VertexAI/_messages/with_system_prompt/includes_system_in_request.yml
@@ -0,0 +1,31 @@
+---
+http_interactions:
+- request:
+ method: post
+ uri: https://us-east5-aiplatform.googleapis.com/v1/projects/<GCP_PROJECT>/locations/us-east5/publishers/anthropic/models/claude-opus-4-5@20251101:rawPredict
+ body:
+ encoding: UTF-8
+ string: '{"anthropic_version":"vertex-2023-10-16","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}],"stream":false,"system":"You are a helpful assistant."}'
+ headers:
+ Accept:
+ - application/json
+ Content-Type:
+ - application/json
+ Authorization:
+ - "<BEARER_TOKEN>"
+ Accept-Encoding:
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+ response:
+ status:
+ code: 200
+ message: OK
+ headers:
+ Content-Type:
+ - application/json
+ Transfer-Encoding:
+ - chunked
+ body:
+ encoding: ASCII-8BIT
+ string: '{"model":"claude-opus-4-5-20251101","id":"msg_vrtx_01MPXti9xKtoAy1rwCBsviG5","type":"message","role":"assistant","content":[{"type":"text","text":"Hello! How are you doing today? Is there something I can help you with?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":14,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":20}}'
+ recorded_at: Wed, 07 Jan 2026 00:22:12 GMT
+recorded_with: VCR 6.3.1
spec/fixtures/Net_Llm_VertexAI/_messages/with_tools/POST_rawPredict_with_tools.yml
@@ -0,0 +1,31 @@
+---
+http_interactions:
+- request:
+ method: post
+ uri: https://us-east5-aiplatform.googleapis.com/v1/projects/<GCP_PROJECT>/locations/us-east5/publishers/anthropic/models/claude-opus-4-5@20251101:rawPredict
+ body:
+ encoding: UTF-8
+ string: '{"anthropic_version":"vertex-2023-10-16","max_tokens":1024,"messages":[{"role":"user","content":"What is the weather in Tokyo?"}],"stream":false,"tools":[{"name":"get_weather","description":"Get weather for a city","input_schema":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}}]}'
+ headers:
+ Accept:
+ - application/json
+ Content-Type:
+ - application/json
+ Authorization:
+ - "<BEARER_TOKEN>"
+ Accept-Encoding:
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+ response:
+ status:
+ code: 200
+ message: OK
+ headers:
+ Content-Type:
+ - application/json
+ Transfer-Encoding:
+ - chunked
+ body:
+ encoding: ASCII-8BIT
+ string: '{"model":"claude-opus-4-5-20251101","id":"msg_vrtx_01VcfLZpYpcbAEMtciQnyuJ2","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_vrtx_01WXhEuzc8gXtZpkhh9XKNvU","name":"get_weather","input":{"city":"Tokyo"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":563,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":53}}'
+ recorded_at: Wed, 07 Jan 2026 00:22:10 GMT
+recorded_with: VCR 6.3.1
spec/fixtures/Net_Llm_VertexAI/_messages/without_streaming/POST_rawPredict.yml
@@ -0,0 +1,56 @@
+---
+http_interactions:
+- request:
+ method: post
+ uri: https://us-east5-aiplatform.googleapis.com/v1/projects/<GCP_PROJECT>/locations/us-east5/publishers/anthropic/models/claude-opus-4-5@20251101:rawPredict
+ body:
+ encoding: UTF-8
+ string: '{"anthropic_version":"vertex-2023-10-16","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}],"stream":false}'
+ headers:
+ Accept:
+ - application/json
+ Content-Type:
+ - application/json
+ User-Agent:
+ - net/hippie 1.4.0
+ Authorization:
+ - "<BEARER_TOKEN>"
+ Accept-Encoding:
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
+ Host:
+ - us-east5-aiplatform.googleapis.com
+ response:
+ status:
+ code: 200
+ message: OK
+ headers:
+ X-Vertex-Ai-Internal-Prediction-Backend:
+ - harpoon
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 07 Jan 2026 00:22:03 GMT
+ Request-Id:
+ - req_vrtx_011CWs4bARQpr2gfBaUuLy2V
+ Vary:
+ - Origin
+ - Referer
+ - X-Origin
+ Server:
+ - scaffolding on HTTPServer2
+ X-Xss-Protection:
+ - '0'
+ X-Frame-Options:
+ - SAMEORIGIN
+ X-Content-Type-Options:
+ - nosniff
+ Alt-Svc:
+ - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+ Transfer-Encoding:
+ - chunked
+ body:
+ encoding: ASCII-8BIT
+ string: '{"model":"claude-opus-4-5-20251101","id":"msg_vrtx_01E76LkjuR5s5LW7iMNvsfuQ","type":"message","role":"assistant","content":[{"type":"text","text":"Hello!
+ How are you doing today? Is there something I can help you with?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":8,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":20}}'
+ recorded_at: Wed, 07 Jan 2026 00:22:04 GMT
+recorded_with: VCR 6.3.1
spec/net/llm/vertex_ai_spec.rb
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
RSpec.describe Net::Llm::VertexAI do
  # Default client; env vars let cassettes be re-recorded against a real GCP
  # project, while CI falls back to the placeholder values baked into the
  # fixtures (<GCP_PROJECT>, us-east5).
  subject(:client) do
    described_class.new(
      project_id: ENV.fetch("GOOGLE_CLOUD_PROJECT", "test-project"),
      region: ENV.fetch("GOOGLE_CLOUD_REGION", "us-east5")
    )
  end

  describe "#initialize" do
    it "sets default model" do
      expect(client.model).to eq("claude-opus-4-5@20251101")
    end

    it "allows custom model" do
      custom = described_class.new(
        project_id: "test-project",
        region: "us-east5",
        model: "claude-haiku-4-5@20251022"
      )
      expect(custom.model).to eq("claude-haiku-4-5@20251022")
    end
  end

  describe "#messages" do
    let(:messages) { [{ role: "user", content: "Hello" }] }

    # NOTE: the :vcr metadata derives cassette paths from these description
    # strings (spec/fixtures/Net_Llm_VertexAI/_messages/...), so renaming a
    # context or example requires renaming the matching fixture file.
    context "without streaming", :vcr do
      it "POST rawPredict" do
        result = client.messages(messages)

        expect(result["content"]).not_to be_empty
        expect(result["role"]).to eq("assistant")
      end
    end

    context "with streaming", :vcr do
      it "yields SSE events to the block" do
        results = []
        client.messages(messages) { |event| results << event }

        expect(results).not_to be_empty
        expect(results.last["type"]).to eq("message_stop")
      end
    end

    context "with tools", :vcr do
      let(:tools) do
        [{
          name: "get_weather",
          description: "Get weather for a city",
          input_schema: {
            type: "object",
            properties: { city: { type: "string" } },
            required: ["city"]
          }
        }]
      end

      it "POST rawPredict with tools" do
        # Shadows the outer `messages` let with a tool-prompting message.
        messages = [{ role: "user", content: "What is the weather in Tokyo?" }]
        result = client.messages(messages, tools: tools)

        expect(result["content"]).not_to be_empty
        expect(result["stop_reason"]).to eq("tool_use")
      end
    end

    context "with system prompt", :vcr do
      it "includes system in request" do
        result = client.messages(messages, system: "You are a helpful assistant.")

        expect(result["content"]).not_to be_empty
      end
    end

    # No cassette here: WebMock stubs the request directly to exercise the
    # non-2xx branch of handle_response.
    # NOTE(review): this example still invokes the real `gcloud` CLI through
    # access_token when building headers — consider stubbing it so the suite
    # does not depend on a local gcloud install; verify in CI.
    context "error handling" do
      it "returns error hash on HTTP failure" do
        stub_request(:post, %r{aiplatform\.googleapis\.com})
          .to_return(status: 401, body: "Unauthorized")

        result = client.messages(messages)
        expect(result["code"]).to eq("401")
        expect(result["body"]).to eq("Unauthorized")
      end
    end
  end
end
spec/support/vcr.rb
@@ -6,4 +6,7 @@ VCR.configure do |config|
config.cassette_library_dir = "spec/fixtures"
config.hook_into :webmock
config.configure_rspec_metadata!
+ config.filter_sensitive_data("<ANTHROPIC_API_KEY>") { |i| i.request.headers["X-Api-Key"]&.first }
+ config.filter_sensitive_data("<BEARER_TOKEN>") { |i| i.request.headers["Authorization"]&.first }
+ config.filter_sensitive_data("<GCP_PROJECT>") { ENV.fetch("GOOGLE_CLOUD_PROJECT", "test-project") }
end