1# frozen_string_literal: true
2
module Net
  module Llm
    # Minimal HTTP client for an Ollama server.
    #
    # Wraps the chat, generate, embeddings, tags and show endpoints and
    # supports both blocking requests and streaming (newline-delimited JSON)
    # responses. The HTTP transport is injected so it can be stubbed in tests.
    class Ollama
      attr_reader :host, :model, :http

      # @param host [String] host (optionally with scheme/port) of the server;
      #   defaults to OLLAMA_HOST or "localhost:11434"
      # @param model [String] model name sent with every request
      # @param http [#get, #post] transport object; defaults to Net::Llm.http
      def initialize(host: ENV.fetch("OLLAMA_HOST", "localhost:11434"), model: "gpt-oss", http: Net::Llm.http)
        @host = host
        @model = model
        @http = http
      end

      # POST /api/chat. With a block, streams parsed NDJSON chunks to it;
      # without, returns the parsed response hash.
      #
      # @param messages [Array<Hash>] chat history entries
      # @param tools [Array<Hash>] optional tool definitions (omitted when empty)
      def chat(messages, tools = [], &block)
        payload = { model: model, messages: messages, stream: block_given? }
        payload[:tools] = tools unless tools.empty?

        execute(build_url("/api/chat"), payload, &block)
      end

      # Higher-level wrapper around #chat that normalizes results into
      # { type:, content:, thinking:, tool_calls:, stop_reason: } hashes.
      #
      # Without a block: one blocking request, returns a single :complete hash.
      # With a block: yields a :delta hash per streamed chunk, then a final
      # :complete hash carrying the accumulated content/thinking/tool calls.
      def fetch(messages, tools = [], &block)
        return fetch_blocking(messages, tools) unless block_given?

        # +"" yields mutable strings under frozen_string_literal so we can
        # append in place instead of reallocating with += on every chunk.
        content = +""
        thinking = +""
        tool_calls = []

        chat(messages, tools) do |chunk|
          msg = chunk["message"] || {}
          delta_content = msg["content"]
          delta_thinking = msg["thinking"]

          content << delta_content if delta_content
          thinking << delta_thinking if delta_thinking
          tool_calls.concat(normalize_tool_calls(msg["tool_calls"])) if msg["tool_calls"]

          if chunk["done"]
            block.call({
              type: :complete,
              content: content,
              thinking: thinking.empty? ? nil : thinking,
              tool_calls: tool_calls,
              stop_reason: map_stop_reason(chunk["done_reason"])
            })
          else
            block.call({
              type: :delta,
              content: delta_content,
              thinking: delta_thinking,
              tool_calls: nil
            })
          end
        end
      end

      # POST /api/generate — single-prompt completion; streams when a block is given.
      def generate(prompt, &block)
        execute(build_url("/api/generate"), {
          model: model,
          prompt: prompt,
          stream: block_given?
        }, &block)
      end

      # POST /api/embed — embeddings for a string or array of strings.
      def embeddings(input)
        post_request(build_url("/api/embed"), { model: model, input: input })
      end

      # GET /api/tags — lists locally available models.
      def tags
        get_request(build_url("/api/tags"))
      end

      # POST /api/show — detailed information about one model.
      def show(name)
        post_request(build_url("/api/show"), { name: name })
      end

      private

      # Non-streaming branch of #fetch: one request, one :complete hash.
      def fetch_blocking(messages, tools)
        result = chat(messages, tools)
        msg = result["message"] || {}
        {
          type: :complete,
          content: msg["content"],
          thinking: msg["thinking"],
          tool_calls: normalize_tool_calls(msg["tool_calls"]),
          stop_reason: map_stop_reason(result["done_reason"])
        }
      end

      # Dispatches to the streaming or blocking request path.
      def execute(url, payload, &block)
        if block_given?
          stream_request(url, payload, &block)
        else
          post_request(url, payload)
        end
      end

      # Prefixes "http://" when the configured host carries no scheme.
      def build_url(path)
        base = host.start_with?("http://", "https://") ? host : "http://#{host}"
        "#{base}#{path}"
      end

      def get_request(url)
        handle_response(http.get(url))
      end

      def post_request(url, payload)
        handle_response(http.post(url, body: payload))
      end

      # Parses a successful body as JSON; on failure returns a small error
      # hash (string keys, matching the parsed-JSON shape) instead of raising,
      # so callers can inspect code/body uniformly.
      def handle_response(response)
        if response.is_a?(Net::HTTPSuccess)
          JSON.parse(response.body)
        else
          {
            "code" => response.code,
            "body" => response.body
          }
        end
      end

      # Streams an NDJSON response, yielding each parsed line to the block.
      # Raises on non-success responses (streaming has no error-hash fallback).
      def stream_request(url, payload, &block)
        http.post(url, body: payload) do |response|
          raise "HTTP #{response.code}: #{response.body}" unless response.is_a?(Net::HTTPSuccess)

          # String.new (not a frozen "" literal) so << can append in place;
          # extract_message then consumes it destructively via #replace.
          buffer = String.new
          response.read_body do |chunk|
            buffer << chunk

            while (message = extract_message(buffer))
              next if message.empty?

              json = JSON.parse(message)
              block.call(json)

              break if json["done"]
            end
          end
        end
      end

      # Destructively removes and returns the first newline-terminated line
      # from +buffer+, or nil when no complete line has arrived yet.
      def extract_message(buffer)
        message_end = buffer.index("\n")
        return nil unless message_end

        message = buffer[0...message_end]
        buffer.replace(buffer[(message_end + 1)..] || "")
        message
      end

      # Converts Ollama tool-call hashes into { id:, name:, arguments: }.
      # Tolerates a nil/empty input by returning [].
      def normalize_tool_calls(tool_calls)
        return [] if tool_calls.nil? || tool_calls.empty?

        tool_calls.map do |tc|
          {
            id: tc["id"] || tc.dig("function", "id"),
            name: tc.dig("function", "name"),
            arguments: tc.dig("function", "arguments") || {}
          }
        end
      end

      # Maps Ollama's done_reason strings onto this library's symbols.
      def map_stop_reason(reason)
        case reason
        when "tool_calls", "tool_use" then :tool_use
        when "length" then :max_tokens
        else :end_turn # covers "stop", unknown reasons, and nil
        end
      end
    end
  end
end