main
1package speech
2
3import (
4 "fmt"
5 "os"
6 "os/exec"
7 "path/filepath"
8 "strconv"
9 "strings"
10)
11
// MacOSBackend implements TTS by shelling out to the macOS 'say' command.
// It has no fields, so the zero value is ready to use.
type MacOSBackend struct{}
14
15func (m *MacOSBackend) Speak(text string, voice string, rate *int, volume *float64, output string) (string, error) {
16 cmdArgs := []string{}
17
18 if voice != "" {
19 cmdArgs = append(cmdArgs, "-v", voice)
20 }
21
22 if rate != nil {
23 if *rate < 80 || *rate > 500 {
24 return "", fmt.Errorf("rate must be between 80-500 words per minute")
25 }
26 cmdArgs = append(cmdArgs, "-r", strconv.Itoa(*rate))
27 }
28
29 if volume != nil {
30 if *volume < 0.0 || *volume > 1.0 {
31 return "", fmt.Errorf("volume must be between 0.0 and 1.0")
32 }
33 // Convert to 0-100 scale for say command
34 volumeInt := int(*volume * 100)
35 cmdArgs = append(cmdArgs, "--volume", strconv.Itoa(volumeInt))
36 }
37
38 if output != "" {
39 // Validate output file extension
40 ext := strings.ToLower(filepath.Ext(output))
41 if ext != ".aiff" && ext != ".wav" && ext != ".m4a" {
42 return "", fmt.Errorf("output format must be .aiff, .wav, or .m4a")
43 }
44 cmdArgs = append(cmdArgs, "-o", output)
45 }
46
47 // Add the text to speak
48 cmdArgs = append(cmdArgs, text)
49
50 cmd := exec.Command("say", cmdArgs...)
51 output_bytes, err := cmd.CombinedOutput()
52
53 var result string
54 if output != "" {
55 result = fmt.Sprintf("Audio saved to: %s", output)
56 } else {
57 result = fmt.Sprintf("Spoke: \"%s\"", text)
58 }
59
60 if len(output_bytes) > 0 {
61 result += fmt.Sprintf("\nOutput: %s", string(output_bytes))
62 }
63
64 if err != nil {
65 return result, err
66 }
67
68 return result, nil
69}
70
71func (m *MacOSBackend) ListVoices(language string) ([]Voice, error) {
72 cmd := exec.Command("say", "-v", "?")
73 output, err := cmd.Output()
74
75 if err != nil {
76 return nil, fmt.Errorf("failed to list voices: %v", err)
77 }
78
79 voices := []Voice{}
80 lines := strings.Split(string(output), "\n")
81
82 for _, line := range lines {
83 line = strings.TrimSpace(line)
84 if line == "" {
85 continue
86 }
87
88 // Filter by language if specified
89 if language != "" && !strings.Contains(strings.ToLower(line), language) {
90 continue
91 }
92
93 // Parse voice line (format: "Name Language # Details")
94 parts := strings.Fields(line)
95 if len(parts) >= 2 {
96 voice := Voice{
97 Name: parts[0],
98 Language: parts[1],
99 Details: line,
100 }
101 voices = append(voices, voice)
102 }
103 }
104
105 return voices, nil
106}
107
108func (m *MacOSBackend) SpeakFile(filepath string, voice string, rate *int, volume *float64, maxLines *int) (string, error) {
109 // Read the file to get stats
110 content, err := os.ReadFile(filepath)
111 if err != nil {
112 return "", fmt.Errorf("failed to read file: %v", err)
113 }
114
115 text := string(content)
116 linesCount := len(strings.Split(text, "\n"))
117 wordsCount := len(strings.Fields(text))
118
119 // Build say command
120 cmdArgs := []string{}
121
122 if voice != "" {
123 cmdArgs = append(cmdArgs, "-v", voice)
124 }
125
126 if rate != nil {
127 if *rate < 80 || *rate > 500 {
128 return "", fmt.Errorf("rate must be between 80-500 words per minute")
129 }
130 cmdArgs = append(cmdArgs, "-r", strconv.Itoa(*rate))
131 }
132
133 if volume != nil {
134 if *volume < 0.0 || *volume > 1.0 {
135 return "", fmt.Errorf("volume must be between 0.0 and 1.0")
136 }
137 volumeInt := int(*volume * 100)
138 cmdArgs = append(cmdArgs, "--volume", strconv.Itoa(volumeInt))
139 }
140
141 // If maxLines specified, speak text directly with limit
142 if maxLines != nil && *maxLines > 0 && *maxLines < linesCount {
143 lines := strings.Split(text, "\n")
144 lines = lines[:*maxLines]
145 limitedText := strings.Join(lines, "\n")
146 cmdArgs = append(cmdArgs, limitedText)
147
148 cmd := exec.Command("say", cmdArgs...)
149 _, err := cmd.CombinedOutput()
150
151 result := fmt.Sprintf("Speaking file: %s\nLines: %d (limited to %d), Words: ~%d",
152 filepath, linesCount, *maxLines, len(strings.Fields(limitedText)))
153
154 if err != nil {
155 return result, err
156 }
157 return result, nil
158 }
159
160 // Otherwise use -f flag to speak entire file
161 cmdArgs = append(cmdArgs, "-f", filepath)
162
163 cmd := exec.Command("say", cmdArgs...)
164 _, err = cmd.CombinedOutput()
165
166 result := fmt.Sprintf("Speaking file: %s\nLines: %d, Words: %d",
167 filepath, linesCount, wordsCount)
168
169 if err != nil {
170 return result, err
171 }
172
173 return result, nil
174}
175
176func (m *MacOSBackend) StopSpeech() (string, error) {
177 cmd := exec.Command("pkill", "say")
178 err := cmd.Run()
179
180 if err != nil {
181 // pkill returns error if no processes found, which is fine
182 return "Stopped all speech synthesis (no speech processes were running)", nil
183 }
184
185 return "Stopped all speech synthesis", nil
186}
187
188func (m *MacOSBackend) IsAvailable() bool {
189 _, err := exec.LookPath("say")
190 return err == nil
191}
192
193func (m *MacOSBackend) GetName() string {
194 return "macOS say"
195}
196
// LinuxBackend implements TTS using espeak-ng or espeak.
type LinuxBackend struct {
	// command caches the executable name picked by getCommand. It stays
	// empty until one of the supported binaries has been found on PATH.
	command string
}
201
202func (l *LinuxBackend) getCommand() string {
203 if l.command != "" {
204 return l.command
205 }
206
207 // Try espeak-ng first (newer, better quality)
208 if _, err := exec.LookPath("espeak-ng"); err == nil {
209 l.command = "espeak-ng"
210 return l.command
211 }
212
213 // Fall back to espeak
214 if _, err := exec.LookPath("espeak"); err == nil {
215 l.command = "espeak"
216 return l.command
217 }
218
219 return ""
220}
221
222func (l *LinuxBackend) Speak(text string, voice string, rate *int, volume *float64, output string) (string, error) {
223 cmd := l.getCommand()
224 if cmd == "" {
225 return "", fmt.Errorf("no TTS command available (install espeak-ng or espeak)")
226 }
227
228 cmdArgs := []string{}
229
230 // Add voice selection
231 if voice != "" {
232 cmdArgs = append(cmdArgs, "-v", voice)
233 }
234
235 // Add speech rate (words per minute)
236 if rate != nil {
237 // espeak uses words per minute directly
238 cmdArgs = append(cmdArgs, "-s", strconv.Itoa(*rate))
239 }
240
241 // Add volume (amplitude)
242 if volume != nil {
243 // espeak uses amplitude 0-200, with 100 as default
244 amplitude := int(*volume * 200)
245 cmdArgs = append(cmdArgs, "-a", strconv.Itoa(amplitude))
246 }
247
248 // Add output file if specified
249 if output != "" {
250 // espeak supports wav output
251 ext := strings.ToLower(filepath.Ext(output))
252 if ext != ".wav" {
253 return "", fmt.Errorf("output format must be .wav for Linux TTS")
254 }
255 cmdArgs = append(cmdArgs, "-w", output)
256 }
257
258 // Add the text
259 cmdArgs = append(cmdArgs, text)
260
261 command := exec.Command(cmd, cmdArgs...)
262 output_bytes, err := command.CombinedOutput()
263
264 var result string
265 if output != "" {
266 result = fmt.Sprintf("Audio saved to: %s", output)
267 } else {
268 result = fmt.Sprintf("Spoke: \"%s\"", text)
269 }
270
271 if len(output_bytes) > 0 && !strings.Contains(string(output_bytes), "ALSA lib") {
272 // Filter out common ALSA warnings
273 result += fmt.Sprintf("\nOutput: %s", string(output_bytes))
274 }
275
276 if err != nil {
277 return result, err
278 }
279
280 return result, nil
281}
282
283func (l *LinuxBackend) ListVoices(language string) ([]Voice, error) {
284 cmd := l.getCommand()
285 if cmd == "" {
286 return nil, fmt.Errorf("no TTS command available (install espeak-ng or espeak)")
287 }
288
289 command := exec.Command(cmd, "--voices")
290 output, err := command.Output()
291
292 if err != nil {
293 return nil, fmt.Errorf("failed to list voices: %v", err)
294 }
295
296 voices := []Voice{}
297 lines := strings.Split(string(output), "\n")
298
299 // Skip header line
300 if len(lines) > 0 {
301 lines = lines[1:]
302 }
303
304 for _, line := range lines {
305 line = strings.TrimSpace(line)
306 if line == "" {
307 continue
308 }
309
310 // Parse espeak voice format
311 // Format: "Pty Language Age/Gender VoiceName File Other Languages"
312 fields := strings.Fields(line)
313 if len(fields) >= 4 {
314 lang := fields[1]
315 name := fields[3]
316
317 // Filter by language if specified
318 if language != "" && !strings.Contains(strings.ToLower(lang), strings.ToLower(language)) {
319 continue
320 }
321
322 voice := Voice{
323 Name: name,
324 Language: lang,
325 Details: line,
326 }
327 voices = append(voices, voice)
328 }
329 }
330
331 return voices, nil
332}
333
334func (l *LinuxBackend) SpeakFile(filepath string, voice string, rate *int, volume *float64, maxLines *int) (string, error) {
335 // Read the file to get stats and handle maxLines
336 content, err := os.ReadFile(filepath)
337 if err != nil {
338 return "", fmt.Errorf("failed to read file: %v", err)
339 }
340
341 text := string(content)
342 linesCount := len(strings.Split(text, "\n"))
343 wordsCount := len(strings.Fields(text))
344
345 // Limit lines if specified
346 actualText := text
347 if maxLines != nil && *maxLines > 0 && *maxLines < linesCount {
348 lines := strings.Split(text, "\n")
349 lines = lines[:*maxLines]
350 actualText = strings.Join(lines, "\n")
351 }
352
353 // Use Speak method with the text
354 result, err := l.Speak(actualText, voice, rate, volume, "")
355
356 fileInfo := fmt.Sprintf("Speaking file: %s\nLines: %d", filepath, linesCount)
357 if maxLines != nil && *maxLines < linesCount {
358 fileInfo += fmt.Sprintf(" (limited to %d)", *maxLines)
359 }
360 fileInfo += fmt.Sprintf(", Words: %d", wordsCount)
361
362 if err != nil {
363 return fileInfo + "\n" + result, err
364 }
365
366 return fileInfo + "\n" + result, nil
367}
368
369func (l *LinuxBackend) StopSpeech() (string, error) {
370 cmd := l.getCommand()
371 if cmd == "" {
372 return "No TTS command available", nil
373 }
374
375 // Kill espeak/espeak-ng processes
376 exec.Command("pkill", cmd).Run()
377
378 // Also try to kill common audio players that might be used
379 exec.Command("pkill", "aplay").Run()
380 exec.Command("pkill", "paplay").Run()
381
382 return fmt.Sprintf("Stopped all %s processes", cmd), nil
383}
384
385func (l *LinuxBackend) IsAvailable() bool {
386 return l.getCommand() != ""
387}
388
389func (l *LinuxBackend) GetName() string {
390 cmd := l.getCommand()
391 if cmd != "" {
392 return cmd
393 }
394 return "Linux TTS (not available)"
395}
396
// UnsupportedBackend is the backend for unsupported operating systems.
type UnsupportedBackend struct {
	// os is the operating system name inserted into the error messages and
	// into the string returned by GetName.
	os string
}
401
402func (u *UnsupportedBackend) Speak(text string, voice string, rate *int, volume *float64, output string) (string, error) {
403 return "", fmt.Errorf("speech synthesis is not supported on %s", u.os)
404}
405
406func (u *UnsupportedBackend) ListVoices(language string) ([]Voice, error) {
407 return nil, fmt.Errorf("voice listing is not supported on %s", u.os)
408}
409
410func (u *UnsupportedBackend) SpeakFile(filepath string, voice string, rate *int, volume *float64, maxLines *int) (string, error) {
411 return "", fmt.Errorf("file speaking is not supported on %s", u.os)
412}
413
414func (u *UnsupportedBackend) StopSpeech() (string, error) {
415 return "", fmt.Errorf("speech control is not supported on %s", u.os)
416}
417
418func (u *UnsupportedBackend) IsAvailable() bool {
419 return false
420}
421
422func (u *UnsupportedBackend) GetName() string {
423 return fmt.Sprintf("Unsupported (%s)", u.os)
424}