Add sound generation api (#9)

* Add missing attributes for VoiceResponseModel
* Updating module to point to forked repo
* Tidying up go.mod
* Adding missing voice settings
* Adding support for request stitching
* Adding support for request stitching
* Fix dup SharingOptions struct from merge
* Add Sound Generation API
* Fix: revert user-agent/package url to original
2026-04-02 03:08:57 -07:00 · 2024-11-26 06:39:34 +01:00
parent c585531fae
commit db0a2e1760
8 changed files with 243 additions and 163 deletions
--- a/client/types/types.go
+++ b/client/types/types.go
@@ -19,11 +19,17 @@ type Voice struct {
 	Labels      string     `json:"labels,omitempty"`      // Serialized labels dictionary for the voice.
 }
 type TTS struct {
+	VoiceID       string           `json:"voice_id"` // The ID of the voice that will be used to generate the speech.
 	ModelID       string           `json:"model_id,omitempty"`
-	Text          string           `json:"text"`                     // The text that will get converted into speech. Currently only English text is supported.
+	Text          string           `json:"text"`                     // The text that will get converted into speech.
+	PreviousText  string           `json:"previous_text,omitempty"`  // The text that was used to generate the previous audio file.
+	NextText      string           `json:"next_text,omitempty"`      // The text that will be used to generate the next audio file.
 	VoiceSettings SynthesisOptions `json:"voice_settings,omitempty"` // Voice settings are applied only on the given TTS request.
+	Stream        bool             `json:"stream,omitempty"`         // If true, the response will be a stream of audio data.
 }

+type TTSParam func(*TTS)
+
 func (so *SynthesisOptions) Clamp() {
 	if so.Stability > 1 || so.Stability < 0 {
 		so.Stability = 0.75
@@ -218,3 +224,9 @@ type VoiceResponseModel struct {
 	Sharing                 SharingOptions          `json:"sharing"`
 	HighQualityBaseModelIds []string                `json:"high_quality_base_model_ids"`
 }
+
+type SoundGeneration struct {
+	Text            string  `json:"text"`             // The text that will get converted into a sound effect.
+	DurationSeconds float64 `json:"duration_seconds"` // The duration of the sound which will be generated in seconds.
+	PromptInfluence float64 `json:"prompt_influence"` // A higher prompt influence makes your generation follow the prompt more closely.
+}