Add sound generation api (#9)

* Add missing attributes for VoiceResponseModel

* Updating module to point to forked repo

* Tidying up go.mod

* Adding missing voice settings

* Adding support for request stitching

* Adding support for request stitching

* Fix dup SharingOptions struct from merge

* Add Sound Generation API

* Fix: revert user-agent/package url to original
This commit is contained in:
Lachlan Laycock
2024-11-26 06:39:34 +01:00
committed by GitHub
parent c585531fae
commit db0a2e1760
8 changed files with 243 additions and 163 deletions

View File

@@ -19,11 +19,17 @@ type Voice struct {
Labels string `json:"labels,omitempty"` // Serialized labels dictionary for the voice.
}
type TTS struct {
VoiceID string `json:"voice_id"` // The ID of the voice that will be used to generate the speech.
ModelID string `json:"model_id,omitempty"`
Text string `json:"text"` // The text that will get converted into speech. Currently only English text is supported.
Text string `json:"text"` // The text that will get converted into speech.
PreviousText string `json:"previous_text,omitempty"` // The text that was used to generate the previous audio file.
NextText string `json:"next_text,omitempty"` // The text that will be used to generate the next audio file.
VoiceSettings SynthesisOptions `json:"voice_settings,omitempty"` // Voice settings are applied only on the given TTS request.
Stream bool `json:"stream,omitempty"` // If true, the response will be a stream of audio data.
}
// TTSParam is a function that receives a pointer to a TTS value and may
// modify it in place.
// NOTE(review): presumably used as a functional option when building a TTS
// request — confirm against the callers.
type TTSParam func(*TTS)
func (so *SynthesisOptions) Clamp() {
if so.Stability > 1 || so.Stability < 0 {
so.Stability = 0.75
@@ -218,3 +224,9 @@ type VoiceResponseModel struct {
Sharing SharingOptions `json:"sharing"`
HighQualityBaseModelIds []string `json:"high_quality_base_model_ids"`
}
// SoundGeneration holds the parameters of a sound-effect generation request.
// None of the fields use omitempty, so all three keys are always present in
// the encoded JSON, including when they hold their zero value.
type SoundGeneration struct {
	// The text that will get converted into a sound effect.
	Text string `json:"text"`
	// The duration of the sound which will be generated in seconds.
	DurationSeconds float64 `json:"duration_seconds"`
	// A higher prompt influence makes your generation follow the prompt more closely.
	PromptInfluence float64 `json:"prompt_influence"`
}