From 5451bcd0b1773d08d133f85b57c18cce4606aa83 Mon Sep 17 00:00:00 2001 From: Tai Groot Date: Wed, 19 Apr 2023 14:00:03 -0700 Subject: [PATCH 1/3] fix readme and say --- README.md | 2 +- cmd/say/main.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ca696a0..39192c9 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ make TTS (text-to-speech) requests to elevenlabs.io As a prerequisite, you must already have an account with elevenlabs.io. -After creating your account, you can get you API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-). +After creating your account, you can get your API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-). ## Test Program diff --git a/cmd/say/main.go b/cmd/say/main.go index 7f005de..7e3016e 100644 --- a/cmd/say/main.go +++ b/cmd/say/main.go @@ -26,7 +26,8 @@ func main() { pipeReader, pipeWriter := io.Pipe() reader := bufio.NewReader(os.Stdin) - text, _ := reader.ReadString('\n') + b, _ := io.ReadAll(reader) + text := string(b) go func() { err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75}) From 6fcc65115db6787bc4bfa4e0bbf916e2cf5d0465 Mon Sep 17 00:00:00 2001 From: Tai Groot Date: Wed, 19 Apr 2023 14:02:09 -0700 Subject: [PATCH 2/3] add comments to the readme code --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 39192c9..d5470d6 100644 --- a/README.md +++ b/README.md @@ -56,21 +56,26 @@ import ( func main() { ctx := context.Background() + // load in an API key to create a client client := client.New(os.Getenv("XI_API_KEY")) + // fetch a list of voice IDs from elevenlabs ids, err := client.GetVoiceIDs(ctx) if err != nil { panic(err) } + // prepare a pipe for streaming audio directly to beep pipeReader, pipeWriter := io.Pipe() reader := bufio.NewReader(os.Stdin) text, _ := reader.ReadString('\n') go func() { + // stream audio from elevenlabs using the first voice we found err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75}) if err != nil { panic(err) } pipeWriter.Close() }() + // decode and prepare the streaming mp3 as it comes through streamer, format, err := mp3.Decode(pipeReader) if err != nil { log.Fatal(err) @@ -78,6 +83,7 @@ func main() { defer streamer.Close() speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)) done := make(chan bool) + // play the audio speaker.Play(beep.Seq(streamer, beep.Callback(func() { done <- true }))) From 3e3b7004b807a52642b3589b66e766e45d36cd57 Mon Sep 17 00:00:00 2001 From: Tai Groot Date: Fri, 12 May 2023 23:42:59 -0700 Subject: [PATCH 3/3] update to newer release of api --- client/tts.go | 6 ++++-- client/types/types.go | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/client/tts.go b/client/tts.go index 28c33ec..aa44e4b 100644 --- a/client/tts.go +++ b/client/tts.go @@ -13,11 +13,12 @@ import ( "github.com/taigrr/elevenlabs/client/types" ) -func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string, options types.SynthesisOptions) error { +func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, modelID, voiceID string, options types.SynthesisOptions) error { options.Clamp() url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID) opts := types.TTS{ Text: text, + ModelID: modelID, VoiceSettings: options, } b, _ := json.Marshal(opts) @@ -56,12 +57,13 @@ func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string } } -func (c Client) TTS(ctx context.Context, text, voiceID string, options types.SynthesisOptions) ([]byte, error) { +func (c Client) TTS(ctx context.Context, text, voiceID, modelID string, options types.SynthesisOptions) ([]byte, error) { options.Clamp() url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID) client := &http.Client{} opts := types.TTS{ Text: text, + ModelID: modelID, VoiceSettings: options, } b, _ := json.Marshal(opts) diff --git a/client/types/types.go b/client/types/types.go index 802477e..85e90ac 100644 --- a/client/types/types.go +++ b/client/types/types.go @@ -19,6 +19,7 @@ type Voice struct { Labels string `json:"labels,omitempty"` // Serialized labels dictionary for the voice. } type TTS struct { + ModelID string `json:"model_id"` Text string `json:"text"` // The text that will get converted into speech. Currently only English text is supported. VoiceSettings SynthesisOptions `json:"voice_settings,omitempty"` // Voice settings are applied only on the given TTS request. } @@ -103,6 +104,22 @@ type LanguageResponseModel struct { IsoCode string `json:"iso_code"` DisplayName string `json:"display_name"` } + +type Language struct { + LanguageID string `json:"language_id"` + Name string `json:"name"` +} + +type ModelResponseModel struct { + ModelID string `json:"model_id"` + Name string `json:"name"` + Description string `json:"description"` + CanBeFinetuned bool `json:"can_be_finetuned"` + CanDoTextToSpeech bool `json:"can_do_text_to_speech"` + CanDoVoiceConversion bool `json:"can_do_voice_conversion"` + TokenCostFactor float64 `json:"token_cost_factor"` + Languages []Language `json:"languages"` +} type RecordingResponseModel struct { RecordingID string `json:"recording_id"` MimeType string `json:"mime_type"`