Merge branch 'master' of github.com:taigrr/elevenlabs

2026-04-01 18:58:52 -07:00 · 2023-06-26 20:09:02 -07:00
parent 84e59417d6 3e3b7004b8
commit 32972d4ff2
4 changed files with 30 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ make TTS (text-to-speech) requests to elevenlabs.io


 As a prerequisite, you must already have an account with elevenlabs.io.
-After creating your account, you can get you API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-).
+After creating your account, you can get your API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-).

 ## Test Program

@@ -56,21 +56,26 @@ import (

 func main() {
        ctx := context.Background()
+        // load in an API key to create a client
        client := client.New(os.Getenv("XI_API_KEY"))
+        // fetch a list of voice IDs from elevenlabs
        ids, err := client.GetVoiceIDs(ctx)
        if err != nil {
                panic(err)
        }
+        // prepare a pipe for streaming audio directly to beep
        pipeReader, pipeWriter := io.Pipe()
        reader := bufio.NewReader(os.Stdin)
        text, _ := reader.ReadString('\n')
        go func() {
+                // stream audio from elevenlabs using the first voice we found
                err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75})
                if err != nil {
                        panic(err)
                }
                pipeWriter.Close()
        }()
+        // decode and prepare the streaming mp3 as it comes through
        streamer, format, err := mp3.Decode(pipeReader)
        if err != nil {
                log.Fatal(err)
@@ -78,6 +83,7 @@ func main() {
        defer streamer.Close()
        speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10))
        done := make(chan bool)
+        // play the audio
        speaker.Play(beep.Seq(streamer, beep.Callback(func() {
                done <- true
        })))
--- a/client/tts.go
+++ b/client/tts.go
@@ -13,11 +13,12 @@ import (
 	"github.com/taigrr/elevenlabs/client/types"
 )

-func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string, options types.SynthesisOptions) error {
+func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, modelID, voiceID string, options types.SynthesisOptions) error {
 	options.Clamp()
 	url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
 	opts := types.TTS{
 		Text:          text,
+		ModelID:       modelID,
 		VoiceSettings: options,
 	}
 	b, _ := json.Marshal(opts)
@@ -55,12 +56,13 @@ func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string
 	}
 }

-func (c Client) TTS(ctx context.Context, text, voiceID string, options types.SynthesisOptions) ([]byte, error) {
+func (c Client) TTS(ctx context.Context, text, voiceID, modelID string, options types.SynthesisOptions) ([]byte, error) {
 	options.Clamp()
 	url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
 	client := &http.Client{}
 	opts := types.TTS{
 		Text:          text,
+		ModelID:       modelID,
 		VoiceSettings: options,
 	}
 	b, _ := json.Marshal(opts)
--- a/client/types/types.go
+++ b/client/types/types.go
@@ -19,6 +19,7 @@ type Voice struct {
 	Labels      string     `json:"labels,omitempty"`      // Serialized labels dictionary for the voice.
 }
 type TTS struct {
+	ModelID       string           `json:"model_id"`
 	Text          string           `json:"text"`                     // The text that will get converted into speech. Currently only English text is supported.
 	VoiceSettings SynthesisOptions `json:"voice_settings,omitempty"` // Voice settings are applied only on the given TTS request.
 }
@@ -103,6 +104,22 @@ type LanguageResponseModel struct {
 	IsoCode     string `json:"iso_code"`
 	DisplayName string `json:"display_name"`
 }
+
+type Language struct {
+	LanguageID string `json:"language_id"`
+	Name       string `json:"name"`
+}
+
+type ModelResponseModel struct {
+	ModelID              string     `json:"model_id"`
+	Name                 string     `json:"name"`
+	Description          string     `json:"description"`
+	CanBeFinetuned       bool       `json:"can_be_finetuned"`
+	CanDoTextToSpeech    bool       `json:"can_do_text_to_speech"`
+	CanDoVoiceConversion bool       `json:"can_do_voice_conversion"`
+	TokenCostFactor      float64    `json:"token_cost_factor"`
+	Languages            []Language `json:"languages"`
+}
 type RecordingResponseModel struct {
 	RecordingID    string `json:"recording_id"`
 	MimeType       string `json:"mime_type"`
--- a/cmd/say/main.go
+++ b/cmd/say/main.go
@@ -26,7 +26,8 @@ func main() {
 	pipeReader, pipeWriter := io.Pipe()

 	reader := bufio.NewReader(os.Stdin)
-	text, _ := reader.ReadString('\n')
+	b, _ := io.ReadAll(reader)
+	text := string(b)

 	go func() {
 		err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75})