9 Commits

Author SHA1 Message Date
fzqxzhang
261398509a feat: model_id tag add omitempty (#3) 2023-07-25 15:43:48 +00:00
Marcel Molina
b925ef1471 Fix compile error from variable typo (#2)
* Fix compile error from variable typo

* bytes.Buffer pointer
2023-07-09 16:00:45 +00:00
e095a7ec13 don't check resposne code before checcking err !=nil 2023-06-27 12:31:56 -07:00
32972d4ff2 Merge branch 'master' of github.com:taigrr/elevenlabs 2023-06-26 20:09:02 -07:00
84e59417d6 fix nil error pointed out by @Davincible , closes #1 2023-06-26 20:08:51 -07:00
3e3b7004b8 update to newer release of api 2023-05-12 23:42:59 -07:00
6fcc65115d add comments to the readme code 2023-04-19 14:02:09 -07:00
5451bcd0b1 fix readme and say 2023-04-19 14:00:03 -07:00
58581c3c46 add example binary to readme 2023-04-18 22:35:17 -07:00
4 changed files with 48 additions and 19 deletions

View File

@@ -18,7 +18,15 @@ make TTS (text-to-speech) requests to elevenlabs.io
As a prerequisite, you must already have an account with elevenlabs.io.
After creating your account, you can get you API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-).
After creating your account, you can get your API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-).
## Test Program
To test out an example `say` program, run:
`go install github.com/taigrr/elevenlabs/cmd/say@latest`
Set the `XI_API_KEY` environment variable, then pipe some text into `say` to give it a whirl!
## Example Code
@@ -48,21 +56,26 @@ import (
func main() {
ctx := context.Background()
// load in an API key to create a client
client := client.New(os.Getenv("XI_API_KEY"))
// fetch a list of voice IDs from elevenlabs
ids, err := client.GetVoiceIDs(ctx)
if err != nil {
panic(err)
}
// prepare a pipe for streaming audio directly to beep
pipeReader, pipeWriter := io.Pipe()
reader := bufio.NewReader(os.Stdin)
text, _ := reader.ReadString('\n')
go func() {
// stream audio from elevenlabs using the first voice we found
err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75})
if err != nil {
panic(err)
}
pipeWriter.Close()
}()
// decode and prepare the streaming mp3 as it comes through
streamer, format, err := mp3.Decode(pipeReader)
if err != nil {
log.Fatal(err)
@@ -70,6 +83,7 @@ func main() {
defer streamer.Close()
speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10))
done := make(chan bool)
// play the audio
speaker.Play(beep.Seq(streamer, beep.Callback(func() {
done <- true
})))

View File

@@ -1,7 +1,6 @@
package client
import (
"bufio"
"bytes"
"context"
"encoding/json"
@@ -13,11 +12,12 @@ import (
"github.com/taigrr/elevenlabs/client/types"
)
func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string, options types.SynthesisOptions) error {
func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, modelID, voiceID string, options types.SynthesisOptions) error {
options.Clamp()
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
opts := types.TTS{
Text: text,
ModelID: modelID,
VoiceSettings: options,
}
b, _ := json.Marshal(opts)
@@ -30,14 +30,13 @@ func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string
req.Header.Set("User-Agent", "github.com/taigrr/elevenlabs")
req.Header.Set("accept", "audio/mpeg")
res, err := client.Do(req)
if err != nil {
return err
}
switch res.StatusCode {
case 401:
return ErrUnauthorized
case 200:
if err != nil {
return err
}
defer res.Body.Close()
io.Copy(w, res.Body)
return nil
@@ -56,12 +55,13 @@ func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string
}
}
func (c Client) TTS(ctx context.Context, text, voiceID string, options types.SynthesisOptions) ([]byte, error) {
func (c Client) TTS(ctx context.Context, text, voiceID, modelID string, options types.SynthesisOptions) ([]byte, error) {
options.Clamp()
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
client := &http.Client{}
opts := types.TTS{
Text: text,
ModelID: modelID,
VoiceSettings: options,
}
b, _ := json.Marshal(opts)
@@ -73,19 +73,17 @@ func (c Client) TTS(ctx context.Context, text, voiceID string, options types.Syn
req.Header.Set("User-Agent", "github.com/taigrr/elevenlabs")
req.Header.Set("accept", "audio/mpeg")
res, err := client.Do(req)
if err != nil {
return []byte{}, err
}
switch res.StatusCode {
case 401:
return []byte{}, ErrUnauthorized
case 200:
if err != nil {
return []byte{}, err
}
b := bytes.Buffer{}
w := bufio.NewWriter(&b)
defer res.Body.Close()
io.Copy(w, res.Body)
io.Copy(&b, res.Body)
return b.Bytes(), nil
case 422:
fallthrough
@@ -119,14 +117,13 @@ func (c Client) TTSStream(ctx context.Context, w io.Writer, text, voiceID string
req.Header.Set("User-Agent", "github.com/taigrr/elevenlabs")
req.Header.Set("accept", "audio/mpeg")
res, err := client.Do(req)
if err != nil {
return err
}
switch res.StatusCode {
case 401:
return ErrUnauthorized
case 200:
if err != nil {
return err
}
defer res.Body.Close()
io.Copy(w, res.Body)
return nil

View File

@@ -19,6 +19,7 @@ type Voice struct {
Labels string `json:"labels,omitempty"` // Serialized labels dictionary for the voice.
}
// TTS carries the text, model ID and voice settings for a text-to-speech
// request; it is serialized using the JSON keys given in the field tags.
type TTS struct {
// ModelID is written as "model_id" and is left out of the JSON entirely when it is the empty string.
ModelID string `json:"model_id,omitempty"`
Text string `json:"text"` // The text that will get converted into speech. Currently only English text is supported.
// NOTE(review): encoding/json never considers a struct value empty, so omitempty
// has no effect on this non-pointer field — confirm whether a pointer was intended.
VoiceSettings SynthesisOptions `json:"voice_settings,omitempty"` // Voice settings are applied only on the given TTS request.
}
@@ -103,6 +104,22 @@ type LanguageResponseModel struct {
IsoCode string `json:"iso_code"`
DisplayName string `json:"display_name"`
}
// Language is the element type of ModelResponseModel.Languages: an ID/name
// pair decoded from the "language_id" and "name" JSON keys.
type Language struct {
LanguageID string `json:"language_id"`
Name string `json:"name"`
}
// ModelResponseModel mirrors the JSON description of a single model: its
// identifier, display text, three boolean capability flags, a token cost
// factor and a list of languages.
// NOTE(review): presumably the shape returned by the API's model listing —
// confirm against the code that decodes into this type.
type ModelResponseModel struct {
ModelID string `json:"model_id"`
Name string `json:"name"`
Description string `json:"description"`
CanBeFinetuned bool `json:"can_be_finetuned"`
CanDoTextToSpeech bool `json:"can_do_text_to_speech"`
CanDoVoiceConversion bool `json:"can_do_voice_conversion"`
TokenCostFactor float64 `json:"token_cost_factor"`
// Languages is decoded from the "languages" JSON array, one Language per element.
Languages []Language `json:"languages"`
}
type RecordingResponseModel struct {
RecordingID string `json:"recording_id"`
MimeType string `json:"mime_type"`

View File

@@ -26,7 +26,8 @@ func main() {
pipeReader, pipeWriter := io.Pipe()
reader := bufio.NewReader(os.Stdin)
text, _ := reader.ReadString('\n')
b, _ := io.ReadAll(reader)
text := string(b)
go func() {
err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75})