mirror of
https://github.com/taigrr/elevenlabs.git
synced 2026-04-02 03:08:57 -07:00
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 3e3b7004b8 | | |
| | 6fcc65115d | | |
| | 5451bcd0b1 | | |
| | 58581c3c46 | | |
16
README.md
16
README.md
@@ -18,7 +18,15 @@ make TTS (text-to-speech) requests to elevenlabs.io
|
|||||||
|
|
||||||
|
|
||||||
As a prerequisite, you must already have an account with elevenlabs.io.
|
As a prerequisite, you must already have an account with elevenlabs.io.
|
||||||
After creating your account, you can get you API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-).
|
After creating your account, you can get your API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-).
|
||||||
|
|
||||||
|
## Test Program
|
||||||
|
|
||||||
|
To test out an example `say` program, run:
|
||||||
|
|
||||||
|
`go install github.com/taigrr/elevenlabs/cmd/say@latest`
|
||||||
|
|
||||||
|
Set the `XI_API_KEY` environment variable, and pipe it some text to give it a whirl!
|
||||||
|
|
||||||
## Example Code
|
## Example Code
|
||||||
|
|
||||||
@@ -48,21 +56,26 @@ import (
|
|||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
// load in an API key to create a client
|
||||||
client := client.New(os.Getenv("XI_API_KEY"))
|
client := client.New(os.Getenv("XI_API_KEY"))
|
||||||
|
// fetch a list of voice IDs from elevenlabs
|
||||||
ids, err := client.GetVoiceIDs(ctx)
|
ids, err := client.GetVoiceIDs(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
// prepare a pipe for streaming audio directly to beep
|
||||||
pipeReader, pipeWriter := io.Pipe()
|
pipeReader, pipeWriter := io.Pipe()
|
||||||
reader := bufio.NewReader(os.Stdin)
|
reader := bufio.NewReader(os.Stdin)
|
||||||
text, _ := reader.ReadString('\n')
|
text, _ := reader.ReadString('\n')
|
||||||
go func() {
|
go func() {
|
||||||
|
// stream audio from elevenlabs using the first voice we found
|
||||||
err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75})
|
err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
pipeWriter.Close()
|
pipeWriter.Close()
|
||||||
}()
|
}()
|
||||||
|
// decode and prepare the streaming mp3 as it comes through
|
||||||
streamer, format, err := mp3.Decode(pipeReader)
|
streamer, format, err := mp3.Decode(pipeReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
@@ -70,6 +83,7 @@ func main() {
|
|||||||
defer streamer.Close()
|
defer streamer.Close()
|
||||||
speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10))
|
speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10))
|
||||||
done := make(chan bool)
|
done := make(chan bool)
|
||||||
|
// play the audio
|
||||||
speaker.Play(beep.Seq(streamer, beep.Callback(func() {
|
speaker.Play(beep.Seq(streamer, beep.Callback(func() {
|
||||||
done <- true
|
done <- true
|
||||||
})))
|
})))
|
||||||
|
|||||||
@@ -13,11 +13,12 @@ import (
|
|||||||
"github.com/taigrr/elevenlabs/client/types"
|
"github.com/taigrr/elevenlabs/client/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string, options types.SynthesisOptions) error {
|
func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, modelID, voiceID string, options types.SynthesisOptions) error {
|
||||||
options.Clamp()
|
options.Clamp()
|
||||||
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
|
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
|
||||||
opts := types.TTS{
|
opts := types.TTS{
|
||||||
Text: text,
|
Text: text,
|
||||||
|
ModelID: modelID,
|
||||||
VoiceSettings: options,
|
VoiceSettings: options,
|
||||||
}
|
}
|
||||||
b, _ := json.Marshal(opts)
|
b, _ := json.Marshal(opts)
|
||||||
@@ -56,12 +57,13 @@ func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, voiceID string
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c Client) TTS(ctx context.Context, text, voiceID string, options types.SynthesisOptions) ([]byte, error) {
|
func (c Client) TTS(ctx context.Context, text, voiceID, modelID string, options types.SynthesisOptions) ([]byte, error) {
|
||||||
options.Clamp()
|
options.Clamp()
|
||||||
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
|
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
opts := types.TTS{
|
opts := types.TTS{
|
||||||
Text: text,
|
Text: text,
|
||||||
|
ModelID: modelID,
|
||||||
VoiceSettings: options,
|
VoiceSettings: options,
|
||||||
}
|
}
|
||||||
b, _ := json.Marshal(opts)
|
b, _ := json.Marshal(opts)
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ type Voice struct {
|
|||||||
Labels string `json:"labels,omitempty"` // Serialized labels dictionary for the voice.
|
Labels string `json:"labels,omitempty"` // Serialized labels dictionary for the voice.
|
||||||
}
|
}
|
||||||
type TTS struct {
|
type TTS struct {
|
||||||
|
ModelID string `json:"model_id"`
|
||||||
Text string `json:"text"` // The text that will get converted into speech. Currently only English text is supported.
|
Text string `json:"text"` // The text that will get converted into speech. Currently only English text is supported.
|
||||||
VoiceSettings SynthesisOptions `json:"voice_settings,omitempty"` // Voice settings are applied only on the given TTS request.
|
VoiceSettings SynthesisOptions `json:"voice_settings,omitempty"` // Voice settings are applied only on the given TTS request.
|
||||||
}
|
}
|
||||||
@@ -103,6 +104,22 @@ type LanguageResponseModel struct {
|
|||||||
IsoCode string `json:"iso_code"`
|
IsoCode string `json:"iso_code"`
|
||||||
DisplayName string `json:"display_name"`
|
DisplayName string `json:"display_name"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Language struct {
|
||||||
|
LanguageID string `json:"language_id"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ModelResponseModel struct {
|
||||||
|
ModelID string `json:"model_id"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Description string `json:"description"`
|
||||||
|
CanBeFinetuned bool `json:"can_be_finetuned"`
|
||||||
|
CanDoTextToSpeech bool `json:"can_do_text_to_speech"`
|
||||||
|
CanDoVoiceConversion bool `json:"can_do_voice_conversion"`
|
||||||
|
TokenCostFactor float64 `json:"token_cost_factor"`
|
||||||
|
Languages []Language `json:"languages"`
|
||||||
|
}
|
||||||
type RecordingResponseModel struct {
|
type RecordingResponseModel struct {
|
||||||
RecordingID string `json:"recording_id"`
|
RecordingID string `json:"recording_id"`
|
||||||
MimeType string `json:"mime_type"`
|
MimeType string `json:"mime_type"`
|
||||||
|
|||||||
@@ -26,7 +26,8 @@ func main() {
|
|||||||
pipeReader, pipeWriter := io.Pipe()
|
pipeReader, pipeWriter := io.Pipe()
|
||||||
|
|
||||||
reader := bufio.NewReader(os.Stdin)
|
reader := bufio.NewReader(os.Stdin)
|
||||||
text, _ := reader.ReadString('\n')
|
b, _ := io.ReadAll(reader)
|
||||||
|
text := string(b)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75})
|
err = client.TTSStream(ctx, pipeWriter, text, ids[0], types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75})
|
||||||
|
|||||||
Reference in New Issue
Block a user