Add sound generation api (#9)

* Add missing attributes for VoiceResponseModel

* Updating module to point to forked repo

* Tidying up go.mod

* Adding missing voice settings

* Adding support for request stitching

* Adding support for request stitching

* Fix dup SharingOptions struct from merge

* Add Sound Generation API

* Fix: revert user-agent/package url to original
This commit is contained in:
Lachlan Laycock
2024-11-26 06:39:34 +01:00
committed by GitHub
parent c585531fae
commit db0a2e1760
8 changed files with 243 additions and 163 deletions

View File

@@ -12,34 +12,101 @@ import (
"github.com/taigrr/elevenlabs/client/types"
)
func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, modelID, voiceID string, options types.SynthesisOptions) error {
options.Clamp()
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
opts := types.TTS{
Text: text,
ModelID: modelID,
VoiceSettings: options,
func WithPreviousText(previousText string) types.TTSParam {
return func(tts *types.TTS) {
tts.PreviousText = previousText
}
b, _ := json.Marshal(opts)
client := &http.Client{}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(b))
}
// WithNextText returns a TTSParam that stores nextText in the NextText field
// of the TTS request it is applied to.
func WithNextText(nextText string) types.TTSParam {
	setNext := func(req *types.TTS) {
		req.NextText = nextText
	}
	return setNext
}
// TTSWriter requests speech for text using the given model and voice and
// copies the response body into w.
//
// Note the argument order: modelID comes before voiceID here, unlike TTS.
// Each optionalParams entry is applied, in order, to the request parameters
// before the request is issued. options is forwarded to requestTTS.
//
// It returns any error from requestTTS, or the first error hit while copying
// the response into w.
func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, modelID, voiceID string, options types.SynthesisOptions, optionalParams ...types.TTSParam) error {
	params := types.TTS{
		Text:    text,
		VoiceID: voiceID,
		ModelID: modelID,
	}
	for _, p := range optionalParams {
		p(&params)
	}
	body, err := c.requestTTS(ctx, params, options)
	if err != nil {
		return err
	}
	defer body.Close()
	// Report copy failures (broken connection, failing writer) instead of
	// claiming success after writing only part of the audio.
	_, err = io.Copy(w, body)
	return err
}
// TTS requests speech for text using the given voice and model and returns
// the whole response body as a byte slice.
//
// Each optionalParams entry is applied, in order, to the request parameters
// before the request is issued. options is forwarded to requestTTS.
//
// On failure it returns an empty slice together with the error from
// requestTTS or from reading the response body.
func (c Client) TTS(ctx context.Context, text, voiceID, modelID string, options types.SynthesisOptions, optionalParams ...types.TTSParam) ([]byte, error) {
	params := types.TTS{
		Text:    text,
		VoiceID: voiceID,
		ModelID: modelID,
	}
	for _, p := range optionalParams {
		p(&params)
	}
	body, err := c.requestTTS(ctx, params, options)
	if err != nil {
		return []byte{}, err
	}
	defer body.Close()
	var audio bytes.Buffer
	// A read error means the audio is truncated; report it rather than
	// handing back partial data as if it were complete.
	if _, err := io.Copy(&audio, body); err != nil {
		return []byte{}, err
	}
	return audio.Bytes(), nil
}
// TTSStream requests speech for text using the given voice with the Stream
// flag set, and copies the response body into w as it arrives.
//
// No model ID is set by this function itself. Each optionalParams entry is
// applied, in order, to the request parameters before the request is issued.
// options is forwarded to requestTTS.
//
// It returns any error from requestTTS, or the first error hit while copying
// the response into w.
func (c Client) TTSStream(ctx context.Context, w io.Writer, text, voiceID string, options types.SynthesisOptions, optionalParams ...types.TTSParam) error {
	params := types.TTS{
		Text:    text,
		VoiceID: voiceID,
		Stream:  true,
	}
	for _, p := range optionalParams {
		p(&params)
	}
	body, err := c.requestTTS(ctx, params, options)
	if err != nil {
		return err
	}
	defer body.Close()
	// Report copy failures so a stream that dies midway is not mistaken for
	// a complete one.
	_, err = io.Copy(w, body)
	return err
}
func (c Client) requestTTS(ctx context.Context, params types.TTS, options types.SynthesisOptions) (io.ReadCloser, error) {
options.Clamp()
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", params.VoiceID)
if params.Stream {
url += "/stream"
}
client := &http.Client{}
b, _ := json.Marshal(params)
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(b))
if err != nil {
return nil, err
}
req.Header.Set("xi-api-key", c.apiKey)
req.Header.Set("User-Agent", "github.com/taigrr/elevenlabs")
req.Header.Set("accept", "audio/mpeg")
res, err := client.Do(req)
if err != nil {
return err
return nil, err
}
switch res.StatusCode {
case 401:
return ErrUnauthorized
return nil, ErrUnauthorized
case 200:
defer res.Body.Close()
io.Copy(w, res.Body)
return nil
return res.Body, nil
case 422:
fallthrough
default:
@@ -51,93 +118,6 @@ func (c Client) TTSWriter(ctx context.Context, w io.Writer, text, modelID, voice
} else {
err = errors.Join(err, ve)
}
return err
}
}
// TTS posts text to the text-to-speech endpoint for voiceID, using modelID
// and options as the voice settings (after calling options.Clamp), and
// returns the response body as a byte slice.
//
// A 401 response yields ErrUnauthorized. Any other non-200 response is
// decoded as a types.ValidationError and returned as the error; if decoding
// fails, the decode error is returned instead. Every failure path returns an
// empty slice.
func (c Client) TTS(ctx context.Context, text, voiceID, modelID string, options types.SynthesisOptions) ([]byte, error) {
	options.Clamp()
	url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s", voiceID)
	opts := types.TTS{
		Text:          text,
		ModelID:       modelID,
		VoiceSettings: options,
	}
	b, err := json.Marshal(opts)
	if err != nil {
		return []byte{}, err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(b))
	if err != nil {
		return []byte{}, err
	}
	req.Header.Set("xi-api-key", c.apiKey)
	req.Header.Set("User-Agent", "github.com/taigrr/elevenlabs")
	req.Header.Set("accept", "audio/mpeg")
	client := &http.Client{}
	res, err := client.Do(req)
	if err != nil {
		return []byte{}, err
	}
	// Close the body on every path, including 401, so the connection is
	// released.
	defer res.Body.Close()
	switch res.StatusCode {
	case http.StatusUnauthorized:
		return []byte{}, ErrUnauthorized
	case http.StatusOK:
		var audio bytes.Buffer
		// A read error means the audio is truncated; do not report success.
		if _, err := io.Copy(&audio, res.Body); err != nil {
			return []byte{}, err
		}
		return audio.Bytes(), nil
	default:
		// 422 and every other status are treated as a validation error payload.
		ve := types.ValidationError{}
		// err is nil here, so errors.Join wraps just the one error, as before.
		if jerr := json.NewDecoder(res.Body).Decode(&ve); jerr != nil {
			err = errors.Join(err, jerr)
		} else {
			err = errors.Join(err, ve)
		}
		return []byte{}, err
	}
}
func (c Client) TTSStream(ctx context.Context, w io.Writer, text, voiceID string, options types.SynthesisOptions) error {
options.Clamp()
url := fmt.Sprintf(c.endpoint+"/v1/text-to-speech/%s/stream", voiceID)
opts := types.TTS{
Text: text,
VoiceSettings: options,
}
b, _ := json.Marshal(opts)
client := &http.Client{}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBuffer(b))
if err != nil {
return err
}
req.Header.Set("xi-api-key", c.apiKey)
req.Header.Set("User-Agent", "github.com/taigrr/elevenlabs")
req.Header.Set("accept", "audio/mpeg")
res, err := client.Do(req)
if err != nil {
return err
}
switch res.StatusCode {
case 401:
return ErrUnauthorized
case 200:
defer res.Body.Close()
io.Copy(w, res.Body)
return nil
case 422:
fallthrough
default:
ve := types.ValidationError{}
defer res.Body.Close()
jerr := json.NewDecoder(res.Body).Decode(&ve)
if jerr != nil {
err = errors.Join(err, jerr)
} else {
err = errors.Join(err, ve)
}
return err
return nil, err
}
}