reduce complexity

This commit is contained in:
2025-08-08 00:47:12 -07:00
parent a3301f15a6
commit 9579320040
7 changed files with 628 additions and 461 deletions

1
.gitignore vendored
View File

@@ -38,3 +38,4 @@ Thumbs.db
!examples/**/*.wav !examples/**/*.wav
!examples/**/*.m4a.xi/ !examples/**/*.m4a.xi/
.xi/ .xi/
elevenlabs-mcp

View File

@@ -1,4 +1,5 @@
# elevenlabs-mcp # elevenlabs-mcp
[![License 0BSD](https://img.shields.io/badge/License-0BSD-pink.svg)](https://opensource.org/licenses/0BSD) [![License 0BSD](https://img.shields.io/badge/License-0BSD-pink.svg)](https://opensource.org/licenses/0BSD)
[![GoDoc](https://godoc.org/github.com/taigrr/elevenlabs-mcp?status.svg)](https://godoc.org/github.com/taigrr/elevenlabs-mcp) [![GoDoc](https://godoc.org/github.com/taigrr/elevenlabs-mcp?status.svg)](https://godoc.org/github.com/taigrr/elevenlabs-mcp)
[![Go Report Card](https://goreportcard.com/badge/github.com/taigrr/elevenlabs-mcp)](https://goreportcard.com/report/github.com/taigrr/elevenlabs-mcp) [![Go Report Card](https://goreportcard.com/badge/github.com/taigrr/elevenlabs-mcp)](https://goreportcard.com/report/github.com/taigrr/elevenlabs-mcp)
@@ -14,27 +15,27 @@ It provides a seamless interface for converting text to high-quality speech, man
As a prerequisite, you must already have an account with [ElevenLabs](https://elevenlabs.io). As a prerequisite, you must already have an account with [ElevenLabs](https://elevenlabs.io).
After creating your account, you can get your API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-). After creating your account, you can get your API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-).
Note: your API key needs, at a minimum, read access to your voices and access to Text-to-Speech generation for the server to function properly.
## Installation ## Installation
```bash ```bash
go build -o elevenlabs-mcp go install .
``` ```
## Configuration ## Configuration
Set your ElevenLabs API key: Set your ElevenLabs API key:
```bash ```bash
export XI_API_KEY=your_api_key_here export XI_API_KEY=your_api_key_here
``` ```
## Usage ## Usage
The server communicates via stdio using the MCP protocol: The server communicates via stdio using the MCP protocol.
```bash You'll need a compatible MCP client to interact with this server.
./elevenlabs-mcp
```
Generated audio files are automatically saved to `.xi/<timestamp>-<hex5>.mp3` with corresponding `.txt` files containing the original text for reference. Generated audio files are automatically saved to `.xi/<timestamp>-<hex5>.mp3` with corresponding `.txt` files containing the original text for reference.
@@ -43,11 +44,11 @@ Generated audio files are automatically saved to `.xi/<timestamp>-<hex5>.mp3` wi
The server provides the following tools to MCP clients: The server provides the following tools to MCP clients:
- **say** - Convert text to speech and save as MP3 - **say** - Convert text to speech and save as MP3
- **read** - Read a text file and convert it to speech - **read** - Read a text file and convert it to speech
- **play** - Play audio files using system audio - **play** - Play audio files using system audio
- **set_voice** - Change the voice used for generation - **set_voice** - Change the voice used for generation
- **get_voices** - List available voices and show current selection - **get_voices** - List available voices and show current selection
- **history** - List previously generated audio files with text summaries - **history** - List previously generated audio files with (truncated) text summaries
## Dependencies ## Dependencies
@@ -61,4 +62,5 @@ This project is licensed under the 0BSD License, written by [Rob Landley](https:
As such, you may use this library without restriction or attribution, but please don't pass it off as your own. As such, you may use this library without restriction or attribution, but please don't pass it off as your own.
Attribution, though not required, is appreciated. Attribution, though not required, is appreciated.
By contributing, you agree all code submitted also falls under the License. By contributing, you agree all code submitted also falls under the License.

144
internal/ximcp/audio.go Normal file
View File

@@ -0,0 +1,144 @@
package ximcp
import (
"context"
"crypto/rand"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"time"
"github.com/gopxl/beep/v2"
"github.com/gopxl/beep/v2/mp3"
"github.com/gopxl/beep/v2/speaker"
"github.com/taigrr/elevenlabs/client/types"
)
// generateRandomHex returns a string of exactly length lowercase hexadecimal
// characters produced from crypto/rand.
//
// A non-positive length yields the empty string instead of panicking.
func generateRandomHex(length int) string {
	if length <= 0 {
		return ""
	}

	// Every byte encodes to two hex digits, so ceil(length/2) random bytes are
	// enough; for an odd length the surplus digit is sliced off below.
	buf := make([]byte, (length+1)/2)

	// The error is deliberately ignored: the result is only used as a
	// collision-avoidance tag, not as a secret, so falling back to the zeroed
	// buffer is preferable to failing the caller.
	_, _ = rand.Read(buf)

	return fmt.Sprintf("%x", buf)[:length]
}
// GenerateAudio synthesizes text with the currently selected voice and stores
// the result on disk.
//
// It returns the path of the written MP3 file. An error is returned when no
// voice is selected, when synthesis fails, or when the output files cannot be
// written.
func (s *Server) GenerateAudio(text string) (string, error) {
	// currentVoice is written under voicesMutex by SetVoice and refreshVoices,
	// so it has to be read under the same lock. The lock is released before
	// the (slow) synthesis call.
	s.voicesMutex.RLock()
	hasVoice := s.currentVoice != nil
	s.voicesMutex.RUnlock()
	if !hasVoice {
		return "", fmt.Errorf("no voice selected")
	}

	audioData, err := s.generateTTSAudio(text)
	if err != nil {
		return "", err
	}

	return s.saveAudioFiles(text, audioData)
}
// generateTTSAudio asks the API client to synthesize text with the currently
// selected voice and the package default synthesis options, and returns the
// audio bytes the client hands back.
//
// It returns an error when no voice is selected or when the client call fails.
func (s *Server) generateTTSAudio(text string) ([]byte, error) {
	// Snapshot the selected voice under the lock that guards it; SetVoice and
	// refreshVoices may replace it concurrently.
	s.voicesMutex.RLock()
	voice := s.currentVoice
	s.voicesMutex.RUnlock()
	if voice == nil {
		return nil, fmt.Errorf("no voice selected")
	}

	// NOTE(review): the empty fourth argument presumably selects the client's
	// default model — confirm against the client API.
	audioData, err := s.client.TTS(context.Background(), text, voice.VoiceID, "", types.SynthesisOptions{
		Stability:       DefaultStability,
		SimilarityBoost: DefaultSimilarityBoost,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to generate speech: %w", err)
	}
	return audioData, nil
}
// saveAudioFiles stores one generated clip: it picks a fresh file path, makes
// sure the target directory exists, writes the audio bytes and then writes the
// source text next to them.
//
// It returns the path of the audio file, or the first error encountered.
func (s *Server) saveAudioFiles(text string, audioData []byte) (string, error) {
	audioPath := s.generateFilePath()

	// The steps run strictly in this order and stop at the first failure.
	steps := []func() error{
		func() error { return s.ensureDirectoryExists(audioPath) },
		func() error { return s.writeAudioFile(audioPath, audioData) },
		func() error { return s.writeTextFile(audioPath, text) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return "", err
		}
	}

	return audioPath, nil
}
// generateFilePath builds the path for a new audio file inside AudioDirectory.
// The file name is "<unix-milliseconds>-<random hex>.mp3".
func (s *Server) generateFilePath() string {
	millis := time.Now().UnixMilli()
	suffix := generateRandomHex(RandomHexLength)
	return filepath.Join(AudioDirectory, fmt.Sprintf("%d-%s.mp3", millis, suffix))
}
// ensureDirectoryExists creates the parent directory of filePath, including
// any missing ancestors, with mode 0755.
func (s *Server) ensureDirectoryExists(filePath string) error {
	parent := filepath.Dir(filePath)
	err := os.MkdirAll(parent, 0755)
	if err == nil {
		return nil
	}
	return fmt.Errorf("failed to create directory: %w", err)
}
// writeAudioFile writes audioData to filePath with mode 0644, replacing any
// existing file.
func (s *Server) writeAudioFile(filePath string, audioData []byte) error {
	writeErr := os.WriteFile(filePath, audioData, 0644)
	if writeErr != nil {
		return fmt.Errorf("failed to write audio file: %w", writeErr)
	}
	return nil
}
// writeTextFile stores text next to the audio file at filePath: the ".mp3"
// suffix is replaced by ".txt" and the file is written with mode 0644.
func (s *Server) writeTextFile(filePath, text string) error {
	sidecar := strings.TrimSuffix(filePath, ".mp3") + ".txt"
	payload := []byte(text)

	err := os.WriteFile(sidecar, payload, 0644)
	if err == nil {
		return nil
	}
	return fmt.Errorf("failed to write text file: %w", err)
}
// PlayAudio decodes the MP3 file at filepath and plays it, blocking until
// playback has finished.
//
// playMutex is held for the whole call, so concurrent callers play their
// clips one after another. An error is returned when the file cannot be
// opened or decoded.
func (s *Server) PlayAudio(filepath string) error {
	s.playMutex.Lock()
	defer s.playMutex.Unlock()

	src, openErr := os.Open(filepath)
	if openErr != nil {
		return fmt.Errorf("failed to open audio file: %w", openErr)
	}
	defer src.Close()

	decoded, format, decodeErr := mp3.Decode(src)
	if decodeErr != nil {
		return fmt.Errorf("failed to decode mp3: %w", decodeErr)
	}
	defer decoded.Close()

	return s.playStreamer(decoded, format)
}
// playStreamer resamples streamer from its native sample rate to
// AudioSampleRate, queues it on the speaker and blocks until it has been
// played to the end.
//
// It returns the streamer's error, if any, so that a clip which stopped early
// because of a decoding problem is not reported as a successful playback.
func (s *Server) playStreamer(streamer beep.StreamSeekCloser, format beep.Format) error {
	// 4 is the resampling quality argument of beep.Resample.
	resampled := beep.Resample(4, format.SampleRate, AudioSampleRate, streamer)

	// The callback runs once, after resampled is drained; closing the channel
	// signals completion without making the speaker wait for a receiver.
	done := make(chan struct{})
	speaker.Play(beep.Seq(resampled, beep.Callback(func() {
		close(done)
	})))
	<-done

	if err := streamer.Err(); err != nil {
		return fmt.Errorf("failed to play audio: %w", err)
	}
	return nil
}
// PlayAudioAsync starts playback of the file at filepath in a new goroutine
// and returns immediately. A playback error is only logged; it is not
// reported to the caller.
func (s *Server) PlayAudioAsync(filepath string) {
	play := func() {
		err := s.PlayAudio(filepath)
		if err == nil {
			return
		}
		log.Printf("Error playing audio: %v", err)
	}
	go play()
}
// ReadFileToAudio reads the whole file at filePath and passes its content, as
// text, to GenerateAudio. It returns the path of the generated audio file.
func (s *Server) ReadFileToAudio(filePath string) (string, error) {
	raw, readErr := os.ReadFile(filePath)
	if readErr != nil {
		return "", fmt.Errorf("failed to read file: %w", readErr)
	}
	return s.GenerateAudio(string(raw))
}

64
internal/ximcp/history.go Normal file
View File

@@ -0,0 +1,64 @@
package ximcp
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// AudioFile describes one generated clip as listed by GetAudioHistory.
type AudioFile struct {
// Name is the MP3 file name (without directory) found in AudioDirectory.
Name string
// Summary is the shortened text the clip was generated from, or a
// placeholder when the matching text file cannot be read.
Summary string
}
// GetAudioHistory lists the MP3 files in AudioDirectory together with a short
// summary of the text each one was generated from.
//
// A missing directory is not an error: it yields an empty, non-nil slice.
// Any other failure to read the directory is returned.
func (s *Server) GetAudioHistory() ([]AudioFile, error) {
	entries, err := os.ReadDir(AudioDirectory)
	switch {
	case err == nil:
		return s.processAudioFiles(entries), nil
	case os.IsNotExist(err):
		return []AudioFile{}, nil
	default:
		return nil, fmt.Errorf("failed to read %s directory: %w", AudioDirectory, err)
	}
}
// processAudioFiles keeps the directory entries whose name ends in ".mp3" and
// pairs each with its text summary. Entries are returned in input order; the
// result is nil when nothing matches.
func (s *Server) processAudioFiles(files []os.DirEntry) []AudioFile {
	var found []AudioFile
	for _, entry := range files {
		name := entry.Name()
		if !strings.HasSuffix(name, ".mp3") {
			continue
		}
		found = append(found, AudioFile{
			Name:    name,
			Summary: s.getAudioSummary(name),
		})
	}
	return found
}
// getAudioSummary returns the summary for the given audio file name. It reads
// the ".txt" file with the same base name from AudioDirectory; when that file
// cannot be read a fixed placeholder is returned instead.
func (s *Server) getAudioSummary(audioFileName string) string {
	base := strings.TrimSuffix(audioFileName, ".mp3")
	raw, err := os.ReadFile(filepath.Join(AudioDirectory, base+".txt"))
	if err == nil {
		return s.createSummary(string(raw))
	}
	return "(no text summary available)"
}
// createSummary shortens text for display. Text of at most MaxSummaryWords
// whitespace-separated words is returned with only leading and trailing
// whitespace removed; longer text is cut to its first MaxSummaryWords words,
// joined by single spaces and followed by "...".
func (s *Server) createSummary(text string) string {
	trimmed := strings.TrimSpace(text)
	tokens := strings.Fields(trimmed)
	if len(tokens) <= MaxSummaryWords {
		return trimmed
	}
	return strings.Join(tokens[:MaxSummaryWords], " ") + "..."
}

123
internal/ximcp/server.go Normal file
View File

@@ -0,0 +1,123 @@
package ximcp
import (
"context"
"fmt"
"os"
"sync"
"time"
"github.com/gopxl/beep/v2"
"github.com/gopxl/beep/v2/speaker"
"github.com/mark3labs/mcp-go/server"
"github.com/taigrr/elevenlabs/client"
"github.com/taigrr/elevenlabs/client/types"
)
// Defaults shared by the synthesis, playback and history code of this package.
const (
// DefaultStability is the Stability value sent with every TTS request.
DefaultStability = 0.5
// DefaultSimilarityBoost is the SimilarityBoost value sent with every TTS request.
DefaultSimilarityBoost = 0.5
// AudioDirectory is the directory generated audio and text files are written
// to and the history is read from.
AudioDirectory = ".xi"
// AudioSampleRate is the sample rate the speaker is initialized with and that
// decoded audio is resampled to before playback.
AudioSampleRate = 44100
// RandomHexLength is the number of random hex characters in a generated file name.
RandomHexLength = 5
// MaxSummaryWords is the number of words kept in a history summary before it
// is cut off with "...".
MaxSummaryWords = 10
)
// Server holds the state behind the MCP tools: the MCP server the tools are
// registered on, the API client, the known voices and the locks guarding
// voice selection and playback.
type Server struct {
// mcpServer is the MCP server on which SetupTools registers the tools.
mcpServer *server.MCPServer
// client is the API client used to list voices and to synthesize speech.
client client.Client
// voices is the voice list stored by the most recent refreshVoices call.
voices []types.VoiceResponseModel
// currentVoice is the voice used for synthesis; it is nil until a voice has
// been selected.
currentVoice *types.VoiceResponseModel
// voicesMutex is held by refreshVoices, GetVoices and SetVoice while they
// access voices and currentVoice.
voicesMutex sync.RWMutex
// playMutex is held by PlayAudio for the duration of a playback so that
// clips are played one at a time.
playMutex sync.Mutex
}
// NewServer builds a Server bound to mcpServer.
//
// The API key is taken from the XI_API_KEY environment variable. The voice
// list is loaded and the speaker is initialized before the Server is
// returned; a missing key or a failure in either step is returned as an error.
func NewServer(mcpServer *server.MCPServer) (*Server, error) {
	key := os.Getenv("XI_API_KEY")
	if key == "" {
		return nil, fmt.Errorf("XI_API_KEY environment variable is required")
	}

	srv := &Server{
		mcpServer: mcpServer,
		client:    client.New(key),
	}

	err := srv.initializeVoices()
	if err != nil {
		return nil, fmt.Errorf("failed to initialize voices: %w", err)
	}

	err = srv.initializeSpeaker()
	if err != nil {
		return nil, fmt.Errorf("failed to initialize speaker: %w", err)
	}

	return srv, nil
}
// initializeVoices loads the voice list for the first time by delegating to
// refreshVoices and returns its error unchanged.
func (s *Server) initializeVoices() error {
	return s.refreshVoices()
}
// initializeSpeaker initializes the audio output at AudioSampleRate with a
// buffer sized for one tenth of a second of audio.
//
// It returns the error reported by speaker.Init, so a machine without a
// usable audio device fails at start-up instead of at first playback.
func (s *Server) initializeSpeaker() error {
	rate := beep.SampleRate(AudioSampleRate)
	return speaker.Init(rate, rate.N(time.Second/10))
}
// refreshVoices fetches the voice list from the API and stores it on the
// server. When no voice is selected yet, the first voice of the new list
// becomes the current one.
func (s *Server) refreshVoices() error {
	// Fetch before taking the lock: the request has no deadline, and holding
	// the write lock across it would block SetVoice and GetVoices for as long
	// as the API takes to answer.
	voices, err := s.client.GetVoices(context.Background())
	if err != nil {
		return fmt.Errorf("failed to get voices: %w", err)
	}

	s.voicesMutex.Lock()
	defer s.voicesMutex.Unlock()

	s.voices = voices
	s.setDefaultVoiceIfNeeded()
	return nil
}
// setDefaultVoiceIfNeeded selects the first known voice when no voice is
// selected yet. It does nothing when a voice is already selected or when the
// voice list is empty. refreshVoices calls it while holding voicesMutex.
func (s *Server) setDefaultVoiceIfNeeded() {
	if s.currentVoice != nil {
		return
	}
	if len(s.voices) == 0 {
		return
	}
	s.currentVoice = &s.voices[0]
}
// GetVoices refreshes the voice list from the API and returns it together
// with the currently selected voice (nil when none is selected).
//
// The returned slice is the server's own slice, not a copy.
func (s *Server) GetVoices() ([]types.VoiceResponseModel, *types.VoiceResponseModel, error) {
	err := s.refreshVoices()
	if err != nil {
		return nil, nil, err
	}

	s.voicesMutex.RLock()
	all, selected := s.voices, s.currentVoice
	s.voicesMutex.RUnlock()

	return all, selected, nil
}
// SetVoice makes the voice with the given ID the current voice and returns
// it. The ID is looked up in the stored voice list only; an unknown ID leaves
// the selection unchanged and yields an error.
func (s *Server) SetVoice(voiceID string) (*types.VoiceResponseModel, error) {
	s.voicesMutex.Lock()
	defer s.voicesMutex.Unlock()

	if match := s.findVoiceByID(voiceID); match != nil {
		s.currentVoice = match
		return match, nil
	}
	return nil, fmt.Errorf("voice with ID '%s' not found", voiceID)
}
// findVoiceByID returns a pointer to the first stored voice whose VoiceID
// equals voiceID, or nil when there is none. The pointer refers to the element
// inside s.voices. SetVoice calls it while holding voicesMutex.
func (s *Server) findVoiceByID(voiceID string) *types.VoiceResponseModel {
	for i := range s.voices {
		candidate := &s.voices[i]
		if candidate.VoiceID == voiceID {
			return candidate
		}
	}
	return nil
}

280
internal/ximcp/tools.go Normal file
View File

@@ -0,0 +1,280 @@
package ximcp
import (
"context"
"fmt"
"strings"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/taigrr/elevenlabs/client/types"
)
// SetupTools registers every tool of this package on the MCP server, in the
// order say, read, play, set_voice, get_voices, history.
func (s *Server) SetupTools() {
	builders := []func() (mcp.Tool, server.ToolHandlerFunc){
		s.say,
		s.read,
		s.play,
		s.setVoice,
		s.getVoices,
		s.history,
	}
	for _, build := range builders {
		s.mcpServer.AddTool(build())
	}
}
// say returns the definition and handler of the "say" tool. The handler turns
// the required "text" argument into an audio file via GenerateAudio, starts
// playing it in the background and reports the path of the saved file.
func (s *Server) say() (mcp.Tool, server.ToolHandlerFunc) {
	textProp := map[string]any{
		"type":        "string",
		"description": "Text to convert to speech",
	}
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{"text": textProp},
		Required:   []string{"text"},
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		text, err := request.RequireString("text")
		if err != nil {
			return nil, err
		}

		audioPath, err := s.GenerateAudio(text)
		if err != nil {
			return nil, err
		}

		// Playback runs in its own goroutine, so the reply is sent right away.
		s.PlayAudioAsync(audioPath)

		reply := mcp.TextContent{
			Type: "text",
			Text: fmt.Sprintf("Audio generated, saved to %s, and playing", audioPath),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return mcp.Tool{
		Name:        "say",
		Description: "Convert text to speech, save as MP3 file, and play the audio",
		InputSchema: schema,
	}, handle
}
// read returns the definition and handler of the "read" tool. The handler
// converts the content of the file named by the required "file_path" argument
// to speech via ReadFileToAudio and reports where the audio was saved. It does
// not play the audio.
func (s *Server) read() (mcp.Tool, server.ToolHandlerFunc) {
	pathProp := map[string]any{
		"type":        "string",
		"description": "Path to the text file to read and convert to speech",
	}
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{"file_path": pathProp},
		Required:   []string{"file_path"},
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		srcPath, err := request.RequireString("file_path")
		if err != nil {
			return nil, err
		}

		audioPath, err := s.ReadFileToAudio(srcPath)
		if err != nil {
			return nil, err
		}

		reply := mcp.TextContent{
			Type: "text",
			Text: fmt.Sprintf("File '%s' converted to speech and saved to: %s", srcPath, audioPath),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return mcp.Tool{
		Name:        "read",
		Description: "Read a text file and convert it to speech, saving as MP3",
		InputSchema: schema,
	}, handle
}
// play returns the definition and handler of the "play" tool. The handler
// starts background playback of the file named by the required "file_path"
// argument and replies immediately; playback errors are not part of the reply.
func (s *Server) play() (mcp.Tool, server.ToolHandlerFunc) {
	pathProp := map[string]any{
		"type":        "string",
		"description": "Path to the audio file to play",
	}
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{"file_path": pathProp},
		Required:   []string{"file_path"},
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		audioPath, err := request.RequireString("file_path")
		if err != nil {
			return nil, err
		}

		s.PlayAudioAsync(audioPath)

		reply := mcp.TextContent{
			Type: "text",
			Text: fmt.Sprintf("Playing audio file: %s", audioPath),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return mcp.Tool{
		Name:        "play",
		Description: "Play an audio file",
		InputSchema: schema,
	}, handle
}
// setVoice returns the definition and handler of the "set_voice" tool. The
// handler selects the voice named by the required "voice_id" argument via
// SetVoice and reports the name and ID of the selected voice.
func (s *Server) setVoice() (mcp.Tool, server.ToolHandlerFunc) {
	idProp := map[string]any{
		"type":        "string",
		"description": "ID of the voice to use",
	}
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{"voice_id": idProp},
		Required:   []string{"voice_id"},
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		id, err := request.RequireString("voice_id")
		if err != nil {
			return nil, err
		}

		chosen, err := s.SetVoice(id)
		if err != nil {
			return nil, err
		}

		reply := mcp.TextContent{
			Type: "text",
			Text: fmt.Sprintf("Voice set to: %s (%s)", chosen.Name, chosen.VoiceID),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return mcp.Tool{
		Name:        "set_voice",
		Description: "Set the voice to use for text-to-speech generation",
		InputSchema: schema,
	}, handle
}
// getVoices returns the definition and handler of the "get_voices" tool. The
// tool takes no arguments; the handler fetches the voices via GetVoices and
// replies with the list formatted by formatVoiceList.
func (s *Server) getVoices() (mcp.Tool, server.ToolHandlerFunc) {
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{},
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		all, selected, err := s.GetVoices()
		if err != nil {
			return nil, err
		}

		reply := mcp.TextContent{
			Type: "text",
			Text: s.formatVoiceList(all, selected),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return mcp.Tool{
		Name:        "get_voices",
		Description: "Get list of available voices and show the currently selected one",
		InputSchema: schema,
	}, handle
}
// formatVoiceList renders voices as text, one voice per line with its name,
// ID and category. The line of the voice whose ID matches currentVoice is
// prefixed with "* ". The text ends with a line naming the current voice, or
// stating that none is selected when currentVoice is nil.
func (s *Server) formatVoiceList(voices []types.VoiceResponseModel, currentVoice *types.VoiceResponseModel) string {
	var out strings.Builder
	out.WriteString("Available voices:\n")

	for i := range voices {
		v := &voices[i]
		prefix := " "
		if currentVoice != nil && v.VoiceID == currentVoice.VoiceID {
			prefix = "* "
		}
		fmt.Fprintf(&out, "%s%s (%s) - %s\n", prefix, v.Name, v.VoiceID, v.Category)
	}

	if currentVoice == nil {
		out.WriteString("\nNo voice currently selected")
		return out.String()
	}

	fmt.Fprintf(&out, "\nCurrently selected: %s (%s)", currentVoice.Name, currentVoice.VoiceID)
	return out.String()
}
// history returns the definition and handler of the "history" tool. The tool
// takes no arguments; the handler lists the generated audio files via
// GetAudioHistory and replies with the list formatted by formatHistoryList, or
// with a fixed message when there are no files.
func (s *Server) history() (mcp.Tool, server.ToolHandlerFunc) {
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{},
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		clips, err := s.GetAudioHistory()
		if err != nil {
			return nil, err
		}

		body := "No audio files found"
		if len(clips) > 0 {
			body = s.formatHistoryList(clips)
		}

		reply := mcp.TextContent{
			Type: "text",
			Text: body,
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return mcp.Tool{
		Name:        "history",
		Description: "List available audio files with text summaries",
		InputSchema: schema,
	}, handle
}
// formatHistoryList renders audioFiles as text: a heading followed by one
// bulleted entry per file, with the file name on the first line and its
// summary on the second.
func (s *Server) formatHistoryList(audioFiles []AudioFile) string {
	var out strings.Builder
	out.WriteString("Available audio files:\n\n")
	for i := range audioFiles {
		fmt.Fprintf(&out, "• %s\n %s\n\n", audioFiles[i].Name, audioFiles[i].Summary)
	}
	return out.String()
}

459
main.go
View File

@@ -1,472 +1,25 @@
package main package main
import ( import (
"context"
"crypto/rand"
"fmt"
"log" "log"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/gopxl/beep/v2"
"github.com/gopxl/beep/v2/mp3"
"github.com/gopxl/beep/v2/speaker"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server" "github.com/mark3labs/mcp-go/server"
"github.com/taigrr/elevenlabs/client" "github.com/taigrr/elevenlabs-mcp/internal/ximcp"
"github.com/taigrr/elevenlabs/client/types"
) )
// ElevenLabsServer holds the state behind the MCP tools: the API client, the
// known voices and the locks guarding voice selection and playback.
type ElevenLabsServer struct {
// client is the API client used to list voices and to synthesize speech.
client client.Client
// voices is the voice list stored by the most recent refreshVoices call.
voices []types.VoiceResponseModel
// currentVoice is the voice used for synthesis; nil until one is selected.
currentVoice *types.VoiceResponseModel
// voicesMutex is held by refreshVoices and by the set_voice and get_voices
// handlers while they access voices and currentVoice.
voicesMutex sync.RWMutex
// playMutex is held by playAudio for the duration of a playback so that
// clips are played one at a time.
playMutex sync.Mutex
}
// NewElevenLabsServer builds an ElevenLabsServer.
//
// The API key is taken from the XI_API_KEY environment variable. The voice
// list is loaded and the speaker is initialized before the server is
// returned; a missing key or a failure in either step is returned as an error.
func NewElevenLabsServer() (*ElevenLabsServer, error) {
	apiKey := os.Getenv("XI_API_KEY")
	if apiKey == "" {
		return nil, fmt.Errorf("XI_API_KEY environment variable is required")
	}

	s := &ElevenLabsServer{
		client: client.New(apiKey),
	}

	// Initialize voices and set default
	if err := s.refreshVoices(); err != nil {
		return nil, fmt.Errorf("failed to initialize voices: %w", err)
	}

	// Initialize speaker for audio playback with a buffer of one tenth of a
	// second. speaker.Init reports a missing or unusable audio device, so its
	// error must not be dropped.
	sr := beep.SampleRate(44100)
	if err := speaker.Init(sr, sr.N(time.Second/10)); err != nil {
		return nil, fmt.Errorf("failed to initialize speaker: %w", err)
	}

	return s, nil
}
// refreshVoices fetches the voice list from the API and stores it on the
// server. When no voice is selected yet, the first voice of the new list
// becomes the current one.
func (s *ElevenLabsServer) refreshVoices() error {
	// Fetch before taking the lock: the request has no deadline, and holding
	// the write lock across it would block every other voice operation for as
	// long as the API takes to answer.
	voices, err := s.client.GetVoices(context.Background())
	if err != nil {
		return fmt.Errorf("failed to get voices: %w", err)
	}

	s.voicesMutex.Lock()
	defer s.voicesMutex.Unlock()

	s.voices = voices

	// Set default voice if none selected
	if s.currentVoice == nil && len(voices) > 0 {
		s.currentVoice = &voices[0]
	}

	return nil
}
// generateRandomHex returns a string of exactly length lowercase hexadecimal
// characters produced from crypto/rand.
//
// A non-positive length yields the empty string instead of panicking.
func generateRandomHex(length int) string {
	if length <= 0 {
		return ""
	}

	// Every byte encodes to two hex digits, so ceil(length/2) random bytes are
	// enough; for an odd length the surplus digit is sliced off below.
	buf := make([]byte, (length+1)/2)

	// The error is deliberately ignored: the result is only used as a
	// collision-avoidance tag, not as a secret, so falling back to the zeroed
	// buffer is preferable to failing the caller.
	_, _ = rand.Read(buf)

	return fmt.Sprintf("%x", buf)[:length]
}
// generateAudio synthesizes text with the currently selected voice, writes
// the audio to ".xi/<unix-milliseconds>-<random hex>.mp3" and writes the text
// itself to a ".txt" file with the same base name.
//
// It returns the path of the audio file. An error is returned when no voice
// is selected, when synthesis fails, or when a file cannot be written.
func (s *ElevenLabsServer) generateAudio(text string) (string, error) {
	// Snapshot the selected voice under the lock that guards it; the
	// set_voice handler and refreshVoices may replace it concurrently.
	s.voicesMutex.RLock()
	voice := s.currentVoice
	s.voicesMutex.RUnlock()
	if voice == nil {
		return "", fmt.Errorf("no voice selected")
	}

	// Generate audio using TTS
	audioData, err := s.client.TTS(context.Background(), text, voice.VoiceID, "", types.SynthesisOptions{
		Stability:       0.5,
		SimilarityBoost: 0.5,
	})
	if err != nil {
		return "", fmt.Errorf("failed to generate speech: %w", err)
	}

	// Create filename with timestamp and random hex
	timestamp := time.Now().UnixMilli()
	randomHex := generateRandomHex(5)
	filename := fmt.Sprintf("%d-%s.mp3", timestamp, randomHex)
	filePath := filepath.Join(".xi", filename)

	// Ensure directory exists
	if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
		return "", fmt.Errorf("failed to create directory: %w", err)
	}

	// Write audio file
	if err := os.WriteFile(filePath, audioData, 0644); err != nil {
		return "", fmt.Errorf("failed to write audio file: %w", err)
	}

	// Write text file alongside audio
	textFilePath := strings.TrimSuffix(filePath, ".mp3") + ".txt"
	if err := os.WriteFile(textFilePath, []byte(text), 0644); err != nil {
		return "", fmt.Errorf("failed to write text file: %w", err)
	}

	return filePath, nil
}
// playAudio decodes the MP3 file at filepath, resamples it to 44100 Hz and
// plays it, blocking until playback has finished.
//
// playMutex is held for the whole call, so concurrent callers play their
// clips one after another. An error is returned when the file cannot be
// opened or decoded, or when the decoder reports an error during playback.
func (s *ElevenLabsServer) playAudio(filepath string) error {
	s.playMutex.Lock()
	defer s.playMutex.Unlock()

	file, err := os.Open(filepath)
	if err != nil {
		return fmt.Errorf("failed to open audio file: %w", err)
	}
	defer file.Close()

	streamer, format, err := mp3.Decode(file)
	if err != nil {
		return fmt.Errorf("failed to decode mp3: %w", err)
	}
	defer streamer.Close()

	// 4 is the resampling quality argument of beep.Resample.
	resampled := beep.Resample(4, format.SampleRate, 44100, streamer)

	// The callback runs once, after resampled is drained; closing the channel
	// signals completion without making the speaker wait for a receiver.
	done := make(chan struct{})
	speaker.Play(beep.Seq(resampled, beep.Callback(func() {
		close(done)
	})))
	<-done

	// A clip that stopped early because of a decoding problem must not be
	// reported as a successful playback.
	if err := streamer.Err(); err != nil {
		return fmt.Errorf("failed to play audio: %w", err)
	}
	return nil
}
// playAudioAsync starts playback of the file at filepath in a new goroutine
// and returns immediately. A playback error is only logged; it is not
// reported to the caller.
func (s *ElevenLabsServer) playAudioAsync(filepath string) {
	play := func() {
		err := s.playAudio(filepath)
		if err == nil {
			return
		}
		log.Printf("Error playing audio: %v", err)
	}
	go play()
}
// setupTools registers the six tools of this server on mcpServer, in the
// order say, read, play, set_voice, get_voices, history. Each tool is declared
// with its JSON input schema and an inline handler that validates the
// arguments, performs the work and replies with a single text item.
func (s *ElevenLabsServer) setupTools(mcpServer *server.MCPServer) {
// Say tool
sayTool := mcp.Tool{
Name: "say",
Description: "Convert text to speech, save as MP3 file, and play the audio",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"text": map[string]any{
"type": "string",
"description": "Text to convert to speech",
},
},
Required: []string{"text"},
},
}
mcpServer.AddTool(sayTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
text, err := request.RequireString("text")
if err != nil {
return nil, err
}
filepath, err := s.generateAudio(text)
if err != nil {
return nil, err
}
// Play audio asynchronously
s.playAudioAsync(filepath)
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: fmt.Sprintf("Audio generated, saved to %s, and playing", filepath),
},
},
}, nil
})
// Read tool
readTool := mcp.Tool{
Name: "read",
Description: "Read a text file and convert it to speech, saving as MP3",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"file_path": map[string]any{
"type": "string",
"description": "Path to the text file to read and convert to speech",
},
},
Required: []string{"file_path"},
},
}
mcpServer.AddTool(readTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
filePath, err := request.RequireString("file_path")
if err != nil {
return nil, err
}
// Read file content
content, err := os.ReadFile(filePath)
if err != nil {
return nil, fmt.Errorf("failed to read file: %w", err)
}
text := string(content)
audioPath, err := s.generateAudio(text)
if err != nil {
return nil, err
}
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: fmt.Sprintf("File '%s' converted to speech and saved to: %s", filePath, audioPath),
},
},
}, nil
})
// Play tool
playTool := mcp.Tool{
Name: "play",
Description: "Play an audio file",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"file_path": map[string]any{
"type": "string",
"description": "Path to the audio file to play",
},
},
Required: []string{"file_path"},
},
}
mcpServer.AddTool(playTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
filePath, err := request.RequireString("file_path")
if err != nil {
return nil, err
}
// Play audio asynchronously
s.playAudioAsync(filePath)
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: fmt.Sprintf("Playing audio file: %s", filePath),
},
},
}, nil
})
// Set voice tool
setVoiceTool := mcp.Tool{
Name: "set_voice",
Description: "Set the voice to use for text-to-speech generation",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{
"voice_id": map[string]any{
"type": "string",
"description": "ID of the voice to use",
},
},
Required: []string{"voice_id"},
},
}
mcpServer.AddTool(setVoiceTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
voiceID, err := request.RequireString("voice_id")
if err != nil {
return nil, err
}
// The write lock covers both the search in s.voices and the assignment
// to s.currentVoice.
s.voicesMutex.Lock()
defer s.voicesMutex.Unlock()
// Find the voice
var selectedVoice *types.VoiceResponseModel
for i, voice := range s.voices {
if voice.VoiceID == voiceID {
selectedVoice = &s.voices[i]
break
}
}
if selectedVoice == nil {
return nil, fmt.Errorf("voice with ID '%s' not found", voiceID)
}
s.currentVoice = selectedVoice
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: fmt.Sprintf("Voice set to: %s (%s)", selectedVoice.Name, selectedVoice.VoiceID),
},
},
}, nil
})
// Get voices tool
getVoicesTool := mcp.Tool{
Name: "get_voices",
Description: "Get list of available voices and show the currently selected one",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{},
},
}
mcpServer.AddTool(getVoicesTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
// Refresh voices from API
if err := s.refreshVoices(); err != nil {
return nil, err
}
s.voicesMutex.RLock()
defer s.voicesMutex.RUnlock()
var voiceList strings.Builder
voiceList.WriteString("Available voices:\n")
for _, voice := range s.voices {
// The currently selected voice is marked with "* ".
marker := " "
if s.currentVoice != nil && voice.VoiceID == s.currentVoice.VoiceID {
marker = "* "
}
voiceList.WriteString(fmt.Sprintf("%s%s (%s) - %s\n",
marker, voice.Name, voice.VoiceID, voice.Category))
}
if s.currentVoice != nil {
voiceList.WriteString(fmt.Sprintf("\nCurrently selected: %s (%s)",
s.currentVoice.Name, s.currentVoice.VoiceID))
} else {
voiceList.WriteString("\nNo voice currently selected")
}
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: voiceList.String(),
},
},
}, nil
})
// History tool
historyTool := mcp.Tool{
Name: "history",
Description: "List available audio files with text summaries",
InputSchema: mcp.ToolInputSchema{
Type: "object",
Properties: map[string]any{},
},
}
mcpServer.AddTool(historyTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
// Read .xi directory
files, err := os.ReadDir(".xi")
if err != nil {
// A missing directory only means nothing has been generated yet.
if os.IsNotExist(err) {
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: "No audio files found (directory doesn't exist yet)",
},
},
}, nil
}
return nil, fmt.Errorf("failed to read .xi directory: %w", err)
}
var audioFiles []string
for _, file := range files {
if strings.HasSuffix(file.Name(), ".mp3") {
audioFiles = append(audioFiles, file.Name())
}
}
if len(audioFiles) == 0 {
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: "No audio files found",
},
},
}, nil
}
var historyList strings.Builder
historyList.WriteString("Available audio files:\n\n")
for _, audioFile := range audioFiles {
// Try to read corresponding text file
textFile := strings.TrimSuffix(audioFile, ".mp3") + ".txt"
textPath := filepath.Join(".xi", textFile)
summary := ""
if content, err := os.ReadFile(textPath); err == nil {
text := strings.TrimSpace(string(content))
words := strings.Fields(text)
// Show at most the first 10 words of the saved text.
if len(words) > 10 {
summary = strings.Join(words[:10], " ") + "..."
} else {
summary = text
}
} else {
summary = "(no text summary available)"
}
historyList.WriteString(fmt.Sprintf("• %s\n %s\n\n", audioFile, summary))
}
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: historyList.String(),
},
},
}, nil
})
}
func main() { func main() {
// Create ElevenLabs server
elevenServer, err := NewElevenLabsServer()
if err != nil {
log.Fatalf("Failed to create ElevenLabs server: %v", err)
}
// Create MCP server
mcpServer := server.NewMCPServer( mcpServer := server.NewMCPServer(
"ElevenLabs MCP Server", "ElevenLabs MCP Server",
"1.0.0", "1.0.0",
server.WithToolCapabilities(true), server.WithToolCapabilities(true),
) )
elevenServer, err := ximcp.NewServer(mcpServer)
if err != nil {
log.Fatalf("Failed to create ElevenLabs server: %v", err)
}
// Setup tools elevenServer.SetupTools()
elevenServer.setupTools(mcpServer)
// Serve via stdio
if err := server.ServeStdio(mcpServer); err != nil { if err := server.ServeStdio(mcpServer); err != nil {
log.Fatalf("Failed to serve MCP server: %v", err) log.Fatalf("Failed to serve MCP server: %v", err)
} }