diff --git a/.gitignore b/.gitignore index bfc64b2..06c2fb2 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ Thumbs.db !examples/**/*.wav !examples/**/*.m4a.xi/ .xi/ +elevenlabs-mcp diff --git a/README.md b/README.md index 2ec224a..cf49e19 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # elevenlabs-mcp + [![License 0BSD](https://img.shields.io/badge/License-0BSD-pink.svg)](https://opensource.org/licenses/0BSD) [![GoDoc](https://godoc.org/github.com/taigrr/elevenlabs-mcp?status.svg)](https://godoc.org/github.com/taigrr/elevenlabs-mcp) [![Go Report Card](https://goreportcard.com/badge/github.com/taigrr/elevenlabs-mcp)](https://goreportcard.com/report/github.com/taigrr/elevenlabs-mcp) @@ -14,27 +15,27 @@ It provides a seamless interface for converting text to high-quality speech, man As a prerequisite, you must already have an account with [ElevenLabs](https://elevenlabs.io). After creating your account, you can get your API key [from here](https://help.elevenlabs.io/hc/en-us/articles/14599447207697-How-to-authorize-yourself-using-your-xi-api-key-). +Note: your API key needs, at a minimum, read access to your voices and access to Text-to-Speech generation to function properly. ## Installation ```bash -go build -o elevenlabs-mcp +go install . ``` ## Configuration Set your ElevenLabs API key: + ```bash export XI_API_KEY=your_api_key_here ``` ## Usage -The server communicates via stdio using the MCP protocol: +The server communicates via stdio using the MCP protocol. -```bash -./elevenlabs-mcp -``` +You'll need a compatible MCP client to interact with this server. Generated audio files are automatically saved to `.xi/-.mp3` with corresponding `.txt` files containing the original text for reference. 
@@ -43,11 +44,11 @@ Generated audio files are automatically saved to `.xi/-.mp3` wi The server provides the following tools to MCP clients: - **say** - Convert text to speech and save as MP3 -- **read** - Read a text file and convert it to speech +- **read** - Read a text file and convert it to speech - **play** - Play audio files using system audio - **set_voice** - Change the voice used for generation - **get_voices** - List available voices and show current selection -- **history** - List previously generated audio files with text summaries +- **history** - List previously generated audio files with (truncated) text summaries ## Dependencies @@ -61,4 +62,5 @@ This project is licensed under the 0BSD License, written by [Rob Landley](https: As such, you may use this library without restriction or attribution, but please don't pass it off as your own. Attribution, though not required, is appreciated. -By contributing, you agree all code submitted also falls under the License. \ No newline at end of file +By contributing, you agree all code submitted also falls under the License. 
+
diff --git a/internal/ximcp/audio.go b/internal/ximcp/audio.go
new file mode 100644
index 0000000..90ee1b1
--- /dev/null
+++ b/internal/ximcp/audio.go
@@ -0,0 +1,162 @@
+package ximcp
+
+import (
+	"context"
+	"crypto/rand"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/gopxl/beep/v2"
+	"github.com/gopxl/beep/v2/mp3"
+	"github.com/gopxl/beep/v2/speaker"
+	"github.com/taigrr/elevenlabs/client/types"
+)
+
+// generateRandomHex returns a string of exactly length lowercase hexadecimal
+// characters read from crypto/rand. generateFilePath appends it to the
+// millisecond timestamp that names each generated file.
+func generateRandomHex(length int) string {
+	// Each byte encodes to two hex characters, so half as many bytes (rounded
+	// up) are enough; the slice below trims the odd extra character.
+	buf := make([]byte, (length+1)/2)
+	// rand.Read's error is deliberately discarded: the result only decorates a
+	// timestamped file name, so a degraded suffix beats failing the request.
+	_, _ = rand.Read(buf)
+	return fmt.Sprintf("%x", buf)[:length]
+}
+
+// GenerateAudio synthesizes text with the currently selected voice, writes the
+// MP3 and a sibling .txt transcript under AudioDirectory, and returns the path
+// of the MP3. It returns an error if no voice is selected, if synthesis fails,
+// or if either file cannot be written.
+func (s *Server) GenerateAudio(text string) (string, error) {
+	audioData, err := s.generateTTSAudio(text)
+	if err != nil {
+		return "", err
+	}
+
+	return s.saveAudioFiles(text, audioData)
+}
+
+// generateTTSAudio requests speech for text from the ElevenLabs client using
+// the selected voice and the package's default synthesis options.
+func (s *Server) generateTTSAudio(text string) ([]byte, error) {
+	// Snapshot the voice under the read lock: SetVoice and refreshVoices
+	// assign currentVoice while holding voicesMutex, and tool handlers may run
+	// concurrently. The lock is released before the network call.
+	s.voicesMutex.RLock()
+	voice := s.currentVoice
+	s.voicesMutex.RUnlock()
+	if voice == nil {
+		return nil, fmt.Errorf("no voice selected")
+	}
+
+	return s.client.TTS(context.Background(), text, voice.VoiceID, "", types.SynthesisOptions{
+		Stability:       DefaultStability,
+		SimilarityBoost: DefaultSimilarityBoost,
+	})
+}
+
+// saveAudioFiles writes audioData to a freshly generated path and text to the
+// matching .txt file, creating the directory first, and returns the MP3 path.
+func (s *Server) saveAudioFiles(text string, audioData []byte) (string, error) {
+	filePath := s.generateFilePath()
+
+	if err := s.ensureDirectoryExists(filePath); err != nil {
+		return "", err
+	}
+
+	if err := s.writeAudioFile(filePath, audioData); err != nil {
+		return "", err
+	}
+
+	if err := s.writeTextFile(filePath, text); err != nil {
+		return "", err
+	}
+
+	return filePath, nil
+}
+
+// generateFilePath builds "<AudioDirectory>/<unix-millis>-<hex>.mp3".
+func (s *Server) generateFilePath() string {
+	timestamp := time.Now().UnixMilli()
+	randomHex := generateRandomHex(RandomHexLength)
+	filename := fmt.Sprintf("%d-%s.mp3", timestamp, randomHex)
+	return filepath.Join(AudioDirectory, filename)
+}
+
+// ensureDirectoryExists creates the parent directory of filePath if needed.
+func (s *Server) ensureDirectoryExists(filePath string) error {
+	if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
+		return fmt.Errorf("failed to
create directory: %w", err) + } + return nil +} + +func (s *Server) writeAudioFile(filePath string, audioData []byte) error { + if err := os.WriteFile(filePath, audioData, 0644); err != nil { + return fmt.Errorf("failed to write audio file: %w", err) + } + return nil +} + +func (s *Server) writeTextFile(filePath, text string) error { + textFilePath := strings.TrimSuffix(filePath, ".mp3") + ".txt" + if err := os.WriteFile(textFilePath, []byte(text), 0644); err != nil { + return fmt.Errorf("failed to write text file: %w", err) + } + return nil +} + +func (s *Server) PlayAudio(filepath string) error { + s.playMutex.Lock() + defer s.playMutex.Unlock() + + file, err := os.Open(filepath) + if err != nil { + return fmt.Errorf("failed to open audio file: %w", err) + } + defer file.Close() + + streamer, format, err := mp3.Decode(file) + if err != nil { + return fmt.Errorf("failed to decode mp3: %w", err) + } + defer streamer.Close() + + return s.playStreamer(streamer, format) +} + +func (s *Server) playStreamer(streamer beep.StreamSeekCloser, format beep.Format) error { + resampled := beep.Resample(4, format.SampleRate, AudioSampleRate, streamer) + + done := make(chan bool) + speaker.Play(beep.Seq(resampled, beep.Callback(func() { + done <- true + }))) + + <-done + return nil +} + +func (s *Server) PlayAudioAsync(filepath string) { + go func() { + if err := s.PlayAudio(filepath); err != nil { + log.Printf("Error playing audio: %v", err) + } + }() +} + +func (s *Server) ReadFileToAudio(filePath string) (string, error) { + content, err := os.ReadFile(filePath) + if err != nil { + return "", fmt.Errorf("failed to read file: %w", err) + } + + text := string(content) + return s.GenerateAudio(text) +} diff --git a/internal/ximcp/history.go b/internal/ximcp/history.go new file mode 100644 index 0000000..5c06e1b --- /dev/null +++ b/internal/ximcp/history.go @@ -0,0 +1,64 @@ +package ximcp + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +type AudioFile struct { + Name 
string + Summary string +} + +func (s *Server) GetAudioHistory() ([]AudioFile, error) { + files, err := os.ReadDir(AudioDirectory) + if err != nil { + if os.IsNotExist(err) { + return []AudioFile{}, nil + } + return nil, fmt.Errorf("failed to read %s directory: %w", AudioDirectory, err) + } + + return s.processAudioFiles(files), nil +} + +func (s *Server) processAudioFiles(files []os.DirEntry) []AudioFile { + var audioFiles []AudioFile + + for _, file := range files { + if strings.HasSuffix(file.Name(), ".mp3") { + summary := s.getAudioSummary(file.Name()) + audioFiles = append(audioFiles, AudioFile{ + Name: file.Name(), + Summary: summary, + }) + } + } + + return audioFiles +} + +func (s *Server) getAudioSummary(audioFileName string) string { + textFile := strings.TrimSuffix(audioFileName, ".mp3") + ".txt" + textPath := filepath.Join(AudioDirectory, textFile) + + content, err := os.ReadFile(textPath) + if err != nil { + return "(no text summary available)" + } + + return s.createSummary(string(content)) +} + +func (s *Server) createSummary(text string) string { + text = strings.TrimSpace(text) + words := strings.Fields(text) + + if len(words) > MaxSummaryWords { + return strings.Join(words[:MaxSummaryWords], " ") + "..." 
+	}
+
+	return text
+}
diff --git a/internal/ximcp/server.go b/internal/ximcp/server.go
new file mode 100644
index 0000000..6353835
--- /dev/null
+++ b/internal/ximcp/server.go
@@ -0,0 +1,135 @@
+package ximcp
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/gopxl/beep/v2"
+	"github.com/gopxl/beep/v2/speaker"
+	"github.com/mark3labs/mcp-go/server"
+	"github.com/taigrr/elevenlabs/client"
+	"github.com/taigrr/elevenlabs/client/types"
+)
+
+// Package-wide defaults: the synthesis options sent with every TTS request,
+// the directory generated files are stored in, the playback sample rate, the
+// length of the random file-name suffix, and the history summary word limit.
+const (
+	DefaultStability       = 0.5
+	DefaultSimilarityBoost = 0.5
+	AudioDirectory         = ".xi"
+	AudioSampleRate        = 44100
+	RandomHexLength        = 5
+	MaxSummaryWords        = 10
+)
+
+// Server bundles the MCP server, the ElevenLabs client, the cached voice list
+// with the current selection (written under voicesMutex), and playMutex,
+// which PlayAudio holds so that only one file plays at a time.
+type Server struct {
+	mcpServer    *server.MCPServer
+	client       client.Client
+	voices       []types.VoiceResponseModel
+	currentVoice *types.VoiceResponseModel
+	voicesMutex  sync.RWMutex
+	playMutex    sync.Mutex
+}
+
+// NewServer reads XI_API_KEY from the environment, creates the ElevenLabs
+// client, loads the voice list (selecting the first voice by default), and
+// initializes the audio speaker. It returns an error if the key is unset or
+// either initialization step fails.
+func NewServer(mcpServer *server.MCPServer) (*Server, error) {
+	apiKey := os.Getenv("XI_API_KEY")
+	if apiKey == "" {
+		return nil, fmt.Errorf("XI_API_KEY environment variable is required")
+	}
+
+	elevenClient := client.New(apiKey)
+
+	s := &Server{
+		client:    elevenClient,
+		mcpServer: mcpServer,
+	}
+
+	if err := s.initializeVoices(); err != nil {
+		return nil, fmt.Errorf("failed to initialize voices: %w", err)
+	}
+
+	if err := s.initializeSpeaker(); err != nil {
+		return nil, fmt.Errorf("failed to initialize speaker: %w", err)
+	}
+
+	return s, nil
+}
+
+// initializeVoices performs the initial voice fetch.
+func (s *Server) initializeVoices() error {
+	return s.refreshVoices()
+}
+
+// initializeSpeaker opens the audio output at AudioSampleRate with a buffer of
+// one tenth of a second and reports any failure from speaker.Init.
+func (s *Server) initializeSpeaker() error {
+	sr := beep.SampleRate(AudioSampleRate)
+	return speaker.Init(sr, sr.N(time.Second/10))
+}
+
+// refreshVoices replaces the cached voice list with the one returned by the
+// API and, if no voice is selected yet, selects the first. It holds
+// voicesMutex for the duration of the request.
+func (s *Server) refreshVoices() error {
+	s.voicesMutex.Lock()
+	defer s.voicesMutex.Unlock()
+
+	voices, err := s.client.GetVoices(context.Background())
+	if err != nil {
+		return fmt.Errorf("failed to get voices: %w", err)
+	}
+
+	s.voices = voices
+	s.setDefaultVoiceIfNeeded()
+	return nil
+}
+
+func (s *Server)
setDefaultVoiceIfNeeded() { + if s.currentVoice == nil && len(s.voices) > 0 { + s.currentVoice = &s.voices[0] + } +} + +func (s *Server) GetVoices() ([]types.VoiceResponseModel, *types.VoiceResponseModel, error) { + if err := s.refreshVoices(); err != nil { + return nil, nil, err + } + + s.voicesMutex.RLock() + defer s.voicesMutex.RUnlock() + + return s.voices, s.currentVoice, nil +} + +func (s *Server) SetVoice(voiceID string) (*types.VoiceResponseModel, error) { + s.voicesMutex.Lock() + defer s.voicesMutex.Unlock() + + selectedVoice := s.findVoiceByID(voiceID) + if selectedVoice == nil { + return nil, fmt.Errorf("voice with ID '%s' not found", voiceID) + } + + s.currentVoice = selectedVoice + return selectedVoice, nil +} + +func (s *Server) findVoiceByID(voiceID string) *types.VoiceResponseModel { + for i, voice := range s.voices { + if voice.VoiceID == voiceID { + return &s.voices[i] + } + } + return nil +} diff --git a/internal/ximcp/tools.go b/internal/ximcp/tools.go new file mode 100644 index 0000000..0c04279 --- /dev/null +++ b/internal/ximcp/tools.go @@ -0,0 +1,280 @@ +package ximcp + +import ( + "context" + "fmt" + "strings" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + "github.com/taigrr/elevenlabs/client/types" +) + +func (s *Server) SetupTools() { + s.mcpServer.AddTool(s.say()) + s.mcpServer.AddTool(s.read()) + s.mcpServer.AddTool(s.play()) + s.mcpServer.AddTool(s.setVoice()) + s.mcpServer.AddTool(s.getVoices()) + s.mcpServer.AddTool(s.history()) +} + +func (s *Server) say() (mcp.Tool, server.ToolHandlerFunc) { + tool := mcp.Tool{ + Name: "say", + Description: "Convert text to speech, save as MP3 file, and play the audio", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "text": map[string]any{ + "type": "string", + "description": "Text to convert to speech", + }, + }, + Required: []string{"text"}, + }, + } + + handler := func(ctx context.Context, request mcp.CallToolRequest) 
(*mcp.CallToolResult, error) { + text, err := request.RequireString("text") + if err != nil { + return nil, err + } + + filepath, err := s.GenerateAudio(text) + if err != nil { + return nil, err + } + + s.PlayAudioAsync(filepath) + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.TextContent{ + Type: "text", + Text: fmt.Sprintf("Audio generated, saved to %s, and playing", filepath), + }, + }, + }, nil + } + return tool, handler +} + +func (s *Server) read() (mcp.Tool, server.ToolHandlerFunc) { + tool := mcp.Tool{ + Name: "read", + Description: "Read a text file and convert it to speech, saving as MP3", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "file_path": map[string]any{ + "type": "string", + "description": "Path to the text file to read and convert to speech", + }, + }, + Required: []string{"file_path"}, + }, + } + + handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + filePath, err := request.RequireString("file_path") + if err != nil { + return nil, err + } + + audioPath, err := s.ReadFileToAudio(filePath) + if err != nil { + return nil, err + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.TextContent{ + Type: "text", + Text: fmt.Sprintf("File '%s' converted to speech and saved to: %s", filePath, audioPath), + }, + }, + }, nil + } + return tool, handler +} + +func (s *Server) play() (mcp.Tool, server.ToolHandlerFunc) { + tool := mcp.Tool{ + Name: "play", + Description: "Play an audio file", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "file_path": map[string]any{ + "type": "string", + "description": "Path to the audio file to play", + }, + }, + Required: []string{"file_path"}, + }, + } + + handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + filePath, err := request.RequireString("file_path") + if err != nil { + return nil, err + } + + 
s.PlayAudioAsync(filePath) + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.TextContent{ + Type: "text", + Text: fmt.Sprintf("Playing audio file: %s", filePath), + }, + }, + }, nil + } + return tool, handler +} + +func (s *Server) setVoice() (mcp.Tool, server.ToolHandlerFunc) { + tool := mcp.Tool{ + Name: "set_voice", + Description: "Set the voice to use for text-to-speech generation", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{ + "voice_id": map[string]any{ + "type": "string", + "description": "ID of the voice to use", + }, + }, + Required: []string{"voice_id"}, + }, + } + + handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + voiceID, err := request.RequireString("voice_id") + if err != nil { + return nil, err + } + + selectedVoice, err := s.SetVoice(voiceID) + if err != nil { + return nil, err + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.TextContent{ + Type: "text", + Text: fmt.Sprintf("Voice set to: %s (%s)", selectedVoice.Name, selectedVoice.VoiceID), + }, + }, + }, nil + } + return tool, handler +} + +func (s *Server) getVoices() (mcp.Tool, server.ToolHandlerFunc) { + tool := mcp.Tool{ + Name: "get_voices", + Description: "Get list of available voices and show the currently selected one", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{}, + }, + } + + handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + voices, currentVoice, err := s.GetVoices() + if err != nil { + return nil, err + } + + voiceList := s.formatVoiceList(voices, currentVoice) + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.TextContent{ + Type: "text", + Text: voiceList, + }, + }, + }, nil + } + return tool, handler +} + +func (s *Server) formatVoiceList(voices []types.VoiceResponseModel, currentVoice *types.VoiceResponseModel) string { + var voiceList strings.Builder + 
voiceList.WriteString("Available voices:\n") + + for _, voice := range voices { + marker := " " + if currentVoice != nil && voice.VoiceID == currentVoice.VoiceID { + marker = "* " + } + voiceList.WriteString(fmt.Sprintf("%s%s (%s) - %s\n", + marker, voice.Name, voice.VoiceID, voice.Category)) + } + + if currentVoice != nil { + voiceList.WriteString(fmt.Sprintf("\nCurrently selected: %s (%s)", + currentVoice.Name, currentVoice.VoiceID)) + } else { + voiceList.WriteString("\nNo voice currently selected") + } + + return voiceList.String() +} + +func (s *Server) history() (mcp.Tool, server.ToolHandlerFunc) { + tool := mcp.Tool{ + Name: "history", + Description: "List available audio files with text summaries", + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{}, + }, + } + + handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { + audioFiles, err := s.GetAudioHistory() + if err != nil { + return nil, err + } + + if len(audioFiles) == 0 { + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.TextContent{ + Type: "text", + Text: "No audio files found", + }, + }, + }, nil + } + + historyList := s.formatHistoryList(audioFiles) + + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.TextContent{ + Type: "text", + Text: historyList, + }, + }, + }, nil + } + return tool, handler +} + +func (s *Server) formatHistoryList(audioFiles []AudioFile) string { + var historyList strings.Builder + historyList.WriteString("Available audio files:\n\n") + + for _, audioFile := range audioFiles { + historyList.WriteString(fmt.Sprintf("• %s\n %s\n\n", audioFile.Name, audioFile.Summary)) + } + + return historyList.String() +} diff --git a/main.go b/main.go index c91b4fe..7ddb8c8 100644 --- a/main.go +++ b/main.go @@ -1,472 +1,25 @@ package main import ( - "context" - "crypto/rand" - "fmt" "log" - "os" - "path/filepath" - "strings" - "sync" - "time" - "github.com/gopxl/beep/v2" - 
"github.com/gopxl/beep/v2/mp3" - "github.com/gopxl/beep/v2/speaker" - "github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/server" - "github.com/taigrr/elevenlabs/client" - "github.com/taigrr/elevenlabs/client/types" + "github.com/taigrr/elevenlabs-mcp/internal/ximcp" ) -type ElevenLabsServer struct { - client client.Client - voices []types.VoiceResponseModel - currentVoice *types.VoiceResponseModel - voicesMutex sync.RWMutex - playMutex sync.Mutex -} - -func NewElevenLabsServer() (*ElevenLabsServer, error) { - apiKey := os.Getenv("XI_API_KEY") - if apiKey == "" { - return nil, fmt.Errorf("XI_API_KEY environment variable is required") - } - - elevenClient := client.New(apiKey) - - s := &ElevenLabsServer{ - client: elevenClient, - } - - // Initialize voices and set default - if err := s.refreshVoices(); err != nil { - return nil, fmt.Errorf("failed to initialize voices: %w", err) - } - - // Initialize speaker for audio playback - sr := beep.SampleRate(44100) - speaker.Init(sr, sr.N(time.Second/10)) - - return s, nil -} - -func (s *ElevenLabsServer) refreshVoices() error { - s.voicesMutex.Lock() - defer s.voicesMutex.Unlock() - - voices, err := s.client.GetVoices(context.Background()) - if err != nil { - return fmt.Errorf("failed to get voices: %w", err) - } - - s.voices = voices - - // Set default voice if none selected - if s.currentVoice == nil && len(voices) > 0 { - s.currentVoice = &voices[0] - } - - return nil -} - -func generateRandomHex(length int) string { - bytes := make([]byte, length) - rand.Read(bytes) - return fmt.Sprintf("%x", bytes)[:length] -} - -func (s *ElevenLabsServer) generateAudio(text string) (string, error) { - if s.currentVoice == nil { - return "", fmt.Errorf("no voice selected") - } - - // Generate audio using TTS - audioData, err := s.client.TTS(context.Background(), text, s.currentVoice.VoiceID, "", types.SynthesisOptions{ - Stability: 0.5, - SimilarityBoost: 0.5, - }) - if err != nil { - return "", fmt.Errorf("failed to 
generate speech: %w", err) - } - - // Create filename with timestamp and random hex - timestamp := time.Now().UnixMilli() - randomHex := generateRandomHex(5) - filename := fmt.Sprintf("%d-%s.mp3", timestamp, randomHex) - filePath := filepath.Join(".xi", filename) - - // Ensure directory exists - if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil { - return "", fmt.Errorf("failed to create directory: %w", err) - } - - // Write audio file - if err := os.WriteFile(filePath, audioData, 0644); err != nil { - return "", fmt.Errorf("failed to write audio file: %w", err) - } - - // Write text file alongside audio - textFilePath := strings.TrimSuffix(filePath, ".mp3") + ".txt" - if err := os.WriteFile(textFilePath, []byte(text), 0644); err != nil { - return "", fmt.Errorf("failed to write text file: %w", err) - } - - return filePath, nil -} - -func (s *ElevenLabsServer) playAudio(filepath string) error { - s.playMutex.Lock() - defer s.playMutex.Unlock() - - file, err := os.Open(filepath) - if err != nil { - return fmt.Errorf("failed to open audio file: %w", err) - } - defer file.Close() - - streamer, format, err := mp3.Decode(file) - if err != nil { - return fmt.Errorf("failed to decode mp3: %w", err) - } - defer streamer.Close() - - resampled := beep.Resample(4, format.SampleRate, 44100, streamer) - - done := make(chan bool) - speaker.Play(beep.Seq(resampled, beep.Callback(func() { - done <- true - }))) - - <-done - return nil -} - -func (s *ElevenLabsServer) playAudioAsync(filepath string) { - go func() { - if err := s.playAudio(filepath); err != nil { - log.Printf("Error playing audio: %v", err) - } - }() -} - -func (s *ElevenLabsServer) setupTools(mcpServer *server.MCPServer) { - // Say tool - sayTool := mcp.Tool{ - Name: "say", - Description: "Convert text to speech, save as MP3 file, and play the audio", - InputSchema: mcp.ToolInputSchema{ - Type: "object", - Properties: map[string]any{ - "text": map[string]any{ - "type": "string", - "description": "Text 
to convert to speech", - }, - }, - Required: []string{"text"}, - }, - } - - mcpServer.AddTool(sayTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - text, err := request.RequireString("text") - if err != nil { - return nil, err - } - - filepath, err := s.generateAudio(text) - if err != nil { - return nil, err - } - - // Play audio asynchronously - s.playAudioAsync(filepath) - - return &mcp.CallToolResult{ - Content: []mcp.Content{ - mcp.TextContent{ - Type: "text", - Text: fmt.Sprintf("Audio generated, saved to %s, and playing", filepath), - }, - }, - }, nil - }) - - // Read tool - readTool := mcp.Tool{ - Name: "read", - Description: "Read a text file and convert it to speech, saving as MP3", - InputSchema: mcp.ToolInputSchema{ - Type: "object", - Properties: map[string]any{ - "file_path": map[string]any{ - "type": "string", - "description": "Path to the text file to read and convert to speech", - }, - }, - Required: []string{"file_path"}, - }, - } - - mcpServer.AddTool(readTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - filePath, err := request.RequireString("file_path") - if err != nil { - return nil, err - } - - // Read file content - content, err := os.ReadFile(filePath) - if err != nil { - return nil, fmt.Errorf("failed to read file: %w", err) - } - - text := string(content) - audioPath, err := s.generateAudio(text) - if err != nil { - return nil, err - } - - return &mcp.CallToolResult{ - Content: []mcp.Content{ - mcp.TextContent{ - Type: "text", - Text: fmt.Sprintf("File '%s' converted to speech and saved to: %s", filePath, audioPath), - }, - }, - }, nil - }) - - // Play tool - playTool := mcp.Tool{ - Name: "play", - Description: "Play an audio file", - InputSchema: mcp.ToolInputSchema{ - Type: "object", - Properties: map[string]any{ - "file_path": map[string]any{ - "type": "string", - "description": "Path to the audio file to play", - }, - }, - Required: 
[]string{"file_path"}, - }, - } - - mcpServer.AddTool(playTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - filePath, err := request.RequireString("file_path") - if err != nil { - return nil, err - } - - // Play audio asynchronously - s.playAudioAsync(filePath) - - return &mcp.CallToolResult{ - Content: []mcp.Content{ - mcp.TextContent{ - Type: "text", - Text: fmt.Sprintf("Playing audio file: %s", filePath), - }, - }, - }, nil - }) - - // Set voice tool - setVoiceTool := mcp.Tool{ - Name: "set_voice", - Description: "Set the voice to use for text-to-speech generation", - InputSchema: mcp.ToolInputSchema{ - Type: "object", - Properties: map[string]any{ - "voice_id": map[string]any{ - "type": "string", - "description": "ID of the voice to use", - }, - }, - Required: []string{"voice_id"}, - }, - } - - mcpServer.AddTool(setVoiceTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - voiceID, err := request.RequireString("voice_id") - if err != nil { - return nil, err - } - - s.voicesMutex.Lock() - defer s.voicesMutex.Unlock() - - // Find the voice - var selectedVoice *types.VoiceResponseModel - for i, voice := range s.voices { - if voice.VoiceID == voiceID { - selectedVoice = &s.voices[i] - break - } - } - - if selectedVoice == nil { - return nil, fmt.Errorf("voice with ID '%s' not found", voiceID) - } - - s.currentVoice = selectedVoice - - return &mcp.CallToolResult{ - Content: []mcp.Content{ - mcp.TextContent{ - Type: "text", - Text: fmt.Sprintf("Voice set to: %s (%s)", selectedVoice.Name, selectedVoice.VoiceID), - }, - }, - }, nil - }) - - // Get voices tool - getVoicesTool := mcp.Tool{ - Name: "get_voices", - Description: "Get list of available voices and show the currently selected one", - InputSchema: mcp.ToolInputSchema{ - Type: "object", - Properties: map[string]any{}, - }, - } - - mcpServer.AddTool(getVoicesTool, func(ctx context.Context, request mcp.CallToolRequest) 
(*mcp.CallToolResult, error) { - // Refresh voices from API - if err := s.refreshVoices(); err != nil { - return nil, err - } - - s.voicesMutex.RLock() - defer s.voicesMutex.RUnlock() - - var voiceList strings.Builder - voiceList.WriteString("Available voices:\n") - - for _, voice := range s.voices { - marker := " " - if s.currentVoice != nil && voice.VoiceID == s.currentVoice.VoiceID { - marker = "* " - } - voiceList.WriteString(fmt.Sprintf("%s%s (%s) - %s\n", - marker, voice.Name, voice.VoiceID, voice.Category)) - } - - if s.currentVoice != nil { - voiceList.WriteString(fmt.Sprintf("\nCurrently selected: %s (%s)", - s.currentVoice.Name, s.currentVoice.VoiceID)) - } else { - voiceList.WriteString("\nNo voice currently selected") - } - - return &mcp.CallToolResult{ - Content: []mcp.Content{ - mcp.TextContent{ - Type: "text", - Text: voiceList.String(), - }, - }, - }, nil - }) - - // History tool - historyTool := mcp.Tool{ - Name: "history", - Description: "List available audio files with text summaries", - InputSchema: mcp.ToolInputSchema{ - Type: "object", - Properties: map[string]any{}, - }, - } - - mcpServer.AddTool(historyTool, func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - // Read .xi directory - files, err := os.ReadDir(".xi") - if err != nil { - if os.IsNotExist(err) { - return &mcp.CallToolResult{ - Content: []mcp.Content{ - mcp.TextContent{ - Type: "text", - Text: "No audio files found (directory doesn't exist yet)", - }, - }, - }, nil - } - return nil, fmt.Errorf("failed to read .xi directory: %w", err) - } - - var audioFiles []string - for _, file := range files { - if strings.HasSuffix(file.Name(), ".mp3") { - audioFiles = append(audioFiles, file.Name()) - } - } - - if len(audioFiles) == 0 { - return &mcp.CallToolResult{ - Content: []mcp.Content{ - mcp.TextContent{ - Type: "text", - Text: "No audio files found", - }, - }, - }, nil - } - - var historyList strings.Builder - historyList.WriteString("Available audio 
files:\n\n") - - for _, audioFile := range audioFiles { - // Try to read corresponding text file - textFile := strings.TrimSuffix(audioFile, ".mp3") + ".txt" - textPath := filepath.Join(".xi", textFile) - - summary := "" - if content, err := os.ReadFile(textPath); err == nil { - text := strings.TrimSpace(string(content)) - words := strings.Fields(text) - if len(words) > 10 { - summary = strings.Join(words[:10], " ") + "..." - } else { - summary = text - } - } else { - summary = "(no text summary available)" - } - - historyList.WriteString(fmt.Sprintf("• %s\n %s\n\n", audioFile, summary)) - } - - return &mcp.CallToolResult{ - Content: []mcp.Content{ - mcp.TextContent{ - Type: "text", - Text: historyList.String(), - }, - }, - }, nil - }) -} - func main() { - // Create ElevenLabs server - elevenServer, err := NewElevenLabsServer() - if err != nil { - log.Fatalf("Failed to create ElevenLabs server: %v", err) - } - - // Create MCP server mcpServer := server.NewMCPServer( "ElevenLabs MCP Server", "1.0.0", server.WithToolCapabilities(true), ) + elevenServer, err := ximcp.NewServer(mcpServer) + if err != nil { + log.Fatalf("Failed to create ElevenLabs server: %v", err) + } - // Setup tools - elevenServer.setupTools(mcpServer) + elevenServer.SetupTools() - // Serve via stdio if err := server.ServeStdio(mcpServer); err != nil { log.Fatalf("Failed to serve MCP server: %v", err) }