reduce complexity

This commit is contained in:
2025-08-08 00:47:12 -07:00
parent a3301f15a6
commit 9579320040
7 changed files with 628 additions and 461 deletions

144
internal/ximcp/audio.go Normal file
View File

@@ -0,0 +1,144 @@
package ximcp
import (
"context"
"crypto/rand"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"time"
"github.com/gopxl/beep/v2"
"github.com/gopxl/beep/v2/mp3"
"github.com/gopxl/beep/v2/speaker"
"github.com/taigrr/elevenlabs/client/types"
)
// generateRandomHex returns a string of exactly length lowercase hexadecimal
// characters produced from crypto/rand. A non-positive length yields "".
func generateRandomHex(length int) string {
	if length <= 0 {
		// make panics on a negative size, and there is nothing to generate.
		return ""
	}
	// Every byte encodes to two hex digits, so round up to cover odd lengths
	// and trim the surplus digit below.
	buf := make([]byte, (length+1)/2)
	if _, err := rand.Read(buf); err != nil {
		// Best effort: report the failure and fall through with whatever is
		// in buf rather than aborting the caller.
		log.Printf("generateRandomHex: reading random bytes: %v", err)
	}
	return fmt.Sprintf("%x", buf)[:length]
}
// GenerateAudio synthesizes text with the currently selected voice, stores the
// resulting MP3 (plus a companion .txt holding the source text) and returns
// the path of the MP3.
//
// It returns an error when no voice is selected, when synthesis fails, or when
// either file cannot be written.
func (s *Server) GenerateAudio(text string) (string, error) {
	// currentVoice is written under voicesMutex elsewhere, so read it under
	// the same lock. The lock is released before any further work so it is
	// never held across the synthesis call.
	s.voicesMutex.RLock()
	hasVoice := s.currentVoice != nil
	s.voicesMutex.RUnlock()
	if !hasVoice {
		return "", fmt.Errorf("no voice selected")
	}

	audioData, err := s.generateTTSAudio(text)
	if err != nil {
		return "", err
	}
	return s.saveAudioFiles(text, audioData)
}
// generateTTSAudio asks the ElevenLabs client to synthesize text with the
// currently selected voice, using DefaultStability and DefaultSimilarityBoost,
// and returns the audio bytes the client produced.
//
// It returns an error when no voice is selected or when the client call fails.
func (s *Server) generateTTSAudio(text string) ([]byte, error) {
	// Copy the voice ID out under the lock: currentVoice is written under
	// voicesMutex elsewhere, and the lock must not be held during the request.
	s.voicesMutex.RLock()
	var voiceID string
	hasVoice := s.currentVoice != nil
	if hasVoice {
		voiceID = s.currentVoice.VoiceID
	}
	s.voicesMutex.RUnlock()
	if !hasVoice {
		return nil, fmt.Errorf("no voice selected")
	}

	return s.client.TTS(context.Background(), text, voiceID, "", types.SynthesisOptions{
		Stability:       DefaultStability,
		SimilarityBoost: DefaultSimilarityBoost,
	})
}
// saveAudioFiles picks a fresh file path, makes sure its directory exists,
// then writes the audio bytes and the companion text file, in that order.
// The first step that fails aborts the rest and its error is returned
// unchanged; on success the path of the audio file is returned.
func (s *Server) saveAudioFiles(text string, audioData []byte) (string, error) {
	target := s.generateFilePath()

	err := s.ensureDirectoryExists(target)
	if err == nil {
		err = s.writeAudioFile(target, audioData)
	}
	if err == nil {
		err = s.writeTextFile(target, text)
	}
	if err != nil {
		return "", err
	}
	return target, nil
}
// generateFilePath builds the path for a new MP3 inside AudioDirectory. The
// file name is "<unix-milliseconds>-<random hex>.mp3", with the hex part
// RandomHexLength characters long.
func (s *Server) generateFilePath() string {
	name := fmt.Sprintf("%d-%s.mp3", time.Now().UnixMilli(), generateRandomHex(RandomHexLength))
	return filepath.Join(AudioDirectory, name)
}
// ensureDirectoryExists creates the parent directory of filePath (and any
// missing ancestors) with mode 0755. It returns a wrapped error when the
// directory cannot be created.
func (s *Server) ensureDirectoryExists(filePath string) error {
	parent := filepath.Dir(filePath)
	err := os.MkdirAll(parent, 0755)
	if err == nil {
		return nil
	}
	return fmt.Errorf("failed to create directory: %w", err)
}
// writeAudioFile writes audioData to filePath with mode 0644 and returns a
// wrapped error when the write fails.
func (s *Server) writeAudioFile(filePath string, audioData []byte) error {
	err := os.WriteFile(filePath, audioData, 0644)
	if err == nil {
		return nil
	}
	return fmt.Errorf("failed to write audio file: %w", err)
}
// writeTextFile stores text next to the audio file: the ".mp3" suffix of
// filePath is replaced by ".txt" and the text is written there with mode 0644.
// It returns a wrapped error when the write fails.
func (s *Server) writeTextFile(filePath, text string) error {
	sidecar := strings.TrimSuffix(filePath, ".mp3") + ".txt"
	err := os.WriteFile(sidecar, []byte(text), 0644)
	if err == nil {
		return nil
	}
	return fmt.Errorf("failed to write text file: %w", err)
}
// PlayAudio opens the MP3 at filepath, decodes it and plays it, returning once
// playStreamer returns. playMutex is held for the whole call, so concurrent
// callers play one after another.
//
// It returns a wrapped error when the file cannot be opened or decoded.
func (s *Server) PlayAudio(filepath string) error {
	s.playMutex.Lock()
	defer s.playMutex.Unlock()

	src, openErr := os.Open(filepath)
	if openErr != nil {
		return fmt.Errorf("failed to open audio file: %w", openErr)
	}
	defer src.Close()

	decoded, format, decodeErr := mp3.Decode(src)
	if decodeErr != nil {
		return fmt.Errorf("failed to decode mp3: %w", decodeErr)
	}
	defer decoded.Close()

	return s.playStreamer(decoded, format)
}
// playStreamer resamples streamer from format.SampleRate to AudioSampleRate
// (resampling quality 4), hands it to the speaker and blocks until the
// callback sequenced after the stream has fired. It always returns nil.
func (s *Server) playStreamer(streamer beep.StreamSeekCloser, format beep.Format) error {
	finished := make(chan struct{})
	source := beep.Resample(4, format.SampleRate, AudioSampleRate, streamer)
	notify := beep.Callback(func() {
		// The callback is the last element of the sequence, so closing the
		// channel here releases the waiting caller.
		close(finished)
	})

	speaker.Play(beep.Seq(source, notify))
	<-finished
	return nil
}
// PlayAudioAsync starts PlayAudio for filepath on a new goroutine and returns
// immediately. A playback error is only logged; it is not reported to the
// caller.
func (s *Server) PlayAudioAsync(filepath string) {
	go func() {
		err := s.PlayAudio(filepath)
		if err == nil {
			return
		}
		log.Printf("Error playing audio: %v", err)
	}()
}
// ReadFileToAudio reads the whole file at filePath and passes its contents, as
// a string, to GenerateAudio. It returns the path GenerateAudio reports, or a
// wrapped error when the file cannot be read.
func (s *Server) ReadFileToAudio(filePath string) (string, error) {
	raw, err := os.ReadFile(filePath)
	if err != nil {
		return "", fmt.Errorf("failed to read file: %w", err)
	}
	return s.GenerateAudio(string(raw))
}

64
internal/ximcp/history.go Normal file
View File

@@ -0,0 +1,64 @@
package ximcp
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// AudioFile describes one ".mp3" entry found in AudioDirectory together with
// a short preview of the text stored in its companion ".txt" file.
type AudioFile struct {
// Name is the directory entry name of the MP3 (a file name, not a full path).
Name string
// Summary is the preview of the companion text file, or a placeholder
// message when that file could not be read.
Summary string
}
// GetAudioHistory lists the MP3 files in AudioDirectory along with their text
// summaries. A missing directory is not an error: it yields an empty, non-nil
// slice. Any other failure to read the directory is returned wrapped.
func (s *Server) GetAudioHistory() ([]AudioFile, error) {
	entries, err := os.ReadDir(AudioDirectory)
	switch {
	case err == nil:
		return s.processAudioFiles(entries), nil
	case os.IsNotExist(err):
		return []AudioFile{}, nil
	default:
		return nil, fmt.Errorf("failed to read %s directory: %w", AudioDirectory, err)
	}
}
// processAudioFiles keeps the entries whose name ends in ".mp3" and pairs each
// with the summary produced by getAudioSummary. Input order is preserved; the
// result is nil when nothing matches.
func (s *Server) processAudioFiles(files []os.DirEntry) []AudioFile {
	var matches []AudioFile
	for _, entry := range files {
		name := entry.Name()
		if !strings.HasSuffix(name, ".mp3") {
			continue
		}
		matches = append(matches, AudioFile{
			Name:    name,
			Summary: s.getAudioSummary(name),
		})
	}
	return matches
}
// getAudioSummary returns a summary of the text file that accompanies
// audioFileName: the same name inside AudioDirectory with ".mp3" replaced by
// ".txt". When that file cannot be read, a fixed placeholder is returned.
func (s *Server) getAudioSummary(audioFileName string) string {
	stem := strings.TrimSuffix(audioFileName, ".mp3")
	raw, err := os.ReadFile(filepath.Join(AudioDirectory, stem+".txt"))
	if err != nil {
		return "(no text summary available)"
	}
	return s.createSummary(string(raw))
}
// createSummary shortens text for display. Text of at most MaxSummaryWords
// whitespace-separated words is returned with only leading and trailing
// whitespace removed; longer text is cut to its first MaxSummaryWords words,
// joined by single spaces and followed by "...".
func (s *Server) createSummary(text string) string {
	trimmed := strings.TrimSpace(text)
	fields := strings.Fields(trimmed)
	if len(fields) <= MaxSummaryWords {
		return trimmed
	}
	return strings.Join(fields[:MaxSummaryWords], " ") + "..."
}

123
internal/ximcp/server.go Normal file
View File

@@ -0,0 +1,123 @@
package ximcp
import (
"context"
"fmt"
"os"
"sync"
"time"
"github.com/gopxl/beep/v2"
"github.com/gopxl/beep/v2/speaker"
"github.com/mark3labs/mcp-go/server"
"github.com/taigrr/elevenlabs/client"
"github.com/taigrr/elevenlabs/client/types"
)
// Package-wide defaults for synthesis, storage and playback.
const (
// DefaultStability is the Stability value sent with every synthesis request.
DefaultStability = 0.5
// DefaultSimilarityBoost is the SimilarityBoost value sent with every
// synthesis request.
DefaultSimilarityBoost = 0.5
// AudioDirectory is the directory (relative path) where generated MP3 and
// text files are written and from which history is listed.
AudioDirectory = ".xi"
// AudioSampleRate is the sample rate the speaker is initialized with and
// that decoded audio is resampled to before playback.
AudioSampleRate = 44100
// RandomHexLength is the number of random hex characters in generated
// file names.
RandomHexLength = 5
// MaxSummaryWords is the largest number of words kept in a history summary
// before it is truncated.
MaxSummaryWords = 10
)
// Server ties an MCP server to an ElevenLabs client and holds the voice
// selection and playback state shared by the tool handlers.
type Server struct {
// mcpServer is the MCP server the tools are registered on.
mcpServer *server.MCPServer
// client is the ElevenLabs client used to list voices and synthesize speech.
client client.Client
// voices is the voice list stored by the most recent refresh.
voices []types.VoiceResponseModel
// currentVoice is the voice used for synthesis; nil when none is selected.
currentVoice *types.VoiceResponseModel
// voicesMutex is taken by refreshVoices, GetVoices and SetVoice around
// their access to voices and currentVoice.
voicesMutex sync.RWMutex
// playMutex is held by PlayAudio for the duration of a playback.
playMutex sync.Mutex
}
// NewServer builds a Server around mcpServer. The ElevenLabs API key is taken
// from the XI_API_KEY environment variable; the voice list is then loaded and
// the speaker initialized.
//
// It returns an error when the key is missing or either initialization step
// fails.
func NewServer(mcpServer *server.MCPServer) (*Server, error) {
	key := os.Getenv("XI_API_KEY")
	if key == "" {
		return nil, fmt.Errorf("XI_API_KEY environment variable is required")
	}

	srv := &Server{
		mcpServer: mcpServer,
		client:    client.New(key),
	}

	err := srv.initializeVoices()
	if err != nil {
		return nil, fmt.Errorf("failed to initialize voices: %w", err)
	}

	err = srv.initializeSpeaker()
	if err != nil {
		return nil, fmt.Errorf("failed to initialize speaker: %w", err)
	}

	return srv, nil
}
// initializeVoices performs the initial voice load by delegating to
// refreshVoices and returns its error unchanged.
func (s *Server) initializeVoices() error {
	return s.refreshVoices()
}
// initializeSpeaker initializes the beep speaker at AudioSampleRate with a
// buffer of one tenth of a second of samples. It returns the error reported by
// speaker.Init so that the caller can detect a failed initialization instead
// of proceeding as if the speaker were ready.
func (s *Server) initializeSpeaker() error {
	sr := beep.SampleRate(AudioSampleRate)
	// sr.N converts the duration into a sample count at this rate.
	return speaker.Init(sr, sr.N(time.Second/10))
}
// refreshVoices fetches the voice list from the client, stores it on the
// server and, if no voice is selected yet, selects a default one.
//
// It returns a wrapped error when the fetch fails; in that case the stored
// list and selection are left untouched.
func (s *Server) refreshVoices() error {
	// Fetch before locking: the request can be slow, and holding the write
	// lock across it would stall every other voice operation meanwhile.
	voices, err := s.client.GetVoices(context.Background())
	if err != nil {
		return fmt.Errorf("failed to get voices: %w", err)
	}

	s.voicesMutex.Lock()
	defer s.voicesMutex.Unlock()
	s.voices = voices
	s.setDefaultVoiceIfNeeded()
	return nil
}
// setDefaultVoiceIfNeeded selects the first stored voice when no voice is
// selected yet. It does nothing if a voice is already selected or the stored
// list is empty. It takes no lock itself.
func (s *Server) setDefaultVoiceIfNeeded() {
	if s.currentVoice != nil || len(s.voices) == 0 {
		return
	}
	s.currentVoice = &s.voices[0]
}
// GetVoices refreshes the voice list and returns it together with the
// currently selected voice (nil when none is selected). When the refresh
// fails, its error is returned and both other results are nil.
func (s *Server) GetVoices() ([]types.VoiceResponseModel, *types.VoiceResponseModel, error) {
	err := s.refreshVoices()
	if err != nil {
		return nil, nil, err
	}

	s.voicesMutex.RLock()
	all, selected := s.voices, s.currentVoice
	s.voicesMutex.RUnlock()

	return all, selected, nil
}
// SetVoice selects the stored voice whose ID equals voiceID and returns it.
// It returns an error, leaving the selection unchanged, when no stored voice
// has that ID. The lookup and assignment run under the voices write lock.
func (s *Server) SetVoice(voiceID string) (*types.VoiceResponseModel, error) {
	s.voicesMutex.Lock()
	defer s.voicesMutex.Unlock()

	if match := s.findVoiceByID(voiceID); match != nil {
		s.currentVoice = match
		return match, nil
	}
	return nil, fmt.Errorf("voice with ID '%s' not found", voiceID)
}
// findVoiceByID returns a pointer to the first stored voice whose VoiceID
// equals voiceID, or nil when there is none. The pointer refers to the element
// inside s.voices, not to a copy. It takes no lock itself.
func (s *Server) findVoiceByID(voiceID string) *types.VoiceResponseModel {
	for i := range s.voices {
		candidate := &s.voices[i]
		if candidate.VoiceID == voiceID {
			return candidate
		}
	}
	return nil
}

280
internal/ximcp/tools.go Normal file
View File

@@ -0,0 +1,280 @@
package ximcp
import (
"context"
"fmt"
"strings"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/taigrr/elevenlabs/client/types"
)
// SetupTools registers every tool of this server on the MCP server, in the
// order: say, read, play, set_voice, get_voices, history.
func (s *Server) SetupTools() {
	builders := []func() (mcp.Tool, server.ToolHandlerFunc){
		s.say,
		s.read,
		s.play,
		s.setVoice,
		s.getVoices,
		s.history,
	}
	for _, build := range builders {
		s.mcpServer.AddTool(build())
	}
}
// say builds the "say" tool and its handler. The handler requires a string
// argument "text", generates audio for it, starts playback in the background
// and reports the path of the saved file. Argument and generation errors are
// returned to the caller as-is.
func (s *Server) say() (mcp.Tool, server.ToolHandlerFunc) {
	textParam := map[string]any{
		"type":        "string",
		"description": "Text to convert to speech",
	}
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{"text": textParam},
		Required:   []string{"text"},
	}
	tool := mcp.Tool{
		Name:        "say",
		Description: "Convert text to speech, save as MP3 file, and play the audio",
		InputSchema: schema,
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		text, err := request.RequireString("text")
		if err != nil {
			return nil, err
		}

		audioPath, err := s.GenerateAudio(text)
		if err != nil {
			return nil, err
		}
		s.PlayAudioAsync(audioPath)

		reply := mcp.TextContent{
			Type: "text",
			Text: fmt.Sprintf("Audio generated, saved to %s, and playing", audioPath),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return tool, handle
}
// read builds the "read" tool and its handler. The handler requires a string
// argument "file_path", converts that file's contents to speech and reports
// where the audio was saved. It does not start playback. Argument and
// conversion errors are returned to the caller as-is.
func (s *Server) read() (mcp.Tool, server.ToolHandlerFunc) {
	pathParam := map[string]any{
		"type":        "string",
		"description": "Path to the text file to read and convert to speech",
	}
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{"file_path": pathParam},
		Required:   []string{"file_path"},
	}
	tool := mcp.Tool{
		Name:        "read",
		Description: "Read a text file and convert it to speech, saving as MP3",
		InputSchema: schema,
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		source, err := request.RequireString("file_path")
		if err != nil {
			return nil, err
		}

		saved, err := s.ReadFileToAudio(source)
		if err != nil {
			return nil, err
		}

		reply := mcp.TextContent{
			Type: "text",
			Text: fmt.Sprintf("File '%s' converted to speech and saved to: %s", source, saved),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return tool, handle
}
// play builds the "play" tool and its handler. The handler requires a string
// argument "file_path" and starts background playback of that file. Because
// playback is asynchronous, the success reply is sent without waiting for,
// or knowing the outcome of, the playback itself.
func (s *Server) play() (mcp.Tool, server.ToolHandlerFunc) {
	pathParam := map[string]any{
		"type":        "string",
		"description": "Path to the audio file to play",
	}
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{"file_path": pathParam},
		Required:   []string{"file_path"},
	}
	tool := mcp.Tool{
		Name:        "play",
		Description: "Play an audio file",
		InputSchema: schema,
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		target, err := request.RequireString("file_path")
		if err != nil {
			return nil, err
		}
		s.PlayAudioAsync(target)

		reply := mcp.TextContent{
			Type: "text",
			Text: fmt.Sprintf("Playing audio file: %s", target),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return tool, handle
}
// setVoice builds the "set_voice" tool and its handler. The handler requires a
// string argument "voice_id", selects that voice and reports its name and ID.
// Argument errors and unknown IDs are returned to the caller as-is.
func (s *Server) setVoice() (mcp.Tool, server.ToolHandlerFunc) {
	idParam := map[string]any{
		"type":        "string",
		"description": "ID of the voice to use",
	}
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{"voice_id": idParam},
		Required:   []string{"voice_id"},
	}
	tool := mcp.Tool{
		Name:        "set_voice",
		Description: "Set the voice to use for text-to-speech generation",
		InputSchema: schema,
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		id, err := request.RequireString("voice_id")
		if err != nil {
			return nil, err
		}

		chosen, err := s.SetVoice(id)
		if err != nil {
			return nil, err
		}

		reply := mcp.TextContent{
			Type: "text",
			Text: fmt.Sprintf("Voice set to: %s (%s)", chosen.Name, chosen.VoiceID),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return tool, handle
}
// getVoices builds the "get_voices" tool and its handler. The tool takes no
// arguments; the handler fetches the voices and the current selection and
// replies with the list rendered by formatVoiceList. A fetch error is
// returned to the caller as-is.
func (s *Server) getVoices() (mcp.Tool, server.ToolHandlerFunc) {
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{},
	}
	tool := mcp.Tool{
		Name:        "get_voices",
		Description: "Get list of available voices and show the currently selected one",
		InputSchema: schema,
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		all, selected, err := s.GetVoices()
		if err != nil {
			return nil, err
		}

		reply := mcp.TextContent{
			Type: "text",
			Text: s.formatVoiceList(all, selected),
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return tool, handle
}
// formatVoiceList renders voices as text, one line per voice with its name, ID
// and category. The line of the voice whose ID matches currentVoice is
// prefixed with "* "; every other line gets a blank prefix. A trailing line
// names the selected voice, or states that none is selected when currentVoice
// is nil.
func (s *Server) formatVoiceList(voices []types.VoiceResponseModel, currentVoice *types.VoiceResponseModel) string {
	var out strings.Builder
	out.WriteString("Available voices:\n")

	for i := range voices {
		v := &voices[i]
		prefix := " "
		if currentVoice != nil && v.VoiceID == currentVoice.VoiceID {
			prefix = "* "
		}
		fmt.Fprintf(&out, "%s%s (%s) - %s\n", prefix, v.Name, v.VoiceID, v.Category)
	}

	if currentVoice == nil {
		out.WriteString("\nNo voice currently selected")
		return out.String()
	}
	fmt.Fprintf(&out, "\nCurrently selected: %s (%s)", currentVoice.Name, currentVoice.VoiceID)
	return out.String()
}
// history builds the "history" tool and its handler. The tool takes no
// arguments; the handler replies with the list rendered by formatHistoryList,
// or with a fixed message when there are no audio files. A listing error is
// returned to the caller as-is.
func (s *Server) history() (mcp.Tool, server.ToolHandlerFunc) {
	schema := mcp.ToolInputSchema{
		Type:       "object",
		Properties: map[string]any{},
	}
	tool := mcp.Tool{
		Name:        "history",
		Description: "List available audio files with text summaries",
		InputSchema: schema,
	}

	handle := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		found, err := s.GetAudioHistory()
		if err != nil {
			return nil, err
		}

		body := "No audio files found"
		if len(found) != 0 {
			body = s.formatHistoryList(found)
		}

		reply := mcp.TextContent{
			Type: "text",
			Text: body,
		}
		return &mcp.CallToolResult{Content: []mcp.Content{reply}}, nil
	}

	return tool, handle
}
// formatHistoryList renders audioFiles as text: a heading followed by one
// bulleted entry per file, giving its name and, on the next line, its summary,
// with a blank line after each entry.
func (s *Server) formatHistoryList(audioFiles []AudioFile) string {
	var out strings.Builder
	out.WriteString("Available audio files:\n\n")
	for i := range audioFiles {
		entry := audioFiles[i]
		fmt.Fprintf(&out, "• %s\n %s\n\n", entry.Name, entry.Summary)
	}
	return out.String()
}