diff --git a/server/ai/anthropic/anthropic.go b/server/ai/anthropic/anthropic.go
index 484cdcb3..b76e2efb 100644
--- a/server/ai/anthropic/anthropic.go
+++ b/server/ai/anthropic/anthropic.go
@@ -4,23 +4,28 @@ import (
 	"fmt"
 
 	"github.com/mattermost/mattermost-plugin-ai/server/ai"
+	"github.com/mattermost/mattermost-plugin-ai/server/metrics"
 )
 
 const DefaultMaxTokens = 4096
 
 type Anthropic struct {
-	client       *Client
-	defaultModel string
-	tokenLimit   int
+	client         *Client
+	defaultModel   string
+	tokenLimit     int
+	metricsService metrics.Metrics
+	name           string
 }
 
-func New(llmService ai.ServiceConfig) *Anthropic {
-	client := NewClient(llmService.APIKey)
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *Anthropic {
+	client := NewClient(botConfig.Service.APIKey)
 	return &Anthropic{
-		client:       client,
-		defaultModel: llmService.DefaultModel,
-		tokenLimit:   llmService.TokenLimit,
+		client:         client,
+		defaultModel:   botConfig.Service.DefaultModel,
+		tokenLimit:     botConfig.Service.TokenLimit,
+		metricsService: metricsService,
+		name:           botConfig.Name,
 	}
 }
 
@@ -79,6 +84,8 @@ func (a *Anthropic) createCompletionRequest(conversation ai.BotConversation, opt
 }
 
 func (a *Anthropic) ChatCompletion(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (*ai.TextStreamResult, error) {
+	a.metricsService.IncrementLLMRequests(a.name)
+
 	request := a.createCompletionRequest(conversation, opts)
 	request.Stream = true
 	result, err := a.client.MessageCompletion(request)
@@ -90,6 +97,8 @@ func (a *Anthropic) ChatCompletion(conversation ai.BotConversation, opts ...ai.L
 }
 
 func (a *Anthropic) ChatCompletionNoStream(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (string, error) {
+	a.metricsService.IncrementLLMRequests(a.name)
+
 	request := a.createCompletionRequest(conversation, opts)
 	request.Stream = false
 	result, err := a.client.MessageCompletionNoStream(request)
diff --git a/server/ai/asksage/asksage.go b/server/ai/asksage/asksage.go
index 1467e5e3..f3da9fc5 100644
--- a/server/ai/asksage/asksage.go
+++ b/server/ai/asksage/asksage.go
@@ -4,24 +4,29 @@ import (
 	"strings"
 
 	"github.com/mattermost/mattermost-plugin-ai/server/ai"
+	"github.com/mattermost/mattermost-plugin-ai/server/metrics"
 )
 
 type AskSage struct {
-	client       *Client
-	defaultModel string
-	maxTokens    int
+	client         *Client
+	defaultModel   string
+	maxTokens      int
+	metricsService metrics.Metrics
+	name           string
 }
 
-func New(llmService ai.ServiceConfig) *AskSage {
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *AskSage {
 	client := NewClient("")
 	client.Login(GetTokenParams{
-		Email:    llmService.Username,
-		Password: llmService.Password,
+		Email:    botConfig.Service.Username,
+		Password: botConfig.Service.Password,
 	})
 	return &AskSage{
-		client:       client,
-		defaultModel: llmService.DefaultModel,
-		maxTokens:    llmService.TokenLimit,
+		client:         client,
+		defaultModel:   botConfig.Service.DefaultModel,
+		maxTokens:      botConfig.Service.TokenLimit,
+		metricsService: metricsService,
+		name:           botConfig.Name,
 	}
 }
 
@@ -75,6 +80,8 @@ func (s *AskSage) ChatCompletion(conversation ai.BotConversation, opts ...ai.Lan
 }
 
 func (s *AskSage) ChatCompletionNoStream(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (string, error) {
+	s.metricsService.IncrementLLMRequests(s.name)
+
 	params := s.queryParamsFromConfig(s.createConfig(opts))
 	params.Message = conversationToMessagesList(conversation)
 	params.SystemPrompt = conversation.ExtractSystemMessage()
diff --git a/server/ai/openai/openai.go b/server/ai/openai/openai.go
index 21f41325..de4f6b7d 100644
--- a/server/ai/openai/openai.go
+++ b/server/ai/openai/openai.go
@@ -18,6 +18,7 @@ import (
 	"github.com/invopop/jsonschema"
 	"github.com/mattermost/mattermost-plugin-ai/server/ai"
 	"github.com/mattermost/mattermost-plugin-ai/server/ai/subtitles"
+	"github.com/mattermost/mattermost-plugin-ai/server/metrics"
 	openaiClient "github.com/sashabaranov/go-openai"
 )
 
@@ -26,6 +27,8 @@ type OpenAI struct {
 	defaultModel     string
 	tokenLimit       int
 	streamingTimeout time.Duration
+	metricsService   metrics.Metrics
+	name             string
 }
 
 const StreamingTimeoutDefault = 10 * time.Second
@@ -36,10 +39,10 @@ const OpenAIMaxImageSize = 20 * 1024 * 1024 // 20 MB
 
 var ErrStreamingTimeout = errors.New("timeout streaming")
 
-func NewCompatible(llmService ai.ServiceConfig) *OpenAI {
-	apiKey := llmService.APIKey
-	endpointURL := strings.TrimSuffix(llmService.APIURL, "/")
-	defaultModel := llmService.DefaultModel
+func NewCompatible(botConfig ai.BotConfig, metricsService metrics.Metrics) *OpenAI {
+	apiKey := botConfig.Service.APIKey
+	endpointURL := strings.TrimSuffix(botConfig.Service.APIURL, "/")
+	defaultModel := botConfig.Service.DefaultModel
 	config := openaiClient.DefaultConfig(apiKey)
 	config.BaseURL = endpointURL
 
@@ -50,35 +53,39 @@ func NewCompatible(llmService ai.ServiceConfig) *OpenAI {
 	}
 
 	streamingTimeout := StreamingTimeoutDefault
-	if llmService.StreamingTimeoutSeconds > 0 {
-		streamingTimeout = time.Duration(llmService.StreamingTimeoutSeconds) * time.Second
+	if botConfig.Service.StreamingTimeoutSeconds > 0 {
+		streamingTimeout = time.Duration(botConfig.Service.StreamingTimeoutSeconds) * time.Second
 	}
 	return &OpenAI{
 		client:           openaiClient.NewClientWithConfig(config),
 		defaultModel:     defaultModel,
-		tokenLimit:       llmService.TokenLimit,
+		tokenLimit:       botConfig.Service.TokenLimit,
 		streamingTimeout: streamingTimeout,
+		metricsService:   metricsService,
+		name:             botConfig.Name,
 	}
 }
 
-func New(llmService ai.ServiceConfig) *OpenAI {
-	defaultModel := llmService.DefaultModel
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *OpenAI {
+	defaultModel := botConfig.Service.DefaultModel
 	if defaultModel == "" {
 		defaultModel = openaiClient.GPT3Dot5Turbo
 	}
 
-	config := openaiClient.DefaultConfig(llmService.APIKey)
-	config.OrgID = llmService.OrgID
+	config := openaiClient.DefaultConfig(botConfig.Service.APIKey)
+	config.OrgID = botConfig.Service.OrgID
 
 	streamingTimeout := StreamingTimeoutDefault
-	if llmService.StreamingTimeoutSeconds > 0 {
-		streamingTimeout = time.Duration(llmService.StreamingTimeoutSeconds) * time.Second
+	if botConfig.Service.StreamingTimeoutSeconds > 0 {
+		streamingTimeout = time.Duration(botConfig.Service.StreamingTimeoutSeconds) * time.Second
 	}
 	return &OpenAI{
 		client:           openaiClient.NewClientWithConfig(config),
 		defaultModel:     defaultModel,
-		tokenLimit:       llmService.TokenLimit,
+		tokenLimit:       botConfig.Service.TokenLimit,
 		streamingTimeout: streamingTimeout,
+		metricsService:   metricsService,
+		name:             botConfig.Name,
 	}
 }
 
@@ -344,6 +351,8 @@ func (s *OpenAI) completionRequestFromConfig(cfg ai.LLMConfig) openaiClient.Chat
 }
 
 func (s *OpenAI) ChatCompletion(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (*ai.TextStreamResult, error) {
+	s.metricsService.IncrementLLMRequests(s.name)
+
 	request := s.completionRequestFromConfig(s.createConfig(opts))
 	request = modifyCompletionRequestWithConversation(request, conversation)
 	request.Stream = true
diff --git a/server/api_channel.go b/server/api_channel.go
index 46e84a52..f4abe5a9 100644
--- a/server/api_channel.go
+++ b/server/api_channel.go
@@ -109,7 +109,7 @@ func (p *Plugin) handleSince(c *gin.Context) {
 		return
 	}
 
-	resultStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+	resultStream, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
 		return
diff --git a/server/api_post.go b/server/api_post.go
index a63e078a..eb52a620 100644
--- a/server/api_post.go
+++ b/server/api_post.go
@@ -66,7 +66,7 @@ func (p *Plugin) handleReact(c *gin.Context) {
 		return
 	}
 
-	emojiName, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(prompt, ai.WithMaxGeneratedTokens(25))
+	emojiName, err := p.getLLM(bot.cfg).ChatCompletionNoStream(prompt, ai.WithMaxGeneratedTokens(25))
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
 		return
diff --git a/server/meeting_summarization.go b/server/meeting_summarization.go
index 39310d91..3ca7ce0d 100644
--- a/server/meeting_summarization.go
+++ b/server/meeting_summarization.go
@@ -259,8 +259,8 @@ func (p *Plugin) summarizeCallRecording(bot *Bot, rootID string, requestingUser
 
 func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subtitles, context ai.ConversationContext) (*ai.TextStreamResult, error) {
 	llmFormattedTranscription := transcription.FormatForLLM()
-	tokens := p.getLLM(bot.cfg.Service).CountTokens(llmFormattedTranscription)
-	tokenLimitWithMargin := int(float64(p.getLLM(bot.cfg.Service).TokenLimit())*0.75) - ContextTokenMargin
+	tokens := p.getLLM(bot.cfg).CountTokens(llmFormattedTranscription)
+	tokenLimitWithMargin := int(float64(p.getLLM(bot.cfg).TokenLimit())*0.75) - ContextTokenMargin
 	if tokenLimitWithMargin < 0 {
 		tokenLimitWithMargin = ContextTokenMargin / 2
 	}
@@ -277,7 +277,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti
 				return nil, fmt.Errorf("unable to get summarize chunk prompt: %w", err)
 			}
 
-			summarizedChunk, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(summarizeChunkPrompt)
+			summarizedChunk, err := p.getLLM(bot.cfg).ChatCompletionNoStream(summarizeChunkPrompt)
 			if err != nil {
 				return nil, fmt.Errorf("unable to get summarized chunk: %w", err)
 			}
@@ -287,7 +287,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti
 
 		llmFormattedTranscription = strings.Join(summarizedChunks, "\n\n")
 		isChunked = true
-		p.pluginAPI.Log.Debug("Completed chunk summarization", "chunks", len(summarizedChunks), "tokens", p.getLLM(bot.cfg.Service).CountTokens(llmFormattedTranscription))
+		p.pluginAPI.Log.Debug("Completed chunk summarization", "chunks", len(summarizedChunks), "tokens", p.getLLM(bot.cfg).CountTokens(llmFormattedTranscription))
 	}
 
 	context.PromptParameters = map[string]string{"Transcription": llmFormattedTranscription, "IsChunked": fmt.Sprintf("%t", isChunked)}
@@ -296,7 +296,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti
 		return nil, fmt.Errorf("unable to get meeting summary prompt: %w", err)
 	}
 
-	summaryStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(summaryPrompt)
+	summaryStream, err := p.getLLM(bot.cfg).ChatCompletion(summaryPrompt)
 	if err != nil {
 		return nil, fmt.Errorf("unable to get meeting summary: %w", err)
 	}
diff --git a/server/metrics/metrics.go b/server/metrics/metrics.go
index 7791d184..132f117d 100644
--- a/server/metrics/metrics.go
+++ b/server/metrics/metrics.go
@@ -11,6 +11,7 @@ const (
 	MetricsSubsystemSystem = "system"
 	MetricsSubsystemHTTP   = "http"
 	MetricsSubsystemAPI    = "api"
+	MetricsSubsystemLLM    = "llm"
 
 	MetricsCloudInstallationLabel = "installationId"
 	MetricsVersionLabel           = "version"
@@ -23,6 +24,8 @@ type Metrics interface {
 
 	IncrementHTTPRequests()
 	IncrementHTTPErrors()
+
+	IncrementLLMRequests(llmName string)
 }
 
 type InstanceInfo struct {
@@ -42,6 +45,8 @@ type metrics struct {
 
 	httpRequestsTotal prometheus.Counter
 	httpErrorsTotal   prometheus.Counter
+
+	llmRequestsTotal *prometheus.CounterVec
 }
 
 // NewMetrics Factory method to create a new metrics collector.
@@ -113,6 +118,15 @@ func NewMetrics(info InstanceInfo) Metrics {
 	})
 	m.registry.MustRegister(m.httpErrorsTotal)
 
+	m.llmRequestsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace:   MetricsNamespace,
+		Subsystem:   MetricsSubsystemLLM,
+		Name:        "requests_total",
+		Help:        "The total number of LLM requests.",
+		ConstLabels: additionalLabels,
+	}, []string{"llm_name"})
+	m.registry.MustRegister(m.llmRequestsTotal)
+
 	return m
 }
 
@@ -137,3 +151,9 @@ func (m *metrics) IncrementHTTPErrors() {
 		m.httpErrorsTotal.Inc()
 	}
 }
+
+func (m *metrics) IncrementLLMRequests(llmName string) {
+	if m != nil {
+		m.llmRequestsTotal.With(prometheus.Labels{"llm_name": llmName}).Inc()
+	}
+}
diff --git a/server/plugin.go b/server/plugin.go
index 5aac42dd..2ef38f1e 100644
--- a/server/plugin.go
+++ b/server/plugin.go
@@ -127,17 +127,17 @@ func (p *Plugin) OnActivate() error {
 	return nil
 }
 
-func (p *Plugin) getLLM(llmServiceConfig ai.ServiceConfig) ai.LanguageModel {
+func (p *Plugin) getLLM(llmBotConfig ai.BotConfig) ai.LanguageModel {
 	var llm ai.LanguageModel
-	switch llmServiceConfig.Type {
+	switch llmBotConfig.Service.Type {
 	case "openai":
-		llm = openai.New(llmServiceConfig)
+		llm = openai.New(llmBotConfig, p.metricsService)
 	case "openaicompatible":
-		llm = openai.NewCompatible(llmServiceConfig)
+		llm = openai.NewCompatible(llmBotConfig, p.metricsService)
 	case "anthropic":
-		llm = anthropic.New(llmServiceConfig)
+		llm = anthropic.New(llmBotConfig, p.metricsService)
 	case "asksage":
-		llm = asksage.New(llmServiceConfig)
+		llm = asksage.New(llmBotConfig, p.metricsService)
 	}
 
 	cfg := p.getConfiguration()
@@ -152,18 +152,18 @@ func (p *Plugin) getLLM(llmServiceConfig ai.ServiceConfig) ai.LanguageModel {
 
 func (p *Plugin) getTranscribe() ai.Transcriber {
 	cfg := p.getConfiguration()
-	var transcriptionService ai.ServiceConfig
+	var botConfig ai.BotConfig
 	for _, bot := range cfg.Bots {
 		if bot.Name == cfg.TranscriptGenerator {
-			transcriptionService = bot.Service
+			botConfig = bot
 			break
 		}
 	}
-	switch transcriptionService.Type {
+	switch botConfig.Service.Type {
 	case "openai":
-		return openai.New(transcriptionService)
+		return openai.New(botConfig, p.metricsService)
 	case "openaicompatible":
-		return openai.NewCompatible(transcriptionService)
+		return openai.NewCompatible(botConfig, p.metricsService)
 	}
 	return nil
 }
diff --git a/server/service.go b/server/service.go
index cc8243bb..a7f97e8b 100644
--- a/server/service.go
+++ b/server/service.go
@@ -53,7 +53,7 @@ func (p *Plugin) newConversation(bot *Bot, context ai.ConversationContext) error
 	}
 	conversation.AddPost(p.PostToAIPost(bot, context.Post))
 
-	result, err := p.getLLM(bot.cfg.Service).ChatCompletion(conversation)
+	result, err := p.getLLM(bot.cfg).ChatCompletion(conversation)
 	if err != nil {
 		return err
 	}
@@ -81,7 +81,7 @@ func (p *Plugin) generateTitle(bot *Bot, request string, threadRootID string) er
 	titleRequest := ai.BotConversation{
 		Posts: []ai.Post{{Role: ai.PostRoleUser, Message: request}},
 	}
-	conversationTitle, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(titleRequest, ai.WithMaxGeneratedTokens(25))
+	conversationTitle, err := p.getLLM(bot.cfg).ChatCompletionNoStream(titleRequest, ai.WithMaxGeneratedTokens(25))
 	if err != nil {
 		return fmt.Errorf("failed to get title: %w", err)
 	}
@@ -134,7 +134,7 @@ func (p *Plugin) continueConversation(bot *Bot, threadData *ThreadData, context
 	}
 	prompt.AppendConversation(p.ThreadToBotConversation(bot, threadData.Posts))
 
-	result, err = p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+	result, err = p.getLLM(bot.cfg).ChatCompletion(prompt)
 	if err != nil {
 		return nil, err
 	}
@@ -157,7 +157,7 @@ func (p *Plugin) continueThreadConversation(bot *Bot, questionThreadData *Thread
 	}
 	prompt.AppendConversation(p.ThreadToBotConversation(bot, questionThreadData.Posts))
 
-	result, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+	result, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
 	if err != nil {
 		return nil, err
 	}
@@ -181,7 +181,7 @@ func (p *Plugin) summarizePost(bot *Bot, postIDToSummarize string, context ai.Co
 	if err != nil {
 		return nil, err
 	}
-	summaryStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+	summaryStream, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
 	if err != nil {
 		return nil, err
 	}
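
For reference, below is a minimal standalone sketch (not part of this change) of the labeled-counter pattern that the new llm_requests_total metric in server/metrics/metrics.go relies on. The namespace string and the "ai" bot name are assumptions used only for illustration; the subsystem, metric name, help text, and "llm_name" label come from the hunk above, and the prometheus/testutil helper is used here purely to inspect the counter value.

// llm_requests_example.go — standalone illustration of the counter added in
// server/metrics/metrics.go. The namespace value and the "ai" bot name are
// assumptions for this example only.
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Same shape as m.llmRequestsTotal in NewMetrics: one counter series per
	// bot, keyed by the "llm_name" label.
	llmRequestsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "mattermost_plugin_ai", // assumed stand-in for MetricsNamespace
		Subsystem: "llm",                  // MetricsSubsystemLLM in the diff
		Name:      "requests_total",
		Help:      "The total number of LLM requests.",
	}, []string{"llm_name"})

	reg := prometheus.NewRegistry()
	reg.MustRegister(llmRequestsTotal)

	// Equivalent of IncrementLLMRequests("ai"); "ai" is a hypothetical bot name.
	llmRequestsTotal.With(prometheus.Labels{"llm_name": "ai"}).Inc()

	// Prints 1: the series ..._llm_requests_total{llm_name="ai"} was incremented once.
	fmt.Println(testutil.ToFloat64(llmRequestsTotal.WithLabelValues("ai")))
}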