Adding LLM Requests Total metric #202

Merged · 1 commit · Jun 7, 2024
25 changes: 17 additions & 8 deletions server/ai/anthropic/anthropic.go
@@ -4,23 +4,28 @@ import (
     "fmt"
 
     "github.com/mattermost/mattermost-plugin-ai/server/ai"
+    "github.com/mattermost/mattermost-plugin-ai/server/metrics"
 )
 
 const DefaultMaxTokens = 4096
 
 type Anthropic struct {
-    client       *Client
-    defaultModel string
-    tokenLimit   int
+    client         *Client
+    defaultModel   string
+    tokenLimit     int
+    metricsService metrics.Metrics
+    name           string
 }
 
-func New(llmService ai.ServiceConfig) *Anthropic {
-    client := NewClient(llmService.APIKey)
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *Anthropic {
+    client := NewClient(botConfig.Service.APIKey)
 
     return &Anthropic{
-        client:       client,
-        defaultModel: llmService.DefaultModel,
-        tokenLimit:   llmService.TokenLimit,
+        client:         client,
+        defaultModel:   botConfig.Service.DefaultModel,
+        tokenLimit:     botConfig.Service.TokenLimit,
+        metricsService: metricsService,
+        name:           botConfig.Name,
     }
 }
 
@@ -79,6 +84,8 @@ func (a *Anthropic) createCompletionRequest(conversation ai.BotConversation, opt
 }
 
 func (a *Anthropic) ChatCompletion(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (*ai.TextStreamResult, error) {
+    a.metricsService.IncrementLLMRequests(a.name)
+
     request := a.createCompletionRequest(conversation, opts)
     request.Stream = true
     result, err := a.client.MessageCompletion(request)
@@ -90,6 +97,8 @@ func (a *Anthropic) ChatCompletion(conversation ai.BotConversation, opts ...ai.L
 }
 
 func (a *Anthropic) ChatCompletionNoStream(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (string, error) {
+    a.metricsService.IncrementLLMRequests(a.name)
+
     request := a.createCompletionRequest(conversation, opts)
     request.Stream = false
     result, err := a.client.MessageCompletionNoStream(request)
25 changes: 16 additions & 9 deletions server/ai/asksage/asksage.go
@@ -4,24 +4,29 @@ import (
     "strings"
 
     "github.com/mattermost/mattermost-plugin-ai/server/ai"
+    "github.com/mattermost/mattermost-plugin-ai/server/metrics"
 )
 
 type AskSage struct {
-    client       *Client
-    defaultModel string
-    maxTokens    int
+    client         *Client
+    defaultModel   string
+    maxTokens      int
+    metricsService metrics.Metrics
+    name           string
 }
 
-func New(llmService ai.ServiceConfig) *AskSage {
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *AskSage {
     client := NewClient("")
     client.Login(GetTokenParams{
-        Email:    llmService.Username,
-        Password: llmService.Password,
+        Email:    botConfig.Service.Username,
+        Password: botConfig.Service.Password,
     })
     return &AskSage{
-        client:       client,
-        defaultModel: llmService.DefaultModel,
-        maxTokens:    llmService.TokenLimit,
+        client:         client,
+        defaultModel:   botConfig.Service.DefaultModel,
+        maxTokens:      botConfig.Service.TokenLimit,
+        metricsService: metricsService,
+        name:           botConfig.Name,
     }
 }
 
@@ -75,6 +80,8 @@ func (s *AskSage) ChatCompletion(conversation ai.BotConversation, opts ...ai.Lan
 }
 
 func (s *AskSage) ChatCompletionNoStream(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (string, error) {
+    s.metricsService.IncrementLLMRequests(s.name)
+
     params := s.queryParamsFromConfig(s.createConfig(opts))
     params.Message = conversationToMessagesList(conversation)
     params.SystemPrompt = conversation.ExtractSystemMessage()
37 changes: 23 additions & 14 deletions server/ai/openai/openai.go
@@ -18,6 +18,7 @@ import (
     "github.com/invopop/jsonschema"
     "github.com/mattermost/mattermost-plugin-ai/server/ai"
     "github.com/mattermost/mattermost-plugin-ai/server/ai/subtitles"
+    "github.com/mattermost/mattermost-plugin-ai/server/metrics"
     openaiClient "github.com/sashabaranov/go-openai"
 )
 
@@ -26,6 +27,8 @@ type OpenAI struct {
     defaultModel     string
     tokenLimit       int
     streamingTimeout time.Duration
+    metricsService   metrics.Metrics
+    name             string
 }
 
 const StreamingTimeoutDefault = 10 * time.Second
@@ -36,10 +39,10 @@ const OpenAIMaxImageSize = 20 * 1024 * 1024 // 20 MB
 
 var ErrStreamingTimeout = errors.New("timeout streaming")
 
-func NewCompatible(llmService ai.ServiceConfig) *OpenAI {
-    apiKey := llmService.APIKey
-    endpointURL := strings.TrimSuffix(llmService.APIURL, "/")
-    defaultModel := llmService.DefaultModel
+func NewCompatible(botConfig ai.BotConfig, metricsService metrics.Metrics) *OpenAI {
+    apiKey := botConfig.Service.APIKey
+    endpointURL := strings.TrimSuffix(botConfig.Service.APIURL, "/")
+    defaultModel := botConfig.Service.DefaultModel
     config := openaiClient.DefaultConfig(apiKey)
     config.BaseURL = endpointURL
 
@@ -50,35 +53,39 @@ func NewCompatible(llmService ai.ServiceConfig) *OpenAI {
     }
 
     streamingTimeout := StreamingTimeoutDefault
-    if llmService.StreamingTimeoutSeconds > 0 {
-        streamingTimeout = time.Duration(llmService.StreamingTimeoutSeconds) * time.Second
+    if botConfig.Service.StreamingTimeoutSeconds > 0 {
+        streamingTimeout = time.Duration(botConfig.Service.StreamingTimeoutSeconds) * time.Second
     }
     return &OpenAI{
         client:           openaiClient.NewClientWithConfig(config),
        defaultModel:     defaultModel,
-        tokenLimit:       llmService.TokenLimit,
+        tokenLimit:       botConfig.Service.TokenLimit,
         streamingTimeout: streamingTimeout,
+        metricsService:   metricsService,
+        name:             botConfig.Name,
     }
 }
 
-func New(llmService ai.ServiceConfig) *OpenAI {
-    defaultModel := llmService.DefaultModel
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *OpenAI {
+    defaultModel := botConfig.Service.DefaultModel
     if defaultModel == "" {
         defaultModel = openaiClient.GPT3Dot5Turbo
     }
-    config := openaiClient.DefaultConfig(llmService.APIKey)
-    config.OrgID = llmService.OrgID
+    config := openaiClient.DefaultConfig(botConfig.Service.APIKey)
+    config.OrgID = botConfig.Service.OrgID
 
     streamingTimeout := StreamingTimeoutDefault
-    if llmService.StreamingTimeoutSeconds > 0 {
-        streamingTimeout = time.Duration(llmService.StreamingTimeoutSeconds) * time.Second
+    if botConfig.Service.StreamingTimeoutSeconds > 0 {
+        streamingTimeout = time.Duration(botConfig.Service.StreamingTimeoutSeconds) * time.Second
     }
 
     return &OpenAI{
         client:           openaiClient.NewClientWithConfig(config),
         defaultModel:     defaultModel,
-        tokenLimit:       llmService.TokenLimit,
+        tokenLimit:       botConfig.Service.TokenLimit,
         streamingTimeout: streamingTimeout,
+        metricsService:   metricsService,
+        name:             botConfig.Name,
     }
 }
 
@@ -344,6 +351,8 @@ func (s *OpenAI) completionRequestFromConfig(cfg ai.LLMConfig) openaiClient.Chat
 }
 
 func (s *OpenAI) ChatCompletion(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (*ai.TextStreamResult, error) {
+    s.metricsService.IncrementLLMRequests(s.name)
+
     request := s.completionRequestFromConfig(s.createConfig(opts))
     request = modifyCompletionRequestWithConversation(request, conversation)
    request.Stream = true
2 changes: 1 addition & 1 deletion server/api_channel.go
@@ -109,7 +109,7 @@ func (p *Plugin) handleSince(c *gin.Context) {
         return
     }
 
-    resultStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+    resultStream, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
     if err != nil {
         c.AbortWithError(http.StatusInternalServerError, err)
         return
2 changes: 1 addition & 1 deletion server/api_post.go
@@ -66,7 +66,7 @@ func (p *Plugin) handleReact(c *gin.Context) {
         return
     }
 
-    emojiName, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(prompt, ai.WithMaxGeneratedTokens(25))
+    emojiName, err := p.getLLM(bot.cfg).ChatCompletionNoStream(prompt, ai.WithMaxGeneratedTokens(25))
     if err != nil {
         c.AbortWithError(http.StatusInternalServerError, err)
         return
10 changes: 5 additions & 5 deletions server/meeting_summarization.go
@@ -251,8 +251,8 @@ func (p *Plugin) summarizeCallRecording(bot *Bot, rootID string, requestingUser
 
 func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subtitles, context ai.ConversationContext) (*ai.TextStreamResult, error) {
     llmFormattedTranscription := transcription.FormatForLLM()
-    tokens := p.getLLM(bot.cfg.Service).CountTokens(llmFormattedTranscription)
-    tokenLimitWithMargin := int(float64(p.getLLM(bot.cfg.Service).TokenLimit())*0.75) - ContextTokenMargin
+    tokens := p.getLLM(bot.cfg).CountTokens(llmFormattedTranscription)
+    tokenLimitWithMargin := int(float64(p.getLLM(bot.cfg).TokenLimit())*0.75) - ContextTokenMargin
     if tokenLimitWithMargin < 0 {
         tokenLimitWithMargin = ContextTokenMargin / 2
     }
@@ -269,7 +269,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti
                 return nil, fmt.Errorf("unable to get summarize chunk prompt: %w", err)
             }
 
-            summarizedChunk, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(summarizeChunkPrompt)
+            summarizedChunk, err := p.getLLM(bot.cfg).ChatCompletionNoStream(summarizeChunkPrompt)
             if err != nil {
                 return nil, fmt.Errorf("unable to get summarized chunk: %w", err)
             }
@@ -279,7 +279,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti
 
         llmFormattedTranscription = strings.Join(summarizedChunks, "\n\n")
         isChunked = true
-        p.pluginAPI.Log.Debug("Completed chunk summarization", "chunks", len(summarizedChunks), "tokens", p.getLLM(bot.cfg.Service).CountTokens(llmFormattedTranscription))
+        p.pluginAPI.Log.Debug("Completed chunk summarization", "chunks", len(summarizedChunks), "tokens", p.getLLM(bot.cfg).CountTokens(llmFormattedTranscription))
     }
 
     context.PromptParameters = map[string]string{"Transcription": llmFormattedTranscription, "IsChunked": fmt.Sprintf("%t", isChunked)}
@@ -288,7 +288,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti
         return nil, fmt.Errorf("unable to get meeting summary prompt: %w", err)
     }
 
-    summaryStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(summaryPrompt)
+    summaryStream, err := p.getLLM(bot.cfg).ChatCompletion(summaryPrompt)
     if err != nil {
         return nil, fmt.Errorf("unable to get meeting summary: %w", err)
     }
20 changes: 20 additions & 0 deletions server/metrics/metrics.go
@@ -11,6 +11,7 @@ const (
     MetricsSubsystemSystem = "system"
     MetricsSubsystemHTTP   = "http"
     MetricsSubsystemAPI    = "api"
+    MetricsSubsystemLLM    = "llm"
 
     MetricsCloudInstallationLabel = "installationId"
     MetricsVersionLabel           = "version"
@@ -23,6 +24,8 @@ type Metrics interface {
 
     IncrementHTTPRequests()
     IncrementHTTPErrors()
+
+    IncrementLLMRequests(llmName string)
 }
 
 type InstanceInfo struct {
@@ -42,6 +45,8 @@ type metrics struct {
 
     httpRequestsTotal prometheus.Counter
     httpErrorsTotal   prometheus.Counter
+
+    llmRequestsTotal *prometheus.CounterVec
 }
 
 // NewMetrics Factory method to create a new metrics collector.
@@ -113,6 +118,15 @@ func NewMetrics(info InstanceInfo) Metrics {
     })
     m.registry.MustRegister(m.httpErrorsTotal)
 
+    m.llmRequestsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
+        Namespace:   MetricsNamespace,
+        Subsystem:   MetricsSubsystemLLM,
+        Name:        "requests_total",
+        Help:        "The total number of LLM requests.",
+        ConstLabels: additionalLabels,
+    }, []string{"llm_name"})
+    m.registry.MustRegister(m.llmRequestsTotal)
+
     return m
 }
 
@@ -137,3 +151,9 @@ func (m *metrics) IncrementHTTPErrors() {
         m.httpErrorsTotal.Inc()
     }
 }
+
+func (m *metrics) IncrementLLMRequests(llmName string) {
+    if m != nil {
+        m.llmRequestsTotal.With(prometheus.Labels{"llm_name": llmName}).Inc()
+    }
+}
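
For reviewers less familiar with Prometheus counter vectors, here is a minimal standalone sketch of the labelled-counter pattern `llmRequestsTotal` uses. It is illustrative only: the package layout, registry, and bot names are hypothetical, and the real metric takes its namespace and constant labels from `MetricsNamespace` and the instance info handled outside this diff.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Mirrors llmRequestsTotal: one counter series per llm_name label value.
	llmRequests := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "mattermost_plugin_ai", // hypothetical; the plugin uses MetricsNamespace
		Subsystem: "llm",
		Name:      "requests_total",
		Help:      "The total number of LLM requests.",
	}, []string{"llm_name"})

	reg := prometheus.NewRegistry()
	reg.MustRegister(llmRequests)

	// Equivalent of what each ChatCompletion / ChatCompletionNoStream call now does.
	llmRequests.With(prometheus.Labels{"llm_name": "gpt-bot"}).Inc()
	llmRequests.With(prometheus.Labels{"llm_name": "gpt-bot"}).Inc()
	llmRequests.With(prometheus.Labels{"llm_name": "claude-bot"}).Inc()

	// Each bot accumulates its own series under the llm_name label.
	fmt.Println(testutil.ToFloat64(llmRequests.WithLabelValues("gpt-bot")))    // 2
	fmt.Println(testutil.ToFloat64(llmRequests.WithLabelValues("claude-bot"))) // 1
}
```

The key point is that `With(prometheus.Labels{"llm_name": ...})` lazily creates one series per bot name, so per-bot request totals can be scraped, graphed, and alerted on independently.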
22 changes: 11 additions & 11 deletions server/plugin.go
@@ -126,17 +126,17 @@ func (p *Plugin) OnActivate() error {
     return nil
 }
 
-func (p *Plugin) getLLM(llmServiceConfig ai.ServiceConfig) ai.LanguageModel {
+func (p *Plugin) getLLM(llmBotConfig ai.BotConfig) ai.LanguageModel {
     var llm ai.LanguageModel
-    switch llmServiceConfig.Type {
+    switch llmBotConfig.Service.Type {
     case "openai":
-        llm = openai.New(llmServiceConfig)
+        llm = openai.New(llmBotConfig, p.metricsService)
     case "openaicompatible":
-        llm = openai.NewCompatible(llmServiceConfig)
+        llm = openai.NewCompatible(llmBotConfig, p.metricsService)
     case "anthropic":
-        llm = anthropic.New(llmServiceConfig)
+        llm = anthropic.New(llmBotConfig, p.metricsService)
     case "asksage":
-        llm = asksage.New(llmServiceConfig)
+        llm = asksage.New(llmBotConfig, p.metricsService)
     }
 
     cfg := p.getConfiguration()
@@ -151,18 +151,18 @@ func (p *Plugin) getLLM(llmServiceConfig ai.ServiceConfig) ai.LanguageModel {
 
 func (p *Plugin) getTranscribe() ai.Transcriber {
     cfg := p.getConfiguration()
-    var transcriptionService ai.ServiceConfig
+    var botConfig ai.BotConfig
     for _, bot := range cfg.Bots {
         if bot.Name == cfg.TranscriptGenerator {
-            transcriptionService = bot.Service
+            botConfig = bot
             break
         }
     }
-    switch transcriptionService.Type {
+    switch botConfig.Service.Type {
     case "openai":
-        return openai.New(transcriptionService)
+        return openai.New(botConfig, p.metricsService)
     case "openaicompatible":
-        return openai.NewCompatible(transcriptionService)
+        return openai.NewCompatible(botConfig, p.metricsService)
     }
     return nil
 }
10 changes: 5 additions & 5 deletions server/service.go
@@ -53,7 +53,7 @@ func (p *Plugin) newConversation(bot *Bot, context ai.ConversationContext) error
     }
     conversation.AddPost(p.PostToAIPost(bot, context.Post))
 
-    result, err := p.getLLM(bot.cfg.Service).ChatCompletion(conversation)
+    result, err := p.getLLM(bot.cfg).ChatCompletion(conversation)
     if err != nil {
         return err
     }
@@ -81,7 +81,7 @@ func (p *Plugin) generateTitle(bot *Bot, request string, threadRootID string) er
     titleRequest := ai.BotConversation{
         Posts: []ai.Post{{Role: ai.PostRoleUser, Message: request}},
     }
-    conversationTitle, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(titleRequest, ai.WithMaxGeneratedTokens(25))
+    conversationTitle, err := p.getLLM(bot.cfg).ChatCompletionNoStream(titleRequest, ai.WithMaxGeneratedTokens(25))
     if err != nil {
         return fmt.Errorf("failed to get title: %w", err)
     }
@@ -134,7 +134,7 @@ func (p *Plugin) continueConversation(bot *Bot, threadData *ThreadData, context
     }
     prompt.AppendConversation(p.ThreadToBotConversation(bot, threadData.Posts))
 
-    result, err = p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+    result, err = p.getLLM(bot.cfg).ChatCompletion(prompt)
     if err != nil {
         return nil, err
     }
@@ -157,7 +157,7 @@ func (p *Plugin) continueThreadConversation(bot *Bot, questionThreadData *Thread
     }
     prompt.AppendConversation(p.ThreadToBotConversation(bot, questionThreadData.Posts))
 
-    result, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+    result, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
     if err != nil {
         return nil, err
     }
@@ -181,7 +181,7 @@ func (p *Plugin) summarizePost(bot *Bot, postIDToSummarize string, context ai.Co
     if err != nil {
         return nil, err
     }
-    summaryStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+    summaryStream, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
     if err != nil {
         return nil, err
     }
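
Possible follow-up, sketched under the assumption that the `metrics.Metrics` interface is as shown in this diff: a small test double would let provider tests assert that each completion call increments the per-bot counter exactly once. The interface has more methods than the three visible above, so a complete fake would also need no-op stubs for those; the names below are hypothetical and not part of this PR.

```go
// fakeMetrics is a hypothetical test double for metrics.Metrics.
type fakeMetrics struct {
	llmRequests map[string]int // per-bot request counts recorded by the fake
}

func (f *fakeMetrics) IncrementHTTPRequests() {}
func (f *fakeMetrics) IncrementHTTPErrors()   {}

func (f *fakeMetrics) IncrementLLMRequests(llmName string) {
	if f.llmRequests == nil {
		f.llmRequests = map[string]int{}
	}
	f.llmRequests[llmName]++
}
```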