Scaffolding for realtime-to-realtime #3210

Merged 2 commits on Oct 29, 2024
2 changes: 2 additions & 0 deletions core/capabilities.go
@@ -81,6 +81,7 @@ const (
Capability_SegmentAnything2 Capability = 32
Capability_LLM Capability = 33
Capability_ImageToText Capability = 34
Capability_LiveVideoToVideo Capability = 35
)

var CapabilityNameLookup = map[Capability]string{
@@ -120,6 +121,7 @@ var CapabilityNameLookup = map[Capability]string{
Capability_SegmentAnything2: "Segment anything 2",
Capability_LLM: "Llm",
Capability_ImageToText: "Image to text",
Capability_LiveVideoToVideo: "Live video to video",
}

var CapabilityTestLookup = map[Capability]CapabilityTest{
30 changes: 30 additions & 0 deletions server/ai_http.go
@@ -56,6 +56,7 @@
lp.transRPC.Handle("/llm", oapiReqValidator(lp.LLM()))
lp.transRPC.Handle("/segment-anything-2", oapiReqValidator(lp.SegmentAnything2()))
lp.transRPC.Handle("/image-to-text", oapiReqValidator(lp.ImageToText()))
lp.transRPC.Handle("/live-video-to-video", oapiReqValidator(lp.StartLiveVideoToVideo()))

Codecov warning: added line server/ai_http.go#L59 was not covered by tests.
// Additionally, there is the '/aiResults' endpoint registered in server/rpc.go

return nil
@@ -236,6 +237,35 @@
}

handleAIRequest(ctx, w, r, orch, req)

})
}

func (h *lphttp) StartLiveVideoToVideo() http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {

// skipping handleAIRequest for now until we have payments

var (
mid = string(core.RandomManifestID())
pubUrl = "/ai/live-video/" + mid
subUrl = pubUrl + "/out"
)
jsonData, err := json.Marshal(struct {
PublishUrl string
SubscribeUrl string
}{
PublishUrl: pubUrl,
SubscribeUrl: subUrl,
})
if err != nil {
respondWithError(w, err.Error(), http.StatusInternalServerError)
return
}

Codecov warning: added lines server/ai_http.go#L244-L264 were not covered by tests.

w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write(jsonData)

Codecov warning: added lines server/ai_http.go#L266-L268 were not covered by tests.
})
}
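
For reference, here is a rough gateway-side sketch (not part of the diff) of calling this endpoint and decoding the publish/subscribe URLs it returns. The orchestrator base URL is a placeholder, and the real call path would also need to pass request validation and, eventually, payment headers.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Placeholder orchestrator address; the handler above responds with
	// relative publish/subscribe paths encoded as JSON.
	resp, err := http.Post("https://orchestrator:8935/live-video-to-video", "application/json", nil)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var urls struct {
		PublishUrl   string
		SubscribeUrl string
	}
	if err := json.NewDecoder(resp.Body).Decode(&urls); err != nil {
		panic(err)
	}
	fmt.Println("publish:", urls.PublishUrl, "subscribe:", urls.SubscribeUrl)
}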

24 changes: 24 additions & 0 deletions server/ai_mediaserver.go
@@ -74,6 +74,9 @@
ls.HTTPMux.Handle("/segment-anything-2", oapiReqValidator(handle(ls, multipartDecoder[worker.GenSegmentAnything2MultipartRequestBody], processSegmentAnything2)))
ls.HTTPMux.Handle("/image-to-text", oapiReqValidator(handle(ls, multipartDecoder[worker.GenImageToTextMultipartRequestBody], processImageToText)))

// This is called by the media server when the stream is ready
ls.HTTPMux.Handle("/live/video-to-video/start", ls.StartLiveVideo())

return nil
}

@@ -361,3 +364,24 @@
_ = json.NewEncoder(w).Encode(resp)
})
}

func (ls *LivepeerServer) StartLiveVideo() http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
streamName := r.FormValue("stream")
if streamName == "" {
http.Error(w, "Missing stream name", http.StatusBadRequest)
return
}
requestID := string(core.RandomManifestID())
params := aiRequestParams{
node: ls.LivepeerNode,
os: drivers.NodeStorage.NewSession(requestID),
sessManager: ls.AISessionManager,
}
ctx := clog.AddVal(r.Context(), "request_id", requestID)
// TODO set model and initial parameters here if necessary (eg, prompt)
req := struct{}{}
resp, err := processAIRequest(ctx, params, req)
Contributor:

I don't think we should follow this AI flow here. I guess what we're trying to do is more similar to transcoding.

Collaborator (Author):

Why not? This selects an orchestrator and allows us to receive and process a response from it (so we know where to send and receive media from)

This method works for now - if you want to change it later as part of the selection work that is fine but this gets us going with the existing RPC mechanisms

Contributor @mjh1 (Oct 22, 2024):

I think I see what @leszko is saying: up until now we have just been sending in synchronous AI jobs, where the pipeline runs and returns the result. Whereas here the request is to start up the realtime-to-realtime stream, basically? That said, I don't think it's a huge issue when there's lots of common code that we're able to reuse by following the same flow.

Contributor:

Yes, I think we can "re-use" it for starting the AI runner. But then, the next question would be: when does the runner container stop? Or who stops it? We don't need to solve it in this PR, but it's something to consider.

Collaborator (Author):

Yep, as @mjh1 says, this basically makes a synchronous request using the normal AI inference path. This is pretty convenient, because it gives us an entry point to kick off the rest of the process - see #3211 for the beginnings of that.

I expect that we'd be building things like our job monitoring / re-selection, payments, etc somewhere near this 'main loop'

> But then, the next question would be: when does the runner container stop? Or who stops it? We don't need to solve it in this PR, but it's something to consider.

This would be a basic RPC request which can be done via whatever call-path makes sense. However, as far as job tear-down goes, I am not sure we can rely on signaling for that (anything can disappear without notice), so we should ensure things still work well in the absence of explicit tear-down.

We'll need to solve a similar issue with calls to the model control API. The gateway will need some handle to the orchestrator session, the orchestrator to the (remote) worker, and possibly the worker to the runner (if runners have persistent job state). @victorges for visibility since this is his area

Contributor:

> This would be a basic RPC request which can be done via whatever call-path makes sense. However, as far as job tear-down goes, I am not sure we can rely on signaling for that (anything can disappear without notice), so we should ensure things still work well in the absence of explicit tear-down.

Yeah, the best approach from the Distributed (and Decentralized) system perspective is what we do for Transcoding:

  1. The params are included in the stream (segments)
  2. The AI (Transcoding) pipeline is initialized when the first segment comes in
  3. The tear-down happens on some timeout (so when no segments have come in for x min, it tears down)

Any signaling of warm-up / tear-down is an "optimization" on top of that. This approach is good for reliability, because a lot may happen: the O may go down, we may swap the orchestrator, the G may go down (before signaling anything), etc.

CC: @victorges

Again, I think we can move this discussion outside this PR. The PR can be merged and we can discuss it separately.
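
A minimal sketch (not from this PR) of the idle-timeout tear-down described in the comment above; the segment channel, timeout value, and teardown callback are all hypothetical placeholders.

package liveutil

import (
	"context"
	"time"
)

// watchIdle tears the pipeline down once no segments have arrived for
// idleTimeout. segmentCh and teardown are placeholders, not PR code.
func watchIdle(ctx context.Context, segmentCh <-chan struct{}, idleTimeout time.Duration, teardown func()) {
	timer := time.NewTimer(idleTimeout)
	defer timer.Stop()
	for {
		select {
		case <-segmentCh:
			// A segment arrived: reset the idle timer.
			if !timer.Stop() {
				<-timer.C
			}
			timer.Reset(idleTimeout)
		case <-timer.C:
			// No segments for idleTimeout: tear everything down.
			teardown()
			return
		case <-ctx.Done():
			teardown()
			return
		}
	}
}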

clog.Infof(ctx, "Received live video AI request stream=%s resp=%v err=%v", streamName, resp, err)

Codecov warning: added lines server/ai_mediaserver.go#L370-L385 were not covered by tests.
})
}
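
For context, a hedged sketch (not part of the diff) of how the media server hook might hit this endpoint when a stream comes up. The gateway address and stream name are placeholders; the handler above only requires the "stream" form value.

package main

import (
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	// Placeholder gateway address; the handler rejects requests that are
	// missing the "stream" form value.
	resp, err := http.PostForm("http://localhost:8935/live/video-to-video/start",
		url.Values{"stream": {"my-stream"}})
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}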
25 changes: 25 additions & 0 deletions server/ai_process.go
@@ -34,6 +34,7 @@
const defaultLLMModelID = "meta-llama/llama-3.1-8B-Instruct"
const defaultSegmentAnything2ModelID = "facebook/sam2-hiera-large"
const defaultImageToTextModelID = "Salesforce/blip-image-captioning-large"
const defaultLiveVideoToVideoModelID = "cumulo-autumn/stream-diffusion"

var errWrongFormat = fmt.Errorf("result not in correct format")

@@ -865,6 +866,19 @@
return &res, nil
}

func submitLiveVideoToVideo(ctx context.Context, params aiRequestParams, sess *AISession, req struct{ ModelId *string }) (any, error) {
//client, err := worker.NewClientWithResponses(sess.Transcoder(), worker.WithHTTPClient(httpClient))
var err error
if err != nil {
if monitor.Enabled {
monitor.AIRequestError(err.Error(), "LiveVideoToVideo", *req.ModelId, sess.OrchestratorInfo)
}
return nil, err

Codecov warning: added lines server/ai_process.go#L869-L876 were not covered by tests.
}
// TODO check urls and add sess.Transcoder to the host if necessary
return nil, nil

Codecov warning: added line server/ai_process.go#L879 was not covered by tests.
}
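
A minimal sketch (not from this PR) of the TODO above: resolving the relative publish/subscribe paths returned by the orchestrator against the orchestrator's transcoder host. The helper name is hypothetical and assumes Go 1.19+ for url.JoinPath.

package main

import (
	"fmt"
	"net/url"
)

// resolveMediaURL is a hypothetical helper: relative paths returned by the
// orchestrator (e.g. "/ai/live-video/<id>") get prefixed with the transcoder
// host, while absolute URLs pass through unchanged.
func resolveMediaURL(transcoderBase, u string) (string, error) {
	parsed, err := url.Parse(u)
	if err != nil {
		return "", err
	}
	if parsed.IsAbs() {
		return u, nil
	}
	return url.JoinPath(transcoderBase, u)
}

func main() {
	pub, _ := resolveMediaURL("https://orchestrator:8935", "/ai/live-video/abc123")
	fmt.Println(pub) // https://orchestrator:8935/ai/live-video/abc123
}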

func CalculateLLMLatencyScore(took time.Duration, tokensUsed int) float64 {
if tokensUsed <= 0 {
return 0
Expand Down Expand Up @@ -1204,6 +1218,17 @@
submitFn = func(ctx context.Context, params aiRequestParams, sess *AISession) (interface{}, error) {
return submitImageToText(ctx, params, sess, v)
}
/*
case worker.StartLiveVideoToVideoFormdataRequestBody:
cap = core.Capability_LiveVideoToVideo
modelID = defaultLiveVideoToVideoModelID
if v.ModelId != nil {
modelID = *v.ModelId
}
submitFn = func(ctx context.Context, params aiRequestParams, sess *AISession) (interface{}, error) {
return submitLiveVideoToVideo(ctx, params, sess, v)
}
*/
default:
return nil, fmt.Errorf("unsupported request type %T", req)
}