@@ -40,9 +40,7 @@ async def warm_video(self):
40
40
41
41
async def warm_audio (self ):
42
42
dummy_frame = av .AudioFrame ()
43
- dummy_frame .side_data .input = np .random .randint (
44
- - 32768 , 32767 , int (48000 * 0.5 ), dtype = np .int16
45
- ) # TODO: adds a lot of delay if it doesn't match the buffer size, is warmup needed?
43
+ dummy_frame .side_data .input = np .random .randint (- 32768 , 32767 , int (48000 * 0.5 ), dtype = np .int16 ) # TODO: adds a lot of delay if it doesn't match the buffer size, is warmup needed?
46
44
dummy_frame .sample_rate = 48000
47
45
48
46
for _ in range (WARMUP_RUNS ):
@@ -55,9 +53,7 @@ async def set_prompts(self, prompts: Union[Dict[Any, Any], List[Dict[Any, Any]]]
55
53
else :
56
54
await self .client .set_prompts ([prompts ])
57
55
58
- async def update_prompts (
59
- self , prompts : Union [Dict [Any , Any ], List [Dict [Any , Any ]]]
60
- ):
56
+ async def update_prompts (self , prompts : Union [Dict [Any , Any ], List [Dict [Any , Any ]]]):
61
57
if isinstance (prompts , list ):
62
58
await self .client .update_prompts (prompts )
63
59
else :
@@ -82,21 +78,12 @@ def video_preprocess(self, frame: av.VideoFrame) -> Union[torch.Tensor, np.ndarr
82
78
def audio_preprocess(self, frame: av.AudioFrame) -> Union[torch.Tensor, np.ndarray]:
    """Downmix an interleaved stereo audio frame to mono int16 samples.

    The frame's raw samples are flattened, grouped into (left, right)
    pairs, and each pair is averaged into a single mono sample.

    NOTE(review): assumes the frame carries interleaved 2-channel audio
    (sample count divisible by 2) — confirm against the capture side.
    """
    samples = frame.to_ndarray().ravel()
    stereo_pairs = samples.reshape(-1, 2)
    mono = stereo_pairs.mean(axis=1)
    return mono.astype(np.int16)
84
80
85
def video_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.VideoFrame:
    """Convert a model output tensor scaled in [0, 1] into an av.VideoFrame.

    Scales values to 0-255, clamps to the valid byte range, casts to
    uint8, drops the leading batch dimension, and moves the data to host
    memory before wrapping it in a frame.

    NOTE(review): the .clamp()/.to()/.cpu() chain means this path only
    accepts a torch.Tensor at runtime despite the Union annotation.
    """
    scaled = (output * 255.0).clamp(0, 255)
    pixels = scaled.to(dtype=torch.uint8).squeeze(0).cpu()
    return av.VideoFrame.from_ndarray(pixels.numpy())
85
97
def audio_postprocess(self, output: Union[torch.Tensor, np.ndarray]) -> av.AudioFrame:
    """Expand mono samples back to interleaved stereo and wrap as an AudioFrame.

    Each mono sample is duplicated (left == right) and the result is
    reshaped to a single row of packed samples, the layout
    av.AudioFrame.from_ndarray expects.
    """
    interleaved = np.repeat(output, 2)
    packed = interleaved.reshape(1, -1)
    return av.AudioFrame.from_ndarray(packed)
101
88
102
89
async def get_processed_video_frame (self ):
@@ -107,7 +94,7 @@ async def get_processed_video_frame(self):
107
94
while frame .side_data .skipped :
108
95
frame = await self .video_incoming_frames .get ()
109
96
110
- processed_frame = self .video_postprocess (out_tensor )
97
+ processed_frame = self .video_postprocess (out_tensor )
111
98
processed_frame .pts = frame .pts
112
99
processed_frame .time_base = frame .time_base
113
100
@@ -119,17 +106,15 @@ async def get_processed_audio_frame(self):
119
106
if frame .samples > len (self .processed_audio_buffer ):
120
107
async with temporary_log_level ("comfy" , self ._comfyui_inference_log_level ):
121
108
out_tensor = await self .client .get_audio_output ()
122
- self .processed_audio_buffer = np .concatenate (
123
- [self .processed_audio_buffer , out_tensor ]
124
- )
125
- out_data = self .processed_audio_buffer [: frame .samples ]
126
- self .processed_audio_buffer = self .processed_audio_buffer [frame .samples :]
109
+ self .processed_audio_buffer = np .concatenate ([self .processed_audio_buffer , out_tensor ])
110
+ out_data = self .processed_audio_buffer [:frame .samples ]
111
+ self .processed_audio_buffer = self .processed_audio_buffer [frame .samples :]
127
112
128
113
processed_frame = self .audio_postprocess (out_data )
129
114
processed_frame .pts = frame .pts
130
115
processed_frame .time_base = frame .time_base
131
116
processed_frame .sample_rate = frame .sample_rate
132
-
117
+
133
118
return processed_frame
134
119
135
120
async def get_nodes_info (self ) -> Dict [str , Any ]:
0 commit comments