Skip to content

Commit a484226

Browse files
committed
feat: ui changes to get audio
1 parent f182502 commit a484226

File tree

5 files changed

+68
-7
lines changed

5 files changed

+68
-7
lines changed

nodes/audio_utils/save_asr_response.py

+1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ class SaveASRResponse:
44
CATEGORY = "audio_utils"
55
RETURN_TYPES = ()
66
FUNCTION = "execute"
7+
OUTPUT_NODE = True
78

89
@classmethod
910
def INPUT_TYPES(s):

server/app.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@ async def offer(request):
190190

191191
pcs.add(pc)
192192

193-
tracks = {"video": None}
193+
tracks = {"video": None, "audio": None}
194194

195195
# Prefer h264
196196
transceiver = pc.addTransceiver("video")

ui/src/components/room.tsx

+2
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ export function Room() {
7878
streamUrl: "",
7979
frameRate: 0,
8080
selectedDeviceId: "",
81+
selectedAudioDeviceId: "", // New property for audio device
8182
prompt: null,
8283
});
8384

@@ -153,6 +154,7 @@ export function Room() {
153154
onStreamReady={onStreamReady}
154155
deviceId={config.selectedDeviceId}
155156
frameRate={config.frameRate}
157+
selectedAudioDeviceId={config.selectedAudioDeviceId} // Pass audio device ID
156158
/>
157159
</div>
158160
</div>

ui/src/components/settings.tsx

+48 -1
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ export interface StreamConfig {
3333
frameRate: number;
3434
prompt?: any;
3535
selectedDeviceId: string;
36+
selectedAudioDeviceId: string; // New property for audio device
3637
}
3738

3839
interface VideoDevice {
@@ -45,6 +46,7 @@ export const DEFAULT_CONFIG: StreamConfig = {
4546
process.env.NEXT_PUBLIC_DEFAULT_STREAM_URL || "http://127.0.0.1:3000",
4647
frameRate: 30,
4748
selectedDeviceId: "",
49+
selectedAudioDeviceId: "", // Default value for audio device
4850
};
4951

5052
interface StreamSettingsProps {
@@ -110,7 +112,9 @@ interface ConfigFormProps {
110112
function ConfigForm({ config, onSubmit }: ConfigFormProps) {
111113
const [prompt, setPrompt] = useState<any>(null);
112114
const [videoDevices, setVideoDevices] = useState<VideoDevice[]>([]);
115+
const [audioDevices, setAudioDevices] = useState<VideoDevice[]>([]);
113116
const [selectedDevice, setSelectedDevice] = useState<string>("");
117+
const [selectedAudioDevice, setSelectedAudioDevice] = useState<string>("");
114118

115119
const form = useForm<z.infer<typeof formSchema>>({
116120
resolver: zodResolver(formSchema),
@@ -138,17 +142,42 @@ function ConfigForm({ config, onSubmit }: ConfigFormProps) {
138142
}
139143
}, []);
140144

145+
const getAudioDevices = useCallback(async () => {
146+
try {
147+
const devices = await navigator.mediaDevices.enumerateDevices();
148+
const audioDevices = devices
149+
.filter((device) => device.kind === "audioinput")
150+
.map((device) => ({
151+
deviceId: device.deviceId,
152+
label: device.label || `Microphone ${device.deviceId.slice(0, 5)}...`,
153+
}));
154+
155+
setAudioDevices(audioDevices);
156+
if (audioDevices.length > 0) {
157+
setSelectedAudioDevice((curr) => curr || audioDevices[0].deviceId);
158+
}
159+
} catch (err) {
160+
console.error("Failed to get audio devices");
161+
}
162+
}, []);
163+
141164
useEffect(() => {
142165
getVideoDevices();
166+
getAudioDevices();
143167
navigator.mediaDevices.addEventListener("devicechange", getVideoDevices);
168+
navigator.mediaDevices.addEventListener("devicechange", getAudioDevices);
144169

145170
return () => {
146171
navigator.mediaDevices.removeEventListener(
147172
"devicechange",
148173
getVideoDevices
149174
);
175+
navigator.mediaDevices.removeEventListener(
176+
"devicechange",
177+
getAudioDevices
178+
);
150179
};
151-
}, [getVideoDevices]);
180+
}, [getVideoDevices, getAudioDevices]);
152181

153182
const handleSubmit = (values: z.infer<typeof formSchema>) => {
154183
onSubmit({
@@ -158,6 +187,7 @@ function ConfigForm({ config, onSubmit }: ConfigFormProps) {
158187
: values.streamUrl,
159188
prompt,
160189
selectedDeviceId: selectedDevice,
190+
selectedAudioDeviceId: selectedAudioDevice,
161191
});
162192
};
163193

@@ -221,6 +251,23 @@ function ConfigForm({ config, onSubmit }: ConfigFormProps) {
221251
</Select>
222252
</div>
223253

254+
<div className="mt-4 mb-4">
255+
<Label>Microphone</Label>
256+
<Select value={selectedAudioDevice} onValueChange={setSelectedAudioDevice}>
257+
<Select.Trigger className="w-full mt-2">
258+
{audioDevices.find((d) => d.deviceId === selectedAudioDevice)?.label ||
259+
"Select microphone"}
260+
</Select.Trigger>
261+
<Select.Content>
262+
{audioDevices.map((device) => (
263+
<Select.Option key={device.deviceId} value={device.deviceId}>
264+
{device.label}
265+
</Select.Option>
266+
))}
267+
</Select.Content>
268+
</Select>
269+
</div>
270+
224271
<div className="mt-4 mb-4 grid max-w-sm items-center gap-3">
225272
<Label>Comfy Workflow</Label>
226273
<Input

ui/src/components/webcam.tsx

+16 -5
Original file line number | Diff line number | Diff line change
@@ -103,9 +103,10 @@ interface WebcamProps {
103103
onStreamReady: (stream: MediaStream) => void;
104104
deviceId: string;
105105
frameRate: number;
106+
selectedAudioDeviceId: string;
106107
}
107108

108-
export function Webcam({ onStreamReady, deviceId, frameRate }: WebcamProps) {
109+
export function Webcam({ onStreamReady, deviceId, frameRate, selectedAudioDeviceId }: WebcamProps) {
109110
const [stream, setStream] = useState<MediaStream | null>(null);
110111

111112
const replaceStream = useCallback((newStream: MediaStream | null) => {
@@ -123,7 +124,7 @@ export function Webcam({ onStreamReady, deviceId, frameRate }: WebcamProps) {
123124
}, []);
124125

125126
const startWebcam = useCallback(async () => {
126-
if (!deviceId) {
127+
if (!deviceId || !selectedAudioDeviceId) {
127128
return null;
128129
}
129130
if (frameRate == 0) {
@@ -139,25 +140,35 @@ export function Webcam({ onStreamReady, deviceId, frameRate }: WebcamProps) {
139140
aspectRatio: { ideal: 1 },
140141
frameRate: { ideal: frameRate, max: frameRate },
141142
},
143+
audio: {
144+
...(selectedAudioDeviceId ? { deviceId: { exact: selectedAudioDeviceId } } : {}),
145+
sampleRate: 16000,
146+
sampleSize: 16,
147+
channelCount: 1,
148+
},
142149
});
143150
return newStream;
144151
} catch (error) {
152+
console.error("Error accessing media devices.", error);
145153
return null;
146154
}
147-
}, [deviceId, frameRate]);
155+
}, [deviceId, frameRate, selectedAudioDeviceId]);
148156

149157
useEffect(() => {
150-
if (!deviceId) return;
158+
if (!deviceId || !selectedAudioDeviceId) return;
151159
if (frameRate == 0) return;
152160

153161
startWebcam().then((newStream) => {
154162
replaceStream(newStream);
163+
if (newStream) {
164+
onStreamReady(newStream);
165+
}
155166
});
156167

157168
return () => {
158169
replaceStream(null);
159170
};
160-
}, [deviceId, frameRate, startWebcam, replaceStream]);
171+
}, [deviceId, frameRate, selectedAudioDeviceId, startWebcam, replaceStream, onStreamReady]);
161172

162173
return (
163174
<div>

0 commit comments

Comments
 (0)