Skip to content

Commit fe8b261

Browse files
committed
feat: init
1 parent 53fa913 commit fe8b261

File tree

2 files changed

+97
-0
lines changed

2 files changed

+97
-0
lines changed

server/app.py

+91
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import os
44
import json
55
import logging
6+
import wave
7+
import numpy as np
68

79
from twilio.rest import Client
810
from aiohttp import web
@@ -31,6 +33,91 @@ def __init__(self, track: MediaStreamTrack, pipeline):
3133
async def recv(self):
    """Pull the next frame from the wrapped track and return the pipeline's output for it."""
    incoming = await self.track.recv()
    processed = await self.pipeline(incoming)
    return processed
36+
37+
class AudioStreamTrack(MediaStreamTrack):
    """
    Pass-through audio track that also records what it forwards.

    Frames read from ``source`` are returned unchanged to the consumer and
    buffered in ``frames``. Once the buffered frames span at least
    ``_recording_duration`` seconds (measured with ``frame.time``), the buffer
    is written to ``output_<index>.wav`` in the current working directory and a
    new chunk starts with an incremented index.
    """

    kind = "audio"

    def __init__(self, source: MediaStreamTrack):
        """
        Args:
            source: The incoming audio track to wrap and record.
        """
        super().__init__()
        self.source = source
        self.start_time = None  # frame.time of the first frame in the current chunk
        self.frames = []  # frames buffered for the current chunk
        self._recording_duration = 10.0  # in seconds
        self._chunk_index = 0
        self._saving = False  # True while a save task is pending or running
        self._lock = asyncio.Lock()
        # Strong reference to the in-flight save task: the event loop only keeps
        # weak references, so an unreferenced task may be collected mid-run.
        self._save_task = None

    async def recv(self):
        """
        Receive the next frame from the source, buffer it, and return it.

        Schedules ``save_audio`` as a background task once the current chunk
        reaches the recording duration and no save is already in progress.

        Returns:
            The frame obtained from ``source.recv()``, unmodified.
        """
        frame = await self.source.recv()

        # On the first frame of a chunk, record the start time.
        if self.start_time is None:
            self.start_time = frame.time
            logger.info(f"Audio recording started at time: {self.start_time:.3f}")

        elapsed = frame.time - self.start_time
        self.frames.append(frame)

        # Per-frame message: kept at DEBUG with lazy formatting because this
        # runs for every audio frame and would otherwise flood the INFO log.
        logger.debug(
            "Received audio frame at time: %.3f, total frames: %d",
            frame.time,
            len(self.frames),
        )

        # Check if we've hit the chunk duration and we're not currently saving.
        if elapsed >= self._recording_duration and not self._saving:
            logger.info(f"10 second chunk reached (elapsed: {elapsed:.3f}s). Preparing to save chunk {self._chunk_index}.")
            self._saving = True
            # Save in a background task so the recv loop is not blocked.
            self._save_task = asyncio.create_task(self.save_audio())
            self._save_task.add_done_callback(self._log_save_failure)

        return frame

    async def save_audio(self):
        """
        Write the currently buffered frames to a WAV file and start a new chunk.

        The buffer is snapshotted and cleared before the file is written, so
        frames received while the write is in progress belong to the next
        chunk. The conversion and file write run in the default executor to
        keep the event loop free. ``_saving`` is always reset on exit, including
        when the write fails; in that case the snapshotted frames are dropped
        and the exception propagates to the caller / task.
        """
        logger.info(f"Starting to save audio chunk {self._chunk_index}...")
        async with self._lock:
            try:
                if not self.frames:
                    logger.warning("No frames to save, skipping.")
                    return

                # Snapshot and reset first: recv() keeps appending to
                # self.frames while we await the executor below.
                chunk = list(self.frames)
                self.frames.clear()
                self.start_time = None

                index = self._chunk_index
                # Stream properties are taken from the first frame of the chunk.
                sample_rate = chunk[0].sample_rate
                channels = len(chunk[0].layout.channels)

                logger.info(f"Audio chunk {index}: sample_rate={sample_rate}, channels={channels}, frames_count={len(chunk)}")

                filename = f"output_{index}.wav"
                logger.info(f"Writing audio chunk {index} to file: {filename}")
                loop = asyncio.get_running_loop()
                await loop.run_in_executor(
                    None, self._write_wav, filename, chunk, channels, sample_rate
                )
                logger.info(f"Audio chunk {index} saved successfully as {filename}")

                # Increment the chunk index for the next segment.
                self._chunk_index += 1
                logger.info(f"Ready to record next 10-second chunk. Current chunk index: {self._chunk_index}")
            finally:
                # Without this, a failed save would leave _saving stuck at True
                # and the frame buffer would grow without bound.
                self._saving = False

    @staticmethod
    def _write_wav(filename, frames, channels, sample_rate):
        """Convert frames to interleaved PCM and write them to ``filename`` (blocking)."""
        # NOTE(review): assumes to_ndarray() yields (channels, samples) for
        # planar formats and (1, samples * channels) for packed ones, so that
        # concatenating on axis 1 and transposing gives interleaved samples in
        # both cases — confirm against the frame type delivered by the source.
        data = np.concatenate([f.to_ndarray() for f in frames], axis=1)
        interleaved = data.T.flatten()

        # Float samples cannot be written as-is under an integer PCM header;
        # convert them to 16-bit, clipping to avoid wrap-around on overshoot.
        if np.issubdtype(interleaved.dtype, np.floating):
            interleaved = (np.clip(interleaved, -1.0, 1.0) * 32767).astype(np.int16)

        with wave.open(filename, "wb") as wf:
            wf.setnchannels(channels)
            # Match the header to the actual sample width (2 for 16-bit PCM).
            wf.setsampwidth(interleaved.dtype.itemsize)
            wf.setframerate(sample_rate)
            wf.writeframes(interleaved.tobytes())

    @staticmethod
    def _log_save_failure(task):
        """Done-callback that reports an exception raised by a background save task."""
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            logger.error("Background audio chunk save failed", exc_info=error)
34121

35122

36123
def force_codec(pc, sender, forced_codec):
@@ -113,6 +200,10 @@ def on_track(track):
113200

114201
codec = "video/H264"
115202
force_codec(pc, sender, codec)
203+
elif track.kind == "audio":
204+
audioTrack = AudioStreamTrack(track)
205+
tracks["audio"] = audioTrack
206+
pc.addTrack(audioTrack)
116207

117208
@track.on("ended")
118209
async def on_ended():

ui/src/components/webcam.tsx

+6
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ export function Webcam({ onStreamReady }: WebcamProps) {
2828
width: { exact: 512 },
2929
height: { exact: 512 },
3030
},
31+
audio: {
32+
noiseSuppression: true,
33+
echoCancellation: true,
34+
sampleRate: 16000,
35+
sampleSize: 16,
36+
},
3137
});
3238

3339
if (videoRef.current) videoRef.current.srcObject = stream;

0 commit comments

Comments
 (0)