-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvad.py
30 lines (25 loc) · 989 Bytes
/
vad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import numpy as np
import os
import onnxruntime
# Absolute directory containing this module; bundled assets are resolved
# relative to it rather than to the process's working directory.
current_dir = os.path.split(os.path.abspath(__file__))[0]
class Vad:
    """Stateful voice-activity detector running a Silero VAD ONNX model.

    The model is loaded from ``assets/silero_vad.onnx`` next to this module.
    The recurrent state tensors ``h`` and ``c`` are passed to the model on
    every call and replaced by the values it returns, so the result of a
    call depends on the chunks previously given to the same instance.
    Call :meth:`reset` to clear that state.
    """

    # Shape of the zero-initialised recurrent state tensors fed as "h"/"c".
    _STATE_SHAPE = (2, 1, 64)

    def __init__(self, threshold: float = 0.1):
        """Create the inference session and zero the recurrent state.

        Args:
            threshold: Model output strictly above this value is reported
                as speech by :meth:`is_speech`.
        """
        model_path = os.path.join(current_dir, "assets", "silero_vad.onnx")
        options = onnxruntime.SessionOptions()
        # Highest severity level: keep ONNX Runtime quiet unless it is fatal.
        options.log_severity_level = 4
        self.inference_session = onnxruntime.InferenceSession(
            model_path, sess_options=options
        )
        self.threshold = threshold
        self.reset()

    def reset(self) -> None:
        """Zero the recurrent state so the next chunk is scored from scratch."""
        self.h = np.zeros(self._STATE_SHAPE, dtype=np.float32)
        self.c = np.zeros(self._STATE_SHAPE, dtype=np.float32)

    def is_speech(self, audio_data: np.ndarray, samplerate: int) -> bool:
        """Score one audio chunk and update the recurrent state.

        Args:
            audio_data: Samples of the chunk; flattened into a single
                ``(1, n)`` batch before inference.  Floating-point input of
                any precision is converted to float32.  Other dtypes are
                passed through unchanged (no scaling is attempted), so the
                session decides whether to accept them.
            samplerate: Sample rate of ``audio_data``, forwarded to the
                model as its ``sr`` input.

        Returns:
            ``True`` if the model output exceeds ``self.threshold``,
            otherwise ``False`` -- always a plain Python ``bool``.
        """
        samples = np.asarray(audio_data)
        # NumPy defaults to float64 while the state tensors are float32;
        # normalise float precision so such input is not rejected.
        if np.issubdtype(samples.dtype, np.floating):
            samples = samples.astype(np.float32, copy=False)

        input_data = {
            "input": samples.reshape(1, -1),
            "sr": np.array([samplerate], dtype=np.int64),
            "h": self.h,
            "c": self.c,
        }
        out, h, c = self.inference_session.run(None, input_data)
        # Carry the updated state over to the next call.
        self.h, self.c = h, c
        # `out` holds one score for the single-row batch; unwrap it so the
        # caller gets a real bool instead of a one-element array.
        return bool(np.ravel(out)[0] > self.threshold)