Skip to content

Commit 29d3c6e

Browse files
committed
recordyoubutton
1 parent dfa2c86 commit 29d3c6e

7 files changed

+183
-40
lines changed

next/README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
## now
44

5-
- assembly streaming only supports english, deepgram could be used for other languages
6-
- maybe should make an app?
5+
- add a button to record the "You" language and send to non-streaming assembly api
76

87
## medium
98

9+
- maybe start the react native app?
1010
- real cloning?
1111
- handle different romanization prompts for languages not just chinese?
1212
- summarize kaikki definitions w llm?

next/components/assembly-view.tsx

+15-7
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@ import { useEffect, useRef, useState, type JSX } from "react";
44
import { SpeechStateManager } from "@/lib/speech-state";
55
import { MicrophoneState, useMicrophone } from "./microphone-context";
66
import { SpeechInput } from "./speech-input";
7-
import { Language } from "@/lib/types";
7+
import { describeLanguage, Language } from "@/lib/types";
88
import { CloningView } from "./cloning-view";
99
import { LanguageSelect } from "./language-select";
1010
import { FragmentComponent, type Fragment } from "./fragment";
1111
import { useAssembly } from "./assembly-context";
1212
import { AudioProcessor } from "@/lib/audio-processor";
13+
import { RecordYouButton } from "./record-you-button";
1314

1415
const AssemblyView: () => JSX.Element = () => {
1516
const [inputText, setInputText] = useState("");
@@ -66,7 +67,11 @@ const AssemblyView: () => JSX.Element = () => {
6667
useEffect(() => {
6768
if (microphoneState === MicrophoneState.Ready && microphone?.stream) {
6869
console.log("[AssemblyAI] Microphone ready, connecting to AssemblyAI");
69-
connectToAssembly();
70+
// Debounce connection attempts
71+
const timer = setTimeout(() => {
72+
connectToAssembly();
73+
}, 1000);
74+
return () => clearTimeout(timer);
7075
}
7176
}, [microphoneState, microphone?.stream, connectToAssembly]);
7277

@@ -81,20 +86,22 @@ const AssemblyView: () => JSX.Element = () => {
8186
const processor = await AudioProcessor.getInstance(microphone.stream);
8287
await processor.setupProcessor((blob: Blob) => {
8388
if (isConnected) {
84-
sendAudio(blob);
89+
// Debounce audio sending while connection is establishing
90+
setTimeout(() => sendAudio(blob), 0);
8591
}
8692
}, isPaused);
8793

88-
// Store for cleanup
8994
mainRecorder.current = {
9095
stop: () => {
9196
processor.stop();
9297
},
93-
} as MediaRecorder; // Type assertion since we only use stop()
98+
} as MediaRecorder;
9499

95100
manager.isInitialized = true;
96101
} catch (error) {
97-
console.error("[AssemblyView] Failed to setup audio processor:", error);
102+
console.debug("[AssemblyView] Audio processor setup retry");
103+
// Retry setup after delay
104+
setTimeout(setupAudioProcessor, 1000);
98105
}
99106
};
100107

@@ -318,7 +325,8 @@ const AssemblyView: () => JSX.Element = () => {
318325
</div>
319326
<div className="flex-none px-4 pt-2 pb-4 border-t">
320327
<div className="max-w-2xl mx-auto">
321-
<div className="pb-2 flex gap-4 justify-end">
328+
<div className="pb-2 flex gap-4 justify-between items-center">
329+
<RecordYouButton language={describeLanguage(outputLanguage)} />
322330
<CloningView
323331
speechState={managerRef.current}
324332
audioBlob={audioBlob}

next/components/cloning-view.tsx

+8-7
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import { SpeechStateManager } from "@/lib/speech-state";
44
import { Button } from "@/components/ui/button";
55
import { Progress } from "@/components/ui/progress";
6-
import { MicVocal } from "lucide-react";
76
import {
87
Popover,
98
PopoverContent,
@@ -33,12 +32,14 @@ export const CloningView = ({ speechState, audioBlob }: CloningViewProps) => {
3332
<Popover>
3433
<PopoverTrigger asChild>
3534
<div className="relative w-24">
36-
<Button
37-
variant="outline"
35+
<Button
36+
variant="outline"
3837
className="w-full h-12 relative flex items-center justify-center pb-2"
3938
disabled={!isComplete}
4039
>
41-
<span className="text-center -mt-1 text-muted-foreground">{status}</span>
40+
<span className="text-center -mt-1 text-muted-foreground">
41+
{status}
42+
</span>
4243
</Button>
4344
<div className="absolute inset-x-0 bottom-0 px-1 pb-1">
4445
<Progress value={progress} className="h-1.5" />
@@ -48,9 +49,9 @@ export const CloningView = ({ speechState, audioBlob }: CloningViewProps) => {
4849
{isComplete && audioBlob && (
4950
<PopoverContent className="w-80">
5051
<div className="space-y-2">
51-
<audio
52-
controls
53-
src={URL.createObjectURL(audioBlob)}
52+
<audio
53+
controls
54+
src={URL.createObjectURL(audioBlob)}
5455
className="w-full"
5556
/>
5657
<div className="text-xs text-muted-foreground">

next/components/record-you-button.tsx

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
"use client";
2+
3+
import { Button } from "@/components/ui/button";
4+
import { Circle, Square } from "lucide-react";
5+
import { useState } from "react";
6+
7+
interface RecordYouButtonProps {
8+
onToggle?: (isRecording: boolean) => void;
9+
language?: string;
10+
}
11+
12+
export function RecordYouButton({ language }: RecordYouButtonProps) {
13+
const [isRecording, setIsRecording] = useState(false);
14+
15+
const handleClick = () => {
16+
const newState = !isRecording;
17+
setIsRecording(newState);
18+
};
19+
20+
return (
21+
<Button
22+
size="lg"
23+
variant="outline"
24+
className="px-8 py-6 text-lg font-semibold w-full flex items-center justify-center gap-2"
25+
onClick={handleClick}
26+
>
27+
{isRecording ? (
28+
<Square className="h-5 w-5 text-red-500" />
29+
) : (
30+
<Circle className="h-5 w-5 text-red-500" />
31+
)}
32+
{isRecording ? "Recording" : "Record"} {language}
33+
</Button>
34+
);
35+
}

next/lib/audio-processor.ts

+9-3
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,13 @@ export class AudioProcessor {
214214
const workletNode = new AudioWorkletNode(context, "pcm-processor");
215215
workletNode.port.onmessage = (e: MessageEvent<ArrayBuffer>) => {
216216
if (!isPaused) {
217-
const blob = new Blob([e.data], { type: "audio/wav" });
218-
onAudioData(blob);
217+
try {
218+
const blob = new Blob([e.data], { type: "audio/wav" });
219+
onAudioData(blob);
220+
} catch (error) {
221+
// Silently handle audio processing errors - they're usually transient
222+
console.debug("[AudioProcessor] Skipping audio chunk due to error");
223+
}
219224
}
220225
};
221226

@@ -266,7 +271,8 @@ export class AudioProcessor {
266271
const blob = new Blob([pcmData], { type: "audio/wav" });
267272
onAudioData(blob);
268273
} catch (err) {
269-
console.error("[AudioProcessor] Error processing audio data:", err);
274+
// Silently handle audio processing errors - they're usually transient
275+
console.debug("[AudioProcessor] Skipping audio chunk due to error");
270276
}
271277
};
272278

next/lib/types.test.ts

+16-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
import { describe, it, expect } from "bun:test";
2-
import { Language, normalizeLanguage, normalizeKaikkiLanguage, KAIKKI_LANGUAGES } from "./types";
2+
import {
3+
Language,
4+
normalizeLanguage,
5+
normalizeKaikkiLanguage,
6+
KAIKKI_LANGUAGES,
7+
describeLanguage,
8+
} from "./types";
39
import { CartesiaLanguages } from "./cartesia";
410

511
describe("language compatibility", () => {
@@ -21,3 +27,12 @@ describe("language compatibility", () => {
2127
}
2228
});
2329
});
30+
31+
describe("describeLanguage", () => {
  it("should trim parenthetical content from language names", () => {
    // Each row: [language code, expected name without any "(...)" qualifier].
    const expectations: Array<[Language, string]> = [
      [Language.ENGLISH_US, "English"],
      [Language.SPANISH_MX, "Spanish"],
      [Language.CHINESE_CN, "Chinese"],
      [Language.ITALIAN, "Italian"], // no parens, should be unchanged
    ];

    for (const [language, expectedName] of expectations) {
      expect(describeLanguage(language)).toBe(expectedName);
    }
  });
});

next/lib/types.ts

+98-20
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ export enum Language {
1313
KOREAN = "ko-KR",
1414
SWEDISH = "sv-SE",
1515
TURKISH = "tr-TR",
16-
POLISH = "pl-PL"
16+
POLISH = "pl-PL",
1717
}
1818

1919
interface LanguageInfo {
@@ -25,33 +25,109 @@ interface LanguageInfo {
2525

2626
// ordered by preference/importance
2727
/**
 * Language metadata table, ordered by preference/importance.
 *
 * Each row pairs a `Language` code with its display name, a two-letter
 * country code, and the language name used for kaikki lookups.
 */
// prettier-ignore
export const LANGUAGE_INFO: LanguageInfo[] = [
  { code: Language.ENGLISH_US, name: "English (US)",       countryCode: "US", kaikkiName: "English" },
  { code: Language.SPANISH_MX, name: "Spanish (Mexico)",   countryCode: "MX", kaikkiName: "Spanish" },
  { code: Language.ITALIAN,    name: "Italian",            countryCode: "IT", kaikkiName: "Italian" },
  { code: Language.JAPANESE,   name: "Japanese",           countryCode: "JP", kaikkiName: "Japanese" },
  { code: Language.FRENCH,     name: "French",             countryCode: "FR", kaikkiName: "French" },
  { code: Language.CHINESE_CN, name: "Chinese (Mandarin)", countryCode: "CN", kaikkiName: "Chinese" },
  { code: Language.GERMAN,     name: "German",             countryCode: "DE", kaikkiName: "German" },
  { code: Language.DUTCH,      name: "Dutch",              countryCode: "NL", kaikkiName: "Dutch" },
  { code: Language.PORTUGUESE, name: "Portuguese",         countryCode: "PT", kaikkiName: "Portuguese" },
  { code: Language.RUSSIAN,    name: "Russian",            countryCode: "RU", kaikkiName: "Russian" },
  { code: Language.HINDI,      name: "Hindi",              countryCode: "IN", kaikkiName: "Hindi" },
  { code: Language.KOREAN,     name: "Korean",             countryCode: "KR", kaikkiName: "Korean" },
  { code: Language.SWEDISH,    name: "Swedish",            countryCode: "SE", kaikkiName: "Swedish" },
  { code: Language.TURKISH,    name: "Turkish",            countryCode: "TR", kaikkiName: "Turkish" },
  { code: Language.POLISH,     name: "Polish",             countryCode: "PL", kaikkiName: "Polish" },
];
44119

45120
// Get language info by code
46121
/**
 * Returns the first `LANGUAGE_INFO` entry whose `code` equals `code`.
 *
 * @param code - Language code to look up.
 * @returns The matching entry.
 * @throws Error if no entry in `LANGUAGE_INFO` has that code.
 */
export function getLanguageInfo(code: Language): LanguageInfo {
  for (const entry of LANGUAGE_INFO) {
    if (entry.code === code) {
      return entry;
    }
  }
  throw new Error(`No language info for code: ${code}`);
}
51126

52127
// Get short display name (parenthetical qualifiers such as "(US)" are stripped)
53128
/**
 * Short display name for a language: the `name` from its language info with
 * any parenthetical qualifier removed, e.g. "English (US)" -> "English".
 *
 * A run of parentheticals (with surrounding whitespace) is replaced by a
 * single space rather than by nothing, so a qualifier in the middle of a name
 * does not fuse the neighbouring words ("A (b) C" -> "A C", not "AC"). The
 * final `trim()` removes the space left by a leading or trailing qualifier.
 *
 * @param lang - Language code to describe.
 * @returns The name without parenthetical content.
 * @throws Error propagated from `getLanguageInfo` when the code is unknown.
 */
export function describeLanguage(lang: Language): string {
  const { name } = getLanguageInfo(lang);
  return name.replace(/(?:\s*\([^)]*\))+\s*/g, " ").trim();
}
56132

57133
// normalize lang codes to basic ISO format (e.g. "en-US" -> "en")
@@ -107,7 +183,7 @@ export const KAIKKI_LANGUAGE_MAP = {
107183
Telugu: "te",
108184
Turkish: "tr",
109185
Urdu: "ur",
110-
Vietnamese: "vi"
186+
Vietnamese: "vi",
111187
} as const;
112188

113189
export type KaikkiLanguage = keyof typeof KAIKKI_LANGUAGE_MAP;
@@ -118,4 +194,6 @@ export function normalizeKaikkiLanguage(lang: KaikkiLanguage): string {
118194
}
119195

120196
// all supported kaikki languages
121-
const kaikkiLanguageKeys = Object.keys(KAIKKI_LANGUAGE_MAP);

/** The keys of `KAIKKI_LANGUAGE_MAP`, typed as `KaikkiLanguage` values. */
export const KAIKKI_LANGUAGES = kaikkiLanguageKeys as KaikkiLanguage[];

0 commit comments

Comments
 (0)