Skip to content

Commit 1135c1e

Browse files
committed
improve defs
1 parent 642214a commit 1135c1e

File tree

7 files changed

+233
-70
lines changed

7 files changed

+233
-70
lines changed

next/app/api/define/route.ts

+12-5
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,23 @@ import { fetchKaikkiDefinitions, type KaikkiLanguage } from "@/lib/kaikki";
44
export async function GET(request: Request) {
55
const { searchParams } = new URL(request.url);
66
const word = searchParams.get("word");
7-
const language = (searchParams.get("language") || "English") as KaikkiLanguage;
8-
7+
const language = (searchParams.get("language") ||
8+
"English") as KaikkiLanguage;
9+
910
if (!word) {
10-
return NextResponse.json({ error: "Missing word parameter" }, { status: 400 });
11+
return NextResponse.json(
12+
{ error: "Missing word parameter" },
13+
{ status: 400 }
14+
);
1115
}
1216

1317
try {
14-
const definitions = await fetchKaikkiDefinitions(word, language);
18+
const definitions = await fetchKaikkiDefinitions(word, language, true);
1519
return NextResponse.json(definitions);
1620
} catch (error) {
17-
return NextResponse.json({ error: "Failed to fetch definitions" }, { status: 500 });
21+
return NextResponse.json(
22+
{ error: "Failed to fetch definitions" },
23+
{ status: 500 }
24+
);
1825
}
1926
}

next/bun.lock

+3
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
"autoprefixer": "^10.0.1",
5151
"bun": "^1.2.2",
5252
"cheerio": "^1.0.0",
53+
"dotenv": "^16.4.7",
5354
"eslint": "^8",
5455
"eslint-config-next": "15.1.7",
5556
"husky": "^9.0.11",
@@ -673,6 +674,8 @@
673674

674675
"dot-prop": ["dot-prop@5.3.0", "", { "dependencies": { "is-obj": "^2.0.0" } }, "sha512-QM8q3zDe58hqUqjraQOmzZ1LIH9SWQJTlEKCH4kJ2oQvLZk7RbQXvtDM2XEq3fwkV9CCvvH4LA0AV+ogFsBM2Q=="],
675676

677+
"dotenv": ["dotenv@16.4.7", "", {}, "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ=="],
678+
676679
"dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="],
677680

678681
"duplexer2": ["duplexer2@0.1.4", "", { "dependencies": { "readable-stream": "^2.0.2" } }, "sha512-asLFVfWWtJ90ZyOUHMqk7/S2w2guQKxUI2itj3d92ADHhxUSbCMGi1f1cBcJ7xM1To+pE/Khbwo1yuNbMEPKeA=="],

next/components/assembly-view.tsx

+4-3
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,10 @@ const AssemblyView: () => JSX.Element = () => {
6262
useEffect(() => {
6363
if (!microphone?.stream || mainRecorder.current) return;
6464

65+
const manager = managerRef.current;
6566
const setupAudioProcessor = async () => {
6667
try {
67-
managerRef.current.isInitialized = false;
68+
manager.isInitialized = false;
6869
const processor = await AudioProcessor.getInstance(microphone.stream);
6970
await processor.setupProcessor((blob: Blob) => {
7071
if (isConnected) {
@@ -79,7 +80,7 @@ const AssemblyView: () => JSX.Element = () => {
7980
},
8081
} as MediaRecorder; // Type assertion since we only use stop()
8182

82-
managerRef.current.isInitialized = true;
83+
manager.isInitialized = true;
8384
} catch (error) {
8485
console.error("[AssemblyView] Failed to setup audio processor:", error);
8586
}
@@ -92,7 +93,7 @@ const AssemblyView: () => JSX.Element = () => {
9293
mainRecorder.current.stop();
9394
mainRecorder.current = null;
9495
}
95-
managerRef.current.isInitialized = false;
96+
manager.isInitialized = false;
9697
};
9798
}, [microphone?.stream, isPaused, isConnected, sendAudio]);
9899

next/components/fragment.tsx

+36-27
Original file line numberDiff line numberDiff line change
@@ -131,35 +131,44 @@ export const FragmentComponent = ({
131131
return <p className="text-muted-foreground">No definitions found.</p>;
132132
}
133133

134-
return definitions.map((entry, i) => (
135-
<div key={i} className="mb-6 last:mb-0">
136-
<div className="flex items-center gap-2 mb-2">
137-
<a
138-
href={buildKaikkiHtmlUrl(
139-
entry.word,
140-
toKaikkiLanguage(outputLanguage)
134+
return definitions.map((entry, i) => {
135+
// If base form exists, only show that
136+
const definitionToShow = entry.base_word_definition || entry;
137+
const isBaseForm = !!entry.base_word_definition;
138+
139+
return (
140+
<div key={i} className="mb-6 last:mb-0">
141+
<div className="flex items-center gap-2 mb-2">
142+
<a
143+
href={buildKaikkiHtmlUrl(
144+
definitionToShow.word,
145+
toKaikkiLanguage(outputLanguage)
146+
)}
147+
target="_blank"
148+
rel="noopener noreferrer"
149+
className="font-bold hover:underline"
150+
>
151+
{definitionToShow.word}
152+
</a>
153+
<span className="text-xs px-2 py-0.5 bg-muted rounded-full">
154+
{definitionToShow.pos}
155+
</span>
156+
{isBaseForm && (
157+
<span className="text-xs text-muted-foreground">(base form)</span>
158+
)}
159+
</div>
160+
<ul className="list-disc list-inside space-y-1">
161+
{definitionToShow.senses?.map((sense, j) =>
162+
sense.glosses?.map((gloss, k) => (
163+
<li key={`${j}-${k}`} className="text-sm">
164+
{gloss}
165+
</li>
166+
))
141167
)}
142-
target="_blank"
143-
rel="noopener noreferrer"
144-
className="font-bold hover:underline"
145-
>
146-
{entry.word}
147-
</a>
148-
<span className="text-xs px-2 py-0.5 bg-muted rounded-full">
149-
{entry.pos}
150-
</span>
168+
</ul>
151169
</div>
152-
<ul className="list-disc list-inside space-y-1">
153-
{entry.senses?.map((sense, j) =>
154-
sense.glosses?.map((gloss, k) => (
155-
<li key={`${j}-${k}`} className="text-sm">
156-
{gloss}
157-
</li>
158-
))
159-
)}
160-
</ul>
161-
</div>
162-
));
170+
);
171+
});
163172
};
164173

165174
const desktopContent = selectedWord && (

next/lib/kaikki.ts

+91-25
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@ export interface KaikkiEntry {
66
senses: Array<{
77
raw_glosses?: string[];
88
glosses?: string[];
9+
form_of?: Array<{
10+
word: string;
11+
}>;
912
}>;
13+
base_word_definition?: KaikkiEntry; // for form_of references
1014
}
1115

1216
interface KaikkiCache {
@@ -21,18 +25,38 @@ export function cleanWord(word: string): string {
2125
// Multiple cleaning passes to handle nested cases
2226
for (let i = 0; i < 3; i++) {
2327
result = result
24-
.replace(/[.,!?;:»)}\]]+$/g, '') // trailing
25-
.replace(/^[«({[\]]+/g, '') // leading
26-
.replace(/^["'`](.+)["'`]$/g, '$1') // quotes
28+
// Universal punctuation (including inverted marks)
29+
.replace(/[.,!?¿¡;:]+$/gu, "") // trailing periods, ellipsis (including CJK)
30+
.replace(/^[¡¿]+/gu, "") // leading spanish marks
31+
32+
// Brackets/parens/braces across scripts (escape special chars)
33+
.replace(/[»\)\]}>]+$/gu, "") // trailing (including CJK/JP)
34+
.replace(/^[«\(\[{<]+/gu, "") // leading (including CJK/JP)
35+
36+
// Quotes across scripts
37+
.replace(/^["'`“”‘’«»]+(.+)["'`“”‘’«»]+$/gu, "$1")
38+
39+
// Arabic/Persian/Hebrew punctuation
40+
.replace(/[؟،؛]+$/gu, "") // Arabic question mark, comma, semicolon
41+
.replace(/^[؟،؛]+/gu, "") // Leading Arabic punctuation
42+
43+
// CJK specific
44+
.replace(/[！？。]+$/gu, "") // CJK exclamation, question, full stop
45+
.replace(/^[！？。]+/gu, "") // Leading CJK marks (rare but possible)
46+
47+
// Devanagari/Indic
48+
.replace(/[।॥]+$/gu, "") // Devanagari danda, double danda
49+
.replace(/^[।॥]+/gu, "") // Leading dandas (rare but possible)
50+
2751
.trim();
2852
}
2953
return result.toLowerCase();
3054
}
3155

3256
// from https://kaikki.org
33-
export type KaikkiLanguage =
57+
export type KaikkiLanguage =
3458
// Primary dictionaries
35-
| "English"
59+
| "English"
3660
| "Spanish"
3761
| "Italian"
3862
| "German"
@@ -82,27 +106,36 @@ function toTitleCase(str: string): string {
82106

83107
function buildKaikkiPath(word: string, language: KaikkiLanguage): string {
84108
const firstLetter = word[0].toLowerCase();
85-
const firstTwo = word.slice(0,2).toLowerCase();
109+
const firstTwo = word.slice(0, 2).toLowerCase();
86110
const wordLower = word.toLowerCase();
87111
const languageFormatted = toTitleCase(language);
88112
return `${languageFormatted}/meaning/${firstLetter}/${firstTwo}/${wordLower}`;
89113
}
90114

91-
export function buildKaikkiHtmlUrl(word: string, language: KaikkiLanguage): string {
115+
export function buildKaikkiHtmlUrl(
116+
word: string,
117+
language: KaikkiLanguage
118+
): string {
92119
return `https://kaikki.org/dictionary/${buildKaikkiPath(word, language)}.html`;
93120
}
94121

95-
export function buildKaikkiJsonlUrl(word: string, language: KaikkiLanguage): string {
122+
export function buildKaikkiJsonlUrl(
123+
word: string,
124+
language: KaikkiLanguage
125+
): string {
96126
return `https://kaikki.org/dictionary/${buildKaikkiPath(word, language)}.jsonl`;
97127
}
98128

99-
async function fetchFromKaikki(word: string, language: KaikkiLanguage): Promise<KaikkiEntry[]> {
129+
async function fetchFromKaikki(
130+
word: string,
131+
language: KaikkiLanguage
132+
): Promise<KaikkiEntry[]> {
100133
const url = buildKaikkiJsonlUrl(word, language);
101134
console.log(`🔍 Fetching from URL: ${url}`);
102-
135+
103136
const r = await fetch(url);
104137
console.log(`📡 Response status: ${r.status} ${r.statusText}`);
105-
138+
106139
if (!r.ok) {
107140
const text = await r.text().catch(() => "No response body");
108141
console.error(`❌ Error details:
@@ -118,24 +151,57 @@ async function fetchFromKaikki(word: string, language: KaikkiLanguage): Promise<
118151
return lines.map((line) => JSON.parse(line));
119152
}
120153

121-
export async function fetchKaikkiDefinitions(word: string, language: KaikkiLanguage = "English"): Promise<KaikkiEntry[]> {
154+
export async function fetchKaikkiDefinitions(
155+
word: string,
156+
language: KaikkiLanguage = "English",
157+
noCache: boolean = false
158+
): Promise<KaikkiEntry[]> {
122159
const redis = await getRedis();
123160
const cacheKey = buildKaikkiKey(word, language);
124-
125-
// Try cache first
126-
const cached = await redis.get<KaikkiCache>(cacheKey);
127-
if (cached) {
128-
console.log(`🎯 Cache hit for ${cacheKey}`);
129-
return cached.definitions;
161+
162+
// Skip cache if noCache is true
163+
if (!noCache) {
164+
// Try cache first
165+
const cached = await redis.get<KaikkiCache>(cacheKey);
166+
if (cached) {
167+
console.log(`🎯 Cache hit for ${cacheKey}`);
168+
return cached.definitions;
169+
}
130170
}
131-
171+
132172
// Fetch from Kaikki
133-
console.log(`💫 Cache miss for ${cacheKey}`);
173+
console.log(
174+
`💫 ${noCache ? "Skipping cache" : "Cache miss"} for ${cacheKey}`
175+
);
134176
const definitions = await fetchFromKaikki(word, language);
135-
136-
// Cache the result (1 year TTL)
137-
await redis.set(cacheKey, { definitions }, { ex: CACHE_TTL });
138-
console.log(`💾 Cached definitions for ${cacheKey} (expires in 1 year)`);
139-
177+
178+
// Fetch base word definitions for form_of references
179+
for (const def of definitions) {
180+
for (const sense of def.senses) {
181+
if (sense.form_of?.[0]?.word) {
182+
const baseWord = sense.form_of[0].word;
183+
console.log(`🔄 Fetching base word definition for ${baseWord}`);
184+
try {
185+
const baseDefs = await fetchKaikkiDefinitions(
186+
baseWord,
187+
language,
188+
noCache
189+
);
190+
if (baseDefs.length > 0) {
191+
def.base_word_definition = baseDefs[0];
192+
}
193+
} catch (e) {
194+
console.warn(`⚠️ Failed to fetch base word ${baseWord}:`, e);
195+
}
196+
}
197+
}
198+
}
199+
200+
// Cache the result (1 year TTL) unless noCache is true
201+
if (!noCache) {
202+
await redis.set(cacheKey, { definitions }, { ex: CACHE_TTL });
203+
console.log(`💾 Cached definitions for ${cacheKey} (expires in 1 year)`);
204+
}
205+
140206
return definitions;
141207
}

0 commit comments

Comments
 (0)