Skip to content

Commit 1312009

Browse files
committed
fix(marshal)!: compare strings by codepoint
1 parent 193e403 commit 1312009

File tree

4 files changed

+116
-2
lines changed

4 files changed

+116
-2
lines changed

packages/marshal/index.js

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export {
1717

1818
export {
1919
trivialComparator,
20+
compareByCodePoints,
2021
assertRankSorted,
2122
compareRank,
2223
isRankSorted,

packages/marshal/src/rankOrder.js

+41-2
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,46 @@ const { entries, fromEntries, setPrototypeOf, is } = Object;
4646
*/
4747
const sameValueZero = (x, y) => {
  // Strict equality already treats +0 and -0 as equal; `is` (Object.is)
  // is consulted only as a fallback, which makes NaN equal to itself.
  if (x === y) {
    return true;
  }
  return is(x, y);
};
4848

49+
/**
 * Compare two values using JavaScript's native `<` and `===` operators.
 *
 * Returns -1 when `left < right`, 0 when `left === right`, and 1 in every
 * other case. Note that "every other case" includes operands for which
 * neither `<` nor `===` holds (for example `NaN` against `NaN`).
 *
 * @param {any} left
 * @param {any} right
 * @returns {RankComparison}
 */
export const trivialComparator = (left, right) =>
  // eslint-disable-next-line no-nested-ternary, @endo/restrict-comparison-operands
  left < right ? -1 : left === right ? 0 : 1;
57+
// Harden the exported comparator. NOTE(review): `harden` is not defined in
// this view; presumably it makes the function object immutable — confirm.
harden(trivialComparator);
58+
59+
// Apparently eslint is confused about whether the function can ever exit
// without an explicit return (the infinite loop below is only ever left
// through a `return`), so the check is disabled for the JSDoc that follows.
// eslint-disable-next-line jsdoc/require-returns-check
/**
 * Compare two strings by Unicode code point instead of by UTF-16 code unit.
 *
 * JavaScript's relational operators compare strings code unit by code unit,
 * which sorts a supplementary character (encoded as a surrogate pair
 * starting at 0xD800 or above) before BMP characters in the range
 * U+E000..U+FFFF. This comparator walks both strings one code point at a
 * time, so such characters sort after the whole BMP.
 *
 * @param {string} left
 * @param {string} right
 * @returns {RankComparison} -1 if `left` sorts first, 1 if `right` sorts
 * first, 0 if both strings contain the same sequence of code points.
 */
export const compareByCodePoints = (left, right) => {
  // The string iterator yields one code point per step: a well-formed
  // surrogate pair comes out as a single two-unit string, a lone surrogate
  // comes out on its own.
  const leftIter = left[Symbol.iterator]();
  const rightIter = right[Symbol.iterator]();
  for (;;) {
    // An exhausted iterator reports `value: undefined`; a string iterator
    // never yields `undefined` for a real element, so that marks the end.
    const { value: leftChar } = leftIter.next();
    const { value: rightChar } = rightIter.next();
    if (leftChar === undefined && rightChar === undefined) {
      // Both strings ended together without any difference.
      return 0;
    } else if (leftChar === undefined) {
      // left is a prefix of right.
      return -1;
    } else if (rightChar === undefined) {
      // right is a prefix of left.
      return 1;
    }
    const leftCodepoint = /** @type {number} */ (leftChar.codePointAt(0));
    const rightCodepoint = /** @type {number} */ (rightChar.codePointAt(0));
    if (leftCodepoint < rightCodepoint) return -1;
    if (leftCodepoint > rightCodepoint) return 1;
    // Equal code points at this position: keep scanning.
  }
};
88+
// Harden the exported comparator. NOTE(review): `harden` is not defined in
// this view; presumably it makes the function object immutable — confirm.
harden(compareByCodePoints);
5289

5390
/**
5491
* @typedef {Record<PassStyle, { index: number, cover: RankCover }>} PassStyleRanksRecord
@@ -140,8 +177,7 @@ export const makeComparatorKit = (compareRemotables = (_x, _y) => 0) => {
140177
return 0;
141178
}
142179
case 'boolean':
143-
case 'bigint':
144-
case 'string': {
180+
case 'bigint': {
145181
// Within each of these passStyles, the rank ordering agrees with
146182
// JavaScript's relational operators `<` and `>`.
147183
if (left < right) {
@@ -151,6 +187,9 @@ export const makeComparatorKit = (compareRemotables = (_x, _y) => 0) => {
151187
return 1;
152188
}
153189
}
190+
case 'string': {
191+
return compareByCodePoints(left, right);
192+
}
154193
case 'symbol': {
155194
return comparator(
156195
nameForPassableSymbol(left),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import { test } from './prepare-test-env-ava.js';
2+
3+
import { compareRank } from '../src/rankOrder.js';
4+
5+
test('unicode code point order', t => {
  // Test case from
  // https://icu-project.org/docs/papers/utf16_code_point_order.html
  const str0 = '\u{ff61}';
  const str3 = '\u{d800}\u{dc02}';

  // str1 and str2 become impossible examples once we prohibit
  // non-well-formed strings.
  // See https://github.com/endojs/endo/pull/2002
  const str1 = '\u{d800}X';
  const str2 = '\u{d800}\u{ff61}';

  // harden to ensure it is not sorted in place, just for sanity
  const strs = harden([str0, str1, str2, str3]);

  /**
   * Baseline comparator using the native (UTF-16 code unit) string order.
   *
   * @param {string} left
   * @param {string} right
   * @returns {import('../src/types.js').RankComparison}
   */
  const nativeComp = (left, right) =>
    // eslint-disable-next-line no-nested-ternary
    left < right ? -1 : left > right ? 1 : 0;

  const nativeSorted = strs.toSorted(nativeComp);

  // Code unit order: the surrogate pair (str3) sorts before U+FF61 (str0).
  t.deepEqual(nativeSorted, [str1, str3, str2, str0]);

  const rankSorted = strs.toSorted(compareRank);

  // Code point order: U+10002 (str3) sorts after every BMP string.
  t.deepEqual(rankSorted, [str1, str2, str0, str3]);
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// modeled on test-string-rank-order.js
2+
3+
import { test } from './prepare-test-env-ava.js';
4+
5+
import { compareKeys } from '../src/keys/compareKeys.js';
6+
7+
test('unicode code point order', t => {
  // Test case from
  // https://icu-project.org/docs/papers/utf16_code_point_order.html
  const str0 = '\u{ff61}';
  const str3 = '\u{d800}\u{dc02}';

  // str1 and str2 become impossible examples once we prohibit
  // non-well-formed strings.
  // See https://github.com/endojs/endo/pull/2002
  const str1 = '\u{d800}X';
  const str2 = '\u{d800}\u{ff61}';

  // harden to ensure it is not sorted in place, just for sanity
  const strs = harden([str0, str1, str2, str3]);

  /**
   * Baseline comparator using the native (UTF-16 code unit) string order.
   *
   * @param {string} left
   * @param {string} right
   * @returns {import('../src/types.js').KeyComparison}
   */
  const nativeComp = (left, right) =>
    // eslint-disable-next-line no-nested-ternary
    left < right ? -1 : left > right ? 1 : 0;

  const nativeSorted = strs.toSorted(nativeComp);

  // Code unit order: the surrogate pair (str3) sorts before U+FF61 (str0).
  t.deepEqual(nativeSorted, [str1, str3, str2, str0]);

  const keySorted = strs.toSorted(compareKeys);

  // Code point order: U+10002 (str3) sorts after every BMP string.
  t.deepEqual(keySorted, [str1, str2, str0, str3]);
});

0 commit comments

Comments
 (0)