-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathCFUniChar.h
279 lines (230 loc) · 10.3 KB
/
CFUniChar.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/*
* Copyright (c) 2008-2012 Brent Fulgham <bfulgham@gmail.org>. All rights reserved.
*
* This source code is a modified version of the CoreFoundation sources released by Apple Inc. under
* the terms of the APSL version 2.0 (see below).
*
* Information about changes from the original Apple source release can be found by reviewing the
* source control system for the project at https://sourceforge.net/svn/?group_id=246198.
*
* The original license information is as follows:
*
* Copyright (c) 2011 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/* CFUniChar.h
Copyright (c) 1998-2011, Apple Inc. All rights reserved.
*/
#if !defined(__COREFOUNDATION_CFUNICHAR__)
#define __COREFOUNDATION_CFUNICHAR__ 1
#include <CoreFoundation/CFByteOrder.h>
#include <CoreFoundation/CFBase.h>
CF_EXTERN_C_BEGIN
// Right-shift applied to a UTF-16 code unit to obtain the index of the byte that
// holds its bit in a membership bitmap (8 bits per byte, hence a shift of 3).
#define kCFUniCharBitShiftForByte (3)
// Despite the name this is used as an AND mask, not a shift: (theChar & 7) gives the
// bit position (0-7) of the code unit inside the byte selected above.
#define kCFUniCharBitShiftForMask (7)
// Returns true when `character` lies in the inclusive range 0xD800-0xDBFF
// (a UTF-16 high surrogate), false otherwise.
CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) {
    if (character < 0xD800UL) return false;
    if (character > 0xDBFFUL) return false;
    return true;
}
// Returns true when `character` lies in the inclusive range 0xDC00-0xDFFF
// (a UTF-16 low surrogate), false otherwise.
CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) {
    if ((character < 0xDC00UL) || (character > 0xDFFFUL)) {
        return false;
    }
    return true;
}
// Combines a UTF-16 surrogate pair into a single UTF-32 value.
// The high surrogate contributes the upper 10 bits, the low surrogate the lower
// 10 bits, and 0x10000 is added to the result. The arguments are not validated;
// callers are expected to pass a real high/low surrogate pair.
CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
    const unsigned long upperBits = (surrogateHigh - 0xD800UL) << 10;
    const unsigned long lowerBits = surrogateLow - 0xDC00UL;
    return upperBits + lowerBits + 0x0010000UL;
}
// The following values coincide with the TextEncodingFormat definitions in TextCommon.h.
// Note that the numbering is not contiguous: the value 1 is not used here.
enum {
kCFUniCharUTF16Format = 0,
kCFUniCharUTF8Format = 2,
kCFUniCharUTF32Format = 3
};
// Tests the bit for `theChar` in `bitmap` (one bit per UTF-16 code unit, 8 per byte).
// Returns false when `bitmap` is NULL or the bit is clear, true when the bit is set.
CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) {
    if (bitmap) {
        const uint8_t byteValue = bitmap[(theChar) >> kCFUniCharBitShiftForByte];
        const uint32_t bitMask = ((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask);
        if (byteValue & bitMask) {
            return true;
        }
    }
    return false;
}
// Sets the bit for `theChar` in `bitmap`. `bitmap` is dereferenced without a NULL
// check, so it must point to storage that covers the byte for `theChar`.
CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) {
    uint8_t *slot = bitmap + ((theChar) >> kCFUniCharBitShiftForByte);
    *slot |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
}
// Clears the bit for `theChar` in `bitmap`. `bitmap` is dereferenced without a NULL
// check, so it must point to storage that covers the byte for `theChar`.
CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) {
    uint8_t *slot = bitmap + ((theChar) >> kCFUniCharBitShiftForByte);
    *slot &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
}
// Character set identifiers. The comment on CFUniCharGetBitmapPtrForPlane refers to
// members of this enum, so these are presumably the values passed as the `charset`
// argument of the CFUniChar* functions declared in this header (confirm against the
// implementation). Numbering starts at 1 and increases by one per member unless a
// value is assigned explicitly.
enum {
kCFUniCharControlCharacterSet = 1,
kCFUniCharWhitespaceCharacterSet,
kCFUniCharWhitespaceAndNewlineCharacterSet,
kCFUniCharDecimalDigitCharacterSet,
kCFUniCharLetterCharacterSet,
kCFUniCharLowercaseLetterCharacterSet,
kCFUniCharUppercaseLetterCharacterSet,
kCFUniCharNonBaseCharacterSet,
kCFUniCharCanonicalDecomposableCharacterSet,
kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet, // alias: same value as the canonical decomposable set
kCFUniCharAlphaNumericCharacterSet,
kCFUniCharPunctuationCharacterSet,
kCFUniCharIllegalCharacterSet,
kCFUniCharTitlecaseLetterCharacterSet,
kCFUniCharSymbolAndOperatorCharacterSet,
kCFUniCharNewlineCharacterSet,
kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begin here
kCFUniCharHFSPlusDecomposableCharacterSet,
kCFUniCharStrongRightToLeftCharacterSet,
kCFUniCharHasNonSelfLowercaseCharacterSet,
kCFUniCharHasNonSelfUppercaseCharacterSet,
kCFUniCharHasNonSelfTitlecaseCharacterSet,
kCFUniCharHasNonSelfCaseFoldingCharacterSet,
kCFUniCharHasNonSelfMirrorMappingCharacterSet,
kCFUniCharControlAndFormatterCharacterSet,
kCFUniCharCaseIgnorableCharacterSet,
kCFUniCharGraphemeExtendCharacterSet
};
// NOTE(review): judging by name and signature, tests whether the UTF-32 character
// `theChar` belongs to the character set `charset`; the implementation is not in
// this header, so confirm there.
CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset);
// This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
// NOTE(review): for other sets the result is presumably a per-plane bitmap usable
// with CFUniCharIsMemberOfBitmap; confirm against the implementation.
CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane);
// Bitmap status codes, each cast to uint8_t.
// NOTE(review): presumably the uint8_t result of CFUniCharGetBitmapForPlane; confirm
// against the implementation.
// Mind the numeric values: "Filled" is 0, "All" is 1 and "Empty" is 0xFF.
enum {
kCFUniCharBitmapFilled = (uint8_t)0,
kCFUniCharBitmapEmpty = (uint8_t)0xFF,
kCFUniCharBitmapAll = (uint8_t)1
};
// NOTE(review): judging by the signature, writes the bitmap of `charset` for `plane`
// into the caller-supplied `bitmap` buffer (inverted when `isInverted` is true) and
// returns a uint8_t status; the required buffer size and the exact meaning of the
// result are not visible in this header -- confirm against the implementation.
CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted);
// NOTE(review): presumably the number of Unicode planes for which `charset` has
// data; confirm against the implementation.
CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset);
// Case-mapping kinds, numbered 0 through 3.
// NOTE(review): presumably the `ctype` argument of CFUniCharMapCaseTo and the `type`
// argument of CFUniCharGetConditionalCaseMappingFlags; confirm against the implementation.
enum {
kCFUniCharToLowercase = 0,
kCFUniCharToUppercase,
kCFUniCharToTitlecase,
kCFUniCharCaseFold
};
// Case-mapping context flags. Each member is a distinct single bit, so several can
// be OR-ed together into one value.
// NOTE(review): presumably used for the `flags` argument of CFUniCharMapCaseTo and the
// `lastFlags` argument / result of CFUniCharGetConditionalCaseMappingFlags; confirm.
enum {
kCFUniCharCaseMapFinalSigma = (1UL << 0),
kCFUniCharCaseMapAfter_i = (1UL << 1),
kCFUniCharCaseMapMoreAbove = (1UL << 2),
kCFUniCharCaseMapDutchDigraph = (1UL << 3)
};
// NOTE(review): judging by the signature, maps `theChar` according to `ctype` and
// `flags`, writing up to `maxLength` UTF-16 units into `convertedChar` and returning
// a CFIndex (presumably the number of units produced). `langCode` presumably selects
// language-specific rules. Confirm all of this against the implementation.
CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode);
// NOTE(review): judging by the signature, inspects `theChar` at `currentIndex` within
// `buffer` (of `length` units) and returns a flag word for conditional case mapping;
// confirm against the implementation.
CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags);
// Bidirectional property values, numbered consecutively from 0 (ON) to 18 (PDF).
// NOTE(review): the suffixes look like Unicode Bidi_Class abbreviations; confirm.
// kCFUniCharBiDiPropertyPDF must stay the highest value: CFUniCharGetBidiPropertyForCharacter
// treats any table entry greater than PDF as a sub-table selector rather than as a
// property value, so adding members after PDF changes how table entries are read.
enum {
kCFUniCharBiDiPropertyON = 0,
kCFUniCharBiDiPropertyL,
kCFUniCharBiDiPropertyR,
kCFUniCharBiDiPropertyAN,
kCFUniCharBiDiPropertyEN,
kCFUniCharBiDiPropertyAL,
kCFUniCharBiDiPropertyNSM,
kCFUniCharBiDiPropertyCS,
kCFUniCharBiDiPropertyES,
kCFUniCharBiDiPropertyET,
kCFUniCharBiDiPropertyBN,
kCFUniCharBiDiPropertyS,
kCFUniCharBiDiPropertyWS,
kCFUniCharBiDiPropertyB,
kCFUniCharBiDiPropertyRLO,
kCFUniCharBiDiPropertyRLE,
kCFUniCharBiDiPropertyLRO,
kCFUniCharBiDiPropertyLRE,
kCFUniCharBiDiPropertyPDF
};
// Unicode property kinds (0 = combining, 1 = bidi).
// NOTE(review): presumably the `propertyType` argument of the
// CFUniCharGetUnicodeProperty* functions declared in this header; confirm.
enum {
kCFUniCharCombiningProperty = 0,
kCFUniCharBidiProperty
};
// The second arg 'bitmap' has to be the pointer to a specific plane.
// Layout as read by this function: the first 256 bytes are indexed by the high byte
// of `character`. An entry no greater than kCFUniCharBiDiPropertyPDF is the property
// itself; a larger entry selects one of the 256-byte sub-tables that follow, which
// is then indexed by the low byte of `character`.
// Returns kCFUniCharBiDiPropertyL when `bitmap` is NULL.
CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
    uint8_t pageEntry;
    const uint8_t *subTable;

    if (!bitmap) {
        return kCFUniCharBiDiPropertyL;
    }

    pageEntry = bitmap[(character >> 8)];
    if (pageEntry <= kCFUniCharBiDiPropertyPDF) {
        return pageEntry;
    }

    // Skip the 256-byte index, then step to the selected sub-table.
    subTable = bitmap + 256 + ((pageEntry - kCFUniCharBiDiPropertyPDF - 1) * 256);
    return subTable[character % 256];
}
// Looks up the combining property of `character` in a per-plane table.
// Layout as read by this function: the first 256 bytes are indexed by the high byte
// of `character`. A zero entry means the property is 0; a non-zero entry N selects
// the N-th 256-byte sub-table after the index, which is indexed by the low byte.
// Returns 0 when `bitmap` is NULL.
CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
    uint8_t pageEntry;
    const uint8_t *subTable;

    if (!bitmap) {
        return 0;
    }

    pageEntry = bitmap[(character >> 8)];
    if (!pageEntry) {
        return 0;
    }

    // Skip the 256-byte index, then step to the selected sub-table.
    subTable = bitmap + 256 + ((pageEntry - 1) * 256);
    return subTable[character % 256];
}
// NOTE(review): presumably returns the per-plane table for `propertyType`, in the
// form expected by CFUniCharGetBidiPropertyForCharacter /
// CFUniCharGetCombiningPropertyForCharacter; confirm against the implementation.
CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane);
// NOTE(review): presumably the number of planes that have data for `propertyType`; confirm.
CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType);
// NOTE(review): presumably returns the `propertyType` property value of `character`; confirm.
CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType);
// NOTE(review): judging by the signature, copies `srcLength` UTF-32 characters from
// `src` into `*dst` encoded as `dstFormat`, limited by `dstLength`, and reports
// progress through `filledLength`; whether `*dst` is advanced and what the bool
// result means are not visible in this header -- confirm against the implementation.
CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat);
// UTF32 support
//
// Converts `length` UTF-16 code units at `src` to UTF-32 values stored in `dst`.
// Each output value is byte-swapped from host order to big-endian when `isBigEndien`
// is true and to little-endian otherwise. A valid surrogate pair yields one output
// value. An unpaired surrogate is replaced with U+FFFD when `allowLossy` is true;
// otherwise the function returns false at that point (values already written stay
// in `dst`). `dst` is not bounds-checked: at most `length` values are written.
// Returns true when the whole input was converted.
CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) {
    const UTF16Char *const end = src + length;

    while (src < end) {
        UTF32Char codePoint = *src++;
        bool malformed = false;

        if (CFUniCharIsSurrogateHighCharacter(codePoint)) {
            if ((src < end) && CFUniCharIsSurrogateLowCharacter(*src)) {
                codePoint = CFUniCharGetLongCharacterForSurrogatePair(codePoint, *src);
                ++src;
            } else {
                malformed = true; // high surrogate without a following low surrogate
            }
        } else if (CFUniCharIsSurrogateLowCharacter(codePoint)) {
            malformed = true; // low surrogate without a preceding high surrogate
        }

        if (malformed) {
            if (!allowLossy) return false;
            codePoint = 0xFFFD; // replacement character
        }

        if (isBigEndien) {
            *dst = CFSwapInt32HostToBig(codePoint);
        } else {
            *dst = CFSwapInt32HostToLittle(codePoint);
        }
        ++dst;
    }
    return true;
}
// Converts `length` UTF-32 values at `src` to UTF-16 code units stored in `dst`.
//
// src          UTF-32 input. Each value is byte-swapped to host order from big-endian
//              when `isBigEndien` is true and from little-endian otherwise.
// length       number of UTF-32 values to read from `src`.
// dst          output buffer. It is not bounds-checked; a value in the range
//              0x10000-0x10FFFF produces two UTF-16 units, so the caller must provide
//              room for up to 2 * length units.
// allowLossy   when true, a lone surrogate value or a value of 0x110000 and above is
//              replaced with U+FFFD, and a high surrogate value immediately followed
//              by a low surrogate value is copied through as a surrogate pair. When
//              false, any surrogate value or value of 0x110000 and above makes the
//              function return false (units already written stay in `dst`).
//
// Returns true when the whole input was converted.
CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) {
    const UTF32Char *limit = src + length;
    UTF32Char character;

    while (src < limit) {
        // Read first, then swap, so the increment is evaluated exactly once.
        character = *(src++);
        character = (isBigEndien ? CFSwapInt32BigToHost(character) : CFSwapInt32LittleToHost(character));

        // The BMP is U+0000 through U+FFFF inclusive. U+FFFF must be handled here:
        // in the supplementary branch below, subtracting 0x10000 from it would wrap
        // around and emit two bogus code units.
        if (character < 0x10000) { // BMP
            if (allowLossy) {
                if (CFUniCharIsSurrogateHighCharacter(character)) {
                    UTF32Char otherCharacter = 0xFFFD; // replacement character
                    if (src < limit) {
                        otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src));
                        if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) {
                            // Pass the pair through: emit the high surrogate now, the
                            // low surrogate is stored by the common write below.
                            *(dst++) = (UTF16Char)character;
                            ++src;
                        } else {
                            otherCharacter = 0xFFFD; // replacement character
                        }
                    }
                    character = otherCharacter;
                } else if (CFUniCharIsSurrogateLowCharacter(character)) {
                    character = 0xFFFD; // replacement character
                }
            } else {
                if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false;
            }
        } else if (character < 0x110000) { // non-BMP
            // Split into a surrogate pair: high unit now, low unit by the common write below.
            character -= 0x10000;
            *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL);
            character = (UTF16Char)((character & 0x3FF) + 0xDC00UL);
        } else {
            if (!allowLossy) return false;
            character = 0xFFFD; // replacement character
        }
        *(dst++) = (UTF16Char)character;
    }
    return true;
}
CF_EXTERN_C_END
#endif /* ! __COREFOUNDATION_CFUNICHAR__ */