/*
 * Stand-ins for the GLib type and allocator names used throughout this
 * file.  Each GLib spelling is mapped directly onto a standard C type (or
 * onto malloc), so the code can be built without GLib itself.
 *
 * These are macros rather than typedefs, so they are plain textual
 * substitutions.  int16_t / uint16_t / uint32_t require <stdint.h>,
 * ssize_t requires <sys/types.h>, and malloc requires <stdlib.h>.
 */
#define guchar unsigned char
#define guint unsigned int
#define gushort unsigned short
#define gint16 int16_t
#define guint16 uint16_t
#define gunichar uint32_t	/* one UCS-4 code unit */
#define gssize ssize_t
#define g_malloc malloc		/* plain malloc: the result may be NULL */
54 #define g_return_val_if_fail(expr,val) { \
/* TRUE is the logical negation of FALSE; FALSE must be defined before
   TRUE is expanded. */
# define TRUE (!FALSE)

/* Number of elements in ARR.  ARR must be a real array object: applied to
   a pointer (including an array function parameter) the result is
   meaningless. */
#define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0]))

/* Branch-prediction annotation reduced to a no-op: expands to the
   expression itself. */
#define G_UNLIKELY(expr) (expr)
/*
 * Step P forward to the start of the following UTF-8 encoded character by
 * adding the skip count that g_utf8_skip holds for P's first byte.
 *
 * The byte is read through a guchar pointer so that lead bytes >= 0x80
 * index the table with a non-negative value even where plain char is
 * signed.  P is evaluated twice, so do not pass an expression with side
 * effects.  No validation is performed: for a truncated or malformed
 * sequence the result can point past the end of the string.
 */
#define g_utf8_next_char(p) ((p) + g_utf8_skip[*(const guchar *)(p)])
153 #define UTF8_COMPUTE(Char, Mask, Len) \
159 else if ((Char & 0xe0) == 0xc0) \
164 else if ((Char & 0xf0) == 0xe0) \
169 else if ((Char & 0xf8) == 0xf0) \
174 else if ((Char & 0xfc) == 0xf8) \
179 else if ((Char & 0xfe) == 0xfc) \
/*
 * Number of bytes needed to encode code point Char in UTF-8, using the
 * extended scheme that allows sequences of up to six bytes:
 *
 *   Char <       0x80  -> 1
 *   Char <      0x800  -> 2
 *   Char <    0x10000  -> 3
 *   Char <   0x200000  -> 4
 *   Char <  0x4000000  -> 5
 *   otherwise          -> 6
 *
 * Char may be evaluated up to five times; do not pass an expression with
 * side effects.
 */
#define UTF8_LENGTH(Char)              \
  ((Char) < 0x80 ? 1 :                 \
   ((Char) < 0x800 ? 2 :               \
    ((Char) < 0x10000 ? 3 :            \
     ((Char) < 0x200000 ? 4 :          \
      ((Char) < 0x4000000 ? 5 : 6)))))
194 #define UTF8_GET(Result, Chars, Count, Mask, Len) \
195 (Result) = (Chars)[0] & (Mask); \
196 for ((Count) = 1; (Count) < (Len); ++(Count)) \
198 if (((Chars)[(Count)] & 0xc0) != 0x80) \
204 (Result) |= ((Chars)[(Count)] & 0x3f); \
207 static const gchar utf8_skip_data[256] = {
208 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
210 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
212 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
218 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
222 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
226 static const gchar *
const g_utf8_skip = utf8_skip_data;
242 g_utf8_strlen (
const gchar * p)
270 g_utf8_get_char (
const gchar * p)
272 int i, mask = 0, len;
274 unsigned char c = (
unsigned char) *p;
313 else if (c < 0x10000)
318 else if (c < 0x200000)
323 else if (c < 0x4000000)
336 for (i = len - 1; i > 0; --i)
338 outbuf[i] = (c & 0x3f) | 0x80;
341 outbuf[0] = c | first;
365 g_utf8_to_ucs4_fast (
const gchar * str,
glong len,
glong * items_written)
385 while (p < str + len && *p)
397 for (i = 0; i < n_chars; i++)
422 wc |= (
guchar) (*p++) & 0x3f;
425 while ((wc & mask) != 0);
464 g_ucs4_to_utf8 (
const gunichar * str,
468 gchar *result = NULL;
473 for (i = 0; len < 0 || i < len; i++)
478 if (str[i] >= 0x80000000)
484 result =
g_malloc (result_length + 1);
490 while (p < result + result_length)
491 p += g_unichar_to_utf8 (str[i++], p);
496 *items_written = p - result;
/*
 * Combining-class lookup in the part-1 page table.
 *
 * combining_class_table_part1[Page] is either
 *   - a value >= G_UNICODE_MAX_TABLE_INDEX, meaning every character on the
 *     page shares one class, stored as (entry - G_UNICODE_MAX_TABLE_INDEX);
 *   - or a row number in cclass_data, which is then indexed by Char (the
 *     offset of the character within its page).
 *
 * Page is evaluated up to three times; no range checking is done here.
 */
#define CC_PART1(Page, Char) \
  ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[combining_class_table_part1[Page]][Char]))
/*
 * Combining-class lookup in the part-2 page table; same encoding as the
 * part-1 table.
 *
 * combining_class_table_part2[Page] is either
 *   - a value >= G_UNICODE_MAX_TABLE_INDEX, meaning every character on the
 *     page shares one class, stored as (entry - G_UNICODE_MAX_TABLE_INDEX);
 *   - or a row number in cclass_data, which is then indexed by Char (the
 *     offset of the character within its page).
 *
 * Page is evaluated up to three times; no range checking is done here.
 */
#define CC_PART2(Page, Char) \
  ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[combining_class_table_part2[Page]][Char]))
541 #define COMBINING_CLASS(Char) \
542 (((Char) <= G_UNICODE_LAST_CHAR_PART1) \
543 ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
544 : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
545 ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
/* Derived counts used by the Hangul composition/decomposition code.
   NCount is the product of VCount and TCount; SCount is the product of
   LCount and NCount.  LCount, VCount and TCount are defined elsewhere. */
#define NCount (VCount * TCount)
#define SCount (LCount * NCount)
580 for (i = 0; i < len - 1; ++i)
583 if (next != 0 && last > next)
587 for (j = i + 1; j > 0; --j)
593 string[j] =
string[j - 1];
625 r[2] =
TBase + TIndex;
639 if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch)
643 int half = (start + end) / 2;
644 if (ch == decomp_table[half].ch)
650 offset = decomp_table[half].compat_offset;
652 offset = decomp_table[half].canon_offset;
656 offset = decomp_table[half].canon_offset;
661 return &(decomp_expansion_string[offset]);
663 else if (half == start)
665 else if (ch > decomp_table[half].ch)
692 if ((SIndex %
TCount) == 0)
696 *result = a + TIndex;
/*
 * Compose-index lookup for the character at offset Char within Page.
 *
 * compose_table[Page] is either
 *   - a value >= G_UNICODE_MAX_TABLE_INDEX, meaning the whole page shares
 *     one index, stored as (entry - G_UNICODE_MAX_TABLE_INDEX);
 *   - or a row number in compose_data, which is then indexed by Char.
 *
 * Page is evaluated up to three times and is not range-checked here;
 * COMPOSE_INDEX() performs the page bound check before calling CI().
 */
#define CI(Page, Char) \
  ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (compose_data[compose_table[Page]][Char]))
/*
 * Compose index of code point Char, or 0 when Char lies on a page beyond
 * COMPOSE_TABLE_LAST (i.e. outside the compose table).
 *
 * The page is (Char >> 8) and the in-page offset is (Char & 0xff); the
 * actual table lookup is delegated to CI().
 *
 * Every use of Char is parenthesized so that an argument containing a
 * low-precedence operator (for example `a | b`) is shifted as a whole in
 * the range check, matching how it is treated in the CI() arguments.
 * Char is evaluated up to three times; avoid side effects in the argument.
 */
#define COMPOSE_INDEX(Char) \
  ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
717 if (combine_hangul (a, b, result))
779 while ((max_len < 0 || p < str + max_len) && *p)
787 decompose_hangul (wc, NULL, &result_len);
792 decomp = find_decomposition (wc, do_compat);
795 n_wc += g_utf8_strlen (decomp);
810 while ((max_len < 0 || p < str + max_len) && *p)
815 gsize old_n_wc = n_wc;
820 decompose_hangul (wc, wc_buffer + n_wc, &result_len);
825 decomp = find_decomposition (wc, do_compat);
831 wc_buffer[n_wc++] = g_utf8_get_char (pd);
834 wc_buffer[n_wc++] = wc;
843 g_unicode_canonical_ordering (wc_buffer + last_start,
845 last_start = old_n_wc;
854 g_unicode_canonical_ordering (wc_buffer + last_start,
863 if (do_compose && n_wc > 0)
869 for (i = 0; i < n_wc; i++)
874 (last_cc == 0 || last_cc != cc) &&
875 combine (wc_buffer[last_start], wc_buffer[i],
876 &wc_buffer[last_start]))
878 for (j = i + 1; j < n_wc; j++)
879 wc_buffer[j - 1] = wc_buffer[j];
942 gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
943 gchar *result = NULL;
946 result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL);
968 return g_utf8_get_char (p);
985 return g_unichar_to_utf8 (c, outbuf);
1016 if (u8_check ((
const uint8_t *) str, n))
1019 return g_utf8_to_ucs4_fast (str, (
glong) len, (
glong *) items_written);
1041 size_t *items_read,
size_t *items_written)
1043 return g_ucs4_to_utf8 (str, len, (
glong *) items_read,
1044 (
glong *) items_written);
1079 if (u8_check ((
const uint8_t *) str, n))
1101 uint32_t *result_wc;
#define COMPOSE_SECOND_SINGLE_START
#define COMPOSE_SECOND_START
#define COMPOSE_FIRST_START
#define COMPOSE_FIRST_SINGLE_START
#define G_UNICODE_NOT_PRESENT_OFFSET
#define g_return_val_if_fail(expr, val)
#define UTF8_COMPUTE(Char, Mask, Len)
int stringprep_unichar_to_utf8(uint32_t c, char *outbuf)
#define COMPOSE_INDEX(Char)
#define G_N_ELEMENTS(arr)
char * stringprep_utf8_nfkc_normalize(const char *str, ssize_t len)
#define UTF8_LENGTH(Char)
#define g_utf8_next_char(p)
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
#define UTF8_GET(Result, Chars, Count, Mask, Len)
uint32_t stringprep_utf8_to_unichar(const char *p)
#define COMBINING_CLASS(Char)
@ G_NORMALIZE_DEFAULT_COMPOSE
@ G_NORMALIZE_ALL_COMPOSE
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)