ICU 62.1
62.1
common
unicode
utf_old.h
Go to the documentation of this file.
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2002-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utf_old.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002sep21
*   created by: Markus W. Scherer
*/
18
145
#ifndef __UTF_OLD_H__
146
#define __UTF_OLD_H__
147
159
/*
 * Default for U_HIDE_OBSOLETE_UTF_OLD_H when the build does not set it.
 * The value 0 keeps the definitions below visible: they are compiled only
 * while U_HIDE_DEPRECATED_API is undefined and this macro evaluates to 0.
 */
#ifndef U_HIDE_OBSOLETE_UTF_OLD_H
#   define U_HIDE_OBSOLETE_UTF_OLD_H 0
#endif
162
163
#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H
164
165
#include "unicode/utf.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
168
169
/* Formerly utf.h, part 1 --------------------------------------------------- */
170
171
#ifdef U_USE_UTF_DEPRECATES
/** 32-bit signed offset type; declared only when U_USE_UTF_DEPRECATES is defined. */
typedef int32_t UTextOffset;
#endif

/** Fixed at 16; the generic UTF_ macros later in this file map to the UTF16_/U16_ families. */
#define UTF_SIZE 16

/*
 * Of the three mode switches only UTF_SAFE is defined (with an empty value);
 * UTF_UNSAFE and UTF_STRICT are explicitly undefined.
 */
#define UTF_SAFE

#undef UTF_UNSAFE

#undef UTF_STRICT

/**
 * First UTF-8 error value. UTF8_NEXT_CHAR_SAFE and UTF8_PREV_CHAR_SAFE store
 * it in c when the byte they read cannot begin (resp. end) a sequence.
 */
#define UTF8_ERROR_VALUE_1 0x15

/** Second UTF-8 error value; treated like UTF8_ERROR_VALUE_1 by UTF_IS_ERROR and UTF_IS_VALID. */
#define UTF8_ERROR_VALUE_2 0x9f

/** Error value stored in c by the UTF16_ and UTF32_ "safe" macros in this file. */
#define UTF_ERROR_VALUE 0xffff

/**
 * True if c is an error value: its low 16 bits are 0xfffe or 0xffff, or it
 * equals one of the two UTF-8 error values.
 */
#define UTF_IS_ERROR(c) \
    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)

/** True if c passes UTF_IS_UNICODE_CHAR and is neither UTF-8 error value. */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)

/** True if uchar lies in the surrogate range 0xd800..0xdfff. */
#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)

/**
 * True if c is in 0xfdd0..0xfdef, or is at least 0xfffe with low 16 bits
 * 0xfffe/0xffff, and in either case does not exceed 0x10ffff.
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
     (uint32_t)(c)<=0x10ffff)

/**
 * True if c is in 0..0xd7ff, or in 0xe000..0x10ffff and not matched by
 * UTF_IS_UNICODE_NONCHAR. Comparisons are done on (uint32_t)c, so negative
 * values are rejected.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))
282
283
/* Formerly utf8.h ---------------------------------------------------------- */
284
296
#ifdef U_UTF8_IMPL
297
// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes.
298
#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
299
U_CFUNC
const
uint8_t
utf8_countTrailBytes
[];
300
#else
301
U_CFUNC
U_IMPORT
const
uint8_t
utf8_countTrailBytes
[];
/* U_IMPORT2? */
/*U_IMPORT*/
302
#endif
303
308
/**
 * Look up the entry of utf8_countTrailBytes[] for a lead byte.
 * The whole argument is parenthesized before the uint8_t cast, so an
 * expression argument (not only a plain variable) is reduced to an index in
 * 0..255 instead of having the cast bind to its first operand only.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
309
314
/**
 * Clear the length-marker bits of a UTF-8 lead byte in place, keeping its low
 * (6-countTrailBytes) bits. leadByte must be a modifiable lvalue.
 */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/** True if bit 7 of the byte is clear (0x00..0x7f). */
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)

/** True if the byte is in 0xc0..0xfd. */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)

/** True if the top two bits of the byte are 10 (0x80..0xbf). */
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)

/** True if code point c is above 0x7f, i.e. does not fit in one byte. */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)

/**
 * Byte count reported for code point c: 1 up to 0x7f, 2 up to 0x7ff, 4 for
 * 0x10000..0x10ffff, and 3 for everything else -- including values above
 * 0x10ffff and negative values, since comparisons are done on uint32_t.
 * The #else variant, which also distinguishes 5- and 6-byte lengths, is
 * compiled out by the "#if 1".
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif

/** Largest value UTF8_CHAR_LENGTH (active variant) can return. */
#define UTF8_MAX_CHAR_LENGTH 4

/** Scales a size by 5/2 using integer arithmetic. */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
366
368
/**
 * Decode into c the code point whose byte sequence contains offset i, without
 * modifying i. A private copy of i is moved to the sequence start with
 * UTF8_SET_CHAR_START_UNSAFE and then read with UTF8_NEXT_CHAR_UNSAFE.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
}

/**
 * Same two-step read as UTF8_GET_CHAR_UNSAFE, but through the _SAFE helpers,
 * which additionally receive start, length and strict. i is not modified.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
}

/**
 * Read s[i] into c and post-increment i. If that byte is in 0xc0..0xf4, its
 * trail-byte count is looked up, the lead byte is masked, and one to three
 * following bytes are folded in (6 bits each), advancing i past them.
 * No bounds or well-formedness checks are made; c must be a modifiable lvalue.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}
400
402
/**
 * Write code point c to s starting at offset i as 1 to 4 bytes, advancing i
 * past the bytes written. The length is chosen by comparing (uint32_t)c with
 * 0x7f, 0x7ff and 0xffff; anything larger takes the 4-byte form. Neither the
 * buffer capacity nor the validity of c is checked. c is evaluated more than
 * once.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
420
422
/** Advance i by one plus the trail-byte count looked up for s[i]; no bounds check. */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}

/** Apply UTF8_FWD_1_UNSAFE n times (not at all if n<=0). */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** While s[i] is a trail byte, decrement i; there is no lower bound check. */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}

/**
 * Read s[i] into c and post-increment i. Bytes below 0x80 are returned as is;
 * a lead byte is handed to utf8_nextCharSafeBody() together with &i, length
 * and strict, and c receives its result; any other byte yields
 * UTF8_ERROR_VALUE_1. i must be an lvalue whose address can be taken.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/**
 * Append code point c at offset i. Values up to 0x7f are stored directly
 * (without consulting length); everything else is delegated to
 * utf8_appendCharSafeBody(), whose return value becomes the new i.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
}

/** Forwards to U8_FWD_1 with the same arguments. */
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)

/** Forwards to U8_FWD_N with the same arguments. */
#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)

/** Forwards to U8_SET_CP_START with the same arguments. */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
469
471
/**
 * Pre-decrement i and read s[i] into c. If that byte is a trail byte, keep
 * stepping backward, accumulating 6 bits per trail byte, until a byte >=0xc0
 * is found; that byte is masked as a lead byte for the number of trail bytes
 * seen and merged in. i ends on the first byte of the sequence. No bounds or
 * well-formedness checks are made.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}

/** Decrement i at least once, and again for as long as s[i] is a trail byte. */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}

/** Apply UTF8_BACK_1_UNSAFE n times (not at all if n<=0). */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Step back one sequence with UTF8_BACK_1_UNSAFE, then forward one with UTF8_FWD_1_UNSAFE. */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}

/**
 * Pre-decrement i and read s[i] into c. Bytes below 0x80 are returned as is;
 * a byte in 0x80..0xbf is handed to utf8_prevCharSafeBody() together with
 * start, &i and strict, and c receives its result; a byte above 0xbf yields
 * UTF8_ERROR_VALUE_1. i must be an lvalue whose address can be taken.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/** Forwards to U8_BACK_1 with the same arguments. */
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)

/** Forwards to U8_BACK_N with the same arguments. */
#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)

/** Forwards to U8_SET_CP_LIMIT with the same arguments. */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
533
534
/* Formerly utf16.h --------------------------------------------------------- */
535
537
/** True if uchar is in 0xd800..0xdbff. */
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)

/** True if uchar is in 0xdc00..0xdfff. */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/**
 * Tests only bit 10 of c: true when it is clear. Meaningful for a value
 * already known to be a surrogate, where a clear bit means 0xd800..0xdbff.
 */
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)

/** Constant subtracted by UTF16_GET_PAIR_VALUE; evaluates to 0x35fdc00. */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/** Combine a first and a second surrogate into the supplementary code point they encode. */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)

/** First surrogate for a supplementary code point, cast to UChar. */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/** Second surrogate for a supplementary code point, cast to UChar. */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/** Alias of UTF_FIRST_SURROGATE. */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/** Alias of UTF_SECOND_SURROGATE. */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

/** True if uchar is not a surrogate (negation of UTF_IS_SURROGATE). */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Alias of UTF_IS_FIRST_SURROGATE. */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/** Alias of UTF_IS_SECOND_SURROGATE. */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)

/** True if code point c is above 0xffff. */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/** 1 if (uint32_t)c is at most 0xffff, otherwise 2. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/** Largest value UTF16_CHAR_LENGTH can return. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Returns size unchanged. */
#define UTF16_ARRAY_SIZE(size) (size)
584
596
/**
 * Read into c the code point that has a code unit at offset i; i is not
 * modified. If s[i] is a surrogate it is combined with s[i+1] (when bit 10 is
 * clear) or with s[i-1] (otherwise), without checking that the neighbour
 * exists or is a matching surrogate.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}

/**
 * Checked variant of UTF16_GET_CHAR_UNSAFE; i is not modified. A surrogate at
 * s[i] is combined with its neighbour only if that neighbour lies inside
 * [start, length) and is the matching kind of surrogate. An unmatched
 * surrogate is returned as is when strict is false and replaced by
 * UTF_ERROR_VALUE when strict is true. With strict true, a non-surrogate
 * that fails UTF_IS_UNICODE_CHAR is also replaced by UTF_ERROR_VALUE.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
633
635
/**
 * Read s[i] into c and post-increment i; if that unit is a first surrogate,
 * combine it with the next unit and advance i once more. The next unit is
 * neither bounds-checked nor verified to be a second surrogate.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}

/**
 * Write code point c at offset i as one unit if (uint32_t)c<=0xffff, else as
 * a surrogate pair, advancing i past what was written. Neither the buffer
 * capacity nor the validity of c is checked.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}

/** Advance i by one unit, or by two if the unit at the old i is a first surrogate. */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

/** Apply UTF16_FWD_1_UNSAFE n times (not at all if n<=0). */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** If s[i] is a second surrogate, decrement i; no lower bound check. */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}
674
676
/**
 * Read s[i] into c and post-increment i. A first surrogate is combined with
 * the following unit only if i<length and that unit is a second surrogate, in
 * which case i advances past it. An unmatched first surrogate is returned as
 * is when strict is false and replaced by UTF_ERROR_VALUE when strict is
 * true. With strict true, any other unit failing UTF_IS_UNICODE_CHAR is also
 * replaced by UTF_ERROR_VALUE.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Append code point c at offset i, advancing i. Values up to 0xffff are
 * written as one unit without consulting length. Values up to 0x10ffff are
 * written as a surrogate pair if i+1<length; otherwise, and for values above
 * 0x10ffff, the single unit UTF_ERROR_VALUE is written instead.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}

/** Forwards to U16_FWD_1 with the same arguments. */
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)

/** Forwards to U16_FWD_N with the same arguments. */
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)

/** Forwards to U16_SET_CP_START with the same arguments. */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
718
720
/**
 * Pre-decrement i and read s[i] into c; if that unit is a second surrogate,
 * step back once more and combine it with the preceding unit, which is
 * neither bounds-checked nor verified to be a first surrogate.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

/** Decrement i by one unit, or by two if the unit stepped onto is a second surrogate. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/** Apply UTF16_BACK_1_UNSAFE n times (not at all if n<=0). */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** If the unit just before i is a first surrogate, increment i; s[i-1] is read without a bounds check. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}

/**
 * Pre-decrement i and read s[i] into c. A second surrogate is combined with
 * the preceding unit only if i>start and that unit is a first surrogate, in
 * which case i moves back onto it. An unmatched second surrogate is returned
 * as is when strict is false and replaced by UTF_ERROR_VALUE when strict is
 * true. With strict true, any other unit failing UTF_IS_UNICODE_CHAR is also
 * replaced by UTF_ERROR_VALUE.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Forwards to U16_BACK_1 with the same arguments. */
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)

/** Forwards to U16_BACK_N with the same arguments. */
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)

/** Forwards to U16_SET_CP_LIMIT with the same arguments. */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
777
778
/* Formerly utf32.h --------------------------------------------------------- */
779
780
/*
 * Old documentation:
 *
 * This file defines macros to deal with UTF-32 code units and code points.
 * Signatures and semantics are the same as for the similarly named macros
 * in utf16.h.
 * utf32.h is included by utf.h after unicode/umachine.h
 * and some common definitions.
 * <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies and all macro statements should be terminated with semicolon.</p>
 */
792
793
/* internal definitions ----------------------------------------------------- */
794
796
/**
 * Acceptance test used by the UTF32_..._SAFE macros: with strict false, c only
 * has to be at most 0x10ffff (compared as uint32_t); with strict true, c has
 * to pass UTF_IS_UNICODE_CHAR.
 */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))
800
801
/*
 * For the semantics of all of these macros, see utf16.h.
 * The UTF-32 versions are trivial because any code point is
 * encoded using exactly one code unit.
 */
806
807
/* single-code point definitions -------------------------------------------- */
808
809
/* classes of code unit values */
810
812
/** Always 1; the argument is ignored. */
#define UTF32_IS_SINGLE(uchar) 1

/** Always 0; the argument is ignored. */
#define UTF32_IS_LEAD(uchar) 0

/** Always 0; the argument is ignored. */
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

/** Always 0; the argument is ignored. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0

/** Always 1; the argument is ignored. */
#define UTF32_CHAR_LENGTH(c) 1

/** Largest value UTF32_CHAR_LENGTH can return. */
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

/** Returns size unchanged. */
#define UTF32_ARRAY_SIZE(size) (size)
831
833
/** Copy s[i] into c; i is not modified. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/**
 * Copy s[i] into c, then replace c with UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). start and length are accepted but not used.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* definitions with forward iteration --------------------------------------- */

/** Copy s[i] into c and post-increment i. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/** Store c at s[i] and post-increment i; c is not validated. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/** Increment i by one. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/** Add n to i. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/** Does nothing. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}

/**
 * Copy s[i] into c, post-increment i, then replace c with UTF_ERROR_VALUE if
 * it fails UTF32_IS_SAFE(c, strict). length is accepted but not used.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Store c at s[i] if (uint32_t)c<=0x10ffff, otherwise store 0xfffd (note: not
 * UTF_ERROR_VALUE); i is post-incremented either way. length is accepted but
 * not used.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}

/** Increment i by one; length is accepted but not checked. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/** Add n to i, then clamp i to length if it went past it. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}

/** Does nothing. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}
903
904
/* definitions with backward iteration -------------------------------------- */
905
907
/** Pre-decrement i and copy s[i] into c. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/** Decrement i by one. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/** Subtract n from i. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/** Does nothing. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}

/**
 * Pre-decrement i, copy s[i] into c, then replace c with UTF_ERROR_VALUE if
 * it fails UTF32_IS_SAFE(c, strict). start is accepted but not used.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Decrement i by one; start is accepted but not checked. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/** Subtract n from i, then clamp i to start if it went below it. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}

/**
 * Does nothing.
 * NOTE(review): takes (s, i, length), whereas UTF8_SET_CHAR_LIMIT_SAFE and
 * UTF16_SET_CHAR_LIMIT_SAFE take (s, start, i, length). The parameter list is
 * left as is so existing three-argument uses keep compiling.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}
949
950
/* Formerly utf.h, part 2 --------------------------------------------------- */
951
957
/*
 * Generic UTF_ names with explicit _UNSAFE/_SAFE suffixes. Each macro forwards
 * its arguments unchanged to the UTF16_ macro with the same suffix.
 */
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)

#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)


#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)

#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)


#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)

#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)


#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)

#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)


#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)

#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)


#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)

#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)


#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)

#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)


#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)

#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)


#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)

#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)


#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)

#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
1027
1028
/* Define default macros (UTF-16 "safe") ------------------------------------ */
1029
1035
/*
 * Suffix-less generic UTF_ names. All but two forward to U16_ macros, which
 * this file does not define (presumably supplied by the unicode/utf16.h
 * include above). The exceptions are UTF_NEED_MULTIPLE_UCHAR and
 * UTF_APPEND_CHAR, which forward to UTF16_NEED_MULTIPLE_UCHAR and
 * UTF16_APPEND_CHAR_SAFE from this file.
 */
#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)

#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)

#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)

#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)

#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)

#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH

#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)

#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)

#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)

#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)

#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)

#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)

#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)

#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)

#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
1201
1202
#endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H
1203
1204
#endif
utf.h
C API: Code point macros.
U_IMPORT
#define U_IMPORT
Definition:
platform.h:813
utf8_countTrailBytes
U_CFUNC const U_IMPORT uint8_t utf8_countTrailBytes[]
Definition:
utf_old.h:301
utf16.h
C API: 16-bit Unicode handling macros.
U_CFUNC
#define U_CFUNC
Definition:
umachine.h:83
utf8.h
C API: 8-bit Unicode handling macros.
Generated by
1.8.17