ICU 62.1  62.1
uspoof.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 2008-2016, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ***************************************************************************
8 * file name: uspoof.h
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2008Feb13
14 * created by: Andy Heninger
15 *
16 * Unicode Spoof Detection
17 */
18 
19 #ifndef USPOOF_H
20 #define USPOOF_H
21 
22 #include "unicode/utypes.h"
23 #include "unicode/uset.h"
24 #include "unicode/parseerr.h"
25 #include "unicode/localpointer.h"
26 
27 #if !UCONFIG_NO_NORMALIZATION
28 
29 
30 #if U_SHOW_CPLUSPLUS_API
31 #include "unicode/unistr.h"
32 #include "unicode/uniset.h"
33 #endif
34 
35 
370 struct USpoofChecker;
374 typedef struct USpoofChecker USpoofChecker;
376 struct USpoofCheckResult;
382 
390 typedef enum USpoofChecks {
400 
410 
420 
431 
432 #ifndef U_HIDE_DEPRECATED_API
433 
439 #endif /* U_HIDE_DEPRECATED_API */
440 
455 
456 #ifndef U_HIDE_DEPRECATED_API
457 
463 #endif /* U_HIDE_DEPRECATED_API */
464 
472 
479 
487 
488 #ifndef U_HIDE_DRAFT_API
489 
509 #endif /* U_HIDE_DRAFT_API */
510 
517 
530  USPOOF_AUX_INFO = 0x40000000
531 
532  } USpoofChecks;
533 
534 
544  typedef enum URestrictionLevel {
551  USPOOF_ASCII = 0x10000000,
592  USPOOF_UNRESTRICTIVE = 0x60000000,
599 #ifndef U_HIDE_INTERNAL_API
600 
605 #endif /* U_HIDE_INTERNAL_API */
607 
618 U_STABLE USpoofChecker * U_EXPORT2
619 uspoof_open(UErrorCode *status);
620 
621 
643 U_STABLE USpoofChecker * U_EXPORT2
644 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
645  UErrorCode *pErrorCode);
646 
677 U_STABLE USpoofChecker * U_EXPORT2
678 uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
679  const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
680  int32_t *errType, UParseError *pe, UErrorCode *status);
681 
682 
688 U_STABLE void U_EXPORT2
690 
691 #if U_SHOW_CPLUSPLUS_API
692 
694 
705 
707 
708 #endif
709 
719 U_STABLE USpoofChecker * U_EXPORT2
720 uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
721 
722 
760 U_STABLE void U_EXPORT2
761 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
762 
774 U_STABLE int32_t U_EXPORT2
775 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
776 
788 U_STABLE void U_EXPORT2
790 
791 
799 U_STABLE URestrictionLevel U_EXPORT2
801 
844 U_STABLE void U_EXPORT2
845 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
846 
868 U_STABLE const char * U_EXPORT2
870 
871 
890 U_STABLE void U_EXPORT2
891 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
892 
893 
914 U_STABLE const USet * U_EXPORT2
916 
917 
918 #if U_SHOW_CPLUSPLUS_API
919 
937 U_STABLE void U_EXPORT2
939 
940 
961 U_STABLE const icu::UnicodeSet * U_EXPORT2
963 #endif
964 
965 
998 U_STABLE int32_t U_EXPORT2
999 uspoof_check(const USpoofChecker *sc,
1000  const UChar *id, int32_t length,
1001  int32_t *position,
1002  UErrorCode *status);
1003 
1004 
1037 U_STABLE int32_t U_EXPORT2
1039  const char *id, int32_t length,
1040  int32_t *position,
1041  UErrorCode *status);
1042 
1043 
1044 #if U_SHOW_CPLUSPLUS_API
1045 
1073 U_STABLE int32_t U_EXPORT2
1075  const icu::UnicodeString &id,
1076  int32_t *position,
1077  UErrorCode *status);
1078 #endif
1079 
1080 
1109 U_STABLE int32_t U_EXPORT2
1110 uspoof_check2(const USpoofChecker *sc,
1111  const UChar* id, int32_t length,
1112  USpoofCheckResult* checkResult,
1113  UErrorCode *status);
1114 
1146 U_STABLE int32_t U_EXPORT2
1148  const char *id, int32_t length,
1149  USpoofCheckResult* checkResult,
1150  UErrorCode *status);
1151 
1152 #if U_SHOW_CPLUSPLUS_API
1153 
1178 U_STABLE int32_t U_EXPORT2
1180  const icu::UnicodeString &id,
1181  USpoofCheckResult* checkResult,
1182  UErrorCode *status);
1183 #endif
1184 
1203 U_STABLE USpoofCheckResult* U_EXPORT2
1205 
1213 U_STABLE void U_EXPORT2
1215 
1216 #if U_SHOW_CPLUSPLUS_API
1217 
1219 
1230 
1232 
1233 #endif
1234 
1249 U_STABLE int32_t U_EXPORT2
1250 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
1251 
1262 U_STABLE URestrictionLevel U_EXPORT2
1264 
1276 U_STABLE const USet* U_EXPORT2
1277 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
1278 
1279 
1323 U_STABLE int32_t U_EXPORT2
1325  const UChar *id1, int32_t length1,
1326  const UChar *id2, int32_t length2,
1327  UErrorCode *status);
1328 
1329 
1330 
1356 U_STABLE int32_t U_EXPORT2
1358  const char *id1, int32_t length1,
1359  const char *id2, int32_t length2,
1360  UErrorCode *status);
1361 
1362 
1363 
1364 
1365 #if U_SHOW_CPLUSPLUS_API
1366 
1387 U_STABLE int32_t U_EXPORT2
1389  const icu::UnicodeString &s1,
1390  const icu::UnicodeString &s2,
1391  UErrorCode *status);
1392 #endif
1393 
1394 
1426 U_STABLE int32_t U_EXPORT2
1428  uint32_t type,
1429  const UChar *id, int32_t length,
1430  UChar *dest, int32_t destCapacity,
1431  UErrorCode *status);
1432 
1466 U_STABLE int32_t U_EXPORT2
1468  uint32_t type,
1469  const char *id, int32_t length,
1470  char *dest, int32_t destCapacity,
1471  UErrorCode *status);
1472 
1473 #if U_SHOW_CPLUSPLUS_API
1474 
1497 U_I18N_API icu::UnicodeString & U_EXPORT2
1499  uint32_t type,
1500  const icu::UnicodeString &id,
1501  icu::UnicodeString &dest,
1502  UErrorCode *status);
1503 #endif /* U_SHOW_CPLUSPLUS_API */
1504 
1517 U_STABLE const USet * U_EXPORT2
1519 
1532 U_STABLE const USet * U_EXPORT2
1534 
1535 #if U_SHOW_CPLUSPLUS_API
1536 
1549 U_STABLE const icu::UnicodeSet * U_EXPORT2
1551 
1564 U_STABLE const icu::UnicodeSet * U_EXPORT2
1566 
1567 #endif /* U_SHOW_CPLUSPLUS_API */
1568 
1591 U_STABLE int32_t U_EXPORT2
1593  void *data, int32_t capacity,
1594  UErrorCode *status);
1595 
1596 
1597 #endif
1598 
1599 #endif /* USPOOF_H */
USPOOF_UNDEFINED_RESTRICTIVE
@ USPOOF_UNDEFINED_RESTRICTIVE
An undefined restriction level.
Definition: uspoof.h:604
USPOOF_ASCII
@ USPOOF_ASCII
All characters in the string are in the identifier profile and all characters in the string are in th...
Definition: uspoof.h:551
uspoof_getAllowedUnicodeSet
const U_STABLE icu::UnicodeSet *U_EXPORT2 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status)
Get a UnicodeSet for the characters permitted in an identifier.
uspoof_setChecks
U_STABLE void U_EXPORT2 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status)
Specify the bitmask of checks that will be performed by uspoof_check.
uspoof_checkUTF8
U_STABLE int32_t U_EXPORT2 uspoof_checkUTF8(const USpoofChecker *sc, const char *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
USpoofChecks
USpoofChecks
Enum for the kinds of checks that USpoofChecker can perform.
Definition: uspoof.h:390
parseerr.h
C API: Parse Error Information.
uspoof_check2UnicodeString
U_STABLE int32_t U_EXPORT2 uspoof_check2UnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
USPOOF_RESTRICTION_LEVEL_MASK
@ USPOOF_RESTRICTION_LEVEL_MASK
Mask for selecting the Restriction Level bits from the return value of uspoof_check.
Definition: uspoof.h:598
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
icu::UnicodeSet
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:278
uspoof_getRestrictionLevel
U_STABLE URestrictionLevel U_EXPORT2 uspoof_getRestrictionLevel(const USpoofChecker *sc)
Get the Restriction Level that will be tested if the checks include USPOOF_RESTRICTION_LEVEL.
uspoof_areConfusable
U_STABLE int32_t U_EXPORT2 uspoof_areConfusable(const USpoofChecker *sc, const UChar *id1, int32_t length1, const UChar *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable.
uspoof_getSkeleton
U_STABLE int32_t U_EXPORT2 uspoof_getSkeleton(const USpoofChecker *sc, uint32_t type, const UChar *id, int32_t length, UChar *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
USPOOF_ANY_CASE
@ USPOOF_ANY_CASE
This flag is deprecated and no longer affects the behavior of SpoofChecker.
Definition: uspoof.h:438
USPOOF_UNRESTRICTIVE
@ USPOOF_UNRESTRICTIVE
Any valid identifiers, including characters outside of the Identifier Profile.
Definition: uspoof.h:592
U_I18N_API
#define U_I18N_API
Definition: utypes.h:360
uspoof_setAllowedUnicodeSet
U_STABLE void U_EXPORT2 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
USPOOF_HIGHLY_RESTRICTIVE
@ USPOOF_HIGHLY_RESTRICTIVE
The string classifies as Single Script, or all characters in the string are in the identifier profile...
Definition: uspoof.h:572
uspoof_getInclusionSet
const U_STABLE USet *U_EXPORT2 uspoof_getInclusionSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in http://unicode....
USpoofCheckResult
struct USpoofCheckResult USpoofCheckResult
Definition: uspoof.h:381
uspoof_getInclusionUnicodeSet
const U_STABLE icu::UnicodeSet *U_EXPORT2 uspoof_getInclusionUnicodeSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in http://unicode....
UParseError
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
uspoof_areConfusableUnicodeString
U_STABLE int32_t U_EXPORT2 uspoof_areConfusableUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &s1, const icu::UnicodeString &s2, UErrorCode *status)
A version of uspoof_areConfusable accepting UnicodeStrings.
USPOOF_CHAR_LIMIT
@ USPOOF_CHAR_LIMIT
Check that an identifier contains only characters from a specified set of acceptable characters.
Definition: uspoof.h:478
USPOOF_ALL_CHECKS
@ USPOOF_ALL_CHECKS
Enable all spoof checks.
Definition: uspoof.h:516
uspoof_getRecommendedSet
const U_STABLE USet *U_EXPORT2 uspoof_getRecommendedSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in http:/...
uspoof_setRestrictionLevel
U_STABLE void U_EXPORT2 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel)
Set the loosest restriction level allowed for strings.
uspoof_getCheckResultNumerics
const U_STABLE USet *U_EXPORT2 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status)
Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled; otherwis...
icu::UnicodeString
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:286
uspoof_getChecks
U_STABLE int32_t U_EXPORT2 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status)
Get the set of checks that this Spoof Checker has been configured to perform.
uspoof_openCheckResult
U_STABLE USpoofCheckResult *U_EXPORT2 uspoof_openCheckResult(UErrorCode *status)
Create a USpoofCheckResult, used by the uspoof_check2 class of functions to return information about ...
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers,...
Definition: utypes.h:396
USPOOF_SINGLE_SCRIPT_RESTRICTIVE
@ USPOOF_SINGLE_SCRIPT_RESTRICTIVE
The string classifies as ASCII-Only, or all characters in the string are in the identifier profile an...
Definition: uspoof.h:558
uspoof_openFromSource
USpoofChecker * uspoof_openFromSource(const char *confusables, int32_t confusablesLen, const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, int32_t *errType, UParseError *pe, UErrorCode *status)
Open a Spoof Checker from the source form of the spoof data.
uspoof_check
U_STABLE int32_t U_EXPORT2 uspoof_check(const USpoofChecker *sc, const UChar *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
USPOOF_SINGLE_SCRIPT
@ USPOOF_SINGLE_SCRIPT
Check that an identifier contains only characters from a single script (plus chars from the common an...
Definition: uspoof.h:462
uspoof_getCheckResultRestrictionLevel
U_STABLE URestrictionLevel U_EXPORT2 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status)
Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check was enabled; ot...
uspoof_closeCheckResult
U_STABLE void U_EXPORT2 uspoof_closeCheckResult(USpoofCheckResult *checkResult)
Close a USpoofCheckResult, freeing any memory that was being held by its implementation.
uspoof_open
USpoofChecker * uspoof_open(UErrorCode *status)
Create a Unicode Spoof Checker, configured to perform all checks except for USPOOF_LOCALE_LIMIT and U...
uspoof_getAllowedLocales
const U_STABLE char *U_EXPORT2 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status)
Get a list of locales for the scripts that are acceptable in strings to be checked.
U_DEFINE_LOCAL_OPEN_POINTER
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
Definition: localpointer.h:487
USPOOF_SINGLE_SCRIPT_CONFUSABLE
@ USPOOF_SINGLE_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:399
USPOOF_MIXED_NUMBERS
@ USPOOF_MIXED_NUMBERS
Check that an identifier does not mix numbers from different numbering systems.
Definition: uspoof.h:486
USPOOF_AUX_INFO
@ USPOOF_AUX_INFO
Enable the return of auxiliary (non-error) information in the upper bits of the check results value.
Definition: uspoof.h:530
uspoof_serialize
U_STABLE int32_t U_EXPORT2 uspoof_serialize(USpoofChecker *sc, void *data, int32_t capacity, UErrorCode *status)
Serialize the data for a spoof detector into a chunk of memory.
uspoof_clone
USpoofChecker * uspoof_clone(const USpoofChecker *sc, UErrorCode *status)
Clone a Spoof Checker.
uspoof_getSkeletonUTF8
U_STABLE int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, int32_t length, char *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
uset.h
C API: Unicode Set.
uspoof_getSkeletonUnicodeString
U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, uint32_t type, const icu::UnicodeString &id, icu::UnicodeString &dest, UErrorCode *status)
Get the "skeleton" for an identifier.
localpointer.h
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
uspoof_openFromSerialized
USpoofChecker * uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode)
Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
uspoof_setAllowedLocales
U_STABLE void U_EXPORT2 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status)
Limit characters that are acceptable in identifiers being checked to those normally used with the lan...
uspoof_check2UTF8
U_STABLE int32_t U_EXPORT2 uspoof_check2UTF8(const USpoofChecker *sc, const char *id, int32_t length, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
USPOOF_MINIMALLY_RESTRICTIVE
@ USPOOF_MINIMALLY_RESTRICTIVE
All characters in the string are in the identifier profile.
Definition: uspoof.h:586
U_STABLE
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
uspoof_getCheckResultChecks
U_STABLE int32_t U_EXPORT2 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status)
Indicates which of the spoof check(s) have failed.
USpoofChecker
struct USpoofChecker USpoofChecker
typedef for C of USpoofChecker
Definition: uspoof.h:374
uspoof_getAllowedChars
const U_STABLE USet *U_EXPORT2 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status)
Get a USet for the characters permitted in an identifier.
UChar
uint16_t UChar
Definition: umachine.h:353
uspoof_getRecommendedUnicodeSet
const U_STABLE icu::UnicodeSet *U_EXPORT2 uspoof_getRecommendedUnicodeSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in http:/...
uspoof_areConfusableUTF8
U_STABLE int32_t U_EXPORT2 uspoof_areConfusableUTF8(const USpoofChecker *sc, const char *id1, int32_t length1, const char *id2, int32_t length2, UErrorCode *status)
A version of uspoof_areConfusable accepting strings in UTF-8 format.
USPOOF_MODERATELY_RESTRICTIVE
@ USPOOF_MODERATELY_RESTRICTIVE
The string classifies as Highly Restrictive, or all characters in the string are in the identifier pr...
Definition: uspoof.h:580
USet
struct USet USet
Definition: ucnv.h:69
uspoof_setAllowedChars
U_STABLE void U_EXPORT2 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
uspoof_check2
U_STABLE int32_t U_EXPORT2 uspoof_check2(const USpoofChecker *sc, const UChar *id, int32_t length, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
uniset.h
C++ API: Unicode Set.
U_NAMESPACE_END
#define U_NAMESPACE_END
Definition: uversion.h:138
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
Definition: uversion.h:137
uspoof_close
void uspoof_close(USpoofChecker *sc)
Close a Spoof Checker, freeing any memory that was being held by its implementation.
LocalUSpoofCheckerPointer
USPOOF_HIDDEN_OVERLAY
@ USPOOF_HIDDEN_OVERLAY
Check that an identifier does not have a combining character following a character in which that comb...
Definition: uspoof.h:508
URestrictionLevel
URestrictionLevel
Constants from UAX #39 for use in uspoof_setRestrictionLevel, and for returned identifier restriction...
Definition: uspoof.h:544
unistr.h
C++ API: Unicode String.
USPOOF_RESTRICTION_LEVEL
@ USPOOF_RESTRICTION_LEVEL
Check that an identifier is no looser than the specified RestrictionLevel.
Definition: uspoof.h:454
USPOOF_WHOLE_SCRIPT_CONFUSABLE
@ USPOOF_WHOLE_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:419
USPOOF_INVISIBLE
@ USPOOF_INVISIBLE
Check an identifier for the presence of invisible characters, such as zero-width spaces,...
Definition: uspoof.h:471
uspoof_checkUnicodeString
U_STABLE int32_t U_EXPORT2 uspoof_checkUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
LocalUSpoofCheckResultPointer
USPOOF_CONFUSABLE
@ USPOOF_CONFUSABLE
Enable this flag in uspoof_setChecks to turn on all types of confusables.
Definition: uspoof.h:430
USPOOF_MIXED_SCRIPT_CONFUSABLE
@ USPOOF_MIXED_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:409