ICU 62.1
ucsdet.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  **********************************************************************
5  * Copyright (C) 2005-2013, International Business Machines
6  * Corporation and others. All Rights Reserved.
7  **********************************************************************
8  * file name: ucsdet.h
9  * encoding: UTF-8
10  * indentation:4
11  *
12  * created on: 2005Aug04
13  * created by: Andy Heninger
14  *
15  * ICU Character Set Detection, API for C
16  *
17  * Draft version 18 Oct 2005
18  *
19  */
20 
21 #ifndef __UCSDET_H
22 #define __UCSDET_H
23 
24 #include "unicode/utypes.h"
25 
26 #if !UCONFIG_NO_CONVERSION
27 
28 #include "unicode/localpointer.h"
29 #include "unicode/uenum.h"
30 
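55 struct UCharsetDetector;
60 typedef struct UCharsetDetector UCharsetDetector;
61 
62 struct UCharsetMatch;
68 typedef struct UCharsetMatch UCharsetMatch;
69 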
78 U_STABLE UCharsetDetector * U_EXPORT2
79 ucsdet_open(UErrorCode *status);
80 
90 U_STABLE void U_EXPORT2
91 ucsdet_close(UCharsetDetector *ucsd);
92 
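93 #if U_SHOW_CPLUSPLUS_API
94 
95 U_NAMESPACE_BEGIN
96 
106 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close);
107 
108 U_NAMESPACE_END
109 
110 #endif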
111 
127 U_STABLE void U_EXPORT2
128 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
129 
130 
149 U_STABLE void U_EXPORT2
150 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
151 
152 
178 U_STABLE const UCharsetMatch * U_EXPORT2
179 ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status);
180 
181 
212 U_STABLE const UCharsetMatch ** U_EXPORT2
213 ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
214 
215 
216 
232 U_STABLE const char * U_EXPORT2
233 ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
234 
258 U_STABLE int32_t U_EXPORT2
259 ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
260 
290 U_STABLE const char * U_EXPORT2
291 ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
292 
293 
316 U_STABLE int32_t U_EXPORT2
317 ucsdet_getUChars(const UCharsetMatch *ucsm,
318  UChar *buf, int32_t cap, UErrorCode *status);
319 
320 
321 
350 U_STABLE UEnumeration * U_EXPORT2
351 ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
352 
364 U_STABLE UBool U_EXPORT2
365 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
366 
367 
379 U_STABLE UBool U_EXPORT2
380 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
381 
382 #ifndef U_HIDE_INTERNAL_API
383 
396 U_INTERNAL UEnumeration * U_EXPORT2
397 ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
398 
412 U_INTERNAL void U_EXPORT2
413 ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
414 #endif /* U_HIDE_INTERNAL_API */
415 
416 #endif
417 #endif /* __UCSDET_H */
418 
419 
ucsdet_open
UCharsetDetector * ucsdet_open(UErrorCode *status)
Open a charset detector.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
LocalUCharsetDetectorPointer
UBool
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
ucsdet_detectAll
const UCharsetMatch ** ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status)
Find all charset matches that appear to be consistent with the input, returning an array of results.
ucsdet_getDetectableCharsets
UEnumeration * ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of detectable charsets - over the charsets that are enabled by the specified charset detector.
ucsdet_enableInputFilter
UBool ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
Enable filtering of input text.
ucsdet_getConfidence
int32_t ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
Get a confidence number for the quality of the match of the byte data with the charset.
ucsdet_setDeclaredEncoding
void ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
Set the declared encoding for charset detection.
UCharsetDetector
struct UCharsetDetector UCharsetDetector
Structure representing a charset detector.
Definition: ucsdet.h:60
ucsdet_getUChars
int32_t ucsdet_getUChars(const UCharsetMatch *ucsm, UChar *buf, int32_t cap, UErrorCode *status)
Get the entire input text as a UChar string, placing it into a caller-supplied buffer.
ucsdet_getName
const char * ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
Get the name of the charset represented by a UCharsetMatch.
ucsdet_getLanguage
const char * ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
Get the RFC 3066 code for the language of the input data.
UErrorCode
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition: utypes.h:396
ucsdet_close
void ucsdet_close(UCharsetDetector *ucsd)
Close a charset detector.
U_INTERNAL
#define U_INTERNAL
This is used to declare a function as an internal ICU C API
Definition: umachine.h:119
ucsdet_setText
void ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
Set the input byte data whose charset is to be detected.
ucsdet_getAllDetectableCharsets
UEnumeration * ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of all detectable charsets - over the charsets that are known to the charset detection service.
ucsdet_detect
const UCharsetMatch * ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
Return the charset that best matches the supplied input data.
U_DEFINE_LOCAL_OPEN_POINTER
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
Definition: localpointer.h:487
localpointer.h
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_STABLE
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
ucsdet_setDetectableCharset
void ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
Enable or disable individual charset encoding.
UChar
uint16_t UChar
Definition: umachine.h:353
UCharsetMatch
struct UCharsetMatch UCharsetMatch
Opaque structure representing a match that was identified from a charset detection operation.
Definition: ucsdet.h:68
ucsdet_isInputFilterEnabled
UBool ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
Test whether input filtering is enabled for this charset detector.
UEnumeration
struct UEnumeration UEnumeration
structure representing an enumeration object instance
Definition: uenum.h:43
U_NAMESPACE_END
#define U_NAMESPACE_END
Definition: uversion.h:138
U_NAMESPACE_BEGIN
#define U_NAMESPACE_BEGIN
Definition: uversion.h:137
uenum.h
C API: String Enumeration.