ICU 78.3  78.3
tblcoll.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
8 */
9 
62 #ifndef TBLCOLL_H
63 #define TBLCOLL_H
64 
65 #include "unicode/utypes.h"
66 
67 #if U_SHOW_CPLUSPLUS_API
68 
69 #if !UCONFIG_NO_COLLATION
70 
71 #include "unicode/coll.h"
72 #include "unicode/locid.h"
73 #include "unicode/uiter.h"
74 #include "unicode/ucol.h"
75 
76 U_NAMESPACE_BEGIN
77 
78 struct CollationCacheEntry;
79 struct CollationData;
80 struct CollationSettings;
81 struct CollationTailoring;
85 class StringSearch;
89 class CollationElementIterator;
90 class CollationKey;
91 class SortKeyByteSink;
92 class UnicodeSet;
93 class UnicodeString;
94 class UVector64;
95 
116 public:
125  U_I18N_API RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
126 
137  ECollationStrength collationStrength,
138  UErrorCode& status);
139 
150  UColAttributeValue decompositionMode,
151  UErrorCode& status);
152 
164  ECollationStrength collationStrength,
165  UColAttributeValue decompositionMode,
166  UErrorCode& status);
167 
168 #ifndef U_HIDE_INTERNAL_API
169 
174  UParseError& parseError,
175  UnicodeString& reason,
176  UErrorCode& errorCode);
177 #endif /* U_HIDE_INTERNAL_API */
178 
185 
203  U_I18N_API RuleBasedCollator(const uint8_t* bin,
204  int32_t length,
205  const RuleBasedCollator* base,
206  UErrorCode& status);
207 
212  U_I18N_API virtual ~RuleBasedCollator();
213 
219  U_I18N_API RuleBasedCollator& operator=(const RuleBasedCollator& other);
220 
227  U_I18N_API virtual bool operator==(const Collator& other) const override;
228 
234  U_I18N_API virtual RuleBasedCollator* clone() const override;
235 
247  createCollationElementIterator(const UnicodeString& source) const;
248 
259  createCollationElementIterator(const CharacterIterator& source) const;
260 
261  // Make deprecated versions of Collator::compare() visible.
262  using Collator::compare;
263 
276  U_I18N_API virtual UCollationResult compare(const UnicodeString& source,
277  const UnicodeString& target,
278  UErrorCode& status) const override;
279 
293  U_I18N_API virtual UCollationResult compare(const UnicodeString& source,
294  const UnicodeString& target,
295  int32_t length,
296  UErrorCode& status) const override;
297 
314  U_I18N_API virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
315  const char16_t* target, int32_t targetLength,
316  UErrorCode& status) const override;
317 
330  UCharIterator& tIter,
331  UErrorCode& status) const override;
332 
346  U_I18N_API virtual UCollationResult compareUTF8(const StringPiece& source,
347  const StringPiece& target,
348  UErrorCode& status) const override;
349 
364  U_I18N_API virtual CollationKey& getCollationKey(const UnicodeString& source,
365  CollationKey& key,
366  UErrorCode& status) const override;
367 
383  U_I18N_API virtual CollationKey& getCollationKey(const char16_t* source,
384  int32_t sourceLength,
385  CollationKey& key,
386  UErrorCode& status) const override;
387 
393  U_I18N_API virtual int32_t hashCode() const override;
394 
395 #ifndef U_FORCE_HIDE_DEPRECATED_API
396 
406  U_I18N_API virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const override;
407 #endif // U_FORCE_HIDE_DEPRECATED_API
408 
414  U_I18N_API const UnicodeString& getRules() const;
415 
421  U_I18N_API virtual void getVersion(UVersionInfo info) const override;
422 
423 #ifndef U_HIDE_DEPRECATED_API
424 
440  U_I18N_API int32_t getMaxExpansion(int32_t order) const;
441 #endif /* U_HIDE_DEPRECATED_API */
442 
453  U_I18N_API virtual UClassID getDynamicClassID() const override;
454 
466  U_I18N_API static UClassID getStaticClassID();
467 
468 #ifndef U_HIDE_DEPRECATED_API
469 
479  U_I18N_API uint8_t* cloneRuleData(int32_t& length, UErrorCode& status) const;
480 #endif /* U_HIDE_DEPRECATED_API */
481 
492  U_I18N_API int32_t cloneBinary(uint8_t* buffer, int32_t capacity, UErrorCode& status) const;
493 
505  U_I18N_API void getRules(UColRuleOption delta, UnicodeString& buffer) const;
506 
514  U_I18N_API virtual void setAttribute(UColAttribute attr,
515  UColAttributeValue value,
516  UErrorCode& status) override;
517 
526  UErrorCode& status) const override;
527 
544  U_I18N_API virtual Collator& setMaxVariable(UColReorderCode group, UErrorCode& errorCode) override;
545 
552  U_I18N_API virtual UColReorderCode getMaxVariable() const override;
553 
554 #ifndef U_FORCE_HIDE_DEPRECATED_API
555 
571  U_I18N_API virtual uint32_t setVariableTop(const char16_t* varTop,
572  int32_t len,
573  UErrorCode& status) override;
574 
590  U_I18N_API virtual uint32_t setVariableTop(const UnicodeString& varTop, UErrorCode& status) override;
591 
603  U_I18N_API virtual void setVariableTop(uint32_t varTop, UErrorCode& status) override;
604 #endif // U_FORCE_HIDE_DEPRECATED_API
605 
613  U_I18N_API virtual uint32_t getVariableTop(UErrorCode& status) const override;
614 
624  U_I18N_API virtual UnicodeSet* getTailoredSet(UErrorCode& status) const override;
625 
640  U_I18N_API virtual int32_t getSortKey(const UnicodeString& source,
641  uint8_t* result,
642  int32_t resultLength) const override;
643 
660  U_I18N_API virtual int32_t getSortKey(const char16_t* source,
661  int32_t sourceLength,
662  uint8_t* result,
663  int32_t resultLength) const override;
664 
678  U_I18N_API virtual int32_t getReorderCodes(int32_t* dest,
679  int32_t destCapacity,
680  UErrorCode& status) const override;
681 
693  U_I18N_API virtual void setReorderCodes(const int32_t* reorderCodes,
694  int32_t reorderCodesLength,
695  UErrorCode& status) override;
696 
701  U_I18N_API virtual UCollationResult internalCompareUTF8(const char* left, int32_t leftLength,
702  const char* right, int32_t rightLength,
703  UErrorCode& errorCode) const override;
704 
728  U_I18N_API virtual int32_t internalGetShortDefinitionString(const char* locale,
729  char* buffer,
730  int32_t capacity,
731  UErrorCode& status) const override;
732 
737  U_I18N_API virtual int32_t internalNextSortKeyPart(UCharIterator* iter,
738  uint32_t state[2],
739  uint8_t* dest,
740  int32_t count,
741  UErrorCode& errorCode) const override;
742 
743  // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API
749 
750 #ifndef U_HIDE_INTERNAL_API
751 
757  U_I18N_API const char* internalGetLocaleID(ULocDataLocaleType type, UErrorCode& errorCode) const;
758 
771  U_I18N_API void internalGetContractionsAndExpansions(UnicodeSet* contractions,
772  UnicodeSet* expansions,
773  UBool addPrefixes,
774  UErrorCode& errorCode) const;
775 
781  U_I18N_API void internalAddContractions(UChar32 c, UnicodeSet& set, UErrorCode& errorCode) const;
782 
787  U_I18N_API void internalBuildTailoring(const UnicodeString& rules,
788  int32_t strength,
789  UColAttributeValue decompositionMode,
790  UParseError* outParseError,
791  UnicodeString* outReason,
792  UErrorCode& errorCode);
793 
796  return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
797  }
799  static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) { // const overload: maps a C-API UCollator handle to the C++ class
800  return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc)); // fromUCollator() yields the Collator view; dynamic_cast gives nullptr when it is not a RuleBasedCollator
801  }
802 
807  U_I18N_API void internalGetCEs(const UnicodeString& str,
808  UVector64& ces,
809  UErrorCode& errorCode) const;
810 #endif // U_HIDE_INTERNAL_API
811 
812 protected:
820  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) override;
821 
822 private:
823  friend class CollationElementIterator;
824  friend class Collator;
825 
826  RuleBasedCollator(const CollationCacheEntry *entry);
827 
833  enum Attributes { // private attribute indices numbered from the end of the UColAttribute range
834  ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT, // starts at UCOL_ATTRIBUTE_COUNT; presumably the bit index for "variable top" in explicitlySetAttributes -- TODO confirm in the implementation
835  ATTR_LIMIT // one past ATTR_VARIABLE_TOP; named as the exclusive bound in the range-check comment of attributeHasBeenSetExplicitly()
836  };
837 
838  void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);
839 
840  // Both lengths must be <0 or else both must be >=0.
841  UCollationResult doCompare(const char16_t *left, int32_t leftLength,
842  const char16_t *right, int32_t rightLength,
843  UErrorCode &errorCode) const;
844  UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
845  const uint8_t *right, int32_t rightLength,
846  UErrorCode &errorCode) const;
847 
848  void writeSortKey(const char16_t *s, int32_t length,
849  SortKeyByteSink &sink, UErrorCode &errorCode) const;
850 
851  void writeIdenticalLevel(const char16_t *s, const char16_t *limit,
852  SortKeyByteSink &sink, UErrorCode &errorCode) const;
853 
854  const CollationSettings &getDefaultSettings() const;
855 
856  void setAttributeDefault(int32_t attribute) { // marks `attribute` as not explicitly set
857  explicitlySetAttributes &= ~(static_cast<uint32_t>(1) << attribute); // clears bit `attribute` of the mask; `attribute` is not range-checked here
858  }
859  void setAttributeExplicitly(int32_t attribute) { // marks `attribute` as explicitly set
860  explicitlySetAttributes |= static_cast<uint32_t>(1) << attribute; // sets bit `attribute` of the mask; `attribute` is not range-checked here
861  }
862  UBool attributeHasBeenSetExplicitly(int32_t attribute) const { // reports whether bit `attribute` is set in explicitlySetAttributes
863  // assert(0 <= attribute < ATTR_LIMIT);
864  return (explicitlySetAttributes & (static_cast<uint32_t>(1) << attribute)) != 0; // the range assertion above is commented out, so callers must pass an index that is valid as a 32-bit shift count
865  }
866 
874  UBool isUnsafe(UChar32 c) const;
875 
876  static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
877  UBool initMaxExpansions(UErrorCode &errorCode) const;
878 
879  void setFastLatinOptions(CollationSettings &ownedSettings) const;
880 
881  const CollationData *data;
882  const CollationSettings *settings; // reference-counted
883  const CollationTailoring *tailoring; // alias of cacheEntry->tailoring
884  const CollationCacheEntry *cacheEntry; // reference-counted
885  Locale validLocale;
886  uint32_t explicitlySetAttributes;
887 
888  UBool actualLocaleIsSameAsValid;
889 };
890 
891 U_NAMESPACE_END
892 
893 #endif // !UCONFIG_NO_COLLATION
894 
895 #endif /* U_SHOW_CPLUSPLUS_API */
896 
897 #endif // TBLCOLL_H
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:59
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status)
Sets the ordering of scripts for this collator.
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:173
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const
Get the short definition string for a collator.
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const
Retrieves the reordering codes for this collator.
virtual Collator & setMaxVariable(UColReorderCode group, UErrorCode &errorCode)
Sets the variable top to the top of the specified reordering group.
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition: ucol.h:76
virtual uint32_t getVariableTop(UErrorCode &status) const =0
Gets the variable top value of a Collator.
virtual int32_t internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode &errorCode) const
Implements ucol_nextSortKeyPart().
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:837
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C API for code unit iteration.
Definition: uiter.h:341
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:203
static const RuleBasedCollator * rbcFromUCollator(const UCollator *uc)
Definition: tblcoll.h:799
virtual Collator * clone() const =0
Makes a copy of this object.
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes...
Definition: ucol.h:149
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:245
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:316
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const =0
Universal attribute getter.
virtual UCollationResult compareUTF8(const StringPiece &source, const StringPiece &target, UErrorCode &status) const
Compares two UTF-8 strings using the Collator.
#define U_I18N_API_CLASS
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:457
The RuleBasedCollator class provides the implementation of Collator, using data-driven tables...
Definition: tblcoll.h:115
C++ API: Collation Service.
virtual int32_t hashCode() const =0
Generates the hash code for the collation object.
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const =0
Gets the locale of the Collator.
The CollationElementIterator class is used as an iterator to walk through each character of an inter...
Definition: coleitr.h:121
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:449
virtual void getVersion(UVersionInfo info) const =0
Gets the version information for a Collator.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
C API: Collator.
virtual bool operator==(const Collator &other) const
Returns true if "other" is the same as "this".
Collation keys are generated by the Collator class.
Definition: sortkey.h:101
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from a UnicodeString.
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:285
C API: Unicode Character Iteration.
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
virtual UColReorderCode getMaxVariable() const
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:509
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested...
Definition: uloc.h:338
static RuleBasedCollator * rbcFromUCollator(UCollator *uc)
Definition: tblcoll.h:795
C++ API: Locale ID object.
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:61
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)=0
Universal attribute setter.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
static Collator * fromUCollator(UCollator *uc)
Definition: coll.h:1232
virtual UClassID getDynamicClassID() const override=0
Returns a unique class ID POLYMORPHICALLY.
virtual UCollationResult internalCompareUTF8(const char *left, int32_t leftLength, const char *right, int32_t rightLength, UErrorCode &errorCode) const
Implements ucol_strcollUTF8().
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:302
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:92
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:61
virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status)=0
Sets the variable top to the primary weight of the specified string.
UColRuleOption
Options for retrieving the rule string.
Definition: ucol.h:372
One more than the highest normal UColAttribute value.
Definition: ucol.h:365
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:198