ICU 78.3  78.3
coll.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
8 */
9 
52 #ifndef COLL_H
53 #define COLL_H
54 
55 #include "unicode/utypes.h"
56 
57 #if U_SHOW_CPLUSPLUS_API
58 
59 #if !UCONFIG_NO_COLLATION
60 
61 #include <functional>
62 #include <string_view>
63 #include <type_traits>
64 
65 #include "unicode/char16ptr.h"
66 #include "unicode/uobject.h"
67 #include "unicode/ucol.h"
68 #include "unicode/unorm.h"
69 #include "unicode/locid.h"
70 #include "unicode/uniset.h"
71 #include "unicode/umisc.h"
72 #include "unicode/unistr.h"
73 #include "unicode/uiter.h"
74 #include "unicode/stringpiece.h"
75 
76 U_NAMESPACE_BEGIN
77 
78 class StringEnumeration;
79 
80 #if !UCONFIG_NO_SERVICE
81 
84 class CollatorFactory;
85 #endif
86 
90 class CollationKey;
91 
173 class U_I18N_API Collator : public UObject {
174 public:
175 
176  // Collator public enums -----------------------------------------------
177 
203  enum ECollationStrength // comparison strength levels; each enumerator aliases the ucol.h constant of the same name
204  {
205  PRIMARY = UCOL_PRIMARY, // 0
206  SECONDARY = UCOL_SECONDARY, // 1
207  TERTIARY = UCOL_TERTIARY, // 2
208  QUATERNARY = UCOL_QUATERNARY, // 3
209  IDENTICAL = UCOL_IDENTICAL // 15
210  };
211 
212 
213  // Cannot use #ifndef U_HIDE_DEPRECATED_API for the following, it is
214  // used by virtual methods that cannot have that conditional.
215 #ifndef U_FORCE_HIDE_DEPRECATED_API
216 
226  enum EComparisonResult // result type of the deprecated compare() overloads; values alias UCOL_LESS / UCOL_EQUAL / UCOL_GREATER
227  {
228  LESS = UCOL_LESS, // -1
229  EQUAL = UCOL_EQUAL, // 0
230  GREATER = UCOL_GREATER // 1
231  };
232 #endif // U_FORCE_HIDE_DEPRECATED_API
233 
234  // Collator public destructor -----------------------------------------
235 
240  virtual ~Collator();
241 
242  // Collator public methods --------------------------------------------
243 
262  virtual bool operator==(const Collator& other) const;
263 
271  virtual bool operator!=(const Collator& other) const;
272 
278  virtual Collator* clone() const = 0;
279 
299  static Collator* U_EXPORT2 createInstance(UErrorCode& err);
300 
334  static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
335 
336 #ifndef U_FORCE_HIDE_DEPRECATED_API
337 
348  virtual EComparisonResult compare(const UnicodeString& source,
349  const UnicodeString& target) const;
350 #endif // U_FORCE_HIDE_DEPRECATED_API
351 
364  virtual UCollationResult compare(const UnicodeString& source,
365  const UnicodeString& target,
366  UErrorCode &status) const = 0;
367 
368 #ifndef U_FORCE_HIDE_DEPRECATED_API
369 
381  virtual EComparisonResult compare(const UnicodeString& source,
382  const UnicodeString& target,
383  int32_t length) const;
384 #endif // U_FORCE_HIDE_DEPRECATED_API
385 
399  virtual UCollationResult compare(const UnicodeString& source,
400  const UnicodeString& target,
401  int32_t length,
402  UErrorCode &status) const = 0;
403 
404 #ifndef U_FORCE_HIDE_DEPRECATED_API
405 
438  virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength,
439  const char16_t* target, int32_t targetLength)
440  const;
441 #endif // U_FORCE_HIDE_DEPRECATED_API
442 
459  virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
460  const char16_t* target, int32_t targetLength,
461  UErrorCode &status) const = 0;
462 
474  virtual UCollationResult compare(UCharIterator &sIter,
475  UCharIterator &tIter,
476  UErrorCode &status) const;
477 
491  virtual UCollationResult compareUTF8(const StringPiece &source,
492  const StringPiece &target,
493  UErrorCode &status) const;
494 
513  virtual CollationKey& getCollationKey(const UnicodeString& source,
514  CollationKey& key,
515  UErrorCode& status) const = 0;
516 
536  virtual CollationKey& getCollationKey(const char16_t*source,
537  int32_t sourceLength,
538  CollationKey& key,
539  UErrorCode& status) const = 0;
544  virtual int32_t hashCode() const = 0;
545 
546 #ifndef U_FORCE_HIDE_DEPRECATED_API
547 
559  virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
560 #endif // U_FORCE_HIDE_DEPRECATED_API
561 
571  UBool greater(const UnicodeString& source, const UnicodeString& target)
572  const;
573 
583  UBool greaterOrEqual(const UnicodeString& source,
584  const UnicodeString& target) const;
585 
595  UBool equals(const UnicodeString& source, const UnicodeString& target) const;
596 
602  inline auto equal_to() const { return Predicate<std::equal_to, UCOL_EQUAL>(*this); } // functor is true when the collation result == UCOL_EQUAL
603 
609  inline auto greater() const { return Predicate<std::equal_to, UCOL_GREATER>(*this); } // functor is true when the collation result == UCOL_GREATER
610 
616  inline auto less() const { return Predicate<std::equal_to, UCOL_LESS>(*this); } // functor is true when the collation result == UCOL_LESS
617 
623  inline auto not_equal_to() const { return Predicate<std::not_equal_to, UCOL_EQUAL>(*this); } // functor is true when the collation result != UCOL_EQUAL
624 
630  inline auto greater_equal() const { return Predicate<std::not_equal_to, UCOL_LESS>(*this); } // "greater or equal" is expressed as result != UCOL_LESS
631 
637  inline auto less_equal() const { return Predicate<std::not_equal_to, UCOL_GREATER>(*this); } // "less or equal" is expressed as result != UCOL_GREATER
638 
639 #ifndef U_FORCE_HIDE_DEPRECATED_API
640 
650  virtual ECollationStrength getStrength() const;
651 
670  virtual void setStrength(ECollationStrength newStrength);
671 #endif // U_FORCE_HIDE_DEPRECATED_API
672 
688  virtual int32_t getReorderCodes(int32_t *dest,
689  int32_t destCapacity,
690  UErrorCode& status) const;
691 
707  virtual void setReorderCodes(const int32_t* reorderCodes,
708  int32_t reorderCodesLength,
709  UErrorCode& status) ;
710 
731  static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
732  int32_t* dest,
733  int32_t destCapacity,
734  UErrorCode& status);
735 
745  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
746  const Locale& displayLocale,
747  UnicodeString& name);
748 
757  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
758  UnicodeString& name);
759 
771  static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
772 
781  static StringEnumeration* U_EXPORT2 getAvailableLocales();
782 
792  static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
793 
805  static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
806 
823  static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale,
824  UBool commonlyUsed, UErrorCode& status);
825 
853  static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
854  UBool& isAvailable, UErrorCode& status);
855 
856 #if !UCONFIG_NO_SERVICE
857 
868  static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
869 
880  static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
881 
895  static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
896 #endif /* UCONFIG_NO_SERVICE */
897 
903  virtual void getVersion(UVersionInfo info) const = 0;
904 
915  virtual UClassID getDynamicClassID() const override = 0;
916 
925  virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
926  UErrorCode &status) = 0;
927 
936  virtual UColAttributeValue getAttribute(UColAttribute attr,
937  UErrorCode &status) const = 0;
938 
957  virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
958 
967  virtual UColReorderCode getMaxVariable() const;
968 
969 #ifndef U_FORCE_HIDE_DEPRECATED_API
970 
986  virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0;
987 
1003  virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0;
1004 
1016  virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0;
1017 #endif // U_FORCE_HIDE_DEPRECATED_API
1018 
1026  virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
1027 
1037  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
1038 
1039 #ifndef U_FORCE_HIDE_DEPRECATED_API
1040 
1047  virtual Collator* safeClone() const;
1048 #endif // U_FORCE_HIDE_DEPRECATED_API
1049 
1066  virtual int32_t getSortKey(const UnicodeString& source,
1067  uint8_t* result,
1068  int32_t resultLength) const = 0;
1069 
1089  virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength,
1090  uint8_t*result, int32_t resultLength) const = 0;
1091 
1129  static int32_t U_EXPORT2 getBound(const uint8_t *source,
1130  int32_t sourceLength,
1131  UColBoundMode boundType,
1132  uint32_t noOfLevels,
1133  uint8_t *result,
1134  int32_t resultLength,
1135  UErrorCode &status);
1136 
1137 
1138 protected:
1139 
1140  // Collator protected constructors -------------------------------------
1141 
1149  Collator();
1150 
1151 #ifndef U_HIDE_DEPRECATED_API
1152 
1163  Collator(UCollationStrength collationStrength,
1164  UNormalizationMode decompositionMode);
1165 #endif /* U_HIDE_DEPRECATED_API */
1166 
1172  Collator(const Collator& other);
1173 
1174 public:
1182  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
1183 
1207  virtual int32_t internalGetShortDefinitionString(const char *locale,
1208  char *buffer,
1209  int32_t capacity,
1210  UErrorCode &status) const;
1211 
1216  virtual UCollationResult internalCompareUTF8(
1217  const char *left, int32_t leftLength,
1218  const char *right, int32_t rightLength,
1219  UErrorCode &errorCode) const;
1220 
1225  virtual int32_t
1226  internalNextSortKeyPart(
1227  UCharIterator *iter, uint32_t state[2],
1228  uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
1229 
1230 #ifndef U_HIDE_INTERNAL_API
1231 
1232  static inline Collator *fromUCollator(UCollator *uc) { // C handle -> C++ object pointer
1233  return reinterpret_cast<Collator *>(uc); // pure pointer reinterpretation; no null or type check is performed here
1234  }
1236  static inline const Collator *fromUCollator(const UCollator *uc) { // const C handle -> const C++ object pointer
1237  return reinterpret_cast<const Collator *>(uc); // pure pointer reinterpretation; no null or type check is performed here
1238  }
1240  inline UCollator *toUCollator() { // non-const counterpart of the const overload that follows
1241  return reinterpret_cast<UCollator *>(this); // exposes this object as the C API handle type; pointer reinterpretation only
1242  }
1244  inline const UCollator *toUCollator() const { // C++ object -> const C handle
1245  return reinterpret_cast<const UCollator *>(this); // pure pointer reinterpretation; constness is preserved
1246  }
1247 #endif // U_HIDE_INTERNAL_API
1248 
1249 private:
1253  Collator& operator=(const Collator& other) = delete;
1254 
1255  friend class CFactory;
1256  friend class SimpleCFactory;
1257  friend class ICUCollatorFactory;
1258  friend class ICUCollatorService;
1259  static Collator* makeInstance(const Locale& desiredLocale,
1260  UErrorCode& status);
1261 
1266  template <template <typename...> typename Compare, UCollationResult result> // Compare: comparison functor template (std::equal_to / std::not_equal_to in this file); result: value the collation outcome is tested against
1267  class Predicate { // binary function object: collates two strings with the wrapped Collator and tests the outcome against `result`
1268  public:
1269  explicit Predicate(const Collator& parent) : collator(parent) {} // keeps a reference to parent; the Collator is not copied
1270 
1271  template <
1272  typename T, typename U,
1273  typename = std::enable_if_t<ConvertibleToU16StringView<T> && ConvertibleToU16StringView<U>>> // enabled only when both argument types satisfy ConvertibleToU16StringView
1274  bool operator()(const T& lhs, const U& rhs) const {
1275  UErrorCode status = U_ZERO_ERROR; // local status only; it is not inspected after the call
1276  return compare( // `compare` here is the Compare functor member, not Collator::compare
1277  collator.compare(
1278  UnicodeString::readOnlyAlias(lhs),
1279  UnicodeString::readOnlyAlias(rhs),
1280  status),
1281  result);
1282  }
1283 
1284  bool operator()(std::string_view lhs, std::string_view rhs) const { // char-based overload: routed through compareUTF8
1285  UErrorCode status = U_ZERO_ERROR; // local status only; it is not inspected after the call
1286  return compare(collator.compareUTF8(lhs, rhs, status), result);
1287  }
1288 
1289 #if defined(__cpp_char8_t)
1290  bool operator()(std::u8string_view lhs, std::u8string_view rhs) const { // char8_t overload, compiled only when __cpp_char8_t is defined
1291  UErrorCode status = U_ZERO_ERROR; // local status only; it is not inspected after the call
1292  return compare(collator.compareUTF8(lhs, rhs, status), result);
1293  }
1294 #endif
1295 
1296  private:
1297  const Collator& collator; // the collator performing the comparisons (held by reference)
1298  static constexpr Compare<UCollationResult> compare{}; // value-initialized comparison functor shared by all instances of this specialization
1299  };
1300 };
1301 
1302 #if !UCONFIG_NO_SERVICE
1303 
1319 class U_I18N_API CollatorFactory : public UObject { // factory type accepted by Collator::registerFactory
1320 public:
1321 
1326  virtual ~CollatorFactory();
1327 
1335  virtual UBool visible() const;
1336 
1344  virtual Collator* createCollator(const Locale& loc) = 0; // pure virtual: every concrete factory must implement this
1345 
1356  virtual UnicodeString& getDisplayName(const Locale& objectLocale,
1357  const Locale& displayLocale,
1358  UnicodeString& result);
1359 
1369  virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0; // pure virtual; the number of IDs is reported through `count`
1370 };
1371 #endif /* UCONFIG_NO_SERVICE */
1372 
1373 // Collator inline methods -----------------------------------------------
1374 
1375 U_NAMESPACE_END
1376 
1377 #endif /* #if !UCONFIG_NO_COLLATION */
1378 
1379 #endif /* U_SHOW_CPLUSPLUS_API */
1380 
1381 #endif
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:59
string a < string b
Definition: ucol.h:82
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:173
const UCollator * toUCollator() const
Definition: coll.h:1244
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:346
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition: ucol.h:76
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:140
C++ API: Unicode String.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
A factory, used with registerFactory, that creates multiple collators and provides display names for t...
Definition: coll.h:1319
C API: Miscellaneous definitions.
C API for code unit iteration.
Definition: uiter.h:341
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:61
string a == string b
Definition: ucol.h:78
No error, no warning.
Definition: utypes.h:544
static const Collator * fromUCollator(const UCollator *uc)
Definition: coll.h:1236
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:203
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes...
Definition: ucol.h:149
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:245
auto less_equal() const
Creates a comparison function object that uses this collator.
Definition: coll.h:637
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:316
UCollator * toUCollator()
Definition: coll.h:1240
C++ API: StringPiece: Read-only byte string wrapper class.
Secondary collation strength.
Definition: ucol.h:99
auto not_equal_to() const
Creates a comparison function object that uses this collator.
Definition: coll.h:623
C API: Unicode Normalization.
auto less() const
Creates a comparison function object that uses this collator.
Definition: coll.h:616
virtual UClassID getDynamicClassID() const
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
EComparisonResult
LESS is returned if source string is compared to be less than target string in the compare() method...
Definition: coll.h:226
C API: Collator.
Collation keys are generated by the Collator class.
Definition: sortkey.h:101
Tertiary collation strength.
Definition: ucol.h:101
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:285
C API: Unicode Character Iteration.
C++ API: Common ICU base class UObject.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration...
Definition: umisc.h:57
string a > string b
Definition: ucol.h:80
auto equal_to() const
Creates a comparison function object that uses this collator.
Definition: coll.h:602
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:509
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested...
Definition: uloc.h:338
UColBoundMode
enum that is taken by ucol_getBound API See below for explanation do not change the values assigned ...
Definition: ucol.h:1073
C++ API: Locale ID object.
auto greater_equal() const
Creates a comparison function object that uses this collator.
Definition: coll.h:630
auto greater() const
Creates a comparison function object that uses this collator.
Definition: coll.h:609
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:61
Basic definitions for ICU, for both C and C++ APIs.
static Collator * fromUCollator(UCollator *uc)
Definition: coll.h:1232
Identical collation strength.
Definition: ucol.h:108
Quaternary collation strength.
Definition: ucol.h:106
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:302
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:92
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:61
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:222
Primary collation strength.
Definition: ucol.h:97
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types...
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:198
C++ API: Unicode Set.