ICU 60.0.1  60.0.1
utext.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2004-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: utext.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004oct06
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UTEXT_H__
20 #define __UTEXT_H__
21 
140 #include "unicode/utypes.h"
141 #include "unicode/uchar.h"
142 #if U_SHOW_CPLUSPLUS_API
143 #include "unicode/localpointer.h"
144 #include "unicode/rep.h"
145 #include "unicode/unistr.h"
146 #include "unicode/chariter.h"
147 #endif
148 
149 
151 
152 struct UText;
153 typedef struct UText UText;
156 /***************************************************************************************
157  *
158  * C Functions for creating UText wrappers around various kinds of text strings.
159  *
160  ****************************************************************************************/
161 
162 
183 U_STABLE UText * U_EXPORT2
184 utext_close(UText *ut);
185 
186 #if U_SHOW_CPLUSPLUS_API
187 
189 
200 
202 
203 #endif
204 
226 U_STABLE UText * U_EXPORT2
227 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
228 
229 
244 U_STABLE UText * U_EXPORT2
245 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
246 
247 
248 #if U_SHOW_CPLUSPLUS_API
249 
261 U_STABLE UText * U_EXPORT2
263 
264 
277 U_STABLE UText * U_EXPORT2
279 
280 
293 U_STABLE UText * U_EXPORT2
295 
308 U_STABLE UText * U_EXPORT2
310 
311 #endif
312 
313 
371 U_STABLE UText * U_EXPORT2
372 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
373 
374 
386 U_STABLE UBool U_EXPORT2
387 utext_equals(const UText *a, const UText *b);
388 
389 
390 /*****************************************************************************
391  *
392  * Functions to work with the text represented by a UText wrapper
393  *
394  *****************************************************************************/
395 
407 U_STABLE int64_t U_EXPORT2
409 
423 U_STABLE UBool U_EXPORT2
424 utext_isLengthExpensive(const UText *ut);
425 
451 U_STABLE UChar32 U_EXPORT2
452 utext_char32At(UText *ut, int64_t nativeIndex);
453 
454 
465 U_STABLE UChar32 U_EXPORT2
467 
468 
487 U_STABLE UChar32 U_EXPORT2
488 utext_next32(UText *ut);
489 
490 
508 U_STABLE UChar32 U_EXPORT2
510 
511 
530 U_STABLE UChar32 U_EXPORT2
531 utext_next32From(UText *ut, int64_t nativeIndex);
532 
533 
534 
550 U_STABLE UChar32 U_EXPORT2
551 utext_previous32From(UText *ut, int64_t nativeIndex);
552 
565 U_STABLE int64_t U_EXPORT2
566 utext_getNativeIndex(const UText *ut);
567 
591 U_STABLE void U_EXPORT2
592 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
593 
610 U_STABLE UBool U_EXPORT2
611 utext_moveIndex32(UText *ut, int32_t delta);
612 
635 U_STABLE int64_t U_EXPORT2
637 
638 
673 U_STABLE int32_t U_EXPORT2
674 utext_extract(UText *ut,
675  int64_t nativeStart, int64_t nativeLimit,
676  UChar *dest, int32_t destCapacity,
677  UErrorCode *status);
678 
679 
680 
681 /************************************************************************************
682  *
683  * #define inline versions of selected performance-critical text access functions
684  * Caution: do not use auto increment++ or decrement-- expressions
685  * as parameters to these macros.
686  *
687  * For most use, where there is no extreme performance constraint, the
688  * normal, non-inline functions are a better choice. The resulting code
689  * will be smaller, and, if the need ever arises, easier to debug.
690  *
691  * These are implemented as #defines rather than real functions
692  * because there is no fully portable way to do inline functions in plain C.
693  *
694  ************************************************************************************/
695 
696 #ifndef U_HIDE_INTERNAL_API
697 
/*
 * UTEXT_CURRENT32(ut): inline fast path for utext_current32().
 *
 * When the chunk offset is inside the current chunk (chunkOffset < chunkLength)
 * and the UTF-16 code unit at that offset is below 0xd800, the macro yields
 * that code unit directly and leaves chunkOffset unchanged. In every other
 * case it falls back to calling utext_current32(ut).
 *
 * `ut` is expanded several times; pass an expression without side effects.
 */
706 #define UTEXT_CURRENT32(ut) \
707  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
708  ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
709 #endif /* U_HIDE_INTERNAL_API */
710 
/*
 * UTEXT_NEXT32(ut): inline fast path for utext_next32().
 *
 * When the chunk offset is inside the current chunk (chunkOffset < chunkLength)
 * and the UTF-16 code unit at that offset is below 0xd800, the macro yields
 * that code unit and post-increments chunkOffset by one. In every other case
 * it falls back to calling utext_next32(ut).
 *
 * `ut` is expanded several times; pass an expression without side effects.
 */
722 #define UTEXT_NEXT32(ut) \
723  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
724  ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
725 
/*
 * UTEXT_PREVIOUS32(ut): inline fast path for utext_previous32().
 *
 * When chunkOffset is greater than zero and the UTF-16 code unit immediately
 * before it is below 0xd800, the macro pre-decrements chunkOffset and yields
 * the code unit at the new offset. In every other case it falls back to
 * calling utext_previous32(ut).
 *
 * `ut` is expanded several times; pass an expression without side effects.
 */
736 #define UTEXT_PREVIOUS32(ut) \
737  ((ut)->chunkOffset > 0 && \
738  (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
739  (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
740 
/*
 * UTEXT_GETNATIVEINDEX(ut): inline computation of the current native index.
 *
 * While chunkOffset does not exceed nativeIndexingLimit, the result is simply
 * chunkNativeStart + chunkOffset. Beyond that limit the macro calls the
 * provider's mapOffsetToNative function through ut->pFuncs.
 *
 * `ut` is expanded several times; pass an expression without side effects.
 */
753 #define UTEXT_GETNATIVEINDEX(ut) \
754  ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
755  (ut)->chunkNativeStart+(ut)->chunkOffset : \
756  (ut)->pFuncs->mapOffsetToNative(ut))
757 
/*
 * UTEXT_SETNATIVEINDEX(ut, ix): inline fast path for utext_setNativeIndex().
 *
 * Computes the offset of native index `ix` relative to chunkNativeStart. If
 * that offset lies in [0, nativeIndexingLimit], it is stored straight into
 * chunkOffset; otherwise the macro calls utext_setNativeIndex(ut, ix).
 *
 * Usage notes:
 *  - The expansion is a brace-enclosed statement block, not an expression,
 *    so it yields no value. Because it is a bare { } block, writing
 *    `if (c) UTEXT_SETNATIVEINDEX(ut, ix); else ...` does not compile.
 *  - `ut` and `ix` are each expanded more than once; pass expressions
 *    without side effects.
 *  - The local is named __offset, an identifier form reserved to the
 *    implementation in C.
 *
 * NOTE(review): the fast path stores the offset without checking that it
 * falls on a code point boundary, whereas utext_setNativeIndex() is described
 * as moving to the nearest boundary at or preceding the index. Confirm
 * whether callers may rely on the boundary adjustment when using this macro.
 */
769 #define UTEXT_SETNATIVEINDEX(ut, ix) \
770  { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
771  if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
772  (ut)->chunkOffset=(int32_t)__offset; \
773  } else { \
774  utext_setNativeIndex((ut), (ix)); } }
775 
776 
777 
778 /************************************************************************************
779  *
780  * Functions related to writing or modifying the text.
781  * These will work only with modifiable UTexts. Attempting to
782  * modify a read-only UText will return an error status.
783  *
784  ************************************************************************************/
785 
786 
805 U_STABLE UBool U_EXPORT2
806 utext_isWritable(const UText *ut);
807 
808 
817 U_STABLE UBool U_EXPORT2
818 utext_hasMetaData(const UText *ut);
819 
820 
848 U_STABLE int32_t U_EXPORT2
849 utext_replace(UText *ut,
850  int64_t nativeStart, int64_t nativeLimit,
851  const UChar *replacementText, int32_t replacementLength,
852  UErrorCode *status);
853 
854 
855 
888 U_STABLE void U_EXPORT2
889 utext_copy(UText *ut,
890  int64_t nativeStart, int64_t nativeLimit,
891  int64_t destIndex,
892  UBool move,
893  UErrorCode *status);
894 
895 
917 U_STABLE void U_EXPORT2
918 utext_freeze(UText *ut);
919 
920 
927 enum {
961 };
962 
1000 typedef UText * U_CALLCONV
1001 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
1002 
1003 
1012 typedef int64_t U_CALLCONV
1014 
1040 typedef UBool U_CALLCONV
1041 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1042 
1070 typedef int32_t U_CALLCONV
1072  int64_t nativeStart, int64_t nativeLimit,
1073  UChar *dest, int32_t destCapacity,
1074  UErrorCode *status);
1075 
1105 typedef int32_t U_CALLCONV
1107  int64_t nativeStart, int64_t nativeLimit,
1108  const UChar *replacementText, int32_t replacmentLength,
1109  UErrorCode *status);
1110 
1139 typedef void U_CALLCONV
1141  int64_t nativeStart, int64_t nativeLimit,
1142  int64_t nativeDest,
1143  UBool move,
1144  UErrorCode *status);
1145 
1159 typedef int64_t U_CALLCONV
1161 
1177 typedef int32_t U_CALLCONV
1178 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1179 
1180 
1198 typedef void U_CALLCONV
1200 
1201 
1211 struct UTextFuncs {
1226  int32_t tableSize;
1227 
1234 
1235 
1243 
1252 
1260 
1268 
1276 
1284 
1292 
1300 
1308 
1314 
1320 
1326 
1327 };
1332 typedef struct UTextFuncs UTextFuncs;
1333 
1345 struct UText {
1358  uint32_t magic;
1359 
1360 
1366  int32_t flags;
1367 
1368 
1375 
1382  int32_t sizeOfStruct;
1383 
1384  /* ------ 16 byte alignment boundary ----------- */
1385 
1386 
1393 
1398  int32_t extraSize;
1399 
1408 
1409  /* ---- 16 byte alignment boundary------ */
1410 
1416 
1422  int32_t chunkOffset;
1423 
1428  int32_t chunkLength;
1429 
1430  /* ---- 16 byte alignment boundary-- */
1431 
1432 
1440 
1446 
1452  void *pExtra;
1453 
1460  const void *context;
1461 
1462  /* --- 16 byte alignment boundary--- */
1463 
1469  const void *p;
1475  const void *q;
1481  const void *r;
1482 
1488  void *privP;
1489 
1490 
1491  /* --- 16 byte alignment boundary--- */
1492 
1493 
1499  int64_t a;
1500 
1506  int32_t b;
1507 
1513  int32_t c;
1514 
1515  /* ---- 16 byte alignment boundary---- */
1516 
1517 
1523  int64_t privA;
1529  int32_t privB;
1535  int32_t privC;
1536 };
1537 
1538 
1555 U_STABLE UText * U_EXPORT2
1556 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1557 
1558 #ifndef U_HIDE_INTERNAL_API
1559 
1564 enum {
1565  UTEXT_MAGIC = 0x345ad82c
1566 };
1567 #endif /* U_HIDE_INTERNAL_API */
1568 
1576 #define UTEXT_INITIALIZER { \
1577  UTEXT_MAGIC, /* magic */ \
1578  0, /* flags */ \
1579  0, /* providerProps */ \
1580  sizeof(UText), /* sizeOfStruct */ \
1581  0, /* chunkNativeLimit */ \
1582  0, /* extraSize */ \
1583  0, /* nativeIndexingLimit */ \
1584  0, /* chunkNativeStart */ \
1585  0, /* chunkOffset */ \
1586  0, /* chunkLength */ \
1587  NULL, /* chunkContents */ \
1588  NULL, /* pFuncs */ \
1589  NULL, /* pExtra */ \
1590  NULL, /* context */ \
1591  NULL, NULL, NULL, /* p, q, r */ \
1592  NULL, /* privP */ \
1593  0, 0, 0, /* a, b, c */ \
1594  0, 0, 0 /* privA,B,C, */ \
1595  }
1596 
1597 
1599 
1600 
1601 
1602 #endif
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition: utext.h:1071
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1513
int64_t utext_nativeLength(UText *ut)
Get the length of the text.
UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position...
UTextClose * spare3
(private) Spare function pointer
Definition: utext.h:1325
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond...
Definition: utext.h:1407
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition: utext.h:1415
UBool utext_isWritable(const UText *ut)
Return TRUE if the text can be written (modified) with utext_replace() or utext_copy().
void UTextClose(UText *ut)
Function type declaration for UText.utextClose().
Definition: utext.h:1199
int32_t providerProperties
Text provider properties.
Definition: utext.h:1374
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition: utext.h:1452
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1499
UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index...
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition: utext.h:1428
C++ API: Unicode String.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition: utext.h:1140
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition: utext.h:1299
UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition: utext.h:1001
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:830
int32_t reserved1
(private) Alignment padding.
Definition: utext.h:1233
void utext_freeze(UText *ut)
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition: utext.h:1267
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition: utext.h:1013
void * privP
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1488
UTextClose * close
(public) Function pointer for UTextClose.
Definition: utext.h:1307
int32_t flags
(private) Flags for managing the allocation and freeing of memory associated with this UText...
Definition: utext.h:1366
There is meta data associated with the text.
Definition: utext.h:952
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1535
UTextClone * clone
(public) Function pointer for UTextClone
Definition: utext.h:1242
UTextNativeLength * nativeLength
(public) Function pointer for UTextNativeLength. May be expensive to compute!
Definition: utext.h:1251
const void * q
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1475
(public) Function dispatch table for UText.
Definition: utext.h:1211
UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition: utext.h:1259
UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:73
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
const void * p
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1469
void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition: utext.h:1283
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:487
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:358
UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition: utext.h:1439
int32_t reserved3
Definition: utext.h:1233
UText * utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status)
Open a UText implementation over an ICU CharacterIterator.
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs) Intended for use should the table grow to accommodate...
Definition: utext.h:1226
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:396
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1529
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition: utext.h:1041
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition: utext.h:1422
Text provider owns the text storage.
Definition: utext.h:960
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition: utext.h:1398
UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition: utext.h:1160
int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
C API: Unicode Properties.
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapIndexToUTF16().
Definition: utext.h:1178
void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:349
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1523
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
const void * r
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1481
UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition: utext.h:1392
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText) Allows possible backward compatible extension.
Definition: utext.h:1382
UTextClose * spare2
(private) Spare function pointer
Definition: utext.h:1319
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1506
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:396
"Smart pointer" class, closes a UText via utext_close().
int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition: utext.h:1445
The provider supports modifying the text via the replace() and copy() functions.
Definition: utext.h:946
UText * utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status)
Open a writable UText implementation for an ICU Replaceable object.
UTextClose * spare1
(private) Spare function pointer
Definition: utext.h:1313
uint32_t magic
(private) Magic.
Definition: utext.h:1358
UText * utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status)
Open a writable UText for a non-const UnicodeString.
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition: utext.h:1106
UText struct.
Definition: utext.h:1345
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition: utext.h:1275
Basic definitions for ICU, for both C and C++ APIs.
UBool utext_isLengthExpensive(const UText *ut)
Return TRUE if calculating the length of the text could be expensive.
UText * utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status)
Open a UText for a const UnicodeString.
int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition: utext.h:1460
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:286
UText * utext_close(UText *ut)
Close function for UText instances.
C++ API: Character Iterator.
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition: utext.h:1291
int32_t reserved2
Definition: utext.h:1233
UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for UChar * string.
It is potentially time consuming for the provider to determine the length of the text.
Definition: utext.h:932
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition: utext.h:939
C++ API: Replaceable String.