mirror of
https://github.com/4ian/GDevelop.git
synced 2025-10-15 10:19:04 +00:00
896 lines
28 KiB
C++
896 lines
28 KiB
C++
/*
|
|
* GDevelop Core
|
|
* Copyright 2015 Victor Levasseur (victorlevasseur52@gmail.com).
|
|
* This project is released under the MIT License.
|
|
*/
|
|
|
|
#ifndef GDCORE_UTF8_STRING_H
|
|
#define GDCORE_UTF8_STRING_H
|
|
|
|
#include <functional>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
|
|
|
|
#include "GDCore/Utf8/utf8.h"
|
|
|
|
namespace gd
|
|
{
|
|
|
|
class String;
|
|
|
|
/**
|
|
* \brief String represents an UTF8 encoded string.
|
|
*
|
|
* This class represents an UTF8 encoded string. It provides almost the same features as the STL std::string class
|
|
* but is UTF8 aware (size() returns the number of characters, not the number of bytes for example).
|
|
*/
|
|
class GD_CORE_API String
|
|
{
|
|
|
|
public:
|
|
|
|
using value_type = char32_t;
|
|
using reference = char32_t&;
|
|
using const_reference = const char32_t&;
|
|
using pointer = char32_t*;
|
|
using const_pointer = const char32_t*;
|
|
|
|
using size_type = std::string::size_type;
|
|
using difference_type = std::string::difference_type;
|
|
|
|
static constexpr size_type npos = -1;
|
|
|
|
template<class T>
|
|
class GD_CORE_API StringIterator : public std::iterator<std::bidirectional_iterator_tag, String::value_type, String::difference_type>
|
|
{
|
|
friend class String;
|
|
friend class StringIterator<const T>;
|
|
|
|
public:
|
|
StringIterator() : strIt() {};
|
|
|
|
StringIterator(const StringIterator<T> &other) : strIt(other.strIt) {}
|
|
template<class U> StringIterator(const StringIterator<U> &other) : strIt(other.strIt) {} //Convert from const_iterator to iterator
|
|
|
|
StringIterator<T>& operator=(const StringIterator<T> &other) { strIt = other.strIt; return *this; }
|
|
|
|
String::value_type operator*() {return ::utf8::unchecked::peek_next(strIt);}
|
|
|
|
StringIterator<T>& operator++() { ::utf8::unchecked::next(strIt); return *this; }
|
|
StringIterator<T> operator++(int) { StringIterator<T> tmp(*this); operator++(); return tmp; }
|
|
StringIterator<T>& operator--() { ::utf8::unchecked::prior(strIt); return *this; }
|
|
StringIterator<T> operator--(int) { StringIterator<T> tmp(*this); operator--(); return tmp; }
|
|
|
|
bool operator==(const StringIterator<T> &other) { return (strIt == other.strIt); }
|
|
bool operator!=(const StringIterator<T> &other) { return !operator==(other); }
|
|
|
|
bool operator<(const StringIterator<T> &other) { return (strIt < other.strIt); }
|
|
bool operator<=(const StringIterator<T> &other) { return (strIt <= other.strIt); }
|
|
bool operator>(const StringIterator<T> &other) { return (strIt > other.strIt); }
|
|
bool operator>=(const StringIterator<T> &other) { return (strIt >= other.strIt); }
|
|
|
|
T base() const {return strIt;}
|
|
|
|
private:
|
|
StringIterator(T strIt) : strIt(strIt) {};
|
|
T strIt;
|
|
};
|
|
|
|
using iterator = StringIterator<std::string::iterator>;
|
|
using const_iterator = StringIterator<std::string::const_iterator>;
|
|
using reverse_iterator = std::reverse_iterator<iterator>;
|
|
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
|
|
|
/**
|
|
* \name Constructors
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* Constructs an empty string.
|
|
*/
|
|
String();
|
|
|
|
/**
|
|
* Constructs a string from an array of char **representing a string encoded
|
|
* in UTF8**.
|
|
*
|
|
* Useful to implicitly create a String object from a string literal.
|
|
*
|
|
* **Usage:**
|
|
* \code
|
|
* gd::String str(u8"A little sentence.");
|
|
* \endcode
|
|
*/
|
|
String(const char *characters);
|
|
|
|
/**
|
|
* Constructs a String from a std::u32string.
|
|
*
|
|
* **Usage:**
|
|
*
|
|
* \code
|
|
* gd::String str(U"A UTF32 encoded string.");
|
|
* \endcode
|
|
*/
|
|
String(const std::u32string &string);
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Assignment (implicit conversions)
|
|
* \{
|
|
*/
|
|
/**
|
|
* Assign the String using a string literal (it assumes that the **string
|
|
* literal is encoded in UTF8**).
|
|
*
|
|
* Usage:
|
|
* \code
|
|
* gd::String str;
|
|
* str = u8"This is a test string.";
|
|
* \endcode
|
|
*/
|
|
String& operator=(const char *characters);
|
|
|
|
String& operator=(const std::u32string &string);
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Size
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \brief Returns true if the string is empty.
|
|
*/
|
|
bool empty() const { return m_string.size() == 0; }
|
|
|
|
/**
|
|
* \brief Returns the string's length.
|
|
*/
|
|
size_type size() const;
|
|
|
|
/**
|
|
* \brief Returns the string's length.
|
|
*/
|
|
size_type length() const { return size(); };
|
|
|
|
/**
|
|
* \brief Clear the string.
|
|
*
|
|
* **Iterators :** Obviously, all iterators are invalidated.
|
|
*/
|
|
void clear() { m_string.clear(); }
|
|
|
|
/**
 * \brief Reserves storage in the internal std::string.
 * \param size forwarded as-is to std::string::reserve, so it is a number of
 * bytes of UTF8 storage, not a number of code points.
 */
void reserve(gd::String::size_type size)
{
    m_string.reserve(size);
}
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Iterators
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \brief Get a beginning iterator.
|
|
*/
|
|
String::iterator begin();
|
|
|
|
/**
|
|
* \brief Get a constant beginning iterator.
|
|
*/
|
|
String::const_iterator begin() const;
|
|
|
|
/**
|
|
* \brief Get an ending iterator.
|
|
*/
|
|
String::iterator end();
|
|
|
|
/**
|
|
* \brief Get a constant ending iterator.
|
|
*/
|
|
String::const_iterator end() const;
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Convert from/to numbers
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \brief Method to create a gd::String from a number (float, double, int, ...)
|
|
* \return a gd::String created from **value**.
|
|
*/
|
|
template<typename T>
|
|
static String From(T value)
|
|
{
|
|
static_assert(!std::is_same<T, std::string>::value, "Can't use gd::String::From with std::string.");
|
|
|
|
std::ostringstream oss;
|
|
oss << value;
|
|
return gd::String(oss.str().c_str());
|
|
}
|
|
|
|
/**
|
|
* \brief Method to convert the string to a number
|
|
* \return the string converted to the type **T**
|
|
*/
|
|
template<typename T>
|
|
T To() const
|
|
{
|
|
static_assert(!std::is_same<T, std::string>::value, "Can't use gd::String::To with std::string.");
|
|
|
|
T value;
|
|
std::istringstream oss(m_string);
|
|
oss >> value;
|
|
return value;
|
|
}
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Conversions from other string types
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \return a String created from a std::string encoded in the current
|
|
* locale.
|
|
*
|
|
* See \ref Conversions2 for more information.
|
|
*/
|
|
static String FromLocale( const std::string &localizedString );
|
|
|
|
/**
|
|
* \return a String created from a std::u32string.
|
|
*/
|
|
static String FromUTF32( const std::u32string &string );
|
|
|
|
/**
|
|
* \return a String created from a UTF8 encoded std::string.
|
|
*/
|
|
static String FromUTF8( const std::string &utf8Str );
|
|
|
|
/**
|
|
* \return a String created from a std::wstring (UTF32 on Linux and UCS-2 on Windows)
|
|
*/
|
|
static String FromWide( const std::wstring &wstr );
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Conversions to other string types
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \return a localized std::string from the current string.
|
|
*
|
|
* See \ref Conversions2 for more information.
|
|
*/
|
|
std::string ToLocale() const;
|
|
|
|
/**
|
|
* \return a std::u32string.
|
|
*/
|
|
std::u32string ToUTF32() const;
|
|
|
|
/**
|
|
* \return a UTF8 encoded std::string from the current string.
|
|
*/
|
|
std::string ToUTF8() const;
|
|
|
|
/**
|
|
* \return a wide string (std::wstring) encoded in UTF32 on Linux and in UCS-2 on Windows
|
|
* \note On Windows, this is possibly a lossy conversion.
|
|
*/
|
|
std::wstring ToWide() const;
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name UTF8 tools
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \return true if the string is valid.
|
|
*/
|
|
bool IsValid() const;
|
|
|
|
/**
|
|
* \brief Searches the string for invalid characters and replaces them with **replacement**.
|
|
* \return *this
|
|
*/
|
|
String& ReplaceInvalid( value_type replacement = 0xfffd );
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Element access / Internal string access
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \brief Returns the code point at the specified position
|
|
* \warning This operator has a linear complexity on the character's
|
|
* position. You should avoid to use it in a loop and use the iterators
|
|
* provided by this class instead.
|
|
*/
|
|
value_type operator[]( const size_type position ) const;
|
|
|
|
/**
|
|
* \brief Get the raw UTF8-encoded std::string
|
|
*/
|
|
std::string& Raw() { return m_string; }
|
|
|
|
/**
|
|
* \brief Get the raw UTF8-encoded std::string
|
|
*/
|
|
const std::string& Raw() const { return m_string; }
|
|
|
|
/**
|
|
* \brief Get the C-string.
|
|
*/
|
|
const char* c_str() const { return m_string.c_str(); }
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name String modifiers
|
|
* \{
|
|
*/
|
|
|
|
String& operator+=( const String &other );
|
|
|
|
String& operator+=( const char *other );
|
|
|
|
String& operator+=( value_type character );
|
|
|
|
/**
|
|
* \brief Add a character (from its codepoint) at the end of the String.
|
|
*
|
|
* **Iterators : ** All iterators may be invalidated (in particular if the
|
|
* string is reallocated).
|
|
*/
|
|
void push_back( value_type character );
|
|
|
|
/**
|
|
* \brief Remove the last character of the String.
|
|
*
|
|
* **Iterators : ** All iterators may be invalidated (in particular if the
|
|
* string is reallocated).
|
|
*/
|
|
void pop_back();
|
|
|
|
/**
|
|
* \brief Inserts characters right before the character at **pos**.
|
|
*
|
|
* \return *this
|
|
*
|
|
* **Iterators :** All iterators may be invalidated.
|
|
*/
|
|
String& insert( size_type pos, const String &str );
|
|
|
|
/**
|
|
* \brief Replace the portion of the String between **i1** and **i2** (**i2** not
|
|
* included) by the String **str**.
|
|
* \return *this
|
|
*
|
|
* **Iterators :** All iterators may be invalidated.
|
|
*/
|
|
String& replace( iterator i1, iterator i2, const String &str );
|
|
|
|
/**
|
|
* \brief Replace the portion of the String between **i1** and **i2** (**i2** not
|
|
* included) by **n** consecutive copies of character **c**.
|
|
* \return *this
|
|
*
|
|
* **Iterators :** All iterators may be invalidated.
|
|
*/
|
|
String& replace( iterator i1, iterator i2, size_type n, const char c );
|
|
|
|
/**
|
|
* \brief Replace the portion of the String between **pos** and **pos** + **len**
|
|
* (the character at **pos** + **len** is not included) with **str**.
|
|
* \return *this
|
|
*
|
|
* **Iterators :** All iterators may be invalidated.
|
|
*/
|
|
String& replace( size_type pos, size_type len, const String &str );
|
|
|
|
/**
|
|
* \brief Replace the portion of the String between **pos** and **pos** + **len**
|
|
* (the character at **pos** + **len** is not included) with the character **c**.
|
|
* \return *this
|
|
*
|
|
* **Iterators :** All iterators may be invalidated.
|
|
*/
|
|
String& replace( size_type pos, size_type len, const char c );
|
|
|
|
/**
|
|
* \brief Search in the portion of the String between **i1** and **i2** (**i2** not
|
|
* included) for characters matching predicate function **p** and replace them
|
|
* by the String **str**.
|
|
* \return *this
|
|
*
|
|
* **Iterators :** All iterators may be invalidated.
|
|
*/
|
|
String& replace_if( iterator i1, iterator i2, std::function<bool(char32_t)> p, const String &str );
|
|
|
|
/**
|
|
* \brief Remove consecutive occurrences of the character **c** in the portion of the
|
|
* String between **i1** and **i2** (**i2** not included) to replace them by a single occurrence.
|
|
* \return *this
|
|
*
|
|
* **Iterators :** All iterators may be invalidated.
|
|
*/
|
|
String& RemoveConsecutiveOccurrences(iterator i1, iterator i2, const char c);
|
|
|
|
/**
|
|
* \brief Erase the characters between **first** and **last** (**last** not included).
|
|
* \param first an iterator to the first character to remove
|
|
* \param last an iterator to the character next to the last one to remove
|
|
* \return an iterator pointing at the old position of the first deleted character
|
|
*/
|
|
iterator erase( iterator first, iterator last );
|
|
|
|
/**
|
|
* \brief Erase the character pointed by **p**.
|
|
* \param p an iterator pointing to the character to be erased
|
|
* \return an iterator pointing at the old position of the deleted character
|
|
*/
|
|
iterator erase( iterator p );
|
|
|
|
/**
|
|
* \brief Erase the characters between the positions **pos** and **pos** + **len**
|
|
* (**pos** + **len** not included).
|
|
* \param pos the position of the first character to remove
|
|
* \param len the number of characters to remove from **pos**
|
|
*/
|
|
void erase( size_type pos = 0, size_type len = npos );
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name String operations
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \brief Split the string with a delimiter
|
|
* \param delimiter delimiter (an UTF32 codepoint)
|
|
* \return a std::vector containing all the gd::String objects
|
|
*
|
|
* **Usage:**
|
|
*
|
|
* \code
|
|
* gd::String str = u8"10;20;30;40";
|
|
* std::vector<gd::String> splittedStr = str.Split(U';');
|
|
* //the U prefix is mandatory to get a char32_t from the literal
|
|
* //Now the vector contains "10", "20", "30" and "40" as gd::String objects
|
|
* \endcode
|
|
*/
|
|
std::vector<String> Split( value_type delimiter ) const;
|
|
|
|
/**
|
|
* \brief Returns the case-folded string.
|
|
* \note This string is almost but not totally suitable for case-insensitive comparison because you have to make sure
|
|
* that it is normalized. So, to do a case-insensitive comparison, do :
|
|
* \code
|
|
* str1.CaseFold().Normalize() == str2.CaseFold().Normalize()
|
|
* \endcode
|
|
* You can also use gd::CaseInsensitiveEquiv();
|
|
*/
|
|
String CaseFold() const;
|
|
|
|
/**
|
|
* \brief Returns the string in uppercase.
|
|
* \note Some characters that map to multiple characters when uppercased may not be processed, e.g. the German eszett.
|
|
*/
|
|
String UpperCase() const;
|
|
|
|
/**
|
|
* \brief Returns the string in lowercase.
|
|
* \note Some characters that map to multiple characters when lowercased may not be processed, e.g. double SS to eszett in German.
|
|
*/
|
|
String LowerCase() const;
|
|
|
|
/**
|
|
* \brief Searches a string for a specified substring and returns a new string where all occurrences of this substring are replaced.
|
|
* \param search The string that will be replaced by the new string.
|
|
* \param replacement The value to replace the old substring with.
|
|
* \param all If set to false, only the first matching substring will be replaced.
|
|
*/
|
|
String FindAndReplace(String search, String replacement, bool all = true) const;
|
|
|
|
/**
|
|
* \brief Removes the specified characters (by default all the "whitespaces" and line breaks) from the beginning of the string,
|
|
* and return the new string.
|
|
*/
|
|
String LeftTrim(const gd::String& chars = " \t\n\v\f\r")
|
|
{
|
|
String trimmedString(*this);
|
|
trimmedString.erase(0, trimmedString.find_first_not_of(chars));
|
|
return trimmedString;
|
|
}
|
|
|
|
/**
|
|
* \brief Removes the specified characters (by default all the "whitespaces" and line breaks) from the end of the string,
|
|
* and return the new string.
|
|
*/
|
|
String RightTrim(const gd::String& chars = " \t\n\v\f\r")
|
|
{
|
|
String trimmedString(*this);
|
|
trimmedString.erase(trimmedString.find_last_not_of(chars) + 1);
|
|
return trimmedString;
|
|
}
|
|
|
|
/**
|
|
* \brief Removes the specified characters (by default all the "whitespaces" and line breaks) from the
|
|
* beginning and the end of the string and return the new string.
|
|
*/
|
|
String Trim(const gd::String& chars = " \t\n\v\f\r")
|
|
{
|
|
return LeftTrim(chars).RightTrim(chars);
|
|
}
|
|
|
|
/**
 * Unicode normalization forms accepted by Normalize().
 */
enum NormForm
{
    /// Normalization Form Decomposition: characters are decomposed by canonical
    /// equivalence, and multiple combining characters are arranged in a specific order.
    NFD,
    /// Normalization Form Composition: characters are decomposed and then
    /// recomposed by canonical equivalence.
    NFC,
    /// Normalization Form Compatibility Decomposition: characters are decomposed by
    /// compatibility, and multiple combining characters are arranged in a specific order.
    NFKD,
    /// Normalization Form Compatibility Composition: characters are decomposed by
    /// compatibility, then recomposed by canonical equivalence.
    NFKC
};
|
|
|
|
/**
|
|
* Normalize the string using the normalization form **form**.
|
|
* \return *this
|
|
*/
|
|
String& Normalize(NormForm form = NFC);
|
|
|
|
/**
|
|
* Returns a sub-string starting from **start** and with length **length**.
|
|
*/
|
|
String substr( size_type start = 0, size_type length = npos ) const;
|
|
|
|
/**
|
|
* \return the position of the first occurrence of **search** starting from **pos**.
|
|
*/
|
|
size_type find( const String &search, size_type pos = 0 ) const;
|
|
|
|
/**
|
|
* \return the position of the first occurrence of **search** starting from **pos**.
|
|
*/
|
|
size_type find( const char *search, size_type pos = 0 ) const;
|
|
|
|
/**
|
|
* \return the position of the first occurrence of **search** starting from **pos**.
|
|
*/
|
|
size_type find( const value_type search, size_type pos = 0 ) const;
|
|
|
|
/**
|
|
* \return the position of the last occurrence of **search** starting before **pos**.
|
|
*/
|
|
size_type rfind( const String &search, size_type pos = npos ) const;
|
|
|
|
/**
|
|
* \return the position of the last occurrence of **search** starting before **pos**.
|
|
*/
|
|
size_type rfind( const char *search, size_type pos = npos ) const;
|
|
|
|
/**
|
|
* \return the position of the last occurrence of **search** starting before **pos**.
|
|
*/
|
|
size_type rfind( const value_type &search, size_type pos = npos ) const;
|
|
|
|
/**
|
|
* \brief Searches the string for the first character that matches any of the characters specified in
|
|
* its arguments.
|
|
* \param match the characters that will be looked for in the String
|
|
* \param startPos where to start the search
|
|
* \return the position of the first found character
|
|
*/
|
|
size_type find_first_of( const String &match, size_type startPos = 0 ) const;
|
|
|
|
/**
|
|
* \brief Searches the string for the first character that doesn't match any of the characters
|
|
* specified in its arguments.
|
|
* \param not_match the characters that will be looked for in the String
|
|
* \param startPos where to start the search
|
|
* \return the position of the first found character
|
|
*/
|
|
size_type find_first_not_of( const String ¬_match, size_type startPos = 0 ) const;
|
|
|
|
/**
|
|
* \brief Searches the string for the last character that matches any of the characters specified in
|
|
* its arguments.
|
|
* \param match the characters that will be looked for in the String
|
|
* \param endPos where to end the search (this is the last character considered in the
|
|
* search)
|
|
* \return the position of the last found character
|
|
*/
|
|
size_type find_last_of( const String &match, size_type endPos = npos ) const;
|
|
|
|
/**
|
|
* \brief Searches the string for the last character that doesn't match any of the characters
|
|
* specified in its arguments.
|
|
* \param not_match the characters that will be looked for in the String
|
|
* \param endPos where to end the search (this is the last character considered in the
|
|
* search)
|
|
* \return the position of the last found character
|
|
*/
|
|
size_type find_last_not_of( const String ¬_match, size_type endPos = npos ) const;
|
|
|
|
/**
|
|
* \brief Compares the current string with another.
|
|
*/
|
|
int compare( const String &other ) const;
|
|
|
|
/**
|
|
* \brief Do a case-insensitive search
|
|
* \return the position of the first occurrence of **search** starting from **pos**.
|
|
*
|
|
* \note This method isn't very efficient as it is linear on the string size times the
|
|
* search string size
|
|
*/
|
|
size_type FindCaseInsensitive( const String &search, size_type pos = 0 ) const;
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
private:
|
|
std::string m_string; ///< Internal std::string container
|
|
|
|
};
|
|
|
|
/**
|
|
* \name Non-member operators
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \relates String
|
|
* \return a String containing the concatenation of lhs and rhs.
|
|
*/
|
|
String GD_CORE_API operator+(String lhs, const String &rhs);
|
|
|
|
/**
|
|
* \relates String
|
|
* \return a String containing the concatenation of lhs and rhs (rhs is
|
|
* converted to gd::String assuming it's encoded in UTF8).
|
|
*/
|
|
String GD_CORE_API operator+(String lhs, const char *rhs);
|
|
|
|
/**
|
|
* \relates String
|
|
* \return a String containing the concatenation of lhs and rhs (lhs is
|
|
* converted to gd::String assuming it's encoded in UTF8).
|
|
*/
|
|
String GD_CORE_API operator+(const char *lhs, const String &rhs);
|
|
|
|
const String& GD_CORE_API operator||(const String &lhs, const String &rhs);
|
|
|
|
String GD_CORE_API operator||(String lhs, const char *rhs);
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Relational operators
|
|
* \{
|
|
*/
|
|
|
|
///\relates String
|
|
bool GD_CORE_API operator==( const String &lhs, const String &rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator==( const String &lhs, const char *rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator==( const char *lhs, const String &rhs );
|
|
|
|
///\relates String
|
|
bool GD_CORE_API operator!=( const String &lhs, const String &rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator!=( const String &lhs, const char *rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator!=( const char *lhs, const String &rhs );
|
|
|
|
///\relates String
|
|
bool GD_CORE_API operator<( const String &lhs, const String &rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator<( const String &lhs, const char *rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator<( const char *lhs, const String &rhs );
|
|
|
|
///\relates String
|
|
bool GD_CORE_API operator<=( const String &lhs, const String &rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator<=( const String &lhs, const char *rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator<=( const char *lhs, const String &rhs );
|
|
|
|
///\relates String
|
|
bool GD_CORE_API operator>( const String &lhs, const String &rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator>( const String &lhs, const char *rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator>( const char *lhs, const String &rhs );
|
|
|
|
///\relates String
|
|
bool GD_CORE_API operator>=( const String &lhs, const String &rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator>=( const String &lhs, const char *rhs );
|
|
///\relates String
|
|
bool GD_CORE_API operator>=( const char *lhs, const String &rhs );
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
/**
|
|
* \name Stream operators
|
|
* \{
|
|
*/
|
|
|
|
/**
|
|
* \relates String
|
|
* Outputs the string in a stream.
|
|
* \note The string is converted to the current locale before. If you want to stream the string
|
|
* as UTF8, do :
|
|
* \code
|
|
* std::cout << myString.Raw();
|
|
* \endcode
|
|
*/
|
|
std::ostream& GD_CORE_API operator<<(std::ostream &os, const String &str);
|
|
|
|
/**
|
|
* \relates String
|
|
* Extracts a string from an input stream assuming the stream inputs characters encoded in the
|
|
* current locale.
|
|
* \note The content of the string is replaced.
|
|
*/
|
|
std::istream& GD_CORE_API operator>>(std::istream &is, String &str);
|
|
|
|
/**
|
|
* \}
|
|
*/
|
|
|
|
|
|
/**
|
|
* \relates String
|
|
* \param compat if true, the strings are normalized using a compatibility normalization form to remove characters special appearance.
|
|
* \return true if the two strings are equivalent (in a case-sensitive way).
|
|
*/
|
|
bool GD_CORE_API CaseSensitiveEquiv( String lhs, String rhs, bool compat = true );
|
|
|
|
/**
|
|
* \relates String
|
|
* \param compat if true, the strings are normalized using a compatibility normalization form to remove characters special appearance.
|
|
* \return true if the two strings are equivalent (in a case-insensitive way).
|
|
*/
|
|
bool GD_CORE_API CaseInsensitiveEquiv( const String &lhs, const String &rhs, bool compat = true );
|
|
|
|
}
|
|
|
|
namespace std
|
|
{
|
|
/**
|
|
* std::hash specialization for gd::String
|
|
*/
|
|
template <> struct GD_CORE_API hash<gd::String>
|
|
{
|
|
size_t operator()(const gd::String & x) const
|
|
{
|
|
return hash<std::string>()(x.Raw());
|
|
}
|
|
};
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
/**
|
|
* \class gd::String
|
|
*
|
|
* \section WhatIsUTF8 What is UTF8 and Unicode ?
|
|
* (from https://en.wikipedia.org/wiki/Unicode and https://en.wikipedia.org/wiki/UTF-8)
|
|
|
|
* Unicode is a computing industry standard for the consistent encoding, representation, and handling of text
|
|
* expressed in most of the world's writing systems.
|
|
* Unicode can be implemented by different character encodings. The most commonly used encodings are UTF-8, UTF-16
|
|
* and the now-obsolete UCS-2.
|
|
*
|
|
* UTF-8 is a character encoding capable of encoding all possible characters, or code points, in Unicode.
|
|
* The encoding is variable-length (not every codepoint is 1 byte long) and uses 8-bit code units. It was designed
|
|
* for backward compatibility with ASCII.
|
|
* UTF-8 encodes each of the 1,112,064 valid code points in the Unicode code space using one to four 8-bit bytes
|
|
* (a group of 8 bits is known as an octet in the Unicode Standard). Code points with lower numerical values
|
|
* (i.e., earlier code positions in the Unicode character set, which tend to occur more frequently) are encoded using
|
|
* fewer bytes. The first 128 characters of Unicode, which correspond one-to-one with ASCII, are encoded using a
|
|
* single octet with the same binary value as ASCII, making valid ASCII text valid UTF-8-encoded Unicode as well.
|
|
*
|
|
* \section Limitations Limitations
|
|
* The String class stores internally the string as an UTF8 encoded std::string. It results in some limitations : it's
|
|
* impossible to edit a single character with operator[]() nor at() because the new character length might not be the same.
|
|
*
|
|
* **The gd::String class supports almost all Unicode characters, except the ones that can't be represented as a single
|
|
* codepoint (obviously, a codepoint can be represented by 1 to 4 bytes, as codepoints are encoded in UTF8).** For examples,
|
|
* some special letters are composed of multiple codepoints (a letter, and the accents). Most of them can be combined into a
|
|
* single codepoint but some can't. These are the not supported ones. See \ref Normalization.
|
|
*
|
|
* \section Performance Performance
|
|
* The UTF8 encoding has the advantage to reduce the RAM consumption compared to UTF16 or UTF32 for strings using a lot
|
|
* of latin characters. But the characters variable length brings some performance issues compared to fixed size encoding.
|
|
* That's why the complexity of each method is written in its documentation. For instance, the size() method is linear
|
|
* on the string size and so is the operator[]().
|
|
*
|
|
* \section Conversion Conversions from/to other string types
|
|
* The String handles implicit conversion from std::u32string (implicit constructor and implicit assignment
|
|
* operator).
|
|
*
|
|
* **However, this is not the case with std::string** as this conversion is not often lossless (mostly on Windows).
|
|
* You need to explicitly call gd::String::FromLocale or gd::String::FromUTF8
|
|
* to convert a std::string to a String. However, if you want to get a String object from a string literal, you can
|
|
* directly use the operator=() or the constructor as they are supporting const char* as argument (it assumes the string
|
|
* literal is encoded in UTF8, so you'll need to put the u8 prefix).
|
|
*
|
|
* \subsection Conversions2 Conversion from/to std::string
|
|
* \code
|
|
* //Get a String from a std::string encoded in the current locale
|
|
* std::string ansiStr = "Some beautiful localized characters. "; //Encoded in ANSI on Windows, UTF8 on Linux
|
|
* gd::String str = gd::String::FromLocale(ansiStr);
|
|
*
|
|
* //Create a String using a string literal encoded in UTF8
|
|
* gd::String anotherStr = u8"This is an UTF8 string";
|
|
* //The same as gd::String anotherStr = gd::String::FromUTF8(u8"This is an UTF8 string");
|
|
* //But it works only with string literals.
|
|
*
|
|
* gd::String finalStr = str + anotherStr; //Concatenates the two Strings
|
|
* std::cout << finalStr.ToLocale() << std::endl; //Shows "Some beautiful localized characters. This is an UTF8 string"
|
|
* \endcode
|
|
*
|
|
* \section Normalization Normalization
|
|
* This class stores Unicode strings normalized with NFC which means that all characters are combined (if they can). For example, the "à"
|
|
* character can be written in two ways according to the Unicode norm : U+00E0 (the "à" in a single codepoint) or
|
|
* U+0061 (the "a" letter codepoint) + U+0300 (the "`" combining accent). We say that they are canonically equivalent.
|
|
* However, this can cause problem when comparing strings, that's why **this class normalizes the string when constructed** using
|
|
* the **Normalization Form Composition** (all characters are combined, e.g. "à" is represented by a single codepoint).
|
|
* If the string **is invalid when constructed, the string is not normalized** : it will be **normalized when the invalid characters
|
|
* will be removed using gd::String::ReplaceInvalid()**.
|
|
*
|
|
* \section CaseInsensitiveComparison Case-insensitive comparison
|
|
* In Unicode, uppercasing/lowercasing strings to compare them in a case-insensitive way is not recommended.
|
|
* That's why the function gd::CaseInsensitiveEquiv exists to compare two strings in a case-insensitive way.
|
|
*/
|