Files
GDevelop/Core/GDCore/String.cpp

794 lines
20 KiB
C++

/*
* GDevelop Core
* Copyright 2015-2016 Victor Levasseur (victorlevasseur52@gmail.com).
* This project is released under the MIT License.
*/
#include "GDCore/String.h"
#include <SFML/System/String.hpp>
#include "GDCore/CommonTools.h"
#include "GDCore/Utf8/utf8proc.h"
namespace gd
{
constexpr String::size_type String::npos;
String::String() : m_string()
{
}
String::String(const char *characters) : m_string()
{
*this = characters;
}
String::String(const sf::String &string) : m_string()
{
*this = string;
}
String::String(const std::u32string &string) : m_string()
{
*this = string;
}
String& String::operator=(const char *characters)
{
m_string = std::string(characters);
return *this;
}
String& String::operator=(const sf::String &string)
{
m_string.clear();
//In theory, an UTF8 character can be up to 6 bytes (even if in the current Unicode standard,
//the last character is 4 bytes long when encoded in UTF8).
//So, reserve the maximum possible size to avoid reallocations.
m_string.reserve( string.getSize() * 6 );
//Push_back all characters inside the string.
for( sf::String::ConstIterator it = string.begin(); it != string.end(); ++it )
{
push_back( *it );
}
m_string.shrink_to_fit();
return *this;
}
String& String::operator=(const std::u32string &string)
{
m_string.clear();
//In theory, an UTF8 character can be up to 6 bytes (even if in the current Unicode standard,
//the last character is 4 bytes long when encoded in UTF8).
//So, reserve the maximum possible size to avoid reallocations.
m_string.reserve( string.size() * 6 );
//Push_back all characters inside the string.
for( std::u32string::const_iterator it = string.begin(); it != string.end(); ++it )
{
push_back( *it );
}
m_string.shrink_to_fit();
return *this;
}
String::size_type String::size() const
{
return std::distance(begin(), end());
}
String::iterator String::begin()
{
return String::iterator(m_string.begin());
}
String::const_iterator String::begin() const
{
return String::const_iterator(m_string.cbegin());
}
String::iterator String::end()
{
return String::iterator(m_string.end());
}
String::const_iterator String::end() const
{
return String::const_iterator(m_string.end());
}
String String::FromLocale( const std::string &localizedString )
{
#if defined(WINDOWS)
return FromSfString(sf::String(localizedString)); //Don't need to use the current locale, on Windows, std::locale is always the C locale
#elif defined(MACOS)
return FromUTF8(localizedString); //Assume UTF8 is the current locale
#elif defined(EMSCRIPTEN)
return FromUTF8(localizedString); //Assume UTF8 is the current locale
#else
if(std::locale("").name().find("utf-8") != std::string::npos ||
std::locale("").name().find("UTF-8") != std::string::npos ||
std::locale("").name().find("utf8") != std::string::npos ||
std::locale("").name().find("UTF8") != std::string::npos)
return FromUTF8(localizedString); //UTF8 is already the current locale
else
return FromSfString(sf::String(localizedString, std::locale(""))); //Use the current locale (std::locale("")) for conversion
#endif
}
String String::FromUTF32( const std::u32string &string )
{
String str;
str = string; //operator=(const std::u32string&)
return str;
}
String String::FromSfString( const sf::String &sfString )
{
return String(sfString);
}
String String::FromUTF8( const std::string &utf8Str )
{
String str(utf8Str.c_str());
return str;
}
String String::FromWide( const std::wstring &wstr )
{
String str;
#ifdef WINDOWS //std::wstring is an UTF16 string on Windows
::utf8::utf16to8(wstr.begin(), wstr.end(), std::back_inserter(str.Raw()));
#else //and a UTF32 string on other OSes
::utf8::utf32to8(wstr.begin(), wstr.end(), std::back_inserter(str.Raw()));
#endif
return str;
}
std::string String::ToLocale() const
{
#if defined(WINDOWS)
return ToSfString().toAnsiString();
#elif defined(MACOS)
return m_string;
#elif defined(EMSCRIPTEN)
return m_string;
#else
if(std::locale("").name().find("utf-8") != std::string::npos ||
std::locale("").name().find("UTF-8") != std::string::npos ||
std::locale("").name().find("utf8") != std::string::npos ||
std::locale("").name().find("UTF8") != std::string::npos)
return m_string; //UTF8 is already the current locale on Linux
else
return ToSfString().toAnsiString(std::locale("")); //Use the current locale for conversion
#endif
}
std::u32string String::ToUTF32() const
{
std::u32string u32str;
for( const_iterator it = begin(); it != end(); ++it )
{
u32str.push_back( *it );
}
return u32str;
}
sf::String String::ToSfString() const
{
sf::String str;
for(const_iterator it = begin(); it != end(); ++it)
str += sf::String(static_cast<sf::Uint32>(*it));
return str;
}
String::operator sf::String() const
{
return ToSfString();
}
std::string String::ToUTF8() const
{
return m_string;
}
std::wstring String::ToWide() const
{
std::wstring wstr;
#ifdef WINDOWS //std::wstring is an UTF16 string on Windows
::utf8::utf8to16(m_string.begin(), m_string.end(), std::back_inserter(wstr));
#else //and a UTF32 string on other OSes
::utf8::utf8to32(m_string.begin(), m_string.end(), std::back_inserter(wstr));
#endif
return wstr;
}
bool String::IsValid() const
{
return ::utf8::is_valid(m_string.begin(), m_string.end());
}
String& String::ReplaceInvalid( value_type replacement )
{
std::string validStr;
::utf8::replace_invalid(m_string.begin(), m_string.end(), std::back_inserter(validStr), replacement);
m_string = validStr;
return *this;
}
String::value_type String::operator[]( const String::size_type position ) const
{
const_iterator it = begin();
std::advance(it, position);
return *it;
}
String& String::operator+=( const String &other )
{
m_string += other.m_string;
return *this;
}
String& String::operator+=( const char *other )
{
*this += gd::String(other);
return *this;
}
String& String::operator+=( value_type character )
{
push_back(character);
return *this;
}
void String::push_back( String::value_type character )
{
::utf8::unchecked::append(character, std::back_inserter(m_string));
}
void String::pop_back()
{
m_string.erase((--end()).base(), end().base());
}
String& String::insert( size_type pos, const String &str )
{
iterator it = begin();
std::advance(it, pos);
//Use the real position as bytes using the std::string::iterators
m_string.insert( std::distance(m_string.begin(), it.base()), str.m_string );
return *this;
}
String& String::replace( iterator i1, iterator i2, const String &str )
{
m_string.replace(i1.base(), i2.base(), str.m_string);
return *this;
}
String& String::replace( String::size_type pos, String::size_type len, const String &str )
{
if(pos > size())
throw std::out_of_range("[gd::String::replace] starting pos greater than size");
iterator i1 = begin();
std::advance( i1, pos );
iterator i2 = i1;
while(i2 != end() && len > 0) //Increment "len" times and stop if end() is reached
{
++i2;
--len;
}
return replace( i1, i2, str );
}
String::iterator String::erase( String::iterator first, String::iterator last )
{
return iterator( m_string.erase( first.base(), last.base() ) );
}
String::iterator String::erase( String::iterator p )
{
return iterator( m_string.erase( p.base() ) );
}
void String::erase( String::size_type pos, String::size_type len )
{
if(pos > size())
throw std::out_of_range("[gd::String::erase] starting pos greater than size");
iterator i1 = begin();
std::advance(i1, pos);
iterator i2 = i1;
while(i2 != end() && len != 0) //Increment "len" times and stop if end() is reached
{
++i2;
len--;
}
erase( i1, i2 );
}
std::vector<String> String::Split( String::value_type delimiter ) const
{
std::vector<String> splittedStrings(1);
String::const_iterator it = begin();
for(; it != end(); ++it)
{
String::value_type codepoint = *it;
if(codepoint == delimiter) //It's the delimiter, insert a new String in the vector
{
splittedStrings.emplace_back();
}
else
{
splittedStrings.back().push_back(codepoint);
}
}
return splittedStrings;
}
String String::CaseFold() const
{
unsigned char *newStr = nullptr;
utf8proc_map((unsigned char*)m_string.c_str(), 0, &newStr, static_cast<utf8proc_option_t>(UTF8PROC_CASEFOLD|UTF8PROC_NULLTERM));
String str((char*)newStr);
str.Normalize();
free(newStr);
return str;
}
String String::UpperCase() const
{
gd::String upperCasedStr;
std::for_each( begin(), end(), [&](char32_t codepoint){ upperCasedStr.push_back( utf8proc_toupper(codepoint) ); } );
return upperCasedStr;
}
String String::LowerCase() const
{
gd::String lowerCasedStr;
std::for_each( begin(), end(), [&](char32_t codepoint){ lowerCasedStr.push_back( utf8proc_tolower(codepoint) ); } );
return lowerCasedStr;
}
String String::FindAndReplace(String search, String replacement, bool all) const
{
gd::String result(*this);
size_type pos, lastPos = 0;
do {
pos = result.find(search, lastPos);
lastPos = pos;
if (pos != npos)
{
result.replace(pos, search.size(), replacement);
lastPos += replacement.size();
}
} while(lastPos != npos && all);
return result;
}
String& String::Normalize(String::NormForm form)
{
unsigned char *newStr = nullptr;
if(form == NFD)
newStr = utf8proc_NFD((unsigned char*)m_string.c_str());
else if(form == NFC)
newStr = utf8proc_NFC((unsigned char*)m_string.c_str());
else if(form == NFKD)
newStr = utf8proc_NFKD((unsigned char*)m_string.c_str());
else if(form == NFKC)
newStr = utf8proc_NFKC((unsigned char*)m_string.c_str());
m_string = (char*)newStr;
free(newStr);
return *this;
}
String String::substr( String::size_type start, String::size_type length ) const
{
String str;
const_iterator startIt = begin();
while(start > 0 && startIt != end())
{
++startIt;
--start;
}
if(start > 0) //We reach the end of the string before the start position
throw std::out_of_range("[gd::String::substr] starting pos greater than size");
const_iterator endIt = startIt;
while(length > 0 && endIt != end())
{
++endIt;
--length;
}
str.m_string = std::string( startIt.base(), endIt.base() );
return str;
}
String::size_type String::find( const String &search, String::size_type pos ) const
{
const_iterator it = begin();
//Move to pos
if(pos < size())
std::advance( it, pos );
else
return npos;
//Use the standard std::string to find a string (using their internal std::strings).
//Use the raw std::string iterator to get the offset as a **byte** count for the starting
//position.
std::string::size_type findPos =
m_string.find( search.m_string, std::distance( m_string.begin(), it.base() ) );
if( findPos != std::string::npos )
{
//Create a String::iterator from the std::string::iterator pointing to the find result.
const_iterator findPosIt( m_string.begin() + findPos );
//Return the distance in **characters** count (that's why we need a String::iterator).
return std::distance( begin(), findPosIt );
}
else
return npos;
}
String::size_type String::find( const char *search, String::size_type pos ) const
{
return find( String( search ), pos );
}
String::size_type String::find( const String::value_type search, String::size_type pos ) const
{
return find( String( std::u32string( 1, search ) ), pos );
}
String::size_type String::rfind( const String &search, String::size_type pos ) const
{
//Move to pos + 1 (we will then get the last byte of the character at pos)
const_iterator it = begin();
std::string::const_iterator baseIt;
if( pos < size() ) //little optimization by testing npos directly (avoid calculating size())
{
std::advance( it, pos + 1 );
baseIt = it.base();
--baseIt; //Decrement the std::string::iterator by one because we need
//it to point to the last byte of the character at pos
}
//The last character is included, so we need to put the position
//of the last byte of the character at the position "pos"
std::string::size_type findPos = m_string.rfind( search.m_string,
pos < size() ? std::distance( m_string.begin(), baseIt ) : std::string::npos
);
if( findPos != std::string::npos )
{
//Create a String::iterator to the find position to be able to get the real
//distance as characters count (it would be a distance as bytes count
//with a std::string::iterator)
const_iterator findPosIt( m_string.begin() + findPos );
//Return the distance (which is a distance as characters count)
return std::distance( begin(), findPosIt );
}
else
return npos;
}
String::size_type String::rfind( const char *search, String::size_type pos ) const
{
return rfind( String( search ), pos );
}
String::size_type String::rfind( const value_type &search, String::size_type pos ) const
{
return rfind( String( std::u32string( 1, search ) ), pos );
}
namespace priv
{
String::size_type find_first_of( const String &str, const String &match,
String::size_type startPos, bool not_of )
{
String::const_iterator it = str.begin();
if(startPos < str.size())
std::advance( it, startPos );
else
return String::npos;
for( ; it != str.end(); ++it )
{
//Search the current char in the match string
if( ( std::find( match.begin(), match.end(), (*it) ) != match.end() ) != not_of )
return std::distance( str.begin(), it );
}
return String::npos;
}
}
String::size_type String::find_first_of( const String &match, size_type startPos ) const
{
return priv::find_first_of(*this, match, startPos, false);
}
String::size_type String::find_first_not_of( const String &match, size_type startPos ) const
{
return priv::find_first_of(*this, match, startPos, true);
}
namespace priv
{
String::size_type find_last_of( const String &str, const String &match,
String::size_type endPos, bool not_of )
{
//Temporary store the size to avoid a double call to size()
String::size_type strSize = str.size();
String::const_iterator it = str.end();
if( endPos < strSize )
std::advance( it, endPos - strSize + 1 );
while( it != str.begin() )
{
--it;
if( ( std::find( match.begin(), match.end(), (*it) ) != match.end() ) != not_of )
return std::distance( str.begin(), it );
}
return String::npos;
}
}
String::size_type String::find_last_of( const String &match, size_type endPos ) const
{
return priv::find_last_of( *this, match, endPos, false );
}
String::size_type String::find_last_not_of( const String &match, size_type endPos ) const
{
return priv::find_last_of( *this, match, endPos, true );
}
int String::compare( const String &other ) const
{
return m_string.compare( other.m_string );
}
namespace priv
{
/**
* As the the casefolded version of a string can have a different size, the positions
* in the two versions of the string are not the same.
* \return where the **pos** position in the original string **str** is in the
* casefolded version of **str**
*/
String::size_type GetPositionInCaseFolded( const String &str, String::size_type pos )
{
//Use the substring to determine the differences between original and casefolded
//Get the substring from 0 to pos and then casefold it to see where the character
//is in the casefolded version
return str.substr(0, pos).CaseFold().size();
}
/**
* As the the casefolded version of a string can have a different size, the positions
* in the two versions of the string are not the same.
* \return where the **pos** position in the casefolded string of **str** is in the
* original version **str**
* \note str must be the non-casefolded string.
*/
String::size_type GetPositionFromCaseFolded( const String &str, String::size_type pos )
{
//Use the "opposite" operation (GetPositionInCaseFolded) to find where the position
//is in the original string.
String::size_type posInOrig = 0;
while(pos != GetPositionInCaseFolded(str, posInOrig))
posInOrig++;
return posInOrig;
}
}
String::size_type String::FindCaseInsensitive( const String &search, size_type pos ) const
{
//Find where is pos in the casefolded string (it's important because some letters
//are casefolded into multiples letters, e.g. the german eszett ß is casefolded to ss).
//Do a traditionnal find with both strings casefolded
gd::String casefoldedStr = CaseFold();
size_type findPos = casefoldedStr.find( search.CaseFold(), priv::GetPositionInCaseFolded(*this, pos) );
if(findPos == npos)
return npos;
else
return priv::GetPositionFromCaseFolded(*this, findPos);
}
String GD_CORE_API operator+(String lhs, const String &rhs)
{
lhs += rhs;
return lhs;
}
String GD_CORE_API operator+(String lhs, const char *rhs)
{
lhs += rhs;
return lhs;
}
String GD_CORE_API operator+(const char *lhs, const String &rhs)
{
String str(lhs);
str += rhs;
return str;
}
bool GD_CORE_API operator==( const String &lhs, const String &rhs )
{
return (lhs.compare(rhs) == 0);
}
bool GD_CORE_API operator==( const String &lhs, const char *rhs )
{
return (lhs == String(rhs));
}
bool GD_CORE_API operator==( const char *lhs, const gd::String &rhs )
{
return (String(lhs) == rhs);
}
bool GD_CORE_API operator!=( const String &lhs, const String &rhs )
{
return !(lhs == rhs);
}
bool GD_CORE_API operator!=( const String &lhs, const char *rhs )
{
return !(lhs == rhs);
}
bool GD_CORE_API operator!=( const char *lhs, const String &rhs )
{
return !(lhs == rhs);
}
bool GD_CORE_API operator<( const String &lhs, const String &rhs )
{
return (lhs.compare(rhs) < 0);
}
bool GD_CORE_API operator<( const String &lhs, const char *rhs )
{
return (lhs < String(rhs));
}
bool GD_CORE_API operator<( const char *lhs, const String &rhs )
{
return (String(lhs) < rhs);
}
bool GD_CORE_API operator<=( const String &lhs, const String &rhs )
{
return (lhs.compare(rhs) <= 0);
}
bool GD_CORE_API operator<=( const String &lhs, const char *rhs )
{
return (lhs <= String(rhs));
}
bool GD_CORE_API operator<=( const char *lhs, const String &rhs )
{
return (String(lhs) <= rhs);
}
bool GD_CORE_API operator>( const String &lhs, const String &rhs )
{
return (lhs.compare(rhs) > 0);
}
bool GD_CORE_API operator>( const String &lhs, const char *rhs )
{
return (lhs > String(rhs));
}
bool GD_CORE_API operator>( const char *lhs, const String &rhs )
{
return (String(lhs) > rhs);
}
bool GD_CORE_API operator>=( const String &lhs, const String &rhs )
{
return (lhs.compare(rhs) >= 0);
}
bool GD_CORE_API operator>=( const String &lhs, const char *rhs )
{
return (lhs >= String(rhs));
}
bool GD_CORE_API operator>=( const char *lhs, const String &rhs )
{
return (String(lhs) >= rhs);
}
std::ostream& GD_CORE_API operator<<(std::ostream& os, const String& str)
{
os << str.ToLocale();
return os;
}
std::istream& GD_CORE_API operator>>(std::istream &is, String &str)
{
std::string extractedString;
is >> extractedString;
str = String::FromLocale(extractedString);
return is;
}
bool GD_CORE_API CaseSensitiveEquiv( String lhs, String rhs, bool compat )
{
if(compat)
return (lhs.Normalize(String::NFKD) == rhs.Normalize(String::NFKD));
else
return (lhs.Normalize(String::NFD) == rhs.Normalize(String::NFD));
}
bool GD_CORE_API CaseInsensitiveEquiv( const String &lhs, const String &rhs, bool compat )
{
if(compat)
return (lhs.CaseFold().Normalize(String::NFKD) == rhs.CaseFold().Normalize(String::NFKD));
else
return (lhs.CaseFold().Normalize(String::NFD) == rhs.CaseFold().Normalize(String::NFD));
}
}