From 958e304ba2d6e5c4dedd22f27fa7d5426bcca906 Mon Sep 17 00:00:00 2001 From: jimmorrisson Date: Tue, 27 Oct 2020 14:02:51 +0100 Subject: [PATCH] [EGD-4121] UCS2 32 bit support (#897) --- changelog.md | 1 + module-cellular/at/Commands.hpp | 5 +- .../service-cellular/ServiceCellular.cpp | 10 +- module-utils/test/CMakeLists.txt | 12 ++ module-utils/test/unittest_ucs2.cpp | 202 ++++++++++++++++++ module-utils/ucs2/UCS2.cpp | 198 +++++++++-------- module-utils/ucs2/UCS2.hpp | 67 +++--- 7 files changed, 358 insertions(+), 137 deletions(-) create mode 100644 module-utils/test/unittest_ucs2.cpp diff --git a/changelog.md b/changelog.md index be2fa131d03fc8cca1eda6b794e032b62ea442b3..d92dbbd1d68e50494839200b06a7b56f9f765135 100644 --- a/changelog.md +++ b/changelog.md @@ -10,6 +10,7 @@ * `[appmgr]` Application manager refactored. ### Fixed +* `[cellular]` Fixed 32 bit UCS2 codes handling. ### Other diff --git a/module-cellular/at/Commands.hpp b/module-cellular/at/Commands.hpp index f49e89c10e71bef0c458a03377c1847640d08603..319598a48778b2d7fb3a05b0f8248d71576dd070 100644 --- a/module-cellular/at/Commands.hpp +++ b/module-cellular/at/Commands.hpp @@ -121,7 +121,8 @@ namespace at SET_TIME_ZONE_REPORTING, DISABLE_TIME_ZONE_UPDATE, DISABLE_TIME_ZONE_REPORTING, - ENABLE_NETWORK_REGISTRATION_URC + ENABLE_NETWORK_REGISTRATION_URC, + SET_SMS_TEXT_MODE_UCS2 }; inline auto factory(AT at) -> const Cmd & @@ -189,7 +190,7 @@ namespace at {AT::DISABLE_TIME_ZONE_UPDATE, {"AT+CTZU=0"}}, {AT::DISABLE_TIME_ZONE_REPORTING, {"AT+CTZR=0"}}, {AT::ENABLE_NETWORK_REGISTRATION_URC, {"AT+CREG=2"}}, - }; + {AT::SET_SMS_TEXT_MODE_UCS2, {"AT+CSMP=17,167,0,8"}}}; if (fact.count(at)) { return fact.at(at); } diff --git a/module-services/service-cellular/ServiceCellular.cpp b/module-services/service-cellular/ServiceCellular.cpp index 771f1eb7e529ac272ddfd8389196f0d0da3c8f96..2367d7e7a512ce0e4e8a5a4a837a4653500531ff 100644 --- a/module-services/service-cellular/ServiceCellular.cpp +++ b/module-services/service-cellular/ServiceCellular.cpp @@ -1074,18 +1074,18 @@ bool ServiceCellular::sendSMS(SMSRecord record) bool result = false; auto channel = cmux->get(TS0710::Channel::Commands); if (channel) { - + channel->cmd(at::AT::SET_SMS_TEXT_MODE_UCS2); channel->cmd(at::AT::SMS_UCSC2); // if text fit in single message send if (textLen < singleMessageLen) { if (cmux->CheckATCommandPrompt(channel->SendCommandPrompt( - (std::string(at::factory(at::AT::CMGS)) + UCS2(UTF8(record.number.getEntered())).modemStr() + "\"") + (std::string(at::factory(at::AT::CMGS)) + UCS2(UTF8(record.number.getEntered())).str() + "\"") .c_str(), 1, commandTimeout))) { - if (channel->cmd((UCS2(record.body).modemStr() + "\032").c_str())) { + if (channel->cmd((UCS2(record.body).str() + "\032").c_str())) { result = true; } else { @@ -1116,12 +1116,12 @@ bool ServiceCellular::sendSMS(SMSRecord record) } UTF8 messagePart = record.body.substr(i * singleMessageLen, partLength); - std::string command(at::factory(at::AT::QCMGS) + UCS2(UTF8(record.number.getEntered())).modemStr() + + std::string command(at::factory(at::AT::QCMGS) + UCS2(UTF8(record.number.getEntered())).str() + "\",120," + std::to_string(i + 1) + "," + std::to_string(messagePartsCount)); if (cmux->CheckATCommandPrompt(channel->SendCommandPrompt(command.c_str(), 1, commandTimeout))) { // prompt sign received, send data ended by "Ctrl+Z" - if (channel->cmd((UCS2(messagePart).modemStr() + "\032").c_str(), commandTimeout, 2)) { + if (channel->cmd((UCS2(messagePart).str() + "\032").c_str(), commandTimeout, 2)) { result = true; } else { diff --git a/module-utils/test/CMakeLists.txt b/module-utils/test/CMakeLists.txt index 8e57f8d206482b17c0786e5ed7d88957a15b0b7f..458352b17085c9e7e013a96bdb55fc0bea6b2a86 100644 --- a/module-utils/test/CMakeLists.txt +++ b/module-utils/test/CMakeLists.txt @@ -21,6 +21,18 @@ add_catch2_executable( module-utils ) +# UCS2 tests +add_catch2_executable( + NAME + utils-ucs2 + SRCS + unittest_ucs2.cpp + ../ucs2/UCS2.cpp + ../ucs2/UCS2.hpp + LIBS + module-utils +) + # UTF8 tests add_catch2_executable( NAME diff --git a/module-utils/test/unittest_ucs2.cpp b/module-utils/test/unittest_ucs2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e189c885cee4a723722b5d535597848edcb71b9c --- /dev/null +++ b/module-utils/test/unittest_ucs2.cpp @@ -0,0 +1,202 @@ +// Copyright (c) 2017-2020, Mudita Sp. z.o.o. All rights reserved. +// For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md + +#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file +#include + +#include "ucs2/UCS2.hpp" + +TEST_CASE("UCS2 to UTF8 conversion") +{ + const char *messageRawBody = "0054006500730074"; + UTF8 decodedMessage = UCS2(std::string(messageRawBody)).toUTF8(); + REQUIRE((uint32_t)('T') == decodedMessage[0]); + REQUIRE((uint32_t)('e') == decodedMessage[1]); + REQUIRE((uint32_t)('s') == decodedMessage[2]); + REQUIRE((uint32_t)('t') == decodedMessage[3]); +} + +TEST_CASE("UCS2 from UTF8 emoji 😁") +{ + UTF8 utf8("😁"); + UCS2 ucs2 = UCS2(utf8); + std::string str = "D83DDE01"; + REQUIRE(ucs2.str() == str); +} + +TEST_CASE("UCS2 from UTF8 emoji 🍣") +{ + UTF8 utf8("🍣"); + UCS2 ucs2 = UCS2(utf8); + std::string str = "D83CDF63"; + REQUIRE(ucs2.str() == str); +} + +TEST_CASE("UTF8 to UCS2 conversion") +{ + UTF8 utf8("Test"); + UCS2 ucs2(utf8); + std::string str = "0054006500730074"; + REQUIRE(ucs2.str() == str); +} + +TEST_CASE("TEST special input characters from UTF8") +{ + UTF8 utf8("ęą"); + UCS2 ucs2(utf8); + std::string str = "01190105"; + REQUIRE(ucs2.str() == str); +} + +TEST_CASE("TEST special input characters from std::string") +{ + UCS2 ucs2(std::string("01190105")); + UTF8 utf8("ęą"); + REQUIRE(ucs2.toUTF8() == utf8); +} + +TEST_CASE("UTF8 emoji 🍣 from UCS2 code") +{ + UCS2 ucs2(std::string("D83CDF63")); + UTF8 utf8("🍣"); + REQUIRE(ucs2.toUTF8() == utf8); +} + +TEST_CASE("UTF8 emoji 😁 and text ęą from UCS2 code") +{ + UCS2 ucs2(std::string("D83DDE0101190105")); + UTF8 utf8("😁ęą"); + REQUIRE(ucs2.toUTF8() == utf8); +} + +TEST_CASE("UTF8 emoji 😁 and text abc from UCS2 code") +{ + UCS2 ucs2(std::string("D83CDF63006100620063")); + UTF8 utf8("🍣abc"); + REQUIRE(ucs2.toUTF8() == utf8); +} + +TEST_CASE("UCS2 to UTF8 long string conversion") +{ + const char *messageRawBody = "004C006F00720065006D00200069007000730075006D00200064006F006C006F007200200073006900740" + "0200061006D00650074002C00200063006" + "F006E00730065006300740065007400750072002000610064006900700069007300630069006E00670020" + "0065006C00690074002E002000500065006C006C0065006E007400650073007" + "10075006500200073006F006C006C0069006300690074007500640069006E002000760061007200690075" + "007300200069007000730075006D002C0020006100200073007500730063006" + "9007000690074002000660065006C00690073002000730061006700690074007400690073002000750074" + "002E00200041006C0069007100750061006D002000740072006900730074006" + "900710075006500200062006C0061006E006400690074002000660065006C00690073002C002000760069" + "00740061006500200063006F006D006D006F0064006F0020006D00610073007" + "300610020006D006100780069006D0075007300200069006E002E0020004D0061006500630065006E0061" + "00730020006100740020006D006F006C0065007300740069006500200070007" + "5007200750073002E0020005300650064002000760065006C00690074002000760065006C00690074002C" + "002000660069006E00690062007500730020007100750069007300200065007" + "30074002000610063002C00630075007200730075007300200076006F006C007500740070006100740020" + "006D00610067006E0061002E00200056006500730074006900620075006C007" + "5006D00200063006F006E00760061006C006C0069007300200075006C0074007200690063006900650073" + "00200061006E00740065002C002000760065006C00200067007200610076006" + "9006400610020006C006900670075006C006100200065006700650073007400610073002000610063002E" + "00200041006C0069007100750061006D0020007600690074006100650020007" + "00075006C00760069006E00610072002000750072006E0061002E00200055007400200076006F006C0075" + "0074007000610074002000660065006C0069007300200061006C00690071007" + "5006500740020006500660066006900630069007400750072002000740065006D0070006F0072002E0020" + "005300650064002000630075007200730075007300200063006F006E0064006" + "9006D0065006E00740075006D00200061006E00740065002E00200056006500730074006900620075006C" + "0075006D0020006600650075006700690061007400200061007200630075002" + "00061006300200069007000730075006D0020006D006100780069006D00750073002C00200069006E0020" + "0074007200690073007400690071007500650020006A007500730074006F002" + "00061007500630074006F0072002E002000530075007300700065006E0064006900730073006500200075" + "0074002000700075006C00760069006E006100720020006D006100730073006" + "1002E00200053006500640020006600610075006300690062007500730020006100740020007000750072" + "007500730020007600690074006100650020006C006F0062006F00720074006" + "90073002E00200041006C0069007100750061006D002000650067006500740020006E0065007100750065" + "0020007100750069007300200065007300740020006D0061007400740069007" + "30020006D006F006C006C00690073002E002000500068006100730065006C006C00750073002000710075" + "00690073002000730065006D007000650072002000720069007300750073002" + "E00200049006E00740065007200640075006D0020006500740020006D0061006C00650073007500610064" + "0061002000660061006D0065007300200061006300200061006E00740065002" + "00069007000730075006D0020007000720069006D0069007300200069006E002000660061007500630069" + "006200750073002E0020004D006F007200620069002000760065006C0020006" + "C006900670075006C006100200065007500200061006E0074006500200065006C0065006D0065006E0074" + "0075006D0020006C006100630069006E00690061002E0055007400200069006" + "4002000720068006F006E0063007500730020006E006900620068002E"; + UTF8 decodedMessage = UCS2(std::string(messageRawBody)).toUTF8(); + UTF8 longUtf8Message("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque sollicitudin varius " + "ipsum, a suscipit felis sagittis ut." + " Aliquam tristique blandit felis, vitae commodo massa maximus in. Maecenas at molestie " + "purus. Sed velit velit, finibus quis est ac," + "cursus volutpat magna. Vestibulum convallis ultricies ante, vel gravida ligula egestas ac. " + "Aliquam vitae pulvinar urna. Ut volutpat" + " felis aliquet efficitur tempor. Sed cursus condimentum ante. Vestibulum feugiat arcu ac " + "ipsum maximus, in tristique justo auctor." + " Suspendisse ut pulvinar massa. Sed faucibus at purus vitae lobortis. Aliquam eget neque " + "quis est mattis mollis. Phasellus quis" + " semper risus. Interdum et malesuada fames ac ante ipsum primis in faucibus. Morbi vel " + "ligula eu ante elementum lacinia." + "Ut id rhoncus nibh."); + REQUIRE(decodedMessage == longUtf8Message); +} + +TEST_CASE("UTF8 to UCS2 long string conversion") +{ + UTF8 longUtf8Message("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque sollicitudin varius " + "ipsum, a suscipit felis sagittis ut." + " Aliquam tristique blandit felis, vitae commodo massa maximus in. Maecenas at molestie " + "purus. Sed velit velit, finibus quis est ac," + "cursus volutpat magna. Vestibulum convallis ultricies ante, vel gravida ligula egestas ac. " + "Aliquam vitae pulvinar urna. Ut volutpat" + " felis aliquet efficitur tempor. Sed cursus condimentum ante. Vestibulum feugiat arcu ac " + "ipsum maximus, in tristique justo auctor." + " Suspendisse ut pulvinar massa. Sed faucibus at purus vitae lobortis. Aliquam eget neque " + "quis est mattis mollis. Phasellus quis" + " semper risus. Interdum et malesuada fames ac ante ipsum primis in faucibus. Morbi vel " + "ligula eu ante elementum lacinia." + "Ut id rhoncus nibh."); + + UCS2 longUcs2Message(longUtf8Message); + REQUIRE( + longUcs2Message.str() == + std::string("004C006F00720065006D00200069007000730075006D00200064006F006C006F0072002000730069007400200061006D00" + "650074002C00200063006F006E00730065006300740065007400750072002000610064006900700069007300630069006E" + "006700200065006C00690074002E002000500065006C006C00" + "65006E00740065007300710075006500200073006F006C006C0069006300690074007500640069006E0020007600610072" + "00690075007300200069007000730075006D002C0020006100" + "2000730075007300630069007000690074002000660065006C006900730020007300610067006900740074006900730020" + "00750074002E00200041006C0069007100750061006D002000" + "740072006900730074006900710075006500200062006C0061006E006400690074002000660065006C00690073002C0020" + "0076006900740061006500200063006F006D006D006F006400" + "6F0020006D00610073007300610020006D006100780069006D0075007300200069006E002E0020004D0061006500630065" + "006E006100730020006100740020006D006F006C0065007300" + "7400690065002000700075007200750073002E0020005300650064002000760065006C00690074002000760065006C0069" + "0074002C002000660069006E00690062007500730020007100" + "75006900730020006500730074002000610063002C00630075007200730075007300200076006F006C0075007400700061" + "00740020006D00610067006E0061002E002000560065007300" + "74006900620075006C0075006D00200063006F006E00760061006C006C0069007300200075006C00740072006900630069" + "0065007300200061006E00740065002C002000760065006C00" + "2000670072006100760069006400610020006C006900670075006C00610020006500670065007300740061007300200061" + "0063002E00200041006C0069007100750061006D0020007600" + "69007400610065002000700075006C00760069006E00610072002000750072006E0061002E00200055007400200076006F" + "006C00750074007000610074002000660065006C0069007300" + "200061006C006900710075006500740020006500660066006900630069007400750072002000740065006D0070006F0072" + "002E0020005300650064002000630075007200730075007300" + "200063006F006E00640069006D0065006E00740075006D00200061006E00740065002E0020005600650073007400690062" + "0075006C0075006D0020006600650075006700690061007400" + "20006100720063007500200061006300200069007000730075006D0020006D006100780069006D00750073002C00200069" + "006E0020007400720069007300740069007100750065002000" + "6A007500730074006F00200061007500630074006F0072002E002000530075007300700065006E00640069007300730065" + "002000750074002000700075006C00760069006E0061007200" + "20006D0061007300730061002E002000530065006400200066006100750063006900620075007300200061007400200070" + "00750072007500730020007600690074006100650020006C00" + "6F0062006F0072007400690073002E00200041006C0069007100750061006D002000650067006500740020006E00650071" + "00750065002000710075006900730020006500730074002000" + "6D006100740074006900730020006D006F006C006C00690073002E002000500068006100730065006C006C007500730020" + "0071007500690073002000730065006D007000650072002000" + "720069007300750073002E00200049006E00740065007200640075006D0020006500740020006D0061006C006500730075" + "006100640061002000660061006D0065007300200061006300" + "200061006E0074006500200069007000730075006D0020007000720069006D0069007300200069006E0020006600610075" + "00630069006200750073002E0020004D006F00720062006900" + "2000760065006C0020006C006900670075006C006100200065007500200061006E0074006500200065006C0065006D0065" + "006E00740075006D0020006C006100630069006E0069006100" + "2E00550074002000690064002000720068006F006E0063007500730020006E006900620068002E")); +} diff --git a/module-utils/ucs2/UCS2.cpp b/module-utils/ucs2/UCS2.cpp index a85570805c312a66291461b41d13bb2b97bb78e5..6f52b32f1acbb1c509fb2d39a524e0cc11f87181 100644 --- a/module-utils/ucs2/UCS2.cpp +++ b/module-utils/ucs2/UCS2.cpp @@ -1,13 +1,6 @@ // Copyright (c) 2017-2020, Mudita Sp. z.o.o. All rights reserved. // For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md -/* - * UCS2.cpp - * - * Created on: 24 wrz 2019 - * Author: kuba - */ - #include "UCS2.hpp" #include #include @@ -16,110 +9,87 @@ #include #include "log/log.hpp" #include +#include +#include -const uint32_t UCS2::ucs2bufferExt = 16; - -UCS2::UCS2(void) +namespace ucs2 { - sizeUsed = 0; - sizeAllocated = ucs2bufferExt; - buffer = new uint16_t[ucs2bufferExt]; - length = 0; -} + constexpr uint32_t bufferExt = 32; +} // namespace ucs2 -UCS2::~UCS2() +UCS2::UCS2() : sizeAllocated{ucs2::bufferExt}, buffer{std::make_unique(ucs2::bufferExt)} { - - delete[] buffer; } UCS2::UCS2(const UTF8 &string) { - this->clear(); - - for (uint32_t i = 0; i < string.length(); i++) { - uint32_t utfChar = string[i]; - - // LOG_INFO("decoded uft %x", ucs2char); - if (0xffff0000 & utfChar) { - sizeUsed = 0; - length = 0; - LOG_ERROR("UCS2::UCS2(const UTF8& string) failed, provided char is out of range"); - break; - } + clear(); - append(static_cast(utfChar)); + for (std::size_t i = 0; i < string.length(); i++) { + uint32_t utfChar = convertFromUtf(string[i]); + append(utfChar); } } UCS2::UCS2(const std::string &string) + : sizeAllocated{ucs2::bufferExt}, buffer{std::make_unique(ucs2::bufferExt)} { - sizeUsed = 0; - sizeAllocated = ucs2bufferExt; - buffer = new uint16_t[ucs2bufferExt]; - length = 0; - - for (unsigned int i = 0; i < string.size() / 4; i++) { - uint16_t ucs2char = 0; - try { - ucs2char = std::stoi(string.substr(i * 4, 4), 0, 16); - } - catch (std::invalid_argument &e) { - LOG_ERROR("UCS2::UCS2(const std::string& string) failed."); - this->clear(); - } - catch (std::out_of_range &e) { - this->clear(); - LOG_ERROR("UCS2::UCS2(const std::string& string) failed."); - } + constexpr uint8_t chunkSize16Bit = 4; + for (std::size_t i = 0; i < string.length() / chunkSize16Bit; i++) { + const auto ucs2char = getUcs2Char(string, i); + // handle 32 bit if (0xffff0000 & ucs2char) { - this->clear(); - LOG_ERROR("UCS2::UCS2(const UTF8& string) failed, provided char is out of range"); - // break; + ++i; + append(ucs2char); + continue; } - append(static_cast(ucs2char)); + // handle 16 bit + append(ucs2char); } // terminate ucs2 string by 0 append(0); } UCS2::UCS2(UCS2 &ucs) + : length{ucs.getLength()}, sizeUsed{ucs.getSizeUsed()}, + sizeAllocated{ucs.getSizeAlocated()}, buffer{std::make_unique(sizeAllocated)} { - sizeUsed = 0; - sizeAllocated = ucs2bufferExt; - buffer = new uint16_t[ucs2bufferExt]; - length = 0; - - this->sizeUsed = ucs.getSizeUsed(); - this->sizeAllocated = ucs.getSizeAlocated(); - this->length = ucs.getLength(); - this->buffer = new uint16_t[sizeAllocated]; - memset(buffer, 0, sizeAllocated); - memcpy(buffer, ucs.getData(), sizeUsed); + memcpy(buffer.get(), ucs.buffer.get(), sizeUsed); } -UTF8 UCS2::toUTF8(void) +UTF8 UCS2::toUTF8() const noexcept { - if (this->length == 0) + if (length == 0) return UTF8(); // create buffer for worst case scenario which is that every char will take 3 bytes in utf8 string // + 1 for null terminator - uint8_t *buffer = new uint8_t[3 * this->length + 1]; - memset(buffer, 0, 3 * this->length + 1); + const auto bufferSize = 3 * length + 1; + auto buffer = std::make_unique(bufferSize); uint32_t offset = 0; - for (uint32_t i = 0; i < this->length; i++) { + std::string s{}; + for (uint32_t i = 0; i < length; i++) { uint32_t c = this->buffer[i]; - // check if character must occupy 3 bytes - if (c > 0x07ff) { + if (c > 0xffff) { + // 32 bit conversion + // U' = yyyyyyyyyyxxxxxxxxxx // U - 0x10000 + // W1 = 110110yyyyyyyyyy // 0xD800 + yyyyyyyyyy + // W2 = 110111xxxxxxxxxx // 0xDC00 + xxxxxxxxxx + const uint16_t y = (c & 0x03FF0000) >> 16; + const uint16_t x = c & 0x03FF; + const uint32_t decoded = 0x10000 + (y << 10) + x; + std::u32string u32s = {decoded}; + + s.append(convertToUtf8String(u32s)); + } + else if (c > 0x07ff) { buffer[offset++] = (0x00E0 | ((c & 0xF000) >> 12)); buffer[offset++] = (0x0080 | ((c & 0x0FC0) >> 6)); buffer[offset++] = (0x0080 | (c & 0x003F)); } - // check if character must occupy 2 bytes else if (c > 0x07f) { buffer[offset++] = (0x00C0 | ((c & 0x07C0) >> 6)); buffer[offset++] = (0x0080 | (c & 0x003F)); @@ -128,29 +98,26 @@ UTF8 UCS2::toUTF8(void) buffer[offset++] = c; } } - UTF8 retString(reinterpret_cast(buffer)); - delete[] buffer; - return retString; + return (!s.empty()) ? UTF8(s + reinterpret_cast(buffer.get())) + : UTF8(reinterpret_cast(buffer.get())); } -void UCS2::append(const uint16_t &ucs2char) +void UCS2::append(const uint32_t &ucs2char) { // check if buffer needs to be expanded if (sizeUsed == sizeAllocated) { - uint16_t *newBuffer = new uint16_t[sizeAllocated + ucs2bufferExt]; - memset(newBuffer, 0, sizeAllocated + ucs2bufferExt); - memcpy(newBuffer, buffer, sizeAllocated); - delete[] buffer; - buffer = newBuffer; - sizeAllocated = sizeAllocated + ucs2bufferExt; + auto newBuffer = std::make_unique(sizeAllocated + ucs2::bufferExt); + memcpy(newBuffer.get(), buffer.get(), sizeAllocated); + buffer = std::move(newBuffer); + sizeAllocated = sizeAllocated + ucs2::bufferExt; } // write character to the end of buffer, increment size and add 2 to used bytes ( usc2 character is two byte ) buffer[length] = ucs2char; length++; - sizeUsed += 2; + sizeUsed += sizeof(ucs2char); } -std::string UCS2::modemStr(void) +std::string UCS2::str() const noexcept { std::stringstream ss; @@ -162,13 +129,66 @@ std::string UCS2::modemStr(void) return ss.str(); } -void UCS2::clear(void) +void UCS2::clear() { sizeUsed = 0; - sizeAllocated = ucs2bufferExt; - - delete[] buffer; + sizeAllocated = ucs2::bufferExt; - buffer = new uint16_t[ucs2bufferExt]; + buffer = std::make_unique(ucs2::bufferExt); length = 0; } + +uint32_t UCS2::convertFromUtf(uint32_t utfChar) const noexcept +{ + if (0xffff0000 & utfChar) { + // 32 bit conversion + // U' = yyyyyyyyyyxxxxxxxxxx // U - 0x10000 + // W1 = 110110yyyyyyyyyy // 0xD800 + yyyyyyyyyy + // W2 = 110111xxxxxxxxxx // 0xDC00 + xxxxxxxxxx + const uint16_t x = utfChar & 0x3FF; + const uint16_t y = utfChar & 0xF400; + const uint16_t low = 0xDC00 + x; + const uint16_t high = 0xD800 + (y >> 10); + return (high << 16) + low; + } + return utfChar; +} + +std::string inline UCS2::convertToUtf8String(const std::u32string &s) const +{ + std::wstring_convert, char32_t> conv; + return conv.to_bytes(s); +} + +uint32_t UCS2::getUcs2Char(const std::string &string, const std::size_t &i) +{ + constexpr uint8_t chunkSize16Bit = 4; + constexpr uint8_t chunkSize32Bit = 8; + constexpr uint8_t indexPosition = 4; + constexpr int base = 16; + + uint32_t ucs2char = 0; + uint16_t nextUcs2char = 0; + try { + ucs2char = std::stoi(string.substr(i * indexPosition, chunkSize16Bit), 0, base); + // check next character + if (i < (string.length() / chunkSize16Bit) - 1) { + nextUcs2char = std::stoi(string.substr((i + 1) * indexPosition, chunkSize16Bit), 0, base); + // 32 bit + if (ucs2char & 0xD800 && nextUcs2char & 0xDC00) { + std::istringstream buf{string.substr(i * indexPosition, chunkSize32Bit)}; + buf >> std::hex >> ucs2char; + } + } + } + catch (const std::invalid_argument &e) { + clear(); + LOG_ERROR("UCS2::UCS2(const std::string& string) failed. Invalid argument."); + } + catch (const std::out_of_range &e) { + clear(); + LOG_ERROR("UCS2::UCS2(const std::string& string) failed. Out of range."); + } + + return ucs2char; +} diff --git a/module-utils/ucs2/UCS2.hpp b/module-utils/ucs2/UCS2.hpp index 4e9cf075dd31fdc6b09909acd3231edac1b469f5..026c61b7f6c6c4b9e630a0c54087820ca9a678b6 100644 --- a/module-utils/ucs2/UCS2.hpp +++ b/module-utils/ucs2/UCS2.hpp @@ -1,84 +1,69 @@ // Copyright (c) 2017-2020, Mudita Sp. z.o.o. All rights reserved. // For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md -/* - * UCS2.hpp - * - * Created on: 24 wrz 2019 - * Author: kuba - */ - -#ifndef MODULE_UTILS_UCS2_UCS2_HPP_ -#define MODULE_UTILS_UCS2_UCS2_HPP_ +#pragma once #include +#include #include "utf8/UTF8.hpp" class UCS2 { private: - // pointer to memory where ucs2 characters are stored. - uint16_t *buffer = nullptr; - // size in bytes of memory that was allcated to the buffer - uint32_t sizeAllocated = 0; - // size in bytes of memory used in buffer - uint32_t sizeUsed = 0; // number of characters in the string. its equal to size of allocated memory plus null terminator - uint32_t length = 0; - // - static const uint32_t ucs2bufferExt; - void clear(void); + uint32_t length = 0U; + // size in bytes of memory used in buffer + uint32_t sizeUsed = 0U; + // size in bytes of memory that was allcated to the buffer + uint32_t sizeAllocated = 0U; + // pointer to memory where ucs2 characters are stored. + std::unique_ptr buffer; + void clear(); + [[nodiscard]] uint32_t convertFromUtf(uint32_t utfChar) const noexcept; + + [[nodiscard]] std::string inline convertToUtf8String(const std::u32string &s) const; + [[nodiscard]] uint32_t getUcs2Char(const std::string &string, const std::size_t &i); public: - // default constructor - UCS2(void); - // - // UCS2( uint16_t* text); + UCS2(); /* * @brief Initializes new ucs2 string from utf8 string. It's used to convert text from * utf8 to ucs2. * @param string utf8 string to convert */ - UCS2(const UTF8 &string); + explicit UCS2(const UTF8 &string); /* * @brief Initializes new ucs2 string from std::string. It's used to convert text from * modem message format to ucs2. * @param string std::string to convert */ - UCS2(const std::string &string); - UCS2(UCS2 &ucs); - ~UCS2(void); - const char *c_str(void) + explicit UCS2(const std::string &string); + explicit UCS2(UCS2 &ucs); + [[nodiscard]] const char *c_str() const noexcept { - return reinterpret_cast(buffer); + return reinterpret_cast(buffer.get()); } /* * @brief It's converting ucs2 to utf string. * @return utf8 string */ - UTF8 toUTF8(void); - void append(const uint16_t &ucs2char); + [[nodiscard]] UTF8 toUTF8() const noexcept; + void append(const uint32_t &ucs2char); /* * @brief It's converting text coded in ucs2 to string. Used to send data to modem. * @return coded string */ - std::string modemStr(void); - uint32_t getLength(void) + [[nodiscard]] std::string str() const noexcept; + [[nodiscard]] uint32_t getLength() const noexcept { return length; }; - uint32_t getSizeUsed(void) + [[nodiscard]] uint32_t getSizeUsed() const noexcept { return sizeUsed; }; - uint32_t getSizeAlocated(void) + [[nodiscard]] uint32_t getSizeAlocated() const noexcept { return sizeAllocated; }; - uint16_t *getData(void) - { - return buffer; - }; }; - -#endif /* MODULE_UTILS_UCS2_UCS2_HPP_ */