~aleteoryx/muditaos: [EGD-4604] Fix incorrect message UCS2 decode

3 files changed, 44 insertions(+), 23 deletions(-)

M enabled_unittests
M module-utils/test/unittest_ucs2.cpp
M module-utils/ucs2/UCS2.cpp

M enabled_unittests => enabled_unittests +2 -0

@@ 288,12 288,14 @@ TESTS_LIST["catch2-utils-ucs2"]="
     UCS2 to UTF8 conversion;
     UCS2 from UTF8 emoji 😁;
     UCS2 from UTF8 emoji 🍣;
+    UCS2 text with emojis int the middle from UTF8 code;
     UTF8 to UCS2 conversion;
     TEST special input characters from UTF8;
     TEST special input characters from std::string;
     UTF8 emoji 🍣 from UCS2 code;
     UTF8 emoji 😁 and text ęą from UCS2 code;
     UTF8 emoji 😁 and text abc from UCS2 code;
+    UTF8 text with emojis int the middle from UCS2 code;
     UCS2 to UTF8 long string conversion;
     UTF8 to UCS2 long string conversion;
 "

M module-utils/test/unittest_ucs2.cpp => module-utils/test/unittest_ucs2.cpp +23 -0

@@ 47,6 47,18 @@ TEST_CASE("UCS2 from UTF8 emoji 🍣")
     REQUIRE(ucs2.str() == str);
 }
 
+TEST_CASE("UCS2 text with emojis int the middle from UTF8 code") // UTF8 -> UCS2 direction: emoji placed between text segments
+{
+    UTF8 utf8("ęą😁ęą🍣ęą"); // text, emoji, text, a different emoji, text
+    UCS2 ucs2 = UCS2(utf8);
+    std::string expected("01190105" // ęą (U+0119, U+0105)
+                         "D83DDE01" // 😁 (U+1F601 written as a surrogate pair)
+                         "01190105" // ęą
+                         "D83CDF63" // 🍣 (U+1F363 written as a surrogate pair)
+                         "01190105"); // ęą
+    REQUIRE(ucs2.str() == expected); // str() must equal the hex text assembled above, segment by segment
+}
+
 TEST_CASE("UTF8 to UCS2 conversion")
 {
     UTF8 utf8("Test");


@@ 91,6 103,17 @@ TEST_CASE("UTF8 emoji 😁 and text abc from UCS2 code")
     REQUIRE(ucs2.toUTF8() == utf8);
 }
 
+TEST_CASE("UTF8 text with emojis int the middle from UCS2 code") // UCS2 -> UTF8 direction: emoji placed between text segments
+{
+    UCS2 ucs2(std::string("01190105" // ęą (U+0119, U+0105)
+                          "D83DDE01" // 😁 (U+1F601 written as a surrogate pair)
+                          "01190105" // ęą
+                          "D83CDF63" // 🍣 (U+1F363 written as a surrogate pair)
+                          "01190105")); // ęą
+    UTF8 utf8("ęą😁ęą🍣ęą"); // expected text: same segments, in the same order as the hex input
+    REQUIRE(ucs2.toUTF8() == utf8); // every segment, including text after each emoji, must survive decoding
+}
+
 TEST_CASE("UCS2 to UTF8 long string conversion")
 {
     const char *messageRawBody = "004C006F00720065006D00200069007000730075006D00200064006F006C006F007200200073006900740"

M module-utils/ucs2/UCS2.cpp => module-utils/ucs2/UCS2.cpp +19 -23

@@ 1,4 1,4 @@
-// Copyright (c) 2017-2020, Mudita Sp. z.o.o. All rights reserved.
+// Copyright (c) 2017-2021, Mudita Sp. z.o.o. All rights reserved.
 // For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md
 
 #include "UCS2.hpp"


@@ 59,20 59,17 @@ UCS2::UCS2(UCS2 &ucs)
 
 UTF8 UCS2::toUTF8() const noexcept
 {
-    if (length == 0)
+    if (length == 0) {
         return UTF8();
+    }
 
-    // create buffer for worst case scenario which is that every char will take 3 bytes in utf8 string
-    // + 1 for null terminator
-
-    const auto bufferSize = 3 * length + 1;
-    auto buffer           = std::make_unique<uint8_t[]>(bufferSize);
-
-    uint32_t offset = 0;
-    std::string s{};
+    constexpr auto maxDecodedCharSize = 3;
+    std::array<uint8_t, maxDecodedCharSize> localBuffer;
+    std::string decodeResult;
+    decodeResult.reserve(maxDecodedCharSize * length + 1);
     for (uint32_t i = 0; i < length; i++) {
-        uint32_t c = this->buffer[i];
-
+        uint32_t offset  = 0;
+        const uint32_t c = this->buffer[i];
         if (c > 0xffff) {
             // 32 bit conversion
             // U' = yyyyyyyyyyxxxxxxxxxx  // U - 0x10000


@@ 81,25 78,24 @@ UTF8 UCS2::toUTF8() const noexcept
             const uint16_t y       = (c & 0x03FF0000) >> 16;
             const uint16_t x       = c & 0x03FF;
             const uint32_t decoded = 0x10000 + (y << 10) + x;
-            std::u32string u32s    = {decoded};
-
-            s.append(convertToUtf8String(u32s));
+            decodeResult.append(convertToUtf8String({decoded}));
+            continue;
         }
         else if (c > 0x07ff) {
-            buffer[offset++] = (0x00E0 | ((c & 0xF000) >> 12));
-            buffer[offset++] = (0x0080 | ((c & 0x0FC0) >> 6));
-            buffer[offset++] = (0x0080 | (c & 0x003F));
+            localBuffer[offset++] = (0x00E0 | ((c & 0xF000) >> 12));
+            localBuffer[offset++] = (0x0080 | ((c & 0x0FC0) >> 6));
+            localBuffer[offset++] = (0x0080 | (c & 0x003F));
         }
         else if (c > 0x07f) {
-            buffer[offset++] = (0x00C0 | ((c & 0x07C0) >> 6));
-            buffer[offset++] = (0x0080 | (c & 0x003F));
+            localBuffer[offset++] = (0x00C0 | ((c & 0x07C0) >> 6));
+            localBuffer[offset++] = (0x0080 | (c & 0x003F));
         }
         else {
-            buffer[offset++] = c;
+            localBuffer[offset++] = c;
         }
+        decodeResult.append(std::begin(localBuffer), std::next(std::begin(localBuffer), offset));
     }
-    return (!s.empty()) ? UTF8(s + reinterpret_cast<const char *>(buffer.get()))
-                        : UTF8(reinterpret_cast<const char *>(buffer.get()));
+    return UTF8(decodeResult.c_str());
 }
 
 void UCS2::append(const uint32_t &ucs2char)