From 19a897cf653e0f70d1ceb5396c8870ac1eab4ac1 Mon Sep 17 00:00:00 2001
From: mkamonMdt <michal.kamon@mudita.com>
Date: Wed, 10 Feb 2021 10:44:25 +0100
Subject: [PATCH] [EGD-4604] Fix incorrect message UCS2 decode

The problem could be noticed in an incoming message that contained
emojis. All emojis were pushed to the front of the message, because
`uint32_t` chars 'larger' than `0xffff` were decoded separately.
---
 enabled_unittests                   |  2 ++
 module-utils/test/unittest_ucs2.cpp | 23 ++++++++++++++++
 module-utils/ucs2/UCS2.cpp          | 42 +++++++++++++----------------
 3 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/enabled_unittests b/enabled_unittests
index 997008e8edf1192218f0c901f74e658dad966c25..743cddeb64668713a528bd4211299aa4ef9c056d 100644
--- a/enabled_unittests
+++ b/enabled_unittests
@@ -288,12 +288,14 @@ TESTS_LIST["catch2-utils-ucs2"]="
     UCS2 to UTF8 conversion;
     UCS2 from UTF8 emoji 😁;
     UCS2 from UTF8 emoji 🍣;
+    UCS2 text with emojis int the middle from UTF8 code;
     UTF8 to UCS2 conversion;
     TEST special input characters from UTF8;
     TEST special input characters from std::string;
     UTF8 emoji 🍣 from UCS2 code;
     UTF8 emoji 😁 and text ęą from UCS2 code;
     UTF8 emoji 😁 and text abc from UCS2 code;
+    UTF8 text with emojis int the middle from UCS2 code;
     UCS2 to UTF8 long string conversion;
     UTF8 to UCS2 long string conversion;
 "
diff --git a/module-utils/test/unittest_ucs2.cpp b/module-utils/test/unittest_ucs2.cpp
index ff094c8530964a73af00e84752061641851dbbb5..b4213803a63868988e370a5eb851440f014f83c1 100644
--- a/module-utils/test/unittest_ucs2.cpp
+++ b/module-utils/test/unittest_ucs2.cpp
@@ -47,6 +47,18 @@ TEST_CASE("UCS2 from UTF8 emoji 🍣")
     REQUIRE(ucs2.str() == str);
 }
 
+TEST_CASE("UCS2 text with emojis int the middle from UTF8 code")
+{
+    UTF8 utf8("ęą😁ęą🍣ęą");
+    UCS2 ucs2 = UCS2(utf8);
+    std::string expected("01190105" // ęą (U+0119, U+0105)
+                         "D83DDE01" // 😁 (U+1F601 as surrogate pair D83D DE01)
+                         "01190105" // ęą
+                         "D83CDF63" // 🍣 (U+1F363 as surrogate pair D83C DF63)
+                         "01190105"); // ęą
+    REQUIRE(ucs2.str() == expected); // emojis must stay between the text chunks, in input order
+}
+
 TEST_CASE("UTF8 to UCS2 conversion")
 {
     UTF8 utf8("Test");
@@ -91,6 +103,17 @@ TEST_CASE("UTF8 emoji 😁 and text abc from UCS2 code")
     REQUIRE(ucs2.toUTF8() == utf8);
 }
 
+TEST_CASE("UTF8 text with emojis int the middle from UCS2 code")
+{
+    UCS2 ucs2(std::string("01190105" // ęą (U+0119, U+0105)
+                          "D83DDE01" // 😁 (U+1F601 as surrogate pair D83D DE01)
+                          "01190105" // ęą
+                          "D83CDF63" // 🍣 (U+1F363 as surrogate pair D83C DF63)
+                          "01190105")); // ęą
+    UTF8 utf8("ęą😁ęą🍣ęą");
+    REQUIRE(ucs2.toUTF8() == utf8); // decoded emojis must stay between the text chunks, not move to the front
+}
+
 TEST_CASE("UCS2 to UTF8 long string conversion")
 {
     const char *messageRawBody = "004C006F00720065006D00200069007000730075006D00200064006F006C006F007200200073006900740"
diff --git a/module-utils/ucs2/UCS2.cpp b/module-utils/ucs2/UCS2.cpp
index 6f52b32f1acbb1c509fb2d39a524e0cc11f87181..465d0dbc635e312a918f42ab681554f1963f1368 100644
--- a/module-utils/ucs2/UCS2.cpp
+++ b/module-utils/ucs2/UCS2.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2020, Mudita Sp. z.o.o. All rights reserved.
+// Copyright (c) 2017-2021, Mudita Sp. z.o.o. All rights reserved.
 // For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md
 
 #include "UCS2.hpp"
@@ -59,20 +59,17 @@ UCS2::UCS2(UCS2 &ucs)
 
 UTF8 UCS2::toUTF8() const noexcept
 {
-    if (length == 0)
+    if (length == 0) {
         return UTF8();
+    }
 
-    // create buffer for worst case scenario which is that every char will take 3 bytes in utf8 string
-    // + 1 for null terminator
-
-    const auto bufferSize = 3 * length + 1;
-    auto buffer           = std::make_unique<uint8_t[]>(bufferSize);
-
-    uint32_t offset = 0;
-    std::string s{};
+    constexpr auto maxDecodedCharSize = 3;
+    std::array<uint8_t, maxDecodedCharSize> localBuffer;
+    std::string decodeResult;
+    decodeResult.reserve(maxDecodedCharSize * length + 1);
     for (uint32_t i = 0; i < length; i++) {
-        uint32_t c = this->buffer[i];
-
+        uint32_t offset  = 0;
+        const uint32_t c = this->buffer[i];
         if (c > 0xffff) {
             // 32 bit conversion
             // U' = yyyyyyyyyyxxxxxxxxxx  // U - 0x10000
@@ -81,25 +78,24 @@ UTF8 UCS2::toUTF8() const noexcept
             const uint16_t y       = (c & 0x03FF0000) >> 16;
             const uint16_t x       = c & 0x03FF;
             const uint32_t decoded = 0x10000 + (y << 10) + x;
-            std::u32string u32s    = {decoded};
-
-            s.append(convertToUtf8String(u32s));
+            decodeResult.append(convertToUtf8String({decoded}));
+            continue;
         }
         else if (c > 0x07ff) {
-            buffer[offset++] = (0x00E0 | ((c & 0xF000) >> 12));
-            buffer[offset++] = (0x0080 | ((c & 0x0FC0) >> 6));
-            buffer[offset++] = (0x0080 | (c & 0x003F));
+            localBuffer[offset++] = (0x00E0 | ((c & 0xF000) >> 12));
+            localBuffer[offset++] = (0x0080 | ((c & 0x0FC0) >> 6));
+            localBuffer[offset++] = (0x0080 | (c & 0x003F));
         }
         else if (c > 0x07f) {
-            buffer[offset++] = (0x00C0 | ((c & 0x07C0) >> 6));
-            buffer[offset++] = (0x0080 | (c & 0x003F));
+            localBuffer[offset++] = (0x00C0 | ((c & 0x07C0) >> 6));
+            localBuffer[offset++] = (0x0080 | (c & 0x003F));
         }
         else {
-            buffer[offset++] = c;
+            localBuffer[offset++] = c;
         }
+        decodeResult.append(std::begin(localBuffer), std::next(std::begin(localBuffer), offset));
     }
-    return (!s.empty()) ? UTF8(s + reinterpret_cast<const char *>(buffer.get()))
-                        : UTF8(reinterpret_cast<const char *>(buffer.get()));
+    return UTF8(decodeResult.c_str());
 }
 
 void UCS2::append(const uint32_t &ucs2char)