~aleteoryx/muditaos

ref: 95d96b82b63c85af4f655d10e52c5612121c23f6 muditaos/module-utils/ucs2/UCS2.cpp -rw-r--r-- 5.8 KiB
95d96b82 — Pawel.Paprocki [BH-370] Convert utils common_data into a libs 4 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
// Copyright (c) 2017-2021, Mudita Sp. z.o.o. All rights reserved.
// For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md

#include "UCS2.hpp"
#include <cstring>
#include <cstdint>
#include <sstream>

#include <iomanip>
#include <log.hpp>
#include <iterator>
#include <locale>
#include <codecvt>

namespace ucs2
{
    // Size used for the initial buffer allocation and the step by which the buffer
    // is extended every time it has to grow.
    constexpr uint32_t bufferExt{32U};
} // namespace ucs2

// Default constructor: allocates the initial buffer of ucs2::bufferExt entries.
// Members not listed here keep the default initialisers from the class declaration.
UCS2::UCS2()
    : sizeAllocated{ucs2::bufferExt},
      buffer{std::make_unique<uint32_t[]>(ucs2::bufferExt)}
{}

// Builds the string from a UTF8 object: every character of the input is converted
// with convertFromUtf() and appended in order.
UCS2::UCS2(const UTF8 &string)
{
    // start from a freshly allocated, empty buffer
    clear();

    std::size_t index = 0;
    while (index < string.length()) {
        append(convertFromUtf(string[index]));
        ++index;
    }
}

// Builds the string from hexadecimal text in which every 16 bit unit is written
// as four hex digits. Trailing characters that do not form a full group of four
// are ignored. A terminating zero character is always appended at the end.
UCS2::UCS2(const std::string &string)
    : sizeAllocated{ucs2::bufferExt}, buffer{std::make_unique<uint32_t[]>(ucs2::bufferExt)}
{
    constexpr uint8_t hexDigitsPerUnit = 4;

    std::size_t unit = 0;
    while (unit < string.length() / hexDigitsPerUnit) {
        const auto decoded = getUcs2Char(string, unit);
        // a value using the upper 16 bits was assembled from two units of the
        // input, so the second unit must not be read again
        unit += (decoded & 0xffff0000) ? 2 : 1;
        append(decoded);
    }

    // terminate ucs2 string by 0
    append(0);
}
// Copy constructor: takes over the counters of the source object, allocates a
// buffer of the same size and copies the used part of the source buffer into it.
UCS2::UCS2(UCS2 &ucs)
    : length{ucs.getLength()},
      sizeUsed{ucs.getSizeUsed()},
      sizeAllocated{ucs.getSizeAlocated()},
      buffer{std::make_unique<uint32_t[]>(sizeAllocated)}
{
    const uint32_t *source = ucs.buffer.get();
    uint32_t *destination  = buffer.get();
    // sizeUsed is a byte count (append() advances it by sizeof(uint32_t))
    std::memcpy(destination, source, sizeUsed);
}

// Converts the stored characters to UTF-8 and returns them wrapped in a UTF8 object.
//
// Every buffer entry is either a single 16 bit value, or (when it is greater than
// 0xffff) a surrogate pair with the high surrogate in the upper 16 bits and the low
// surrogate in the lower 16 bits. An empty string yields a default constructed UTF8.
//
// The four byte form is encoded inline rather than through std::wstring_convert,
// which is deprecated since C++17, allocates temporaries for every character and
// may throw std::range_error, which would terminate this noexcept function.
UTF8 UCS2::toUTF8() const noexcept
{
    if (length == 0) {
        return UTF8();
    }

    // characters stored as surrogate pairs need 4 bytes, all others at most 3
    constexpr auto maxDecodedCharSize = 4;
    std::string decodeResult;
    decodeResult.reserve(maxDecodedCharSize * length + 1);

    const auto put = [&decodeResult](uint32_t byte) {
        decodeResult.push_back(static_cast<char>(static_cast<uint8_t>(byte)));
    };

    for (uint32_t i = 0; i < length; i++) {
        const uint32_t c = this->buffer[i];
        if (c > 0xffff) {
            // 32 bit conversion
            // U' = yyyyyyyyyyxxxxxxxxxx  // U - 0x10000
            // W1 = 110110yyyyyyyyyy      // 0xD800 + yyyyyyyyyy
            // W2 = 110111xxxxxxxxxx      // 0xDC00 + xxxxxxxxxx
            const uint32_t y       = (c & 0x03FF0000) >> 16;
            const uint32_t x       = c & 0x03FF;
            const uint32_t decoded = 0x10000 + (y << 10) + x; // always within 0x10000..0x10FFFF
            put(0xF0 | (decoded >> 18));
            put(0x80 | ((decoded >> 12) & 0x3F));
            put(0x80 | ((decoded >> 6) & 0x3F));
            put(0x80 | (decoded & 0x3F));
        }
        else if (c > 0x07ff) {
            put(0xE0 | ((c & 0xF000) >> 12));
            put(0x80 | ((c & 0x0FC0) >> 6));
            put(0x80 | (c & 0x003F));
        }
        else if (c > 0x07f) {
            put(0xC0 | ((c & 0x07C0) >> 6));
            put(0x80 | (c & 0x003F));
        }
        else {
            put(c);
        }
    }
    // NOTE(review): the result is handed over as a C string, so a stored zero
    // character presumably ends the text - confirm against the UTF8 constructor.
    return UTF8(decodeResult.c_str());
}

void UCS2::append(const uint32_t &ucs2char)
{
    // check if buffer needs to be expanded
    if (sizeUsed == sizeAllocated) {
        auto newBuffer = std::make_unique<uint32_t[]>(sizeAllocated + ucs2::bufferExt);
        memcpy(newBuffer.get(), buffer.get(), sizeAllocated);
        buffer        = std::move(newBuffer);
        sizeAllocated = sizeAllocated + ucs2::bufferExt;
    }
    // write character to the end of buffer, increment size and add 2 to used bytes ( usc2 character is two byte )
    buffer[length] = ucs2char;
    length++;
    sizeUsed += sizeof(ucs2char);
}

// Returns the stored characters as upper case hexadecimal text. Every character
// is padded with leading zeros to at least four digits; values that need more
// digits are written in full.
std::string UCS2::str() const noexcept
{
    std::ostringstream hexText;
    // these settings persist for all following insertions
    hexText << std::hex << std::uppercase << std::setfill('0');

    for (uint32_t index = 0; index < length; ++index) {
        // the field width is reset after every insertion, so set it each time
        hexText << std::setw(4) << buffer[index];
    }

    return hexText.str();
}

// Drops the current content and replaces the buffer with a fresh one of
// ucs2::bufferExt entries.
//
// The allocation is done before any member is modified, so an allocation failure
// leaves the object unchanged instead of with reset counters and a stale length.
void UCS2::clear()
{
    auto freshBuffer = std::make_unique<uint32_t[]>(ucs2::bufferExt);

    buffer        = std::move(freshBuffer);
    sizeAllocated = ucs2::bufferExt;
    sizeUsed      = 0;
    length        = 0;
}

// Converts a Unicode code point to the value stored in the buffer.
//
// Code points below 0x10000 are returned unchanged. Larger code points are split
// into a surrogate pair which is returned packed into one 32 bit value: the high
// surrogate in the upper 16 bits, the low surrogate in the lower 16 bits.
uint32_t UCS2::convertFromUtf(uint32_t utfChar) const noexcept
{
    constexpr uint32_t supplementaryBase = 0x10000;
    constexpr uint32_t tenBitMask        = 0x3FF;
    constexpr uint32_t highSurrogateBase = 0xD800;
    constexpr uint32_t lowSurrogateBase  = 0xDC00;

    if (utfChar < supplementaryBase) {
        return utfChar;
    }

    // 32 bit conversion
    // U' = yyyyyyyyyyxxxxxxxxxx  // U - 0x10000
    // W1 = 110110yyyyyyyyyy      // 0xD800 + yyyyyyyyyy
    // W2 = 110111xxxxxxxxxx      // 0xDC00 + xxxxxxxxxx
    const uint32_t offset = utfChar - supplementaryBase;
    const uint32_t high   = highSurrogateBase + ((offset >> 10) & tenBitMask);
    const uint32_t low    = lowSurrogateBase + (offset & tenBitMask);
    // unsigned arithmetic keeps the shift away from the sign bit of int
    return (high << 16) | low;
}

// Encodes a UTF-32 string as UTF-8 using the standard library converter.
std::string inline UCS2::convertToUtf8String(const std::u32string &s) const
{
    using Utf32ToUtf8 = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
    return Utf32ToUtf8{}.to_bytes(s);
}

// Reads the character that starts at the i-th group of four hex digits of string.
//
// A high surrogate (0xD800-0xDBFF) directly followed by a low surrogate
// (0xDC00-0xDFFF) is returned as one 32 bit value with the high surrogate in the
// upper 16 bits; the caller has to skip the following group in that case.
// Any other group is returned as a plain 16 bit value.
//
// If the text cannot be parsed, the object is cleared, an error is logged and the
// value read so far (0 when nothing was read) is returned.
uint32_t UCS2::getUcs2Char(const std::string &string, const std::size_t &i)
{
    constexpr uint8_t chunkSize16Bit    = 4;
    constexpr uint8_t indexPosition     = 4;
    constexpr int base                  = 16;
    constexpr uint32_t surrogateMask    = 0xFC00;
    constexpr uint32_t highSurrogateTag = 0xD800;
    constexpr uint32_t lowSurrogateTag  = 0xDC00;

    uint32_t ucs2char = 0;
    try {
        ucs2char = std::stoi(string.substr(i * indexPosition, chunkSize16Bit), nullptr, base);

        // Only a high surrogate can start a pair. Checking the top six bits for
        // equality keeps ordinary characters that merely share some bits with
        // 0xD800 / 0xDC00 from being merged with their neighbour.
        const std::size_t chunkCount = string.length() / chunkSize16Bit;
        if ((ucs2char & surrogateMask) == highSurrogateTag && i + 1 < chunkCount) {
            const uint32_t nextUcs2char = static_cast<uint16_t>(
                std::stoi(string.substr((i + 1) * indexPosition, chunkSize16Bit), nullptr, base));
            // 32 bit
            if ((nextUcs2char & surrogateMask) == lowSurrogateTag) {
                ucs2char = (ucs2char << 16) | nextUcs2char;
            }
        }
    }
    catch (const std::invalid_argument &e) {
        clear();
        LOG_ERROR("UCS2::UCS2(const std::string& string) failed. Invalid argument.");
    }
    catch (const std::out_of_range &e) {
        clear();
        LOG_ERROR("UCS2::UCS2(const std::string& string) failed. Out of range.");
    }

    return ucs2char;
}