~aleteoryx/muditaos

ref: 7d7003d62639426be8eb00f4fc288b68139f5566 muditaos/module-utils/ucs2/UCS2.cpp -rw-r--r-- 5.8 KiB
7d7003d6 — Marcin Smoczyński Merge branch 'master' into stable 5 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
// Copyright (c) 2017-2020, Mudita Sp. z.o.o. All rights reserved.
// For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md

#include "UCS2.hpp"
#include <cstring>
#include <cstdint>
#include <sstream>

#include <iomanip>
#include "log/log.hpp"
#include <iterator>
#include <locale>
#include <codecvt>

namespace ucs2
{
    // Element count used for a freshly allocated buffer and as the increment
    // when UCS2::append() has to grow it.
    constexpr uint32_t bufferExt{32};
} // namespace ucs2

// Default constructor: allocates a zero-initialised buffer of ucs2::bufferExt elements.
// The remaining counters are not touched here and keep whatever defaults the class declaration gives them.
UCS2::UCS2()
    : sizeAllocated{ucs2::bufferExt},
      buffer{std::make_unique<uint32_t[]>(ucs2::bufferExt)}
{}

// Builds the object from a UTF8 string: each element obtained through string[pos] is passed to
// convertFromUtf() and the result is appended. No terminating 0 is appended by this constructor.
UCS2::UCS2(const UTF8 &string)
{
    // there is no member-initializer list here, so clear() is what sets up the buffer and the counters
    clear();

    const auto charCount = string.length();
    for (std::size_t pos = 0; pos < charCount; ++pos) {
        append(convertFromUtf(string[pos]));
    }
}

// Builds the object from a string of hexadecimal digits in which every group of 4 digits is one 16 bit unit.
// Groups are decoded with getUcs2Char(); a result with any of its upper 16 bits set has consumed two groups.
// A 0 element is appended at the end as a terminator.
UCS2::UCS2(const std::string &string)
    : sizeAllocated{ucs2::bufferExt}, buffer{std::make_unique<uint32_t[]>(ucs2::bufferExt)}
{
    constexpr uint8_t chunkSize16Bit  = 4;
    constexpr uint32_t upperHalfMask = 0xffff0000;

    std::size_t group = 0;
    while (group < string.length() / chunkSize16Bit) {
        const auto decoded = getUcs2Char(string, group);
        append(decoded);
        // 32 bit value -> two groups were used, 16 bit value -> one group
        group += (upperHalfMask & decoded) ? 2 : 1;
    }

    // terminate ucs2 string by 0
    append(0);
}
// Copy constructor: takes over the counters of `ucs`, allocates a buffer of the same element count and
// copies the first sizeUsed bytes of the source buffer into it.
// NOTE(review): `buffer` is sized from the `sizeAllocated` member, so this relies on `sizeAllocated` being
// declared before `buffer` in the class - confirm against the header.
UCS2::UCS2(UCS2 &ucs)
    : length{ucs.getLength()},
      sizeUsed{ucs.getSizeUsed()},
      sizeAllocated{ucs.getSizeAlocated()},
      buffer{std::make_unique<uint32_t[]>(sizeAllocated)}
{
    std::memcpy(buffer.get(), ucs.buffer.get(), sizeUsed);
}

// Converts the stored elements to a UTF-8 encoded UTF8 object.
//
// Elements greater than 0xffff are treated as a packed surrogate pair (high surrogate in the upper 16 bits,
// low surrogate in the lower 16 bits) and are decoded to a single code point before being encoded. All other
// elements are encoded directly as 1, 2 or 3 byte sequences.
//
// The output is produced strictly in element order. Previously the 4 byte sequences were gathered in a
// separate string and prepended to everything else, which reordered the text.
// Conversion stops at the first 0 element, which acts as the string terminator.
//
// Returns an empty UTF8 when length is 0.
// The function is declared noexcept, so a failed allocation terminates the program instead of throwing.
UTF8 UCS2::toUTF8() const noexcept
{
    if (length == 0)
        return UTF8();

    std::string utf8;
    // upper bound: one element never produces more than 4 bytes
    utf8.reserve(4 * static_cast<std::size_t>(length));

    for (uint32_t i = 0; i < length; i++) {
        const uint32_t c = buffer[i];

        if (c == 0) {
            // terminator reached - nothing after it belongs to the string
            break;
        }

        if (c > 0xffff) {
            // 32 bit conversion
            // U' = yyyyyyyyyyxxxxxxxxxx  // U - 0x10000
            // W1 = 110110yyyyyyyyyy      // 0xD800 + yyyyyyyyyy
            // W2 = 110111xxxxxxxxxx      // 0xDC00 + xxxxxxxxxx
            const uint32_t y       = (c & 0x03FF0000) >> 16;
            const uint32_t x       = c & 0x03FF;
            const uint32_t decoded = 0x10000 + (y << 10) + x;
            const std::u32string u32s{static_cast<char32_t>(decoded)};

            utf8.append(convertToUtf8String(u32s));
        }
        else if (c > 0x07ff) {
            utf8.push_back(static_cast<char>(0x00E0 | ((c & 0xF000) >> 12)));
            utf8.push_back(static_cast<char>(0x0080 | ((c & 0x0FC0) >> 6)));
            utf8.push_back(static_cast<char>(0x0080 | (c & 0x003F)));
        }
        else if (c > 0x07f) {
            utf8.push_back(static_cast<char>(0x00C0 | ((c & 0x07C0) >> 6)));
            utf8.push_back(static_cast<char>(0x0080 | (c & 0x003F)));
        }
        else {
            utf8.push_back(static_cast<char>(c));
        }
    }

    return UTF8(utf8.c_str());
}

// Appends one element to the end of the buffer, growing the buffer by ucs2::bufferExt elements when needed.
//
// sizeUsed is a byte count (it advances by sizeof(ucs2char) per element), whereas the buffer itself is
// allocated with sizeAllocated elements. Growth is triggered when the two numbers become equal, and
// the copy below moves sizeAllocated bytes, which at that moment equals the number of bytes in use.
void UCS2::append(const uint32_t &ucs2char)
{
    const bool growthNeeded = (sizeUsed == sizeAllocated);
    if (growthNeeded) {
        const auto grownSize = sizeAllocated + ucs2::bufferExt;
        auto grownBuffer     = std::make_unique<uint32_t[]>(grownSize);
        std::memcpy(grownBuffer.get(), buffer.get(), sizeAllocated);
        buffer        = std::move(grownBuffer);
        sizeAllocated = grownSize;
    }

    // store the element in the first free slot and account for it in both counters
    buffer[length++] = ucs2char;
    sizeUsed += sizeof(ucs2char);
}

// Returns the stored elements as one string of upper-case hexadecimal digits.
// Every element is zero-padded to at least 4 digits; elements that need more digits are printed in full.
std::string UCS2::str() const noexcept
{
    std::stringstream hexDump;
    // fill character, base and case are sticky stream state, only the width is reset by each insertion
    hexDump << std::setfill('0') << std::hex << std::uppercase;

    for (uint32_t idx = 0; idx < length; ++idx) {
        hexDump << std::setw(4) << buffer[idx];
    }

    return hexDump.str();
}

// Resets the object to an empty state: counters go back to their initial values and the old buffer is
// replaced by a new zero-initialised one of ucs2::bufferExt elements.
void UCS2::clear()
{
    sizeAllocated = ucs2::bufferExt;
    sizeUsed      = 0;

    auto freshBuffer = std::make_unique<uint32_t[]>(ucs2::bufferExt);
    buffer           = std::move(freshBuffer);

    length = 0;
}

// Converts a single code point to the representation stored in the buffer.
//
// Values below 0x10000 are returned unchanged. Larger values are split into a surrogate pair and packed
// into one 32 bit value: high surrogate in the upper 16 bits, low surrogate in the lower 16 bits. This is
// the layout toUTF8() decodes.
//
// @param utfChar code point to convert
// @return the code point itself, or the packed surrogate pair
uint32_t UCS2::convertFromUtf(uint32_t utfChar) const noexcept
{
    constexpr uint32_t supplementaryBase = 0x10000;
    constexpr uint32_t tenBitMask        = 0x3FF;

    if (utfChar < supplementaryBase) {
        return utfChar;
    }

    // 32 bit conversion
    // U' = yyyyyyyyyyxxxxxxxxxx  // U - 0x10000
    // W1 = 110110yyyyyyyyyy      // 0xD800 + yyyyyyyyyy
    // W2 = 110111xxxxxxxxxx      // 0xDC00 + xxxxxxxxxx
    const uint32_t offset = utfChar - supplementaryBase;
    const uint32_t y      = (offset >> 10) & tenBitMask;
    const uint32_t x      = offset & tenBitMask;
    const uint32_t high   = 0xD800 + y;
    const uint32_t low    = 0xDC00 + x;

    // all arithmetic is done on uint32_t so the shift never touches a sign bit
    return (high << 16) | low;
}

// Encodes a UTF-32 string as UTF-8 bytes using std::wstring_convert with std::codecvt_utf8<char32_t>.
// to_bytes() may throw std::range_error when the conversion fails.
std::string inline UCS2::convertToUtf8String(const std::u32string &s) const
{
    using Utf8Codec = std::codecvt_utf8<char32_t>;

    std::wstring_convert<Utf8Codec, char32_t> utf8Converter;
    return utf8Converter.to_bytes(s);
}

// Decodes the element that starts at the i-th group of 4 hexadecimal digits of `string`.
//
// When that group is a high surrogate (0xD800-0xDBFF) and the following group is a low surrogate
// (0xDC00-0xDFFF), both are combined into one 32 bit value: high surrogate in the upper 16 bits, low
// surrogate in the lower 16 bits. Otherwise the 16 bit value of the i-th group alone is returned.
//
// If a group cannot be parsed the object is cleared, an error is logged and the value decoded so far
// (0 when nothing was decoded) is returned.
//
// @param string text consisting of groups of 4 hexadecimal digits
// @param i      index of the group to decode (not a character offset)
// @return decoded 16 bit value or packed surrogate pair
uint32_t UCS2::getUcs2Char(const std::string &string, const std::size_t &i)
{
    constexpr std::size_t chunkSize16Bit = 4;
    constexpr int base                   = 16;
    // the top 6 bits of a 16 bit unit tell whether it is a surrogate and of which kind
    constexpr uint32_t surrogateMask    = 0xFC00;
    constexpr uint32_t highSurrogateTag = 0xD800;
    constexpr uint32_t lowSurrogateTag  = 0xDC00;

    uint32_t ucs2char = 0;
    try {
        ucs2char = std::stoi(string.substr(i * chunkSize16Bit, chunkSize16Bit), nullptr, base);

        // look at the next group only if there is one and the current unit opens a surrogate pair;
        // "i + 1 < count" avoids the unsigned underflow of "count - 1" for very short input
        const std::size_t chunkCount = string.length() / chunkSize16Bit;
        if (i + 1 < chunkCount && (ucs2char & surrogateMask) == highSurrogateTag) {
            const uint32_t nextUcs2char =
                std::stoi(string.substr((i + 1) * chunkSize16Bit, chunkSize16Bit), nullptr, base);
            // 32 bit
            if ((nextUcs2char & surrogateMask) == lowSurrogateTag) {
                ucs2char = (ucs2char << 16) | nextUcs2char;
            }
        }
    }
    catch (const std::invalid_argument &) {
        clear();
        LOG_ERROR("UCS2::UCS2(const std::string& string) failed. Invalid argument.");
    }
    catch (const std::out_of_range &) {
        clear();
        LOG_ERROR("UCS2::UCS2(const std::string& string) failed. Out of range.");
    }

    return ucs2char;
}