~aleteoryx/muditaos (a405cad694b867fcd2498984830bd97d4b9bde2f): module-utils/unicode/utf8/test/unittest_utf8.cpp

// Copyright (c) 2017-2024, Mudita Sp. z.o.o. All rights reserved.
// For licensing, see https://github.com/mudita/MuditaOS/blob/master/LICENSE.md

#include <catch2/catch.hpp>

#include "utf8/UTF8.hpp"

TEST_CASE("UTF8: operator index returns value")
{
    // Text mixing a plain ASCII letter with Polish diacritics.
    UTF8 text("Rąbać");

    // Index 0 holds the ASCII letter 'R'.
    REQUIRE(static_cast<uint32_t>('R') == text[0]);
    // Index 1 holds "ą", expected as the value 0x0105.
    REQUIRE(0x00000105 == text[1]);
}

TEST_CASE("UTF8: operator index exceeds string size")
{
    UTF8 text("Rąbać");

    // Indexing past the last character is expected to yield 0.
    const auto pastTheEnd = text.length() + 1;
    REQUIRE(0 == text[pastTheEnd]);
}

TEST_CASE("UTF8: operator== returns properly")
{
    // Three fixtures: a long text, a strict prefix of it, and an unrelated text.
    const UTF8 longText      = "test test test";
    const UTF8 unrelatedText = "test not";
    const UTF8 prefixText    = "test test";

    // Identical content compares equal.
    REQUIRE(longText == longText);
    // Completely different content compares unequal.
    REQUIRE(unrelatedText != longText);
    // Left operand is shorter but shares the beginning with the right one.
    REQUIRE(prefixText != longText);
    // Right operand is shorter but shares the beginning with the left one.
    REQUIRE(longText != prefixText);
}

TEST_CASE("UTF8: substr returns empty string when zero length is passed")
{
    UTF8 source("Rąbać drewno siekierą");

    // Requesting zero characters (from position 1) must produce an empty result.
    UTF8 slice = source.substr(1, 0);

    REQUIRE(slice.length() == 0);
}

TEST_CASE("UTF8: substr returns empty string when length is exceeded")
{
    UTF8 source("Rąbać drewno siekierą");

    // Ask for more characters than the source holds; an empty result is expected.
    uint32_t tooLong = source.length() + 5;
    UTF8 slice       = source.substr(1, tooLong);

    REQUIRE(slice.length() == 0);
}

TEST_CASE("UTF8: substr returns proper begin char")
{
    UTF8 source("Rąbać drewno siekierą");

    // A slice taken from position 0 must start with the source's first character.
    UTF8 slice = source.substr(0, 5);

    REQUIRE(slice[0] == source[0]);
}

TEST_CASE("UTF8: substr returns proper string length")
{
    UTF8 source("Rąbać drewno siekierą");

    // Slide the start forward while shrinking the requested length by the same amount,
    // so position + length always equals the source length; each slice must have
    // exactly the requested length. The loop stops once the length drops to 1.
    uint32_t count = source.length();
    for (uint32_t start = 0; count > 1; start++, count--) {
        UTF8 slice = source.substr(start, count);
        REQUIRE(slice.length() == count);
    }
}

TEST_CASE("UTF8: find returns npos if not found")
{
    // "E" does not occur anywhere in the haystack.
    const char *needle = "E";
    UTF8 haystack("AaBbCcŃń");

    REQUIRE(UTF8::npos == haystack.find(needle));
}

TEST_CASE("UTF8: find returns npos if pos exceeds string length")
{
    // "A" is present, but the search starts beyond the end of the haystack.
    const char *needle = "A";
    UTF8 haystack("AaBbCcŃń");

    REQUIRE(UTF8::npos == haystack.find(needle, haystack.length() + 5));
}

TEST_CASE("UTF8: find returns position of passed string")
{
    // std::string::find serves as the reference result for the same text.
    const char *needle = "Ń";
    std::string reference("AaBbCcŃń");
    UTF8 haystack("AaBbCcŃń");

    const auto expected = reference.find(needle);

    REQUIRE(expected == haystack.find(needle));
}

TEST_CASE("UTF8: find returns position of passed string when pos is passed")
{
    // Searching from position 3 skips the first "a"; std::string::find with the
    // same start position provides the reference result.
    const char *needle = "a";
    std::string reference("Aa Bb aCcŃń");
    UTF8 haystack("Aa Bb aCcŃń");

    const auto expected = reference.find(needle, 3);

    REQUIRE(expected == haystack.find(needle, 3));
}

TEST_CASE("UTF8: findLast returns npos if not found")
{
    // "E" does not occur in the haystack; the search position is the last character.
    const char *needle = "E";
    UTF8 haystack("AaBbCcŃń");

    REQUIRE(UTF8::npos == haystack.findLast(needle, haystack.length() - 1));
}

TEST_CASE("UTF8: findLast returns npos if pos exceeds string length")
{
    // "A" is present, but the given position lies beyond the end of the haystack.
    const char *needle = "A";
    UTF8 haystack("AaBbCcŃń");

    REQUIRE(UTF8::npos == haystack.findLast(needle, haystack.length() + 5));
}

TEST_CASE("UTF8: findLast returns position of passed string")
{
    // "Ń" occurs once, so std::string::find on the same text gives the reference position.
    const char *needle = "Ń";
    std::string reference("AaBbCcŃń");
    UTF8 haystack("AaBbCcŃń");

    const auto expected = reference.find(needle);

    REQUIRE(expected == haystack.findLast(needle, haystack.length() - 1));
}

TEST_CASE("UTF8: findLast returns position of passed string when pos is passed")
{
    UTF8 sourceString = UTF8("Aa Bb aCcŃń");

    const char *to_find = "a";
    // With position 3 passed to findLast, the "a" at index 1 is the expected hit;
    // the later "a" at index 6 lies after that position.
    std::uint32_t expected_position = 1;

    REQUIRE(expected_position == sourceString.findLast(to_find, 3));
}

TEST_CASE("UTF8: split returns empty object if idx exceeds string length")
{
    UTF8 source("String testowy PODZIAŁ string testowy");

    // Splitting at an index beyond the end must give back an empty string.
    const auto beyondEnd = source.length() + 10;
    UTF8 tail            = source.split(beyondEnd);

    REQUIRE(0 == tail.length());
}

TEST_CASE("UTF8: split returns propper string")
{
    UTF8 expectedTail("PODZIAŁ string testowy");
    UTF8 source("String testowy PODZIAŁ string testowy");

    // Split exactly where the marker word begins; the returned part must start with it.
    int32_t splitAt = source.find("PODZIAŁ");
    UTF8 tail       = source.split(splitAt);

    REQUIRE(expectedTail == tail);
}

TEST_CASE("UTF8: split strings have propper length after split")
{
    UTF8 source("String testowy Ąą Ćć ńźżę PODZIAŁ string testowy Łłódź ");
    UTF8 expectedHead("String testowy Ąą Ćć ńźżę ");
    UTF8 expectedTail("PODZIAŁ string testowy Łłódź ");

    // Split at the marker word: the source is expected to keep the head,
    // while the returned string holds the tail.
    int32_t splitAt = source.find("PODZIAŁ");
    UTF8 tail       = source.split(splitAt);

    REQUIRE(expectedHead.length() == source.length());
    REQUIRE(expectedTail.length() == tail.length());
}

TEST_CASE("UTF8: split summary length is equal after split")
{
    UTF8 source("String testowy Ąą Ćć ńźżę PODZIAŁ string testowy Łłódź ");
    int32_t splitAt = source.find("PODZIAŁ");

    // Record the length before the split so it can be compared with the two parts.
    uint32_t totalBefore = source.length();

    UTF8 tail = source.split(splitAt);

    uint32_t tailLength = tail.length();
    uint32_t headLength = source.length();

    // No characters may be lost or duplicated by the split.
    REQUIRE(totalBefore == tailLength + headLength);
}

TEST_CASE("UTF8: getLine returns empty string if its not line")
{
    // The text contains no "\n" or "\r", so no line is expected to be extracted.
    UTF8 source("ĄŚĆćśą Pierwsza linia Druga linia ");

    UTF8 line = source.getLine();

    REQUIRE(line == UTF8(""));
    REQUIRE(line.length() == 0);
}

TEST_CASE("UTF8: getLine returns proper string when line ends with n")
{
    // The first line is terminated by "\n"; the terminator is not part of the result.
    UTF8 source("ĄŚĆćśą Pierwsza linia\n Druga linia ");

    UTF8 line = source.getLine();

    REQUIRE(line == "ĄŚĆćśą Pierwsza linia");
}

TEST_CASE("UTF8: getLine returns proper string when line ends with r")
{
    // The first line is terminated by "\r"; the terminator is not part of the result.
    UTF8 source("ĄŚĆćśą Pierwsza linia\r Druga linia ");

    UTF8 line = source.getLine();

    REQUIRE(line == "ĄŚĆćśą Pierwsza linia");
}

TEST_CASE("UTF8: removeChar returns false when string to remove exceed")
{
    UTF8 text("Teścik");

    // Second argument is larger than the whole string: the call must report failure.
    const bool removed = text.removeChar(2, text.length() + 1);

    REQUIRE_FALSE(removed);
}

TEST_CASE("UTF8: removeChar returns false when pos to remove exceed")
{
    UTF8 text("Teścik");

    // First argument points past the end of the string: the call must report failure.
    const bool removed = text.removeChar(text.length() + 1, 2);

    REQUIRE_FALSE(removed);
}

TEST_CASE("UTF8: removeChar returns propper string")
{
    UTF8 expected("Tecik");
    UTF8 text("Teścik");

    // Drop the single character 'ś' located at index 2.
    const bool removed = text.removeChar(2, 1);

    REQUIRE(removed);
    // Both length() and used() must agree with the reference string.
    REQUIRE(expected.length() == text.length());
    REQUIRE(expected.used() == text.used());
}

TEST_CASE("UTF8: getChar")
{
    UTF8 original = "Zadzwonię później, walczę z ostrym cieniem mgły ;)";
    UTF8 rebuilt  = "";

    // Rebuild the text one character at a time from the bytes exposed by getChar().
    for (unsigned int idx = 0; idx < original.length(); ++idx) {
        rebuilt.insert((const char *)original.getChar(idx).utf8);
    }

    // The character-by-character copy must equal the original text.
    REQUIRE(original == rebuilt);
}

// getChar returns a U8char, while operator[] returns the character's code in U+ notation (e.g. 0x0119 for "ę")
TEST_CASE("UTF8: encode / decode / how it works")
{
    // Two representations of the letter "ę" used as reference values.
    const uint32_t expectedUtf8Bytes = 0xC499; // value in UTF-8 notation
    const uint32_t expectedCodePoint = 0x0119; // value in U+ notation (U+0119)

    // Step 1: decode the UTF-8 text into its U+ value.
    std::string encodedText = "ę";
    uint32_t decodedLength  = 0;
    uint32_t decodedCode    = UTF8::decode(encodedText.c_str(), decodedLength);

    REQUIRE(decodedCode == expectedCodePoint);

    // Step 2: encode the U+ value again and expect the UTF-8 notation back.
    uint32_t encodedValue  = 0;
    uint32_t encodedLength = 0;
    bool encodedOk         = UTF8::encode(decodedCode, encodedValue, encodedLength);

    REQUIRE(encodedOk == true);
    REQUIRE(encodedValue == expectedUtf8Bytes);

    // Step 3: operator[] on a UTF8 object yields the same U+ value.
    UTF8 wrapped = "ę";

    REQUIRE(expectedCodePoint == wrapped[0]);
}

TEST_CASE("UTF8 bad case scenario - operator[] returns")
{
    UTF8 test                       = "ę";
    UTF8 next                       = "";
    const uint32_t code_encoded_uni = 0x0119; // "ę" in U+ notation (U+0119)

    // Assert that the buffer really holds the UTF-8 bytes 0xC4 0x99.
    // The bytes are compared as unsigned char: whether plain `char` is signed is
    // implementation-defined, and a signed char holding 0xC4 promotes to a negative
    // int that would never compare equal to the literal 0xc4.
    REQUIRE(static_cast<unsigned char>(*test.c_str()) == 0xc4);
    REQUIRE(static_cast<unsigned char>(*(test.c_str() + 1)) == 0x99);

    // Read the first character through operator[]; it must be the U+ value.
    auto code_utf16 = test[0];
    REQUIRE(code_utf16 == code_encoded_uni);

    // Insert that value into an empty string via insertCode...
    next.insertCode(code_utf16);

    // ...and expect to end up with the same text as the original.
    REQUIRE(test == next);
}

TEST_CASE("U8char && UTF8: encode")
{
    // Encode 0x119 ("ę") and check the result, the reported length and U8char's size.
    // NOTE(review): the original comment here read "this don't work...", yet the
    // assertions below expect encode() to succeed — confirm whether that note is stale.
    uint32_t code       = 0x119;
    uint32_t len        = 0;
    uint32_t output     = 0;
    const uint32_t pl_e = 0xc499; // expected encoded value for "ę"
    bool res            = UTF8::encode(code, output, len);

    // encode() must succeed, report two bytes and produce the expected value.
    REQUIRE(len == 2);
    REQUIRE(res);
    REQUIRE(output == pl_e);

    // A U8char built from the same code must report the same size.
    auto u = U8char(code);
    REQUIRE(u.size == len);
}

TEST_CASE("UTF8: insert whole string which doesn't work")
{
    // Number of characters (not bytes) in the reference text below.
    unsigned int len = 50;
    UTF8 lol;
    UTF8 fin = "Zadzwonię później, walczę z ostrym cieniem mgły ;)";

    // Copy the text character by character: read each value with operator[]
    // and append it with insertCode().
    for (unsigned int i = 0; i < fin.length(); ++i) {
        lol.insertCode(fin[i]);
    }

    REQUIRE(lol == fin);
    REQUIRE(lol.length() == len);
}

TEST_CASE("UTF8: Convert to ascii if is ascii combination")
{
    // Each digit string below is a concatenation of decimal ASCII codes.
    SECTION("6 characters")
    {
        // 77 85 68 73 84 65
        UTF8 digits = "778568738465";
        REQUIRE(digits.isASCIICombination());
        REQUIRE(digits.toASCII() == std::string("MUDITA"));
    }

    SECTION("4 characters")
    {
        // 100 101 101 112
        UTF8 digits = "100101101112";
        REQUIRE(digits.isASCIICombination());
        REQUIRE(digits.toASCII() == std::string("deep"));
    }

    SECTION("2 characters")
    {
        // 111 107
        UTF8 digits = "111107";
        REQUIRE(digits.isASCIICombination());
        REQUIRE(digits.toASCII() == std::string("ok"));
    }
}

// Inputs that must NOT be treated as a concatenation of ASCII codes: for each of
// them isASCIICombination() is expected to be false and toASCII() to hold no value.
// Section names are kept unique so a failure report (or a `-c` section filter)
// identifies exactly one input.
TEST_CASE("UTF8: Not ASCII combination")
{
    SECTION("Pl number - 9 digits")
    {
        UTF8 combination = "600123456";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("5 digits")
    {
        UTF8 combination = "11115";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("4 digits - 1111")
    {
        UTF8 combination = "1111";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("4 digits - 8411")
    {
        UTF8 combination = "8411";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("Play PL info number - 3 digits")
    {
        UTF8 combination = "100";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("611 Carrier number - 3 digits")
    {
        UTF8 combination = "611";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("411 Carrier number - 3 digits")
    {
        UTF8 combination = "411";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("Too short number - 2 digits")
    {
        UTF8 combination = "20";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("Too short number - 1 digit")
    {
        UTF8 combination = "1";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("Pl e164 number")
    {
        UTF8 combination = "+48600123456";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("Pl formatted number")
    {
        UTF8 combination = "600 123 456";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }

    SECTION("Pl formatted int number")
    {
        UTF8 combination = "+48 600 123 456";
        REQUIRE_FALSE(combination.isASCIICombination());
        REQUIRE_FALSE(combination.toASCII().has_value());
    }
}