~aleteoryx/muditaos

8038df5d694b13b6b0b1ccfb5f821e90368a90dd — Piotr Tański 4 years ago 702287e
[EGD-7828] Phone number match performance fixes

Fixed performance of the number matcher.
M module-apps/application-call/ApplicationCall.cpp => module-apps/application-call/ApplicationCall.cpp +1 -1
@@ 130,7 130,7 @@ namespace app
        if (response->retCode == sys::ReturnCodes::Success) {
            return retMsg;
        }
        return std::make_shared<sys::ResponseMessage>(sys::ReturnCodes::Unresolved);
        return handleAsyncResponse(resp);
    } // namespace app

    // Invoked during initialization

M module-db/Interface/ContactRecord.cpp => module-db/Interface/ContactRecord.cpp +43 -43
@@ 24,6 24,11 @@
#include <utility>
#include <vector>

namespace
{
    constexpr auto NumberMatcherPageSize = 100;
} // namespace

ContactRecordInterface::ContactRecordInterface(ContactsDB *db)
    : contactDB(db), favouritesGroupId(db->groups.favouritesId())
{}


@@ 186,12 191,11 @@ auto ContactRecordInterface::getNumbersIDs(std::uint32_t contactID, const Contac
{
    std::vector<std::uint32_t> ret;

    std::vector<ContactNumberHolder> contactNumberHolders;
    auto numberMatcher = buildNumberMatcher(contactNumberHolders);
    auto numberMatcher = buildNumberMatcher(NumberMatcherPageSize);
    for (const auto &number : contact.numbers) {
        auto numberMatch =
            numberMatcher.bestMatch(utils::PhoneNumber(number.number), utils::PhoneNumber::Match::POSSIBLE);
        if (numberMatch == numberMatcher.END) {
        if (!numberMatch.has_value()) {
            // number does not exist in the DB yet. Let's add it.
            if (!contactDB->number.add(ContactsNumberTableRow{Record(DB_ID_NONE),
                                                              .contactID  = contactID,


@@ 233,12 237,11 @@ auto ContactRecordInterface::addNumbers(std::uint32_t contactID, const std::vect
    -> std::pair<bool, std::string>
{
    std::string numbersIDs;
    std::vector<ContactNumberHolder> contactNumberHolders;
    auto numberMatcher = buildNumberMatcher(contactNumberHolders);
    auto numberMatcher = buildNumberMatcher(NumberMatcherPageSize);
    for (const auto &number : numbers) {
        auto numberMatch =
            numberMatcher.bestMatch(utils::PhoneNumber(number.number), utils::PhoneNumber::Match::POSSIBLE);
        if (numberMatch == numberMatcher.END) {
        if (!numberMatch.has_value()) {
            auto ret = contactDB->number.add(ContactsNumberTableRow{Record(DB_ID_NONE),
                                                                    .contactID  = contactID,
                                                                    .numberUser = number.number.getEntered(),


@@ 1155,24 1158,19 @@ auto ContactRecordInterface::addTemporaryContactForNumber(const ContactRecord::N
    return {true, tmp};
}

auto ContactRecordInterface::buildNumberMatcher(std::vector<ContactNumberHolder> &contactNumberHolders)
auto ContactRecordInterface::buildNumberMatcher(unsigned int maxPageSize)
    -> utils::NumberHolderMatcher<std::vector, ContactNumberHolder>
{
    auto numbers = getAllNumbers();

    for (const auto &number : numbers) {
        try {
            contactNumberHolders.emplace_back(number);
        }
        catch (const utils::PhoneNumber::Error &e) {
            debug_db_data(
                "Skipping invalid phone number pair: (%s, %s)", number.numberUser.c_str(), number.numbere164.c_str());
            continue;
        }
    }

    return utils::NumberHolderMatcher<std::vector, ContactNumberHolder>(std::cbegin(contactNumberHolders),
                                                                        std::cend(contactNumberHolders));
    return utils::NumberHolderMatcher<std::vector, ContactNumberHolder>(
        [this](const utils::PhoneNumber &number, auto offset, auto limit) {
            std::vector<ContactNumberHolder> contactNumberHolders;
            contactNumberHolders.reserve(limit);
            auto numbers = number.isValid() ? contactDB->number.getLimitOffset(number.get(), offset, limit)
                                            : contactDB->number.getLimitOffset(offset, limit);
            std::move(numbers.begin(), numbers.end(), std::back_inserter(contactNumberHolders));
            return contactNumberHolders;
        },
        maxPageSize);
}

auto ContactRecordInterface::MatchByNumber(const utils::PhoneNumber::View &numberView,


@@ 1180,12 1178,9 @@ auto ContactRecordInterface::MatchByNumber(const utils::PhoneNumber::View &numbe
                                           utils::PhoneNumber::Match matchLevel)
    -> std::optional<ContactRecordInterface::ContactNumberMatch>
{
    std::vector<ContactNumberHolder> contactNumberHolders;
    auto numberMatcher = buildNumberMatcher(contactNumberHolders);

    auto numberMatcher = buildNumberMatcher(NumberMatcherPageSize);
    auto matchedNumber = numberMatcher.bestMatch(utils::PhoneNumber(numberView), matchLevel);

    if (matchedNumber == numberMatcher.END) {
    if (!matchedNumber.has_value()) {
        if (createTempContact != CreateTempContact::True) {
            return std::nullopt;
        }


@@ 1277,24 1272,31 @@ auto ContactRecordInterface::getAllNumbers() -> const std::vector<ContactsNumber
    return v;
}

ContactNumberHolder::ContactNumberHolder(ContactsNumberTableRow numberRow)
    : row(std::move(numberRow)), number(row.numbere164.empty() ? utils::PhoneNumber(row.numberUser)
                                                               : utils::PhoneNumber(row.numberUser, row.numbere164))
ContactNumberHolder::ContactNumberHolder(ContactsNumberTableRow &&numberRow) : numberRow(std::move(numberRow))
{}

auto ContactNumberHolder::getNumber() const -> const utils::PhoneNumber &
auto ContactNumberHolder::getNumber() const noexcept -> utils::PhoneNumber
{
    return number;
    try {
        return utils::PhoneNumber{numberRow.numbere164.empty()
                                      ? utils::PhoneNumber(numberRow.numberUser)
                                      : utils::PhoneNumber(numberRow.numberUser, numberRow.numbere164)};
    }
    catch (const utils::PhoneNumber::Error &) {
        debug_db_data(
            "Skipping invalid phone number pair: (%s, %s)", numberRow.numberUser.c_str(), numberRow.numbere164.c_str());
        return utils::PhoneNumber{};
    }
}

auto ContactNumberHolder::getContactID() const -> std::uint32_t
auto ContactNumberHolder::getContactID() const noexcept -> std::uint32_t
{
    return row.contactID;
    return numberRow.contactID;
}

auto ContactNumberHolder::getNumberID() const -> std::uint32_t
auto ContactNumberHolder::getNumberID() const noexcept -> std::uint32_t
{
    return row.ID;
    return numberRow.ID;
}

void ContactRecord::addToFavourites(bool add)


@@ 1388,8 1390,7 @@ auto ContactRecordInterface::GetNumbersIdsByContact(std::uint32_t contactId) -> 

auto ContactRecordInterface::MergeContactsList(std::vector<ContactRecord> &contacts) -> bool
{
    std::vector<ContactNumberHolder> contactNumberHolders;
    auto numberMatcher = buildNumberMatcher(contactNumberHolders);
    auto numberMatcher = buildNumberMatcher(NumberMatcherPageSize);

    for (auto &contact : contacts) {
        // Important: Comparing only single number contacts


@@ 1398,7 1399,7 @@ auto ContactRecordInterface::MergeContactsList(std::vector<ContactRecord> &conta
        }
        auto matchedNumber = numberMatcher.bestMatch(contact.numbers[0].number, utils::PhoneNumber::Match::POSSIBLE);

        if (matchedNumber == numberMatcher.END) {
        if (!matchedNumber.has_value()) {
            if (!Add(contact)) {
                LOG_ERROR("Contacts list merge fail when adding the contact.");
                return false;


@@ 1409,7 1410,7 @@ auto ContactRecordInterface::MergeContactsList(std::vector<ContactRecord> &conta
            contact.ID = matchedNumber->getContactID();
            Update(contact);
            // Rebuild number matcher
            numberMatcher = buildNumberMatcher(contactNumberHolders);
            numberMatcher = buildNumberMatcher(NumberMatcherPageSize);
        }
    }
    return true;


@@ 1420,8 1421,7 @@ auto ContactRecordInterface::CheckContactsListDuplicates(std::vector<ContactReco
{
    std::vector<ContactRecord> unique;
    std::vector<ContactRecord> duplicates;
    std::vector<ContactNumberHolder> contactNumberHolders;
    auto numberMatcher = buildNumberMatcher(contactNumberHolders);
    auto numberMatcher = buildNumberMatcher(NumberMatcherPageSize);

    for (auto &contact : contacts) {
        // Important: Comparing only single number contacts


@@ 1429,7 1429,7 @@ auto ContactRecordInterface::CheckContactsListDuplicates(std::vector<ContactReco
            LOG_WARN("Contact with multiple numbers detected - ignoring all numbers except first");
        }
        auto matchedNumber = numberMatcher.bestMatch(contact.numbers[0].number, utils::PhoneNumber::Match::POSSIBLE);
        if (matchedNumber != numberMatcher.END) {
        if (matchedNumber.has_value()) {
            duplicates.push_back(contact);
        }
        else {

M module-db/Interface/ContactRecord.hpp => module-db/Interface/ContactRecord.hpp +6 -7
@@ 143,15 143,14 @@ enum class ContactRecordField
class ContactNumberHolder
{
  private:
    ContactsNumberTableRow row;
    utils::PhoneNumber number;
    ContactsNumberTableRow numberRow;

  public:
    explicit ContactNumberHolder(ContactsNumberTableRow numberRow);
    ContactNumberHolder(ContactsNumberTableRow &&numberRow);

    auto getNumber() const -> const utils::PhoneNumber &;
    auto getContactID() const -> std::uint32_t;
    auto getNumberID() const -> std::uint32_t;
    auto getNumber() const noexcept -> utils::PhoneNumber;
    auto getContactID() const noexcept -> std::uint32_t;
    auto getNumberID() const noexcept -> std::uint32_t;
};

class ContactRecordInterface : public RecordInterface<ContactRecord, ContactRecordField>


@@ 263,7 262,7 @@ class ContactRecordInterface : public RecordInterface<ContactRecord, ContactReco
    auto getByIdCommon(ContactsTableRow &contact) -> ContactRecord;
    auto getContactByNumber(const UTF8 &number) -> const std::unique_ptr<std::vector<ContactRecord>>;
    auto getAllNumbers() -> const std::vector<ContactsNumberTableRow>;
    auto buildNumberMatcher(std::vector<ContactNumberHolder> &contactNumberHolders)
    auto buildNumberMatcher(unsigned int maxPageSize = std::numeric_limits<unsigned int>::max())
        -> utils::NumberHolderMatcher<std::vector, ContactNumberHolder>;
    auto splitNumberIDs(const std::string &numberIDs) -> const std::vector<std::uint32_t>;
    auto joinNumberIDs(const std::vector<std::uint32_t> &numberIDs) -> std::string;

M module-db/Tables/ContactsNumberTable.cpp => module-db/Tables/ContactsNumberTable.cpp +27 -1
@@ 81,7 81,7 @@ std::vector<ContactsNumberTableRow> ContactsNumberTable::getByContactId(uint32_t

std::vector<ContactsNumberTableRow> ContactsNumberTable::getLimitOffset(uint32_t offset, uint32_t limit)
{
    auto retQuery = db->query("SELECT * from contact_number ORDER BY number_user LIMIT %lu OFFSET %lu;", limit, offset);
    auto retQuery = db->query("SELECT * from contact_number LIMIT %lu OFFSET %lu;", limit, offset);

    if ((retQuery == nullptr) || (retQuery->getRowCount() == 0)) {
        return std::vector<ContactsNumberTableRow>();


@@ 102,6 102,32 @@ std::vector<ContactsNumberTableRow> ContactsNumberTable::getLimitOffset(uint32_t
    return ret;
}

std::vector<ContactsNumberTableRow> ContactsNumberTable::getLimitOffset(const std::string &number,
                                                                        uint32_t offset,
                                                                        uint32_t limit)
{
    const char lastCharacter = number.back();
    auto retQuery = db->query("SELECT * from contact_number WHERE number_user like '%%%c' LIMIT %lu OFFSET %lu;",
                              lastCharacter,
                              limit,
                              offset);
    if ((retQuery == nullptr) || (retQuery->getRowCount() == 0)) {
        return std::vector<ContactsNumberTableRow>();
    }

    std::vector<ContactsNumberTableRow> ret;
    do {
        ret.push_back(ContactsNumberTableRow{
            (*retQuery)[0].getUInt32(),                                 // ID
            (*retQuery)[1].getUInt32(),                                 // contactID
            (*retQuery)[2].getString(),                                 // numberUser
            (*retQuery)[3].getString(),                                 // numbere164
            static_cast<ContactNumberType>((*retQuery)[4].getUInt32()), // type
        });
    } while (retQuery->nextRow());
    return ret;
}

std::vector<ContactsNumberTableRow> ContactsNumberTable::getLimitOffsetByField(uint32_t offset,
                                                                               uint32_t limit,
                                                                               ContactNumberTableFields field,

M module-db/Tables/ContactsNumberTable.hpp => module-db/Tables/ContactsNumberTable.hpp +17 -0
@@ 44,8 44,25 @@ class ContactsNumberTable : public Table<ContactsNumberTableRow, ContactNumberTa

    std::vector<ContactsNumberTableRow> getByContactId(uint32_t id);

    /**
     * Retrieves a subset of contact numbers from the DB.
     * @param offset    Starting position
     * @param limit     The number of rows to be retrieved
     * @return Contact numbers retrieved from the DB.
     */
    std::vector<ContactsNumberTableRow> getLimitOffset(uint32_t offset, uint32_t limit) override final;

    /**
     * Retrieves a subset of contact numbers from the DB.
     * The contact numbers are filtered by the last character of the "number" parameter.
     * Thanks to the filtering, statistically ~90% of numbers are ignored.
     * @param number    The phone number used to filter out the contact numbers by its last character.
     * @param offset    Starting position
     * @param limit     The number of rows to be retrieved
     * @return Contact numbers retrieved from the DB.
     */
    std::vector<ContactsNumberTableRow> getLimitOffset(const std::string &number, uint32_t offset, uint32_t limit);

    std::vector<ContactsNumberTableRow> getLimitOffsetByField(uint32_t offset,
                                                              uint32_t limit,
                                                              ContactNumberTableFields field,

M module-utils/phonenumber/NumberHolderMatcher.hpp => module-utils/phonenumber/NumberHolderMatcher.hpp +39 -34
@@ 4,6 4,7 @@
#pragma once

#include <PhoneNumber.hpp>
#include <time/ScopedTime.hpp>

namespace utils
{


@@ 16,28 17,36 @@ namespace utils
     */
    template <template <class, class> class C, class THolder> class NumberHolderMatcher
    {
      public:
        using NumbersProvider =
            std::function<std::vector<THolder>(const PhoneNumber &number, unsigned int offset, unsigned int limit)>;

      private:
        using TContainer = C<THolder, std::allocator<THolder>>;
        using TIter      = typename TContainer::const_iterator;
        using MatchMap   = std::map<PhoneNumber::Match, std::vector<THolder>>;
        using OwnType    = NumberHolderMatcher<C, THolder>;
        TIter startIterator;

      public:
        /**
         * @brief Default constructor (null object constructor)
         *
         */
        NumberHolderMatcher()
        {}
        TContainer numbers;
        NumbersProvider numbersProvider;
        unsigned int currentOffset;
        unsigned int limit;

        /**
         * @brief NumberHolderMatcher range constructor
         *
         * @param cont_begin - start iterator of a container with NumberHolders
         * @param cont_end - end iterator
         */
        NumberHolderMatcher(TIter cont_begin, TIter cont_end) : startIterator(cont_begin), END(cont_end)
        auto next(const PhoneNumber &number) -> bool
        {
            currentOffset += limit;
            numbers = numbersProvider(number, currentOffset, limit);
            return !numbers.empty();
        }

        void restartFor(const PhoneNumber &number)
        {
            currentOffset = 0U;
            numbers       = numbersProvider(number, currentOffset, limit);
        }

      public:
        NumberHolderMatcher(NumbersProvider &&provider, unsigned int pageSize)
            : numbers{}, numbersProvider{std::move(provider)}, currentOffset{0U}, limit{pageSize}
        {}

        OwnType &operator=(const OwnType &) = delete;


@@ 48,29 57,25 @@ namespace utils
         *
         * @param other - number to match with
         * @param level - minimum acceptable match level
         * @return TIter - an iterator instance pointing to best matched element in a container or an end iterator if
         * none could be matched
         * @return Best match for the other phone number
         */
        TIter bestMatch(const PhoneNumber &other, PhoneNumber::Match level = PhoneNumber::Match::EXACT) const
        std::optional<THolder> bestMatch(const PhoneNumber &phoneNumber,
                                         PhoneNumber::Match level = PhoneNumber::Match::EXACT)
        {
            TIter i = startIterator;

            while (i != END) {
                auto match = other.match(i->getNumber());
                if (match >= level) {
                    return i;
            utils::time::Scoped t{"bestMatch()"};
            restartFor(phoneNumber);
            do {
                const auto it =
                    std::find_if(numbers.cbegin(), numbers.cend(), [&phoneNumber, level](const auto &number) {
                        return phoneNumber.match(number.getNumber()) >= level;
                    });
                if (it != numbers.cend()) {
                    return *it;
                }
                i++;
            }
            } while (next(phoneNumber));

            return END;
            return std::nullopt;
        }

        /**
         * @brief end iterator for convienience
         *
         */
        TIter END;
    };

}; // namespace utils

M module-utils/phonenumber/tests/unittest_numbermatcher.cpp => module-utils/phonenumber/tests/unittest_numbermatcher.cpp +33 -12
@@ 26,6 26,8 @@ class DummyHolder
    }
};

static constexpr auto DefaultPageSize = 1; // so the paging mechanism is checked.

const static std::string NumberAPlNational = "600123456";
const static std::string NumberAPlE164     = "+48600123456";
const static PhoneNumber NumberAPlValid(NumberAPlE164, country::Id::UNKNOWN);


@@ 64,45 66,61 @@ auto make_test_vector(std::initializer_list<PhoneNumber> lnumber)
    return v;
}

auto retrieve_numbers(const std::vector<DummyHolder> &holders, unsigned int offset, unsigned int limit)
{
    const auto size = holders.size();
    if (offset > size - 1) {
        return std::vector<DummyHolder>{};
    }
    auto lastIndex = std::clamp<unsigned int>(offset + limit, offset, size);
    return std::vector<DummyHolder>(holders.begin() + offset, holders.begin() + lastIndex);
}

TEST_CASE("Number matcher - basics")
{
    std::vector<DummyHolder> numberHolders = {DummyHolder(PhoneNumber("500123456")),
                                              DummyHolder(PhoneNumber("600123456"))};
    auto provider = [&numberHolders]([[maybe_unused]] const utils::PhoneNumber &number, auto offset, auto limit) {
        return retrieve_numbers(numberHolders, offset, limit);
    };
    NumberHolderMatcher<std::vector, DummyHolder> matcher{std::move(provider), DefaultPageSize};

    NumberHolderMatcher<std::vector, DummyHolder> matcher(std::cbegin(numberHolders), std::cend(numberHolders));
    auto match = matcher.bestMatch(PhoneNumber("600123456"));
    REQUIRE(match != std::cend(numberHolders));
    REQUIRE(match.has_value());
    REQUIRE(match->getNumber().get() == "600123456");

    // match again to see if search is started at the beginning again
    auto newmatch = matcher.bestMatch(PhoneNumber("600123456"));
    REQUIRE(newmatch != std::cend(numberHolders));
    REQUIRE(newmatch->getNumber().get() == "600123456");
    auto newMatch = matcher.bestMatch(PhoneNumber("600123456"));
    REQUIRE(newMatch.has_value());
    REQUIRE(newMatch->getNumber().get() == "600123456");
}

TEST_CASE("Number matcher - match incoming (full list)")
{
    auto dummyHolders = make_test_vector(all_test_numbers);
    auto matcher = NumberHolderMatcher<std::vector, DummyHolder>(std::cbegin(dummyHolders), std::cend(dummyHolders));
    auto provider     = [&dummyHolders]([[maybe_unused]] const utils::PhoneNumber &number, auto offset, auto limit) {
        return retrieve_numbers(dummyHolders, offset, limit);
    };
    NumberHolderMatcher<std::vector, DummyHolder> matcher{std::move(provider), DefaultPageSize};

    SECTION("Incoming e164")
    {
        auto match = matcher.bestMatch(PhoneNumber("+48600123456"));
        REQUIRE(match != matcher.END);
        REQUIRE(match.has_value());
        REQUIRE(match->getNumber().get() == NumberAPlE164);
    }

    SECTION("Incoming national")
    {
        auto match = matcher.bestMatch(PhoneNumber("600123456", country::Id::UNKNOWN));
        REQUIRE(match != matcher.END);
        REQUIRE(match.has_value());
        REQUIRE(match->getNumber().get() == NumberAPlNational);
    }

    SECTION("Incoming valid national")
    {
        auto match = matcher.bestMatch(PhoneNumber("600123456", country::Id::POLAND));
        REQUIRE(match != matcher.END);
        REQUIRE(match.has_value());
        REQUIRE(match->getNumber().get() == NumberAPlE164);
    }
}


@@ 110,12 128,15 @@ TEST_CASE("Number matcher - match incoming (full list)")
TEST_CASE("Number matcher - match incoming (loose)")
{
    auto dummyHolders = make_test_vector({NumberAPlInvalid, NumberBPlInvalid, NumberCPlInvalid});
    auto matcher = NumberHolderMatcher<std::vector, DummyHolder>(std::cbegin(dummyHolders), std::cend(dummyHolders));
    auto provider     = [&dummyHolders]([[maybe_unused]] const utils::PhoneNumber &number, auto offset, auto limit) {
        return retrieve_numbers(dummyHolders, offset, limit);
    };
    NumberHolderMatcher<std::vector, DummyHolder> matcher{std::move(provider), DefaultPageSize};

    auto match = matcher.bestMatch(PhoneNumber("+48500123456"));
    REQUIRE(match == matcher.END);
    REQUIRE(!match.has_value());

    match = matcher.bestMatch(PhoneNumber("+48500123456"), PhoneNumber::Match::POSSIBLE);
    REQUIRE(match != matcher.END);
    REQUIRE(match.has_value());
    REQUIRE(match->getNumber().get() == NumberBPlNational);
}