Review revision of the o5m parser patch (string reference table, ways, relations).

Changes relative to the patch as submitted:
- readUnsigned(): the continuation-byte loop now casts to uint64_t before
  shifting, matching the cast the patch already adds for the final byte.
- readUnsigned(): return instead of dereferencing `it` when the loop stopped
  because the end of the input was reached.
- readString()/readStringPair(): descriptive comments added.

Not addressed here:
- readString()/readStringPair() call strlen() without consulting endIt and do
  not check for lookup table slots that were never filled (nullptr).
- readUnsigned() does not limit the number of continuation bytes, so the shift
  count can reach 64 and more on malformed input.

Reconstruction notes: the reviewed copy had its line breaks collapsed and all
<...> text removed. Template arguments and the license URL were restored from
usage. The #include <...> names and the position of blank context lines are
best guesses and need to be checked against the tree; the author e-mail in the
copyright line was not restored. The index line of o5mparser.cpp was dropped
as its post-image differs from the submitted one.

diff --git a/autotests/o5mparsertest.cpp b/autotests/o5mparsertest.cpp
index 53bd311..e105c66 100644
--- a/autotests/o5mparsertest.cpp
+++ b/autotests/o5mparsertest.cpp
@@ -1,83 +1,156 @@
 /*
     Copyright (C) 2020 Volker Krause
 
     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU Library General Public License as published by
     the Free Software Foundation; either version 2 of the License, or (at your
     option) any later version.
 
     This program is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
     License for more details.
 
     You should have received a copy of the GNU General Public License
     along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #include <osm/o5mparser.h>
 
 #include <QTest>
 
+// see https://wiki.openstreetmap.org/wiki/O5m for the examples used below
 class O5mParserTest : public QObject
 {
     Q_OBJECT
 private Q_SLOTS:
     void testParseUnsignedInt_data()
     {
         QTest::addColumn<QByteArray>("data");
         QTest::addColumn<uint32_t>("num");
 
         QTest::newRow("0") << QByteArray::fromHex("00") << 0u;
         QTest::newRow("1") << QByteArray::fromHex("01") << 1u;
         QTest::newRow("127") << QByteArray::fromHex("7f") << 127u;
         QTest::newRow("128") << QByteArray::fromHex("8001") << 128u;
         QTest::newRow("255") << QByteArray::fromHex("FF01") << 255u;
         QTest::newRow("256") << QByteArray::fromHex("8002") << 256u;
         QTest::newRow("323") << QByteArray::fromHex("c302") << 323u;
         QTest::newRow("16384") << QByteArray::fromHex("808001") << 16384u;
     }
 
     void testParseUnsignedInt()
     {
         QFETCH(QByteArray, data);
         QFETCH(uint32_t, num);
 
         OSM::O5mParser p(nullptr);
         const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
         auto it = beginIt;
         const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
         QCOMPARE(p.readUnsigned(it, endIt), num);
         QVERIFY(it > beginIt);
         QVERIFY(it <= endIt);
     }
 
     void testParseSignedInt_data()
     {
         QTest::addColumn<QByteArray>("data");
         QTest::addColumn<int32_t>("num");
 
         QTest::newRow("0") << QByteArray::fromHex("00") << 0;
         QTest::newRow("64") << QByteArray::fromHex("8001") << 64;
         QTest::newRow("-2") << QByteArray::fromHex("03") << -2;
         QTest::newRow("-65") << QByteArray::fromHex("8101") << -65;
     }
 
     void testParseSignedInt()
     {
         QFETCH(QByteArray, data);
         QFETCH(int32_t, num);
 
         OSM::O5mParser p(nullptr);
         const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
         auto it = beginIt;
         const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
         QCOMPARE(p.readSigned(it, endIt), num);
         QVERIFY(it > beginIt);
         QVERIFY(it <= endIt);
     }
+
+    void testParseString()
+    {
+        const auto data = QByteArray::fromHex("0031696e6e65720001");
+        const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
+        auto it = beginIt;
+        const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
+
+        OSM::O5mParser p(nullptr);
+        auto s = p.readString(it, endIt);
+        QCOMPARE(s, "1inner");
+        QCOMPARE(it, beginIt + 8);
+
+        s = p.readString(it, endIt);
+        QCOMPARE(s, "1inner");
+        QCOMPARE(it, beginIt + 9);
+    }
+
+    void testParseStringPair()
+    {
+        const auto data = QByteArray::fromHex("006f6e6577617900796573000061746d006e6f000200fc07004a6f686e00020301");
+        const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
+        auto it = beginIt;
+        const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
+
+        OSM::O5mParser p(nullptr);
+        auto s = p.readStringPair(it, endIt);
+        QCOMPARE(s.first, "oneway");
+        QCOMPARE(s.second, "yes");
+        QCOMPARE(it, beginIt + 12);
+
+        s = p.readStringPair(it, endIt);
+        QCOMPARE(s.first, "atm");
+        QCOMPARE(s.second, "no");
+        QCOMPARE(it, beginIt + 20);
+
+        s = p.readStringPair(it, endIt);
+        QCOMPARE(s.first, "oneway");
+        QCOMPARE(s.second, "yes");
+        QCOMPARE(it, beginIt + 21);
+
+        s = p.readStringPair(it, endIt);
+        QCOMPARE(s.first, "\xfc\x07");
+        QCOMPARE(s.second, "John");
+        QCOMPARE(it, beginIt + 30);
+
+        s = p.readStringPair(it, endIt);
+        QCOMPARE(s.first, "atm");
+        QCOMPARE(s.second, "no");
+        QCOMPARE(it, beginIt + 31);
+
+        s = p.readStringPair(it, endIt);
+        QCOMPARE(s.first, "oneway");
+        QCOMPARE(s.second, "yes");
+        QCOMPARE(it, beginIt + 32);
+
+        s = p.readStringPair(it, endIt);
+        QCOMPARE(s.first, "\xfc\x07");
+        QCOMPARE(s.second, "John");
+        QCOMPARE(it, beginIt + 33);
+    }
+
+    void testParseWay()
+    {
+        const auto data = QByteArray::fromHex("CCE48E04002DCAAFA01A02BCA0AFF6018FFAD5F70180DFBB9E0FA5E5E5A60DE4E5E5A60DE385959D0F9E86959D0FF7E6E5A60D0062426F780031332E333634313031392C35322E353233323734312C31332E333635373039392C35322E353234323033310000726566003630323400");
+        const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
+        auto it = beginIt;
+        const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
+
+        OSM::O5mParser p(nullptr);
+        p.readWay(it, endIt);
+    }
 };
 
 QTEST_GUILESS_MAIN(O5mParserTest)
 
 #include "o5mparsertest.moc"
diff --git a/src/osm/o5mparser.cpp b/src/osm/o5mparser.cpp
--- a/src/osm/o5mparser.cpp
+++ b/src/osm/o5mparser.cpp
@@ -1,146 +1,259 @@
 /*
     Copyright (C) 2020 Volker Krause
 
     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU Library General Public License as published by
     the Free Software Foundation; either version 2 of the License, or (at your
     option) any later version.
 
     This program is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
     License for more details.
 
     You should have received a copy of the GNU General Public License
     along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #include "o5mparser.h"
 #include "datatypes.h"
 
 #include <QDebug>
 #include <cstring>
 
 using namespace OSM;
 
 enum : uint8_t {
     O5M_RESET = 0xff,
     O5M_NODE = 0x10,
     O5M_WAY = 0x11,
     O5M_RELATION = 0x12,
     O5M_BOUNDING_BOX = 0xdb,
     O5M_TIMESTAMP = 0xdc,
     O5M_HEADER = 0xe0,
 
     O5M_NUMBER_CONTINUATION = 0b1000'0000,
     O5M_NUMBER_MASK = 0b0111'1111,
     O5M_SIGNED_BIT = 0b1,
 };
 
+enum : uint16_t {
+    O5M_STRING_TABLE_SIZE = 15000,
+    O5M_STRING_TABLE_MAXLEN = 250,
+};
+
 O5mParser::O5mParser(DataSet *dataSet)
     : m_dataSet(dataSet)
 {
+    m_stringLookupTable.resize(O5M_STRING_TABLE_SIZE);
 }
 
 void O5mParser::parse(const uint8_t* data, std::size_t len)
 {
     qDebug() << "begin parsing";
     const auto endIt = data + len;
     for (auto it = data; it < endIt - 1;) {
         const auto blockType = (*it);
         if (blockType == O5M_RESET) {
             resetDeltaCodingState();
             ++it;
             continue;
         }
 
         auto blockSize = readUnsigned(++it, endIt);
         if (blockSize >= (uint64_t)(endIt - it)) {
             qWarning() << "premature end of file, or blocksize too large" << (endIt - it) << blockType << blockSize;
             break;
         }
 
         switch (blockType) {
             case O5M_HEADER:
                 if (blockSize != 4 || std::strncmp(reinterpret_cast<const char*>(it), "o5m2", 4) != 0) {
                     qWarning() << "Invalid file header";
                     return;
                 }
                 break;
             case O5M_BOUNDING_BOX:
             case O5M_TIMESTAMP:
                 // not of interest at the moment
                 break;
             case O5M_NODE:
-                parseNode(it, it + blockSize);
+                readNode(it, it + blockSize);
                 break;
             case O5M_WAY:
+                readWay(it, it + blockSize);
+                break;
             case O5M_RELATION:
-                // TODO
-                qDebug() << "todo:" << (it - data) << blockType << blockSize;
+                readRelation(it, it + blockSize);
                 break;
             default:
                 qDebug() << "unhandled o5m block type:" << (it - data) << blockType << blockSize;
         }
 
         it += blockSize;
     }
     qDebug() << "parsing done";
 }
 
 uint64_t O5mParser::readUnsigned(const uint8_t *&it, const uint8_t *endIt) const
 {
     uint64_t result = 0;
     int i = 0;
     for (; it < endIt && ((*it) & O5M_NUMBER_CONTINUATION); ++it, ++i) {
-        result |= ((*it) & O5M_NUMBER_MASK) << (i * 7);
+        // widen to 64 bit before shifting, same as for the final byte below
+        result |= ((uint64_t)(*it) & O5M_NUMBER_MASK) << (i * 7);
     }
-    result |= ((*it++) & O5M_NUMBER_MASK) << (i * 7);
+    if (it >= endIt) { // the loop stopped at the end of the input, there is no final byte to read
+        return result;
+    }
+    result |= ((uint64_t)(*it++) & O5M_NUMBER_MASK) << (i * 7);
     return result;
 }
 
 int64_t O5mParser::readSigned(const uint8_t *&it, const uint8_t *endIt) const
 {
-    const auto u = readUnsigned(it, endIt);
+    const uint64_t u = readUnsigned(it, endIt);
     return (u & O5M_SIGNED_BIT) ? (-(u >> 1) -1) : (u >> 1);
 }
 
 template <typename T>
 T O5mParser::readDelta(const uint8_t *&it, const uint8_t *endIt, T &deltaState)
 {
     deltaState += (T)readSigned(it, endIt);
     return deltaState;
 }
 
-void O5mParser::parseNode(const uint8_t *begin, const uint8_t *end)
+// Reads one string. A non-zero leading number refers back into m_stringLookupTable,
+// zero means the NUL-terminated string follows inline (and is added to the table if short enough).
+const char* O5mParser::readString(const uint8_t *&it, const uint8_t *endIt)
 {
-    auto it = begin;
+    auto ref = readUnsigned(it, endIt);
+    if (ref) {
+        return m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE];
+    } else {
+        const auto s = reinterpret_cast<const char*>(it);
+        const auto len = std::strlen(s);
+        if (len <= O5M_STRING_TABLE_MAXLEN) {
+            m_stringLookupTable[m_stringLookupPosition] = s;
+            m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE;
+        }
+        it += len + 1;
+        return s;
+    }
+}
 
-    const OSM::Id id = readDelta(it, end, m_nodeIdDelta);
-    if (it >= end) { return; }
+// Same as readString(), but for an entry consisting of two consecutive NUL-terminated
+// strings; returns pointers to the first and the second one.
+std::pair<const char*, const char*> O5mParser::readStringPair(const uint8_t *&it, const uint8_t *endIt)
+{
+    auto ref = readUnsigned(it, endIt);
+    if (ref) {
+        const auto s = m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE];
+        const auto len1 = std::strlen(s);
+        return std::make_pair(s, s + len1 + 1);
+    } else {
+        const auto s = reinterpret_cast<const char*>(it);
+        const auto len1 = std::strlen(s);
+        const auto len2 = std::strlen(s + len1 + 1);
+
+        if (len1 + len2 <= O5M_STRING_TABLE_MAXLEN) {
+            m_stringLookupTable[m_stringLookupPosition] = s;
+            m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE;
+        }
+        it += len1 + len2 + 2;
+        return std::make_pair(s, s + len1 + 1);
+    }
+}
+
+void O5mParser::skipVersionInformation(const uint8_t *&it, const uint8_t *end)
+{
+    if (it >= end) { return; }
 
     const auto version = readUnsigned(it, end);
     if (version > 0) {
         qWarning() << "skipping changeset data not implemented yet!";
         // timestamp (seconds since 1970, signed, delta-coded)
         // author information – only if timestamp is not 0:
         // changeset (signed, delta-coded)
         // uid, user (string pair)
-        return;
+        it = end;
     }
+}
+
+void O5mParser::readNode(const uint8_t *begin, const uint8_t *end)
+{
+    auto it = begin;
+    const OSM::Id id = readDelta(it, end, m_nodeIdDelta);
+    skipVersionInformation(it, end);
     if (it >= end) { return; }
 
     const auto lat = readDelta(it, end, m_latDelata);
     const auto lon = readDelta(it, end, m_lonDelta);
-    qDebug() << " node " << id << (lat / 1.0e7) << (lon / 1.0e7);
+    qDebug() << " node" << id << (lat / 1.0e7) << (lon / 1.0e7);
+    if (it >= end) { return; }
+
+    while (it < end) {
+        const auto tag = readStringPair(it, end);
+        qDebug() << " tag" << tag.first << tag.second;
+    }
+}
+
+void O5mParser::readWay(const uint8_t *begin, const uint8_t *end)
+{
+    auto it = begin;
+    const OSM::Id id = readDelta(it, end, m_wayIdDelta);
+    skipVersionInformation(it, end);
+    if (it >= end) { return; }
+
+    const auto nodesBlockSize = readUnsigned(it, end);
+    qDebug() << " way" << id << nodesBlockSize;
+    if (it + nodesBlockSize > end) { return; }
+
+    const auto nodesBlockEnd = it + nodesBlockSize;
+    while(it < nodesBlockEnd) {
+        const OSM::Id nodeId = readDelta(it, end, m_wayNodeIdDelta);
+        qDebug() << " nd" << nodeId;
+    }
+
+    while (it < end) {
+        const auto tag = readStringPair(it, end);
+        qDebug() << " tag" << tag.first << tag.second;
+    }
+}
+
+void O5mParser::readRelation(const uint8_t *begin, const uint8_t *end)
+{
+    auto it = begin;
+    const OSM::Id id = readDelta(it, end, m_relIdDelta);
+    skipVersionInformation(it, end);
     if (it >= end) { return; }
 
-    // TODO tags
+    const auto relBlockSize = readUnsigned(it, end);
+    qDebug() << " relation" << id << relBlockSize;
+    if (it + relBlockSize > end) { return; }
+
+    const auto relBlockEnd = it + relBlockSize;
+    while (it < relBlockEnd) {
+        const OSM::Id memId = readDelta(it, end, m_relMemberIdDelta);
+        const auto typeAndRole = readString(it, end);
+        qDebug() << " mem" << memId << typeAndRole[0] << (typeAndRole + 1);
+    }
+
+    while (it < end) {
+        const auto tag = readStringPair(it, end);
+        qDebug() << " tag" << tag.first << tag.second;
+    }
 }
 
 void O5mParser::resetDeltaCodingState()
 {
     m_nodeIdDelta = 0;
     m_latDelata = 0;
     m_lonDelta = 0;
+
+    m_wayIdDelta = 0;
+    m_wayNodeIdDelta = 0;
+
+    m_relIdDelta = 0;
+    m_relMemberIdDelta = 0;
 }
diff --git a/src/osm/o5mparser.h b/src/osm/o5mparser.h
index 8b4b048..b5d6fee 100644
--- a/src/osm/o5mparser.h
+++ b/src/osm/o5mparser.h
@@ -1,64 +1,82 @@
 /*
     Copyright (C) 2020 Volker Krause
 
     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU Library General Public License as published by
     the Free Software Foundation; either version 2 of the License, or (at your
     option) any later version.
 
     This program is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
     License for more details.
 
     You should have received a copy of the GNU General Public License
     along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #ifndef OSM_O5MPARSER_H
 #define OSM_O5MPARSER_H
 
 #include <cstddef>
 #include <cstdint>
+#include <vector>
 
 class O5mParserTest;
 
 namespace OSM {
 
 class DataSet;
 
 /** Zero-copy parser of O5M binary files.
  *  @see https://wiki.openstreetmap.org/wiki/O5m
  */
 class O5mParser
 {
 public:
     explicit O5mParser(DataSet *dataSet);
 
     /** Parse the given binary content.
      *  Feed this with QFile::map() for example.
      */
     void parse(const uint8_t *data, std::size_t len);
 
 private:
     friend class ::O5mParserTest;
 
     uint64_t readUnsigned(const uint8_t *&it, const uint8_t *endIt) const;
     int64_t readSigned(const uint8_t *&it, const uint8_t *endIt) const;
 
     template <typename T>
     T readDelta(const uint8_t *&it, const uint8_t *endIt, T &deltaState);
-    void parseNode(const uint8_t *begin, const uint8_t *end);
+    const char* readString(const uint8_t *&it, const uint8_t *endIt);
+    std::pair<const char*, const char*> readStringPair(const uint8_t *&it, const uint8_t *endIt);
+
+    void skipVersionInformation(const uint8_t *&it, const uint8_t *end);
+
+    void readNode(const uint8_t *begin, const uint8_t *end);
+    void readWay(const uint8_t *begin, const uint8_t *end);
+    void readRelation(const uint8_t *begin, const uint8_t *end);
 
     DataSet *m_dataSet = nullptr;
 
     // delta coding state
     void resetDeltaCodingState();
+
     int64_t m_nodeIdDelta = 0;
     int32_t m_latDelata = 0; // this can overflow, but that is intentional according to the spec!
     int32_t m_lonDelta = 0;
+
+    int64_t m_wayIdDelta = 0;
+    int64_t m_wayNodeIdDelta = 0;
+
+    int64_t m_relIdDelta = 0;
+    int64_t m_relMemberIdDelta = 0;
+
+    std::vector<const char*> m_stringLookupTable;
+    uint16_t m_stringLookupPosition = 0;
 };
 
 }
 
 #endif // OSM_O5MPARSER_H