diff --git a/autotests/o5mparsertest.cpp b/autotests/o5mparsertest.cpp
index a218bd7..f617fdc 100644
--- a/autotests/o5mparsertest.cpp
+++ b/autotests/o5mparsertest.cpp
@@ -1,160 +1,165 @@
 /*
     Copyright (C) 2020 Volker Krause
 
     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU Library General Public License as published by
     the Free Software Foundation; either version 2 of the License, or (at your
     option) any later version.
 
     This program is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
     License for more details.
 
     You should have received a copy of the GNU General Public License
     along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #include <osm/datatypes.h>
 #include <osm/o5mparser.h>
 
 #include <QTest>
 
 // see https://wiki.openstreetmap.org/wiki/O5m for the examples used below
 
 class O5mParserTest : public QObject
 {
     Q_OBJECT
 private Q_SLOTS:
     void testParseUnsignedInt_data()
     {
         QTest::addColumn<QByteArray>("data");
         QTest::addColumn<uint32_t>("num");
 
         QTest::newRow("0") << QByteArray::fromHex("00") << 0u;
         QTest::newRow("1") << QByteArray::fromHex("01") << 1u;
         QTest::newRow("127") << QByteArray::fromHex("7f") << 127u;
         QTest::newRow("128") << QByteArray::fromHex("8001") << 128u;
         QTest::newRow("255") << QByteArray::fromHex("FF01") << 255u;
         QTest::newRow("256") << QByteArray::fromHex("8002") << 256u;
         QTest::newRow("323") << QByteArray::fromHex("c302") << 323u;
         QTest::newRow("16384") << QByteArray::fromHex("808001") << 16384u;
     }
 
     void testParseUnsignedInt()
     {
         QFETCH(QByteArray, data);
         QFETCH(uint32_t, num);
 
         OSM::O5mParser p(nullptr);
         const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
         auto it = beginIt;
         const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
         QCOMPARE(p.readUnsigned(it, endIt), num);
         QVERIFY(it > beginIt);
         QVERIFY(it <= endIt);
     }
 
     void testParseSignedInt_data()
     {
         QTest::addColumn<QByteArray>("data");
         QTest::addColumn<int32_t>("num");
 
         QTest::newRow("0") << QByteArray::fromHex("00") << 0;
         QTest::newRow("64") << QByteArray::fromHex("8001") << 64;
         QTest::newRow("-2") << QByteArray::fromHex("03") << -2;
         QTest::newRow("-65") << QByteArray::fromHex("8101") << -65;
     }
 
     void testParseSignedInt()
     {
         QFETCH(QByteArray, data);
         QFETCH(int32_t, num);
 
         OSM::O5mParser p(nullptr);
         const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
         auto it = beginIt;
         const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
         QCOMPARE(p.readSigned(it, endIt), num);
         QVERIFY(it > beginIt);
         QVERIFY(it <= endIt);
     }
 
     void testParseString()
     {
         const auto data = QByteArray::fromHex("0031696e6e65720001");
         const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
         auto it = beginIt;
         const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
 
         OSM::O5mParser p(nullptr);
         auto s = p.readString(it, endIt);
         QCOMPARE(s, "1inner");
         QCOMPARE(it, beginIt + 8);
         s = p.readString(it, endIt);
         QCOMPARE(s, "1inner");
         QCOMPARE(it, beginIt + 9);
     }
 
     void testParseStringPair()
     {
         const auto data = QByteArray::fromHex("006f6e6577617900796573000061746d006e6f000200fc07004a6f686e00020301");
         const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
         auto it = beginIt;
         const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
 
         OSM::O5mParser p(nullptr);
         auto s = p.readStringPair(it, endIt);
         QCOMPARE(s.first, "oneway");
         QCOMPARE(s.second, "yes");
         QCOMPARE(it, beginIt + 12);
         s = p.readStringPair(it, endIt);
         QCOMPARE(s.first, "atm");
         QCOMPARE(s.second, "no");
         QCOMPARE(it, beginIt + 20);
         s = p.readStringPair(it, endIt);
         QCOMPARE(s.first, "oneway");
         QCOMPARE(s.second, "yes");
         QCOMPARE(it, beginIt + 21);
         s = p.readStringPair(it, endIt);
         QCOMPARE(s.first, "\xfc\x07");
         QCOMPARE(s.second, "John");
         QCOMPARE(it, beginIt + 30);
         s = p.readStringPair(it, endIt);
         QCOMPARE(s.first, "atm");
         QCOMPARE(s.second, "no");
         QCOMPARE(it, beginIt + 31);
         s = p.readStringPair(it, endIt);
         QCOMPARE(s.first, "oneway");
         QCOMPARE(s.second, "yes");
         QCOMPARE(it, beginIt + 32);
         s = p.readStringPair(it, endIt);
         QCOMPARE(s.first, "\xfc\x07");
         QCOMPARE(s.second, "John");
         QCOMPARE(it, beginIt + 33);
     }
 
     void testParseWay()
     {
         const auto data = QByteArray::fromHex("CCE48E04002DCAAFA01A02BCA0AFF6018FFAD5F70180DFBB9E0FA5E5E5A60DE4E5E5A60DE385959D0F9E86959D0FF7E6E5A60D0062426F780031332E333634313031392C35322E353233323734312C31332E333635373039392C35322E353234323033310000726566003630323400");
         const auto beginIt = reinterpret_cast<const uint8_t*>(data.constBegin());
         auto it = beginIt;
         const auto endIt = reinterpret_cast<const uint8_t*>(data.constEnd());
 
         OSM::DataSet dataSet;
         OSM::O5mParser p(&dataSet);
         p.readWay(it, endIt);
         QCOMPARE(dataSet.ways.size(), 1);
+        const auto &way = dataSet.ways[0];
+        QCOMPARE(way.id, 4315430ll);
+        QCOMPARE(way.nodes.size(), 10);
+        QCOMPARE(way.tags.size(), 1);
+        QCOMPARE(way.bbox.isValid(), true);
     }
 };
 
 QTEST_GUILESS_MAIN(O5mParserTest)
 
 #include "o5mparsertest.moc"
diff --git a/src/osm/o5mparser.cpp b/src/osm/o5mparser.cpp
index 4fae288..966928f 100644
--- a/src/osm/o5mparser.cpp
+++ b/src/osm/o5mparser.cpp
@@ -1,286 +1,308 @@
 /*
     Copyright (C) 2020 Volker Krause
 
     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU Library General Public License as published by
     the Free Software Foundation; either version 2 of the License, or (at your
     option) any later version.
 
     This program is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
     License for more details.
 
     You should have received a copy of the GNU General Public License
     along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #include "o5mparser.h"
 #include "datatypes.h"
 
 #include <QDebug>
 
+#include <cstdlib>
 #include <cstring>
 
 using namespace OSM;
 
 enum : uint8_t {
     O5M_BLOCK_RESET = 0xff,
     O5M_BLOCK_NODE = 0x10,
     O5M_BLOCK_WAY = 0x11,
     O5M_BLOCK_RELATION = 0x12,
     O5M_BLOCK_BOUNDING_BOX = 0xdb,
     O5M_BLOCK_TIMESTAMP = 0xdc,
     O5M_BLOCK_HEADER = 0xe0,
 
     O5M_NUMBER_CONTINUATION = 0b1000'0000,
     O5M_NUMBER_MASK = 0b0111'1111,
     O5M_NUMBER_SIGNED_BIT = 0b1,
 
     O5M_MEMTYPE_NODE = 0x30,
     O5M_MEMTYPE_WAY = 0x31,
     O5M_MEMTYPE_RELATION = 0x32,
 };
 
 enum : uint16_t {
     O5M_STRING_TABLE_SIZE = 15000,
     O5M_STRING_TABLE_MAXLEN = 250,
 };
 
 O5mParser::O5mParser(DataSet *dataSet)
     : m_dataSet(dataSet)
 {
     m_stringLookupTable.resize(O5M_STRING_TABLE_SIZE);
 }
 
 void O5mParser::parse(const uint8_t* data, std::size_t len)
 {
-    qDebug() << "begin parsing";
     const auto endIt = data + len;
     for (auto it = data; it < endIt - 1;) {
         const auto blockType = (*it);
         if (blockType == O5M_BLOCK_RESET) {
             resetDeltaCodingState();
             ++it;
             continue;
         }
 
         auto blockSize = readUnsigned(++it, endIt);
         if (blockSize >= (uint64_t)(endIt - it)) {
             qWarning() << "premature end of file, or blocksize too large" << (endIt - it) << blockType << blockSize;
             break;
         }
         switch (blockType) {
             case O5M_BLOCK_HEADER:
                 if (blockSize != 4 || std::strncmp(reinterpret_cast<const char*>(it), "o5m2", 4) != 0) {
                     qWarning() << "Invalid file header";
                     return;
                 }
                 break;
             case O5M_BLOCK_BOUNDING_BOX:
             case O5M_BLOCK_TIMESTAMP:
                 // not of interest at the moment
                 break;
             case O5M_BLOCK_NODE:
                 readNode(it, it + blockSize);
                 break;
             case O5M_BLOCK_WAY:
                 readWay(it, it + blockSize);
                 break;
             case O5M_BLOCK_RELATION:
                 readRelation(it, it + blockSize);
                 break;
             default:
                 qDebug() << "unhandled o5m block type:" << (it - data) << blockType << blockSize;
         }
 
         it += blockSize;
     }
-
-    qDebug() << "parsing done";
 }
 
+/** Decodes one unsigned varint: 7 payload bits per byte, least significant group first,
+ *  continuation bit set on every byte but the last. Advances @p it past the consumed bytes.
+ */
 uint64_t O5mParser::readUnsigned(const uint8_t *&it, const uint8_t *endIt) const
 {
     uint64_t result = 0;
     int i = 0;
     for (; it < endIt && ((*it) & O5M_NUMBER_CONTINUATION); ++it, ++i) {
-        result |= ((*it) & O5M_NUMBER_MASK) << (i * 7);
+        // widen before shifting, as plain int the payload bits beyond bit 31 would be lost
+        result |= ((uint64_t)(*it) & O5M_NUMBER_MASK) << (i * 7);
     }
-    result |= ((uint64_t)(*it++) & O5M_NUMBER_MASK) << (i * 7);
+    if (it < endIt) { // input truncated inside a number: there is no final byte to read
+        result |= ((uint64_t)(*it++) & O5M_NUMBER_MASK) << (i * 7);
+    }
     return result;
 }
 
 int64_t O5mParser::readSigned(const uint8_t *&it, const uint8_t *endIt) const
 {
     const uint64_t u = readUnsigned(it, endIt);
     return (u & O5M_NUMBER_SIGNED_BIT) ? (-(u >> 1) -1) : (u >> 1);
 }
 
 template <typename T>
 T O5mParser::readDelta(const uint8_t *&it, const uint8_t *endIt, T &deltaState)
 {
     deltaState += (T)readSigned(it, endIt);
     return deltaState;
 }
 
 const char* O5mParser::readString(const uint8_t *&it, const uint8_t *endIt)
 {
     auto ref = readUnsigned(it, endIt);
     if (ref) {
         return m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE];
     } else {
         const auto s = reinterpret_cast<const char*>(it);
         const auto len = std::strlen(s);
         if (len <= O5M_STRING_TABLE_MAXLEN) {
             m_stringLookupTable[m_stringLookupPosition] = s;
             m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE;
         }
         it += len + 1;
         return s;
     }
 }
 
 std::pair<const char*, const char*> O5mParser::readStringPair(const uint8_t *&it, const uint8_t *endIt)
 {
     auto ref = readUnsigned(it, endIt);
     if (ref) {
         const auto s = m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE];
         const auto len1 = std::strlen(s);
         return std::make_pair(s, s + len1 + 1);
     } else {
         const auto s = reinterpret_cast<const char*>(it);
         const auto len1 = std::strlen(s);
         const auto len2 = std::strlen(s + len1 + 1);
 
         if (len1 + len2 <= O5M_STRING_TABLE_MAXLEN) {
             m_stringLookupTable[m_stringLookupPosition] = s;
             m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE;
         }
 
         it += len1 + len2 + 2;
         return std::make_pair(s, s + len1 + 1);
     }
 }
 
 void O5mParser::skipVersionInformation(const uint8_t *&it, const uint8_t *end)
 {
     if (it >= end) { return; }
     const auto version = readUnsigned(it, end);
     if (version > 0) {
         qWarning() << "skipping changeset data not implemented yet!";
         // timestamp (seconds since 1970, signed, delta-coded)
         // author information – only if timestamp is not 0:
         // changeset (signed, delta-coded)
         // uid, user (string pair)
         it = end;
     }
 }
 
+/** Consumes one key/value string pair and stores it on @p e: a "bBox" pair with the value
+ *  "lon1,lat1,lon2,lat2" becomes the element's bounding box, anything else a regular tag.
+ */
+template <typename Elem>
+void O5mParser::readTagOrBbox(Elem &e, const uint8_t *&it, const uint8_t *endIt)
+{
+    const auto tagData = readStringPair(it, endIt);
+    if (std::strcmp(tagData.first, "bBox") == 0) {
+        // Validate each number and separator instead of stepping over them blindly,
+        // which would run past the terminating NUL of a malformed value.
+        // NOTE(review): std::strtod honours LC_NUMERIC - confirm the "C" numeric locale is in effect.
+        double coords[4] = {}; // lon1, lat1, lon2, lat2
+        const char *pos = tagData.second;
+        for (int i = 0; i < 4; ++i) {
+            char *next = nullptr;
+            coords[i] = std::strtod(pos, &next);
+            if (next == pos || (i < 3 && *next != ',')) {
+                return; // malformed value, e.bbox is left untouched
+            }
+            pos = next + 1; // only read again while i < 3, i.e. right behind a ','
+        }
+        e.bbox = OSM::BoundingBox(OSM::Coordinate(coords[1], coords[0]), OSM::Coordinate(coords[3], coords[2]));
+        return;
+    }
+
+    OSM::Tag tag;
+    tag.key = QString::fromUtf8(tagData.first);
+    tag.value = QString::fromUtf8(tagData.second);
+    OSM::setTag(e, std::move(tag));
+}
+
 void O5mParser::readNode(const uint8_t *begin, const uint8_t *end)
 {
     OSM::Node node;
 
     auto it = begin;
     node.id = readDelta(it, end, m_nodeIdDelta);
     skipVersionInformation(it, end);
     if (it >= end) { return; }
 
     node.coordinate.longitude = (int64_t)readDelta(it, end, m_lonDelta) + 1'800'000'000ll;
     node.coordinate.latitude = (int64_t)readDelta(it, end, m_latDelata) + 900'000'000ll;
-    if (it >= end) { return; }
 
     while (it < end) {
         OSM::Tag tag;
         const auto tagData = readStringPair(it, end);
         tag.key = QString::fromUtf8(tagData.first);
         tag.value = QString::fromUtf8(tagData.second);
         OSM::setTag(node, std::move(tag));
     }
 
     m_dataSet->addNode(std::move(node));
 }
 
 void O5mParser::readWay(const uint8_t *begin, const uint8_t *end)
 {
     OSM::Way way;
 
     auto it = begin;
     way.id = readDelta(it, end, m_wayIdDelta);
     skipVersionInformation(it, end);
     if (it >= end) { return; }
 
     const auto nodesBlockSize = readUnsigned(it, end);
     if (it + nodesBlockSize > end) { return; }
 
     const auto nodesBlockEnd = it + nodesBlockSize;
     while(it < nodesBlockEnd) {
         way.nodes.push_back(readDelta(it, end, m_wayNodeIdDelta));
     }
 
     while (it < end) {
-        const auto tagData = readStringPair(it, end);
-        // TODO handle bbox tags
-
-        OSM::Tag tag;
-        tag.key = QString::fromUtf8(tagData.first);
-        tag.value = QString::fromUtf8(tagData.second);
-        OSM::setTag(way, std::move(tag));
+        readTagOrBbox(way, it, end);
     }
 
     m_dataSet->addWay(std::move(way));
 }
 
 void O5mParser::readRelation(const uint8_t *begin, const uint8_t *end)
 {
     OSM::Relation rel;
 
     auto it = begin;
     rel.id = readDelta(it, end, m_relIdDelta);
     skipVersionInformation(it, end);
     if (it >= end) { return; }
 
     const auto relBlockSize = readUnsigned(it, end);
     if (it + relBlockSize > end) { return; }
 
     const auto relBlockEnd = it + relBlockSize;
     while (it < relBlockEnd) {
         OSM::Member mem;
         mem.id = readDelta(it, end, m_relMemberIdDelta);
         const auto typeAndRole = readString(it, end);
         switch (typeAndRole[0]) {
             case O5M_MEMTYPE_NODE:
                 mem.type = OSM::Type::Node;
                 break;
             case O5M_MEMTYPE_WAY:
                 mem.type = OSM::Type::Way;
                 break;
             case O5M_MEMTYPE_RELATION:
                 mem.type = OSM::Type::Relation;
                 break;
         }
         mem.role = QString::fromUtf8(typeAndRole + 1);
 
         rel.members.push_back(std::move(mem));
     }
 
     while (it < end) {
-        const auto tagData = readStringPair(it, end);
-        // TODO handle bbox tags
-
-        OSM::Tag tag;
-        tag.key = QString::fromUtf8(tagData.first);
-        tag.value = QString::fromUtf8(tagData.second);
-        OSM::setTag(rel, std::move(tag));
+        readTagOrBbox(rel, it, end);
     }
 
     m_dataSet->addRelation(std::move(rel));
 }
 
 void O5mParser::resetDeltaCodingState()
 {
     m_nodeIdDelta = 0;
     m_latDelata = 0;
     m_lonDelta = 0;
 
     m_wayIdDelta = 0;
     m_wayNodeIdDelta = 0;
 
     m_relIdDelta = 0;
     m_relMemberIdDelta = 0;
 }
diff --git a/src/osm/o5mparser.h b/src/osm/o5mparser.h
index b5d6fee..4771b45 100644
--- a/src/osm/o5mparser.h
+++ b/src/osm/o5mparser.h
@@ -1,82 +1,85 @@
 /*
     Copyright (C) 2020 Volker Krause
 
     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU Library General Public License as published by
     the Free Software Foundation; either version 2 of the License, or (at your
     option) any later version.
 
     This program is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
     License for more details.
 
     You should have received a copy of the GNU General Public License
     along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #ifndef OSM_O5MPARSER_H
 #define OSM_O5MPARSER_H
 
 #include <cstddef>
 #include <cstdint>
 #include <vector>
 
 class O5mParserTest;
 
 namespace OSM {
 
 class DataSet;
 
 /** Zero-copy parser of O5M binary files.
  *  @see https://wiki.openstreetmap.org/wiki/O5m
  */
 class O5mParser
 {
 public:
     explicit O5mParser(DataSet *dataSet);
 
     /** Parse the given binary content.
      *  Feed this with QFile::map() for example.
      */
     void parse(const uint8_t *data, std::size_t len);
 
 private:
     friend class ::O5mParserTest;
 
     uint64_t readUnsigned(const uint8_t *&it, const uint8_t *endIt) const;
     int64_t readSigned(const uint8_t *&it, const uint8_t *endIt) const;
     template <typename T>
     T readDelta(const uint8_t *&it, const uint8_t *endIt, T &deltaState);
     const char* readString(const uint8_t *&it, const uint8_t *endIt);
     std::pair<const char*, const char*> readStringPair(const uint8_t *&it, const uint8_t *endIt);
 
     void skipVersionInformation(const uint8_t *&it, const uint8_t *end);
+    /** Reads one key/value pair and stores it on @p e, as bounding box for the "bBox" key, as tag otherwise. */
+    template <typename Elem>
+    void readTagOrBbox(Elem &e, const uint8_t *&it, const uint8_t *endIt);
 
     void readNode(const uint8_t *begin, const uint8_t *end);
     void readWay(const uint8_t *begin, const uint8_t *end);
     void readRelation(const uint8_t *begin, const uint8_t *end);
 
     DataSet *m_dataSet = nullptr;
 
     // delta coding state
     void resetDeltaCodingState();
 
     int64_t m_nodeIdDelta = 0;
     int32_t m_latDelata = 0; // this can overflow, but that is intentional according to the spec!
     int32_t m_lonDelta = 0;
 
     int64_t m_wayIdDelta = 0;
     int64_t m_wayNodeIdDelta = 0;
 
     int64_t m_relIdDelta = 0;
     int64_t m_relMemberIdDelta = 0;
 
     std::vector<const char*> m_stringLookupTable;
     uint16_t m_stringLookupPosition = 0;
 };
 
 }
 
 #endif // OSM_O5MPARSER_H