diff --git a/autotests/o5mparsertest.cpp b/autotests/o5mparsertest.cpp index e105c66..a218bd7 100644 --- a/autotests/o5mparsertest.cpp +++ b/autotests/o5mparsertest.cpp @@ -1,156 +1,160 @@ /* Copyright (C) 2020 Volker Krause This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ +#include #include #include // see https://wiki.openstreetmap.org/wiki/O5m for the examples used below class O5mParserTest : public QObject { Q_OBJECT private Q_SLOTS: void testParseUnsignedInt_data() { QTest::addColumn("data"); QTest::addColumn("num"); QTest::newRow("0") << QByteArray::fromHex("00") << 0u; QTest::newRow("1") << QByteArray::fromHex("01") << 1u; QTest::newRow("127") << QByteArray::fromHex("7f") << 127u; QTest::newRow("128") << QByteArray::fromHex("8001") << 128u; QTest::newRow("255") << QByteArray::fromHex("FF01") << 255u; QTest::newRow("256") << QByteArray::fromHex("8002") << 256u; QTest::newRow("323") << QByteArray::fromHex("c302") << 323u; QTest::newRow("16384") << QByteArray::fromHex("808001") << 16384u; } void testParseUnsignedInt() { QFETCH(QByteArray, data); QFETCH(uint32_t, num); OSM::O5mParser p(nullptr); const auto beginIt = reinterpret_cast(data.constBegin()); auto it = beginIt; const auto endIt = reinterpret_cast(data.constEnd()); QCOMPARE(p.readUnsigned(it, endIt), num); QVERIFY(it > beginIt); QVERIFY(it <= endIt); } void testParseSignedInt_data() { QTest::addColumn("data"); QTest::addColumn("num"); QTest::newRow("0") << QByteArray::fromHex("00") << 0; QTest::newRow("64") << QByteArray::fromHex("8001") << 64; QTest::newRow("-2") << QByteArray::fromHex("03") << -2; QTest::newRow("-65") << QByteArray::fromHex("8101") << -65; } void testParseSignedInt() { QFETCH(QByteArray, data); QFETCH(int32_t, num); OSM::O5mParser p(nullptr); const auto beginIt = reinterpret_cast(data.constBegin()); auto it = beginIt; const auto endIt = reinterpret_cast(data.constEnd()); QCOMPARE(p.readSigned(it, endIt), num); QVERIFY(it > beginIt); QVERIFY(it <= endIt); } void testParseString() { const auto data = QByteArray::fromHex("0031696e6e65720001"); const auto beginIt = reinterpret_cast(data.constBegin()); auto it = beginIt; const auto endIt = reinterpret_cast(data.constEnd()); OSM::O5mParser p(nullptr); auto s = p.readString(it, endIt); QCOMPARE(s, "1inner"); QCOMPARE(it, beginIt + 8); s = p.readString(it, endIt); QCOMPARE(s, "1inner"); QCOMPARE(it, beginIt + 9); } void testParseStringPair() { const auto data = QByteArray::fromHex("006f6e6577617900796573000061746d006e6f000200fc07004a6f686e00020301"); const auto beginIt = reinterpret_cast(data.constBegin()); auto it = beginIt; const auto endIt = reinterpret_cast(data.constEnd()); OSM::O5mParser p(nullptr); auto s = p.readStringPair(it, endIt); QCOMPARE(s.first, "oneway"); QCOMPARE(s.second, "yes"); QCOMPARE(it, beginIt + 12); s = p.readStringPair(it, endIt); QCOMPARE(s.first, "atm"); QCOMPARE(s.second, "no"); QCOMPARE(it, beginIt + 20); s = p.readStringPair(it, endIt); QCOMPARE(s.first, "oneway"); QCOMPARE(s.second, "yes"); QCOMPARE(it, beginIt + 21); s = p.readStringPair(it, endIt); QCOMPARE(s.first, "\xfc\x07"); QCOMPARE(s.second, "John"); QCOMPARE(it, beginIt + 30); s = p.readStringPair(it, endIt); QCOMPARE(s.first, "atm"); QCOMPARE(s.second, "no"); QCOMPARE(it, beginIt + 31); s = p.readStringPair(it, endIt); QCOMPARE(s.first, "oneway"); QCOMPARE(s.second, "yes"); QCOMPARE(it, beginIt + 32); s = p.readStringPair(it, endIt); QCOMPARE(s.first, "\xfc\x07"); QCOMPARE(s.second, "John"); QCOMPARE(it, beginIt + 33); } void testParseWay() { const auto data = QByteArray::fromHex("CCE48E04002DCAAFA01A02BCA0AFF6018FFAD5F70180DFBB9E0FA5E5E5A60DE4E5E5A60DE385959D0F9E86959D0FF7E6E5A60D0062426F780031332E333634313031392C35322E353233323734312C31332E333635373039392C35322E353234323033310000726566003630323400"); const auto beginIt = reinterpret_cast(data.constBegin()); auto it = beginIt; const auto endIt = reinterpret_cast(data.constEnd()); - OSM::O5mParser p(nullptr); + OSM::DataSet dataSet; + OSM::O5mParser p(&dataSet); p.readWay(it, endIt); + + QCOMPARE(dataSet.ways.size(), 1); } }; QTEST_GUILESS_MAIN(O5mParserTest) #include "o5mparsertest.moc" diff --git a/src/osm/o5mparser.cpp b/src/osm/o5mparser.cpp index 8217340..4fae288 100644 --- a/src/osm/o5mparser.cpp +++ b/src/osm/o5mparser.cpp @@ -1,251 +1,286 @@ /* Copyright (C) 2020 Volker Krause This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "o5mparser.h" #include "datatypes.h" #include #include using namespace OSM; enum : uint8_t { - O5M_RESET = 0xff, - O5M_NODE = 0x10, - O5M_WAY = 0x11, - O5M_RELATION = 0x12, - O5M_BOUNDING_BOX = 0xdb, - O5M_TIMESTAMP = 0xdc, - O5M_HEADER = 0xe0, + O5M_BLOCK_RESET = 0xff, + O5M_BLOCK_NODE = 0x10, + O5M_BLOCK_WAY = 0x11, + O5M_BLOCK_RELATION = 0x12, + O5M_BLOCK_BOUNDING_BOX = 0xdb, + O5M_BLOCK_TIMESTAMP = 0xdc, + O5M_BLOCK_HEADER = 0xe0, + O5M_NUMBER_CONTINUATION = 0b1000'0000, O5M_NUMBER_MASK = 0b0111'1111, - O5M_SIGNED_BIT = 0b1, + O5M_NUMBER_SIGNED_BIT = 0b1, + + O5M_MEMTYPE_NODE = 0x30, + O5M_MEMTYPE_WAY = 0x31, + O5M_MEMTYPE_RELATION = 0x32, }; enum : uint16_t { O5M_STRING_TABLE_SIZE = 15000, O5M_STRING_TABLE_MAXLEN = 250, }; O5mParser::O5mParser(DataSet *dataSet) : m_dataSet(dataSet) { m_stringLookupTable.resize(O5M_STRING_TABLE_SIZE); } void O5mParser::parse(const uint8_t* data, std::size_t len) { qDebug() << "begin parsing"; const auto endIt = data + len; for (auto it = data; it < endIt - 1;) { const auto blockType = (*it); - if (blockType == O5M_RESET) { + if (blockType == O5M_BLOCK_RESET) { resetDeltaCodingState(); ++it; continue; } auto blockSize = readUnsigned(++it, endIt); if (blockSize >= (uint64_t)(endIt - it)) { qWarning() << "premature end of file, or blocksize too large" << (endIt - it) << blockType << blockSize; break; } switch (blockType) { - case O5M_HEADER: + case O5M_BLOCK_HEADER: if (blockSize != 4 || std::strncmp(reinterpret_cast(it), "o5m2", 4) != 0) { qWarning() << "Invalid file header"; return; } break; - case O5M_BOUNDING_BOX: - case O5M_TIMESTAMP: + case O5M_BLOCK_BOUNDING_BOX: + case O5M_BLOCK_TIMESTAMP: // not of interest at the moment break; - case O5M_NODE: + case O5M_BLOCK_NODE: readNode(it, it + blockSize); break; - case O5M_WAY: + case O5M_BLOCK_WAY: readWay(it, it + blockSize); break; - case O5M_RELATION: + case O5M_BLOCK_RELATION: readRelation(it, it + blockSize); break; default: qDebug() << "unhandled o5m block type:" << (it - data) << blockType << blockSize; } it += blockSize; } qDebug() << "parsing done"; } uint64_t O5mParser::readUnsigned(const uint8_t *&it, const uint8_t *endIt) const { uint64_t result = 0; int i = 0; for (; it < endIt && ((*it) & O5M_NUMBER_CONTINUATION); ++it, ++i) { result |= ((*it) & O5M_NUMBER_MASK) << (i * 7); } result |= ((uint64_t)(*it++) & O5M_NUMBER_MASK) << (i * 7); return result; } int64_t O5mParser::readSigned(const uint8_t *&it, const uint8_t *endIt) const { const uint64_t u = readUnsigned(it, endIt); - return (u & O5M_SIGNED_BIT) ? (-(u >> 1) -1) : (u >> 1); + return (u & O5M_NUMBER_SIGNED_BIT) ? (-(u >> 1) -1) : (u >> 1); } template T O5mParser::readDelta(const uint8_t *&it, const uint8_t *endIt, T &deltaState) { deltaState += (T)readSigned(it, endIt); return deltaState; } const char* O5mParser::readString(const uint8_t *&it, const uint8_t *endIt) { auto ref = readUnsigned(it, endIt); if (ref) { return m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE]; } else { const auto s = reinterpret_cast(it); const auto len = std::strlen(s); if (len <= O5M_STRING_TABLE_MAXLEN) { m_stringLookupTable[m_stringLookupPosition] = s; m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE; } it += len + 1; return s; } } std::pair O5mParser::readStringPair(const uint8_t *&it, const uint8_t *endIt) { auto ref = readUnsigned(it, endIt); if (ref) { const auto s = m_stringLookupTable[(m_stringLookupPosition + O5M_STRING_TABLE_SIZE - ref) % O5M_STRING_TABLE_SIZE]; const auto len1 = std::strlen(s); return std::make_pair(s, s + len1 + 1); } else { const auto s = reinterpret_cast(it); const auto len1 = std::strlen(s); const auto len2 = std::strlen(s + len1 + 1); if (len1 + len2 <= O5M_STRING_TABLE_MAXLEN) { m_stringLookupTable[m_stringLookupPosition] = s; m_stringLookupPosition = (m_stringLookupPosition + 1) % O5M_STRING_TABLE_SIZE; } it += len1 + len2 + 2; return std::make_pair(s, s + len1 + 1); } } void O5mParser::skipVersionInformation(const uint8_t *&it, const uint8_t *end) { if (it >= end) { return; } const auto version = readUnsigned(it, end); if (version > 0) { qWarning() << "skipping changeset data not implemented yet!"; // timestamp (seconds since 1970, signed, delta-coded) // author information – only if timestamp is not 0: // changeset (signed, delta-coded) // uid, user (string pair) it = end; } } void O5mParser::readNode(const uint8_t *begin, const uint8_t *end) { + OSM::Node node; + auto it = begin; - const OSM::Id id = readDelta(it, end, m_nodeIdDelta); + node.id = readDelta(it, end, m_nodeIdDelta); skipVersionInformation(it, end); if (it >= end) { return; } - const auto lat = readDelta(it, end, m_latDelata); - const auto lon = readDelta(it, end, m_lonDelta); - qDebug() << " node" << id << (lat / 1.0e7) << (lon / 1.0e7); + node.coordinate.longitude = (int64_t)readDelta(it, end, m_lonDelta) + 1'800'000'000ll; + node.coordinate.latitude = (int64_t)readDelta(it, end, m_latDelata) + 900'000'000ll; if (it >= end) { return; } while (it < end) { - const auto tag = readStringPair(it, end); - qDebug() << " tag" << tag.first << tag.second; + OSM::Tag tag; + const auto tagData = readStringPair(it, end); + tag.key = QString::fromUtf8(tagData.first); + tag.value = QString::fromUtf8(tagData.second); + OSM::setTag(node, std::move(tag)); } + + m_dataSet->addNode(std::move(node)); } void O5mParser::readWay(const uint8_t *begin, const uint8_t *end) { + OSM::Way way; + auto it = begin; - const OSM::Id id = readDelta(it, end, m_wayIdDelta); + way.id = readDelta(it, end, m_wayIdDelta); skipVersionInformation(it, end); if (it >= end) { return; } const auto nodesBlockSize = readUnsigned(it, end); - qDebug() << " way" << id << nodesBlockSize; if (it + nodesBlockSize > end) { return; } const auto nodesBlockEnd = it + nodesBlockSize; while(it < nodesBlockEnd) { - const OSM::Id nodeId = readDelta(it, end, m_wayNodeIdDelta); - qDebug() << " nd" << nodeId; + way.nodes.push_back(readDelta(it, end, m_wayNodeIdDelta)); } while (it < end) { - const auto tag = readStringPair(it, end); - qDebug() << " tag" << tag.first << tag.second; + const auto tagData = readStringPair(it, end); + // TODO handle bbox tags + + OSM::Tag tag; + tag.key = QString::fromUtf8(tagData.first); + tag.value = QString::fromUtf8(tagData.second); + OSM::setTag(way, std::move(tag)); } + + m_dataSet->addWay(std::move(way)); } void O5mParser::readRelation(const uint8_t *begin, const uint8_t *end) { + OSM::Relation rel; + auto it = begin; - const OSM::Id id = readDelta(it, end, m_relIdDelta); + rel.id = readDelta(it, end, m_relIdDelta); skipVersionInformation(it, end); if (it >= end) { return; } const auto relBlockSize = readUnsigned(it, end); - qDebug() << " relation" << id << relBlockSize; if (it + relBlockSize > end) { return; } const auto relBlockEnd = it + relBlockSize; while (it < relBlockEnd) { - const OSM::Id memId = readDelta(it, end, m_relMemberIdDelta); + OSM::Member mem; + mem.id = readDelta(it, end, m_relMemberIdDelta); + const auto typeAndRole = readString(it, end); - qDebug() << " mem" << memId << typeAndRole[0] << (typeAndRole + 1); + switch (typeAndRole[0]) { + case O5M_MEMTYPE_NODE: mem.type = OSM::Type::Node; break; + case O5M_MEMTYPE_WAY: mem.type = OSM::Type::Way; break; + case O5M_MEMTYPE_RELATION: mem.type = OSM::Type::Relation; break; + } + mem.role = QString::fromUtf8(typeAndRole + 1); + + rel.members.push_back(std::move(mem)); } while (it < end) { - const auto tag = readStringPair(it, end); - qDebug() << " tag" << tag.first << tag.second; + const auto tagData = readStringPair(it, end); + // TODO handle bbox tags + + OSM::Tag tag; + tag.key = QString::fromUtf8(tagData.first); + tag.value = QString::fromUtf8(tagData.second); + OSM::setTag(rel, std::move(tag)); } + + m_dataSet->addRelation(std::move(rel)); } void O5mParser::resetDeltaCodingState() { m_nodeIdDelta = 0; m_latDelata = 0; m_lonDelta = 0; m_wayIdDelta = 0; m_wayNodeIdDelta = 0; m_relIdDelta = 0; m_relMemberIdDelta = 0; }