diff --git a/src/SourceData.cpp b/src/SourceData.cpp index bac91cd..3c0ac7e 100644 --- a/src/SourceData.cpp +++ b/src/SourceData.cpp @@ -1,765 +1,894 @@ /* Features of class SourceData: - Read a file (from the given URL) or accept data via a string. - Allocate and free buffers as necessary. - Run a preprocessor, when specified. - Run the line-matching preprocessor, when specified. - Run other preprocessing steps: Uppercase, ignore comments, remove carriage return, ignore numbers. Order of operation: 1. If data was given via a string then save it to a temp file. (see setData()) 2. If the specified file is nonlocal (URL) copy it to a temp file. 3. If a preprocessor was specified, run the input file through it. 4. Read the output of the preprocessor. 5. If Uppercase was specified: Turn the read data to uppercase. 6. Write the result to a temp file. 7. If a line-matching preprocessor was specified, run the temp file through it. 8. Read the output of the line-matching preprocessor. 9. If ignore numbers was specified, strip the LMPP-output of all numbers. 10. If ignore comments was specified, strip the LMPP-output of comments. Optimizations: Skip unneeded steps. 
*/ #include "SourceData.h" #include "Utils.h" #include "diff.h" #include #include #include #include SourceData::SourceData() { m_pOptions = nullptr; reset(); } SourceData::~SourceData() { reset(); } void SourceData::reset() { m_pEncoding = nullptr; m_fileAccess = FileAccess(); m_normalData.reset(); m_lmppData.reset(); if(!m_tempInputFileName.isEmpty()) { FileAccess::removeFile(m_tempInputFileName); m_tempInputFileName = ""; } } void SourceData::setFilename(const QString& filename) { if(filename.isEmpty()) { reset(); } else { FileAccess fa(filename); setFileAccess(fa); } } bool SourceData::isEmpty() { return getFilename().isEmpty(); } bool SourceData::hasData() { return m_normalData.m_pBuf != nullptr; } bool SourceData::isValid() { return isEmpty() || hasData(); } void SourceData::setOptions(Options* pOptions) { m_pOptions = pOptions; } QString SourceData::getFilename() { return m_fileAccess.absoluteFilePath(); } QString SourceData::getAliasName() { return m_aliasName.isEmpty() ? m_fileAccess.prettyAbsPath() : m_aliasName; } void SourceData::setAliasName(const QString& name) { m_aliasName = name; } void SourceData::setFileAccess(const FileAccess& fileAccess) { m_fileAccess = fileAccess; m_aliasName = QString(); if(!m_tempInputFileName.isEmpty()) { FileAccess::removeFile(m_tempInputFileName); m_tempInputFileName = ""; } } void SourceData::setEncoding(QTextCodec* pEncoding) { m_pEncoding = pEncoding; } QStringList SourceData::setData(const QString& data) { QStringList errors; // Create a temp file for preprocessing: if(m_tempInputFileName.isEmpty()) { FileAccess::createTempFile(m_tempFile); m_tempInputFileName = m_tempFile.fileName(); } FileAccess f(m_tempInputFileName); QByteArray ba = QTextCodec::codecForName("UTF-8")->fromUnicode(data); bool bSuccess = f.writeFile(ba.constData(), ba.length()); if(!bSuccess) { errors.append(i18n("Writing clipboard data to temp file failed.")); } else { m_aliasName = i18n("From Clipboard"); m_fileAccess = FileAccess(""); // Effect: 
m_fileAccess.isValid() is false } return errors; } const LineData* SourceData::getLineDataForDiff() const { if(m_lmppData.m_pBuf == nullptr) return m_normalData.m_v.size() > 0 ? &m_normalData.m_v[0] : nullptr; else return m_lmppData.m_v.size() > 0 ? &m_lmppData.m_v[0] : nullptr; } const LineData* SourceData::getLineDataForDisplay() const { return m_normalData.m_v.size() > 0 ? &m_normalData.m_v[0] : nullptr; } LineRef SourceData::getSizeLines() const { return (LineRef)m_normalData.m_vSize; } qint64 SourceData::getSizeBytes() const { return m_normalData.m_size; } const char* SourceData::getBuf() const { return m_normalData.m_pBuf; } const QString& SourceData::getText() const { return m_normalData.m_unicodeBuf; } bool SourceData::isText() { return m_normalData.isText(); } bool SourceData::isIncompleteConversion() { return m_normalData.m_bIncompleteConversion; } bool SourceData::isFromBuffer() { return !m_fileAccess.isValid(); } bool SourceData::isBinaryEqualWith(const SourceData& other) const { return m_fileAccess.exists() && other.m_fileAccess.exists() && getSizeBytes() == other.getSizeBytes() && (getSizeBytes() == 0 || memcmp(getBuf(), other.getBuf(), getSizeBytes()) == 0); } void SourceData::FileData::reset() { delete[](char*) m_pBuf; m_pBuf = nullptr; m_v.clear(); m_size = 0; m_vSize = 0; m_bIsText = true; m_bIncompleteConversion = false; m_eLineEndStyle = eLineEndStyleUndefined; } bool SourceData::FileData::readFile(const QString& filename) { reset(); if(filename.isEmpty()) { return true; } FileAccess fa(filename); if(!fa.isNormal()) return true; m_size = fa.sizeForReading(); char* pBuf; m_pBuf = pBuf = new char[m_size + 100]; // Alloc 100 byte extra: Safety hack, not nice but does no harm. // Some extra bytes at the end of the buffer are needed by // the diff algorithm. See also GnuDiff::diff_2_files(). 
bool bSuccess = fa.readFile(pBuf, m_size); if(!bSuccess) { delete[] pBuf; m_pBuf = nullptr; m_size = 0; } return bSuccess; } bool SourceData::saveNormalDataAs(const QString& fileName) { return m_normalData.writeFile(fileName); } bool SourceData::FileData::writeFile(const QString& filename) { if(filename.isEmpty()) { return true; } FileAccess fa(filename); bool bSuccess = fa.writeFile(m_pBuf, m_size); return bSuccess; } void SourceData::FileData::copyBufFrom(const FileData& src) { reset(); char* pBuf; m_size = src.m_size; m_pBuf = pBuf = new char[m_size + 100]; Q_ASSERT(src.m_pBuf != nullptr); memcpy(pBuf, src.m_pBuf, m_size); } QTextCodec* SourceData::detectEncoding(const QString& fileName, QTextCodec* pFallbackCodec) { QFile f(fileName); if(f.open(QIODevice::ReadOnly)) { char buf[200]; qint64 size = f.read(buf, sizeof(buf)); qint64 skipBytes = 0; - QTextCodec* pCodec = ::detectEncoding(buf, size, skipBytes); + QTextCodec* pCodec = detectEncoding(buf, size, skipBytes); if(pCodec) return pCodec; } return pFallbackCodec; } QStringList SourceData::readAndPreprocess(QTextCodec* pEncoding, bool bAutoDetectUnicode) { m_pEncoding = pEncoding; QTemporaryFile fileIn1, fileOut1; QString fileNameIn1; QString fileNameOut1; QString fileNameIn2; QString fileNameOut2; QStringList errors; if(m_fileAccess.isValid() && !m_fileAccess.isNormal()) { errors.append(i18n("%1 is not a normal file.", m_fileAccess.prettyAbsPath())); return errors; } bool bTempFileFromClipboard = !m_fileAccess.isValid(); // Detect the input for the preprocessing operations if(!bTempFileFromClipboard) { if(m_fileAccess.isLocal()) { fileNameIn1 = m_fileAccess.absoluteFilePath(); } else // File is not local: create a temporary local copy: { if(m_tempInputFileName.isEmpty()) { m_fileAccess.createLocalCopy(); m_tempInputFileName = m_fileAccess.getTempName(); } fileNameIn1 = m_tempInputFileName; } if(bAutoDetectUnicode) { m_pEncoding = detectEncoding(fileNameIn1, pEncoding); } } else // The input was set via 
setData(), probably from clipboard. { fileNameIn1 = m_tempInputFileName; m_pEncoding = QTextCodec::codecForName("UTF-8"); } QTextCodec* pEncoding1 = m_pEncoding; QTextCodec* pEncoding2 = m_pEncoding; m_normalData.reset(); m_lmppData.reset(); FileAccess faIn(fileNameIn1); qint64 fileInSize = faIn.size(); if(faIn.exists()) { // Run the first preprocessor if(m_pOptions->m_PreProcessorCmd.isEmpty()) { // No preprocessing: Read the file directly: if(!m_normalData.readFile(fileNameIn1)) { errors.append(i18n("Failed to read file: %1", fileNameIn1)); return errors; } } else { QTemporaryFile tmpInPPFile; QString fileNameInPP = fileNameIn1; if(pEncoding1 != m_pOptions->m_pEncodingPP) { // Before running the preprocessor convert to the format that the preprocessor expects. FileAccess::createTempFile(tmpInPPFile); fileNameInPP = tmpInPPFile.fileName(); pEncoding1 = m_pOptions->m_pEncodingPP; convertFileEncoding(fileNameIn1, pEncoding, fileNameInPP, pEncoding1); } QString ppCmd = m_pOptions->m_PreProcessorCmd; FileAccess::createTempFile(fileOut1); fileNameOut1 = fileOut1.fileName(); QProcess ppProcess; ppProcess.setStandardInputFile(fileNameInPP); ppProcess.setStandardOutputFile(fileNameOut1); QString program; QStringList args; QString errorReason = Utils::getArguments(ppCmd, program, args); if(errorReason.isEmpty()) { ppProcess.start(program, args); ppProcess.waitForFinished(-1); } else errorReason = "\n(" + errorReason + ')'; bool bSuccess = errorReason.isEmpty() && m_normalData.readFile(fileNameOut1); if(fileInSize > 0 && (!bSuccess || m_normalData.m_size == 0)) { errors.append( i18n("Preprocessing possibly failed. 
Check this command:\n\n %1" "\n\nThe preprocessing command will be disabled now.", ppCmd) + errorReason); m_pOptions->m_PreProcessorCmd = ""; if(!m_normalData.readFile(fileNameIn1)) { errors.append(i18n("Failed to read file: %1", fileNameIn1)); return errors; } pEncoding1 = m_pEncoding; } } if(!m_normalData.preprocess(m_pOptions->m_bPreserveCarriageReturn, pEncoding1)) { errors.append(i18n("File %1 too large to process. Skipping.", fileNameIn1)); return errors; } //exit early for non text data further processing assumes a text file as input if(!m_normalData.isText()) return errors; // LineMatching Preprocessor if(!m_pOptions->m_LineMatchingPreProcessorCmd.isEmpty()) { QTemporaryFile tempOut2, fileInPP; fileNameIn2 = fileNameOut1.isEmpty() ? fileNameIn1 : fileNameOut1; QString fileNameInPP = fileNameIn2; pEncoding2 = pEncoding1; if(pEncoding2 != m_pOptions->m_pEncodingPP) { // Before running the preprocessor convert to the format that the preprocessor expects. FileAccess::createTempFile(fileInPP); fileNameInPP = fileInPP.fileName(); pEncoding2 = m_pOptions->m_pEncodingPP; convertFileEncoding(fileNameIn2, pEncoding1, fileNameInPP, pEncoding2); } QString ppCmd = m_pOptions->m_LineMatchingPreProcessorCmd; FileAccess::createTempFile(tempOut2); fileNameOut2 = tempOut2.fileName(); QProcess ppProcess; ppProcess.setStandardInputFile(fileNameInPP); ppProcess.setStandardOutputFile(fileNameOut2); QString program; QStringList args; QString errorReason = Utils::getArguments(ppCmd, program, args); if(errorReason.isEmpty()) { ppProcess.start(program, args); ppProcess.waitForFinished(-1); } else errorReason = "\n(" + errorReason + ')'; bool bSuccess = errorReason.isEmpty() && m_lmppData.readFile(fileNameOut2); if(FileAccess(fileNameIn2).size() > 0 && (!bSuccess || m_lmppData.m_size == 0)) { errors.append( i18n("The line-matching-preprocessing possibly failed. 
Check this command:\n\n %1" "\n\nThe line-matching-preprocessing command will be disabled now.", ppCmd) + errorReason); m_pOptions->m_LineMatchingPreProcessorCmd = ""; if(!m_lmppData.readFile(fileNameIn2)) { errors.append(i18n("Failed to read file: %1", fileNameIn2)); return errors; } } } else if(m_pOptions->m_bIgnoreComments || m_pOptions->m_bIgnoreCase) { // We need a copy of the normal data. m_lmppData.copyBufFrom(m_normalData); } } if(!m_lmppData.preprocess(false, pEncoding2)) { errors.append(i18n("File %1 too large to process. Skipping.", fileNameIn1)); return errors; } Q_ASSERT(m_lmppData.isText()); //TODO: Needed? if(m_lmppData.m_vSize < m_normalData.m_vSize) { // Preprocessing command may result in smaller data buffer so adjust size m_lmppData.m_v.resize((int)m_normalData.m_vSize); for(qint64 i = m_lmppData.m_vSize; i < m_normalData.m_vSize; ++i) { // Set all empty lines to point to the end of the buffer. m_lmppData.m_v[(int)i].setLine(m_lmppData.m_unicodeBuf.unicode() + m_lmppData.m_unicodeBuf.length()); } m_lmppData.m_vSize = m_normalData.m_vSize; } // Ignore comments if(m_pOptions->m_bIgnoreComments) { m_lmppData.removeComments(); LineRef vSize = (LineRef)std::min(m_normalData.m_vSize, m_lmppData.m_vSize); for(int i = 0; i < (int)vSize; ++i) { m_normalData.m_v[i].bContainsPureComment = m_lmppData.m_v[i].bContainsPureComment; } } return errors; } /** Prepare the linedata vector for every input line.*/ bool SourceData::FileData::preprocess(bool bPreserveCR, QTextCodec* pEncoding) { qint64 i; // detect line end style QVector vOrigDataLineEndStyle; m_eLineEndStyle = eLineEndStyleUndefined; for(i = 0; i < m_size; ++i) { if(m_pBuf[i] == '\r') { if(i + 1 < m_size && m_pBuf[i + 1] == '\n') // not 16-bit unicode { vOrigDataLineEndStyle.push_back(eLineEndStyleDos); ++i; } else if(i > 0 && i + 2 < m_size && m_pBuf[i - 1] == '\0' && m_pBuf[i + 1] == '\0' && m_pBuf[i + 2] == '\n') // 16-bit unicode { vOrigDataLineEndStyle.push_back(eLineEndStyleDos); i += 2; } else 
// old mac line end style ? { vOrigDataLineEndStyle.push_back(eLineEndStyleUndefined); const_cast(m_pBuf)[i] = '\n'; // fix it in original data } } else if(m_pBuf[i] == '\n') { vOrigDataLineEndStyle.push_back(eLineEndStyleUnix); } } if(!vOrigDataLineEndStyle.isEmpty()) m_eLineEndStyle = vOrigDataLineEndStyle[0]; qint64 skipBytes = 0; - QTextCodec* pCodec = ::detectEncoding(m_pBuf, m_size, skipBytes); + QTextCodec* pCodec = detectEncoding(m_pBuf, m_size, skipBytes); if(pCodec != pEncoding) skipBytes = 0; if(m_size - skipBytes > INT_MAX) return false; QByteArray ba = QByteArray::fromRawData(m_pBuf + skipBytes, (int)(m_size - skipBytes)); QTextStream ts(ba, QIODevice::ReadOnly | QIODevice::Text); ts.setCodec(pEncoding); ts.setAutoDetectUnicode(false); m_unicodeBuf = ts.readAll(); ba.clear(); int ucSize = m_unicodeBuf.length(); const QChar* p = m_unicodeBuf.unicode(); m_bIsText = true; int lines = 1; m_bIncompleteConversion = false; for(i = 0; i < ucSize; ++i) { if(i >= ucSize || p[i] == '\n') { ++lines; } if(p[i].isNull()) { m_bIsText = false; } if(p[i] == QChar::ReplacementCharacter) { m_bIncompleteConversion = true; } } m_v.resize(lines + 5); int lineIdx = 0; int lineLength = 0; bool bNonWhiteFound = false; int whiteLength = 0; for(i = 0; i <= ucSize; ++i) { if(i >= ucSize || p[i] == '\n') { const QChar* pLine = &p[i - lineLength]; m_v[lineIdx].setLine(&p[i - lineLength]); while(/*!bPreserveCR &&*/ lineLength > 0 && m_v[lineIdx].getLine()[lineLength - 1] == '\r') { --lineLength; } m_v[lineIdx].setFirstNonWhiteChar(m_v[lineIdx].getLine() + std::min(whiteLength, lineLength)); if(lineIdx < vOrigDataLineEndStyle.count() && bPreserveCR && i < ucSize) { ++lineLength; const_cast(pLine)[lineLength] = '\r'; //switch ( vOrigDataLineEndStyle[lineIdx] ) //{ //case eLineEndStyleUnix: const_cast(pLine)[lineLength] = '\n'; break; //case eLineEndStyleDos: const_cast(pLine)[lineLength] = '\r'; break; //case eLineEndStyleUndefined: const_cast(pLine)[lineLength] = '\x0b'; break; //} } 
m_v[lineIdx].setSize(lineLength); lineLength = 0; bNonWhiteFound = false; whiteLength = 0; ++lineIdx; } else { ++lineLength; if(!bNonWhiteFound && isWhite(p[i])) ++whiteLength; else bNonWhiteFound = true; } } Q_ASSERT(lineIdx == lines); m_vSize = lines; return true; } // Must not be entered, when within a comment. // Returns either at a newline-character p[i]=='\n' or when i==size. // A line that contains only comments is still "white". // Comments in white lines must remain, while comments in // non-white lines are overwritten with spaces. void SourceData::FileData::checkLineForComments( const QChar* p, // pointer to start of buffer int& i, // index of current position (in, out) int size, // size of buffer bool& bWhite, // false if this line contains nonwhite characters (in, out) bool& bCommentInLine, // true if any comment is within this line (in, out) bool& bStartsOpenComment // true if the line ends within an comment (out) ) { bStartsOpenComment = false; for(; i < size; ++i) { // A single apostroph ' has prio over a double apostroph " (e.g. '"') // (if not in a string) if(p[i] == '\'') { bWhite = false; ++i; for(; !isLineOrBufEnd(p, i, size) && p[i] != '\''; ++i) ; if(p[i] == '\'') ++i; } // Strings have priority over comments: e.g. "/* Not a comment, but a string. 
*/" else if(p[i] == '"') { bWhite = false; ++i; for(; !isLineOrBufEnd(p, i, size) && !(p[i] == '"' && p[i - 1] != '\\'); ++i) ; if(p[i] == '"') ++i; } // C++-comment else if(p[i] == '/' && i + 1 < size && p[i + 1] == '/') { int commentStart = i; bCommentInLine = true; i += 2; for(; !isLineOrBufEnd(p, i, size); ++i) ; if(!bWhite) { size = i - commentStart; m_unicodeBuf.replace(commentStart, size, QString(" ").repeated(size)); } return; } // C-comment else if(p[i] == '/' && i + 1 < size && p[i + 1] == '*') { int commentStart = i; bCommentInLine = true; i += 2; for(; !isLineOrBufEnd(p, i, size); ++i) { if(i + 1 < size && p[i] == '*' && p[i + 1] == '/') // end of the comment { i += 2; // More comments in the line? checkLineForComments(p, i, size, bWhite, bCommentInLine, bStartsOpenComment); if(!bWhite) { size = i - commentStart; m_unicodeBuf.replace(commentStart, size, QString(" ").repeated(size)); } return; } } bStartsOpenComment = true; return; } if(isLineOrBufEnd(p, i, size)) { return; } else if(!p[i].isSpace()) { bWhite = false; } } } // Modifies the input data, and replaces C/C++ comments with whitespace // when the line contains other data too. If the line contains only // a comment or white data, remember this in the flag bContainsPureComment. void SourceData::FileData::removeComments() { int line = 0; const QChar* p = m_unicodeBuf.unicode(); bool bWithinComment = false; int size = m_unicodeBuf.length(); for(int i = 0; i < size; ++i) { // std::cout << "2 " << std::string(&p[i], m_v[line].size) << std::endl; bool bWhite = true; bool bCommentInLine = false; if(bWithinComment) { int commentStart = i; bCommentInLine = true; for(; !isLineOrBufEnd(p, i, size); ++i) { if(i + 1 < size && p[i] == '*' && p[i + 1] == '/') // end of the comment { i += 2; // More comments in the line? 
checkLineForComments(p, i, size, bWhite, bCommentInLine, bWithinComment); if(!bWhite) { size = i - commentStart; m_unicodeBuf.replace(commentStart, size, QString(" ").repeated(size)); } break; } } } else { checkLineForComments(p, i, size, bWhite, bCommentInLine, bWithinComment); } // end of line Q_ASSERT(isLineOrBufEnd(p, i, size)); m_v[line].bContainsPureComment = bCommentInLine && bWhite; /* std::cout << line << " : " << ( bCommentInLine ? "c" : " " ) << ( bWhite ? "w " : " ") << std::string(pLD[line].pLine, pLD[line].size) << std::endl;*/ ++line; } } + +bool SourceData::isLineOrBufEnd(const QChar* p, int i, int size) +{ + return i >= size // End of file + || isEndOfLine(p[i]) // Normal end of line + + // No support for Mac-end of line yet, because incompatible with GNU-diff-routines. + // || ( p[i]=='\r' && (i>=size-1 || p[i+1]!='\n') + // && (i==0 || p[i-1]!='\n') ) // Special case: '\r' without '\n' + ; +} + +// Convert the input file from input encoding to output encoding and write it to the output file. 
+bool SourceData::convertFileEncoding(const QString& fileNameIn, QTextCodec* pCodecIn, + const QString& fileNameOut, QTextCodec* pCodecOut) +{ + QFile in(fileNameIn); + if(!in.open(QIODevice::ReadOnly)) + return false; + QTextStream inStream(&in); + inStream.setCodec(pCodecIn); + inStream.setAutoDetectUnicode(false); + + QFile out(fileNameOut); + if(!out.open(QIODevice::WriteOnly)) + return false; + QTextStream outStream(&out); + outStream.setCodec(pCodecOut); + + QString data = inStream.readAll(); + outStream << data; + + return true; +} + +QTextCodec* SourceData::getEncodingFromTag(const QByteArray& s, const QByteArray& encodingTag) +{ + int encodingPos = s.indexOf(encodingTag); + if(encodingPos >= 0) + { + int apostrophPos = s.indexOf('"', encodingPos + encodingTag.length()); + int apostroph2Pos = s.indexOf('\'', encodingPos + encodingTag.length()); + char apostroph = '"'; + if(apostroph2Pos >= 0 && (apostrophPos < 0 || apostroph2Pos < apostrophPos)) + { + apostroph = '\''; + apostrophPos = apostroph2Pos; + } + + int encodingEnd = s.indexOf(apostroph, apostrophPos + 1); + if(encodingEnd >= 0) // e.g.: or + { + QByteArray encoding = s.mid(apostrophPos + 1, encodingEnd - (apostrophPos + 1)); + return QTextCodec::codecForName(encoding); + } + else // e.g.: + { + QByteArray encoding = s.mid(encodingPos + encodingTag.length(), apostrophPos - (encodingPos + encodingTag.length())); + return QTextCodec::codecForName(encoding); + } + } + return nullptr; +} + +QTextCodec* SourceData::detectEncoding(const char* buf, qint64 size, qint64& skipBytes) +{ + if(size >= 2) + { + if(buf[0] == '\xFF' && buf[1] == '\xFE') + { + skipBytes = 2; + return QTextCodec::codecForName("UTF-16LE"); + } + + if(buf[0] == '\xFE' && buf[1] == '\xFF') + { + skipBytes = 2; + return QTextCodec::codecForName("UTF-16BE"); + } + } + if(size >= 3) + { + if(buf[0] == '\xEF' && buf[1] == '\xBB' && buf[2] == '\xBF') + { + skipBytes = 3; + return QTextCodec::codecForName("UTF-8-BOM"); + } + } + skipBytes = 
0; + QByteArray s; + /* + We don't need the whole file here just the header. +] */ + if(size <= 5000) + s=QByteArray(buf, (int)size); + else + s=QByteArray(buf, 5000); + + int xmlHeaderPos = s.indexOf("= 0) + { + int xmlHeaderEnd = s.indexOf("?>", xmlHeaderPos); + if(xmlHeaderEnd >= 0) + { + QTextCodec* pCodec = getEncodingFromTag(s.mid(xmlHeaderPos, xmlHeaderEnd - xmlHeaderPos), "encoding="); + if(pCodec) + return pCodec; + } + } + else // HTML + { + int metaHeaderPos = s.indexOf("= 0) + { + int metaHeaderEnd = s.indexOf(">", metaHeaderPos); + if(metaHeaderEnd >= 0) + { + QTextCodec* pCodec = getEncodingFromTag(s.mid(metaHeaderPos, metaHeaderEnd - metaHeaderPos), "charset="); + if(pCodec) + return pCodec; + + metaHeaderPos = s.indexOf(" * * Copyright (C) 2018 Michael Reeves reeves.87@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #ifndef SOURCEDATA_H #define SOURCEDATA_H #include #include #include "options.h" #include "fileaccess.h" #include "gnudiff_diff.h" class LineData; class SourceData { public: SourceData(); ~SourceData(); void setOptions(Options* pOptions); LineRef getSizeLines() const; qint64 getSizeBytes() const; const char* getBuf() const; const QString& getText() const; const LineData* getLineDataForDisplay() const; const LineData* getLineDataForDiff() const; void setFilename(const QString& filename); void setFileAccess(const FileAccess& fileAccess); void setEncoding(QTextCodec* pEncoding); //FileAccess& getFileAccess(); QString getFilename(); void setAliasName(const QString& name); QString getAliasName(); bool isEmpty(); // File was set bool hasData(); // Data was readable bool isText(); // is it pure text (vs. 
binary data) bool isIncompleteConversion(); // true if some replacement characters were found bool isFromBuffer(); // was it set via setData() (vs. setFileAccess() or setFilename()) QStringList setData(const QString& data); bool isValid(); // Either no file is specified or reading was successful // Returns a list of error messages if anything went wrong QStringList readAndPreprocess(QTextCodec* pEncoding, bool bAutoDetectUnicode); bool saveNormalDataAs(const QString& fileName); bool isBinaryEqualWith(const SourceData& other) const; void reset(); QTextCodec* getEncoding() const { return m_pEncoding; } e_LineEndStyle getLineEndStyle() const { return m_normalData.m_eLineEndStyle; } private: + bool convertFileEncoding(const QString& fileNameIn, QTextCodec* pCodecIn, + const QString& fileNameOut, QTextCodec* pCodecOut); + static bool isLineOrBufEnd(const QChar* p, int i, int size); + + static QTextCodec* detectEncoding(const char* buf, qint64 size, qint64& skipBytes); + static QTextCodec* getEncodingFromTag(const QByteArray& s, const QByteArray& encodingTag); + QTextCodec* detectEncoding(const QString& fileName, QTextCodec* pFallbackCodec); QString m_aliasName; FileAccess m_fileAccess; Options* m_pOptions; QString m_tempInputFileName; QTemporaryFile m_tempFile; //Created from clipboard content. 
class FileData { private: friend SourceData; const char* m_pBuf = nullptr; qint64 m_size = 0; qint64 m_vSize = 0; // Nr of lines in m_pBuf1 and size of m_v1, m_dv12 and m_dv13 QString m_unicodeBuf; QVector m_v; bool m_bIsText = false; bool m_bIncompleteConversion = false; e_LineEndStyle m_eLineEndStyle = eLineEndStyleUndefined; public: ~FileData() { reset(); } bool readFile(const QString& filename); bool writeFile(const QString& filename); bool preprocess(bool bPreserveCR, QTextCodec* pEncoding); void reset(); void removeComments(); void copyBufFrom(const FileData& src); void checkLineForComments( const QChar* p, // pointer to start of buffer int& i, // index of current position (in, out) int size, // size of buffer bool& bWhite, // false if this line contains nonwhite characters (in, out) bool& bCommentInLine, // true if any comment is within this line (in, out) bool& bStartsOpenComment // true if the line ends within an comment (out) ); bool isEmpty() { return m_size == 0; } bool isText() { return m_bIsText; } }; FileData m_normalData; FileData m_lmppData; QTextCodec* m_pEncoding; }; #endif // !SOURCEDATA_H diff --git a/src/diff.cpp b/src/diff.cpp index f14facf..fb6e9b6 100644 --- a/src/diff.cpp +++ b/src/diff.cpp @@ -1,1606 +1,1477 @@ /*************************************************************************** diff.cpp - description ------------------- begin : Mon Mar 18 2002 copyright : (C) 2002-2007 by Joachim Eibl email : joachim.eibl at gmx.de ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * ***************************************************************************/ #include "diff.h" #include "Utils.h" #include "fileaccess.h" #include "gnudiff_diff.h" #include "options.h" #include "progress.h" #include #include #include #include #include #include #include #include #include #include #include #include int LineData::width(int tabSize) const { int w = 0; int j = 0; for(int i = 0; i < size(); ++i) { if(pLine[i] == '\t') { for(j %= tabSize; j < tabSize; ++j) ++w; j = 0; } else { ++w; ++j; } } return w; } // The bStrict flag is true during the test where a nonmatching area ends. // Then the equal()-function requires that the match has more than 2 nonwhite characters. // This is to avoid matches on trivial lines (e.g. with white space only). // This choice is good for C/C++. bool equal(const LineData& l1, const LineData& l2, bool bStrict) { if(l1.getLine() == nullptr || l2.getLine() == nullptr) return false; if(bStrict && g_bIgnoreTrivialMatches) return false; // Ignore white space diff const QChar* p1 = l1.getLine(); const QChar* p1End = p1 + l1.size(); const QChar* p2 = l2.getLine(); const QChar* p2End = p2 + l2.size(); if(g_bIgnoreWhiteSpace) { int nonWhite = 0; for(;;) { while(isWhite(*p1) && p1 != p1End) ++p1; while(isWhite(*p2) && p2 != p2End) ++p2; if(p1 == p1End && p2 == p2End) { if(bStrict && g_bIgnoreTrivialMatches) { // Then equality is not enough return nonWhite > 2; } else // equality is enough return true; } else if(p1 == p1End || p2 == p2End) return false; if(*p1 != *p2) return false; ++p1; ++p2; ++nonWhite; } } else { return (l1.size() == l2.size() && memcmp(p1, p2, l1.size()) == 0); } } -bool isLineOrBufEnd(const QChar* p, int i, int size) -{ - return i >= size // End of file - || isEndOfLine(p[i]) // Normal end of line - - // No support for Mac-end of line yet, because incompatible with GNU-diff-routines. 
- // || ( p[i]=='\r' && (i>=size-1 || p[i+1]!='\n') - // && (i==0 || p[i-1]!='\n') ) // Special case: '\r' without '\n' - ; -} - -// Convert the input file from input encoding to output encoding and write it to the output file. -bool convertFileEncoding(const QString& fileNameIn, QTextCodec* pCodecIn, - const QString& fileNameOut, QTextCodec* pCodecOut) -{ - QFile in(fileNameIn); - if(!in.open(QIODevice::ReadOnly)) - return false; - QTextStream inStream(&in); - inStream.setCodec(pCodecIn); - inStream.setAutoDetectUnicode(false); - - QFile out(fileNameOut); - if(!out.open(QIODevice::WriteOnly)) - return false; - QTextStream outStream(&out); - outStream.setCodec(pCodecOut); - - QString data = inStream.readAll(); - outStream << data; - - return true; -} - -static QTextCodec* getEncodingFromTag(const QByteArray& s, const QByteArray& encodingTag) -{ - int encodingPos = s.indexOf(encodingTag); - if(encodingPos >= 0) - { - int apostrophPos = s.indexOf('"', encodingPos + encodingTag.length()); - int apostroph2Pos = s.indexOf('\'', encodingPos + encodingTag.length()); - char apostroph = '"'; - if(apostroph2Pos >= 0 && (apostrophPos < 0 || apostroph2Pos < apostrophPos)) - { - apostroph = '\''; - apostrophPos = apostroph2Pos; - } - - int encodingEnd = s.indexOf(apostroph, apostrophPos + 1); - if(encodingEnd >= 0) // e.g.: or - { - QByteArray encoding = s.mid(apostrophPos + 1, encodingEnd - (apostrophPos + 1)); - return QTextCodec::codecForName(encoding); - } - else // e.g.: - { - QByteArray encoding = s.mid(encodingPos + encodingTag.length(), apostrophPos - (encodingPos + encodingTag.length())); - return QTextCodec::codecForName(encoding); - } - } - return nullptr; -} - -QTextCodec* detectEncoding(const char* buf, qint64 size, qint64& skipBytes) -{ - if(size >= 2) - { - if(buf[0] == '\xFF' && buf[1] == '\xFE') - { - skipBytes = 2; - return QTextCodec::codecForName("UTF-16LE"); - } - - if(buf[0] == '\xFE' && buf[1] == '\xFF') - { - skipBytes = 2; - return 
QTextCodec::codecForName("UTF-16BE"); - } - } - if(size >= 3) - { - if(buf[0] == '\xEF' && buf[1] == '\xBB' && buf[2] == '\xBF') - { - skipBytes = 3; - return QTextCodec::codecForName("UTF-8-BOM"); - } - } - skipBytes = 0; - QByteArray s; - /* - We don't need the whole file here just the header. -] */ - if(size <= 5000) - s=QByteArray(buf, (int)size); - else - s=QByteArray(buf, 5000); - - int xmlHeaderPos = s.indexOf("= 0) - { - int xmlHeaderEnd = s.indexOf("?>", xmlHeaderPos); - if(xmlHeaderEnd >= 0) - { - QTextCodec* pCodec = getEncodingFromTag(s.mid(xmlHeaderPos, xmlHeaderEnd - xmlHeaderPos), "encoding="); - if(pCodec) - return pCodec; - } - } - else // HTML - { - int metaHeaderPos = s.indexOf("= 0) - { - int metaHeaderEnd = s.indexOf(">", metaHeaderPos); - if(metaHeaderEnd >= 0) - { - QTextCodec* pCodec = getEncodingFromTag(s.mid(metaHeaderPos, metaHeaderEnd - metaHeaderPos), "charset="); - if(pCodec) - return pCodec; - - metaHeaderPos = s.indexOf("begin(); int lineA = 0; int lineB = 0; Diff d(0, 0, 0); for(;;) { if(d.nofEquals == 0 && d.diff1 == 0 && d.diff2 == 0) { if(i != pDiffListAB->end()) { d = *i; ++i; } else break; } Diff3Line d3l; if(d.nofEquals > 0) { d3l.bAEqB = true; d3l.lineA = lineA; d3l.lineB = lineB; --d.nofEquals; ++lineA; ++lineB; } else if(d.diff1 > 0 && d.diff2 > 0) { d3l.lineA = lineA; d3l.lineB = lineB; --d.diff1; --d.diff2; ++lineA; ++lineB; } else if(d.diff1 > 0) { d3l.lineA = lineA; --d.diff1; ++lineA; } else if(d.diff2 > 0) { d3l.lineB = lineB; --d.diff2; ++lineB; } Q_ASSERT(d.nofEquals >= 0); d3ll.push_back(d3l); } } // Second step void calcDiff3LineListUsingAC( const DiffList* pDiffListAC, Diff3LineList& d3ll) { //////////////// // Now insert data from C using pDiffListAC DiffList::const_iterator i = pDiffListAC->begin(); Diff3LineList::iterator i3 = d3ll.begin(); int lineA = 0; int lineC = 0; Diff d(0, 0, 0); for(;;) { if(d.nofEquals == 0 && d.diff1 == 0 && d.diff2 == 0) { if(i != pDiffListAC->end()) { d = *i; ++i; } else break; } 
// (continuation of the main loop of calcDiff3LineListUsingAC)
// Each pass consumes one line of the current A/C diff run `d` and merges C into d3ll.
        Diff3Line d3l;
        if(d.nofEquals > 0)
        {
            // Find the corresponding lineA
            // NOTE(review): no end() check here, unlike the searches in calcDiff3LineListUsingBC;
            // this relies on every lineA already being present in d3ll.
            while((*i3).lineA != lineA)
                ++i3;

            (*i3).lineC = lineC;
            (*i3).bAEqC = true;
            // C equals A here, so C equals B exactly when A equals B.
            (*i3).bBEqC = (*i3).bAEqB;

            --d.nofEquals;
            ++lineA;
            ++lineC;
            ++i3;
        }
        else if(d.diff1 > 0 && d.diff2 > 0)
        {
            // A and C both differ: C gets a new entry inserted before i3.
            d3l.lineC = lineC;
            d3ll.insert(i3, d3l);
            --d.diff1;
            --d.diff2;
            ++lineA;
            ++lineC;
        }
        else if(d.diff1 > 0)
        {
            // Line only in A: nothing to insert for C.
            --d.diff1;
            ++lineA;
        }
        else if(d.diff2 > 0)
        {
            // Line only in C: insert a C-only entry before i3.
            d3l.lineC = lineC;
            d3ll.insert(i3, d3l);
            --d.diff2;
            ++lineC;
        }
    }
}

// Third step
// Walks the B/C diff list and, for every line where B equals C, tries to bring the
// B and C line numbers into the same Diff3Line of d3ll.
void calcDiff3LineListUsingBC(
    const DiffList* pDiffListBC,
    Diff3LineList& d3ll)
{
    ////////////////
    // Now improve the position of data from C using pDiffListBC
    // If a line from C equals a line from A then it is in the
    // same Diff3Line already.
    // If a line from C equals a line from B but not A, this
    // information will be used here.

    DiffList::const_iterator i = pDiffListBC->begin();
    Diff3LineList::iterator i3b = d3ll.begin(); // entry currently holding lineB
    Diff3LineList::iterator i3c = d3ll.begin(); // entry currently holding lineC
    int lineB = 0;
    int lineC = 0;
    Diff d(0, 0, 0);

    for(;;)
    {
        // Fetch the next diff run once the current one is fully consumed.
        if(d.nofEquals == 0 && d.diff1 == 0 && d.diff2 == 0)
        {
            if(i != pDiffListBC->end())
            {
                d = *i;
                ++i;
            }
            else
                break;
        }

        Diff3Line d3l;
        if(d.nofEquals > 0)
        {
            // Find the corresponding lineB and lineC
            while(i3b != d3ll.end() && (*i3b).lineB != lineB)
                ++i3b;

            while(i3c != d3ll.end() && (*i3c).lineC != lineC)
                ++i3c;

            Q_ASSERT(i3b != d3ll.end());
            Q_ASSERT(i3c != d3ll.end());

            if(i3b == i3c)
            {
                // B and C already share an entry: just record the equality.
                Q_ASSERT((*i3b).lineC == lineC);
                (*i3b).bBEqC = true;
            }
            else
            {
                // Is it possible to move this line up?
                // Test if no other B's are used between i3c and i3b

                // First test which is before: i3c or i3b ?
                // Both copies advance in lock step; whichever reaches the other original first was in front.
                Diff3LineList::iterator i3c1 = i3c;
                Diff3LineList::iterator i3b1 = i3b;
                while(i3c1 != i3b && i3b1 != i3c)
                {
                    Q_ASSERT(i3b1 != d3ll.end() || i3c1 != d3ll.end());
                    if(i3c1 != d3ll.end()) ++i3c1;
                    if(i3b1 != d3ll.end()) ++i3b1;
                }

                if(i3c1 == i3b && !(*i3b).bAEqB) // i3c before i3b
                {
                    // Count entries in [i3c, i3b) that already carry a B line.
                    Diff3LineList::iterator i3 = i3c;
                    int nofDisturbingLines = 0;
                    while(i3 != i3b && i3 != d3ll.end())
                    {
                        if((*i3).lineB != -1)
                            ++nofDisturbingLines;
                        ++i3;
                    }

                    if(nofDisturbingLines > 0) //&& nofDisturbingLines < d.nofEquals*d.nofEquals+4 )
                    {
                        Diff3LineList::iterator i3_last_equal_A = d3ll.end();

                        i3 = i3c;
                        while(i3 != i3b)
                        {
                            if(i3->bAEqB)
                            {
                                i3_last_equal_A = i3;
                            }
                            ++i3;
                        }

                        /* If i3_last_equal_A isn't still set to d3ll.end(), then
                         * we've found a line in A that is equal to one in B
                         * somewhere between i3c and i3b
                         */
                        bool before_or_on_equal_line_in_A = (i3_last_equal_A != d3ll.end());

                        // Move the disturbing lines up, out of sight.
                        i3 = i3c;
                        while(i3 != i3b)
                        {
                            if((*i3).lineB != -1 ||
                               (before_or_on_equal_line_in_A && i3->lineA != -1))
                            {
                                d3l.lineB = (*i3).lineB;
                                (*i3).lineB = -1;

                                // Move A along if it matched B
                                if(before_or_on_equal_line_in_A)
                                {
                                    d3l.lineA = i3->lineA;
                                    d3l.bAEqB = i3->bAEqB;
                                    i3->lineA = -1;
                                    i3->bAEqC = false;
                                }

                                (*i3).bAEqB = false;
                                (*i3).bBEqC = false;
                                // The moved lines go into a new entry in front of i3c.
                                d3ll.insert(i3c, d3l);
                            }

                            if(i3 == i3_last_equal_A)
                            {
                                before_or_on_equal_line_in_A = false;
                            }

                            ++i3;
                        }
                        nofDisturbingLines = 0;
                    }

                    if(nofDisturbingLines == 0)
                    {
                        // Yes, the line from B can be moved.
                        (*i3b).lineB = -1; // This might leave an empty line: removed later.
                        (*i3b).bAEqB = false;
                        (*i3b).bBEqC = false;
                        (*i3c).lineB = lineB;
                        (*i3c).bBEqC = true;
                        (*i3c).bAEqB = (*i3c).bAEqC;
                    }
                }
                else if(i3b1 == i3c && !(*i3c).bAEqC)
                {
                    // Mirror case: i3b is before i3c, so C is the line to move.
                    Diff3LineList::iterator i3 = i3b;
                    int nofDisturbingLines = 0;
                    while(i3 != i3c && i3 != d3ll.end())
                    {
                        if((*i3).lineC != -1)
                            ++nofDisturbingLines;
                        ++i3;
                    }

                    if(nofDisturbingLines > 0) //&& nofDisturbingLines < d.nofEquals*d.nofEquals+4 )
                    {
                        Diff3LineList::iterator i3_last_equal_A = d3ll.end();

                        i3 = i3b;
                        while(i3 != i3c)
                        {
                            if(i3->bAEqC)
                            {
                                i3_last_equal_A = i3;
                            }
                            ++i3;
                        }

                        /* If i3_last_equal_A isn't still set to d3ll.end(), then
                         * we've found a line in A that is equal to one in C
                         * somewhere between i3b and i3c
                         */
                        bool before_or_on_equal_line_in_A = (i3_last_equal_A != d3ll.end());

                        // Move the disturbing lines up.
                        i3 = i3b;
                        while(i3 != i3c)
                        {
                            if((*i3).lineC != -1 ||
                               (before_or_on_equal_line_in_A && i3->lineA != -1))
                            {
                                d3l.lineC = (*i3).lineC;
                                (*i3).lineC = -1;

                                // Move A along if it matched C
                                if(before_or_on_equal_line_in_A)
                                {
                                    d3l.lineA = i3->lineA;
                                    d3l.bAEqC = i3->bAEqC;
                                    i3->lineA = -1;
                                    i3->bAEqB = false;
                                }

                                (*i3).bAEqC = false;
                                (*i3).bBEqC = false;
                                d3ll.insert(i3b, d3l);
                            }

                            if(i3 == i3_last_equal_A)
                            {
                                before_or_on_equal_line_in_A = false;
                            }

                            ++i3;
                        }
                        nofDisturbingLines = 0;
                    }

                    if(nofDisturbingLines == 0)
                    {
                        // Yes, the line from C can be moved.
                        (*i3c).lineC = -1; // This might leave an empty line: removed later.
                        (*i3c).bAEqC = false;
                        (*i3c).bBEqC = false;
                        (*i3b).lineC = lineC;
                        (*i3b).bBEqC = true;
                        (*i3b).bAEqC = (*i3b).bAEqB;
                    }
                }
            }

            --d.nofEquals;
            ++lineB;
            ++lineC;
            ++i3b;
            ++i3c;
        }
        else if(d.diff1 > 0)
        {
            // B line that differs from C: locate it, searching forward from i3b.
            Diff3LineList::iterator i3 = i3b;
            while((*i3).lineB != lineB)
                ++i3;
            if(i3 != i3b && !(*i3).bAEqB)
            {
                // Take B from this line and move it up as far as possible
                d3l.lineB = lineB;
                d3ll.insert(i3b, d3l);
                (*i3).lineB = -1;
            }
            else
            {
                i3b = i3;
            }
            --d.diff1;
            ++lineB;
            ++i3b;

            // A differing C line is consumed in the same step.
            if(d.diff2 > 0)
            {
                --d.diff2;
                ++lineC;
            }
        }
        else if(d.diff2 > 0)
        {
            --d.diff2;
            ++lineC;
        }
    }
    /*
   Diff3LineList::iterator it = d3ll.begin();
   int li=0;
   for( ; it!=d3ll.end(); ++it, ++li )
   {
      printf( "%4d %4d %4d %4d A%c=B A%c=C B%c=C\n",
         li, (*it).lineA, (*it).lineB, (*it).lineC,
         (*it).bAEqB ? '=' : '!', (*it).bAEqC ? '=' : '!', (*it).bBEqC ? '=' : '!' );
   }
   printf("\n");*/
}

// Test if the move would pass a barrier. Return true if not.
// line1/line2 are line numbers in the windows winIdx1/winIdx2 (1 = A, 2 = B, 3 = C).
// A negative line number means there is nothing to check, so the move is valid.
static bool isValidMove(ManualDiffHelpList* pManualDiffHelpList, int line1, int line2, int winIdx1, int winIdx2)
{
    if(line1 >= 0 && line2 >= 0)
    {
        ManualDiffHelpList::const_iterator i;
        for(i = pManualDiffHelpList->begin(); i != pManualDiffHelpList->end(); ++i)
        {
            const ManualDiffHelpEntry& mdhe = *i;

            // Barrier
            int l1 = winIdx1 == 1 ? mdhe.lineA1 : winIdx1 == 2 ? mdhe.lineB1 : mdhe.lineC1;
            int l2 = winIdx2 == 1 ? mdhe.lineA1 : winIdx2 == 2 ? mdhe.lineB1 : mdhe.lineC1;

            if(l1 >= 0 && l2 >= 0)
            {
                // Invalid if the two lines lie on opposite sides of the range start ...
                if((line1 >= l1 && line2 < l2) || (line1 < l1 && line2 >= l2))
                    return false;
                l1 = winIdx1 == 1 ? mdhe.lineA2 : winIdx1 == 2 ? mdhe.lineB2 : mdhe.lineC2;
                l2 = winIdx2 == 1 ? mdhe.lineA2 : winIdx2 == 2 ? mdhe.lineB2 : mdhe.lineC2;
                ++l1;
                ++l2;
                // ... or on opposite sides of the line just after the range end.
                if((line1 >= l1 && line2 < l2) || (line1 < l1 && line2 >= l2))
                    return false;
            }
        }
    }
    return true; // no barrier passed.
}

// Diffs size1 lines starting at p1 against size2 lines starting at p2 with GnuDiff
// and stores the result in diffList. Always returns true.
static bool runDiff(const LineData* p1, LineRef size1, const LineData* p2, LineRef size2, DiffList& diffList,
                    Options* pOptions)
{
    ProgressProxy pp;
    static GnuDiff gnuDiff; // All values are initialized with zeros.
pp.setCurrent(0); diffList.clear(); if(p1[0].getLine() == nullptr || p2[0].getLine() == nullptr || size1 == 0 || size2 == 0) { Diff d(0, 0, 0); if(p1[0].getLine() == nullptr && p2[0].getLine() == nullptr && size1 == size2) d.nofEquals = size1; else { d.diff1 = size1; d.diff2 = size2; } diffList.push_back(d); } else { GnuDiff::comparison comparisonInput; memset(&comparisonInput, 0, sizeof(comparisonInput)); comparisonInput.parent = nullptr; comparisonInput.file[0].buffer = p1[0].getLine(); //ptr to buffer comparisonInput.file[0].buffered = (p1[size1 - 1].getLine() - p1[0].getLine() + p1[size1 - 1].size()); // size of buffer comparisonInput.file[1].buffer = p2[0].getLine(); //ptr to buffer comparisonInput.file[1].buffered = (p2[size2 - 1].getLine() - p2[0].getLine() + p2[size2 - 1].size()); // size of buffer gnuDiff.ignore_white_space = GnuDiff::IGNORE_ALL_SPACE; // I think nobody needs anything else ... gnuDiff.bIgnoreWhiteSpace = true; gnuDiff.bIgnoreNumbers = pOptions->m_bIgnoreNumbers; gnuDiff.minimal = pOptions->m_bTryHard; gnuDiff.ignore_case = false; GnuDiff::change* script = gnuDiff.diff_2_files(&comparisonInput); LineRef equalLinesAtStart = comparisonInput.file[0].prefix_lines; LineRef currentLine1 = 0; LineRef currentLine2 = 0; GnuDiff::change* p = nullptr; for(GnuDiff::change* e = script; e; e = p) { Diff d(0, 0, 0); d.nofEquals = e->line0 - currentLine1; Q_ASSERT(d.nofEquals == e->line1 - currentLine2); d.diff1 = e->deleted; d.diff2 = e->inserted; currentLine1 += d.nofEquals + d.diff1; currentLine2 += d.nofEquals + d.diff2; diffList.push_back(d); p = e->link; free(e); } if(diffList.empty()) { Diff d(0, 0, 0); d.nofEquals = std::min(size1, size2); d.diff1 = size1 - d.nofEquals; d.diff2 = size2 - d.nofEquals; diffList.push_back(d); /* Diff d(0,0,0); d.nofEquals = equalLinesAtStart; if ( gnuDiff.files[0].missing_newline != gnuDiff.files[1].missing_newline ) { d.diff1 = gnuDiff.files[0].missing_newline ? 0 : 1; d.diff2 = gnuDiff.files[1].missing_newline ? 
0 : 1; ++d.nofEquals; } else if ( !gnuDiff.files[0].missing_newline ) { ++d.nofEquals; } diffList.push_back(d); */ } else { diffList.front().nofEquals += equalLinesAtStart; currentLine1 += equalLinesAtStart; currentLine2 += equalLinesAtStart; LineRef nofEquals = std::min(size1 - currentLine1, size2 - currentLine2); if(nofEquals == 0) { diffList.back().diff1 += size1 - currentLine1; diffList.back().diff2 += size2 - currentLine2; } else { Diff d(nofEquals, size1 - currentLine1 - nofEquals, size2 - currentLine2 - nofEquals); diffList.push_back(d); } /* if ( gnuDiff.files[0].missing_newline != gnuDiff.files[1].missing_newline ) { diffList.back().diff1 += gnuDiff.files[0].missing_newline ? 0 : 1; diffList.back().diff2 += gnuDiff.files[1].missing_newline ? 0 : 1; } else if ( !gnuDiff.files[0].missing_newline ) { ++ diffList.back().nofEquals; } */ } } // Verify difflist { LineRef l1 = 0; LineRef l2 = 0; DiffList::iterator i; for(i = diffList.begin(); i != diffList.end(); ++i) { l1 += i->nofEquals + i->diff1; l2 += i->nofEquals + i->diff2; } //if( l1!=p1-p1start || l2!=p2-p2start ) Q_ASSERT(l1 == size1 && l2 == size2); } pp.setCurrent(1); return true; } bool runDiff(const LineData* p1, LineRef size1, const LineData* p2, LineRef size2, DiffList& diffList, int winIdx1, int winIdx2, ManualDiffHelpList* pManualDiffHelpList, Options* pOptions) { diffList.clear(); DiffList diffList2; int l1begin = 0; int l2begin = 0; ManualDiffHelpList::const_iterator i; for(i = pManualDiffHelpList->begin(); i != pManualDiffHelpList->end(); ++i) { const ManualDiffHelpEntry& mdhe = *i; int l1end = winIdx1 == 1 ? mdhe.lineA1 : winIdx1 == 2 ? mdhe.lineB1 : mdhe.lineC1; int l2end = winIdx2 == 1 ? mdhe.lineA1 : winIdx2 == 2 ? mdhe.lineB1 : mdhe.lineC1; if(l1end >= 0 && l2end >= 0) { runDiff(p1 + l1begin, l1end - l1begin, p2 + l2begin, l2end - l2begin, diffList2, pOptions); diffList.splice(diffList.end(), diffList2); l1begin = l1end; l2begin = l2end; l1end = winIdx1 == 1 ? mdhe.lineA2 : winIdx1 == 2 ? 
mdhe.lineB2 : mdhe.lineC2; l2end = winIdx2 == 1 ? mdhe.lineA2 : winIdx2 == 2 ? mdhe.lineB2 : mdhe.lineC2; if(l1end >= 0 && l2end >= 0) { ++l1end; // point to line after last selected line ++l2end; runDiff(p1 + l1begin, l1end - l1begin, p2 + l2begin, l2end - l2begin, diffList2, pOptions); diffList.splice(diffList.end(), diffList2); l1begin = l1end; l2begin = l2end; } } } runDiff(p1 + l1begin, size1 - l1begin, p2 + l2begin, size2 - l2begin, diffList2, pOptions); diffList.splice(diffList.end(), diffList2); return true; } void correctManualDiffAlignment(Diff3LineList& d3ll, ManualDiffHelpList* pManualDiffHelpList) { if(pManualDiffHelpList->empty()) return; // If a line appears unaligned in comparison to the manual alignment, correct this. ManualDiffHelpList::iterator iMDHL; for(iMDHL = pManualDiffHelpList->begin(); iMDHL != pManualDiffHelpList->end(); ++iMDHL) { Diff3LineList::iterator i3 = d3ll.begin(); int missingWinIdx = 0; int alignedSum = (iMDHL->lineA1 < 0 ? 0 : 1) + (iMDHL->lineB1 < 0 ? 0 : 1) + (iMDHL->lineC1 < 0 ? 0 : 1); if(alignedSum == 2) { // If only A & B are aligned then let C rather be aligned with A // If only A & C are aligned then let B rather be aligned with A // If only B & C are aligned then let A rather be aligned with B missingWinIdx = iMDHL->lineA1 < 0 ? 1 : (iMDHL->lineB1 < 0 ? 2 : 3); } else if(alignedSum <= 1) { return; } // At the first aligned line, move up the two other lines into new d3ls until the second input is aligned // Then move up the third input until all three lines are aligned. int wi = 0; for(; i3 != d3ll.end(); ++i3) { for(wi = 1; wi <= 3; ++wi) { if(i3->getLineInFile(wi) >= 0 && iMDHL->firstLine(wi) == i3->getLineInFile(wi)) break; } if(wi <= 3) break; } if(wi >= 1 && wi <= 3) { // Found manual alignment for one source Diff3LineList::iterator iDest = i3; // Move lines up until the next firstLine is found. Omit wi from move and search. 
// (continuation of correctManualDiffAlignment: wi is the window whose first aligned line sits at iDest)
            int wi2 = 0;
            for(; i3 != d3ll.end(); ++i3)
            {
                // Look for the first aligned line of one of the other two windows.
                for(wi2 = 1; wi2 <= 3; ++wi2)
                {
                    if(wi != wi2 && i3->getLineInFile(wi2) >= 0 && iMDHL->firstLine(wi2) == i3->getLineInFile(wi2))
                        break;
                }
                if(wi2 > 3)
                { // Not yet found
                    // Move both others up
                    Diff3Line d3l;
                    // Move both up
                    if(wi == 1) // Move B and C up
                    {
                        d3l.bBEqC = i3->bBEqC;
                        d3l.lineB = i3->lineB;
                        d3l.lineC = i3->lineC;
                        i3->lineB = -1;
                        i3->lineC = -1;
                    }
                    if(wi == 2) // Move A and C up
                    {
                        d3l.bAEqC = i3->bAEqC;
                        d3l.lineA = i3->lineA;
                        d3l.lineC = i3->lineC;
                        i3->lineA = -1;
                        i3->lineC = -1;
                    }
                    if(wi == 3) // Move A and B up
                    {
                        d3l.bAEqB = i3->bAEqB;
                        d3l.lineA = i3->lineA;
                        d3l.lineB = i3->lineB;
                        i3->lineA = -1;
                        i3->lineB = -1;
                    }
                    // At most one window's line remains in *i3, so no equality flag can hold.
                    i3->bAEqB = false;
                    i3->bAEqC = false;
                    i3->bBEqC = false;
                    d3ll.insert(iDest, d3l);
                }
                else
                {
                    // align the found line with the line we already have here
                    if(i3 != iDest)
                    {
                        if(wi2 == 1)
                        {
                            iDest->lineA = i3->lineA;
                            i3->lineA = -1;
                            i3->bAEqB = false;
                            i3->bAEqC = false;
                        }
                        else if(wi2 == 2)
                        {
                            iDest->lineB = i3->lineB;
                            i3->lineB = -1;
                            i3->bAEqB = false;
                            i3->bBEqC = false;
                        }
                        else if(wi2 == 3)
                        {
                            iDest->lineC = i3->lineC;
                            i3->lineC = -1;
                            i3->bBEqC = false;
                            i3->bAEqC = false;
                        }
                    }

                    if(missingWinIdx != 0)
                    {
                        // Only two windows are manually aligned: move the third window's lines in front of iDest.
                        for(; i3 != d3ll.end(); ++i3)
                        {
                            int wi3 = missingWinIdx;
                            if(i3->getLineInFile(wi3) >= 0)
                            {
                                // not found, move the line before iDest
                                Diff3Line d3l;
                                if(wi3 == 1)
                                {
                                    if(i3->bAEqB) // Stop moving lines up if one equal is found.
                                        break;
                                    d3l.lineA = i3->lineA;
                                    i3->lineA = -1;
                                    i3->bAEqB = false;
                                    i3->bAEqC = false;
                                }
                                if(wi3 == 2)
                                {
                                    if(i3->bAEqB)
                                        break;
                                    d3l.lineB = i3->lineB;
                                    i3->lineB = -1;
                                    i3->bAEqB = false;
                                    i3->bBEqC = false;
                                }
                                if(wi3 == 3)
                                {
                                    if(i3->bAEqC)
                                        break;
                                    d3l.lineC = i3->lineC;
                                    i3->lineC = -1;
                                    i3->bAEqC = false;
                                    i3->bBEqC = false;
                                }
                                d3ll.insert(iDest, d3l);
                            }

                        } // for(), searching for wi3
                    }
                    break;
                }
            } // for(), searching for wi2
        }     // if, wi found
    }         // for (iMDHL)
}

// Fourth step
// Removes empty entries from d3ll and moves lines up into free slots of earlier
// entries where that does not cross a manual-alignment barrier (see isValidMove).
void calcDiff3LineListTrim(
    Diff3LineList& d3ll, const LineData* pldA, const LineData* pldB, const LineData* pldC, ManualDiffHelpList* pManualDiffHelpList)
{
    const Diff3Line d3l_empty;
    d3ll.removeAll(d3l_empty);

    Diff3LineList::iterator i3 = d3ll.begin();
    Diff3LineList::iterator i3A = d3ll.begin();
    Diff3LineList::iterator i3B = d3ll.begin();
    Diff3LineList::iterator i3C = d3ll.begin();

    int line = 0;  // diff3line counters
    int lineA = 0; //
    int lineB = 0;
    int lineC = 0;

    ManualDiffHelpList::iterator iMDHL = pManualDiffHelpList->begin();
    // The iterator i3 and the variable line look ahead.
    // The iterators i3A, i3B, i3C and corresponding lineA, lineB and lineC stop at empty lines, if found.
    // If possible, then the texts from the look ahead will be moved back to the empty places.

    for(; i3 != d3ll.end(); ++i3, ++line)
    {
        if(iMDHL != pManualDiffHelpList->end())
        {
            // Reaching the first line of the next manual alignment resets all three
            // trailing positions to the current entry.
            if((i3->lineA >= 0 && i3->lineA == iMDHL->lineA1) ||
               (i3->lineB >= 0 && i3->lineB == iMDHL->lineB1) ||
               (i3->lineC >= 0 && i3->lineC == iMDHL->lineC1))
            {
                i3A = i3;
                i3B = i3;
                i3C = i3;
                lineA = line;
                lineB = line;
                lineC = line;
                ++iMDHL;
            }
        }

        if(line > lineA && (*i3).lineA != -1 && (*i3A).lineB != -1 && (*i3A).bBEqC &&
           ::equal(pldA[(*i3).lineA], pldB[(*i3A).lineB], false) &&
           isValidMove(pManualDiffHelpList, (*i3).lineA, (*i3A).lineB, 1, 2) &&
           isValidMove(pManualDiffHelpList, (*i3).lineA, (*i3A).lineC, 1, 3))
        {
            // Empty space for A. A matches B and C in the empty line. Move it up.
// (continuation of the main loop of calcDiff3LineListTrim: i3/line look ahead,
//  i3A/i3B/i3C with lineA/lineB/lineC mark where A, B and C could be moved to)
            (*i3A).lineA = (*i3).lineA;
            (*i3A).bAEqB = true;
            (*i3A).bAEqC = true;

            (*i3).lineA = -1;
            (*i3).bAEqB = false;
            (*i3).bAEqC = false;
            ++i3A;
            ++lineA;
        }

        if(line > lineB && (*i3).lineB != -1 && (*i3B).lineA != -1 && (*i3B).bAEqC &&
           ::equal(pldB[(*i3).lineB], pldA[(*i3B).lineA], false) &&
           isValidMove(pManualDiffHelpList, (*i3).lineB, (*i3B).lineA, 2, 1) &&
           isValidMove(pManualDiffHelpList, (*i3).lineB, (*i3B).lineC, 2, 3))
        {
            // Empty space for B. B matches A and C in the empty line. Move it up.
            (*i3B).lineB = (*i3).lineB;
            (*i3B).bAEqB = true;
            (*i3B).bBEqC = true;
            (*i3).lineB = -1;
            (*i3).bAEqB = false;
            (*i3).bBEqC = false;
            ++i3B;
            ++lineB;
        }

        if(line > lineC && (*i3).lineC != -1 && (*i3C).lineA != -1 && (*i3C).bAEqB &&
           ::equal(pldC[(*i3).lineC], pldA[(*i3C).lineA], false) &&
           isValidMove(pManualDiffHelpList, (*i3).lineC, (*i3C).lineA, 3, 1) &&
           isValidMove(pManualDiffHelpList, (*i3).lineC, (*i3C).lineB, 3, 2))
        {
            // Empty space for C. C matches A and B in the empty line. Move it up.
            (*i3C).lineC = (*i3).lineC;
            (*i3C).bAEqC = true;
            (*i3C).bBEqC = true;
            (*i3).lineC = -1;
            (*i3).bAEqC = false;
            (*i3).bBEqC = false;
            ++i3C;
            ++lineC;
        }

        if(line > lineA && (*i3).lineA != -1 && !(*i3).bAEqB && !(*i3).bAEqC &&
           isValidMove(pManualDiffHelpList, (*i3).lineA, (*i3A).lineB, 1, 2) &&
           isValidMove(pManualDiffHelpList, (*i3).lineA, (*i3A).lineC, 1, 3))
        {
            // Empty space for A. A doesn't match B or C. Move it up.
            (*i3A).lineA = (*i3).lineA;
            (*i3).lineA = -1;

            // Re-evaluate the equality flags at the destination.
            if(i3A->lineB != -1 && ::equal(pldA[i3A->lineA], pldB[i3A->lineB], false))
            {
                i3A->bAEqB = true;
            }
            if((i3A->bAEqB && i3A->bBEqC) ||
               (i3A->lineC != -1 && ::equal(pldA[i3A->lineA], pldC[i3A->lineC], false)))
            {
                i3A->bAEqC = true;
            }

            ++i3A;
            ++lineA;
        }

        if(line > lineB && (*i3).lineB != -1 && !(*i3).bAEqB && !(*i3).bBEqC &&
           isValidMove(pManualDiffHelpList, (*i3).lineB, (*i3B).lineA, 2, 1) &&
           isValidMove(pManualDiffHelpList, (*i3).lineB, (*i3B).lineC, 2, 3))
        {
            // Empty space for B. B matches neither A nor C. Move B up.
            (*i3B).lineB = (*i3).lineB;
            (*i3).lineB = -1;

            if(i3B->lineA != -1 && ::equal(pldA[i3B->lineA], pldB[i3B->lineB], false))
            {
                i3B->bAEqB = true;
            }
            if((i3B->bAEqB && i3B->bAEqC) ||
               (i3B->lineC != -1 && ::equal(pldB[i3B->lineB], pldC[i3B->lineC], false)))
            {
                i3B->bBEqC = true;
            }

            ++i3B;
            ++lineB;
        }

        if(line > lineC && (*i3).lineC != -1 && !(*i3).bAEqC && !(*i3).bBEqC &&
           isValidMove(pManualDiffHelpList, (*i3).lineC, (*i3C).lineA, 3, 1) &&
           isValidMove(pManualDiffHelpList, (*i3).lineC, (*i3C).lineB, 3, 2))
        {
            // Empty space for C. C matches neither A nor B. Move C up.
            (*i3C).lineC = (*i3).lineC;
            (*i3).lineC = -1;

            if(i3C->lineA != -1 && ::equal(pldA[i3C->lineA], pldC[i3C->lineC], false))
            {
                i3C->bAEqC = true;
            }
            if((i3C->bAEqC && i3C->bAEqB) ||
               (i3C->lineB != -1 && ::equal(pldB[i3C->lineB], pldC[i3C->lineC], false)))
            {
                i3C->bBEqC = true;
            }

            ++i3C;
            ++lineC;
        }

        if(line > lineA && line > lineB && (*i3).lineA != -1 && (*i3).bAEqB && !(*i3).bAEqC)
        {
            // Empty space for A and B. A matches B, but not C. Move A & B up.
            // The destination is the later of the two free positions.
            Diff3LineList::iterator i = lineA > lineB ? i3A : i3B;
            int l = lineA > lineB ? lineA : lineB;

            if(isValidMove(pManualDiffHelpList, i->lineC, (*i3).lineA, 3, 1) &&
               isValidMove(pManualDiffHelpList, i->lineC, (*i3).lineB, 3, 2))
            {
                (*i).lineA = (*i3).lineA;
                (*i).lineB = (*i3).lineB;
                (*i).bAEqB = true;

                if(i->lineC != -1 && ::equal(pldA[i->lineA], pldC[i->lineC], false))
                {
                    (*i).bAEqC = true;
                    (*i).bBEqC = true;
                }

                (*i3).lineA = -1;
                (*i3).lineB = -1;
                (*i3).bAEqB = false;

                i3A = i;
                i3B = i;
                ++i3A;
                ++i3B;

                lineA = l + 1;
                lineB = l + 1;
            }
        }
        else if(line > lineA && line > lineC && (*i3).lineA != -1 && (*i3).bAEqC && !(*i3).bAEqB)
        {
            // Empty space for A and C. A matches C, but not B. Move A & C up.
            Diff3LineList::iterator i = lineA > lineC ? i3A : i3C;
            int l = lineA > lineC ? lineA : lineC;

            if(isValidMove(pManualDiffHelpList, i->lineB, (*i3).lineA, 2, 1) &&
               isValidMove(pManualDiffHelpList, i->lineB, (*i3).lineC, 2, 3))
            {
                (*i).lineA = (*i3).lineA;
                (*i).lineC = (*i3).lineC;
                (*i).bAEqC = true;

                if(i->lineB != -1 && ::equal(pldA[i->lineA], pldB[i->lineB], false))
                {
                    (*i).bAEqB = true;
                    (*i).bBEqC = true;
                }

                (*i3).lineA = -1;
                (*i3).lineC = -1;
                (*i3).bAEqC = false;

                i3A = i;
                i3C = i;
                ++i3A;
                ++i3C;

                lineA = l + 1;
                lineC = l + 1;
            }
        }
        else if(line > lineB && line > lineC && (*i3).lineB != -1 && (*i3).bBEqC && !(*i3).bAEqC)
        {
            // Empty space for B and C. B matches C, but not A. Move B & C up.
            Diff3LineList::iterator i = lineB > lineC ? i3B : i3C;
            int l = lineB > lineC ? lineB : lineC;

            if(isValidMove(pManualDiffHelpList, i->lineA, (*i3).lineB, 1, 2) &&
               isValidMove(pManualDiffHelpList, i->lineA, (*i3).lineC, 1, 3))
            {
                (*i).lineB = (*i3).lineB;
                (*i).lineC = (*i3).lineC;
                (*i).bBEqC = true;

                if(i->lineA != -1 && ::equal(pldA[i->lineA], pldB[i->lineB], false))
                {
                    (*i).bAEqB = true;
                    (*i).bAEqC = true;
                }

                (*i3).lineB = -1;
                (*i3).lineC = -1;
                (*i3).bBEqC = false;

                i3B = i;
                i3C = i;
                ++i3B;
                ++i3C;

                lineB = l + 1;
                lineC = l + 1;
            }
        }

        // A column still occupied in the current entry cannot receive a moved line:
        // advance that column's free position past the current entry.
        if((*i3).lineA != -1)
        {
            lineA = line + 1;
            i3A = i3;
            ++i3A;
        }
        if((*i3).lineB != -1)
        {
            lineB = line + 1;
            i3B = i3;
            ++i3B;
        }
        if((*i3).lineC != -1)
        {
            lineC = line + 1;
            i3C = i3;
            ++i3C;
        }
    }

    // Drop the entries that were emptied by the moves above.
    d3ll.removeAll(d3l_empty);

    /*

   Diff3LineList::iterator it = d3ll.begin();
   int li=0;
   for( ; it!=d3ll.end(); ++it, ++li )
   {
      printf( "%4d %4d %4d %4d A%c=B A%c=C B%c=C\n",
         li, (*it).lineA, (*it).lineB, (*it).lineC,
         (*it).bAEqB ? '=' : '!', (*it).bAEqC ? '=' : '!', (*it).bBEqC ? '=' : '!'
); } */ } void DiffBufferInfo::init(Diff3LineList* pD3ll, const Diff3LineVector* pD3lv, const LineData* pldA, LineRef sizeA, const LineData* pldB, LineRef sizeB, const LineData* pldC, LineRef sizeC) { m_pDiff3LineList = pD3ll; m_pDiff3LineVector = pD3lv; m_pLineDataA = pldA; m_pLineDataB = pldB; m_pLineDataC = pldC; m_sizeA = sizeA; m_sizeB = sizeB; m_sizeC = sizeC; Diff3LineList::iterator i3 = pD3ll->begin(); for(; i3 != pD3ll->end(); ++i3) { i3->m_pDiffBufferInfo = this; } } void calcWhiteDiff3Lines( Diff3LineList& d3ll, const LineData* pldA, const LineData* pldB, const LineData* pldC) { Diff3LineList::iterator i3 = d3ll.begin(); for(; i3 != d3ll.end(); ++i3) { i3->bWhiteLineA = ((*i3).lineA == -1 || pldA == nullptr || pldA[(*i3).lineA].whiteLine() || pldA[(*i3).lineA].bContainsPureComment); i3->bWhiteLineB = ((*i3).lineB == -1 || pldB == nullptr || pldB[(*i3).lineB].whiteLine() || pldB[(*i3).lineB].bContainsPureComment); i3->bWhiteLineC = ((*i3).lineC == -1 || pldC == nullptr || pldC[(*i3).lineC].whiteLine() || pldC[(*i3).lineC].bContainsPureComment); } } inline bool equal(QChar c1, QChar c2, bool /*bStrict*/) { // If bStrict then white space doesn't match //if ( bStrict && ( c1==' ' || c1=='\t' ) ) // return false; return c1 == c2; } // My own diff-invention: template void calcDiff(const T* p1, LineRef size1, const T* p2, LineRef size2, DiffList& diffList, int match, int maxSearchRange) { diffList.clear(); const T* p1start = p1; const T* p2start = p2; const T* p1end = p1 + size1; const T* p2end = p2 + size2; for(;;) { int nofEquals = 0; while(p1 != p1end && p2 != p2end && equal(*p1, *p2, false)) { ++p1; ++p2; ++nofEquals; } bool bBestValid = false; int bestI1 = 0; int bestI2 = 0; int i1 = 0; int i2 = 0; for(i1 = 0;; ++i1) { if(&p1[i1] == p1end || (bBestValid && i1 >= bestI1 + bestI2)) { break; } for(i2 = 0; i2 < maxSearchRange; ++i2) { if(&p2[i2] == p2end || (bBestValid && i1 + i2 >= bestI1 + bestI2)) { break; } else if(equal(p2[i2], p1[i1], true) && (match == 
1 || abs(i1 - i2) < 3 || (&p2[i2 + 1] == p2end && &p1[i1 + 1] == p1end) || (&p2[i2 + 1] != p2end && &p1[i1 + 1] != p1end && equal(p2[i2 + 1], p1[i1 + 1], false)))) { if(i1 + i2 < bestI1 + bestI2 || !bBestValid) { bestI1 = i1; bestI2 = i2; bBestValid = true; break; } } } } // The match was found using the strict search. Go back if there are non-strict // matches. while(bestI1 >= 1 && bestI2 >= 1 && equal(p1[bestI1 - 1], p2[bestI2 - 1], false)) { --bestI1; --bestI2; } bool bEndReached = false; if(bBestValid) { // continue somehow Diff d(nofEquals, bestI1, bestI2); diffList.push_back(d); p1 += bestI1; p2 += bestI2; } else { // Nothing else to match. Diff d(nofEquals, p1end - p1, p2end - p2); diffList.push_back(d); bEndReached = true; //break; } // Sometimes the algorithm that chooses the first match unfortunately chooses // a match where later actually equal parts don't match anymore. // A different match could be achieved, if we start at the end. // Do it, if it would be a better match. int nofUnmatched = 0; const T* pu1 = p1 - 1; const T* pu2 = p2 - 1; while(pu1 >= p1start && pu2 >= p2start && equal(*pu1, *pu2, false)) { ++nofUnmatched; --pu1; --pu2; } Diff d = diffList.back(); if(nofUnmatched > 0) { // We want to go backwards the nofUnmatched elements and redo // the matching d = diffList.back(); Diff origBack = d; diffList.pop_back(); while(nofUnmatched > 0) { if(d.diff1 > 0 && d.diff2 > 0) { --d.diff1; --d.diff2; --nofUnmatched; } else if(d.nofEquals > 0) { --d.nofEquals; --nofUnmatched; } if(d.nofEquals == 0 && (d.diff1 == 0 || d.diff2 == 0) && nofUnmatched > 0) { if(diffList.empty()) break; d.nofEquals += diffList.back().nofEquals; d.diff1 += diffList.back().diff1; d.diff2 += diffList.back().diff2; diffList.pop_back(); bEndReached = false; } } if(bEndReached) diffList.push_back(origBack); else { p1 = pu1 + 1 + nofUnmatched; p2 = pu2 + 1 + nofUnmatched; diffList.push_back(d); } } if(bEndReached) break; } // Verify difflist { LineRef l1 = 0; LineRef l2 = 0; 
DiffList::iterator i; for(i = diffList.begin(); i != diffList.end(); ++i) { l1 += i->nofEquals + i->diff1; l2 += i->nofEquals + i->diff2; } Q_ASSERT(l1 == size1 && l2 == size2); } } bool fineDiff( Diff3LineList& diff3LineList, int selector, const LineData* v1, const LineData* v2) { // Finetuning: Diff each line with deltas ProgressProxy pp; int maxSearchLength = 500; Diff3LineList::iterator i; LineRef k1 = 0; LineRef k2 = 0; bool bTextsTotalEqual = true; int listSize = diff3LineList.size(); pp.setMaxNofSteps(listSize); int listIdx = 0; for(i = diff3LineList.begin(); i != diff3LineList.end(); ++i) { Q_ASSERT(selector == 1 || selector == 2 || selector == 3); if(selector == 1) { k1 = i->lineA; k2 = i->lineB; } else if(selector == 2) { k1 = i->lineB; k2 = i->lineC; } else if(selector == 3) { k1 = i->lineC; k2 = i->lineA; } if((k1 == -1 && k2 != -1) || (k1 != -1 && k2 == -1)) bTextsTotalEqual = false; if(k1 != -1 && k2 != -1) { if(v1[k1].size() != v2[k2].size() || memcmp(v1[k1].getLine(), v2[k2].getLine(), v1[k1].size() << 1) != 0) { bTextsTotalEqual = false; DiffList* pDiffList = new DiffList; calcDiff(v1[k1].getLine(), v1[k1].size(), v2[k2].getLine(), v2[k2].size(), *pDiffList, 2, maxSearchLength); // Optimize the diff list. 
DiffList::iterator dli; bool bUsefulFineDiff = false; for(dli = pDiffList->begin(); dli != pDiffList->end(); ++dli) { if(dli->nofEquals >= 4) { bUsefulFineDiff = true; break; } } for(dli = pDiffList->begin(); dli != pDiffList->end(); ++dli) { if(dli->nofEquals < 4 && (dli->diff1 > 0 || dli->diff2 > 0) && !(bUsefulFineDiff && dli == pDiffList->begin())) { dli->diff1 += dli->nofEquals; dli->diff2 += dli->nofEquals; dli->nofEquals = 0; } } Q_ASSERT(selector == 1 || selector == 2 || selector == 3); if(selector == 1) { delete(*i).pFineAB; (*i).pFineAB = pDiffList; } else if(selector == 2) { delete(*i).pFineBC; (*i).pFineBC = pDiffList; } else if(selector == 3) { delete(*i).pFineCA; (*i).pFineCA = pDiffList; } } if((v1[k1].bContainsPureComment || v1[k1].whiteLine()) && (v2[k2].bContainsPureComment || v2[k2].whiteLine())) { Q_ASSERT(selector == 1 || selector == 2 || selector == 3); if(selector == 1) { i->bAEqB = true; } else if(selector == 2) { i->bBEqC = true; } else if(selector == 3) { i->bAEqC = true; } } } ++listIdx; pp.step(); } return bTextsTotalEqual; } // Convert the list to a vector of pointers void calcDiff3LineVector(Diff3LineList& d3ll, Diff3LineVector& d3lv) { d3lv.resize(d3ll.size()); Diff3LineList::iterator i; int j = 0; for(i = d3ll.begin(); i != d3ll.end(); ++i, ++j) { d3lv[j] = &(*i); } Q_ASSERT(j == (int)d3lv.size()); } diff --git a/src/diff.h b/src/diff.h index 3d7b1b5..552f4b0 100644 --- a/src/diff.h +++ b/src/diff.h @@ -1,402 +1,396 @@ /*************************************************************************** * Copyright (C) 2003-2007 by Joachim Eibl * * Copyright (C) 2018 Michael Reeves reeves.87@gmail.com * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. 
* * * ***************************************************************************/ #ifndef DIFF_H #define DIFF_H #include #include #include #include #include "common.h" #include "fileaccess.h" #include "options.h" #include "gnudiff_diff.h" #include "SourceData.h" // Each range with matching elements is followed by a range with differences on either side. // Then again range of matching elements should follow. class Diff { public: LineRef nofEquals; qint64 diff1; qint64 diff2; Diff(LineRef eq, qint64 d1, qint64 d2) { nofEquals = eq; diff1 = d1; diff2 = d2; } }; typedef std::list DiffList; class LineData { private: const QChar* pLine = nullptr; const QChar* pFirstNonWhiteChar = nullptr; int mSize = 0; public: bool bContainsPureComment = false; inline int size() const { return mSize; } inline void setSize(const int newSize) { mSize = newSize; } inline void setFirstNonWhiteChar(const QChar* firstNonWhiteChar) { pFirstNonWhiteChar = firstNonWhiteChar;} inline const QChar* getFirstNonWhiteChar() const { return pFirstNonWhiteChar; } inline const QChar* getLine() const { return pLine; } inline void setLine(const QChar* line) { pLine = line;} int width(int tabSize) const; // Calcs width considering tabs. //int occurrences; bool whiteLine() const { return pFirstNonWhiteChar - pLine == mSize; } }; class Diff3LineList; class Diff3LineVector; class DiffBufferInfo { public: const LineData* m_pLineDataA; const LineData* m_pLineDataB; const LineData* m_pLineDataC; LineRef m_sizeA; LineRef m_sizeB; LineRef m_sizeC; const Diff3LineList* m_pDiff3LineList; const Diff3LineVector* m_pDiff3LineVector; void init(Diff3LineList* d3ll, const Diff3LineVector* d3lv, const LineData* pldA, LineRef sizeA, const LineData* pldB, LineRef sizeB, const LineData* pldC, LineRef sizeC); }; class Diff3Line { public: LineRef lineA = -1; LineRef lineB = -1; LineRef lineC = -1; bool bAEqC : 1; // These are true if equal or only white-space changes exist. 
bool bBEqC : 1; bool bAEqB : 1; bool bWhiteLineA : 1; bool bWhiteLineB : 1; bool bWhiteLineC : 1; DiffList* pFineAB; // These are 0 only if completely equal or if either source doesn't exist. DiffList* pFineBC; DiffList* pFineCA; int linesNeededForDisplay; // Due to wordwrap int sumLinesNeededForDisplay; // For fast conversion to m_diff3WrapLineVector DiffBufferInfo* m_pDiffBufferInfo; // For convenience Diff3Line() { lineA = -1; lineB = -1; lineC = -1; bAEqC = false; bAEqB = false; bBEqC = false; pFineAB = nullptr; pFineBC = nullptr; pFineCA = nullptr; linesNeededForDisplay = 1; sumLinesNeededForDisplay = 0; bWhiteLineA = false; bWhiteLineB = false; bWhiteLineC = false; m_pDiffBufferInfo = nullptr; } ~Diff3Line() { if(pFineAB != nullptr) delete pFineAB; if(pFineBC != nullptr) delete pFineBC; if(pFineCA != nullptr) delete pFineCA; pFineAB = nullptr; pFineBC = nullptr; pFineCA = nullptr; } bool operator==(const Diff3Line& d3l) const { return lineA == d3l.lineA && lineB == d3l.lineB && lineC == d3l.lineC && bAEqB == d3l.bAEqB && bAEqC == d3l.bAEqC && bBEqC == d3l.bBEqC; } const LineData* getLineData(int src) const { Q_ASSERT(m_pDiffBufferInfo != nullptr); if(src == 1 && lineA >= 0) return &m_pDiffBufferInfo->m_pLineDataA[lineA]; if(src == 2 && lineB >= 0) return &m_pDiffBufferInfo->m_pLineDataB[lineB]; if(src == 3 && lineC >= 0) return &m_pDiffBufferInfo->m_pLineDataC[lineC]; return nullptr; } QString getString(int src) const { const LineData* pld = getLineData(src); if(pld) return QString(pld->getLine(), pld->size()); else return QString(); } LineRef getLineInFile(int src) const { if(src == 1) return lineA; if(src == 2) return lineB; if(src == 3) return lineC; return -1; } }; class Diff3LineList : public QLinkedList { }; class Diff3LineVector : public QVector { }; class Diff3WrapLine { public: Diff3Line* pD3L; int diff3LineIndex; int wrapLineOffset; int wrapLineLength; }; typedef QVector Diff3WrapLineVector; class TotalDiffStatus { public: TotalDiffStatus() { 
reset(); } inline void reset() { bBinaryAEqC = false; bBinaryBEqC = false; bBinaryAEqB = false; bTextAEqC = false; bTextBEqC = false; bTextAEqB = false; nofUnsolvedConflicts = 0; nofSolvedConflicts = 0; nofWhitespaceConflicts = 0; } inline int getUnsolvedConflicts() const { return nofUnsolvedConflicts; } inline void setUnsolvedConflicts(const int unsolved) { nofUnsolvedConflicts = unsolved; } inline int getSolvedConflicts() const { return nofSolvedConflicts; } inline void setSolvedConflicts(const int solved) { nofSolvedConflicts = solved; } inline int getWhitespaceConflicts() const { return nofWhitespaceConflicts; } inline void setWhitespaceConflicts(const int wintespace) { nofWhitespaceConflicts = wintespace; } inline int getNonWhitespaceConflicts() { return getUnsolvedConflicts() + getSolvedConflicts() - getWhitespaceConflicts(); } bool isBinaryEqualAC() const { return bBinaryAEqC; } bool isBinaryEqualBC() const { return bBinaryBEqC; } bool isBinaryEqualAB() const { return bBinaryAEqB; } bool bBinaryAEqC = false; bool bBinaryBEqC = false; bool bBinaryAEqB = false; bool bTextAEqC = false; bool bTextBEqC = false; bool bTextAEqB = false; private: int nofUnsolvedConflicts; int nofSolvedConflicts; int nofWhitespaceConflicts; }; // Three corresponding ranges. (Minimum size of a valid range is one line.) class ManualDiffHelpEntry { public: ManualDiffHelpEntry() { lineA1 = -1; lineA2 = -1; lineB1 = -1; lineB2 = -1; lineC1 = -1; lineC2 = -1; } LineRef lineA1; LineRef lineA2; LineRef lineB1; LineRef lineB2; LineRef lineC1; LineRef lineC2; LineRef& firstLine(int winIdx) { return winIdx == 1 ? lineA1 : (winIdx == 2 ? lineB1 : lineC1); } LineRef& lastLine(int winIdx) { return winIdx == 1 ? lineA2 : (winIdx == 2 ? 
lineB2 : lineC2); } bool isLineInRange(LineRef line, int winIdx) { return line >= 0 && line >= firstLine(winIdx) && line <= lastLine(winIdx); } bool operator==(const ManualDiffHelpEntry& r) const { return lineA1 == r.lineA1 && lineB1 == r.lineB1 && lineC1 == r.lineC1 && lineA2 == r.lineA2 && lineB2 == r.lineB2 && lineC2 == r.lineC2; } }; // A list of corresponding ranges typedef std::list ManualDiffHelpList; void calcDiff3LineListUsingAB( const DiffList* pDiffListAB, Diff3LineList& d3ll); void calcDiff3LineListUsingAC( const DiffList* pDiffListAC, Diff3LineList& d3ll); void calcDiff3LineListUsingBC( const DiffList* pDiffListBC, Diff3LineList& d3ll); void correctManualDiffAlignment(Diff3LineList& d3ll, ManualDiffHelpList* pManualDiffHelpList); void calcDiff3LineListTrim(Diff3LineList& d3ll, const LineData* pldA, const LineData* pldB, const LineData* pldC, ManualDiffHelpList* pManualDiffHelpList); void calcWhiteDiff3Lines(Diff3LineList& d3ll, const LineData* pldA, const LineData* pldB, const LineData* pldC); void calcDiff3LineVector(Diff3LineList& d3ll, Diff3LineVector& d3lv); -bool isLineOrBufEnd(const QChar* p, int i, int size); -bool convertFileEncoding(const QString& fileNameIn, QTextCodec* pCodecIn, - const QString& fileNameOut, QTextCodec* pCodecOut); - -QTextCodec* detectEncoding(const char* buf, qint64 size, qint64& skipBytes); - // Helper class that swaps left and right for some commands. 
class MyPainter : public QPainter { int m_factor; int m_xOffset; int m_fontWidth; public: MyPainter(QPaintDevice* pd, bool bRTL, int width, int fontWidth) : QPainter(pd) { if(bRTL) { m_fontWidth = fontWidth; m_factor = -1; m_xOffset = width - 1; } else { m_fontWidth = 0; m_factor = 1; m_xOffset = 0; } } void fillRect(int x, int y, int w, int h, const QBrush& b) { if(m_factor == 1) QPainter::fillRect(m_xOffset + x, y, w, h, b); else QPainter::fillRect(m_xOffset - x - w, y, w, h, b); } void drawText(int x, int y, const QString& s, bool bAdapt = false) { Qt::LayoutDirection ld = (m_factor == 1 || !bAdapt) ? Qt::LeftToRight : Qt::RightToLeft; //QPainter::setLayoutDirection( ld ); if(ld == Qt::RightToLeft) // Reverse the text { QString s2; for(int i = s.length() - 1; i >= 0; --i) { s2 += s[i]; } QPainter::drawText(m_xOffset - m_fontWidth * s.length() + m_factor * x, y, s2); return; } QPainter::drawText(m_xOffset - m_fontWidth * s.length() + m_factor * x, y, s); } void drawLine(int x1, int y1, int x2, int y2) { QPainter::drawLine(m_xOffset + m_factor * x1, y1, m_xOffset + m_factor * x2, y2); } }; bool runDiff(const LineData* p1, LineRef size1, const LineData* p2, LineRef size2, DiffList& diffList, int winIdx1, int winIdx2, ManualDiffHelpList* pManualDiffHelpList, Options* pOptions); bool fineDiff( Diff3LineList& diff3LineList, int selector, const LineData* v1, const LineData* v2); bool equal(const LineData& l1, const LineData& l2, bool bStrict); inline bool isWhite(QChar c) { return c == ' ' || c == '\t' || c == '\r'; } /** Returns the number of equivalent spaces at position outPos. */ inline int tabber(int outPos, int tabSize) { return tabSize - (outPos % tabSize); } /** Returns a line number where the linerange [line, line+nofLines] can be displayed best. If it fits into the currently visible range then the returned value is the current firstLine. 
*/ int getBestFirstLine(int line, int nofLines, int firstLine, int visibleLines); extern bool g_bIgnoreWhiteSpace; extern bool g_bIgnoreTrivialMatches; extern int g_bAutoSolve; // Cursor conversions that consider g_tabSize. int convertToPosInText(const QString& s, int posOnScreen, int tabSize); int convertToPosOnScreen(const QString& s, int posInText, int tabSize); enum e_CoordType { eFileCoords, eD3LLineCoords, eWrapCoords }; void calcTokenPos(const QString&, int posOnScreen, int& pos1, int& pos2, int tabSize); QString calcHistorySortKey(const QString& keyOrder, QRegExp& matchedRegExpr, const QStringList& parenthesesGroupList); bool findParenthesesGroups(const QString& s, QStringList& sl); #endif