Index: branches/kdepim/enterprise4/kdepimlibs/kpimutils/email.cpp =================================================================== --- branches/kdepim/enterprise4/kdepimlibs/kpimutils/email.cpp (revision 1066387) +++ branches/kdepim/enterprise4/kdepimlibs/kpimutils/email.cpp (revision 1066388) @@ -1,1068 +1,1069 @@ /* This file is part of the kpimutils library. Copyright (c) 2004 Matt Douhan This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /** @file This file is part of the KDEPIM Utilities library and provides static methods for email address validation. 
@author Matt Douhan \ */ #include "email.h" #include #include #include #include #include #include using namespace KPIMUtils; //----------------------------------------------------------------------------- QStringList KPIMUtils::splitAddressList( const QString &aStr ) { // Features: // - always ignores quoted characters // - ignores everything (including parentheses and commas) // inside quoted strings // - supports nested comments // - ignores everything (including double quotes and commas) // inside comments QStringList list; if ( aStr.isEmpty() ) { return list; } QString addr; uint addrstart = 0; int commentlevel = 0; bool insidequote = false; for ( int index=0; index 0 ) { commentlevel--; } else { kDebug() << "Error in address splitting: Unmatched ')'"; return list; } } break; case '\\' : // quoted character index++; // ignore the quoted character break; case ',' : case ';' : if ( !insidequote && ( commentlevel == 0 ) ) { addr = aStr.mid( addrstart, index - addrstart ); if ( !addr.isEmpty() ) { list += addr.simplified(); } addrstart = index + 1; } break; } } // append the last address to the list if ( !insidequote && ( commentlevel == 0 ) ) { addr = aStr.mid( addrstart, aStr.length() - addrstart ); if ( !addr.isEmpty() ) { list += addr.simplified(); } } else { kDebug() << "Error in address splitting: Unexpected end of address list"; } return list; } //----------------------------------------------------------------------------- // Used by KPIMUtils::splitAddress(...) and KPIMUtils::firstEmailAddress(...). KPIMUtils::EmailParseResult splitAddressInternal( const QByteArray address, QByteArray &displayName, QByteArray &addrSpec, QByteArray &comment, bool allowMultipleAddresses ) { // kDebug() << "address"; displayName = ""; addrSpec = ""; comment = ""; if ( address.isEmpty() ) { return AddressEmpty; } // The following is a primitive parser for a mailbox-list (cf. RFC 2822). // The purpose is to extract a displayable string from the mailboxes. 
// Comments in the addr-spec are not handled. No error checking is done. enum { TopLevel, InComment, InAngleAddress } context = TopLevel; bool inQuotedString = false; int commentLevel = 0; bool stop = false; for ( const char *p = address.data(); *p && !stop; ++p ) { switch ( context ) { case TopLevel : { switch ( *p ) { case '"' : inQuotedString = !inQuotedString; displayName += *p; break; case '(' : if ( !inQuotedString ) { context = InComment; commentLevel = 1; } else { displayName += *p; } break; case '<' : if ( !inQuotedString ) { context = InAngleAddress; } else { displayName += *p; } break; case '\\' : // quoted character displayName += *p; ++p; // skip the '\' if ( *p ) { displayName += *p; } else { return UnexpectedEnd; } break; case ',' : if ( !inQuotedString ) { if ( allowMultipleAddresses ) { stop = true; } else { return UnexpectedComma; } } else { displayName += *p; } break; default : displayName += *p; } break; } case InComment : { switch ( *p ) { case '(' : ++commentLevel; comment += *p; break; case ')' : --commentLevel; if ( commentLevel == 0 ) { context = TopLevel; comment += ' '; // separate the text of several comments } else { comment += *p; } break; case '\\' : // quoted character comment += *p; ++p; // skip the '\' if ( *p ) { comment += *p; } else { return UnexpectedEnd; } break; default : comment += *p; } break; } case InAngleAddress : { switch ( *p ) { case '"' : inQuotedString = !inQuotedString; addrSpec += *p; break; case '>' : if ( !inQuotedString ) { context = TopLevel; } else { addrSpec += *p; } break; case '\\' : // quoted character addrSpec += *p; ++p; // skip the '\' if ( *p ) { addrSpec += *p; } else { return UnexpectedEnd; } break; default : addrSpec += *p; } break; } } // switch ( context ) } // check for errors if ( inQuotedString ) { return UnbalancedQuote; } if ( context == InComment ) { return UnbalancedParens; } if ( context == InAngleAddress ) { return UnclosedAngleAddr; } displayName = displayName.trimmed(); comment = 
comment.trimmed(); addrSpec = addrSpec.trimmed(); if ( addrSpec.isEmpty() ) { if ( displayName.isEmpty() ) { return NoAddressSpec; } else { addrSpec = displayName; displayName.truncate( 0 ); } } /* kDebug() << "display-name : \"" << displayName << "\""; kDebug() << "comment : \"" << comment << "\""; kDebug() << "addr-spec : \"" << addrSpec << "\""; */ return AddressOk; } //----------------------------------------------------------------------------- EmailParseResult KPIMUtils::splitAddress( const QByteArray &address, QByteArray &displayName, QByteArray &addrSpec, QByteArray &comment ) { return splitAddressInternal( address, displayName, addrSpec, comment, false/* don't allow multiple addresses */ ); } //----------------------------------------------------------------------------- EmailParseResult KPIMUtils::splitAddress( const QString &address, QString &displayName, QString &addrSpec, QString &comment ) { QByteArray d, a, c; EmailParseResult result = splitAddress( address.toUtf8(), d, a, c ); if ( result == AddressOk ) { displayName = QString::fromUtf8( d ); addrSpec = QString::fromUtf8( a ); comment = QString::fromUtf8( c ); } return result; } //----------------------------------------------------------------------------- EmailParseResult KPIMUtils::isValidAddress( const QString &aStr ) { // If we are passed an empty string bail right away no need to process // further and waste resources if ( aStr.isEmpty() ) { return AddressEmpty; } // count how many @'s are in the string that is passed to us // if 0 or > 1 take action // at this point to many @'s cannot bail out right away since // @ is allowed in qoutes, so we use a bool to keep track // and then make a judgment further down in the parser // FIXME count only @ not in double quotes bool tooManyAtsFlag = false; int atCount = aStr.count( '@' ); if ( atCount > 1 ) { tooManyAtsFlag = true; } else if ( atCount == 0 ) { return TooFewAts; } // The main parser, try and catch all weird and wonderful // mistakes users 
and/or machines can create enum { TopLevel, InComment, InAngleAddress } context = TopLevel; bool inQuotedString = false; int commentLevel = 0; unsigned int strlen = aStr.length(); for ( unsigned int index=0; index < strlen; index++ ) { switch ( context ) { case TopLevel : { switch ( aStr[index].toLatin1() ) { case '"' : inQuotedString = !inQuotedString; break; case '(' : if ( !inQuotedString ) { context = InComment; commentLevel = 1; } break; case '[' : if ( !inQuotedString ) { return InvalidDisplayName; } break; case ']' : if ( !inQuotedString ) { return InvalidDisplayName; } break; case ':' : if ( !inQuotedString ) { return DisallowedChar; } break; case '<' : if ( !inQuotedString ) { context = InAngleAddress; } break; case '\\' : // quoted character ++index; // skip the '\' if ( ( index + 1 ) > strlen ) { return UnexpectedEnd; } break; case ',' : if ( !inQuotedString ) { return UnexpectedComma; } break; case ')' : if ( !inQuotedString ) { return UnbalancedParens; } break; case '>' : if ( !inQuotedString ) { return UnopenedAngleAddr; } break; case '@' : if ( !inQuotedString ) { if ( index == 0 ) { // Missing local part return MissingLocalPart; } else if ( index == strlen-1 ) { return MissingDomainPart; break; } } else if ( inQuotedString ) { --atCount; if ( atCount == 1 ) { tooManyAtsFlag = false; } } break; } break; } case InComment : { switch ( aStr[index].toLatin1() ) { case '(' : ++commentLevel; break; case ')' : --commentLevel; if ( commentLevel == 0 ) { context = TopLevel; } break; case '\\' : // quoted character ++index; // skip the '\' if ( ( index + 1 ) > strlen ) { return UnexpectedEnd; } break; } break; } case InAngleAddress : { switch ( aStr[index].toLatin1() ) { case ',' : if ( !inQuotedString ) { return UnexpectedComma; } break; case '"' : inQuotedString = !inQuotedString; break; case '@' : if ( inQuotedString ) { --atCount; if ( atCount == 1 ) { tooManyAtsFlag = false; } } break; case '>' : if ( !inQuotedString ) { context = TopLevel; break; } 
break; case '\\' : // quoted character ++index; // skip the '\' if ( ( index + 1 ) > strlen ) { return UnexpectedEnd; } break; } break; } } } if ( atCount == 0 && !inQuotedString ) { return TooFewAts; } if ( inQuotedString ) { return UnbalancedQuote; } if ( context == InComment ) { return UnbalancedParens; } if ( context == InAngleAddress ) { return UnclosedAngleAddr; } if ( tooManyAtsFlag ) { return TooManyAts; } return AddressOk; } //----------------------------------------------------------------------------- KPIMUtils::EmailParseResult KPIMUtils::isValidAddressList( const QString &aStr, QString &badAddr ) { if ( aStr.isEmpty() ) { return AddressEmpty; } QStringList list = splitAddressList( aStr ); QStringList::const_iterator it = list.begin(); EmailParseResult errorCode = AddressOk; for ( it = list.begin(); it != list.end(); ++it ) { errorCode = isValidAddress( *it ); if ( errorCode != AddressOk ) { badAddr = ( *it ); break; } } return errorCode; } //----------------------------------------------------------------------------- QString KPIMUtils::emailParseResultToString( EmailParseResult errorCode ) { switch ( errorCode ) { case TooManyAts : return i18n( "The email address you entered is not valid because it " "contains more than one @. " "You will not create valid messages if you do not " "change your address." ); case TooFewAts : return i18n( "The email address you entered is not valid because it " "does not contain a @." "You will not create valid messages if you do not " "change your address." ); case AddressEmpty : return i18n( "You have to enter something in the email address field." ); case MissingLocalPart : return i18n( "The email address you entered is not valid because it " "does not contain a local part." ); case MissingDomainPart : return i18n( "The email address you entered is not valid because it " "does not contain a domain part." 
); case UnbalancedParens : return i18n( "The email address you entered is not valid because it " "contains unclosed comments/brackets." ); case AddressOk : return i18n( "The email address you entered is valid." ); case UnclosedAngleAddr : return i18n( "The email address you entered is not valid because it " "contains an unclosed anglebracket." ); case UnopenedAngleAddr : return i18n( "The email address you entered is not valid because it " "contains an unopened anglebracket." ); case UnexpectedComma : return i18n( "The email address you have entered is not valid because it " "contains an unexpected comma." ); case UnexpectedEnd : return i18n( "The email address you entered is not valid because it ended " "unexpectedly, this probably means you have used an escaping " "type character like an \\ as the last character in your " "email address." ); case UnbalancedQuote : return i18n( "The email address you entered is not valid because it " "contains quoted text which does not end." ); case NoAddressSpec : return i18n( "The email address you entered is not valid because it " "does not seem to contain an actual email address, i.e. " "something of the form joe@example.org." ); case DisallowedChar : return i18n( "The email address you entered is not valid because it " "contains an illegal character." ); case InvalidDisplayName : return i18n( "The email address you have entered is not valid because it " "contains an invalid displayname." 
); } return i18n( "Unknown problem with email address" ); } //----------------------------------------------------------------------------- bool KPIMUtils::isValidSimpleAddress( const QString &aStr ) { // If we are passed an empty string bail right away no need to process further // and waste resources if ( aStr.isEmpty() ) { return false; } int atChar = aStr.lastIndexOf( '@' ); QString domainPart = aStr.mid( atChar + 1 ); QString localPart = aStr.left( atChar ); bool tooManyAtsFlag = false; bool inQuotedString = false; int atCount = localPart.count( '@' ); unsigned int strlen = localPart.length(); for ( unsigned int index=0; index < strlen; index++ ) { switch( localPart[ index ].toLatin1() ) { case '"' : inQuotedString = !inQuotedString; break; case '@' : if ( inQuotedString ) { --atCount; if ( atCount == 0 ) { tooManyAtsFlag = false; } } break; } } QString addrRx = "[a-zA-Z]*[~|{}`\\^?=/+*'&%$#!_\\w.-]*[~|{}`\\^?=/+*'&%$#!_a-zA-Z0-9-]@"; if ( localPart[ 0 ] == '\"' || localPart[ localPart.length()-1 ] == '\"' ) { addrRx = "\"[a-zA-Z@]*[\\w.@-]*[a-zA-Z0-9@]\"@"; } if ( domainPart[ 0 ] == '[' || domainPart[ domainPart.length()-1 ] == ']' ) { addrRx += "\\[[0-9]{,3}(\\.[0-9]{,3}){3}\\]"; } else { addrRx += "[\\w-]+(\\.[\\w-]+)*"; } QRegExp rx( addrRx ); return rx.exactMatch( aStr ) && !tooManyAtsFlag; } //----------------------------------------------------------------------------- QString KPIMUtils::simpleEmailAddressErrorMsg() { return i18n( "The email address you entered is not valid because it " "does not seem to contain an actual email address, i.e. " "something of the form joe@example.org." 
); } //----------------------------------------------------------------------------- QByteArray KPIMUtils::extractEmailAddress( const QByteArray &address ) { QByteArray dummy1, dummy2, addrSpec; EmailParseResult result = splitAddressInternal( address, dummy1, addrSpec, dummy2, false/* don't allow multiple addresses */ ); if ( result != AddressOk ) { addrSpec = QByteArray(); kDebug() // << "\n" << "Input: aStr\nError:" << emailParseResultToString( result ); } return addrSpec; } //----------------------------------------------------------------------------- QString KPIMUtils::extractEmailAddress( const QString &address ) { return QString::fromUtf8( extractEmailAddress( address.toUtf8() ) ); } //----------------------------------------------------------------------------- QByteArray KPIMUtils::firstEmailAddress( const QByteArray &addresses ) { QByteArray dummy1, dummy2, addrSpec; EmailParseResult result = splitAddressInternal( addresses, dummy1, addrSpec, dummy2, true/* allow multiple addresses */ ); if ( result != AddressOk ) { addrSpec = QByteArray(); kDebug() // << "\n" << "Input: aStr\nError:" << emailParseResultToString( result ); } return addrSpec; } //----------------------------------------------------------------------------- QString KPIMUtils::firstEmailAddress( const QString &addresses ) { return QString::fromUtf8( firstEmailAddress( addresses.toUtf8() ) ); } //----------------------------------------------------------------------------- bool KPIMUtils::extractEmailAddressAndName( const QString &aStr, QString &mail, QString &name ) { name.clear(); mail.clear(); const int len = aStr.length(); const char cQuotes = '"'; bool bInComment = false; bool bInQuotesOutsideOfEmail = false; int i=0, iAd=0, iMailStart=0, iMailEnd=0; QChar c; unsigned int commentstack = 0; // Find the '@' of the email address // skipping all '@' inside "(...)" comments: while ( i < len ) { c = aStr[i]; if ( '(' == c ) { commentstack++; } if ( ')' == c ) { commentstack--; } bInComment = 
commentstack != 0; if ( '"' == c && !bInComment ) { bInQuotesOutsideOfEmail = !bInQuotesOutsideOfEmail; } if( !bInComment && !bInQuotesOutsideOfEmail ) { if ( '@' == c ) { iAd = i; break; // found it } } ++i; } if ( !iAd ) { // We suppose the user is typing the string manually and just // has not finished typing the mail address part. // So we take everything that's left of the '<' as name and the rest as mail for ( i = 0; len > i; ++i ) { c = aStr[i]; if ( '<' != c ) { name.append( c ); } else { break; } } mail = aStr.mid( i + 1 ); if ( mail.endsWith( '>' ) ) { mail.truncate( mail.length() - 1 ); } } else { // Loop backwards until we find the start of the string // or a ',' that is outside of a comment // and outside of quoted text before the leading '<'. bInComment = false; bInQuotesOutsideOfEmail = false; for ( i = iAd-1; 0 <= i; --i ) { c = aStr[i]; if ( bInComment ) { if ( '(' == c ) { if ( !name.isEmpty() ) { name.prepend( ' ' ); } bInComment = false; } else { name.prepend( c ); // all comment stuff is part of the name } } else if ( bInQuotesOutsideOfEmail ) { if ( cQuotes == c ) { bInQuotesOutsideOfEmail = false; } else { name.prepend( c ); } } else { // found the start of this addressee ? if ( ',' == c ) { break; } // stuff is before the leading '<' ? if ( iMailStart ) { if ( cQuotes == c ) { bInQuotesOutsideOfEmail = true; // end of quoted text found } else { name.prepend( c ); } } else { switch ( c.toLatin1() ) { case '<': iMailStart = i; break; case ')': if ( !name.isEmpty() ) { name.prepend( ' ' ); } bInComment = true; break; default: if ( ' ' != c ) { mail.prepend( c ); } } } } } name = name.simplified(); mail = mail.simplified(); if ( mail.isEmpty() ) { return false; } mail.append( '@' ); // Loop forward until we find the end of the string // or a ',' that is outside of a comment // and outside of quoted text behind the trailing '>'. 
bInComment = false; bInQuotesOutsideOfEmail = false; int parenthesesNesting = 0; for ( i = iAd+1; len > i; ++i ) { c = aStr[i]; if ( bInComment ) { if ( ')' == c ) { if ( --parenthesesNesting == 0 ) { bInComment = false; if ( !name.isEmpty() ) { name.append( ' ' ); } } else { // nested ")", add it name.append( ')' ); // name can't be empty here } } else { if ( '(' == c ) { // nested "(" ++parenthesesNesting; } name.append( c ); // all comment stuff is part of the name } } else if ( bInQuotesOutsideOfEmail ) { if ( cQuotes == c ) { bInQuotesOutsideOfEmail = false; } else { name.append( c ); } } else { // found the end of this addressee ? if ( ',' == c ) { break; } // stuff is behind the trailing '>' ? if ( iMailEnd ){ if ( cQuotes == c ) { bInQuotesOutsideOfEmail = true; // start of quoted text found } else { name.append( c ); } } else { switch ( c.toLatin1() ) { case '>': iMailEnd = i; break; case '(': if ( !name.isEmpty() ) { name.append( ' ' ); } if ( ++parenthesesNesting > 0 ) { bInComment = true; } break; default: if ( ' ' != c ) { mail.append( c ); } } } } } } name = name.simplified(); mail = mail.simplified(); return ! 
( name.isEmpty() || mail.isEmpty() ); } //----------------------------------------------------------------------------- bool KPIMUtils::compareEmail( const QString &email1, const QString &email2, bool matchName ) { QString e1Name, e1Email, e2Name, e2Email; extractEmailAddressAndName( email1, e1Email, e1Name ); extractEmailAddressAndName( email2, e2Email, e2Name ); return e1Email == e2Email && ( !matchName || ( e1Name == e2Name ) ); } //----------------------------------------------------------------------------- QString KPIMUtils::normalizedAddress( const QString &displayName, const QString &addrSpec, const QString &comment ) { - if ( displayName.isEmpty() && comment.isEmpty() ) { + const QString realDisplayName = KMime::removeBidiControlChars( displayName ); + if ( realDisplayName.isEmpty() && comment.isEmpty() ) { return addrSpec; } else if ( comment.isEmpty() ) { - if ( !displayName.startsWith('\"') ) { - return quoteNameIfNecessary( displayName ) + " <" + addrSpec + '>'; + if ( !realDisplayName.startsWith('\"') ) { + return quoteNameIfNecessary( realDisplayName ) + " <" + addrSpec + '>'; } else { - return displayName + " <" + addrSpec + '>'; + return realDisplayName + " <" + addrSpec + '>'; } - } else if ( displayName.isEmpty() ) { + } else if ( realDisplayName.isEmpty() ) { QString commentStr = comment; return quoteNameIfNecessary( commentStr ) + " <" + addrSpec + '>'; } else { - return displayName + " (" + comment + ") <" + addrSpec + '>'; + return realDisplayName + " (" + comment + ") <" + addrSpec + '>'; } } //----------------------------------------------------------------------------- QString KPIMUtils::fromIdn( const QString &addrSpec ) { const int atPos = addrSpec.lastIndexOf( '@' ); if ( atPos == -1 ) { return addrSpec; } QString idn = KUrl::fromAce( addrSpec.mid( atPos + 1 ).toLatin1() ); if ( idn.isEmpty() ) { return QString(); } return addrSpec.left( atPos + 1 ) + idn; } //----------------------------------------------------------------------------- 
QString KPIMUtils::toIdn( const QString &addrSpec ) { const int atPos = addrSpec.lastIndexOf( '@' ); if ( atPos == -1 ) { return addrSpec; } QString idn = KUrl::toAce( addrSpec.mid( atPos + 1 ) ); if ( idn.isEmpty() ) { return addrSpec; } return addrSpec.left( atPos + 1 ) + idn; } //----------------------------------------------------------------------------- QString KPIMUtils::normalizeAddressesAndDecodeIdn( const QString &str ) { // kDebug() << str; if ( str.isEmpty() ) { return str; } const QStringList addressList = splitAddressList( str ); QStringList normalizedAddressList; QByteArray displayName, addrSpec, comment; for ( QStringList::ConstIterator it = addressList.begin(); ( it != addressList.end() ); ++it ) { if ( !(*it).isEmpty() ) { if ( splitAddress( (*it).toUtf8(), displayName, addrSpec, comment ) == AddressOk ) { displayName = KMime::decodeRFC2047String(displayName).toUtf8(); comment = KMime::decodeRFC2047String(comment).toUtf8(); normalizedAddressList << normalizedAddress( QString::fromUtf8( displayName ), fromIdn( QString::fromUtf8( addrSpec ) ), QString::fromUtf8( comment ) ); } else { kDebug() << "splitting address failed:" << *it; } } } /* kDebug() << "normalizedAddressList: \"" << normalizedAddressList.join( ", " ) << "\""; */ return normalizedAddressList.join( ", " ); } //----------------------------------------------------------------------------- QString KPIMUtils::normalizeAddressesAndEncodeIdn( const QString &str ) { //kDebug() << str; if ( str.isEmpty() ) { return str; } const QStringList addressList = splitAddressList( str ); QStringList normalizedAddressList; QByteArray displayName, addrSpec, comment; for ( QStringList::ConstIterator it = addressList.begin(); ( it != addressList.end() ); ++it ) { if ( !(*it).isEmpty() ) { if ( splitAddress( (*it).toUtf8(), displayName, addrSpec, comment ) == AddressOk ) { normalizedAddressList << normalizedAddress( QString::fromUtf8( displayName ), toIdn( QString::fromUtf8( addrSpec ) ), QString::fromUtf8( 
comment ) ); } else { kDebug() << "splitting address failed:" << *it; } } } /* kDebug() << "normalizedAddressList: \"" << normalizedAddressList.join( ", " ) << "\""; */ return normalizedAddressList.join( ", " ); } //----------------------------------------------------------------------------- // Escapes unescaped doublequotes in str. static QString escapeQuotes( const QString &str ) { if ( str.isEmpty() ) { return QString(); } QString escaped; // reserve enough memory for the worst case ( """..."" -> \"\"\"...\"\" ) escaped.reserve( 2 * str.length() ); unsigned int len = 0; for ( int i = 0; i < str.length(); ++i, ++len ) { if ( str[i] == '"' ) { // unescaped doublequote escaped[len] = '\\'; ++len; } else if ( str[i] == '\\' ) { // escaped character escaped[len] = '\\'; ++len; ++i; if ( i >= str.length() ) { // handle trailing '\' gracefully break; } } escaped[len] = str[i]; } escaped.truncate( len ); return escaped; } //----------------------------------------------------------------------------- QString KPIMUtils::quoteNameIfNecessary( const QString &str ) { QString quoted = str; QRegExp needQuotes( "[^ 0-9A-Za-z\\x0080-\\xFFFF]" ); // avoid double quoting if ( ( quoted[0] == '"' ) && ( quoted[quoted.length() - 1] == '"' ) ) { quoted = "\"" + escapeQuotes( quoted.mid( 1, quoted.length() - 2 ) ) + "\""; } else if ( quoted.indexOf( needQuotes ) != -1 ) { quoted = "\"" + escapeQuotes( quoted ) + "\""; } return quoted; } Index: branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_util.cpp =================================================================== --- branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_util.cpp (revision 1066387) +++ branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_util.cpp (revision 1066388) @@ -1,490 +1,505 @@ /* kmime_util.cpp KMime, the KDE internet mail/usenet news message library. Copyright (c) 2001 the KMime authors. 
See file AUTHORS for details This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "kmime_util.h" #include "kmime_util_p.h" #include "kmime_header_parsing.h" #include #include // for strcasestr #include #include #include #include +#include #include #include #include #include #include #include #include using namespace KMime; namespace KMime { QList c_harsetCache; QList l_anguageCache; QByteArray cachedCharset( const QByteArray &name ) { foreach ( const QByteArray& charset, c_harsetCache ) { if ( qstricmp( name.data(), charset.data() ) == 0 ) { return charset; } } c_harsetCache.append( name.toUpper() ); //kDebug(5320) << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); return c_harsetCache.last(); } QByteArray cachedLanguage( const QByteArray &name ) { foreach ( const QByteArray& language, l_anguageCache ) { if ( qstricmp( name.data(), language.data() ) == 0 ) { return language; } } l_anguageCache.append( name.toUpper() ); //kDebug(5320) << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); return l_anguageCache.last(); } bool isUsAscii( const QString &s ) { uint sLength = s.length(); for ( uint i=0; i@[\] const uchar specialsMap[16] = { 0x00, 0x00, 0x00, 0x00, // CTLs 0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?' 0x80, 0x00, 0x00, 0x1C, // '@' ... 
'_' 0x00, 0x00, 0x00, 0x00 // '`' ... DEL }; // "(),:;<>@[\]/=? const uchar tSpecialsMap[16] = { 0x00, 0x00, 0x00, 0x00, // CTLs 0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?' 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 0x00, 0x00, 0x00, 0x00 // '`' ... DEL }; // all except specials, CTLs, SPACE. const uchar aTextMap[16] = { 0x00, 0x00, 0x00, 0x00, 0x5F, 0x35, 0xFF, 0xC5, 0x7F, 0xFF, 0xFF, 0xE3, 0xFF, 0xFF, 0xFF, 0xFE }; // all except tspecials, CTLs, SPACE. const uchar tTextMap[16] = { 0x00, 0x00, 0x00, 0x00, 0x5F, 0x36, 0xFF, 0xC0, 0x7F, 0xFF, 0xFF, 0xE3, 0xFF, 0xFF, 0xFF, 0xFE }; // none except a-zA-Z0-9!*+-/ const uchar eTextMap[16] = { 0x00, 0x00, 0x00, 0x00, 0x40, 0x35, 0xFF, 0xC0, 0x7F, 0xFF, 0xFF, 0xE0, 0x7F, 0xFF, 0xFF, 0xE0 }; QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS ) { QByteArray result; QByteArray spaceBuffer; const char *scursor = src.constData(); const char *send = scursor + src.length(); bool onlySpacesSinceLastWord = false; while ( scursor != send ) { // space if ( isspace( *scursor ) && onlySpacesSinceLastWord ) { spaceBuffer += *scursor++; continue; } // possible start of an encoded word if ( *scursor == '=' ) { QByteArray language; QString decoded; ++scursor; const char *start = scursor; if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) { result += decoded.toUtf8(); onlySpacesSinceLastWord = true; spaceBuffer.clear(); } else { if ( onlySpacesSinceLastWord ) { result += spaceBuffer; onlySpacesSinceLastWord = false; } result += '='; scursor = start; // reset cursor after parsing failure } continue; } else { // unencoded data if ( onlySpacesSinceLastWord ) { result += spaceBuffer; onlySpacesSinceLastWord = false; } result += *scursor; ++scursor; } } return QString::fromUtf8(result); } QString decodeRFC2047String( const QByteArray &src ) { QByteArray usedCS; return decodeRFC2047String( src, usedCS, "utf-8", false ); } QByteArray 
encodeRFC2047String( const QString &src, const QByteArray &charset, bool addressHeader, bool allow8BitHeaders ) { QByteArray encoded8Bit, result, usedCS; int start=0, end=0; bool nonAscii=false, ok=true, useQEncoding=false; QTextCodec *codec=0; usedCS = charset; codec = KGlobal::charsets()->codecForName( usedCS, ok ); if ( !ok ) { //no codec available => try local8Bit and hope the best ;-) usedCS = KGlobal::locale()->encoding(); codec = KGlobal::charsets()->codecForName( usedCS, ok ); } if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets useQEncoding = true; } encoded8Bit = codec->fromUnicode( src ); if ( allow8BitHeaders ) { return encoded8Bit; } uint encoded8BitLength = encoded8Bit.length(); for ( unsigned int i=0; i@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) { end = start; // non us-ascii char found, now we determine where to stop encoding nonAscii = true; break; } } if ( nonAscii ) { while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { // we encode complete words end++; } for ( int x=end; x@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) { end = encoded8Bit.length(); // we found another non-ascii word while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { // we encode complete words end++; } } } result = encoded8Bit.left( start ) + "=?" + usedCS; if ( useQEncoding ) { result += "?Q?"; char c, hexcode;// "Q"-encoding implementation described in RFC 2047 for ( int i=start; i= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems ( ( c >= 'A' ) && ( c <= 'Z' ) ) || // with "From" & "To" headers ( ( c >= '0' ) && ( c <= '9' ) ) ) { result += c; } else { result += '='; // "stolen" from KMail ;-) hexcode = ((c & 0xF0) >> 4) + 48; if ( hexcode >= 58 ) { hexcode += 7; } result += hexcode; hexcode = (c & 0x0F) + 48; if ( hexcode >= 58 ) { hexcode += 7; } result += hexcode; } } } } else { result += "?B?" 
+ encoded8Bit.mid( start, end - start ).toBase64(); } result +="?="; result += encoded8Bit.right( encoded8Bit.length() - end ); } else { result = encoded8Bit; } return result; } QByteArray uniqueString() { static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; time_t now; char p[11]; int pos, ran; unsigned int timeval; p[10] = '\0'; now = time( 0 ); ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0)); timeval = (now / ran) + getpid(); for ( int i=0; i<10; i++ ) { pos = (int) (61.0*rand() / (RAND_MAX + 1.0)); //kDebug(5320) << pos; p[i] = chars[pos]; } QByteArray ret; ret.setNum( timeval ); ret += '.'; ret += p; return ret; } QByteArray multiPartBoundary() { return "nextPart" + uniqueString(); } QByteArray unfoldHeader( const QByteArray &header ) { QByteArray result; int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0; while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) { foldBegin = foldEnd = foldMid; // find the first space before the line-break while ( foldBegin > 0 ) { if ( !QChar( header[foldBegin - 1] ).isSpace() ) { break; } --foldBegin; } // find the first non-space after the line-break while ( foldEnd <= header.length() - 1 ) { if ( !QChar( header[foldEnd] ).isSpace() ) { break; } ++foldEnd; } result += header.mid( pos, foldBegin - pos ); if ( foldEnd < header.length() -1 ) result += ' '; pos = foldEnd; } result += header.mid( pos, header.length() - pos ); return result; } int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded ) { QByteArray n = name; n.append( ':' ); int begin = -1; if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) { begin = 0; } else { n.prepend('\n'); const char *p = strcasestr( src.constData(), n.constData() ); if ( !p ) { begin = -1; } else { begin = p - src.constData(); ++begin; } } if ( begin > -1) { //there is a header with the given name dataBegin = begin + name.length() + 1; //skip the name // skip the usual space after the 
colon if ( src.at( dataBegin ) == ' ' ) { ++dataBegin; } end = dataBegin; int len = src.length() - 1; if ( folded ) *folded = false; if ( src.at(end) != '\n' ) { // check if the header is not empty while ( true ) { end = src.indexOf( '\n', end + 1 ); if ( end == -1 || end == len || ( src[end+1] != ' ' && src[end+1] != '\t' ) ) { //break if we reach the end of the string, honor folded lines break; } else { if ( folded ) *folded = true; } } } if ( end < 0 ) { end = len + 1; //take the rest of the string } return begin; } else { dataBegin = -1; return -1; //header not found } } QByteArray extractHeader( const QByteArray &src, const QByteArray &name ) { int begin, end; bool folded; indexOfHeader( src, name, end, begin, &folded ); if ( begin >= 0 ) { if ( !folded ) { return src.mid( begin, end - begin ); } else { QByteArray hdrValue = src.mid( begin, end - begin ); return unfoldHeader( hdrValue ); } } else { return QByteArray(); //header not found } } void removeHeader( QByteArray &header, const QByteArray &name ) { int begin, end, dummy; begin = indexOfHeader( header, name, end, dummy ); if ( begin >= 0 ) { header.remove( begin, end - begin + 1 ); } } QByteArray CRLFtoLF( const QByteArray &s ) { QByteArray ret = s; ret.replace( "\r\n", "\n" ); return ret; } QByteArray LFtoCRLF( const QByteArray &s ) { QByteArray ret = s; ret.replace( "\n", "\r\n" ); return ret; } namespace { template < typename T > void removeQuotesGeneric( T & str ) { bool inQuote = false; for ( int i = 0; i < str.length(); ++i ) { if ( str[i] == '"' ) { str.remove( i, 1 ); i--; inQuote = !inQuote; } else { if ( inQuote && ( str[i] == '\\' ) ) { str.remove( i, 1 ); } } } } } void removeQuots( QByteArray &str ) { removeQuotesGeneric( str ); } void removeQuots( QString &str ) { removeQuotesGeneric( str ); } void addQuotes( QByteArray &str, bool forceQuotes ) { bool needsQuotes=false; for ( int i=0; i < str.length(); i++ ) { if ( strchr("()<>@,.;:[]=\\\"", str[i] ) != 0 ) { needsQuotes = true; } if ( 
str[i] == '\\' || str[i] == '\"' ) { str.insert( i, '\\' ); i++; } } if ( needsQuotes || forceQuotes ) { str.insert( 0, '\"' ); str.append( "\"" ); } } +QString removeBidiControlChars( const QString &input ) +{ + const int LRO = 0x202D; + const int RLO = 0x202E; + const int LRE = 0x202A; + const int RLE = 0x202B; + QString result = input; + result.remove( LRO ); + result.remove( RLO ); + result.remove( LRE ); + result.remove( RLE ); + return result; +} + } // namespace KMime Index: branches/kdepim/enterprise4/kdepimlibs/kmime/tests/kmime_message_test.cpp =================================================================== --- branches/kdepim/enterprise4/kdepimlibs/kmime/tests/kmime_message_test.cpp (revision 1066387) +++ branches/kdepim/enterprise4/kdepimlibs/kmime/tests/kmime_message_test.cpp (revision 1066388) @@ -1,174 +1,203 @@ /* Copyright (c) 2007 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include "kmime_message_test.h" #include "kmime_message_test.moc" #include #include using namespace KMime; QTEST_KDEMAIN( MessageTest, NoGUI ) void MessageTest::testMainBodyPart() { Message *msg = new Message(); Message *msg2 = new Message(); Content *text = new Content(); text->contentType()->setMimeType( "text/plain" ); Content *html = new Content(); html->contentType()->setMimeType( "text/html" ); // empty message QCOMPARE( msg->mainBodyPart(), msg ); QCOMPARE( msg->mainBodyPart( "text/plain" ), (Content*)0 ); // non-multipart msg->contentType()->setMimeType( "text/html" ); QCOMPARE( msg->mainBodyPart(), msg ); QCOMPARE( msg->mainBodyPart( "text/plain" ), (Content*)0 ); QCOMPARE( msg->mainBodyPart( "text/html" ), msg ); // multipart/mixed msg2->contentType()->setMimeType( "multipart/mixed" ); msg2->addContent( text ); msg2->addContent( html ); QCOMPARE( msg2->mainBodyPart(), text ); QCOMPARE( msg2->mainBodyPart( "text/plain" ), text ); QCOMPARE( msg2->mainBodyPart( "text/html" ), (Content*)0 ); // mulitpart/alternative msg->contentType()->setMimeType( "multipart/alternative" ); msg->addContent( html ); msg->addContent( text ); QCOMPARE( msg->mainBodyPart(), html ); QCOMPARE( msg->mainBodyPart( "text/plain" ), text ); QCOMPARE( msg->mainBodyPart( "text/html" ), html ); // mulitpart/alternative inside multipart/mixed Message* msg3 = new Message(); msg3->contentType()->setMimeType( "multipart/mixed" ); msg3->addContent( msg ); Content *attach = new Content(); attach->contentType()->setMimeType( "text/plain" ); QCOMPARE( msg3->mainBodyPart(), html ); QCOMPARE( msg3->mainBodyPart( "text/plain" ), text ); QCOMPARE( msg3->mainBodyPart( "text/html" ), html ); } void MessageTest::testBrunosMultiAssembleBug() { QByteArray data = "From: Sender \n" "Subject: Sample message\n" "To: Receiver \n" "Date: Sat, 04 Aug 2007 12:44 +0200\n" "MIME-Version: 1.0\n" "Content-Type: text/plain\n" "X-Foo: bla\n" "X-Bla: foo\n" "\n" "body"; Message *msg = new Message; msg->setContent( 
data ); msg->parse(); msg->assemble(); QCOMPARE( msg->encodedContent(), data ); msg->inReplyTo(); msg->assemble(); QCOMPARE( msg->encodedContent(), data ); delete msg; } void MessageTest::testWillsAndTillsCrash() { QByteArray deadlyMail = "From: censored@yahoogroups.com\n" "To: censored@yahoogroups.com\n" "Sender: censored@yahoogroups.com\n" "MIME-Version: 1.0\n" "Date: 29 Jan 2006 23:58:21 -0000\n" "Subject: [censored] Birthday Reminder\n" "Reply-To: censored@yahoogroups.com\n" "Content-Type: multipart/alternative;\n boundary=\"YCalReminder=cNM4SNTGA4Cg1MVLaPpqNF1138579098\"\n" "X-Length: 9594\n" "X-UID: 6161\n" "Status: RO\n" "X-Status: OC\n" "X-KMail-EncryptionState:\n" "X-KMail-SignatureState:\n" "X-KMail-MDN-Sent:\n\n"; // QByteArray deadlyMail; // QFile f( "deadlymail" ); // f.open( QFile::ReadOnly ); // deadlyMail = f.readAll(); KMime::Message *msg = new KMime::Message; msg->setContent( deadlyMail ); msg->parse(); QVERIFY( !msg->date()->isEmpty() ); QCOMPARE( msg->subject()->as7BitString( false ), QByteArray( "[censored] Birthday Reminder" ) ); QCOMPARE( msg->from()->mailboxes().count(), 1 ); QCOMPARE( msg->sender()->mailboxes().count(), 1 ); QCOMPARE( msg->replyTo()->mailboxes().count(), 1 ); QCOMPARE( msg->to()->mailboxes().count(), 1 ); QCOMPARE( msg->cc()->mailboxes().count(), 0 ); QCOMPARE( msg->bcc()->mailboxes().count(), 0 ); QCOMPARE( msg->inReplyTo()->identifiers().count(), 0 ); QCOMPARE( msg->messageID()->identifiers().count(), 0 ); delete msg; } void MessageTest::missingHeadersTest() { // Test that the message body is OK even though some headers are missing KMime::Message msg; QString body = "Hi Donald, look at those nice pictures I found!\n"; QString content = "From: georgebush@whitehouse.org\n" "To: donaldrumsfeld@whitehouse.org\n" "Subject: Cute Kittens\n" "\n" + body; msg.setContent( content.toAscii() ); msg.parse(); msg.assemble(); QCOMPARE( body, QString::fromAscii( msg.body() ) ); // Now create a new message, based on the content of the 
first one. // The body of the new message should still be the same. // (there was a bug that caused missing mandatory headers to be // added as a empty newline, which caused parts of the header to // leak into the body) KMime::Message msg2; qDebug() << msg.encodedContent(); msg2.setContent( msg.encodedContent() ); msg2.parse(); msg2.assemble(); QCOMPARE( body, QString::fromAscii( msg2.body() ) ); } +void MessageTest::testBidiSpoofing() +{ + const QString RLO( QChar( 0x202E ) ); + const QString PDF( QChar( 0x202C ) ); + + const QByteArray senderAndRLO = + encodeRFC2047String( "\"Sender" + RLO + "\" ", "utf-8" ); + + // The display name of the "From" has an RLO, make sure the KMime parser balances it + QByteArray data = + "From: " + senderAndRLO + "\n" + "\n" + "Body"; + + KMime::Message msg; + msg.setContent( data ); + msg.parse(); + + // Test adjusted for taking into account that KMIME now removes bidi control chars + // instead of adding PDF chars, because of broken KHTML. + //const QString expectedDisplayName = "\"Sender" + RLO + PDF + "\""; + const QString expectedDisplayName = "\"Sender\""; + const QString expectedMailbox = expectedDisplayName + " "; + QCOMPARE( msg.from()->addresses().count(), 1 ); + QCOMPARE( msg.from()->asUnicodeString(), expectedMailbox ); + QCOMPARE( msg.from()->displayNames().first(), expectedDisplayName ); + QCOMPARE( msg.from()->mailboxes().first().name(), expectedDisplayName ); + QCOMPARE( msg.from()->mailboxes().first().address().data(), "sender@test.org" ); +} Index: branches/kdepim/enterprise4/kdepimlibs/kmime/tests/kmime_message_test.h =================================================================== --- branches/kdepim/enterprise4/kdepimlibs/kmime/tests/kmime_message_test.h (revision 1066387) +++ branches/kdepim/enterprise4/kdepimlibs/kmime/tests/kmime_message_test.h (revision 1066388) @@ -1,36 +1,37 @@ /* Copyright (c) 2007 Volker Krause This library is free software; you can redistribute it and/or modify it under the terms of 
the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef KMIME_MESSAGE_TEST_H #define KMIME_MESSAGE_TEST_H #include /** QObject-based fixture that declares the message test cases as private slots, one slot per test case; the slot bodies are not part of this header. NOTE(review): presumably the slots are invoked by the QTestLib runner -- confirm against the matching .cpp. */ class MessageTest : public QObject { Q_OBJECT private slots: void testMainBodyPart(); void testBrunosMultiAssembleBug(); void testWillsAndTillsCrash(); void missingHeadersTest(); + void testBidiSpoofing(); }; #endif Index: branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_header_parsing.cpp =================================================================== --- branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_header_parsing.cpp (revision 1066387) +++ branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_header_parsing.cpp (revision 1066388) @@ -1,2040 +1,2042 @@ /* -*- c++ -*- kmime_header_parsing.cpp KMime, the KDE internet mail/usenet news message library. Copyright (c) 2001-2002 Marc Mutz This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "kmime_header_parsing.h" #include "kmime_codecs.h" #include "kmime_util.h" #include "kmime_dateformatter.h" #include "kmime_warning.h" #include #include #include #include #include #include #include // for isdigit #include using namespace KMime; using namespace KMime::Types; namespace KMime { namespace Types { // QUrl::fromAce is extremely expensive, so only use it when necessary. // Fortunately, the presence of IDNA is readily detected with a substring match... static inline QString QUrl_fromAce_wrapper( const QString & domain ) { if ( domain.contains( QLatin1String( "xn--" ) ) ) return QUrl::fromAce( domain.toLatin1() ); else return domain; } static QString addr_spec_as_string( const AddrSpec & as, bool pretty ) { if ( as.isEmpty() ) { return QString(); } bool needsQuotes = false; QString result; result.reserve( as.localPart.length() + as.domain.length() + 1 ); for ( int i = 0 ; i < as.localPart.length() ; ++i ) { const char ch = as.localPart[i].toLatin1(); if ( ch == '.' || isAText( ch ) ) { result += ch; } else { needsQuotes = true; if ( ch == '\\' || ch == '"' ) { result += '\\'; } result += ch; } } const QString dom = pretty ? 
QUrl_fromAce_wrapper( as.domain ) : as.domain ; if ( needsQuotes ) { return '"' + result + "\"@" + dom; } else { return result + '@' + dom; } } QString AddrSpec::asString() const { return addr_spec_as_string( *this, false ); } QString AddrSpec::asPrettyString() const { return addr_spec_as_string( *this, true ); } bool AddrSpec::isEmpty() const { return localPart.isEmpty() && domain.isEmpty(); } QByteArray Mailbox::address() const { return mAddrSpec.asString().toLatin1(); } AddrSpec Mailbox::addrSpec() const { return mAddrSpec; } QString Mailbox::name() const { return mDisplayName; } void Mailbox::setAddress( const AddrSpec &addr ) { mAddrSpec = addr; } void Mailbox::setAddress( const QByteArray &addr ) { const char *cursor = addr.constData(); if ( !HeaderParsing::parseAngleAddr( cursor, cursor + addr.length(), mAddrSpec ) ) { if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(), mAddrSpec ) ) { kWarning() << "Invalid address"; return; } } } void Mailbox::setName( const QString &name ) { - mDisplayName = name; + mDisplayName = removeBidiControlChars( name ); } void Mailbox::setNameFrom7Bit( const QByteArray &name, const QByteArray &defaultCharset ) { QByteArray cs; - mDisplayName = decodeRFC2047String( name, cs, defaultCharset, false ); + setName( decodeRFC2047String( name, cs, defaultCharset, false ) ); } bool Mailbox::hasAddress() const { return !mAddrSpec.isEmpty(); } bool Mailbox::hasName() const { return !mDisplayName.isEmpty(); } QString Mailbox::prettyAddress() const { if ( !hasName() ) { return address(); } QString s = name(); if ( hasAddress() ) { s += QLatin1String(" <") + address() + QLatin1Char('>'); } return s; } void Mailbox::fromUnicodeString( const QString &s ) { from7BitString( encodeRFC2047String( s, "utf-8", false ) ); } void Mailbox::from7BitString( const QByteArray &s ) { const char *cursor = s.constData(); HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this ); } QByteArray KMime::Types::Mailbox::as7BitString( const 
QByteArray &encCharset ) const { if ( !hasName() ) { return address(); } QByteArray rv; if ( isUsAscii( name() ) ) { QByteArray tmp = name().toLatin1(); addQuotes( tmp, false ); rv += tmp; } else { rv += encodeRFC2047String( name(), encCharset, true ); } if ( hasAddress() ) { rv += " <" + address() + '>'; } return rv; } } // namespace Types namespace HeaderParsing { // parse the encoded-word (scursor points to after the initial '=') bool parseEncodedWord( const char* &scursor, const char * const send, QString &result, QByteArray &language, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS ) { // make sure the caller already did a bit of the work. assert( *(scursor-1) == '=' ); // // STEP 1: // scan for the charset/language portion of the encoded-word // char ch = *scursor++; if ( ch != '?' ) { kDebug(5320) << "first"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // remember start of charset (ie. just after the initial "=?") and // language (just after the first '*') fields: const char * charsetStart = scursor; const char * languageStart = 0; // find delimiting '?' (and the '*' separating charset and language // tags, if any): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?') { break; } else if ( *scursor == '*' && languageStart == 0 ) { languageStart = scursor + 1; } } // not found? can't be an encoded-word! if ( scursor == send || *scursor != '?' ) { kDebug(5320) << "second"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // extract the language information, if any (if languageStart is 0, // language will be null, too): QByteArray maybeLanguage( languageStart, scursor - languageStart ); // extract charset information (keep in mind: the size given to the // ctor is one off due to the \0 terminator): QByteArray maybeCharset( charsetStart, ( languageStart ? 
languageStart - 1 : scursor ) - charsetStart ); // // STEP 2: // scan for the encoding portion of the encoded-word // // remember start of encoding (just _after_ the second '?'): scursor++; const char * encodingStart = scursor; // find next '?' (ending the encoding tag): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?' ) { break; } } // not found? Can't be an encoded-word! if ( scursor == send || *scursor != '?' ) { kDebug(5320) << "third"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } // extract the encoding information: QByteArray maybeEncoding( encodingStart, scursor - encodingStart ); kDebug(5320) << "parseEncodedWord: found charset == \"" << maybeCharset << "\"; language == \"" << maybeLanguage << "\"; encoding == \"" << maybeEncoding << "\""; // // STEP 3: // scan for encoded-text portion of encoded-word // // remember start of encoded-text (just after the third '?'): scursor++; const char * encodedTextStart = scursor; // find next '?' (ending the encoded-text): for ( ; scursor != send ; scursor++ ) { if ( *scursor == '?' ) { break; } } // not found? Can't be an encoded-word! // ### maybe evaluate it nonetheless if the rest is OK? if ( scursor == send || *scursor != '?' 
) { kDebug(5320) << "fourth"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } scursor++; // check for trailing '=': if ( scursor == send || *scursor != '=' ) { kDebug(5320) << "fifth"; KMIME_WARN_PREMATURE_END_OF( EncodedWord ); return false; } scursor++; // set end sentinel for encoded-text: const char * const encodedTextEnd = scursor - 2; // // STEP 4: // setup decoders for the transfer encoding and the charset // // try if there's a codec for the encoding found: Codec * codec = Codec::codecForName( maybeEncoding ); if ( !codec ) { KMIME_WARN_UNKNOWN( Encoding, maybeEncoding ); return false; } // get an instance of a corresponding decoder: Decoder * dec = codec->makeDecoder(); assert( dec ); // try if there's a (text)codec for the charset found: bool matchOK = false; QTextCodec *textCodec = 0; if ( forceCS || maybeCharset.isEmpty() ) { textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK ); usedCS = cachedCharset( defaultCS ); } else { textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK ); if ( !matchOK ) { //no suitable codec found => use default charset textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK ); usedCS = cachedCharset( defaultCS ); } else { usedCS = cachedCharset( maybeCharset ); } } if ( !matchOK || !textCodec ) { KMIME_WARN_UNKNOWN( Charset, maybeCharset ); delete dec; return false; }; kDebug(5320) << "mimeName(): \"" << textCodec->name() << "\""; // allocate a temporary buffer to store the 8bit text: int encodedTextLength = encodedTextEnd - encodedTextStart; QByteArray buffer; buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) ); char *bbegin = buffer.data(); char *bend = bbegin + buffer.length(); // // STEP 5: // do the actual decoding // if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) { KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor(" << encodedTextLength << ")\nresult may be truncated"; } result = textCodec->toUnicode( buffer.data(), 
bbegin - buffer.data() ); kDebug(5320) << "result now: \"" << result << "\""; // cleanup: delete dec; language = maybeLanguage; return true; } static inline void eatWhiteSpace( const char* &scursor, const char * const send ) { while ( scursor != send && ( *scursor == ' ' || *scursor == '\n' || *scursor == '\t' || *scursor == '\r' ) ) scursor++; } bool parseAtom( const char * &scursor, const char * const send, QString &result, bool allow8Bit ) { QPair maybeResult; if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) { result += QString::fromLatin1( maybeResult.first, maybeResult.second ); return true; } return false; } bool parseAtom( const char * &scursor, const char * const send, QPair &result, bool allow8Bit ) { bool success = false; const char *start = scursor; while ( scursor != send ) { signed char ch = *scursor++; if ( ch > 0 && isAText( ch ) ) { // AText: OK success = true; } else if ( allow8Bit && ch < 0 ) { // 8bit char: not OK, but be tolerant. KMIME_WARN_8BIT( ch ); success = true; } else { // CTL or special - marking the end of the atom: // re-set sursor to point to the offending // char and return: scursor--; break; } } result.first = start; result.second = scursor - start; return success; } bool parseToken( const char * &scursor, const char * const send, QString &result, bool allow8Bit ) { QPair maybeResult; if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) { result += QString::fromLatin1( maybeResult.first, maybeResult.second ); return true; } return false; } bool parseToken( const char * &scursor, const char * const send, QPair &result, bool allow8Bit ) { bool success = false; const char * start = scursor; while ( scursor != send ) { signed char ch = *scursor++; if ( ch > 0 && isTText( ch ) ) { // TText: OK success = true; } else if ( allow8Bit && ch < 0 ) { // 8bit char: not OK, but be tolerant. 
KMIME_WARN_8BIT( ch ); success = true; } else { // CTL or tspecial - marking the end of the atom: // re-set sursor to point to the offending // char and return: scursor--; break; } } result.first = start; result.second = scursor - start; return success; } #define READ_ch_OR_FAIL if ( scursor == send ) { \ KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \ return false; \ } else { \ ch = *scursor++; \ } // known issues: // // - doesn't handle quoted CRLF bool parseGenericQuotedString( const char* &scursor, const char * const send, QString &result, bool isCRLF, const char openChar, const char closeChar ) { char ch; // We are in a quoted-string or domain-literal or comment and the // cursor points to the first char after the openChar. // We will apply unfolding and quoted-pair removal. // We return when we either encounter the end or unescaped openChar // or closeChar. assert( *(scursor-1) == openChar || *(scursor-1) == closeChar ); while ( scursor != send ) { ch = *scursor++; if ( ch == closeChar || ch == openChar ) { // end of quoted-string or another opening char: // let caller decide what to do. return true; } switch( ch ) { case '\\': // quoted-pair // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5 READ_ch_OR_FAIL; KMIME_WARN_IF_8BIT( ch ); result += QChar( ch ); break; case '\r': // ### // The case of lonely '\r' is easy to solve, as they're // not part of Unix Line-ending conventions. // But I see a problem if we are given Unix-native // line-ending-mails, where we cannot determine anymore // whether a given '\n' was part of a CRLF or was occurring // on it's own. READ_ch_OR_FAIL; if ( ch != '\n' ) { // CR on it's own... KMIME_WARN_LONE( CR ); result += QChar('\r'); scursor--; // points to after the '\r' again } else { // CRLF encountered. 
// lookahead: check for folding READ_ch_OR_FAIL; if ( ch == ' ' || ch == '\t' ) { // correct folding; // position cursor behind the CRLF WSP (unfolding) // and add the WSP to the result result += QChar( ch ); } else { // this is the "shouldn't happen"-case. There is a CRLF // inside a quoted-string without it being part of FWS. // We take it verbatim. KMIME_WARN_NON_FOLDING( CRLF ); result += "\r\n"; // the cursor is decremented again, so's we need not // duplicate the whole switch here. "ch" could've been // everything (incl. openChar or closeChar). scursor--; } } break; case '\n': // Note: CRLF has been handled above already! // ### LF needs special treatment, depending on whether isCRLF // is true (we can be sure a lonely '\n' was meant this way) or // false ('\n' alone could have meant LF or CRLF in the original // message. This parser assumes CRLF iff the LF is followed by // either WSP (folding) or NULL (premature end of quoted-string; // Should be fixed, since NULL is allowed as per rfc822). READ_ch_OR_FAIL; if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) { // folding // correct folding result += QChar( ch ); } else { // non-folding KMIME_WARN_LONE( LF ); result += QChar('\n'); // pos is decremented, so's we need not duplicate the whole // switch here. ch could've been everything (incl. <">, "\"). scursor--; } break; default: KMIME_WARN_IF_8BIT( ch ); result += QChar( ch ); } } return false; } // known issues: // // - doesn't handle encoded-word inside comments. 
bool parseComment( const char* &scursor, const char * const send, QString &result, bool isCRLF, bool reallySave ) { int commentNestingDepth = 1; const char *afterLastClosingParenPos = 0; QString maybeCmnt; const char *oldscursor = scursor; assert( *(scursor-1) == '(' ); while ( commentNestingDepth ) { QString cmntPart; if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) { assert( *(scursor-1) == ')' || *(scursor-1) == '(' ); // see the kdoc for above function for the possible conditions // we have to check: switch ( *(scursor-1) ) { case ')': if ( reallySave ) { // add the chunk that's now surely inside the comment. result += maybeCmnt; result += cmntPart; if ( commentNestingDepth > 1 ) { // don't add the outermost ')'... result += QChar(')'); } maybeCmnt.clear(); } afterLastClosingParenPos = scursor; --commentNestingDepth; break; case '(': if ( reallySave ) { // don't add to "result" yet, because we might find that we // are already outside the (broken) comment... maybeCmnt += cmntPart; maybeCmnt += QChar('('); } ++commentNestingDepth; break; default: assert( 0 ); } // switch } else { // !parseGenericQuotedString, ie. premature end if ( afterLastClosingParenPos ) { scursor = afterLastClosingParenPos; } else { scursor = oldscursor; } return false; } } // while return true; } // known issues: none. 
bool parsePhrase( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None; QString tmp; QByteArray lang, charset; const char *successfullyParsed = 0; // only used by the encoded-word branch const char *oldscursor; // used to suppress whitespace between adjacent encoded-words // (rfc2047, 6.2): bool lastWasEncodedWord = false; while ( scursor != send ) { char ch = *scursor++; switch ( ch ) { case '.': // broken, but allow for intorop's sake if ( found == None ) { --scursor; return false; } else { if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) { result += ". "; } else { result += '.'; } successfullyParsed = scursor; } break; case '"': // quoted-string tmp.clear(); if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { successfullyParsed = scursor; assert( *(scursor-1) == '"' ); switch ( found ) { case None: found = QuotedString; break; case Phrase: case Atom: case EncodedWord: case QuotedString: found = Phrase; result += QChar(' '); // rfc822, 3.4.4 break; default: assert( 0 ); } lastWasEncodedWord = false; result += tmp; } else { // premature end of quoted string. // What to do? Return leading '"' as special? Return as quoted-string? // We do the latter if we already found something, else signal failure. 
if ( found == None ) { return false; } else { result += QChar(' '); // rfc822, 3.4.4 result += tmp; return true; } } break; case '(': // comment // parse it, but ignore content: tmp.clear(); if ( parseComment( scursor, send, tmp, isCRLF, false /*don't bother with the content*/ ) ) { successfullyParsed = scursor; lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2 } else { if ( found == None ) { return false; } else { scursor = successfullyParsed; return true; } } break; case '=': // encoded-word tmp.clear(); oldscursor = scursor; lang.clear(); charset.clear(); if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) { successfullyParsed = scursor; switch ( found ) { case None: found = EncodedWord; break; case Phrase: case EncodedWord: case Atom: case QuotedString: if ( !lastWasEncodedWord ) { result += QChar(' '); // rfc822, 3.4.4 } found = Phrase; break; default: assert( 0 ); } lastWasEncodedWord = true; result += tmp; break; } else { // parse as atom: scursor = oldscursor; } // fall though... default: //atom tmp.clear(); scursor--; if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) { successfullyParsed = scursor; switch ( found ) { case None: found = Atom; break; case Phrase: case Atom: case EncodedWord: case QuotedString: found = Phrase; result += QChar(' '); // rfc822, 3.4.4 break; default: assert( 0 ); } lastWasEncodedWord = false; result += tmp; } else { if ( found == None ) { return false; } else { scursor = successfullyParsed; return true; } } } eatWhiteSpace( scursor, send ); } return found != None; } bool parseDotAtom( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); // always points to just after the last atom parsed: const char *successfullyParsed; QString tmp; if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { return false; } result += tmp; successfullyParsed = scursor; while ( scursor != send ) { // end of header or no '.' 
-> return if ( scursor == send || *scursor != '.' ) { return true; } scursor++; // eat '.' if ( scursor == send || !isAText( *scursor ) ) { // end of header or no AText, but this time following a '.'!: // reset cursor to just after last successfully parsed char and // return: scursor = successfullyParsed; return true; } // try to parse the next atom: QString maybeAtom; if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) { scursor = successfullyParsed; return true; } result += QChar('.'); result += maybeAtom; successfullyParsed = scursor; } scursor = successfullyParsed; return true; } void eatCFWS( const char* &scursor, const char * const send, bool isCRLF ) { QString dummy; while ( scursor != send ) { const char *oldscursor = scursor; char ch = *scursor++; switch( ch ) { case ' ': case '\t': // whitespace case '\r': case '\n': // folding continue; case '(': // comment if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) { continue; } scursor = oldscursor; return; default: scursor = oldscursor; return; } } } bool parseDomain( const char* &scursor, const char * const send, QString &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // domain := dot-atom / domain-literal / atom *("." atom) // // equivalent to: // domain = dot-atom / domain-literal, // since parseDotAtom does allow CFWS between atoms and dots if ( *scursor == '[' ) { // domain-literal: QString maybeDomainLiteral; // eat '[': scursor++; while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral, isCRLF, '[', ']' ) ) { if ( scursor == send ) { // end of header: check for closing ']': if ( *(scursor-1) == ']' ) { // OK, last char was ']': result = maybeDomainLiteral; return true; } else { // not OK, domain-literal wasn't closed: return false; } } // we hit openChar in parseGenericQuotedString. 
// include it in maybeDomainLiteral and keep on parsing: if ( *(scursor-1) == '[' ) { maybeDomainLiteral += QChar('['); continue; } // OK, real end of domain-literal: result = maybeDomainLiteral; return true; } } else { // dot-atom: QString maybeDotAtom; if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) { result = maybeDotAtom; // Domain may end with '.', if so preserve it' if ( scursor != send && *scursor == '.' ) { result += QChar('.'); scursor++; } return true; } } return false; } bool parseObsRoute( const char* &scursor, const char* const send, QStringList &result, bool isCRLF, bool save ) { while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // empty entry: if ( *scursor == ',' ) { scursor++; if ( save ) { result.append( QString() ); } continue; } // empty entry ending the list: if ( *scursor == ':' ) { scursor++; if ( save ) { result.append( QString() ); } return true; } // each non-empty entry must begin with '@': if ( *scursor != '@' ) { return false; } else { scursor++; } QString maybeDomain; if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { return false; } if ( save ) { result.append( maybeDomain ); } // eat the following (optional) comma: eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( *scursor == ':' ) { scursor++; return true; } if ( *scursor == ',' ) { scursor++; } } return false; } bool parseAddrSpec( const char* &scursor, const char * const send, AddrSpec &result, bool isCRLF ) { // // STEP 1: // local-part := dot-atom / quoted-string / word *("." word) // // this is equivalent to: // local-part := word *("." 
word) QString maybeLocalPart; QString tmp; while ( scursor != send ) { // first, eat any whitespace eatCFWS( scursor, send, isCRLF ); char ch = *scursor++; switch ( ch ) { case '.': // dot maybeLocalPart += QChar('.'); break; case '@': goto SAW_AT_SIGN; break; case '"': // quoted-string tmp.clear(); if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { maybeLocalPart += tmp; } else { return false; } break; default: // atom scursor--; // re-set scursor to point to ch again tmp.clear(); if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { maybeLocalPart += tmp; } else { return false; // parseAtom can only fail if the first char is non-atext. } break; } } return false; // // STEP 2: // domain // SAW_AT_SIGN: assert( *(scursor-1) == '@' ); QString maybeDomain; if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { return false; } result.localPart = maybeLocalPart; result.domain = maybeDomain; return true; } bool parseAngleAddr( const char* &scursor, const char * const send, AddrSpec &result, bool isCRLF ) { // first, we need an opening angle bracket: eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != '<' ) { return false; } scursor++; // eat '<' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( *scursor == '@' || *scursor == ',' ) { // obs-route: parse, but ignore: KMIME_WARN << "obsolete source route found! 
ignoring."; QStringList dummy; if ( !parseObsRoute( scursor, send, dummy, isCRLF, false /* don't save */ ) ) { return false; } // angle-addr isn't complete until after the '>': if ( scursor == send ) { return false; } } // parse addr-spec: AddrSpec maybeAddrSpec; if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != '>' ) { return false; } scursor++; result = maybeAddrSpec; return true; } bool parseMailbox( const char* &scursor, const char * const send, Mailbox &result, bool isCRLF ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } AddrSpec maybeAddrSpec; QString maybeDisplayName; // first, try if it's a vanilla addr-spec: const char * oldscursor = scursor; if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { result.setAddress( maybeAddrSpec ); // check for the obsolete form of display-name (as comment): eatWhiteSpace( scursor, send ); if ( scursor != send && *scursor == '(' ) { scursor++; if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { return false; } } result.setNameFrom7Bit( maybeDisplayName.toLatin1() ); return true; } scursor = oldscursor; // second, see if there's a display-name: if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { // failed: reset cursor, note absent display-name maybeDisplayName.clear(); scursor = oldscursor; } else { // succeeded: eat CFWS eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } } // third, parse the angle-addr: if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) { return false; } if ( maybeDisplayName.isNull() ) { // check for the obsolete form of display-name (as comment): eatWhiteSpace( scursor, send ); if ( scursor != send && *scursor == '(' ) { scursor++; if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { return false; } } } result.setName( maybeDisplayName ); result.setAddress( 
maybeAddrSpec ); return true; } bool parseGroup( const char* &scursor, const char * const send, Address &result, bool isCRLF ) { // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS] // // equivalent to: // group := display-name ":" [ obs-mbox-list ] ";" eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // get display-name: QString maybeDisplayName; if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { return false; } // get ":": eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != ':' ) { return false; } - result.displayName = maybeDisplayName; + // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that + // automatically calls removeBidiControlChars + result.displayName = removeBidiControlChars( maybeDisplayName ); // get obs-mbox-list (may contain empty entries): scursor++; while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // empty entry: if ( *scursor == ',' ) { scursor++; continue; } // empty entry ending the list: if ( *scursor == ';' ) { scursor++; return true; } Mailbox maybeMailbox; if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { return false; } result.mailboxList.append( maybeMailbox ); eatCFWS( scursor, send, isCRLF ); // premature end: if ( scursor == send ) { return false; } // regular end of the list: if ( *scursor == ';' ) { scursor++; return true; } // eat regular list entry separator: if ( *scursor == ',' ) { scursor++; } } return false; } bool parseAddress( const char* &scursor, const char * const send, Address &result, bool isCRLF ) { // address := mailbox / group eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // first try if it's a single mailbox: Mailbox maybeMailbox; const char * oldscursor = scursor; if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { // yes, it is: result.displayName.clear(); result.mailboxList.append( maybeMailbox ); return true; } 
scursor = oldscursor; Address maybeAddress; // no, it's not a single mailbox. Try if it's a group: if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) { return false; } result = maybeAddress; return true; } bool parseAddressList( const char* &scursor, const char * const send, AddressList &result, bool isCRLF ) { while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); // end of header: this is OK. if ( scursor == send ) { return true; } // empty entry: ignore: if ( *scursor == ',' ) { scursor++; continue; } // broken clients might use ';' as list delimiter, accept that as well if ( *scursor == ';' ) { scursor++; continue; } // parse one entry Address maybeAddress; if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) { return false; } result.append( maybeAddress ); eatCFWS( scursor, send, isCRLF ); // end of header: this is OK. if ( scursor == send ) { return true; } // comma separating entries: eat it. if ( *scursor == ',' ) { scursor++; } } return true; } static QString asterisk = QString::fromLatin1( "*0*", 1 ); static QString asteriskZero = QString::fromLatin1( "*0*", 2 ); //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 ); bool parseParameter( const char* &scursor, const char * const send, QPair &result, bool isCRLF ) { // parameter = regular-parameter / extended-parameter // regular-parameter = regular-parameter-name "=" value // extended-parameter = // value = token / quoted-string // // note that rfc2231 handling is out of the scope of this function. // Therefore we return the attribute as QString and the value as // (start,length) tupel if we see that the value is encoded // (trailing asterisk), for parseParameterList to decode... 
eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // parse the parameter name: // QString maybeAttribute; if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) { return false; } eatCFWS( scursor, send, isCRLF ); // premature end: not OK (haven't seen '=' yet). if ( scursor == send || *scursor != '=' ) { return false; } scursor++; // eat '=' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { // don't choke on attribute=, meaning the value was omitted: if ( maybeAttribute.endsWith( asterisk ) ) { KMIME_WARN << "attribute ends with \"*\", but value is empty!" "Chopping away \"*\"."; maybeAttribute.truncate( maybeAttribute.length() - 1 ); } result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return true; } const char * oldscursor = scursor; // // parse the parameter value: // QStringOrQPair maybeValue; if ( *scursor == '"' ) { // value is a quoted-string: scursor++; if ( maybeAttribute.endsWith( asterisk ) ) { // attributes ending with "*" designate extended-parameters, // which cannot have quoted-strings as values. So we remove the // trailing "*" to not confuse upper layers. KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!" "Chopping away \"*\"."; maybeAttribute.truncate( maybeAttribute.length() - 1 ); } if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) { scursor = oldscursor; result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return false; // this case needs further processing by upper layers!! } } else { // value is a token: if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) { scursor = oldscursor; result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); return false; // this case needs further processing by upper layers!! 
} } result = qMakePair( maybeAttribute.toLower(), maybeValue ); return true; } bool parseRawParameterList( const char* &scursor, const char * const send, QMap &result, bool isCRLF ) { // we use parseParameter() consecutively to obtain a map of raw // attributes to raw values. "Raw" here means that we don't do // rfc2231 decoding and concatenation. This is left to // parseParameterList(), which will call this function. // // The main reason for making this chunk of code a separate // (private) method is that we can deal with broken parameters // _here_ and leave the rfc2231 handling solely to // parseParameterList(), which will still be enough work. while ( scursor != send ) { eatCFWS( scursor, send, isCRLF ); // empty entry ending the list: OK. if ( scursor == send ) { return true; } // empty list entry: ignore. if ( *scursor == ';' ) { scursor++; continue; } QPair maybeParameter; if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) { // we need to do a bit of work if the attribute is not // NULL. These are the cases marked with "needs further // processing" in parseParameter(). Specifically, parsing of the // token or the quoted-string, which should represent the value, // failed. We take the easy way out and simply search for the // next ';' to start parsing again. (Another option would be to // take the text between '=' and ';' as value) if ( maybeParameter.first.isNull() ) { return false; } while ( scursor != send ) { if ( *scursor++ == ';' ) { goto IS_SEMICOLON; } } // scursor == send case: end of list. return true; IS_SEMICOLON: // *scursor == ';' case: parse next entry. continue; } // successful parsing brings us here: result.insert( maybeParameter.first, maybeParameter.second ); eatCFWS( scursor, send, isCRLF ); // end of header: ends list. if ( scursor == send ) { return true; } // regular separator: eat it. 
if ( *scursor == ';' ) { scursor++; } } return true; } static void decodeRFC2231Value( Codec* &rfc2231Codec, QTextCodec* &textcodec, bool isContinuation, QString &value, QPair &source ) { // // parse the raw value into (charset,language,text): // const char * decBegin = source.first; const char * decCursor = decBegin; const char * decEnd = decCursor + source.second; if ( !isContinuation ) { // find the first single quote while ( decCursor != decEnd ) { if ( *decCursor == '\'' ) { break; } else { decCursor++; } } if ( decCursor == decEnd ) { // there wasn't a single single quote at all! // take the whole value to be in latin-1: KMIME_WARN << "No charset in extended-initial-value." "Assuming \"iso-8859-1\"."; value += QString::fromLatin1( decBegin, source.second ); return; } QByteArray charset( decBegin, decCursor - decBegin ); const char * oldDecCursor = ++decCursor; // find the second single quote (we ignore the language tag): while ( decCursor != decEnd ) { if ( *decCursor == '\'' ) { break; } else { decCursor++; } } if ( decCursor == decEnd ) { KMIME_WARN << "No language in extended-initial-value." 
"Trying to recover."; decCursor = oldDecCursor; } else { decCursor++; } // decCursor now points to the start of the // "extended-other-values": // // get the decoders: // bool matchOK = false; textcodec = KGlobal::charsets()->codecForName( charset, matchOK ); if ( !matchOK ) { textcodec = 0; KMIME_WARN_UNKNOWN( Charset, charset ); } } if ( !rfc2231Codec ) { rfc2231Codec = Codec::codecForName("x-kmime-rfc2231"); assert( rfc2231Codec ); } if ( !textcodec ) { value += QString::fromLatin1( decCursor, decEnd - decCursor ); return; } Decoder * dec = rfc2231Codec->makeDecoder(); assert( dec ); // // do the decoding: // QByteArray buffer; buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) ); QByteArray::Iterator bit = buffer.begin(); QByteArray::ConstIterator bend = buffer.end(); if ( !dec->decode( decCursor, decEnd, bit, bend ) ) { KMIME_WARN << rfc2231Codec->name() << "codec lies about its maxDecodedSizeFor()" << endl << "result may be truncated"; } value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() ); kDebug(5320) << "value now: \"" << value << "\""; // cleanup: delete dec; } // known issues: // - permutes rfc2231 continuations when the total number of parts // exceeds 10 (other-sections then becomes *xy, ie. two digits) bool parseParameterList( const char* &scursor, const char * const send, QMap &result, bool isCRLF ) { // parse the list into raw attribute-value pairs: QMap rawParameterList; if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) { return false; } if ( rawParameterList.isEmpty() ) { return true; } // decode rfc 2231 continuations and alternate charset encoding: // NOTE: this code assumes that what QMapIterator delivers is sorted // by the key! 
Codec * rfc2231Codec = 0; QTextCodec * textcodec = 0; QString attribute; QString value; enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode; QMap::Iterator it, end = rawParameterList.end(); for ( it = rawParameterList.begin() ; it != end ; ++it ) { if ( attribute.isNull() || !it.key().startsWith( attribute ) ) { // // new attribute: // // store the last attribute/value pair in the result map now: if ( !attribute.isNull() ) { result.insert( attribute, value ); } // and extract the information from the new raw attribute: value.clear(); attribute = it.key(); mode = NoMode; // is the value encoded? if ( attribute.endsWith( asterisk ) ) { attribute.truncate( attribute.length() - 1 ); mode = (Modes) ((int) mode | Encoded); } // is the value continued? if ( attribute.endsWith( asteriskZero ) ) { attribute.truncate( attribute.length() - 2 ); mode = (Modes) ((int) mode | Continued); } // // decode if necessary: // if ( mode & Encoded ) { decodeRFC2231Value( rfc2231Codec, textcodec, false, /* isn't continuation */ value, (*it).qpair ); } else { // not encoded. 
if ( (*it).qpair.first ) { value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); } else { value += (*it).qstring; } } // // shortcut-processing when the value isn't encoded: // if ( !(mode & Continued) ) { // save result already: result.insert( attribute, value ); // force begin of a new attribute: attribute.clear(); } } else { // it.key().startsWith( attribute ) // // continuation // // ignore the section and trust QMap to have sorted the keys: if ( it.key().endsWith( asterisk ) ) { // encoded decodeRFC2231Value( rfc2231Codec, textcodec, true, /* is continuation */ value, (*it).qpair ); } else { // not encoded if ( (*it).qpair.first ) { value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); } else { value += (*it).qstring; } } } } // write last attr/value pair: if ( !attribute.isNull() ) { result.insert( attribute, value ); } return true; } static const char * stdDayNames[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames; static bool parseDayName( const char* &scursor, const char * const send ) { // check bounds: if ( send - scursor < 3 ) { return false; } for ( int i = 0 ; i < stdDayNamesLen ; ++i ) { if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) { scursor += 3; // kDebug(5320) << "found" << stdDayNames[i]; return true; } } return false; } static const char * stdMonthNames[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; static const int stdMonthNamesLen = sizeof stdMonthNames / sizeof *stdMonthNames; static bool parseMonthName( const char* &scursor, const char * const send, int &result ) { // check bounds: if ( send - scursor < 3 ) { return false; } for ( result = 0 ; result < stdMonthNamesLen ; ++result ) { if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) { scursor += 3; return true; } } // not found: return false; } static const struct { const char * tzName; long int secsEastOfGMT; } 
timeZones[] = { // rfc 822 timezones: { "GMT", 0 }, { "UT", 0 }, { "EDT", -4*3600 }, { "EST", -5*3600 }, { "MST", -5*3600 }, { "CST", -6*3600 }, { "MDT", -6*3600 }, { "MST", -7*3600 }, { "PDT", -7*3600 }, { "PST", -8*3600 }, // common, non-rfc-822 zones: { "CET", 1*3600 }, { "MET", 1*3600 }, { "UTC", 0 }, { "CEST", 2*3600 }, { "BST", 1*3600 }, // rfc 822 military timezones: { "Z", 0 }, { "A", -1*3600 }, { "B", -2*3600 }, { "C", -3*3600 }, { "D", -4*3600 }, { "E", -5*3600 }, { "F", -6*3600 }, { "G", -7*3600 }, { "H", -8*3600 }, { "I", -9*3600 }, // J is not used! { "K", -10*3600 }, { "L", -11*3600 }, { "M", -12*3600 }, { "N", 1*3600 }, { "O", 2*3600 }, { "P", 3*3600 }, { "Q", 4*3600 }, { "R", 5*3600 }, { "S", 6*3600 }, { "T", 7*3600 }, { "U", 8*3600 }, { "V", 9*3600 }, { "W", 10*3600 }, { "X", 11*3600 }, { "Y", 12*3600 }, }; static const int timeZonesLen = sizeof timeZones / sizeof *timeZones; static bool parseAlphaNumericTimeZone( const char* &scursor, const char * const send, long int &secsEastOfGMT, bool &timeZoneKnown ) { QPair maybeTimeZone( 0, 0 ); if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) { return false; } for ( int i = 0 ; i < timeZonesLen ; ++i ) { if ( qstrnicmp( timeZones[i].tzName, maybeTimeZone.first, maybeTimeZone.second ) == 0 ) { scursor += maybeTimeZone.second; secsEastOfGMT = timeZones[i].secsEastOfGMT; timeZoneKnown = true; return true; } } // don't choke just because we don't happen to know the time zone KMIME_WARN_UNKNOWN( time zone, QByteArray( maybeTimeZone.first, maybeTimeZone.second ) ); secsEastOfGMT = 0; timeZoneKnown = false; return true; } // parse a number and return the number of digits parsed: int parseDigits( const char* &scursor, const char * const send, int &result ) { result = 0; int digits = 0; for ( ; scursor != send && isdigit( *scursor ) ; scursor++, digits++ ) { result *= 10; result += int( *scursor - '0' ); } return digits; } static bool parseTimeOfDay( const char* &scursor, const char * const 
send, int &hour, int &min, int &sec, bool isCRLF=false ) { // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ] // // 2DIGIT representing "hour": // if ( !parseDigits( scursor, send, hour ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send || *scursor != ':' ) { return false; } scursor++; // eat ':' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // 2DIGIT representing "minute": // if ( !parseDigits( scursor, send, min ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return true; // seconds are optional } // // let's see if we have a 2DIGIT representing "second": // if ( *scursor == ':' ) { // yepp, there are seconds: scursor++; // eat ':' eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( !parseDigits( scursor, send, sec ) ) { return false; } } else { sec = 0; } return true; } bool parseTime( const char* &scursor, const char * send, int &hour, int &min, int &sec, long int &secsEastOfGMT, bool &timeZoneKnown, bool isCRLF ) { // time := time-of-day CFWS ( zone / obs-zone ) // // obs-zone := "UT" / "GMT" / // "EST" / "EDT" / ; -0500 / -0400 // "CST" / "CDT" / ; -0600 / -0500 // "MST" / "MDT" / ; -0700 / -0600 // "PST" / "PDT" / ; -0800 / -0700 // "A"-"I" / "a"-"i" / // "K"-"Z" / "k"-"z" eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { timeZoneKnown = false; secsEastOfGMT = 0; return true; // allow missing timezone } timeZoneKnown = true; if ( *scursor == '+' || *scursor == '-' ) { // remember and eat '-'/'+': const char sign = *scursor++; // numerical timezone: int maybeTimeZone; if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) { return false; } secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 ); if ( sign == '-' ) { secsEastOfGMT *= -1; if ( 
secsEastOfGMT == 0 ) { timeZoneKnown = false; // -0000 means indetermined tz } } } else { // maybe alphanumeric timezone: if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) { return false; } } return true; } bool parseDateTime( const char* &scursor, const char * const send, KDateTime &result, bool isCRLF ) { // Parsing date-time; strict mode: // // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date // time // // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" result = KDateTime(); QDateTime maybeDateTime; eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // let's see if there's a day-of-week: // if ( parseDayName( scursor, send ) ) { eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // day-name should be followed by ',' but we treat it as optional: if ( *scursor == ',' ) { scursor++; // eat ',' eatCFWS( scursor, send, isCRLF ); } } // // 1*2DIGIT representing "day" (of month): // int maybeDay; if ( !parseDigits( scursor, send, maybeDay ) ) { return false; } eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // month-name: // int maybeMonth = 0; if ( !parseMonthName( scursor, send, maybeMonth ) ) { return false; } if ( scursor == send ) { return false; } assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 ); ++maybeMonth; // 0-11 -> 1-12 eatCFWS( scursor, send, isCRLF ); if ( scursor == send ) { return false; } // // 2*DIGIT representing "year": // int maybeYear; if ( !parseDigits( scursor, send, maybeYear ) ) { return false; } // RFC 2822 4.3 processing: if ( maybeYear < 50 ) { maybeYear += 2000; } else if ( maybeYear < 1000 ) { maybeYear += 1900; } // else keep as is if ( maybeYear < 1900 ) { return false; // rfc2822, 3.3 } eatCFWS( scursor, send, isCRLF 
); if ( scursor == send ) { return false; } maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) ); // // time // int maybeHour, maybeMinute, maybeSecond; long int secsEastOfGMT; bool timeZoneKnown = true; if ( !parseTime( scursor, send, maybeHour, maybeMinute, maybeSecond, secsEastOfGMT, timeZoneKnown, isCRLF ) ) { return false; } maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) ); if ( !maybeDateTime.isValid() ) return false; result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) ); if ( !result.isValid() ) return false; return true; } } // namespace HeaderParsing } // namespace KMime Index: branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_util.h =================================================================== --- branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_util.h (revision 1066387) +++ branches/kdepim/enterprise4/kdepimlibs/kmime/kmime_util.h (revision 1066388) @@ -1,234 +1,242 @@ /* -*- c++ -*- kmime_util.h KMime, the KDE internet mail/usenet news message library. Copyright (c) 2001 the KMime authors. See file AUTHORS for details This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef __KMIME_UTIL_H__ #define __KMIME_UTIL_H__ #include #include "kmime_export.h" namespace KMime { /** Consult the charset cache. 
Only used for reducing mem usage by keeping strings in a common repository. @param name */ KMIME_EXPORT extern QByteArray cachedCharset( const QByteArray &name ); /** Consult the language cache. Only used for reducing mem usage by keeping strings in a common repository. @param name */ KMIME_EXPORT extern QByteArray cachedLanguage( const QByteArray &name ); /** Checks whether @p s contains any non-us-ascii characters. @param s */ KMIME_EXPORT extern bool isUsAscii( const QString &s ); //@cond PRIVATE extern const uchar specialsMap[16]; extern const uchar tSpecialsMap[16]; extern const uchar aTextMap[16]; extern const uchar tTextMap[16]; extern const uchar eTextMap[16]; inline bool isOfSet( const uchar map[16], unsigned char ch ) { return ( ch < 128 ) && ( map[ ch/8 ] & 0x80 >> ch%8 ); } inline bool isSpecial( char ch ) { return isOfSet( specialsMap, ch ); } inline bool isTSpecial( char ch ) { return isOfSet( tSpecialsMap, ch ); } inline bool isAText( char ch ) { return isOfSet( aTextMap, ch ); } inline bool isTText( char ch ) { return isOfSet( tTextMap, ch ); } inline bool isEText( char ch ) { return isOfSet( eTextMap, ch ); } //@endcond /** Decodes string @p src according to RFC2047,i.e., the construct =?charset?[qb]?encoded?= @param src source string. @param usedCS the detected charset is returned here @param defaultCS the charset to use in case the detected one isn't known to us. @param forceCS force the use of the default charset. @return the decoded string. */ KMIME_EXPORT extern QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS, const QByteArray &defaultCS = QByteArray(), bool forceCS = false ); /** Decode string @p src according to RFC2047 (ie. the =?charset?[qb]?encoded?= construct). @param src source string. @return the decoded string. */ KMIME_EXPORT extern QString decodeRFC2047String( const QByteArray &src ); /** Encodes string @p src according to RFC2047 using charset @p charset. @param src source string. 
@param charset charset to use. @param addressHeader if this flag is true, all special chars like <,>,[,],... will be encoded, too. @param allow8bitHeaders if this flag is true, 8Bit headers are allowed. @return the encoded string. */ KMIME_EXPORT extern QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset, bool addressHeader=false, bool allow8bitHeaders=false ); /** Uses current time, pid and random numbers to construct a string that aims to be unique on a per-host basis (ie. for the local part of a message-id or for multipart boundaries. @return the unique string. @see multiPartBoundary */ KMIME_EXPORT extern QByteArray uniqueString(); /** Constructs a random string (sans leading/trailing "--") that can be used as a multipart delimiter (ie. as @p boundary parameter to a multipart/... content-type). @return the randomized string. @see uniqueString */ KMIME_EXPORT extern QByteArray multiPartBoundary(); /** Unfolds the given header if necessary. @param header The header to unfold. */ KMIME_EXPORT extern QByteArray unfoldHeader( const QByteArray &header ); /** Tries to extract the header with name @p name from the string @p src, unfolding it if necessary. @param src the source string. @param name the name of the header to search for. @return the first instance of the header @p name in @p src or a null QCString if no such header was found. */ KMIME_EXPORT extern QByteArray extractHeader( const QByteArray &src, const QByteArray &name ); /** Converts all occurrences of "\r\n" (CRLF) in @p s to "\n" (LF). This function is expensive and should be used only if the mail will be stored locally. All decode functions can cope with both line endings. @param s source string containing CRLF's @return the string with CRLF's substitued for LF's @see CRLFtoLF(const char*) LFtoCRLF */ KMIME_EXPORT extern QByteArray CRLFtoLF( const QByteArray &s ); /** Converts all occurrences of "\r\n" (CRLF) in @p s to "\n" (LF). 
This function is expensive and should be used only if the mail will be stored locally. All decode functions can cope with both line endings. @param s source string containing CRLF's @return the string with CRLF's substitued for LF's @see CRLFtoLF(const QCString&) LFtoCRLF */ KMIME_EXPORT extern QByteArray CRLFtoLF( const char *s ); /** Converts all occurrences of "\n" (LF) in @p s to "\r\n" (CRLF). This function is expensive and should be used only if the mail will be transmitted as an RFC822 message later. All decode functions can cope with and all encode functions can optionally produce both line endings, which is much faster. @param s source string containing CRLF's @return the string with CRLF's substitued for LF's @see CRLFtoLF(const QCString&) LFtoCRLF */ KMIME_EXPORT extern QByteArray LFtoCRLF( const QByteArray &s ); /** Removes quote (DQUOTE) characters and decodes "quoted-pairs" (ie. backslash-escaped characters) @param str the string to work on. @see addQuotes */ KMIME_EXPORT extern void removeQuots( QByteArray &str ); /** Removes quote (DQUOTE) characters and decodes "quoted-pairs" (ie. backslash-escaped characters) @param str the string to work on. @see addQuotes */ KMIME_EXPORT extern void removeQuots( QString &str ); /** Converts the given string into a quoted-string if the string contains any special characters (ie. one of ()<>@,.;:[]=\"). @param str us-ascii string to work on. @param forceQuotes if @p true, always add quote characters. */ KMIME_EXPORT extern void addQuotes( QByteArray &str, bool forceQuotes ); +/** + * Similar to the above function. Instead of trying to balance the Bidi chars, it outright + * removes them from the string. + * + * Reason: KHTML seems to ignore the PDF character, so adding them doesn't fix things :( + */ +KMIME_EXPORT QString removeBidiControlChars( const QString &input ); + } // namespace KMime #endif /* __KMIME_UTIL_H__ */