24#include "kmime_util.h"
25#include "kmime_util_p.h"
29#include "kmime_header_parsing.h"
30#include "kmime_message.h"
31#include "kmime_warning.h"
33#include <config-kmime.h>
37#include <klocalizedstring.h>
42#include <QtCore/QList>
43#include <QtCore/QString>
44#include <QtCore/QTextCodec>
// File-level state used by the helper functions below.
// Charset names seen so far; cachedCharset() appends upper-cased entries here.
55QList<QByteArray> c_harsetCache;
// Language names seen so far; cachedLanguage() appends upper-cased entries here.
56QList<QByteArray> l_anguageCache;
// Fallback character encoding; written by setFallbackCharEncoding() and read by
// fallbackCharEncoding() and decodeRFC2047String().
57QString f_allbackCharEnc;
// Flag written by setUseOutlookAttachmentEncoding(); off by default.
58bool u_seOutlookEncoding =
false;
// Looks name up in c_harsetCache with a case-insensitive comparison (qstricmp).
// On the path shown below, the upper-cased name is appended to the cache and
// the stored copy is returned.
60QByteArray cachedCharset(
const QByteArray &name )
62 foreach (
const QByteArray& charset, c_harsetCache ) {
63 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
// NOTE(review): the body of the match branch is not visible here; presumably it
// returns the already cached entry — confirm.
68 c_harsetCache.append( name.toUpper() );
70 return c_harsetCache.last();
// Looks name up in l_anguageCache with a case-insensitive comparison (qstricmp).
// On the path shown below, the upper-cased name is appended to the cache and
// the stored copy is returned.
73QByteArray cachedLanguage(
const QByteArray &name )
75 foreach (
const QByteArray& language, l_anguageCache ) {
76 if ( qstricmp( name.data(), language.data() ) == 0 ) {
// NOTE(review): the body of the match branch is not visible here; presumably it
// returns the already cached entry — confirm.
81 l_anguageCache.append( name.toUpper() );
83 return l_anguageCache.last();
86bool isUsAscii(
const QString &s )
88 uint sLength = s.length();
89 for ( uint i=0; i<sLength; i++ ) {
90 if ( s.at( i ).toLatin1() <= 0 ) {
// Case labels mapping each Headers content-transfer-encoding enumerator to its
// textual name; any value not listed yields "unknown".
// NOTE(review): the enclosing function header and switch statement are not
// visible in this part of the file.
100 case Headers::CE7Bit:
return QString::fromLatin1(
"7bit" );
101 case Headers::CE8Bit:
return QString::fromLatin1(
"8bit" );
102 case Headers::CEquPr:
return QString::fromLatin1(
"quoted-printable" );
103 case Headers::CEbase64:
return QString::fromLatin1(
"base64" );
104 case Headers::CEuuenc:
return QString::fromLatin1(
"uuencode" );
105 case Headers::CEbinary:
return QString::fromLatin1(
"binary" );
106 default:
return QString::fromLatin1(
"unknown" );
// Builds the list of content transfer encodings considered usable for data.
// The list is filled according to cf.type().
// NOTE(review): cf is declared on a line not visible here; presumably it is a
// character-frequency analysis of data — confirm.
110QList<Headers::contentEncoding> encodingsForData(
const QByteArray &data )
112 QList<Headers::contentEncoding> allowed;
115 switch ( cf.type() ) {
117 allowed << Headers::CE7Bit;
119 allowed << Headers::CE8Bit;
// The share of printable characters decides the order in which quoted-printable
// and base64 are listed: quoted-printable first in one branch, base64 first in
// the other.
121 if ( cf.printableRatio() > 5.0/6.0 ) {
125 allowed << Headers::CEquPr;
126 allowed << Headers::CEbase64;
128 allowed << Headers::CEbase64;
129 allowed << Headers::CEquPr;
133 allowed << Headers::CEbase64;
// Character-class lookup tables of 16 bytes (128 bits) each.
// NOTE(review): the data is consistent with one bit per 7-bit ASCII code, most
// significant bit first within each byte (0x20 at index 4 would then mark code
// 34, the double quote) — confirm against the code that reads these tables,
// which is not visible here.

// Under that reading the set bits are: " ( ) , . : ; < > @ [ \ ]
144const uchar specialsMap[16] = {
145 0x00, 0x00, 0x00, 0x00,
146 0x20, 0xCA, 0x00, 0x3A,
147 0x80, 0x00, 0x00, 0x1C,
148 0x00, 0x00, 0x00, 0x00
// Under that reading the set bits are: " ( ) , / : ; < = > ? @ [ \ ]
152const uchar tSpecialsMap[16] = {
153 0x00, 0x00, 0x00, 0x00,
154 0x20, 0xC9, 0x00, 0x3F,
155 0x80, 0x00, 0x00, 0x1C,
156 0x00, 0x00, 0x00, 0x00
// Table with most printable codes set; differs from tTextMap in bytes 5 and 7.
160const uchar aTextMap[16] = {
161 0x00, 0x00, 0x00, 0x00,
162 0x5F, 0x35, 0xFF, 0xC5,
163 0x7F, 0xFF, 0xFF, 0xE3,
164 0xFF, 0xFF, 0xFF, 0xFE
// Table with most printable codes set; differs from aTextMap in bytes 5 and 7.
168const uchar tTextMap[16] = {
169 0x00, 0x00, 0x00, 0x00,
170 0x5F, 0x36, 0xFF, 0xC0,
171 0x7F, 0xFF, 0xFF, 0xE3,
172 0xFF, 0xFF, 0xFF, 0xFE
// Most restrictive of the three text tables (fewest bits set).
176const uchar eTextMap[16] = {
177 0x00, 0x00, 0x00, 0x00,
178 0x40, 0x35, 0xFF, 0xC0,
179 0x7F, 0xFF, 0xFF, 0xE0,
180 0x7F, 0xFF, 0xFF, 0xE0
// Stores fallbackCharEnc in the file-level f_allbackCharEnc variable, which
// decodeRFC2047String() consults when its UTF-8 decoding yields U+FFFD.
183void setFallbackCharEncoding(
const QString& fallbackCharEnc)
185 f_allbackCharEnc = fallbackCharEnc;
// Returns the current value of the file-level f_allbackCharEnc variable.
188QString fallbackCharEncoding()
190 return f_allbackCharEnc;
// Stores violateStandard in the file-level u_seOutlookEncoding flag.
193void setUseOutlookAttachmentEncoding(
bool violateStandard )
195 u_seOutlookEncoding = violateStandard;
// Returns the current value of the file-level u_seOutlookEncoding flag.
198bool useOutlookAttachmentEncoding()
200 return u_seOutlookEncoding;
// Decodes the encoded words contained in src and returns the resulting text.
// Candidate encoded words are handed to HeaderParsing::parseEncodedWord()
// together with usedCS, defaultCS and forceCS; decoded text is accumulated as
// UTF-8 bytes in result and converted to a QString at the end.
204QString decodeRFC2047String(
const QByteArray &src, QByteArray &usedCS,
205 const QByteArray &defaultCS,
bool forceCS )
// Whitespace seen right after an encoded word is parked here until it is known
// what follows it.
209 QByteArray spaceBuffer;
210 spaceBuffer.reserve(64);
211 const char *scursor = src.constData();
212 const char *send = scursor + src.length();
213 bool onlySpacesSinceLastWord =
false;
215 while ( scursor != send ) {
// Buffer whitespace following an encoded word instead of emitting it directly.
217 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
218 spaceBuffer += *scursor++;
// '=' may start an encoded word; remember where the attempt began.
223 if ( *scursor ==
'=' ) {
227 const char *start = scursor;
228 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
229 result += decoded.toUtf8();
230 onlySpacesSinceLastWord =
true;
// The branches below emit the parked whitespace and clear the flag.
233 if ( onlySpacesSinceLastWord ) {
234 result += spaceBuffer;
235 onlySpacesSinceLastWord =
false;
243 if ( onlySpacesSinceLastWord ) {
244 result += spaceBuffer;
245 onlySpacesSinceLastWord =
false;
// If the collected bytes do not decode cleanly as UTF-8 (U+FFFD shows up) and a
// fallback encoding has been configured, decode them with that codec instead.
253 const QString tryUtf8 = QString::fromUtf8( result );
254 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
255 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
256 return codec->toUnicode( result );
// Convenience overload: decodes src with "utf-8" as the default charset and
// without forcing it. The charset reported through usedCS is not returned to
// the caller.
262QString decodeRFC2047String(
const QByteArray &src )
265 return decodeRFC2047String( src, usedCS,
"utf-8",
false );
// Characters that encodeRFC2047String() (for address headers) and
// encodeRFC2047Sentence() test for with strchr().
268static const char *reservedCharacters =
"\"()<>@,.;:\\[]=";
// Encodes src for use in a header: the text is converted to bytes in the chosen
// charset and the span that needs protection is wrapped in one
// "=?charset?...?" encoded word, using either a hex-escape form or base64.
// When addressHeader is set, the reserved characters also trigger encoding.
// NOTE(review): the body of the allow8BitHeaders branch is not visible here;
// presumably the 8-bit text is returned unencoded — confirm.
270QByteArray encodeRFC2047String(
const QString &src,
const QByteArray &charset,
271 bool addressHeader,
bool allow8BitHeaders )
275 bool nonAscii=
false, ok=
true, useQEncoding=
false;
// Look up the codec for the requested charset; the lines after it fall back to
// the locale's encoding.
278 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
283 usedCS = KGlobal::locale()->encoding();
284 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
287 if ( charset.isEmpty() ) {
288 usedCS = codec->name();
// Convert to bytes; if the codec could not represent every character, a codec
// for the (re-assigned) usedCS is used for a second conversion.
294 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
295 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
296 if ( converterState.invalidChars > 0 ) {
298 codec = QTextCodec::codecForName( usedCS );
299 encoded8Bit = codec->fromUnicode( src );
// NOTE(review): presumably selects the hex-escape form for ISO 8859 charsets — confirm.
302 if ( usedCS.contains(
"8859-" ) ) {
306 if ( allow8BitHeaders ) {
// Scan for the first byte that needs encoding: a byte with the high bit set,
// an ESC character, or (for address headers) a reserved character.
// NOTE(review): this scan spells out the reserved set as a literal while the
// later scan uses reservedCharacters; the two strings are identical.
310 uint encoded8BitLength = encoded8Bit.length();
311 for (
unsigned int i=0; i<encoded8BitLength; i++ ) {
312 if ( encoded8Bit[i] ==
' ' ) {
317 if ( ( (
signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] ==
'\033' ) ||
318 ( addressHeader && ( strchr(
"\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
// Advance end to the next space (or the end of the data).
326 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
// Look for further bytes needing encoding after end, and advance end again to a
// space boundary.
331 for (
int x=end; x<encoded8Bit.length(); x++ ) {
332 if ( ( (
signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] ==
'\033' ) ||
333 ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
336 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
// Emit the untouched prefix followed by the encoded-word header.
343 result = encoded8Bit.left( start ) +
"=?" + usedCS;
345 if ( useQEncoding ) {
349 for (
int i=start; i<end; i++ ) {
// ASCII letters and digits are tested for separately from all other bytes,
// for which two hex digits are computed (high nibble first).
354 if ( ( ( c >=
'a' ) && ( c <=
'z' ) ) ||
355 ( ( c >=
'A' ) && ( c <=
'Z' ) ) ||
356 ( ( c >=
'0' ) && ( c <=
'9' ) ) ) {
// 48 is '0'; values of 58 and above are past '9' and get adjusted in the
// branch body (not visible here).
360 hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
361 if ( hexcode >= 58 ) {
365 hexcode = ( c & 0x0F ) + 48;
366 if ( hexcode >= 58 ) {
// Base64 form of the same span.
374 result +=
"?B?" + encoded8Bit.mid( start, end - start ).toBase64();
// Append whatever follows the encoded span unchanged.
378 result += encoded8Bit.right( encoded8Bit.length() - end );
// NOTE(review): presumably the branch taken when nothing needed encoding — confirm.
380 result = encoded8Bit;
// Encodes src piecewise: the text is split at ASCII reserved characters, each
// piece between them is passed to encodeRFC2047String(), and the reserved
// characters themselves are copied to the result unencoded.
386QByteArray encodeRFC2047Sentence(
const QString& src,
const QByteArray& charset )
// NOTE(review): splitChars is not referenced by any line visible in this
// function; the split test below uses reservedCharacters instead.
389 QList<QChar> splitChars;
390 splitChars << QLatin1Char(
',' ) << QLatin1Char(
'\"' ) << QLatin1Char(
';' ) << QLatin1Char(
'\\' );
391 const QChar *ch = src.constData();
392 const int length = src.length();
399 while ( pos < length ) {
401 const bool isAscii = ch->unicode() < 127;
402 const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 );
// A reserved ASCII character ends the current word: encode the word collected
// so far (if any), then emit the reserved character as is.
403 if ( isAscii && isReserved ) {
404 const int wordSize = pos - wordStart;
405 if ( wordSize > 0 ) {
406 const QString word = src.mid( wordStart, wordSize );
407 result += encodeRFC2047String( word, charset );
410 result += ch->toLatin1();
// Encode the trailing word that runs up to the end of the input.
418 const int wordSize = pos - wordStart;
419 if ( wordSize > 0 ) {
420 const QString word = src.mid( wordStart, pos - wordStart );
421 result += encodeRFC2047String( word, charset );
// Encodes str as a parameter value of the form charset''percent-escaped-bytes.
// The string is first converted to bytes in the given charset.
430QByteArray encodeRFC2231String(
const QString& str,
const QByteArray& charset )
432 if ( str.isEmpty() ) {
436 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
// Byte conversion: Latin-1 for "us-ascii", otherwise the looked-up codec, and
// the local 8-bit encoding when no codec is available.
438 if ( charset ==
"us-ascii" ) {
439 latin = str.toLatin1();
440 }
else if ( codec ) {
441 latin = codec->fromUnicode( str );
443 latin = str.toLocal8Bit();
// First pass: look for a byte below 0x20 or with the high bit set.
// NOTE(review): presumably decides whether any encoding is needed at all — confirm.
447 for ( l = latin.data(); *l; ++l ) {
448 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
// Second pass: build the output, starting with the charset and an empty
// language field.
457 QByteArray result = charset +
"''";
458 for ( l = latin.data(); *l; ++l ) {
// High-bit bytes, '%' itself and the listed special characters are quoted.
459 bool needsQuoting = ( *l & 0x80 ) || ( *l ==
'%' );
460 if ( !needsQuoting ) {
461 const QByteArray especials =
"()<>@,;:\"/[]?.= \033";
462 int len = especials.length();
463 for (
int i = 0; i < len; i++ ) {
464 if ( *l == especials[i] ) {
470 if ( needsQuoting ) {
// Two hex digits, high nibble first; 48 is '0', and values of 58 and above are
// adjusted in the branch body (not visible here).
472 unsigned char hexcode;
473 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
474 if ( hexcode >= 58 ) {
478 hexcode = ( *l & 0x0F ) + 48;
479 if ( hexcode >= 58 ) {
// Decodes a charset'language'value string: the charset is the text before the
// first apostrophe, the value is the text after the last one, and %XX escapes
// in the value are replaced by the byte they denote before the bytes are
// converted to Unicode. The name of the codec actually used is written to
// usedCS.
// NOTE(review): the remainder of the parameter list (forceCS is used below) is
// not visible here.
492QString decodeRFC2231String(
const QByteArray &str, QByteArray &usedCS,
const QByteArray &defaultCS,
495 int p = str.indexOf(
'\'' );
// Decode the whole input with the default charset.
// NOTE(review): presumably guarded by a "no apostrophe found" check — confirm.
497 return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
501 QByteArray charset = str.left( p );
503 QByteArray st = str.mid( str.lastIndexOf(
'\'' ) + 1 );
507 while ( p < (
int)st.length() ) {
// 37 is '%'. An escape is only decoded when both hex digits are present.
508 if ( st.at( p ) == 37 ) {
511 if ( p + 2 < st.length() ) {
// 48 is '0'; adjustments for the letters A-F are not visible here.
512 ch = st.at( p + 1 ) - 48;
516 ch2 = st.at( p + 2 ) - 48;
// Overwrite the '%' with the decoded byte and drop the two hex digits.
520 st[p] = ch * 16 + ch2;
521 st.remove( p + 1, 2 );
526 kDebug() <<
"Got pre-decoded:" << st;
// Use the charset named in the string unless it is unknown or forceCS is set,
// in which case the default charset is used.
528 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
529 if ( !charsetcodec || forceCS ) {
530 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
533 usedCS = charsetcodec->name();
534 return charsetcodec->toUnicode( st );
// Convenience overload: decodes src with "utf-8" as the default charset and
// without forcing it. The charset reported through usedCS is not returned to
// the caller.
537QString decodeRFC2231String(
const QByteArray &src )
540 return decodeRFC2231String( src, usedCS,
"utf-8",
false );
// Produces a string from random characters and a number derived from the
// current time and the process id.
543QByteArray uniqueString()
// NOTE(review): the table contains no 'w' and therefore has 61 entries, which
// matches the 61.0 scale factor used for the index below.
545 static char chars[] =
"0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
549 unsigned int timeval;
// ran is a random divisor in 1..1000 applied to the timestamp before the
// process id is added.
553 ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
554 timeval = ( now / ran ) + getpid();
// Ten iterations, each computing a random index in 0..60.
// NOTE(review): presumably each index picks a character from chars — confirm.
556 for (
int i = 0; i < 10; i++ ) {
557 pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
563 ret.setNum( timeval );
// Returns "nextPart" followed by the result of uniqueString().
570QByteArray multiPartBoundary()
572 return "nextPart" + uniqueString();
// Unfolds a header value that was folded across several lines: for every line
// break, the whitespace around it is located and the text before the fold is
// copied to the result.
575QByteArray unfoldHeader(
const QByteArray &header )
578 if ( header.isEmpty() ) {
582 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
583 while ( ( foldMid = header.indexOf(
'\n', pos ) ) >= 0 ) {
584 foldBegin = foldEnd = foldMid;
// Walk foldBegin backwards while the preceding character is whitespace.
586 while ( foldBegin > 0 ) {
587 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
// Walk foldEnd forwards over whitespace. The literal sequences "=09" and "=20"
// directly after the line break are tested for as well.
// NOTE(review): presumably they are treated as an encoded tab/space — confirm.
593 while ( foldEnd <= header.length() - 1 ) {
594 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
596 }
else if ( foldEnd > 0 && header[foldEnd - 1] ==
'\n' &&
597 header[foldEnd] ==
'=' && foldEnd + 2 < header.length() &&
598 ( ( header[foldEnd + 1] ==
'0' &&
599 header[foldEnd + 2] ==
'9' ) ||
600 ( header[foldEnd + 1] ==
'2' &&
601 header[foldEnd + 2] ==
'0' ) ) ) {
// Copy the text that precedes the fold.
610 result += header.mid( pos, foldBegin - pos );
611 if ( foldEnd < header.length() - 1 ) {
// Copy whatever remains after the last fold.
616 const int len = header.length();
618 result += header.mid( pos, len - pos );
// Searches src, starting at dataBegin, for the end of a header line, taking
// continuation lines (a line break followed by a space or tab) into account.
// dataBegin is passed by reference and folded is an optional out-parameter.
623int findHeaderLineEnd(
const QByteArray &src,
int &dataBegin,
bool *folded )
// len is the index of the last byte of src, not its size.
626 int len = src.length() - 1;
// Range checks on dataBegin; the bodies of these branches are not visible here.
632 if ( dataBegin < 0 ) {
637 if ( dataBegin > len ) {
// A line break followed by a space or tab.
645 if ( src.at( end ) ==
'\n' && end + 1 < len &&
646 ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ) ) {
653 if ( src.at( end ) !=
'\n' ) {
// Advance to the next line break. The search result is compared with -1 and
// with len; otherwise the character after the break is inspected: a space, a
// tab, or the literal sequences "=09" / "=20".
655 end = src.indexOf(
'\n', end + 1 );
656 if ( end == -1 || end == len ) {
659 }
else if ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ||
660 ( src[end + 1] ==
'=' && end + 3 <= len &&
661 ( ( src[end + 2] ==
'0' && src[end + 3] ==
'9' ) ||
662 ( src[end + 2] ==
'2' && src[end + 3] ==
'0' ) ) ) ) {
// Locates the header called name in src using case-insensitive matching.
// dataBegin is set to the position after the name plus one further character
// and end receives the result of findHeaderLineEnd().
// NOTE(review): n is built on a line not visible here; presumably it is the
// name followed by ':' — confirm.
680int indexOfHeader(
const QByteArray &src,
const QByteArray &name,
int &end,
int &dataBegin,
bool *folded )
// Case 1: src starts with the search pattern.
686 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
// Case 2: the pattern occurs later in src.
690 const char *p = strcasestr( src.constData(), n.constData() );
694 begin = p - src.constData();
700 dataBegin = begin + name.length() + 1;
// NOTE(review): src.at( dataBegin ) is read without a bounds check visible
// here — confirm that dataBegin cannot equal src.length().
702 if ( src.at( dataBegin ) ==
' ' ) {
705 end = findHeaderLineEnd( src, dataBegin, folded );
// Returns the value of the header called name in src. The value is taken as
// the bytes between the positions reported by indexOfHeader(); one path below
// passes it through unfoldHeader().
715QByteArray extractHeader(
const QByteArray &src,
const QByteArray &name )
// Nothing to extract from empty input or when the header is absent.
721 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
727 result = src.mid( begin, end - begin );
730 QByteArray hdrValue = src.mid( begin, end - begin );
731 result = unfoldHeader( hdrValue );
// Collects the values of every header called name in src. After each hit the
// working copy is cut at the end of that header and searched again.
738QList<QByteArray> extractHeaders(
const QByteArray &src,
const QByteArray &name )
742 QList<QByteArray> result;
743 QByteArray copySrc( src );
745 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
749 while ( begin >= 0 ) {
// One path appends the raw value; the other unfolds it first.
751 result.append( copySrc.mid( begin, end - begin ) );
753 QByteArray hdrValue = copySrc.mid( begin, end - begin );
754 result.append( unfoldHeader( hdrValue ) );
// Continue the search behind the header just processed.
758 copySrc = copySrc.mid( end );
759 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
// Removes the header called name from header in place. The removed span runs
// from the position reported by indexOfHeader() up to and including the byte
// at end.
766void removeHeader( QByteArray &header,
const QByteArray &name )
768 int begin, end, dummy;
769 begin = indexOfHeader( header, name, end, dummy );
771 header.remove( begin, end - begin + 1 );
// Replaces every "\r\n" sequence with "\n".
// NOTE(review): ret is declared on a line not visible here; presumably it is a
// copy of s that is returned afterwards — confirm.
775QByteArray CRLFtoLF(
const QByteArray &s )
778 ret.replace(
"\r\n",
"\n" );
// Overload taking a C string; forwards to the QByteArray version of CRLFtoLF().
782QByteArray CRLFtoLF(
const char *s )
785 return CRLFtoLF( ret );
788QByteArray LFtoCRLF(
const QByteArray &s )
791 ret.replace(
'\n',
"\r\n" );
// Overload taking a C string; forwards to the QByteArray version of LFtoCRLF().
795QByteArray LFtoCRLF(
const char *s )
798 return LFtoCRLF( ret );
// Shared implementation behind the removeQuots() overloads. Walks str while
// tracking whether the current position lies inside a quoted section; double
// quotes, and backslashes inside quotes, are the characters acted upon.
// NOTE(review): the bodies of both branches are not visible here; presumably
// the matched character is removed from str — confirm.
802template <
typename StringType,
typename CharType >
void removeQuotesGeneric( StringType & str )
804 bool inQuote =
false;
805 for (
int i = 0; i < str.length(); ++i ) {
806 if ( str[i] == CharType(
'"' ) ) {
811 if ( inQuote && ( str[i] == CharType(
'\\' ) ) ) {
// QByteArray overload; forwards to removeQuotesGeneric() with char as the
// character type. str is modified in place.
819void removeQuots( QByteArray &str )
821 removeQuotesGeneric<QByteArray, char>( str );
// QString overload; forwards to removeQuotesGeneric() with QLatin1Char as the
// character type. str is modified in place.
824void removeQuots( QString &str )
826 removeQuotesGeneric<QString, QLatin1Char>( str );
829template<
class StringType,
class CharType,
class CharConverterType,
class StringConverterType,
class ToString>
830void addQuotes_impl( StringType &str,
bool forceQuotes )
832 bool needsQuotes=
false;
833 for (
int i=0; i < str.length(); i++ ) {
834 const CharType cur = str.at( i );
835 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String(
"\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
838 if ( cur == CharConverterType(
'\\' ) || cur == CharConverterType(
'\"' ) ) {
839 str.insert( i, CharConverterType(
'\\' ) );
844 if ( needsQuotes || forceQuotes ) {
845 str.insert( 0, CharConverterType(
'\"' ) );
846 str.append( StringConverterType(
"\"" ) );
// QByteArray overload; forwards to addQuotes_impl() instantiated for byte
// strings. str is modified in place.
850void addQuotes( QByteArray &str,
bool forceQuotes )
852 addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
// QString overload; forwards to addQuotes_impl() instantiated for Unicode
// strings. str is modified in place.
855void addQuotes( QString &str,
bool forceQuotes )
857 addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
// Returns a copy of input in which the bidirectional control characters are
// balanced: a PDF without a preceding opener is removed, and one PDF is added
// for every opener still open at the end.
860KMIME_EXPORT QString balanceBidiState(
const QString &input )
// Direction openers (overrides and embeddings) and the closing PDF character.
862 const int LRO = 0x202D;
863 const int RLO = 0x202E;
864 const int LRE = 0x202A;
865 const int RLE = 0x202B;
866 const int PDF = 0x202C;
868 QString result = input;
870 int openDirChangers = 0;
871 int numPDFsRemoved = 0;
872 for (
int i = 0; i < input.length(); i++ ) {
873 const ushort &code = input.at( i ).unicode();
874 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
876 }
else if ( code == PDF ) {
877 if ( openDirChangers > 0 ) {
// A PDF with nothing to close: warn and drop it. The index is corrected by the
// number of characters already removed from result.
881 kWarning() <<
"Possible Unicode spoofing (unexpected PDF) detected in" << input;
882 result.remove( i - numPDFsRemoved, 1 );
// Openers left unclosed: warn and add the missing PDF characters, placing them
// before a trailing double quote if there is one.
888 if ( openDirChangers > 0 ) {
889 kWarning() <<
"Possible Unicode spoofing detected in" << input;
894 for (
int i = openDirChangers; i > 0; i-- ) {
895 if ( result.endsWith( QLatin1Char(
'"' ) ) ) {
896 result.insert( result.length() - 1, QChar( PDF ) );
898 result += QChar( PDF );
// Returns a copy of input with the characters U+202D, U+202E, U+202A and U+202B
// removed. No other characters are removed by the lines shown here.
906QString removeBidiControlChars(
const QString &input )
908 const int LRO = 0x202D;
909 const int RLO = 0x202E;
910 const int LRE = 0x202A;
911 const int RLE = 0x202B;
912 QString result = input;
913 result.remove( LRO );
914 result.remove( RLO );
915 result.remove( LRE );
916 result.remove( RLE );
// Tells whether content is a cryptographic part: its media type must be
// "application" and its subtype one of the PGP / PKCS#7 subtypes listed below.
// Both comparisons are done on lower-cased values. "octet-stream" only counts
// together with a further condition that is not visible here.
// NOTE(review): contentType is obtained on a line not visible here.
920static bool isCryptoPart(
Content* content )
932 const QByteArray lowerSubType = contentType->
subType().toLower();
933 return ( contentType->
mediaType().toLower() ==
"application" &&
934 ( lowerSubType ==
"pgp-encrypted" ||
935 lowerSubType ==
"pgp-signature" ||
936 lowerSubType ==
"pkcs7-mime" ||
937 lowerSubType ==
"x-pkcs7-mime" ||
938 lowerSubType ==
"pkcs7-signature" ||
939 lowerSubType ==
"x-pkcs7-signature" ||
940 ( lowerSubType ==
"octet-stream" &&
// Tells whether content, or one of the child parts visited recursively, counts
// as an attachment: a part for which a file name was found and which is not a
// cryptographic part.
// NOTE(review): the conditions under which emptyFilename is cleared are not
// visible here.
944bool hasAttachment(
Content* content )
950 bool emptyFilename =
true;
953 emptyFilename =
false;
956 if ( emptyFilename &&
959 emptyFilename =
false;
963 if ( !emptyFilename && !isCryptoPart( content ) ) {
// Recurse into child parts.
970 if ( hasAttachment( child ) ) {
// Tells whether content itself is an invitation (see isInvitation()) or, by
// recursion, whether one of the child parts is.
978bool hasInvitation(
Content *content )
984 if ( isInvitation(content) ) {
991 if ( hasInvitation( child ) ) {
// Tells whether message looks signed: either its content type has one of the
// signature-related subtypes below, or a main body part of one of the listed
// signature MIME types exists.
// NOTE(review): contentType is obtained on a line not visible here.
999bool isSigned(
Message *message )
1006 if ( contentType->
isSubtype(
"signed" ) ||
1007 contentType->
isSubtype(
"pgp-signature" ) ||
1008 contentType->
isSubtype(
"pkcs7-signature" ) ||
1009 contentType->
isSubtype(
"x-pkcs7-signature" ) ||
1011 message->
mainBodyPart(
"application/pgp-signature" ) ||
1012 message->
mainBodyPart(
"application/pkcs7-signature" ) ||
1013 message->
mainBodyPart(
"application/x-pkcs7-signature" ) ) {
// Tells whether message looks encrypted: either its content type has one of
// the encryption-related subtypes below, or a main body part of one of the
// listed encryption MIME types exists.
// NOTE(review): contentType is obtained on a line not visible here.
1019bool isEncrypted(
Message *message )
1026 if ( contentType->
isSubtype(
"encrypted" ) ||
1027 contentType->
isSubtype(
"pgp-encrypted" ) ||
1028 contentType->
isSubtype(
"pkcs7-mime" ) ||
1029 contentType->
isSubtype(
"x-pkcs7-mime" ) ||
1031 message->
mainBodyPart(
"application/pgp-encrypted" ) ||
1033 message->
mainBodyPart(
"application/x-pkcs7-mime" ) ) {
// Tells whether content is a calendar invitation, i.e. whether its content
// type is text/calendar. A missing content type fails the check.
// NOTE(review): contentType is obtained on a line not visible here.
1040bool isInvitation(
Content *content )
1048 if ( contentType && contentType->
isMediatype(
"text" ) && contentType->
isSubtype(
"calendar" ) ) {
A class for performing basic data typing using frequency count heuristics.
@ EightBitData
8bit binary
@ SevenBitData
7bit binary
A class that encapsulates MIME encoded Content.
Headers::ContentType * contentType(bool create=true)
Returns the Content-Type header.
Headers::ContentDisposition * contentDisposition(bool create=true)
Returns the Content-Disposition header.
List contents() const
For multipart contents, this will return a list of all multipart child contents.
Represents a (email) message.
Content * mainBodyPart(const QByteArray &type=QByteArray())
Returns the first main body part of a given type, taking multipart/mixed and multipart/alternative nodes into consideration.
This file is part of the API for handling MIME data and defines the CharFreq class.
This file is part of the API for handling MIME data and defines the Codec class.