• Skip to content
  • Skip to link menu
KDE 4.5 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_parsers.cpp

00001 /*
00002     kmime_parsers.cpp
00003 
00004     KMime, the KDE Internet mail/usenet news message library.
00005     Copyright (c) 2001 the KMime authors.
00006     See file AUTHORS for details
00007 
00008     This library is free software; you can redistribute it and/or
00009     modify it under the terms of the GNU Library General Public
00010     License as published by the Free Software Foundation; either
00011     version 2 of the License, or (at your option) any later version.
00012 
00013     This library is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016     Library General Public License for more details.
00017 
00018     You should have received a copy of the GNU Library General Public License
00019     along with this library; see the file COPYING.LIB.  If not, write to
00020     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021     Boston, MA 02110-1301, USA.
00022 */
00023 #include "kmime_parsers.h"
00024 
00025 #include <QtCore/QByteArray>
00026 
00027 using namespace KMime::Parser;
00028 
00029 namespace KMime {
00030 namespace Parser {
00031 
00032 MultiPart::MultiPart( const QByteArray &src, const QByteArray &boundary )
00033 {
00034   s_rc=src;
00035   b_oundary=boundary;
00036 }
00037 
00038 bool MultiPart::parse()
00039 {
00040   QByteArray b = "--" + b_oundary, part;
00041   int pos1=0, pos2=0, blen=b.length();
00042 
00043   p_arts.clear();
00044 
00045   //find the first valid boundary
00046   while ( 1 ) {
00047     if ( ( pos1 = s_rc.indexOf( b, pos1 ) ) == -1 || pos1 == 0 ||
00048          s_rc[pos1-1] == '\n' ) { //valid boundary found or no boundary at all
00049       break;
00050     }
00051     pos1 += blen; //boundary found but not valid => skip it;
00052   }
00053 
00054   if ( pos1 > -1 ) {
00055     pos1 += blen;
00056     if ( s_rc[pos1] == '-' && s_rc[pos1+1] == '-' ) {
00057       // the only valid boundary is the end-boundary
00058       // this message is *really* broken
00059       pos1 = -1; //we give up
00060     } else if ( ( pos1 - blen ) > 1 ) { //preamble present
00061       p_reamble = s_rc.left( pos1 - blen );
00062     }
00063   }
00064 
00065   while ( pos1 > -1 && pos2 > -1 ) {
00066 
00067     //skip the rest of the line for the first boundary - the message-part starts here
00068     if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) {
00069       //now search the next linebreak
00070       //now find the next valid boundary
00071       pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
00072       while ( 1 ) {
00073         if ( ( pos2 = s_rc.indexOf( b, pos2 ) ) == -1 ||
00074              s_rc[pos2-1] == '\n' ) { //valid boundary or no more boundaries found
00075           break;
00076         }
00077         pos2 += blen; //boundary is invalid => skip it;
00078       }
00079 
00080       if ( pos2 == -1 ) { // no more boundaries found
00081         part = s_rc.mid( pos1, s_rc.length() - pos1 ); //take the rest of the string
00082         p_arts.append( part );
00083         pos1 = -1;
00084         pos2 = -1; //break;
00085       } else {
00086         part = s_rc.mid( pos1, pos2 - pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
00087         p_arts.append( part );
00088         pos2 += blen; //pos2 points now to the first character after the boundary
00089         if ( s_rc[pos2] == '-' && s_rc[pos2+1] == '-' ) { //end-boundary
00090           pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary
00091 
00092           if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { //skip the rest of this line
00093             //everything after the end-boundary is considered as the epilouge
00094             e_pilouge = s_rc.mid( pos1 + 1, s_rc.length() - pos1 - 1 );
00095           }
00096           pos1 = -1;
00097           pos2 = -1; //break
00098         } else {
00099           pos1 = pos2; //the search continues ...
00100         }
00101       }
00102     }
00103   }
00104 
00105   return !p_arts.isEmpty();
00106 }
00107 
00108 //=============================================================================
00109 
00110 NonMimeParser::NonMimeParser( const QByteArray &src ) :
00111   s_rc( src ), p_artNr( -1 ), t_otalNr( -1 )
00112 {
00113 }
00114 
00118 QByteArray NonMimeParser::guessMimeType( const QByteArray &fileName )
00119 {
00120   QByteArray tmp, mimeType;
00121   int pos;
00122 
00123   if ( !fileName.isEmpty() ) {
00124     pos = fileName.lastIndexOf( '.' );
00125     if ( pos++ != -1 ) {
00126       tmp = fileName.mid( pos, fileName.length() - pos).toUpper();
00127       if ( tmp == "JPG" || tmp=="JPEG" ) {
00128         mimeType = "image/jpeg";
00129       } else if ( tmp == "GIF") {
00130         mimeType = "image/gif";
00131       } else if ( tmp == "PNG") {
00132         mimeType = "image/png";
00133       } else if ( tmp == "TIFF" || tmp == "TIF") {
00134         mimeType = "image/tiff";
00135       } else if ( tmp == "XPM") {
00136         mimeType = "image/x-xpixmap";
00137       } else if ( tmp == "XBM") {
00138         mimeType = "image/x-xbitmap";
00139       } else if ( tmp == "BMP") {
00140         mimeType = "image/bmp";
00141       } else if ( tmp == "TXT" ||
00142                   tmp == "ASC" ||
00143                   tmp == "H" ||
00144                   tmp == "C" ||
00145                   tmp == "CC" ||
00146                   tmp == "CPP") {
00147         mimeType = "text/plain";
00148       } else if ( tmp == "HTML" || tmp == "HTM" ) {
00149         mimeType = "text/html";
00150       } else {
00151         mimeType = "application/octet-stream";
00152       }
00153     } else {
00154       mimeType = "application/octet-stream";
00155     }
00156   } else {
00157     mimeType = "application/octet-stream";
00158   }
00159 
00160   return mimeType;
00161 }
00162 
00163 //==============================================================================
00164 
00165 UUEncoded::UUEncoded( const QByteArray &src, const QByteArray &subject ) :
00166   NonMimeParser( src ), s_ubject( subject ), m_beginRegExp( "begin [0-9][0-9][0-9]" ), m_numberRegExp( "[0-9]+/[0-9]+" )
00167 {}
00168 
00169 bool UUEncoded::parse()
00170 {
00171   int currentPos=0;
00172   bool success=true, firstIteration=true;
00173 
00174   while ( success ) {
00175     int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
00176     bool containsBegin=false, containsEnd=false;
00177     QByteArray tmp, fileName;
00178 
00179     if ( ( beginPos = QString( s_rc ).
00180            indexOf( m_beginRegExp, currentPos ) ) > -1 &&
00181          ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n') ) {
00182       containsBegin = true;
00183       uuStart = s_rc.indexOf( '\n', beginPos );
00184       if ( uuStart == -1 ) {//no more line breaks found, we give up
00185         success = false;
00186         break;
00187       } else {
00188         uuStart++; //points now at the beginning of the next line
00189       }
00190     } else {
00191       beginPos=currentPos;
00192     }
00193 
00194     if ( ( endPos = s_rc.
00195            indexOf( "\nend", ( uuStart > 0 ) ? uuStart-1:0 ) ) == -1 ) {
00196       endPos = s_rc.length(); //no end found
00197     } else {
00198       containsEnd = true;
00199     }
00200 
00201     if ( ( containsBegin && containsEnd ) || firstIteration ) {
00202 
00203       //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
00204       //all lines in a uuencoded text start with 'M'
00205       for ( int idx=uuStart; idx<endPos; idx++ ) {
00206         if ( s_rc[idx] == '\n' ) {
00207           lineCount++;
00208           if ( idx+1 < endPos && s_rc[idx+1] == 'M') {
00209             idx++;
00210             MCount++;
00211           }
00212         }
00213       }
00214 
00215       //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
00216       if ( MCount == 0 || ( lineCount - MCount ) > 10 ||
00217            ( ( !containsBegin || !containsEnd ) && ( MCount < 15 ) ) ) {
00218         // harder check for splitted-articles
00219         success = false;
00220         break; //too many "non-M-Lines" found, we give up
00221       }
00222 
00223       if ( ( !containsBegin || !containsEnd ) && !s_ubject.isNull() ) {
00224         // message may be split up => parse subject
00225         pos = m_numberRegExp.indexIn( QString( s_ubject ), 0 );
00226         len = m_numberRegExp.matchedLength();
00227         if ( pos != -1 ) {
00228           tmp = s_ubject.mid( pos, len );
00229           pos = tmp.indexOf( '/' );
00230           p_artNr = tmp.left( pos ).toInt();
00231           t_otalNr = tmp.right( tmp.length() - pos - 1).toInt();
00232         } else {
00233           success = false;
00234           break; //no "part-numbers" found in the subject, we give up
00235         }
00236       }
00237 
00238       //everything before "begin" is text
00239       if ( beginPos > 0 ) {
00240         t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) );
00241       }
00242 
00243       if ( containsBegin ) {
00244         //everything between "begin ### " and the next LF is considered as the filename
00245         fileName = s_rc.mid( beginPos + 10, uuStart - beginPos - 11 );
00246       } else {
00247         fileName = "";
00248       }
00249       f_ilenames.append( fileName );
00250       //everything beetween "begin" and "end" is uuencoded
00251       b_ins.append( s_rc.mid( uuStart, endPos - uuStart + 1 ) );
00252       m_imeTypes.append( guessMimeType( fileName ) );
00253       firstIteration = false;
00254 
00255       int next = s_rc.indexOf( '\n', endPos + 1 );
00256       if ( next == -1 ) { //no more line breaks found, we give up
00257         success = false;
00258         break;
00259       } else {
00260         next++; //points now at the beginning of the next line
00261       }
00262       currentPos = next;
00263 
00264     } else {
00265       success = false;
00266     }
00267   }
00268 
00269   // append trailing text part of the article
00270   t_ext.append( s_rc.right( s_rc.length() - currentPos ) );
00271 
00272   return ( ( b_ins.count() > 0 ) || isPartial() );
00273 }
00274 
00275 //==============================================================================
00276 
00277 YENCEncoded::YENCEncoded( const QByteArray &src ) :
00278   NonMimeParser( src )
00279 {
00280 }
00281 
00282 bool YENCEncoded::yencMeta( QByteArray &src, const QByteArray &name, int *value )
00283 {
00284   bool found = false;
00285   QByteArray sought=name + '=';
00286 
00287   int iPos = src.indexOf( sought );
00288   if ( iPos > -1 ) {
00289     int pos1 = src.indexOf( ' ', iPos );
00290     int pos2 = src.indexOf( '\r', iPos );
00291     int pos3 = src.indexOf( '\t', iPos );
00292     int pos4 = src.indexOf( '\n', iPos );
00293     if ( pos2 >= 0 && ( pos1 < 0 || pos1 > pos2 ) ) {
00294       pos1 = pos2;
00295     }
00296     if ( pos3 >= 0 && ( pos1 < 0 || pos1 > pos3 ) ) {
00297       pos1 = pos3;
00298     }
00299     if ( pos4 >= 0 && ( pos1 < 0 || pos1 > pos4 ) ) {
00300       pos1 = pos4;
00301     }
00302     iPos=src.lastIndexOf( '=', pos1 ) + 1;
00303     if ( iPos < pos1 ) {
00304       char c = src.at( iPos );
00305       if ( c>='0' && c<='9' ) {
00306         found = true;
00307         *value = src.mid( iPos, pos1 - iPos ).toInt();
00308       }
00309     }
00310   }
00311   return found;
00312 }
00313 
00314 bool YENCEncoded::parse()
00315 {
00316   int currentPos=0;
00317   bool success=true;
00318 
00319   while ( success ) {
00320     int beginPos=currentPos, yencStart=currentPos;
00321     bool containsPart=false;
00322     QByteArray fileName, mimeType;
00323 
00324     if ( ( beginPos = s_rc.
00325            indexOf( "=ybegin ", currentPos ) ) > -1 &&
00326          ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) {
00327       yencStart = s_rc.indexOf( '\n', beginPos );
00328       if ( yencStart == -1 ) { // no more line breaks found, give up
00329         success = false;
00330         break;
00331       } else {
00332         yencStart++;
00333         if ( s_rc.indexOf( "=ypart", yencStart ) == yencStart ) {
00334           containsPart = true;
00335           yencStart = s_rc.indexOf( '\n', yencStart );
00336           if ( yencStart == -1 ) {
00337             success = false;
00338             break;
00339           }
00340           yencStart++;
00341         }
00342       }
00343       // Try to identify yenc meta data
00344 
00345       // Filenames can contain any embedded chars until end of line
00346       QByteArray meta = s_rc.mid( beginPos, yencStart - beginPos );
00347       int namePos = meta.indexOf( "name=" );
00348       if ( namePos == -1 ) {
00349         success = false;
00350         break;
00351       }
00352       int eolPos = meta.indexOf( '\r', namePos );
00353       if ( eolPos == -1 ) {
00354         eolPos = meta.indexOf( '\n', namePos );
00355       }
00356       if ( eolPos == -1 ) {
00357         success = false;
00358         break;
00359       }
00360       fileName = meta.mid( namePos + 5, eolPos - ( namePos + 5 ) );
00361 
00362       // Other metadata is integer
00363       int yencLine;
00364       if ( !yencMeta( meta, "line", &yencLine ) ) {
00365         success = false;
00366         break;
00367       }
00368       int yencSize;
00369       if ( !yencMeta( meta, "size", &yencSize ) ) {
00370         success = false;
00371         break;
00372       }
00373 
00374       int partBegin, partEnd;
00375       if ( containsPart ) {
00376         if ( !yencMeta( meta, "part", &p_artNr ) ) {
00377           success = false;
00378           break;
00379         }
00380         if ( !yencMeta( meta, "begin", &partBegin ) ||
00381              !yencMeta( meta, "end", &partEnd ) ) {
00382           success = false;
00383           break;
00384         }
00385         if ( !yencMeta( meta, "total", &t_otalNr ) ) {
00386           t_otalNr = p_artNr + 1;
00387         }
00388         if ( yencSize == partEnd - partBegin + 1 ) {
00389           t_otalNr = 1;
00390         } else {
00391           yencSize = partEnd - partBegin + 1;
00392         }
00393       }
00394 
00395       // We have a valid yenc header; now we extract the binary data
00396       int totalSize = 0;
00397       int pos = yencStart;
00398       int len = s_rc.length();
00399       bool lineStart = true;
00400       int lineLength = 0;
00401       bool containsEnd = false;
00402       QByteArray binary;
00403       binary.resize( yencSize );
00404       while ( pos < len ) {
00405         int ch = s_rc.at( pos );
00406         if ( ch < 0 ) {
00407           ch += 256;
00408         }
00409         if ( ch == '\r' ) {
00410           if ( lineLength != yencLine && totalSize != yencSize ) {
00411             break;
00412           }
00413           pos++;
00414         }
00415         else if ( ch == '\n' ) {
00416           lineStart = true;
00417           lineLength = 0;
00418           pos++;
00419         } else {
00420           if ( ch == '=' ) {
00421             if ( pos + 1 < len ) {
00422               ch = s_rc.at( pos + 1 );
00423               if ( lineStart && ch == 'y' ) {
00424                 containsEnd = true;
00425                 break;
00426               }
00427               pos += 2;
00428               ch -= 64+42;
00429               if ( ch < 0 ) {
00430                 ch += 256;
00431               }
00432               if ( totalSize >= yencSize ) {
00433                 break;
00434               }
00435               binary[totalSize++] = ch;
00436               lineLength++;
00437             } else {
00438               break;
00439             }
00440           } else {
00441             ch -= 42;
00442             if ( ch < 0 ) {
00443               ch += 256;
00444             }
00445             if ( totalSize >= yencSize ) {
00446               break;
00447             }
00448             binary[totalSize++] = ch;
00449             lineLength++;
00450             pos++;
00451           }
00452           lineStart = false;
00453         }
00454       }
00455 
00456       if ( !containsEnd ) {
00457         success = false;
00458         break;
00459       }
00460       if ( totalSize != yencSize ) {
00461         success = false;
00462         break;
00463       }
00464 
00465       // pos now points to =yend; get end data
00466       eolPos = s_rc.indexOf( '\n', pos );
00467       if ( eolPos == -1 ) {
00468         success = false;
00469         break;
00470       }
00471       meta = s_rc.mid( pos, eolPos - pos );
00472       if ( !yencMeta( meta, "size", &totalSize ) ) {
00473         success = false;
00474         break;
00475       }
00476       if ( totalSize != yencSize ) {
00477         success = false;
00478         break;
00479       }
00480 
00481       f_ilenames.append( fileName );
00482       m_imeTypes.append( guessMimeType( fileName ) );
00483       b_ins.append( binary );
00484 
00485       //everything before "begin" is text
00486       if ( beginPos > 0 ) {
00487         t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) );
00488       }
00489       currentPos = eolPos + 1;
00490 
00491     } else {
00492       success = false;
00493     }
00494   }
00495 
00496   // append trailing text part of the article
00497   t_ext.append( s_rc.right( s_rc.length() - currentPos ) );
00498 
00499   return b_ins.count()>0;
00500 }
00501 
00502 } // namespace Parser
00503 
00504 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.7.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal