00001 #ifndef _MIMETIC_PARSER_ITPARSER_H_
00002 #define _MIMETIC_PARSER_ITPARSER_H_
00003 #include <iterator>
00004 #include <algorithm>
00005 #include <stack>
00006 #include <iostream>
00007 #include <mimetic/tree.h>
00008 #include <mimetic/utils.h>
00009 #include <mimetic/mimeentity.h>
00010
00011
00012
00013
00014 namespace mimetic
00015 {
00016
00017
00018 template<typename Iterator,
00019 typename ItCategory=typename std::iterator_traits<Iterator>::iterator_category>
00020 struct IteratorParser
00021 {
00022 };
00023
00024
00025
00026
00027 template<typename Iterator>
00028 struct IteratorParser<Iterator, std::input_iterator_tag>
00029 {
00030
00031 IteratorParser(MimeEntity& me)
00032 : m_me(me), m_iMask(imNone), m_lastBoundary(NoBoundary)
00033 {
00034 m_entityStack.push(&m_me);
00035 }
00036 virtual ~IteratorParser()
00037 {
00038 }
00039
00040
00041
00042 void iMask(size_t mask) { m_iMask = mask; }
00043
00044
00045
00046 size_t iMask() const { return m_iMask; }
00047
00048
00049
00050 void run(Iterator bit, Iterator eit)
00051 {
00052 m_bit = bit;
00053 m_eit = eit;
00054 doLoad();
00055 }
00056 protected:
00057 typedef std::list<std::string> BoundaryList;
00058 enum {
00059 CR = 0xD,
00060 LF = 0xA,
00061 NL = '\n'
00062 };
00063 enum {
00064 peIgnore,
00065 pePreamble,
00066 peBody,
00067 peEpilogue
00068 };
00069 enum BoundaryType {
00070 NoBoundary = 0,
00071 Boundary,
00072 ClosingBoundary,
00073 HigherLevelBoundary
00074
00075 };
00076 enum EntityType {
00077 etRfc822,
00078 etMsgRfc822,
00079 etMultipart
00080 };
00081
00082 MimeEntity& m_me;
00083 Iterator m_bit, m_eit;
00084 size_t m_iMask;
00085 BoundaryList m_boundaryList;
00086 BoundaryType m_lastBoundary;
00087 std::stack<MimeEntity*> m_entityStack;
00088
00089 protected:
00090 void appendPreambleBlock(const char* buf, int sz)
00091 {
00092 MimeEntity* pMe = m_entityStack.top();
00093 pMe->body().preamble().append(buf,sz);
00094 }
00095
00096 void appendEpilogueBlock(const char* buf, int sz)
00097 {
00098 MimeEntity* pMe = m_entityStack.top();
00099 pMe->body().epilogue().append(buf,sz);
00100 }
00101
00102 void appendBodyBlock(const char* buf, int sz)
00103 {
00104 MimeEntity* pMe = m_entityStack.top();
00105 pMe->body().append(buf, sz);
00106 }
00107
00108 std::string getBoundary()
00109 {
00110 const MimeEntity* pMe = m_entityStack.top();
00111 const ContentType& ct = pMe->header().contentType();
00112 return std::string("--") + ct.param("boundary");
00113 }
00114
00115 void popChild()
00116 {
00117 m_entityStack.pop();
00118 }
00119
00120 void pushNewChild()
00121 {
00122 MimeEntity* pMe = m_entityStack.top();
00123 MimeEntity* pChild = new MimeEntity;
00124 pMe->body().parts().push_back(pChild);
00125 m_entityStack.push(pChild);
00126 }
00127
00128 EntityType getType()
00129 {
00130 MimeEntity* pMe = m_entityStack.top();
00131 const Header& h = pMe->header();
00132
00133
00134 const ContentType& ct = h.contentType();
00135 if(ct.isMultipart())
00136 return etMultipart;
00137 else if (ct.type() == "message" && ct.subtype() == "rfc822")
00138 return etMsgRfc822;
00139 else
00140 return etRfc822;
00141 }
00142
00143 void addField(const std::string& name, const std::string& value)
00144 {
00145 MimeEntity* pMe = m_entityStack.top();
00146 Header& h = pMe->header();
00147 Header::iterator it = h.insert(h.end(), Field());
00148 it->name(name);
00149 it->value(value);
00150 }
00151
00152 BoundaryType isBoundary(const std::string& line)
00153 {
00154 if(line.length() == 0 || line[0] != '-')
00155 return m_lastBoundary = NoBoundary;
00156
00157 int level = 0;
00158 int lineLen = line.length();
00159 BoundaryList::const_iterator bit,eit;
00160 bit = m_boundaryList.begin(), eit = m_boundaryList.end();
00161 for(;bit != eit; ++bit, ++level)
00162 {
00163 const std::string& b = *bit;
00164 int bLen = b.length();
00165 if(line.compare(0, bLen, b) == 0)
00166 {
00167
00168 if(level > 0)
00169 return m_lastBoundary=HigherLevelBoundary;
00170
00171 if(lineLen > bLen && line.compare(bLen,2,"--") == 0)
00172 return m_lastBoundary = ClosingBoundary;
00173 else
00174 return m_lastBoundary = Boundary;
00175 }
00176 }
00177 return m_lastBoundary = NoBoundary;
00178 }
00179
00180 inline bool isnl(char c) const
00181 {
00182 return (c == CR || c == LF);
00183 }
00184
00185 inline bool isnl(char a, char b) const
00186 {
00187 if(a == CR || a == LF)
00188 if(b == (a == CR ? LF : CR))
00189 return true;
00190 return false;
00191 }
00192 void doLoad()
00193 {
00194 loadHeader();
00195 loadBody();
00196 }
00197 bool valid() const
00198 {
00199 return m_bit != m_eit;
00200 }
00201 void append(char*& buf, size_t& bufsz, char c, size_t& pos)
00202 {
00203 enum { alloc_block = 128};
00204 if(pos == bufsz)
00205 {
00206
00207 char* tmp = buf;
00208 int oldBufsz = bufsz;
00209 while(pos >= bufsz)
00210 bufsz = bufsz + alloc_block;
00211 buf = new char[bufsz+1];
00212 if(tmp != 0)
00213 {
00214 assert(oldBufsz > 0);
00215 memset(buf, 0, bufsz);
00216 memcpy(buf, tmp, oldBufsz);
00217 delete[] tmp;
00218 }
00219 }
00220 buf[pos++] = c;
00221 }
00222
00223
00224 void loadHeader()
00225 {
00226 enum {
00227 sInit,
00228 sIgnoreLine,
00229 sNewline,
00230 sWaitingName,
00231 sWaitingValue,
00232 sWaitingFoldedValue,
00233 sName,
00234 sValue,
00235 sIgnoreHeader
00236 };
00237 register int status;
00238 int pos;
00239 char *name, *value;
00240 size_t nBufSz, vBufSz, nPos, vPos;
00241 char prev, c = 0;
00242
00243 name = value = 0;
00244 pos = nBufSz = vBufSz = nPos = vPos = 0;
00245 status = (m_iMask & imHeader ? sIgnoreHeader : sInit);
00246
00247 while(m_bit != m_eit)
00248 {
00249 c = *m_bit;
00250 switch(status)
00251 {
00252 case sInit:
00253 if(isnl(c))
00254 status = sNewline;
00255 else
00256 status = sName;
00257 continue;
00258 case sIgnoreLine:
00259 if(!isnl(c))
00260 break;
00261 status = sNewline;
00262 continue;
00263 case sNewline:
00264 status = sWaitingName;
00265 if(pos > 0)
00266 {
00267 pos = 0;
00268 prev = c;
00269 if(++m_bit == m_eit) goto out;
00270 c = *m_bit;
00271 if(c == (prev == CR ? LF : CR))
00272 {
00273 --pos;
00274 break;
00275 } else
00276 continue;
00277 } else {
00278
00279 prev = c;
00280 if(++m_bit == m_eit) goto out;
00281 c = *m_bit;
00282 if(c == (prev == CR ? LF : CR))
00283 ++m_bit;
00284 goto out;
00285 }
00286 case sWaitingName:
00287 if(isblank(c))
00288 {
00289
00290 status = sWaitingFoldedValue;
00291 continue;
00292 }
00293
00294 if(nPos)
00295 {
00296 name[nPos] = 0;
00297
00298 if(vPos)
00299 {
00300 value[vPos] = 0;
00301 addField(name,value);
00302 } else
00303 addField(name,"");
00304 nPos = vPos = 0;
00305 }
00306 status = (isnl(c) ? sNewline : sName);
00307 continue;
00308 case sWaitingValue:
00309 if(isblank(c) || isnl(c))
00310 break;
00311 status = sValue;
00312 continue;
00313 case sWaitingFoldedValue:
00314 if(isblank(c))
00315 break;
00316 append(value, vBufSz, ' ', vPos);
00317 status = sValue;
00318 continue;
00319 case sName:
00320 if(c > 32 && c < 127 && c != ':') {
00321 append(name, nBufSz, c, nPos);
00322 } else if(c == ':') {
00323 status = sWaitingValue;
00324 } else {
00325 nPos = 0;
00326 status = sIgnoreLine;
00327 continue;
00328 }
00329 break;
00330 case sValue:
00331 if(isnl(c))
00332 {
00333 status = sNewline;
00334 continue;
00335 }
00336 append(value, vBufSz, c, vPos);
00337 break;
00338 case sIgnoreHeader:
00339 if(isnl(c))
00340 {
00341 prev = c;
00342 if(++m_bit == m_eit) goto out;
00343 c = *m_bit;
00344 if(c == (prev == CR ? LF : CR))
00345 ++m_bit;
00346 if(pos == 0)
00347 goto out;
00348 pos = 0;
00349 continue;
00350 }
00351 break;
00352 }
00353 ++m_bit; ++pos;
00354 }
00355 out:
00356 if(name)
00357 delete[] name;
00358 if(value)
00359 delete[] value;
00360 return;
00361 }
00362 void loadBody()
00363 {
00364 switch(getType())
00365 {
00366 case etRfc822:
00367 if(m_iMask & imBody)
00368 jump_to_next_boundary();
00369 else
00370 copy_until_boundary(peBody);
00371 break;
00372 case etMultipart:
00373 loadMultipart();
00374 break;
00375 case etMsgRfc822:
00376 if(m_iMask & imChildParts)
00377 jump_to_next_boundary();
00378 else {
00379 pushNewChild();
00380 doLoad();
00381 popChild();
00382 }
00383 break;
00384 }
00385 }
00386 void loadMultipart()
00387 {
00388 std::string boundary = getBoundary();
00389 m_boundaryList.push_front(boundary);
00390 ParsingElem pe;
00391
00392 pe = (m_iMask & imPreamble ? peIgnore : pePreamble );
00393 copy_until_boundary(pe);
00394 while(m_bit != m_eit)
00395 {
00396 switch(m_lastBoundary)
00397 {
00398 case NoBoundary:
00399 return;
00400 case Boundary:
00401 if(m_iMask & imChildParts)
00402 jump_to_next_boundary();
00403 else {
00404 pushNewChild();
00405 doLoad();
00406 popChild();
00407 }
00408 break;
00409 case ClosingBoundary:
00410 m_boundaryList.erase(m_boundaryList.begin());
00411
00412 pe=(m_iMask & imEpilogue? peIgnore: peEpilogue);
00413 copy_until_boundary(pe);
00414 return;
00415 case HigherLevelBoundary:
00416 m_boundaryList.erase(m_boundaryList.begin());
00417 return;
00418 }
00419 }
00420 }
00421 inline void onBlock(const char* block, int sz, ParsingElem pe)
00422 {
00423 switch(pe)
00424 {
00425 case peIgnore:
00426 return;
00427 case pePreamble:
00428 appendPreambleBlock(block, sz);
00429 break;
00430 case peEpilogue:
00431 appendEpilogueBlock(block, sz);
00432 break;
00433 case peBody:
00434 appendBodyBlock(block, sz);
00435 break;
00436 }
00437 }
00438 void jump_to_next_boundary()
00439 {
00440 copy_until_boundary(peIgnore);
00441 }
00442
00443
00444
00445 virtual void copy_until_boundary(ParsingElem pe)
00446 {
00447 size_t pos, lines, eomsz = 0;
00448 register char c;
00449 enum { nlsz = 1 };
00450 const char *eom = 0;
00451
00452 enum { blksz = 4096 };
00453 char block[blksz];
00454 size_t blkpos = 0;
00455 size_t sl_off = 0;
00456
00457 pos = lines = 0;
00458 while(m_bit != m_eit)
00459 {
00460
00461 if(blkpos >= blksz - 2 - nlsz)
00462 {
00463 if(sl_off == 0)
00464 {
00465
00466
00467
00468 block[blkpos] = 0;
00469 onBlock(block, blkpos, pe);
00470 blkpos = sl_off = 0;
00471 } else {
00472
00473
00474 size_t llen = blkpos - sl_off;
00475 onBlock(block, sl_off, pe);
00476 memmove(block, block + sl_off, llen);
00477 sl_off = 0;
00478 blkpos = llen;
00479 }
00480 }
00481 c = *m_bit;
00482 if(isnl(c))
00483 {
00484 char nlbuf[3] = { 0, 0, 0 };
00485
00486 nlbuf[0] = c;
00487
00488
00489 if(++m_bit != m_eit)
00490 {
00491 char next = *m_bit;
00492 if(next == (c == CR ? LF : CR))
00493 {
00494 nlbuf[1] = next;
00495 ++m_bit;
00496 }
00497 }
00498
00499 if(pos)
00500 {
00501
00502 block[blkpos] = 0;
00503 if(block[sl_off] == '-' && sl_off < blkpos &&
00504 block[sl_off+1] == '-')
00505 {
00506 std::string Line(block+sl_off, blkpos-sl_off);
00507 if(isBoundary(Line))
00508 {
00509
00510 if (sl_off>=2)
00511 {
00512 int i = sl_off;
00513 char a = block[--i];
00514 char b = block[--i];
00515
00516 if(isnl(a,b))
00517 sl_off -= 2;
00518 else if(isnl(a))
00519 sl_off--;
00520
00521 } else if (sl_off==1 && isnl(block[0])) {
00522 sl_off--;
00523 }
00524 onBlock(block, sl_off, pe);
00525 return;
00526 }
00527 }
00528
00529
00530 if(eom && pos >= eomsz)
00531 {
00532 char *line = block + sl_off;
00533 size_t i = 0;
00534 for(; i < eomsz; i++)
00535 if(eom[i] != line[i])
00536 break;
00537 if(i==eomsz)
00538 {
00539 onBlock(block, sl_off,
00540 pe);
00541 return;
00542 }
00543 }
00544 }
00545
00546 for(int i = 0; nlbuf[i] != 0; i++)
00547 block[blkpos++] = nlbuf[i];
00548 block[blkpos] = 0;
00549 sl_off = blkpos;
00550 pos = 0;
00551 } else {
00552 pos++;
00553 block[blkpos++] = c;
00554 ++m_bit;
00555 }
00556 }
00557
00558 block[blkpos] = 0;
00559 onBlock(block, blkpos, pe);
00560 }
00561 };
00562
00563
00564
00565
00566
00567 template<typename Iterator>
00568 struct IteratorParser<Iterator, std::forward_iterator_tag>:
00569 public IteratorParser<Iterator, std::input_iterator_tag>
00570 {
00571
00572
00573
00574
00575
00576 typedef IteratorParser<Iterator, std::input_iterator_tag> base_type;
00577 IteratorParser(MimeEntity& me)
00578 : base_type(me)
00579 {
00580 }
00581 };
00582
00583
00584
00585
00586 template<typename Iterator>
00587 struct IteratorParser<Iterator, std::bidirectional_iterator_tag>:
00588 public IteratorParser<Iterator, std::forward_iterator_tag>
00589 {
00590 typedef IteratorParser<Iterator, std::forward_iterator_tag> base_type;
00591 IteratorParser(MimeEntity& me)
00592 : base_type(me)
00593 {
00594 }
00595 };
00596
00597
00598
00599
00600 template<typename Iterator>
00601 struct IteratorParser<Iterator, std::random_access_iterator_tag>:
00602 public IteratorParser<Iterator, std::bidirectional_iterator_tag>
00603 {
00604 typedef IteratorParser<Iterator, std::bidirectional_iterator_tag> base_type;
00605 IteratorParser(MimeEntity& me)
00606 : base_type(me)
00607 {
00608 }
00609 private:
00610 using base_type::peIgnore;
00611 using base_type::pePreamble;
00612 using base_type::peBody;
00613 using base_type::peEpilogue;
00614
00615 using base_type::NoBoundary;
00616 using base_type::Boundary;
00617 using base_type::ClosingBoundary;
00618 using base_type::HigherLevelBoundary;
00619
00620 using base_type::m_boundaryList;
00621 using base_type::m_lastBoundary;
00622 using base_type::m_entityStack;
00623 using base_type::m_me;
00624 using base_type::m_iMask;
00625 using base_type::m_bit;
00626 using base_type::m_eit;
00627 using base_type::isnl;
00628
00629 typedef TreeNode<char> BoundaryTree;
00630 inline void onBlock(Iterator bit, int size, ParsingElem pe)
00631 {
00632 if(pe == peIgnore)
00633 return;
00634 Iterator eit = bit + size;
00635 MimeEntity* pMe = m_entityStack.top();
00636 switch(pe)
00637 {
00638 case pePreamble:
00639 pMe->body().preamble().append(bit, eit);
00640 break;
00641 case peEpilogue:
00642 pMe->body().epilogue().append(bit, eit);
00643 break;
00644 case peBody:
00645 pMe->body().append(bit, eit);
00646 break;
00647 }
00648 }
00649 void copy_until_boundary(ParsingElem pe)
00650 {
00651
00652 if(m_boundaryList.empty())
00653 {
00654 onBlock(m_bit, m_eit-m_bit, pe);
00655 m_bit = m_eit;
00656 return;
00657 }
00658
00659
00660
00661 typename base_type::BoundaryList::const_iterator
00662 bBit = m_boundaryList.begin(), bEit = m_boundaryList.end();
00663 m_lastBoundary = NoBoundary;
00664 int depth = 0;
00665 for( ;bBit != bEit; ++bBit, ++depth)
00666 {
00667 const std::string& boundary = *bBit;
00668 Iterator off;
00669 if( (off=utils::find_bm(m_bit,m_eit,boundary)) != m_eit)
00670 {
00671 Iterator base = m_bit;
00672 size_t block_sz = off - base;
00673 m_lastBoundary =
00674 (depth ? HigherLevelBoundary: Boundary);
00675 off += boundary.length();
00676 m_bit = off;
00677 if(off<m_eit-1 && *off =='-' && *(off+1) == '-')
00678 {
00679 m_lastBoundary = ClosingBoundary;
00680 m_bit = off + 2;
00681 }
00682 if(m_bit < m_eit-1 && isnl(*m_bit))
00683 {
00684 char c = *m_bit++;
00685 char next = *m_bit;
00686 if(isnl(next) && next != c)
00687 ++m_bit;
00688 }
00689
00690
00691 if(block_sz)
00692 {
00693 Iterator p = base + block_sz;
00694 char a = *--p, b = *--p;
00695 if(isnl(a,b))
00696 block_sz -= 2;
00697 else if(isnl(a))
00698 block_sz--;
00699 }
00700 onBlock(base, block_sz, pe);
00701 return;
00702 } else {
00703 onBlock(m_bit, m_eit-m_bit, pe);
00704 m_bit = m_eit;
00705 }
00706 }
00707 }
00708 BoundaryTree m_boundaryTree;
00709 void buildBoundaryTree()
00710 {
00711 m_boundaryTree = BoundaryTree();
00712 typename base_type::BoundaryList::const_iterator
00713 bit = m_boundaryList.begin(), eit = m_boundaryList.end();
00714 BoundaryTree::NodeList *pChilds;
00715 BoundaryTree::NodeList::iterator it;
00716 int depth = 0;
00717 for( ; bit != eit; ++bit)
00718 {
00719 pChilds = &m_boundaryTree.childList();
00720 it = pChilds->begin();
00721 const char *w = bit->c_str();
00722 do
00723 {
00724 it = find_if(pChilds->begin(), pChilds->end(),
00725 FindNodePred<char>(*w));
00726 if( it == pChilds->end() )
00727 it = pChilds->insert(pChilds->end(),*w);
00728 pChilds = &it->childList();
00729 depth++;
00730 } while(*(++w));
00731 }
00732 }
00733
00734 };
00735
00736 }
00737
00738 #endif