pion-net  4.0.9
HTTPParser.cpp
1 // ------------------------------------------------------------------
2 // pion-net: a C++ framework for building lightweight HTTP interfaces
3 // ------------------------------------------------------------------
4 // Copyright (C) 2007-2008 Atomic Labs, Inc. (http://www.atomiclabs.com)
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See http://www.boost.org/LICENSE_1_0.txt
8 //
9 
10 #include <cstdlib>
11 #include <boost/regex.hpp>
12 #include <boost/logic/tribool.hpp>
13 #include <pion/net/HTTPParser.hpp>
14 #include <pion/net/HTTPRequest.hpp>
15 #include <pion/net/HTTPResponse.hpp>
16 #include <pion/net/HTTPMessage.hpp>
17 
18 
19 namespace pion { // begin namespace pion
20 namespace net { // begin namespace net (Pion Network Library)
21 
22 
23 // static members of HTTPParser
24 
25 const boost::uint32_t HTTPParser::STATUS_MESSAGE_MAX = 1024; // 1 KB
26 const boost::uint32_t HTTPParser::METHOD_MAX = 1024; // 1 KB
27 const boost::uint32_t HTTPParser::RESOURCE_MAX = 256 * 1024; // 256 KB
28 const boost::uint32_t HTTPParser::QUERY_STRING_MAX = 1024 * 1024; // 1 MB
29 const boost::uint32_t HTTPParser::HEADER_NAME_MAX = 1024; // 1 KB
30 const boost::uint32_t HTTPParser::HEADER_VALUE_MAX = 1024 * 1024; // 1 MB
31 const boost::uint32_t HTTPParser::QUERY_NAME_MAX = 1024; // 1 KB
32 const boost::uint32_t HTTPParser::QUERY_VALUE_MAX = 1024 * 1024; // 1 MB
33 const boost::uint32_t HTTPParser::COOKIE_NAME_MAX = 1024; // 1 KB
34 const boost::uint32_t HTTPParser::COOKIE_VALUE_MAX = 1024 * 1024; // 1 MB
35 const std::size_t HTTPParser::DEFAULT_CONTENT_MAX = 1024 * 1024; // 1 MB
36 HTTPParser::ErrorCategory * HTTPParser::m_error_category_ptr = NULL;
37 boost::once_flag HTTPParser::m_instance_flag = BOOST_ONCE_INIT;
38 
39 
40 // HTTPParser member functions
41 
42 boost::tribool HTTPParser::parse(HTTPMessage& http_msg,
43  boost::system::error_code& ec)
44 {
45  PION_ASSERT(! eof() );
46 
47  boost::tribool rc = boost::indeterminate;
48  std::size_t total_bytes_parsed = 0;
49 
50  if(http_msg.hasMissingPackets()) {
51  http_msg.setDataAfterMissingPacket(true);
52  }
53 
54  do {
55  switch (m_message_parse_state) {
56  // just started parsing the HTTP message
57  case PARSE_START:
58  m_message_parse_state = PARSE_HEADERS;
59  // step through to PARSE_HEADERS
60 
61  // parsing the HTTP headers
62  case PARSE_HEADERS:
63  rc = parseHeaders(http_msg, ec);
64  total_bytes_parsed += m_bytes_last_read;
65  // check if we have finished parsing HTTP headers
66  if (rc == true) {
67  // finishHeaderParsing() updates m_message_parse_state
68  rc = finishHeaderParsing(http_msg, ec);
69  }
70  break;
71 
72  // parsing chunked payload content
73  case PARSE_CHUNKS:
74  rc = parseChunks(http_msg.getChunkCache(), ec);
75  total_bytes_parsed += m_bytes_last_read;
76  // check if we have finished parsing all chunks
77  if (rc == true) {
78  http_msg.concatenateChunks();
79  }
80  break;
81 
82  // parsing regular payload content with a known length
83  case PARSE_CONTENT:
84  rc = consumeContent(http_msg, ec);
85  total_bytes_parsed += m_bytes_last_read;
86  break;
87 
88  // parsing payload content with no length (until EOF)
89  case PARSE_CONTENT_NO_LENGTH:
91  total_bytes_parsed += m_bytes_last_read;
92  break;
93 
94  // finished parsing the HTTP message
95  case PARSE_END:
96  rc = true;
97  break;
98  }
99  } while ( boost::indeterminate(rc) && ! eof() );
100 
101  // check if we've finished parsing the HTTP message
102  if (rc == true) {
103  m_message_parse_state = PARSE_END;
104  finish(http_msg);
105  } else if(rc == false) {
106  computeMsgStatus(http_msg, false);
107  }
108 
109  // update bytes last read (aggregate individual operations for caller)
110  m_bytes_last_read = total_bytes_parsed;
111 
112  return rc;
113 }
114 
115 boost::tribool HTTPParser::parseMissingData(HTTPMessage& http_msg,
116  std::size_t len, boost::system::error_code& ec)
117 {
118  static const char MISSING_DATA_CHAR = 'X';
119  boost::tribool rc = boost::indeterminate;
120 
121  http_msg.setMissingPackets(true);
122 
123  switch (m_message_parse_state) {
124 
125  // cannot recover from missing data while parsing HTTP headers
126  case PARSE_START:
127  case PARSE_HEADERS:
128  setError(ec, ERROR_MISSING_HEADER_DATA);
129  rc = false;
130  break;
131 
132  // parsing chunked payload content
133  case PARSE_CHUNKS:
134  // parsing chunk data -> we can only recover if data fits into current chunk
135  if (m_chunked_content_parse_state == PARSE_CHUNK
136  && m_bytes_read_in_current_chunk < m_size_of_current_chunk
137  && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
138  {
139  // use dummy content for missing data
140  for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n)
141  http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
142 
143  m_bytes_read_in_current_chunk += len;
144  m_bytes_last_read = len;
145  m_bytes_total_read += len;
146  m_bytes_content_read += len;
147 
148  if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
149  m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
150  }
151  } else {
152  // cannot recover from missing data
153  setError(ec, ERROR_MISSING_CHUNK_DATA);
154  rc = false;
155  }
156  break;
157 
158  // parsing regular payload content with a known length
159  case PARSE_CONTENT:
160  // parsing content (with length) -> we can only recover if data fits into content
161  if (m_bytes_content_remaining == 0) {
162  // we have all of the remaining payload content
163  rc = true;
164  } else if (m_bytes_content_remaining < len) {
165  // cannot recover from missing data
166  setError(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
167  rc = false;
168  } else {
169 
170  // make sure content buffer is not already full
171  if ( (m_bytes_content_read+len) <= m_max_content_length) {
172  // use dummy content for missing data
173  for (std::size_t n = 0; n < len; ++n)
174  http_msg.getContent()[m_bytes_content_read++] = MISSING_DATA_CHAR;
175  } else {
176  m_bytes_content_read += len;
177  }
178 
179  m_bytes_content_remaining -= len;
180  m_bytes_total_read += len;
181  m_bytes_last_read = len;
182 
183  if (m_bytes_content_remaining == 0)
184  rc = true;
185  }
186  break;
187 
188  // parsing payload content with no length (until EOF)
189  case PARSE_CONTENT_NO_LENGTH:
190  // use dummy content for missing data
191  for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n)
192  http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
193  m_bytes_last_read = len;
194  m_bytes_total_read += len;
195  m_bytes_content_read += len;
196  break;
197 
198  // finished parsing the HTTP message
199  case PARSE_END:
200  rc = true;
201  break;
202  }
203 
204  // check if we've finished parsing the HTTP message
205  if (rc == true) {
206  m_message_parse_state = PARSE_END;
207  finish(http_msg);
208  } else if(rc == false) {
209  computeMsgStatus(http_msg, false);
210  }
211 
212  return rc;
213 }
214 
215 boost::tribool HTTPParser::parseHeaders(HTTPMessage& http_msg,
216  boost::system::error_code& ec)
217 {
218  //
219  // note that boost::tribool may have one of THREE states:
220  //
221  // false: encountered an error while parsing HTTP headers
222  // true: finished successfully parsing the HTTP headers
223  // indeterminate: parsed bytes, but the HTTP headers are not yet finished
224  //
225  const char *read_start_ptr = m_read_ptr;
226  m_bytes_last_read = 0;
227  while (m_read_ptr < m_read_end_ptr) {
228 
229  if (m_save_raw_headers)
230  m_raw_headers += *m_read_ptr;
231 
232  switch (m_headers_parse_state) {
233  case PARSE_METHOD_START:
234  // we have not yet started parsing the HTTP method string
235  if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') { // ignore leading whitespace
236  if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
237  setError(ec, ERROR_METHOD_CHAR);
238  return false;
239  }
240  m_headers_parse_state = PARSE_METHOD;
241  m_method.erase();
242  m_method.push_back(*m_read_ptr);
243  }
244  break;
245 
246  case PARSE_METHOD:
247  // we have started parsing the HTTP method string
248  if (*m_read_ptr == ' ') {
249  m_resource.erase();
250  m_headers_parse_state = PARSE_URI_STEM;
251  } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
252  setError(ec, ERROR_METHOD_CHAR);
253  return false;
254  } else if (m_method.size() >= METHOD_MAX) {
255  setError(ec, ERROR_METHOD_SIZE);
256  return false;
257  } else {
258  m_method.push_back(*m_read_ptr);
259  }
260  break;
261 
262  case PARSE_URI_STEM:
263  // we have started parsing the URI stem (or resource name)
264  if (*m_read_ptr == ' ') {
265  m_headers_parse_state = PARSE_HTTP_VERSION_H;
266  } else if (*m_read_ptr == '?') {
267  m_query_string.erase();
268  m_headers_parse_state = PARSE_URI_QUERY;
269  } else if (*m_read_ptr == '\r') {
270  http_msg.setVersionMajor(0);
271  http_msg.setVersionMinor(0);
272  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
273  } else if (*m_read_ptr == '\n') {
274  http_msg.setVersionMajor(0);
275  http_msg.setVersionMinor(0);
276  m_headers_parse_state = PARSE_EXPECTING_CR;
277  } else if (isControl(*m_read_ptr)) {
278  setError(ec, ERROR_URI_CHAR);
279  return false;
280  } else if (m_resource.size() >= RESOURCE_MAX) {
281  setError(ec, ERROR_URI_SIZE);
282  return false;
283  } else {
284  m_resource.push_back(*m_read_ptr);
285  }
286  break;
287 
288  case PARSE_URI_QUERY:
289  // we have started parsing the URI query string
290  if (*m_read_ptr == ' ') {
291  m_headers_parse_state = PARSE_HTTP_VERSION_H;
292  } else if (isControl(*m_read_ptr)) {
293  setError(ec, ERROR_QUERY_CHAR);
294  return false;
295  } else if (m_query_string.size() >= QUERY_STRING_MAX) {
296  setError(ec, ERROR_QUERY_SIZE);
297  return false;
298  } else {
299  m_query_string.push_back(*m_read_ptr);
300  }
301  break;
302 
303  case PARSE_HTTP_VERSION_H:
304  // parsing "HTTP"
305  if (*m_read_ptr == '\r') {
306  // should only happen for requests (no HTTP/VERSION specified)
307  if (! m_is_request) {
308  setError(ec, ERROR_VERSION_EMPTY);
309  return false;
310  }
311  http_msg.setVersionMajor(0);
312  http_msg.setVersionMinor(0);
313  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
314  } else if (*m_read_ptr == '\n') {
315  // should only happen for requests (no HTTP/VERSION specified)
316  if (! m_is_request) {
317  setError(ec, ERROR_VERSION_EMPTY);
318  return false;
319  }
320  http_msg.setVersionMajor(0);
321  http_msg.setVersionMinor(0);
322  m_headers_parse_state = PARSE_EXPECTING_CR;
323  } else if (*m_read_ptr != 'H') {
324  setError(ec, ERROR_VERSION_CHAR);
325  return false;
326  }
327  m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
328  break;
329 
330  case PARSE_HTTP_VERSION_T_1:
331  // parsing "HTTP"
332  if (*m_read_ptr != 'T') {
333  setError(ec, ERROR_VERSION_CHAR);
334  return false;
335  }
336  m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
337  break;
338 
339  case PARSE_HTTP_VERSION_T_2:
340  // parsing "HTTP"
341  if (*m_read_ptr != 'T') {
342  setError(ec, ERROR_VERSION_CHAR);
343  return false;
344  }
345  m_headers_parse_state = PARSE_HTTP_VERSION_P;
346  break;
347 
348  case PARSE_HTTP_VERSION_P:
349  // parsing "HTTP"
350  if (*m_read_ptr != 'P') {
351  setError(ec, ERROR_VERSION_CHAR);
352  return false;
353  }
354  m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
355  break;
356 
357  case PARSE_HTTP_VERSION_SLASH:
358  // parsing slash after "HTTP"
359  if (*m_read_ptr != '/') {
360  setError(ec, ERROR_VERSION_CHAR);
361  return false;
362  }
363  m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
364  break;
365 
366  case PARSE_HTTP_VERSION_MAJOR_START:
367  // parsing the first digit of the major version number
368  if (!isDigit(*m_read_ptr)) {
369  setError(ec, ERROR_VERSION_CHAR);
370  return false;
371  }
372  http_msg.setVersionMajor(*m_read_ptr - '0');
373  m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
374  break;
375 
376  case PARSE_HTTP_VERSION_MAJOR:
377  // parsing the major version number (not first digit)
378  if (*m_read_ptr == '.') {
379  m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
380  } else if (isDigit(*m_read_ptr)) {
381  http_msg.setVersionMajor( (http_msg.getVersionMajor() * 10)
382  + (*m_read_ptr - '0') );
383  } else {
384  setError(ec, ERROR_VERSION_CHAR);
385  return false;
386  }
387  break;
388 
389  case PARSE_HTTP_VERSION_MINOR_START:
390  // parsing the first digit of the minor version number
391  if (!isDigit(*m_read_ptr)) {
392  setError(ec, ERROR_VERSION_CHAR);
393  return false;
394  }
395  http_msg.setVersionMinor(*m_read_ptr - '0');
396  m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
397  break;
398 
399  case PARSE_HTTP_VERSION_MINOR:
400  // parsing the major version number (not first digit)
401  if (*m_read_ptr == ' ') {
402  // ignore trailing spaces after version in request
403  if (! m_is_request) {
404  m_headers_parse_state = PARSE_STATUS_CODE_START;
405  }
406  } else if (*m_read_ptr == '\r') {
407  // should only happen for requests
408  if (! m_is_request) {
409  setError(ec, ERROR_STATUS_EMPTY);
410  return false;
411  }
412  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
413  } else if (*m_read_ptr == '\n') {
414  // should only happen for requests
415  if (! m_is_request) {
416  setError(ec, ERROR_STATUS_EMPTY);
417  return false;
418  }
419  m_headers_parse_state = PARSE_EXPECTING_CR;
420  } else if (isDigit(*m_read_ptr)) {
421  http_msg.setVersionMinor( (http_msg.getVersionMinor() * 10)
422  + (*m_read_ptr - '0') );
423  } else {
424  setError(ec, ERROR_VERSION_CHAR);
425  return false;
426  }
427  break;
428 
429  case PARSE_STATUS_CODE_START:
430  // parsing the first digit of the response status code
431  if (!isDigit(*m_read_ptr)) {
432  setError(ec, ERROR_STATUS_CHAR);
433  return false;
434  }
435  m_status_code = (*m_read_ptr - '0');
436  m_headers_parse_state = PARSE_STATUS_CODE;
437  break;
438 
439  case PARSE_STATUS_CODE:
440  // parsing the response status code (not first digit)
441  if (*m_read_ptr == ' ') {
442  m_status_message.erase();
443  m_headers_parse_state = PARSE_STATUS_MESSAGE;
444  } else if (isDigit(*m_read_ptr)) {
445  m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
446  } else if (*m_read_ptr == '\r') {
447  // recover from status message not sent
448  m_status_message.erase();
449  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
450  } else if (*m_read_ptr == '\n') {
451  // recover from status message not sent
452  m_status_message.erase();
453  m_headers_parse_state = PARSE_EXPECTING_CR;
454  } else {
455  setError(ec, ERROR_STATUS_CHAR);
456  return false;
457  }
458  break;
459 
460  case PARSE_STATUS_MESSAGE:
461  // parsing the response status message
462  if (*m_read_ptr == '\r') {
463  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
464  } else if (*m_read_ptr == '\n') {
465  m_headers_parse_state = PARSE_EXPECTING_CR;
466  } else if (isControl(*m_read_ptr)) {
467  setError(ec, ERROR_STATUS_CHAR);
468  return false;
469  } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
470  setError(ec, ERROR_STATUS_CHAR);
471  return false;
472  } else {
473  m_status_message.push_back(*m_read_ptr);
474  }
475  break;
476 
477  case PARSE_EXPECTING_NEWLINE:
478  // we received a CR; expecting a newline to follow
479  if (*m_read_ptr == '\n') {
480  m_headers_parse_state = PARSE_HEADER_START;
481  } else if (*m_read_ptr == '\r') {
482  // we received two CR's in a row
483  // assume CR only is (incorrectly) being used for line termination
484  // therefore, the message is finished
485  ++m_read_ptr;
486  m_bytes_last_read = (m_read_ptr - read_start_ptr);
487  m_bytes_total_read += m_bytes_last_read;
488  return true;
489  } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
490  m_headers_parse_state = PARSE_HEADER_WHITESPACE;
491  } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
492  setError(ec, ERROR_HEADER_CHAR);
493  return false;
494  } else {
495  // assume it is the first character for the name of a header
496  m_header_name.erase();
497  m_header_name.push_back(*m_read_ptr);
498  m_headers_parse_state = PARSE_HEADER_NAME;
499  }
500  break;
501 
502  case PARSE_EXPECTING_CR:
503  // we received a newline without a CR
504  if (*m_read_ptr == '\r') {
505  m_headers_parse_state = PARSE_HEADER_START;
506  } else if (*m_read_ptr == '\n') {
507  // we received two newlines in a row
508  // assume newline only is (incorrectly) being used for line termination
509  // therefore, the message is finished
510  ++m_read_ptr;
511  m_bytes_last_read = (m_read_ptr - read_start_ptr);
512  m_bytes_total_read += m_bytes_last_read;
513  return true;
514  } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
515  m_headers_parse_state = PARSE_HEADER_WHITESPACE;
516  } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
517  setError(ec, ERROR_HEADER_CHAR);
518  return false;
519  } else {
520  // assume it is the first character for the name of a header
521  m_header_name.erase();
522  m_header_name.push_back(*m_read_ptr);
523  m_headers_parse_state = PARSE_HEADER_NAME;
524  }
525  break;
526 
527  case PARSE_HEADER_WHITESPACE:
528  // parsing whitespace before a header name
529  if (*m_read_ptr == '\r') {
530  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
531  } else if (*m_read_ptr == '\n') {
532  m_headers_parse_state = PARSE_EXPECTING_CR;
533  } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
534  if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr))
535  setError(ec, ERROR_HEADER_CHAR);
536  return false;
537  // assume it is the first character for the name of a header
538  m_header_name.erase();
539  m_header_name.push_back(*m_read_ptr);
540  m_headers_parse_state = PARSE_HEADER_NAME;
541  }
542  break;
543 
544  case PARSE_HEADER_START:
545  // parsing the start of a new header
546  if (*m_read_ptr == '\r') {
547  m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
548  } else if (*m_read_ptr == '\n') {
549  m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
550  } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
551  m_headers_parse_state = PARSE_HEADER_WHITESPACE;
552  } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
553  setError(ec, ERROR_HEADER_CHAR);
554  return false;
555  } else {
556  // first character for the name of a header
557  m_header_name.erase();
558  m_header_name.push_back(*m_read_ptr);
559  m_headers_parse_state = PARSE_HEADER_NAME;
560  }
561  break;
562 
563  case PARSE_HEADER_NAME:
564  // parsing the name of a header
565  if (*m_read_ptr == ':') {
566  m_header_value.erase();
567  m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
568  } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
569  setError(ec, ERROR_HEADER_CHAR);
570  return false;
571  } else if (m_header_name.size() >= HEADER_NAME_MAX) {
572  setError(ec, ERROR_HEADER_NAME_SIZE);
573  return false;
574  } else {
575  // character (not first) for the name of a header
576  m_header_name.push_back(*m_read_ptr);
577  }
578  break;
579 
580  case PARSE_SPACE_BEFORE_HEADER_VALUE:
581  // parsing space character before a header's value
582  if (*m_read_ptr == ' ') {
583  m_headers_parse_state = PARSE_HEADER_VALUE;
584  } else if (*m_read_ptr == '\r') {
585  http_msg.addHeader(m_header_name, m_header_value);
586  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
587  } else if (*m_read_ptr == '\n') {
588  http_msg.addHeader(m_header_name, m_header_value);
589  m_headers_parse_state = PARSE_EXPECTING_CR;
590  } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
591  setError(ec, ERROR_HEADER_CHAR);
592  return false;
593  } else {
594  // assume it is the first character for the value of a header
595  m_header_value.push_back(*m_read_ptr);
596  m_headers_parse_state = PARSE_HEADER_VALUE;
597  }
598  break;
599 
600  case PARSE_HEADER_VALUE:
601  // parsing the value of a header
602  if (*m_read_ptr == '\r') {
603  http_msg.addHeader(m_header_name, m_header_value);
604  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
605  } else if (*m_read_ptr == '\n') {
606  http_msg.addHeader(m_header_name, m_header_value);
607  m_headers_parse_state = PARSE_EXPECTING_CR;
608  } else if (isControl(*m_read_ptr)) {
609  setError(ec, ERROR_HEADER_CHAR);
610  return false;
611  } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
612  setError(ec, ERROR_HEADER_VALUE_SIZE);
613  return false;
614  } else {
615  // character (not first) for the value of a header
616  m_header_value.push_back(*m_read_ptr);
617  }
618  break;
619 
620  case PARSE_EXPECTING_FINAL_NEWLINE:
621  if (*m_read_ptr == '\n') ++m_read_ptr;
622  m_bytes_last_read = (m_read_ptr - read_start_ptr);
623  m_bytes_total_read += m_bytes_last_read;
624  return true;
625 
626  case PARSE_EXPECTING_FINAL_CR:
627  if (*m_read_ptr == '\r') ++m_read_ptr;
628  m_bytes_last_read = (m_read_ptr - read_start_ptr);
629  m_bytes_total_read += m_bytes_last_read;
630  return true;
631  }
632 
633  ++m_read_ptr;
634  }
635 
636  m_bytes_last_read = (m_read_ptr - read_start_ptr);
637  m_bytes_total_read += m_bytes_last_read;
638  return boost::indeterminate;
639 }
640 
642 {
  // Copies the values collected while parsing the start line into http_msg
  // and extracts cookies (plus query parameters for requests) from the
  // headers already stored on the message.  Failures of the sub-parsers are
  // only logged as warnings; nothing is returned from this body.
  //
  // NOTE(review): the line carrying this definition's signature is absent
  // here (the embedded numbering skips 641).  The body matches the
  // updateMessageWithHeaderData(http_msg) call made elsewhere in this file
  // -- confirm the exact signature against HTTPParser.hpp.
643  if (isParsingRequest()) {
644 
645  // finish an HTTP request message
646 
  // dynamic_cast to a reference throws std::bad_cast if http_msg is not
  // actually an HTTPRequest
647  HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
648  http_request.setMethod(m_method);
649  http_request.setResource(m_resource);
650  http_request.setQueryString(m_query_string);
651 
652  // parse query pairs from the URI query string
653  if (! m_query_string.empty()) {
654  if (! parseURLEncoded(http_request.getQueryParams(),
655  m_query_string.c_str(),
656  m_query_string.size()))
657  PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
658  }
659 
660  // parse "Cookie" headers in request
  // equal_range() yields every header stored under the Cookie name; each
  // one is parsed separately into the request's cookie parameters
661  std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
662  cookie_pair = http_request.getHeaders().equal_range(HTTPTypes::HEADER_COOKIE);
663  for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
664  cookie_iterator != http_request.getHeaders().end()
665  && cookie_iterator != cookie_pair.second; ++cookie_iterator)
666  {
  // last argument false: this is a "Cookie" header, not "Set-Cookie"
667  if (! parseCookieHeader(http_request.getCookieParams(),
668  cookie_iterator->second, false) )
669  PION_LOG_WARN(m_logger, "Cookie header parsing failed");
670  }
671 
672  } else {
673 
674  // finish an HTTP response message
675 
  // dynamic_cast to a reference throws std::bad_cast if http_msg is not
  // actually an HTTPResponse
676  HTTPResponse& http_response(dynamic_cast<HTTPResponse&>(http_msg));
677  http_response.setStatusCode(m_status_code);
678  http_response.setStatusMessage(m_status_message);
679 
680  // parse "Set-Cookie" headers in response
681  std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
682  cookie_pair = http_response.getHeaders().equal_range(HTTPTypes::HEADER_SET_COOKIE);
683  for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
684  cookie_iterator != http_response.getHeaders().end()
685  && cookie_iterator != cookie_pair.second; ++cookie_iterator)
686  {
  // last argument true: this is a "Set-Cookie" header
687  if (! parseCookieHeader(http_response.getCookieParams(),
688  cookie_iterator->second, true) )
689  PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
690  }
691 
692  }
693 }
694 
696  boost::system::error_code& ec)
697 {
  // Decides how the payload that follows the headers is to be read and sets
  // m_message_parse_state accordingly (PARSE_CHUNKS, PARSE_CONTENT,
  // PARSE_CONTENT_NO_LENGTH or PARSE_END).
  //
  // Returns true when no further parsing is needed (no payload is expected,
  // or m_parse_headers_only is set), false on an invalid content length,
  // and indeterminate when payload content still has to be parsed.
  //
  // NOTE(review): the first line of this definition's signature is absent
  // here (the embedded numbering skips 695).  The body matches the
  // finishHeaderParsing(http_msg, ec) call made from parse() -- confirm
  // against HTTPParser.hpp.
698  boost::tribool rc = boost::indeterminate;
699 
  // reset the payload counters before looking at the headers
700  m_bytes_content_remaining = m_bytes_content_read = 0;
701  http_msg.setContentLength(0);
  // NOTE(review): the embedded numbering skips 702, so a statement appears
  // to be missing at this point.  Presumably it refreshes the message's
  // transfer-coding state, which isChunked() below depends on -- confirm
  // against the upstream source.
703  updateMessageWithHeaderData(http_msg);
704 
705  if (http_msg.isChunked()) {
706 
707  // content is encoded using chunks
708  m_message_parse_state = PARSE_CHUNKS;
709 
710  // return true if parsing headers only
711  if (m_parse_headers_only)
712  rc = true;
713 
714  } else if (http_msg.isContentLengthImplied()) {
715 
716  // content length is implied to be zero
717  m_message_parse_state = PARSE_END;
718  rc = true;
719 
720  } else {
721  // content length should be specified in the headers
722 
723  if (http_msg.hasHeader(HTTPTypes::HEADER_CONTENT_LENGTH)) {
724 
725  // message has a content-length header
  // NOTE(review): the try block is empty as shown (the embedded numbering
  // skips 727).  As written the catch can never run and the content length
  // stays at the 0 assigned above, so every message with a Content-Length
  // header is treated as having no payload.  Presumably the dropped
  // statement reads the length from the header -- restore it from the
  // upstream source.
726  try {
728  } catch (...) {
729  PION_LOG_ERROR(m_logger, "Unable to update content length");
730  setError(ec, ERROR_INVALID_CONTENT_LENGTH);
731  return false;
732  }
733 
734  // check if content-length header == 0
735  if (http_msg.getContentLength() == 0) {
736  m_message_parse_state = PARSE_END;
737  rc = true;
738  } else {
739  m_message_parse_state = PARSE_CONTENT;
740  m_bytes_content_remaining = http_msg.getContentLength();
741 
742  // check if content-length exceeds maximum allowed
  // only the length stored on the message is clamped;
  // m_bytes_content_remaining keeps the full advertised length
743  if (m_bytes_content_remaining > m_max_content_length)
744  http_msg.setContentLength(m_max_content_length);
745 
746  // return true if parsing headers only
747  if (m_parse_headers_only)
748  rc = true;
749  }
750 
751  } else {
752  // no content-length specified, and the content length cannot
753  // otherwise be determined
754 
755  // only if not a request, read through the close of the connection
756  if (! m_is_request) {
757  // clear the chunk buffers before we start
758  http_msg.getChunkCache().clear();
759 
760  // continue reading content until there is no more data
761  m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
762 
763  // return true if parsing headers only
764  if (m_parse_headers_only)
765  rc = true;
766  } else {
  // a request without a content length is treated as having no payload
767  m_message_parse_state = PARSE_END;
768  rc = true;
769  }
770  }
771  }
772 
773  // allocate a buffer for payload content (may be zero-size)
774  http_msg.createContentBuffer();
775 
776  return rc;
777 }
778 
780  const char *ptr, const size_t len)
781 {
  // Splits the len bytes starting at ptr into name/value pairs separated by
  // '&', with '=' between a name and its value, and inserts each pair with
  // a non-empty name into dict.  No percent-decoding is performed here.
  //
  // Returns false as soon as a control character (other than CR, LF or tab,
  // which are skipped) is found or a name/value reaches its size limit;
  // pairs inserted before that point remain in dict.  Returns true otherwise.
  //
  // NOTE(review): the first line of this definition's signature is absent
  // here (the embedded numbering skips 779).  The body matches the
  // parseURLEncoded(dict, ptr, len) call made elsewhere in this file --
  // confirm the type of dict against HTTPParser.hpp.
782  // used to track whether we are parsing the name or value
783  enum QueryParseState {
784  QUERY_PARSE_NAME, QUERY_PARSE_VALUE
785  } parse_state = QUERY_PARSE_NAME;
786 
787  // misc other variables used for parsing
788  const char * const end = ptr + len;
789  std::string query_name;
790  std::string query_value;
791 
792  // iterate through each encoded character
793  while (ptr < end) {
794  switch (parse_state) {
795 
796  case QUERY_PARSE_NAME:
797  // parsing query name
798  if (*ptr == '=') {
799  // end of name found (OK if empty)
800  parse_state = QUERY_PARSE_VALUE;
801  } else if (*ptr == '&') {
802  // if query name is empty, just skip it (i.e. "&&")
803  if (! query_name.empty()) {
804  // assume that "=" is missing -- it's OK if the value is empty
805  dict.insert( std::make_pair(query_name, query_value) );
806  query_name.erase();
807  }
808  } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
809  // ignore linefeeds, carriage return and tabs (normally within POST content)
810  } else if (isControl(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
811  // control character detected, or max sized exceeded
812  return false;
813  } else {
814  // character is part of the name
815  query_name.push_back(*ptr);
816  }
817  break;
818 
819  case QUERY_PARSE_VALUE:
820  // parsing query value
821  if (*ptr == '&') {
822  // end of value found (OK if empty)
  // a value whose name is empty (e.g. "=value") is discarded
823  if (! query_name.empty()) {
824  dict.insert( std::make_pair(query_name, query_value) );
825  query_name.erase();
826  }
827  query_value.erase();
828  parse_state = QUERY_PARSE_NAME;
829  } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
830  // ignore linefeeds, carriage return and tabs (normally within POST content)
831  } else if (isControl(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
832  // control character detected, or max sized exceeded
833  return false;
834  } else {
835  // character is part of the value
836  query_value.push_back(*ptr);
837  }
838  break;
839  }
840 
841  ++ptr;
842  }
843 
844  // handle last pair in string
845  if (! query_name.empty())
846  dict.insert( std::make_pair(query_name, query_value) );
847 
848  return true;
849 }
850 
852  const char *ptr, const size_t len,
853  bool set_cookie_header)
854 {
  // Splits the len bytes starting at ptr into cookie name/value pairs
  // separated by ';' or ',' and inserts into dict every pair whose name is
  // not rejected by isCookieAttribute(name, set_cookie_header).
  //
  // Returns false as soon as a control character is found in a name or in
  // an unquoted value, or when a name/value reaches its size limit; pairs
  // inserted before that point remain in dict.  Returns true otherwise.
  //
  // NOTE(review): the first line of this definition's signature is absent
  // here (the embedded numbering skips 851).  The visible parameters are
  // (dict, ptr, len, set_cookie_header) -- confirm the type of dict against
  // HTTPParser.hpp.
855  // BASED ON RFC 2109
856  // http://www.ietf.org/rfc/rfc2109.txt
857  //
858  // The current implementation ignores cookie attributes which begin with '$'
859  // (i.e. $Path=/, $Domain=, etc.)
860 
861  // used to track what we are parsing
862  enum CookieParseState {
863  COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
864  } parse_state = COOKIE_PARSE_NAME;
865 
866  // misc other variables used for parsing
867  const char * const end = ptr + len;
868  std::string cookie_name;
869  std::string cookie_value;
  // '\0' while the value is unquoted; otherwise the quote character that
  // opened the value and must also close it
870  char value_quote_character = '\0';
871 
872  // iterate through each character
873  while (ptr < end) {
874  switch (parse_state) {
875 
876  case COOKIE_PARSE_NAME:
877  // parsing cookie name
878  if (*ptr == '=') {
879  // end of name found (OK if empty)
880  value_quote_character = '\0';
881  parse_state = COOKIE_PARSE_VALUE;
882  } else if (*ptr == ';' || *ptr == ',') {
883  // ignore empty cookie names since this may occur naturally
884  // when quoted values are encountered
885  if (! cookie_name.empty()) {
886  // value is empty (OK)
887  if (! isCookieAttribute(cookie_name, set_cookie_header))
888  dict.insert( std::make_pair(cookie_name, cookie_value) );
889  cookie_name.erase();
890  }
891  } else if (*ptr != ' ') { // ignore whitespace
892  // check if control character detected, or max sized exceeded
893  if (isControl(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
894  return false;
895  // character is part of the name
896  cookie_name.push_back(*ptr);
897  }
898  break;
899 
900  case COOKIE_PARSE_VALUE:
901  // parsing cookie value
902  if (value_quote_character == '\0') {
903  // value is not (yet) quoted
904  if (*ptr == ';' || *ptr == ',') {
905  // end of value found (OK if empty)
906  if (! isCookieAttribute(cookie_name, set_cookie_header))
907  dict.insert( std::make_pair(cookie_name, cookie_value) );
908  cookie_name.erase();
909  cookie_value.erase();
910  parse_state = COOKIE_PARSE_NAME;
911  } else if (*ptr == '\'' || *ptr == '"') {
  // a quote only opens a quoted value when it is the first character of
  // the value; later on it is kept as ordinary data
912  if (cookie_value.empty()) {
913  // begin quoted value
914  value_quote_character = *ptr;
915  } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
916  // max size exceeded
917  return false;
918  } else {
919  // assume character is part of the (unquoted) value
920  cookie_value.push_back(*ptr);
921  }
922  } else if (*ptr != ' ' || !cookie_value.empty()) { // ignore leading unquoted whitespace
923  // check if control character detected, or max sized exceeded
924  if (isControl(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
925  return false;
926  // character is part of the (unquoted) value
927  cookie_value.push_back(*ptr);
928  }
929  } else {
930  // value is quoted
  // inside quotes only the size limit is checked; control characters are
  // not rejected and there is no escape handling
931  if (*ptr == value_quote_character) {
932  // end of value found (OK if empty)
933  if (! isCookieAttribute(cookie_name, set_cookie_header))
934  dict.insert( std::make_pair(cookie_name, cookie_value) );
935  cookie_name.erase();
936  cookie_value.erase();
  // anything between the closing quote and the next separator is skipped
937  parse_state = COOKIE_PARSE_IGNORE;
938  } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
939  // max size exceeded
940  return false;
941  } else {
942  // character is part of the (quoted) value
943  cookie_value.push_back(*ptr);
944  }
945  }
946  break;
947 
948  case COOKIE_PARSE_IGNORE:
949  // ignore everything until we reach a comma "," or semicolon ";"
950  if (*ptr == ';' || *ptr == ',')
951  parse_state = COOKIE_PARSE_NAME;
952  break;
953  }
954 
955  ++ptr;
956  }
957 
958  // handle last cookie in string
  // NOTE(review): cookie_name may be empty at this point (e.g. right after
  // a quoted value); this relies on isCookieAttribute() rejecting an empty
  // name -- verify in the header
959  if (! isCookieAttribute(cookie_name, set_cookie_header))
960  dict.insert( std::make_pair(cookie_name, cookie_value) );
961 
962  return true;
963 }
964 
966  boost::system::error_code& ec)
967 {
968  //
969  // note that boost::tribool may have one of THREE states:
970  //
971  // false: encountered an error while parsing message
972  // true: finished successfully parsing the message
973  // indeterminate: parsed bytes, but the message is not yet finished
974  //
975  const char *read_start_ptr = m_read_ptr;
976  m_bytes_last_read = 0;
977  while (m_read_ptr < m_read_end_ptr) {
978 
979  switch (m_chunked_content_parse_state) {
980  case PARSE_CHUNK_SIZE_START:
981  // we have not yet started parsing the next chunk size
982  if (isHexDigit(*m_read_ptr)) {
983  m_chunk_size_str.erase();
984  m_chunk_size_str.push_back(*m_read_ptr);
985  m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
986  } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
987  // Ignore leading whitespace. Technically, the standard probably doesn't allow white space here,
988  // but we'll be flexible, since there's no ambiguity.
989  break;
990  } else {
991  setError(ec, ERROR_CHUNK_CHAR);
992  return false;
993  }
994  break;
995 
996  case PARSE_CHUNK_SIZE:
997  if (isHexDigit(*m_read_ptr)) {
998  m_chunk_size_str.push_back(*m_read_ptr);
999  } else if (*m_read_ptr == '\x0D') {
1000  m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
1001  } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
1002  // Ignore trailing tabs or spaces. Technically, the standard probably doesn't allow this,
1003  // but we'll be flexible, since there's no ambiguity.
1004  m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
1005  } else {
1006  setError(ec, ERROR_CHUNK_CHAR);
1007  return false;
1008  }
1009  break;
1010 
1011  case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
1012  if (*m_read_ptr == '\x0D') {
1013  m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
1014  } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
1015  // Ignore trailing tabs or spaces. Technically, the standard probably doesn't allow this,
1016  // but we'll be flexible, since there's no ambiguity.
1017  break;
1018  } else {
1019  setError(ec, ERROR_CHUNK_CHAR);
1020  return false;
1021  }
1022  break;
1023 
1024  case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
1025  // We received a CR; expecting LF to follow. We can't be flexible here because
1026  // if we see anything other than LF, we can't be certain where the chunk starts.
1027  if (*m_read_ptr == '\x0A') {
1028  m_bytes_read_in_current_chunk = 0;
1029  m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
1030  if (m_size_of_current_chunk == 0) {
1031  m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK;
1032  } else {
1033  m_chunked_content_parse_state = PARSE_CHUNK;
1034  }
1035  } else {
1036  setError(ec, ERROR_CHUNK_CHAR);
1037  return false;
1038  }
1039  break;
1040 
1041  case PARSE_CHUNK:
1042  if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
1043  if (chunk_cache.size() < m_max_content_length)
1044  chunk_cache.push_back(*m_read_ptr);
1045  m_bytes_read_in_current_chunk++;
1046  }
1047  if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
1048  m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
1049  }
1050  break;
1051 
1052  case PARSE_EXPECTING_CR_AFTER_CHUNK:
1053  // we've read exactly m_size_of_current_chunk bytes since starting the current chunk
1054  if (*m_read_ptr == '\x0D') {
1055  m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
1056  } else {
1057  setError(ec, ERROR_CHUNK_CHAR);
1058  return false;
1059  }
1060  break;
1061 
1062  case PARSE_EXPECTING_LF_AFTER_CHUNK:
1063  // we received a CR; expecting LF to follow
1064  if (*m_read_ptr == '\x0A') {
1065  m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
1066  } else {
1067  setError(ec, ERROR_CHUNK_CHAR);
1068  return false;
1069  }
1070  break;
1071 
1072  case PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK:
1073  // we've read the final chunk; expecting final CRLF
1074  if (*m_read_ptr == '\x0D') {
1075  m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
1076  } else {
1077  setError(ec, ERROR_CHUNK_CHAR);
1078  return false;
1079  }
1080  break;
1081 
1082  case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
1083  // we received the final CR; expecting LF to follow
1084  if (*m_read_ptr == '\x0A') {
1085  ++m_read_ptr;
1086  m_bytes_last_read = (m_read_ptr - read_start_ptr);
1087  m_bytes_total_read += m_bytes_last_read;
1088  m_bytes_content_read += m_bytes_last_read;
1089  PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
1090  return true;
1091  } else {
1092  setError(ec, ERROR_CHUNK_CHAR);
1093  return false;
1094  }
1095  }
1096 
1097  ++m_read_ptr;
1098  }
1099 
1100  m_bytes_last_read = (m_read_ptr - read_start_ptr);
1101  m_bytes_total_read += m_bytes_last_read;
1102  m_bytes_content_read += m_bytes_last_read;
1103  return boost::indeterminate;
1104 }
1105 
1106 boost::tribool HTTPParser::consumeContent(HTTPMessage& http_msg,
1107  boost::system::error_code& ec)
1108 {
1109  size_t content_bytes_to_read;
1110  size_t content_bytes_available = bytes_available();
1111  boost::tribool rc = boost::indeterminate;
1112 
1113  if (m_bytes_content_remaining == 0) {
1114  // we have all of the remaining payload content
1115  return true;
1116  } else {
1117  if (content_bytes_available >= m_bytes_content_remaining) {
1118  // we have all of the remaining payload content
1119  rc = true;
1120  content_bytes_to_read = m_bytes_content_remaining;
1121  } else {
1122  // only some of the payload content is available
1123  content_bytes_to_read = content_bytes_available;
1124  }
1125  m_bytes_content_remaining -= content_bytes_to_read;
1126  }
1127 
1128  // make sure content buffer is not already full
1129  if (m_bytes_content_read < m_max_content_length) {
1130  if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
1131  // read would exceed maximum size for content buffer
1132  // copy only enough bytes to fill up the content buffer
1133  memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr,
1134  m_max_content_length - m_bytes_content_read);
1135  } else {
1136  // copy all bytes available
1137  memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
1138  }
1139  }
1140 
1141  m_read_ptr += content_bytes_to_read;
1142  m_bytes_content_read += content_bytes_to_read;
1143  m_bytes_total_read += content_bytes_to_read;
1144  m_bytes_last_read = content_bytes_to_read;
1145 
1146  return rc;
1147 }
1148 
1150 {
1151  if (bytes_available() == 0) {
1152  m_bytes_last_read = 0;
1153  } else {
1154  m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
1155  while (m_read_ptr < m_read_end_ptr) {
1156  if (chunk_cache.size() < m_max_content_length)
1157  chunk_cache.push_back(*m_read_ptr);
1158  ++m_read_ptr;
1159  }
1160  m_bytes_total_read += m_bytes_last_read;
1161  m_bytes_content_read += m_bytes_last_read;
1162  }
1163  return m_bytes_last_read;
1164 }
1165 
1166 void HTTPParser::finish(HTTPMessage& http_msg) const
1167 {
1168  switch (m_message_parse_state) {
1169  case PARSE_START:
1170  http_msg.setIsValid(false);
1171  http_msg.setContentLength(0);
1172  http_msg.createContentBuffer();
1173  return;
1174  case PARSE_END:
1175  http_msg.setIsValid(true);
1176  break;
1177  case PARSE_HEADERS:
1178  http_msg.setIsValid(false);
1179  updateMessageWithHeaderData(http_msg);
1180  http_msg.setContentLength(0);
1181  http_msg.createContentBuffer();
1182  break;
1183  case PARSE_CONTENT:
1184  http_msg.setIsValid(false);
1185  if (getContentBytesRead() < m_max_content_length) // NOTE: we can read more than we have allocated/stored
1187  break;
1188  case PARSE_CHUNKS:
1189  http_msg.setIsValid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
1190  http_msg.concatenateChunks();
1191  break;
1192  case PARSE_CONTENT_NO_LENGTH:
1193  http_msg.setIsValid(true);
1194  http_msg.concatenateChunks();
1195  break;
1196  }
1197 
1198  computeMsgStatus(http_msg, http_msg.isValid());
1199 
1200  if (isParsingRequest()) {
1201  // Parse query pairs from post content if content type is x-www-form-urlencoded.
1202  // Type could be followed by parameters (as defined in section 3.6 of RFC 2616)
1203  // e.g. Content-Type: application/x-www-form-urlencoded; charset=UTF-8
1204  HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
1205  const std::string& content_type_header = http_request.getHeader(HTTPTypes::HEADER_CONTENT_TYPE);
1206  if (content_type_header.compare(0, HTTPTypes::CONTENT_TYPE_URLENCODED.length(),
1207  HTTPTypes::CONTENT_TYPE_URLENCODED) == 0)
1208  {
1209  if (! parseURLEncoded(http_request.getQueryParams(),
1210  http_request.getContent(),
1211  http_request.getContentLength()))
1212  PION_LOG_WARN(m_logger, "Request query string parsing failed (POST content)");
1213  }
1214  }
1215 }
1216 
1217 void HTTPParser::computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok )
1218 {
1219  HTTPMessage::DataStatus st = HTTPMessage::STATUS_NONE;
1220 
1221  if(http_msg.hasMissingPackets()) {
1222  st = http_msg.hasDataAfterMissingPackets() ?
1223  HTTPMessage::STATUS_PARTIAL : HTTPMessage::STATUS_TRUNCATED;
1224  } else {
1225  st = msg_parsed_ok ? HTTPMessage::STATUS_OK : HTTPMessage::STATUS_TRUNCATED;
1226  }
1227 
1228  http_msg.setStatus(st);
1229 }
1230 
1232 {
1233  static ErrorCategory UNIQUE_ERROR_CATEGORY;
1234  m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
1235 }
1236 
1237 bool HTTPParser::parseForwardedFor(const std::string& header, std::string& public_ip)
1238 {
1239  // static regex's used to check for ipv4 address
1240  static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
1241 
1247  static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
1248 
1249  // sanity check
1250  if (header.empty())
1251  return false;
1252 
1253  // local variables re-used by while loop
1254  boost::match_results<std::string::const_iterator> m;
1255  std::string::const_iterator start_it = header.begin();
1256 
1257  // search for next ip address within the header
1258  while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
1259  // get ip that matched
1260  std::string ip_str(m[0].first, m[0].second);
1261  // check if public network ip address
1262  if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
1263  // match found!
1264  public_ip = ip_str;
1265  return true;
1266  }
1267  // update search starting position
1268  start_it = m[0].second;
1269  }
1270 
1271  // no matches found
1272  return false;
1273 }
1274 
1275 } // end namespace net
1276 } // end namespace pion
1277 
void setStatusCode(unsigned int n)
sets the HTTP response status code
boost::tribool parse(HTTPMessage &http_msg, boost::system::error_code &ec)
Definition: HTTPParser.cpp:42
void setMissingPackets(bool newVal)
set to true when missing packets detected
void addHeader(const std::string &key, const std::string &value)
adds a value for the HTTP header named key
static void setError(boost::system::error_code &ec, ErrorValue ev)
Definition: HTTPParser.hpp:450
static bool parseForwardedFor(const std::string &header, std::string &public_ip)
static const boost::uint32_t METHOD_MAX
maximum length for the request method
Definition: HTTPParser.hpp:471
static const boost::uint32_t QUERY_NAME_MAX
maximum length for the name of a query string variable
Definition: HTTPParser.hpp:486
QueryParams & getQueryParams(void)
returns the query parameters
Definition: HTTPRequest.hpp:76
static void createErrorCategory(void)
creates the unique HTTPParser ErrorCategory
std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache &chunk_buffers)
boost::tribool parseChunks(HTTPMessage::ChunkCache &chunk_buffers, boost::system::error_code &ec)
Definition: HTTPParser.cpp:965
const std::string & getHeader(const std::string &key) const
returns a value for the header if any are defined; otherwise, an empty string
CookieParams & getCookieParams(void)
returns the cookie parameters
const bool m_is_request
true if the message is an HTTP request; false if it is an HTTP response
Definition: HTTPParser.hpp:502
Headers & getHeaders(void)
returns a reference to the HTTP headers
class-specific error category
Definition: HTTPParser.hpp:65
void concatenateChunks(void)
bool hasDataAfterMissingPackets() const
true if more data seen after the missing packets
static const boost::uint32_t COOKIE_VALUE_MAX
maximum length for the value of a cookie; also used for path and domain
Definition: HTTPParser.hpp:495
static const boost::uint32_t RESOURCE_MAX
maximum length for the resource requested
Definition: HTTPParser.hpp:474
void setResource(const std::string &str)
sets the resource or uri-stem originally requested
Definition: HTTPRequest.hpp:92
static bool parseURLEncoded(HTTPTypes::QueryParams &dict, const char *ptr, const std::size_t len)
static const boost::uint32_t HEADER_VALUE_MAX
maximum length for an HTTP header value
Definition: HTTPParser.hpp:483
static const std::size_t DEFAULT_CONTENT_MAX
maximum length for HTTP payload content
Definition: HTTPParser.hpp:40
StringDictionary QueryParams
data type for HTTP query parameters
Definition: HTTPTypes.hpp:106
void setVersionMajor(const boost::uint16_t n)
sets the major HTTP version number
static const boost::uint32_t HEADER_NAME_MAX
maximum length for an HTTP header name
Definition: HTTPParser.hpp:480
void updateMessageWithHeaderData(HTTPMessage &http_msg) const
Definition: HTTPParser.cpp:641
std::size_t getContentLength(void) const
returns the length of the payload content (in bytes)
char * createContentBuffer(void)
static const boost::uint32_t COOKIE_NAME_MAX
maximum length for the name of a cookie name
Definition: HTTPParser.hpp:492
StringDictionary CookieParams
data type for HTTP cookie parameters
Definition: HTTPTypes.hpp:103
virtual bool isContentLengthImplied(void) const =0
should return true if the content length can be implied without headers
boost::tribool parseMissingData(HTTPMessage &http_msg, std::size_t len, boost::system::error_code &ec)
Definition: HTTPParser.cpp:115
boost::uint16_t getVersionMajor(void) const
returns the major HTTP version number
boost::tribool parseHeaders(HTTPMessage &http_msg, boost::system::error_code &ec)
Definition: HTTPParser.cpp:215
void updateContentLengthUsingHeader(void)
sets the length of the payload content using the Content-Length header
DataStatus
defines message data integrity status codes
Definition: HTTPMessage.hpp:69
ChunkCache & getChunkCache(void)
returns a reference to the chunk cache
bool hasMissingPackets() const
true if there were missing packets
bool hasHeader(const std::string &key) const
returns true if at least one value for the header is defined
void setContentLength(const std::size_t n)
sets the length of the payload content (in bytes)
void setStatusMessage(const std::string &msg)
sets the HTTP response status message
std::size_t getContentBytesRead(void) const
returns the total number of bytes read while parsing the payload content
Definition: HTTPParser.hpp:252
static void computeMsgStatus(HTTPMessage &http_msg, bool msg_parsed_ok)
boost::tribool consumeContent(HTTPMessage &http_msg, boost::system::error_code &ec)
void finish(HTTPMessage &http_msg) const
static const boost::uint32_t QUERY_VALUE_MAX
maximum length for the value of a query string variable
Definition: HTTPParser.hpp:489
static const boost::uint32_t STATUS_MESSAGE_MAX
maximum length for response status message
Definition: HTTPParser.hpp:468
void setIsValid(bool b=true)
sets whether or not the message is valid
static const boost::uint32_t QUERY_STRING_MAX
maximum length for the query string
Definition: HTTPParser.hpp:477
boost::uint16_t getVersionMinor(void) const
returns the minor HTTP version number
std::size_t bytes_available(void) const
returns the number of bytes available in the read buffer
Definition: HTTPParser.hpp:243
PionLogger m_logger
primary logging interface used by this class
Definition: HTTPParser.hpp:499
static bool parseCookieHeader(HTTPTypes::CookieParams &dict, const char *ptr, const std::size_t len, bool set_cookie_header)
const char * m_read_ptr
points to the next character to be consumed in the read_buffer
Definition: HTTPParser.hpp:505
boost::tribool finishHeaderParsing(HTTPMessage &http_msg, boost::system::error_code &ec)
Definition: HTTPParser.cpp:695
std::vector< char > ChunkCache
used to cache chunked data
Definition: HTTPMessage.hpp:46
void updateTransferCodingUsingHeader(void)
sets the transfer coding using the Transfer-Encoding header
bool eof(void) const
returns true if there are no more bytes available in the read buffer
Definition: HTTPParser.hpp:240
void setVersionMinor(const boost::uint16_t n)
sets the minor HTTP version number
const char * m_read_end_ptr
points to the end of the read_buffer (last byte + 1)
Definition: HTTPParser.hpp:508
void setQueryString(const std::string &str)
sets the uri-query or query string requested
bool isChunked(void) const
returns true if the message content is chunked
bool isValid(void) const
returns true if the message is valid
bool isParsingRequest(void) const
returns true if the parser is being used to parse an HTTP request
Definition: HTTPParser.hpp:264
char * getContent(void)
returns a pointer to the payload content, or NULL if there is none
void setMethod(const std::string &str)
sets the HTTP request method (i.e. GET, POST, PUT)
Definition: HTTPRequest.hpp:86