pion-net  4.0.9
HTTPParser.hpp
1 // ------------------------------------------------------------------
2 // pion-net: a C++ framework for building lightweight HTTP interfaces
3 // ------------------------------------------------------------------
4 // Copyright (C) 2007-2008 Atomic Labs, Inc. (http://www.atomiclabs.com)
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See http://www.boost.org/LICENSE_1_0.txt
8 //
9 
10 #ifndef __PION_HTTPPARSER_HEADER__
11 #define __PION_HTTPPARSER_HEADER__
12 
13 #include <string>
14 #include <boost/noncopyable.hpp>
15 #include <boost/logic/tribool.hpp>
16 #include <boost/system/error_code.hpp>
17 #include <boost/thread/once.hpp>
18 #include <pion/PionConfig.hpp>
19 #include <pion/PionLogger.hpp>
20 #include <pion/net/HTTPMessage.hpp>
21 
22 
23 namespace pion { // begin namespace pion
24 namespace net { // begin namespace net (Pion Network Library)
25 
26 // forward declarations used for finishing HTTP messages
27 class HTTPRequest;
28 class HTTPResponse;
29 
33 class PION_NET_API HTTPParser :
34  private boost::noncopyable
35 {
36 
37 public:
38 
40  static const std::size_t DEFAULT_CONTENT_MAX;
41 
43  enum ErrorValue {
44  ERROR_METHOD_CHAR = 1,
45  ERROR_METHOD_SIZE,
46  ERROR_URI_CHAR,
47  ERROR_URI_SIZE,
48  ERROR_QUERY_CHAR,
49  ERROR_QUERY_SIZE,
50  ERROR_VERSION_EMPTY,
51  ERROR_VERSION_CHAR,
52  ERROR_STATUS_EMPTY,
53  ERROR_STATUS_CHAR,
54  ERROR_HEADER_CHAR,
55  ERROR_HEADER_NAME_SIZE,
56  ERROR_HEADER_VALUE_SIZE,
57  ERROR_INVALID_CONTENT_LENGTH,
58  ERROR_CHUNK_CHAR,
59  ERROR_MISSING_CHUNK_DATA,
60  ERROR_MISSING_HEADER_DATA,
61  ERROR_MISSING_TOO_MUCH_CONTENT,
62  };
63 
66  : public boost::system::error_category
67  {
68  public:
69  const char *name() const { return "HTTPParser"; }
70  std::string message(int ev) const {
71  switch (ev) {
72  case ERROR_METHOD_CHAR:
73  return "invalid method character";
74  case ERROR_METHOD_SIZE:
75  return "method exceeds maximum size";
76  case ERROR_URI_CHAR:
77  return "invalid URI character";
78  case ERROR_URI_SIZE:
79  return "method exceeds maximum size";
80  case ERROR_QUERY_CHAR:
81  return "invalid query string character";
82  case ERROR_QUERY_SIZE:
83  return "query string exceeds maximum size";
84  case ERROR_VERSION_EMPTY:
85  return "HTTP version undefined";
86  case ERROR_VERSION_CHAR:
87  return "invalid version character";
88  case ERROR_STATUS_EMPTY:
89  return "HTTP status undefined";
90  case ERROR_STATUS_CHAR:
91  return "invalid status character";
92  case ERROR_HEADER_CHAR:
93  return "invalid header character";
94  case ERROR_HEADER_NAME_SIZE:
95  return "header name exceeds maximum size";
96  case ERROR_HEADER_VALUE_SIZE:
97  return "header value exceeds maximum size";
98  case ERROR_INVALID_CONTENT_LENGTH:
99  return "invalid Content-Length header";
100  case ERROR_CHUNK_CHAR:
101  return "invalid chunk character";
102  case ERROR_MISSING_HEADER_DATA:
103  return "missing header data";
104  case ERROR_MISSING_CHUNK_DATA:
105  return "missing chunk data";
106  case ERROR_MISSING_TOO_MUCH_CONTENT:
107  return "missing too much content";
108  }
109  return "HTTPParser error";
110  }
111  };
112 
120  HTTPParser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
121  : m_logger(PION_GET_LOGGER("pion.net.HTTPParser")), m_is_request(is_request),
122  m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
123  m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
124  m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
125  m_bytes_content_remaining(0), m_bytes_content_read(0),
126  m_bytes_last_read(0), m_bytes_total_read(0),
127  m_max_content_length(max_content_length),
128  m_parse_headers_only(false), m_save_raw_headers(false)
129  {}
130 
132  virtual ~HTTPParser() {}
133 
145  boost::tribool parse(HTTPMessage& http_msg, boost::system::error_code& ec);
146 
159  boost::tribool parseMissingData(HTTPMessage& http_msg, std::size_t len,
160  boost::system::error_code& ec);
161 
167  void finish(HTTPMessage& http_msg) const;
168 
175  inline void setReadBuffer(const char *ptr, size_t len) {
176  m_read_ptr = ptr;
177  m_read_end_ptr = ptr + len;
178  }
179 
186  inline void loadReadPosition(const char *&read_ptr, const char *&read_end_ptr) const {
187  read_ptr = m_read_ptr;
188  read_end_ptr = m_read_end_ptr;
189  }
190 
199  inline bool checkPrematureEOF(HTTPMessage& http_msg) {
200  if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
201  return true;
202  m_message_parse_state = PARSE_END;
203  http_msg.concatenateChunks();
204  finish(http_msg);
205  return false;
206  }
207 
213  inline void parseHeadersOnly(bool b = true) { m_parse_headers_only = b; }
214 
220  inline void skipHeaderParsing(HTTPMessage& http_msg) {
221  boost::system::error_code ec;
222  finishHeaderParsing(http_msg, ec);
223  }
224 
226  inline void reset(void) {
227  m_message_parse_state = PARSE_START;
228  m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
229  m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
230  m_status_code = 0;
231  m_status_message.erase();
232  m_method.erase();
233  m_resource.erase();
234  m_query_string.erase();
235  m_raw_headers.erase();
236  m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
237  }
238 
240  inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
241 
243  inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); }
244 
246  inline std::size_t gcount(void) const { return m_bytes_last_read; }
247 
249  inline std::size_t getTotalBytesRead(void) const { return m_bytes_total_read; }
250 
252  inline std::size_t getContentBytesRead(void) const { return m_bytes_content_read; }
253 
255  inline std::size_t getMaxContentLength(void) const { return m_max_content_length; }
256 
258  inline const std::string& getRawHeaders(void) const { return m_raw_headers; }
259 
261  inline bool getSaveRawHeaders(void) const { return m_save_raw_headers; }
262 
264  inline bool isParsingRequest(void) const { return m_is_request; }
265 
267  inline bool isParsingResponse(void) const { return ! m_is_request; }
268 
270  inline void setMaxContentLength(std::size_t n) { m_max_content_length = n; }
271 
273  inline void resetMaxContentLength(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
274 
276  inline void setSaveRawHeaders(bool b) { m_save_raw_headers = b; }
277 
279  inline void setLogger(PionLogger log_ptr) { m_logger = log_ptr; }
280 
282  inline PionLogger getLogger(void) { return m_logger; }
283 
284 
295  static bool parseURLEncoded(HTTPTypes::QueryParams& dict,
296  const char *ptr, const std::size_t len);
297 
309  static bool parseCookieHeader(HTTPTypes::CookieParams& dict,
310  const char *ptr, const std::size_t len,
311  bool set_cookie_header);
312 
323  static inline bool parseCookieHeader(HTTPTypes::CookieParams& dict,
324  const std::string& cookie_header, bool set_cookie_header)
325  {
326  return parseCookieHeader(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
327  }
328 
338  static inline bool parseURLEncoded(HTTPTypes::QueryParams& dict,
339  const std::string& query)
340  {
341  return parseURLEncoded(dict, query.c_str(), query.size());
342  }
343 
353  static bool parseForwardedFor(const std::string& header, std::string& public_ip);
354 
356  static inline ErrorCategory& getErrorCategory(void) {
357  boost::call_once(HTTPParser::createErrorCategory, m_instance_flag);
358  return *m_error_category_ptr;
359  }
360 
361 
362 protected:
363 
376  boost::tribool parseHeaders(HTTPMessage& http_msg, boost::system::error_code& ec);
377 
383  void updateMessageWithHeaderData(HTTPMessage& http_msg) const;
384 
397  boost::tribool finishHeaderParsing(HTTPMessage& http_msg,
398  boost::system::error_code& ec);
399 
411  boost::tribool parseChunks(HTTPMessage::ChunkCache& chunk_buffers,
412  boost::system::error_code& ec);
413 
425  boost::tribool consumeContent(HTTPMessage& http_msg,
426  boost::system::error_code& ec);
427 
435  std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_buffers);
436 
442  static void computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok);
443 
450  static inline void setError(boost::system::error_code& ec, ErrorValue ev) {
451  ec = boost::system::error_code(static_cast<int>(ev), getErrorCategory());
452  }
453 
455  static void createErrorCategory(void);
456 
457 
458  // misc functions used by the parsing functions
459  inline static bool isChar(int c);
460  inline static bool isControl(int c);
461  inline static bool isSpecial(int c);
462  inline static bool isDigit(int c);
463  inline static bool isHexDigit(int c);
464  inline static bool isCookieAttribute(const std::string& name, bool set_cookie_header);
465 
466 
468  static const boost::uint32_t STATUS_MESSAGE_MAX;
469 
471  static const boost::uint32_t METHOD_MAX;
472 
474  static const boost::uint32_t RESOURCE_MAX;
475 
477  static const boost::uint32_t QUERY_STRING_MAX;
478 
480  static const boost::uint32_t HEADER_NAME_MAX;
481 
483  static const boost::uint32_t HEADER_VALUE_MAX;
484 
486  static const boost::uint32_t QUERY_NAME_MAX;
487 
489  static const boost::uint32_t QUERY_VALUE_MAX;
490 
492  static const boost::uint32_t COOKIE_NAME_MAX;
493 
495  static const boost::uint32_t COOKIE_VALUE_MAX;
496 
497 
500 
502  const bool m_is_request;
503 
505  const char * m_read_ptr;
506 
508  const char * m_read_end_ptr;
509 
510 
511 private:
512 
514  enum MessageParseState {
515  PARSE_START, PARSE_HEADERS, PARSE_CONTENT,
516  PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
517  };
518 
521  enum HeadersParseState {
522  PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
523  PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
524  PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
525  PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
526  PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
527  PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
528  PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
529  PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
530  PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
531  PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
532  };
533 
536  enum ChunkedContentParseState {
537  PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE,
538  PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
539  PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK,
540  PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
541  PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK,
542  PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
543  };
544 
545 
547  MessageParseState m_message_parse_state;
548 
550  HeadersParseState m_headers_parse_state;
551 
553  ChunkedContentParseState m_chunked_content_parse_state;
554 
556  boost::uint16_t m_status_code;
557 
559  std::string m_status_message;
560 
562  std::string m_method;
563 
565  std::string m_resource;
566 
568  std::string m_query_string;
569 
571  std::string m_raw_headers;
572 
574  std::string m_header_name;
575 
577  std::string m_header_value;
578 
580  std::string m_chunk_size_str;
581 
583  std::size_t m_size_of_current_chunk;
584 
586  std::size_t m_bytes_read_in_current_chunk;
587 
589  std::size_t m_bytes_content_remaining;
590 
592  std::size_t m_bytes_content_read;
593 
595  std::size_t m_bytes_last_read;
596 
598  std::size_t m_bytes_total_read;
599 
601  std::size_t m_max_content_length;
602 
604  bool m_parse_headers_only;
605 
607  bool m_save_raw_headers;
608 
610  static ErrorCategory * m_error_category_ptr;
611 
613  static boost::once_flag m_instance_flag;
614 };
615 
616 
617 // inline functions for HTTPParser
618 
619 inline bool HTTPParser::isChar(int c)
620 {
621  return(c >= 0 && c <= 127);
622 }
623 
624 inline bool HTTPParser::isControl(int c)
625 {
626  return( (c >= 0 && c <= 31) || c == 127);
627 }
628 
629 inline bool HTTPParser::isSpecial(int c)
630 {
631  switch (c) {
632  case '(': case ')': case '<': case '>': case '@':
633  case ',': case ';': case ':': case '\\': case '"':
634  case '/': case '[': case ']': case '?': case '=':
635  case '{': case '}': case ' ': case '\t':
636  return true;
637  default:
638  return false;
639  }
640 }
641 
642 inline bool HTTPParser::isDigit(int c)
643 {
644  return(c >= '0' && c <= '9');
645 }
646 
647 inline bool HTTPParser::isHexDigit(int c)
648 {
649  return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
650 }
651 
652 inline bool HTTPParser::isCookieAttribute(const std::string& name, bool set_cookie_header)
653 {
654  return (name.empty() || name[0] == '$' || (set_cookie_header &&
655  (name=="Comment" || name=="Domain" || name=="Max-Age" || name=="Path" || name=="Secure" || name=="Version" || name=="Expires")
656  ) );
657 }
658 
659 } // end namespace net
660 } // end namespace pion
661 
662 #endif