00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef __PION_HTTPPARSER_HEADER__
00011 #define __PION_HTTPPARSER_HEADER__
00012
00013 #include <string>
00014 #include <boost/noncopyable.hpp>
00015 #include <boost/logic/tribool.hpp>
00016 #include <boost/system/error_code.hpp>
00017 #include <boost/thread/once.hpp>
00018 #include <pion/PionConfig.hpp>
00019 #include <pion/PionLogger.hpp>
00020 #include <pion/net/HTTPMessage.hpp>
00021
00022
00023 namespace pion {
00024 namespace net {
00025
00026
00027 class HTTPRequest;
00028 class HTTPResponse;
00029
00033 class PION_NET_API HTTPParser :
00034 private boost::noncopyable
00035 {
00036
00037 public:
00038
00040 static const std::size_t DEFAULT_CONTENT_MAX;
00041
00043 enum ErrorValue {
00044 ERROR_METHOD_CHAR = 1,
00045 ERROR_METHOD_SIZE,
00046 ERROR_URI_CHAR,
00047 ERROR_URI_SIZE,
00048 ERROR_QUERY_CHAR,
00049 ERROR_QUERY_SIZE,
00050 ERROR_VERSION_EMPTY,
00051 ERROR_VERSION_CHAR,
00052 ERROR_STATUS_EMPTY,
00053 ERROR_STATUS_CHAR,
00054 ERROR_HEADER_CHAR,
00055 ERROR_HEADER_NAME_SIZE,
00056 ERROR_HEADER_VALUE_SIZE,
00057 ERROR_INVALID_CONTENT_LENGTH,
00058 ERROR_CHUNK_CHAR,
00059 ERROR_MISSING_CHUNK_DATA,
00060 ERROR_MISSING_HEADER_DATA,
00061 ERROR_MISSING_TOO_MUCH_CONTENT,
00062 };
00063
00065 class ErrorCategory
00066 : public boost::system::error_category
00067 {
00068 public:
00069 const char *name() const { return "HTTPParser"; }
00070 std::string message(int ev) const {
00071 switch (ev) {
00072 case ERROR_METHOD_CHAR:
00073 return "invalid method character";
00074 case ERROR_METHOD_SIZE:
00075 return "method exceeds maximum size";
00076 case ERROR_URI_CHAR:
00077 return "invalid URI character";
00078 case ERROR_URI_SIZE:
00079 return "method exceeds maximum size";
00080 case ERROR_QUERY_CHAR:
00081 return "invalid query string character";
00082 case ERROR_QUERY_SIZE:
00083 return "query string exceeds maximum size";
00084 case ERROR_VERSION_EMPTY:
00085 return "HTTP version undefined";
00086 case ERROR_VERSION_CHAR:
00087 return "invalid version character";
00088 case ERROR_STATUS_EMPTY:
00089 return "HTTP status undefined";
00090 case ERROR_STATUS_CHAR:
00091 return "invalid status character";
00092 case ERROR_HEADER_CHAR:
00093 return "invalid header character";
00094 case ERROR_HEADER_NAME_SIZE:
00095 return "header name exceeds maximum size";
00096 case ERROR_HEADER_VALUE_SIZE:
00097 return "header value exceeds maximum size";
00098 case ERROR_INVALID_CONTENT_LENGTH:
00099 return "invalid Content-Length header";
00100 case ERROR_CHUNK_CHAR:
00101 return "invalid chunk character";
00102 case ERROR_MISSING_HEADER_DATA:
00103 return "missing header data";
00104 case ERROR_MISSING_CHUNK_DATA:
00105 return "missing chunk data";
00106 case ERROR_MISSING_TOO_MUCH_CONTENT:
00107 return "missing too much content";
00108 }
00109 return "HTTPParser error";
00110 }
00111 };
00112
00120 HTTPParser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
00121 : m_logger(PION_GET_LOGGER("pion.net.HTTPParser")), m_is_request(is_request),
00122 m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
00123 m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
00124 m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
00125 m_bytes_content_remaining(0), m_bytes_content_read(0),
00126 m_bytes_last_read(0), m_bytes_total_read(0),
00127 m_max_content_length(max_content_length),
00128 m_parse_headers_only(false), m_save_raw_headers(false)
00129 {}
00130
00132 virtual ~HTTPParser() {}
00133
00145 boost::tribool parse(HTTPMessage& http_msg, boost::system::error_code& ec);
00146
00159 boost::tribool parseMissingData(HTTPMessage& http_msg, std::size_t len,
00160 boost::system::error_code& ec);
00161
00167 void finish(HTTPMessage& http_msg) const;
00168
00175 inline void setReadBuffer(const char *ptr, size_t len) {
00176 m_read_ptr = ptr;
00177 m_read_end_ptr = ptr + len;
00178 }
00179
00186 inline void loadReadPosition(const char *&read_ptr, const char *&read_end_ptr) const {
00187 read_ptr = m_read_ptr;
00188 read_end_ptr = m_read_end_ptr;
00189 }
00190
00199 inline bool checkPrematureEOF(HTTPMessage& http_msg) {
00200 if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
00201 return true;
00202 m_message_parse_state = PARSE_END;
00203 http_msg.concatenateChunks();
00204 finish(http_msg);
00205 return false;
00206 }
00207
00213 inline void parseHeadersOnly(bool b = true) { m_parse_headers_only = b; }
00214
00220 inline void skipHeaderParsing(HTTPMessage& http_msg) {
00221 boost::system::error_code ec;
00222 finishHeaderParsing(http_msg, ec);
00223 }
00224
00226 inline void reset(void) {
00227 m_message_parse_state = PARSE_START;
00228 m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
00229 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00230 m_status_code = 0;
00231 m_status_message.erase();
00232 m_method.erase();
00233 m_resource.erase();
00234 m_query_string.erase();
00235 m_raw_headers.erase();
00236 m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
00237 }
00238
00240 inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
00241
00243 inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); }
00244
00246 inline std::size_t gcount(void) const { return m_bytes_last_read; }
00247
00249 inline std::size_t getTotalBytesRead(void) const { return m_bytes_total_read; }
00250
00252 inline std::size_t getContentBytesRead(void) const { return m_bytes_content_read; }
00253
00255 inline std::size_t getMaxContentLength(void) const { return m_max_content_length; }
00256
00258 inline const std::string& getRawHeaders(void) const { return m_raw_headers; }
00259
00261 inline bool getSaveRawHeaders(void) const { return m_save_raw_headers; }
00262
00264 inline bool isParsingRequest(void) const { return m_is_request; }
00265
00267 inline bool isParsingResponse(void) const { return ! m_is_request; }
00268
00270 inline void setMaxContentLength(std::size_t n) { m_max_content_length = n; }
00271
00273 inline void resetMaxContentLength(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
00274
00276 inline void setSaveRawHeaders(bool b) { m_save_raw_headers = b; }
00277
00279 inline void setLogger(PionLogger log_ptr) { m_logger = log_ptr; }
00280
00282 inline PionLogger getLogger(void) { return m_logger; }
00283
00284
00295 static bool parseURLEncoded(HTTPTypes::QueryParams& dict,
00296 const char *ptr, const std::size_t len);
00297
00309 static bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00310 const char *ptr, const std::size_t len,
00311 bool set_cookie_header);
00312
00323 static inline bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00324 const std::string& cookie_header, bool set_cookie_header)
00325 {
00326 return parseCookieHeader(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
00327 }
00328
00338 static inline bool parseURLEncoded(HTTPTypes::QueryParams& dict,
00339 const std::string& query)
00340 {
00341 return parseURLEncoded(dict, query.c_str(), query.size());
00342 }
00343
00353 static bool parseForwardedFor(const std::string& header, std::string& public_ip);
00354
00356 static inline ErrorCategory& getErrorCategory(void) {
00357 boost::call_once(HTTPParser::createErrorCategory, m_instance_flag);
00358 return *m_error_category_ptr;
00359 }
00360
00361
00362 protected:
00363
00376 boost::tribool parseHeaders(HTTPMessage& http_msg, boost::system::error_code& ec);
00377
00383 void updateMessageWithHeaderData(HTTPMessage& http_msg) const;
00384
00397 boost::tribool finishHeaderParsing(HTTPMessage& http_msg,
00398 boost::system::error_code& ec);
00399
00411 boost::tribool parseChunks(HTTPMessage::ChunkCache& chunk_buffers,
00412 boost::system::error_code& ec);
00413
00425 boost::tribool consumeContent(HTTPMessage& http_msg,
00426 boost::system::error_code& ec);
00427
00435 std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_buffers);
00436
00442 static void computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok);
00443
00450 static inline void setError(boost::system::error_code& ec, ErrorValue ev) {
00451 ec = boost::system::error_code(static_cast<int>(ev), getErrorCategory());
00452 }
00453
00455 static void createErrorCategory(void);
00456
00457
00458
00459 inline static bool isChar(int c);
00460 inline static bool isControl(int c);
00461 inline static bool isSpecial(int c);
00462 inline static bool isDigit(int c);
00463 inline static bool isHexDigit(int c);
00464 inline static bool isCookieAttribute(const std::string& name, bool set_cookie_header);
00465
00466
00468 static const boost::uint32_t STATUS_MESSAGE_MAX;
00469
00471 static const boost::uint32_t METHOD_MAX;
00472
00474 static const boost::uint32_t RESOURCE_MAX;
00475
00477 static const boost::uint32_t QUERY_STRING_MAX;
00478
00480 static const boost::uint32_t HEADER_NAME_MAX;
00481
00483 static const boost::uint32_t HEADER_VALUE_MAX;
00484
00486 static const boost::uint32_t QUERY_NAME_MAX;
00487
00489 static const boost::uint32_t QUERY_VALUE_MAX;
00490
00492 static const boost::uint32_t COOKIE_NAME_MAX;
00493
00495 static const boost::uint32_t COOKIE_VALUE_MAX;
00496
00497
00499 mutable PionLogger m_logger;
00500
00502 const bool m_is_request;
00503
00505 const char * m_read_ptr;
00506
00508 const char * m_read_end_ptr;
00509
00510
00511 private:
00512
00514 enum MessageParseState {
00515 PARSE_START, PARSE_HEADERS, PARSE_CONTENT,
00516 PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
00517 };
00518
00521 enum HeadersParseState {
00522 PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
00523 PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
00524 PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
00525 PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
00526 PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
00527 PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
00528 PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
00529 PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
00530 PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
00531 PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
00532 };
00533
00536 enum ChunkedContentParseState {
00537 PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE,
00538 PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
00539 PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK,
00540 PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
00541 PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK,
00542 PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
00543 };
00544
00545
00547 MessageParseState m_message_parse_state;
00548
00550 HeadersParseState m_headers_parse_state;
00551
00553 ChunkedContentParseState m_chunked_content_parse_state;
00554
00556 boost::uint16_t m_status_code;
00557
00559 std::string m_status_message;
00560
00562 std::string m_method;
00563
00565 std::string m_resource;
00566
00568 std::string m_query_string;
00569
00571 std::string m_raw_headers;
00572
00574 std::string m_header_name;
00575
00577 std::string m_header_value;
00578
00580 std::string m_chunk_size_str;
00581
00583 std::size_t m_size_of_current_chunk;
00584
00586 std::size_t m_bytes_read_in_current_chunk;
00587
00589 std::size_t m_bytes_content_remaining;
00590
00592 std::size_t m_bytes_content_read;
00593
00595 std::size_t m_bytes_last_read;
00596
00598 std::size_t m_bytes_total_read;
00599
00601 std::size_t m_max_content_length;
00602
00604 bool m_parse_headers_only;
00605
00607 bool m_save_raw_headers;
00608
00610 static ErrorCategory * m_error_category_ptr;
00611
00613 static boost::once_flag m_instance_flag;
00614 };
00615
00616
00617
00618
00619 inline bool HTTPParser::isChar(int c)
00620 {
00621 return(c >= 0 && c <= 127);
00622 }
00623
00624 inline bool HTTPParser::isControl(int c)
00625 {
00626 return( (c >= 0 && c <= 31) || c == 127);
00627 }
00628
00629 inline bool HTTPParser::isSpecial(int c)
00630 {
00631 switch (c) {
00632 case '(': case ')': case '<': case '>': case '@':
00633 case ',': case ';': case ':': case '\\': case '"':
00634 case '/': case '[': case ']': case '?': case '=':
00635 case '{': case '}': case ' ': case '\t':
00636 return true;
00637 default:
00638 return false;
00639 }
00640 }
00641
00642 inline bool HTTPParser::isDigit(int c)
00643 {
00644 return(c >= '0' && c <= '9');
00645 }
00646
00647 inline bool HTTPParser::isHexDigit(int c)
00648 {
00649 return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
00650 }
00651
00652 inline bool HTTPParser::isCookieAttribute(const std::string& name, bool set_cookie_header)
00653 {
00654 return (name.empty() || name[0] == '$' || (set_cookie_header &&
00655 (name=="Comment" || name=="Domain" || name=="Max-Age" || name=="Path" || name=="Secure" || name=="Version" || name=="Expires")
00656 ) );
00657 }
00658
00659 }
00660 }
00661
00662 #endif