00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef __PION_HTTPPARSER_HEADER__
00011 #define __PION_HTTPPARSER_HEADER__
00012
00013 #include <string>
00014 #include <boost/noncopyable.hpp>
00015 #include <boost/logic/tribool.hpp>
00016 #include <pion/PionConfig.hpp>
00017 #include <pion/PionLogger.hpp>
00018 #include <pion/net/HTTPMessage.hpp>
00019
00020
00021 namespace pion {
00022 namespace net {
00023
00024
00025 class HTTPRequest;
00026 class HTTPResponse;
00027
00031 class PION_NET_API HTTPParser :
00032 private boost::noncopyable
00033 {
00034
00035 public:
00036
00038 static const std::size_t DEFAULT_CONTENT_MAX;
00039
00047 HTTPParser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
00048 : m_logger(PION_GET_LOGGER("pion.net.HTTPParser")), m_is_request(is_request),
00049 m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
00050 m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
00051 m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
00052 m_bytes_content_remaining(0), m_bytes_content_read(0),
00053 m_bytes_last_read(0), m_bytes_total_read(0),
00054 m_max_content_length(max_content_length), m_save_raw_headers(false)
00055 {}
00056
00058 virtual ~HTTPParser() {}
00059
00070 boost::tribool parse(HTTPMessage& http_msg);
00071
00083 boost::tribool parseMissingData(HTTPMessage& http_msg, std::size_t len);
00084
00090 void finish(HTTPMessage& http_msg) const;
00091
00098 inline void setReadBuffer(const char *ptr, size_t len) {
00099 m_read_ptr = ptr;
00100 m_read_end_ptr = ptr + len;
00101 }
00102
00109 inline void loadReadPosition(const char *&read_ptr, const char *&read_end_ptr) const {
00110 read_ptr = m_read_ptr;
00111 read_end_ptr = m_read_end_ptr;
00112 }
00113
00123 inline bool checkPrematureEOF(HTTPMessage& http_msg) {
00124 if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
00125 return true;
00126 m_message_parse_state = PARSE_END;
00127 http_msg.concatenateChunks();
00128 finish(http_msg);
00129 return false;
00130 }
00131
00133 inline void reset(void) {
00134 m_message_parse_state = PARSE_START;
00135 m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
00136 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00137 m_status_code = 0;
00138 m_status_message.erase();
00139 m_method.erase();
00140 m_resource.erase();
00141 m_query_string.erase();
00142 m_raw_headers.erase();
00143 m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
00144 }
00145
00147 inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
00148
00150 inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); }
00151
00153 inline std::size_t gcount(void) const { return m_bytes_last_read; }
00154
00156 inline std::size_t getTotalBytesRead(void) const { return m_bytes_total_read; }
00157
00159 inline std::size_t getContentBytesRead(void) const { return m_bytes_content_read; }
00160
00162 inline std::size_t getMaxContentLength(void) const { return m_max_content_length; }
00163
00165 inline const std::string& getRawHeaders(void) const { return m_raw_headers; }
00166
00168 inline bool getSaveRawHeaders(void) const { return m_save_raw_headers; }
00169
00171 inline bool isParsingRequest(void) const { return m_is_request; }
00172
00174 inline bool isParsingResponse(void) const { return ! m_is_request; }
00175
00177 inline void setMaxContentLength(std::size_t n) { m_max_content_length = n; }
00178
00180 inline void resetMaxContentLength(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
00181
00183 inline void setSaveRawHeaders(bool b) { m_save_raw_headers = b; }
00184
00186 inline void setLogger(PionLogger log_ptr) { m_logger = log_ptr; }
00187
00189 inline PionLogger getLogger(void) { return m_logger; }
00190
00191
00200 static bool contentTypeIsUrlEncoded(HTTPRequest& http_request);
00201
00212 static bool parseURLEncoded(HTTPTypes::QueryParams& dict,
00213 const char *ptr, const std::size_t len);
00214
00226 static bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00227 const char *ptr, const std::size_t len,
00228 bool set_cookie_header);
00229
00240 static inline bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00241 const std::string& cookie_header, bool set_cookie_header)
00242 {
00243 return parseCookieHeader(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
00244 }
00245
00255 static inline bool parseURLEncoded(HTTPTypes::QueryParams& dict,
00256 const std::string& query)
00257 {
00258 return parseURLEncoded(dict, query.c_str(), query.size());
00259 }
00260
00261
00262 protected:
00263
00275 boost::tribool parseHeaders(HTTPMessage& http_msg);
00276
00282 void updateMessageWithHeaderData(HTTPMessage& http_msg) const;
00283
00295 boost::tribool finishHeaderParsing(HTTPMessage& http_msg);
00296
00307 boost::tribool parseChunks(HTTPMessage::ChunkCache& chunk_buffers);
00308
00319 boost::tribool consumeContent(HTTPMessage& http_msg);
00320
00328 std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_buffers);
00329
00335 static void computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok);
00336
00337
00338 inline static bool isChar(int c);
00339 inline static bool isControl(int c);
00340 inline static bool isSpecial(int c);
00341 inline static bool isDigit(int c);
00342 inline static bool isHexDigit(int c);
00343 inline static bool isCookieAttribute(const std::string& name, bool set_cookie_header);
00344
00345
00347 static const boost::uint32_t STATUS_MESSAGE_MAX;
00348
00350 static const boost::uint32_t METHOD_MAX;
00351
00353 static const boost::uint32_t RESOURCE_MAX;
00354
00356 static const boost::uint32_t QUERY_STRING_MAX;
00357
00359 static const boost::uint32_t HEADER_NAME_MAX;
00360
00362 static const boost::uint32_t HEADER_VALUE_MAX;
00363
00365 static const boost::uint32_t QUERY_NAME_MAX;
00366
00368 static const boost::uint32_t QUERY_VALUE_MAX;
00369
00371 static const boost::uint32_t COOKIE_NAME_MAX;
00372
00374 static const boost::uint32_t COOKIE_VALUE_MAX;
00375
00376
00378 mutable PionLogger m_logger;
00379
00381 const bool m_is_request;
00382
00384 const char * m_read_ptr;
00385
00387 const char * m_read_end_ptr;
00388
00389
00390 private:
00391
00393 enum MessageParseState {
00394 PARSE_START, PARSE_HEADERS, PARSE_CONTENT,
00395 PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
00396 };
00397
00400 enum HeadersParseState {
00401 PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
00402 PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
00403 PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
00404 PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
00405 PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
00406 PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
00407 PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
00408 PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
00409 PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
00410 PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
00411 };
00412
00415 enum ChunkedContentParseState {
00416 PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE,
00417 PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
00418 PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK,
00419 PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
00420 PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK,
00421 PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
00422 };
00423
00424
00426 MessageParseState m_message_parse_state;
00427
00429 HeadersParseState m_headers_parse_state;
00430
00432 ChunkedContentParseState m_chunked_content_parse_state;
00433
00435 boost::uint16_t m_status_code;
00436
00438 std::string m_status_message;
00439
00441 std::string m_method;
00442
00444 std::string m_resource;
00445
00447 std::string m_query_string;
00448
00450 std::string m_raw_headers;
00451
00453 std::string m_header_name;
00454
00456 std::string m_header_value;
00457
00459 std::string m_chunk_size_str;
00460
00462 std::size_t m_size_of_current_chunk;
00463
00465 std::size_t m_bytes_read_in_current_chunk;
00466
00468 std::size_t m_bytes_content_remaining;
00469
00471 std::size_t m_bytes_content_read;
00472
00474 std::size_t m_bytes_last_read;
00475
00477 std::size_t m_bytes_total_read;
00478
00480 std::size_t m_max_content_length;
00481
00483 bool m_save_raw_headers;
00484 };
00485
00486
00487
00488
00489 inline bool HTTPParser::isChar(int c)
00490 {
00491 return(c >= 0 && c <= 127);
00492 }
00493
00494 inline bool HTTPParser::isControl(int c)
00495 {
00496 return( (c >= 0 && c <= 31) || c == 127);
00497 }
00498
00499 inline bool HTTPParser::isSpecial(int c)
00500 {
00501 switch (c) {
00502 case '(': case ')': case '<': case '>': case '@':
00503 case ',': case ';': case ':': case '\\': case '"':
00504 case '/': case '[': case ']': case '?': case '=':
00505 case '{': case '}': case ' ': case '\t':
00506 return true;
00507 default:
00508 return false;
00509 }
00510 }
00511
00512 inline bool HTTPParser::isDigit(int c)
00513 {
00514 return(c >= '0' && c <= '9');
00515 }
00516
00517 inline bool HTTPParser::isHexDigit(int c)
00518 {
00519 return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
00520 }
00521
00522 inline bool HTTPParser::isCookieAttribute(const std::string& name, bool set_cookie_header)
00523 {
00524 return (name.empty() || name[0] == '$' || (set_cookie_header &&
00525 (name=="Comment" || name=="Domain" || name=="Max-Age" || name=="Path" || name=="Secure" || name=="Version" || name=="Expires")
00526 ) );
00527 }
00528
00529 }
00530 }
00531
00532 #endif