net/include/pion/net/HTTPParser.hpp

00001 // ------------------------------------------------------------------
00002 // pion-net: a C++ framework for building lightweight HTTP interfaces
00003 // ------------------------------------------------------------------
00004 // Copyright (C) 2007-2008 Atomic Labs, Inc.  (http://www.atomiclabs.com)
00005 //
00006 // Distributed under the Boost Software License, Version 1.0.
00007 // See http://www.boost.org/LICENSE_1_0.txt
00008 //
00009 
00010 #ifndef __PION_HTTPPARSER_HEADER__
00011 #define __PION_HTTPPARSER_HEADER__
00012 
00013 #include <string>
00014 #include <boost/noncopyable.hpp>
00015 #include <boost/logic/tribool.hpp>
00016 #include <pion/PionConfig.hpp>
00017 #include <pion/PionLogger.hpp>
00018 #include <pion/net/HTTPMessage.hpp>
00019 
00020 
00021 namespace pion {    // begin namespace pion
00022 namespace net {     // begin namespace net (Pion Network Library)
00023 
00024 // forward declarations used for finishing HTTP messages
00025 class HTTPRequest;
00026 class HTTPResponse;
00027 
00031 class PION_NET_API HTTPParser :
00032     private boost::noncopyable
00033 {
00034 
00035 public:
00036 
00038     static const std::size_t        DEFAULT_CONTENT_MAX;
00039 
00047     HTTPParser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
00048         : m_logger(PION_GET_LOGGER("pion.net.HTTPParser")), m_is_request(is_request),
00049         m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
00050         m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
00051         m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
00052         m_bytes_content_remaining(0), m_bytes_content_read(0),
00053         m_bytes_last_read(0), m_bytes_total_read(0),
00054         m_max_content_length(max_content_length), m_save_raw_headers(false)
00055     {}
00056 
00058     virtual ~HTTPParser() {}
00059 
00070     boost::tribool parse(HTTPMessage& http_msg);
00071 
00083     boost::tribool parseMissingData(HTTPMessage& http_msg, std::size_t len);
00084 
00090     void finish(HTTPMessage& http_msg) const;
00091 
00098     inline void setReadBuffer(const char *ptr, size_t len) {
00099         m_read_ptr = ptr;
00100         m_read_end_ptr = ptr + len;
00101     }
00102 
00109     inline void loadReadPosition(const char *&read_ptr, const char *&read_end_ptr) const {
00110         read_ptr = m_read_ptr;
00111         read_end_ptr = m_read_end_ptr;
00112     }
00113 
00123     inline bool checkPrematureEOF(HTTPMessage& http_msg) {
00124         if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
00125             return true;
00126         m_message_parse_state = PARSE_END;
00127         http_msg.concatenateChunks();
00128         finish(http_msg);
00129         return false;
00130     }
00131 
00133     inline void reset(void) {
00134         m_message_parse_state = PARSE_START;
00135         m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
00136         m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00137         m_status_code = 0;
00138         m_status_message.erase();
00139         m_method.erase();
00140         m_resource.erase();
00141         m_query_string.erase();
00142         m_raw_headers.erase();
00143         m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
00144     }
00145 
00147     inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
00148 
00150     inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); } 
00151 
00153     inline std::size_t gcount(void) const { return m_bytes_last_read; }
00154 
00156     inline std::size_t getTotalBytesRead(void) const { return m_bytes_total_read; }
00157 
00159     inline std::size_t getContentBytesRead(void) const { return m_bytes_content_read; }
00160 
00162     inline std::size_t getMaxContentLength(void) const { return m_max_content_length; }
00163 
00165     inline const std::string& getRawHeaders(void) const { return m_raw_headers; }
00166 
00168     inline bool getSaveRawHeaders(void) const { return m_save_raw_headers; }
00169 
00171     inline bool isParsingRequest(void) const { return m_is_request; }
00172 
00174     inline bool isParsingResponse(void) const { return ! m_is_request; }
00175 
00177     inline void setMaxContentLength(std::size_t n) { m_max_content_length = n; }
00178 
00180     inline void resetMaxContentLength(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
00181 
00183     inline void setSaveRawHeaders(bool b) { m_save_raw_headers = b; }
00184 
00186     inline void setLogger(PionLogger log_ptr) { m_logger = log_ptr; }
00187 
00189     inline PionLogger getLogger(void) { return m_logger; }
00190 
00191 
00200     static bool contentTypeIsUrlEncoded(HTTPRequest& http_request);
00201 
00212     static bool parseURLEncoded(HTTPTypes::QueryParams& dict,
00213                                 const char *ptr, const std::size_t len);
00214 
00226     static bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00227                                   const char *ptr, const std::size_t len,
00228                                   bool set_cookie_header);
00229 
00240     static inline bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00241         const std::string& cookie_header, bool set_cookie_header)
00242     {
00243         return parseCookieHeader(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
00244     }
00245 
00255     static inline bool parseURLEncoded(HTTPTypes::QueryParams& dict,
00256         const std::string& query)
00257     {
00258         return parseURLEncoded(dict, query.c_str(), query.size());
00259     }
00260 
00261 
00262 protected:
00263 
00275     boost::tribool parseHeaders(HTTPMessage& http_msg);
00276 
00282     void updateMessageWithHeaderData(HTTPMessage& http_msg) const;
00283 
00295     boost::tribool finishHeaderParsing(HTTPMessage& http_msg);
00296 
00307     boost::tribool parseChunks(HTTPMessage::ChunkCache& chunk_buffers);
00308 
00319     boost::tribool consumeContent(HTTPMessage& http_msg);
00320 
00328     std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_buffers);
00329 
00335     static void computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok);
00336 
00337     // misc functions used by the parsing functions
00338     inline static bool isChar(int c);
00339     inline static bool isControl(int c);
00340     inline static bool isSpecial(int c);
00341     inline static bool isDigit(int c);
00342     inline static bool isHexDigit(int c);
00343     inline static bool isCookieAttribute(const std::string& name, bool set_cookie_header);
00344 
00345 
00347     static const boost::uint32_t        STATUS_MESSAGE_MAX;
00348 
00350     static const boost::uint32_t        METHOD_MAX;
00351 
00353     static const boost::uint32_t        RESOURCE_MAX;
00354 
00356     static const boost::uint32_t        QUERY_STRING_MAX;
00357 
00359     static const boost::uint32_t        HEADER_NAME_MAX;
00360 
00362     static const boost::uint32_t        HEADER_VALUE_MAX;
00363 
00365     static const boost::uint32_t        QUERY_NAME_MAX;
00366 
00368     static const boost::uint32_t        QUERY_VALUE_MAX;
00369 
00371     static const boost::uint32_t        COOKIE_NAME_MAX;
00372 
00374     static const boost::uint32_t        COOKIE_VALUE_MAX;
00375 
00376 
00378     mutable PionLogger                  m_logger;
00379 
00381     const bool                          m_is_request;
00382 
00384     const char *                        m_read_ptr;
00385 
00387     const char *                        m_read_end_ptr;
00388 
00389 
00390 private:
00391 
00393     enum MessageParseState {
00394         PARSE_START, PARSE_HEADERS, PARSE_CONTENT,
00395         PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
00396     };
00397 
00400     enum HeadersParseState {
00401         PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
00402         PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
00403         PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
00404         PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
00405         PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
00406         PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
00407         PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
00408         PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
00409         PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
00410         PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
00411     };
00412 
00415     enum ChunkedContentParseState {
00416         PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE, 
00417         PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
00418         PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK, 
00419         PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
00420         PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK, 
00421         PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
00422     };
00423 
00424 
00426     MessageParseState                   m_message_parse_state;
00427 
00429     HeadersParseState                   m_headers_parse_state;
00430 
00432     ChunkedContentParseState            m_chunked_content_parse_state;
00433 
00435     boost::uint16_t                     m_status_code;
00436 
00438     std::string                         m_status_message;
00439 
00441     std::string                         m_method;
00442 
00444     std::string                         m_resource;
00445 
00447     std::string                         m_query_string;
00448 
00450     std::string                         m_raw_headers;
00451 
00453     std::string                         m_header_name;
00454 
00456     std::string                         m_header_value;
00457 
00459     std::string                         m_chunk_size_str;
00460 
00462     std::size_t                         m_size_of_current_chunk;
00463 
00465     std::size_t                         m_bytes_read_in_current_chunk;
00466 
00468     std::size_t                         m_bytes_content_remaining;
00469 
00471     std::size_t                         m_bytes_content_read;
00472 
00474     std::size_t                         m_bytes_last_read;
00475 
00477     std::size_t                         m_bytes_total_read;
00478 
00480     std::size_t                         m_max_content_length;
00481     
00483     bool                                m_save_raw_headers;
00484 };
00485 
00486 
00487 // inline functions for HTTPParser
00488 
00489 inline bool HTTPParser::isChar(int c)
00490 {
00491     return(c >= 0 && c <= 127);
00492 }
00493 
00494 inline bool HTTPParser::isControl(int c)
00495 {
00496     return( (c >= 0 && c <= 31) || c == 127);
00497 }
00498 
00499 inline bool HTTPParser::isSpecial(int c)
00500 {
00501     switch (c) {
00502     case '(': case ')': case '<': case '>': case '@':
00503     case ',': case ';': case ':': case '\\': case '"':
00504     case '/': case '[': case ']': case '?': case '=':
00505     case '{': case '}': case ' ': case '\t':
00506         return true;
00507     default:
00508         return false;
00509     }
00510 }
00511 
00512 inline bool HTTPParser::isDigit(int c)
00513 {
00514     return(c >= '0' && c <= '9');
00515 }
00516 
00517 inline bool HTTPParser::isHexDigit(int c)
00518 {
00519     return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
00520 }
00521 
00522 inline bool HTTPParser::isCookieAttribute(const std::string& name, bool set_cookie_header)
00523 {
00524     return (name.empty() || name[0] == '$' || (set_cookie_header &&
00525         (name=="Comment" || name=="Domain" || name=="Max-Age" || name=="Path" || name=="Secure" || name=="Version" || name=="Expires")
00526         ) );
00527 }
00528 
00529 }   // end namespace net
00530 }   // end namespace pion
00531 
00532 #endif

Generated on Fri Apr 30 14:48:53 2010 for pion-net by  doxygen 1.4.7