00001
00002
00003
00004
00005
00006
00007
00008
00009
#include <cstdlib>
#include <cstring>
#include <boost/logic/tribool.hpp>
#include <pion/net/HTTPParser.hpp>
#include <pion/net/HTTPRequest.hpp>
#include <pion/net/HTTPResponse.hpp>
#include <pion/net/HTTPMessage.hpp>
00016
00017
00018 namespace pion {
00019 namespace net {
00020
00021
00022
00023
00024 const boost::uint32_t HTTPParser::STATUS_MESSAGE_MAX = 1024;
00025 const boost::uint32_t HTTPParser::METHOD_MAX = 1024;
00026 const boost::uint32_t HTTPParser::RESOURCE_MAX = 256 * 1024;
00027 const boost::uint32_t HTTPParser::QUERY_STRING_MAX = 1024 * 1024;
00028 const boost::uint32_t HTTPParser::HEADER_NAME_MAX = 1024;
00029 const boost::uint32_t HTTPParser::HEADER_VALUE_MAX = 1024 * 1024;
00030 const boost::uint32_t HTTPParser::QUERY_NAME_MAX = 1024;
00031 const boost::uint32_t HTTPParser::QUERY_VALUE_MAX = 1024 * 1024;
00032 const boost::uint32_t HTTPParser::COOKIE_NAME_MAX = 1024;
00033 const boost::uint32_t HTTPParser::COOKIE_VALUE_MAX = 1024 * 1024;
00034 const std::size_t HTTPParser::DEFAULT_CONTENT_MAX = 1024 * 1024;
00035
00036
00037
00038
00039 boost::tribool HTTPParser::parse(HTTPMessage& http_msg)
00040 {
00041 PION_ASSERT(! eof() );
00042
00043 boost::tribool rc = boost::indeterminate;
00044 std::size_t total_bytes_parsed = 0;
00045
00046 if(http_msg.hasMissingPackets()) {
00047 http_msg.setDataAfterMissingPacket(true);
00048 }
00049
00050 do {
00051 switch (m_message_parse_state) {
00052
00053 case PARSE_START:
00054 m_message_parse_state = PARSE_HEADERS;
00055
00056
00057
00058 case PARSE_HEADERS:
00059 rc = parseHeaders(http_msg);
00060 total_bytes_parsed += m_bytes_last_read;
00061
00062 if (rc == true) {
00063
00064 rc = finishHeaderParsing(http_msg);
00065 }
00066 break;
00067
00068
00069 case PARSE_CHUNKS:
00070 rc = parseChunks(http_msg.getChunkCache());
00071 total_bytes_parsed += m_bytes_last_read;
00072
00073 if (rc == true) {
00074 http_msg.concatenateChunks();
00075 }
00076 break;
00077
00078
00079 case PARSE_CONTENT:
00080 rc = consumeContent(http_msg);
00081 total_bytes_parsed += m_bytes_last_read;
00082 break;
00083
00084
00085 case PARSE_CONTENT_NO_LENGTH:
00086 consumeContentAsNextChunk(http_msg.getChunkCache());
00087 total_bytes_parsed += m_bytes_last_read;
00088 break;
00089
00090
00091 case PARSE_END:
00092 rc = true;
00093 break;
00094 }
00095 } while ( boost::indeterminate(rc) && ! eof() );
00096
00097
00098 if (rc == true) {
00099 m_message_parse_state = PARSE_END;
00100 finish(http_msg);
00101 } else if(rc == false) {
00102 computeMsgStatus(http_msg, false);
00103 }
00104
00105
00106 m_bytes_last_read = total_bytes_parsed;
00107
00108 return rc;
00109 }
00110
00111 boost::tribool HTTPParser::parseMissingData(HTTPMessage& http_msg, std::size_t len)
00112 {
00113 static const char MISSING_DATA_CHAR = 'X';
00114 boost::tribool rc = boost::indeterminate;
00115
00116 http_msg.setMissingPackets(true);
00117
00118 switch (m_message_parse_state) {
00119
00120
00121 case PARSE_START:
00122 case PARSE_HEADERS:
00123 rc = false;
00124 break;
00125
00126
00127 case PARSE_CHUNKS:
00128
00129 if (m_chunked_content_parse_state == PARSE_CHUNK
00130 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00131 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00132 {
00133
00134 for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n)
00135 http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
00136
00137 m_bytes_read_in_current_chunk += len;
00138 m_bytes_last_read = len;
00139 m_bytes_total_read += len;
00140 m_bytes_content_read += len;
00141
00142 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00143 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00144 }
00145 } else {
00146
00147 rc = false;
00148 }
00149 break;
00150
00151
00152 case PARSE_CONTENT:
00153
00154 if (m_bytes_content_remaining == 0) {
00155
00156 rc = true;
00157 } else if (m_bytes_content_remaining < len) {
00158
00159 rc = false;
00160 } else {
00161
00162
00163 if ( (m_bytes_content_read+len) <= m_max_content_length) {
00164
00165 for (std::size_t n = 0; n < len; ++n)
00166 http_msg.getContent()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00167 } else {
00168 m_bytes_content_read += len;
00169 }
00170
00171 m_bytes_content_remaining -= len;
00172 m_bytes_total_read += len;
00173 m_bytes_last_read = len;
00174
00175 if (m_bytes_content_remaining == 0)
00176 rc = true;
00177 }
00178 break;
00179
00180
00181 case PARSE_CONTENT_NO_LENGTH:
00182
00183 for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n)
00184 http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
00185 m_bytes_last_read = len;
00186 m_bytes_total_read += len;
00187 m_bytes_content_read += len;
00188 break;
00189
00190
00191 case PARSE_END:
00192 rc = true;
00193 break;
00194 }
00195
00196
00197 if (rc == true) {
00198 m_message_parse_state = PARSE_END;
00199 finish(http_msg);
00200 } else if(rc == false) {
00201 computeMsgStatus(http_msg, false);
00202 }
00203
00204 return rc;
00205 }
00206
00207 boost::tribool HTTPParser::parseHeaders(HTTPMessage& http_msg)
00208 {
00209
00210
00211
00212
00213
00214
00215
00216 const char *read_start_ptr = m_read_ptr;
00217 m_bytes_last_read = 0;
00218 while (m_read_ptr < m_read_end_ptr) {
00219
00220 switch (m_headers_parse_state) {
00221 case PARSE_METHOD_START:
00222
00223 if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') {
00224 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr))
00225 return false;
00226 m_headers_parse_state = PARSE_METHOD;
00227 m_method.erase();
00228 m_method.push_back(*m_read_ptr);
00229 }
00230 break;
00231
00232 case PARSE_METHOD:
00233
00234 if (*m_read_ptr == ' ') {
00235 m_resource.erase();
00236 m_headers_parse_state = PARSE_URI_STEM;
00237 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00238 return false;
00239 } else if (m_method.size() >= METHOD_MAX) {
00240 return false;
00241 } else {
00242 m_method.push_back(*m_read_ptr);
00243 }
00244 break;
00245
00246 case PARSE_URI_STEM:
00247
00248 if (*m_read_ptr == ' ') {
00249 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00250 } else if (*m_read_ptr == '?') {
00251 m_query_string.erase();
00252 m_headers_parse_state = PARSE_URI_QUERY;
00253 } else if (isControl(*m_read_ptr)) {
00254 return false;
00255 } else if (m_resource.size() >= RESOURCE_MAX) {
00256 return false;
00257 } else {
00258 m_resource.push_back(*m_read_ptr);
00259 }
00260 break;
00261
00262 case PARSE_URI_QUERY:
00263
00264 if (*m_read_ptr == ' ') {
00265 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00266 } else if (isControl(*m_read_ptr)) {
00267 return false;
00268 } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00269 return false;
00270 } else {
00271 m_query_string.push_back(*m_read_ptr);
00272 }
00273 break;
00274
00275 case PARSE_HTTP_VERSION_H:
00276
00277 if (*m_read_ptr != 'H') return false;
00278 m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00279 break;
00280
00281 case PARSE_HTTP_VERSION_T_1:
00282
00283 if (*m_read_ptr != 'T') return false;
00284 m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00285 break;
00286
00287 case PARSE_HTTP_VERSION_T_2:
00288
00289 if (*m_read_ptr != 'T') return false;
00290 m_headers_parse_state = PARSE_HTTP_VERSION_P;
00291 break;
00292
00293 case PARSE_HTTP_VERSION_P:
00294
00295 if (*m_read_ptr != 'P') return false;
00296 m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00297 break;
00298
00299 case PARSE_HTTP_VERSION_SLASH:
00300
00301 if (*m_read_ptr != '/') return false;
00302 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00303 break;
00304
00305 case PARSE_HTTP_VERSION_MAJOR_START:
00306
00307 if (!isDigit(*m_read_ptr)) return false;
00308 http_msg.setVersionMajor(*m_read_ptr - '0');
00309 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00310 break;
00311
00312 case PARSE_HTTP_VERSION_MAJOR:
00313
00314 if (*m_read_ptr == '.') {
00315 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00316 } else if (isDigit(*m_read_ptr)) {
00317 http_msg.setVersionMajor( (http_msg.getVersionMajor() * 10)
00318 + (*m_read_ptr - '0') );
00319 } else {
00320 return false;
00321 }
00322 break;
00323
00324 case PARSE_HTTP_VERSION_MINOR_START:
00325
00326 if (!isDigit(*m_read_ptr)) return false;
00327 http_msg.setVersionMinor(*m_read_ptr - '0');
00328 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00329 break;
00330
00331 case PARSE_HTTP_VERSION_MINOR:
00332
00333 if (*m_read_ptr == ' ') {
00334
00335 if (m_is_request) return false;
00336 m_headers_parse_state = PARSE_STATUS_CODE_START;
00337 } else if (*m_read_ptr == '\r') {
00338
00339 if (! m_is_request) return false;
00340 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00341 } else if (*m_read_ptr == '\n') {
00342
00343 if (! m_is_request) return false;
00344 m_headers_parse_state = PARSE_EXPECTING_CR;
00345 } else if (isDigit(*m_read_ptr)) {
00346 http_msg.setVersionMinor( (http_msg.getVersionMinor() * 10)
00347 + (*m_read_ptr - '0') );
00348 } else {
00349 return false;
00350 }
00351 break;
00352
00353 case PARSE_STATUS_CODE_START:
00354
00355 if (!isDigit(*m_read_ptr)) return false;
00356 m_status_code = (*m_read_ptr - '0');
00357 m_headers_parse_state = PARSE_STATUS_CODE;
00358 break;
00359
00360 case PARSE_STATUS_CODE:
00361
00362 if (*m_read_ptr == ' ') {
00363 m_status_message.erase();
00364 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00365 } else if (isDigit(*m_read_ptr)) {
00366 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00367 } else {
00368 return false;
00369 }
00370 break;
00371
00372 case PARSE_STATUS_MESSAGE:
00373
00374 if (*m_read_ptr == '\r') {
00375 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00376 } else if (*m_read_ptr == '\n') {
00377 m_headers_parse_state = PARSE_EXPECTING_CR;
00378 } else if (isControl(*m_read_ptr)) {
00379 return false;
00380 } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00381 return false;
00382 } else {
00383 m_status_message.push_back(*m_read_ptr);
00384 }
00385 break;
00386
00387 case PARSE_EXPECTING_NEWLINE:
00388
00389 if (*m_read_ptr == '\n') {
00390 m_headers_parse_state = PARSE_HEADER_START;
00391 } else if (*m_read_ptr == '\r') {
00392
00393
00394
00395 ++m_read_ptr;
00396 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00397 m_bytes_total_read += m_bytes_last_read;
00398 return true;
00399 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00400 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00401 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00402 return false;
00403 } else {
00404
00405 m_header_name.erase();
00406 m_header_name.push_back(*m_read_ptr);
00407 m_headers_parse_state = PARSE_HEADER_NAME;
00408 }
00409 break;
00410
00411 case PARSE_EXPECTING_CR:
00412
00413 if (*m_read_ptr == '\r') {
00414 m_headers_parse_state = PARSE_HEADER_START;
00415 } else if (*m_read_ptr == '\n') {
00416
00417
00418
00419 ++m_read_ptr;
00420 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00421 m_bytes_total_read += m_bytes_last_read;
00422 return true;
00423 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00424 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00425 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00426 return false;
00427 } else {
00428
00429 m_header_name.erase();
00430 m_header_name.push_back(*m_read_ptr);
00431 m_headers_parse_state = PARSE_HEADER_NAME;
00432 }
00433 break;
00434
00435 case PARSE_HEADER_WHITESPACE:
00436
00437 if (*m_read_ptr == '\r') {
00438 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00439 } else if (*m_read_ptr == '\n') {
00440 m_headers_parse_state = PARSE_EXPECTING_CR;
00441 } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00442 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr))
00443 return false;
00444
00445 m_header_name.erase();
00446 m_header_name.push_back(*m_read_ptr);
00447 m_headers_parse_state = PARSE_HEADER_NAME;
00448 }
00449 break;
00450
00451 case PARSE_HEADER_START:
00452
00453 if (*m_read_ptr == '\r') {
00454 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00455 } else if (*m_read_ptr == '\n') {
00456 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00457 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00458 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00459 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00460 return false;
00461 } else {
00462
00463 m_header_name.erase();
00464 m_header_name.push_back(*m_read_ptr);
00465 m_headers_parse_state = PARSE_HEADER_NAME;
00466 }
00467 break;
00468
00469 case PARSE_HEADER_NAME:
00470
00471 if (*m_read_ptr == ':') {
00472 m_header_value.erase();
00473 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00474 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00475 return false;
00476 } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00477 return false;
00478 } else {
00479
00480 m_header_name.push_back(*m_read_ptr);
00481 }
00482 break;
00483
00484 case PARSE_SPACE_BEFORE_HEADER_VALUE:
00485
00486 if (*m_read_ptr == ' ') {
00487 m_headers_parse_state = PARSE_HEADER_VALUE;
00488 } else if (*m_read_ptr == '\r') {
00489 http_msg.addHeader(m_header_name, m_header_value);
00490 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00491 } else if (*m_read_ptr == '\n') {
00492 http_msg.addHeader(m_header_name, m_header_value);
00493 m_headers_parse_state = PARSE_EXPECTING_CR;
00494 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00495 return false;
00496 } else {
00497
00498 m_header_value.push_back(*m_read_ptr);
00499 m_headers_parse_state = PARSE_HEADER_VALUE;
00500 }
00501 break;
00502
00503 case PARSE_HEADER_VALUE:
00504
00505 if (*m_read_ptr == '\r') {
00506 http_msg.addHeader(m_header_name, m_header_value);
00507 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00508 } else if (*m_read_ptr == '\n') {
00509 http_msg.addHeader(m_header_name, m_header_value);
00510 m_headers_parse_state = PARSE_EXPECTING_CR;
00511 } else if (isControl(*m_read_ptr)) {
00512 return false;
00513 } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00514 return false;
00515 } else {
00516
00517 m_header_value.push_back(*m_read_ptr);
00518 }
00519 break;
00520
00521 case PARSE_EXPECTING_FINAL_NEWLINE:
00522 if (*m_read_ptr == '\n') ++m_read_ptr;
00523 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00524 m_bytes_total_read += m_bytes_last_read;
00525 return true;
00526
00527 case PARSE_EXPECTING_FINAL_CR:
00528 if (*m_read_ptr == '\r') ++m_read_ptr;
00529 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00530 m_bytes_total_read += m_bytes_last_read;
00531 return true;
00532 }
00533
00534 if (m_save_raw_headers)
00535 m_raw_headers += *m_read_ptr;
00536
00537 ++m_read_ptr;
00538 }
00539
00540 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00541 m_bytes_total_read += m_bytes_last_read;
00542 return boost::indeterminate;
00543 }
00544
00545 void HTTPParser::updateMessageWithHeaderData(HTTPMessage& http_msg) const
00546 {
00547 if (isParsingRequest()) {
00548
00549
00550
00551 HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
00552 http_request.setMethod(m_method);
00553 http_request.setResource(m_resource);
00554 http_request.setQueryString(m_query_string);
00555
00556
00557 if (! m_query_string.empty()) {
00558 if (! parseURLEncoded(http_request.getQueryParams(),
00559 m_query_string.c_str(),
00560 m_query_string.size()))
00561 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI): \""
00562 << m_query_string << "\"");
00563 }
00564
00565
00566 std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
00567 cookie_pair = http_request.getHeaders().equal_range(HTTPTypes::HEADER_COOKIE);
00568 for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
00569 cookie_iterator != http_request.getHeaders().end()
00570 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00571 {
00572 if (! parseCookieHeader(http_request.getCookieParams(),
00573 cookie_iterator->second, false) )
00574 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00575 }
00576
00577 } else {
00578
00579
00580
00581 HTTPResponse& http_response(dynamic_cast<HTTPResponse&>(http_msg));
00582 http_response.setStatusCode(m_status_code);
00583 http_response.setStatusMessage(m_status_message);
00584
00585
00586 std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
00587 cookie_pair = http_response.getHeaders().equal_range(HTTPTypes::HEADER_SET_COOKIE);
00588 for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
00589 cookie_iterator != http_response.getHeaders().end()
00590 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00591 {
00592 if (! parseCookieHeader(http_response.getCookieParams(),
00593 cookie_iterator->second, true) )
00594 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
00595 }
00596
00597 }
00598 }
00599
00600 boost::tribool HTTPParser::finishHeaderParsing(HTTPMessage& http_msg)
00601 {
00602 boost::tribool rc = boost::indeterminate;
00603
00604 m_bytes_content_remaining = m_bytes_content_read = 0;
00605 http_msg.setContentLength(0);
00606 http_msg.updateTransferCodingUsingHeader();
00607 updateMessageWithHeaderData(http_msg);
00608
00609 if (http_msg.isChunked()) {
00610
00611
00612 m_message_parse_state = PARSE_CHUNKS;
00613
00614 } else if (http_msg.isContentLengthImplied()) {
00615
00616
00617 m_message_parse_state = PARSE_END;
00618 rc = true;
00619
00620 } else {
00621
00622
00623 if (http_msg.hasHeader(HTTPTypes::HEADER_CONTENT_LENGTH)) {
00624
00625
00626 try {
00627 http_msg.updateContentLengthUsingHeader();
00628 } catch (...) {
00629 PION_LOG_ERROR(m_logger, "Unable to update content length");
00630 return false;
00631 }
00632
00633
00634 if (http_msg.getContentLength() == 0) {
00635 m_message_parse_state = PARSE_END;
00636 rc = true;
00637 } else {
00638 m_message_parse_state = PARSE_CONTENT;
00639 m_bytes_content_remaining = http_msg.getContentLength();
00640
00641
00642 if (m_bytes_content_remaining > m_max_content_length)
00643 http_msg.setContentLength(m_max_content_length);
00644 }
00645
00646 } else {
00647
00648
00649
00650
00651 if (! m_is_request) {
00652
00653 http_msg.getChunkCache().clear();
00654
00655
00656 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00657 } else {
00658 m_message_parse_state = PARSE_END;
00659 rc = true;
00660 }
00661 }
00662 }
00663
00664
00665 http_msg.createContentBuffer();
00666
00667 return rc;
00668 }
00669
00670 bool HTTPParser::parseURLEncoded(HTTPTypes::QueryParams& dict,
00671 const char *ptr, const size_t len)
00672 {
00673
00674 enum QueryParseState {
00675 QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00676 } parse_state = QUERY_PARSE_NAME;
00677
00678
00679 const char * const end = ptr + len;
00680 std::string query_name;
00681 std::string query_value;
00682
00683
00684 while (ptr < end) {
00685 switch (parse_state) {
00686
00687 case QUERY_PARSE_NAME:
00688
00689 if (*ptr == '=') {
00690
00691 if (query_name.empty()) return false;
00692 parse_state = QUERY_PARSE_VALUE;
00693 } else if (*ptr == '&') {
00694
00695 if (! query_name.empty()) {
00696
00697 dict.insert( std::make_pair(query_name, query_value) );
00698 query_name.erase();
00699 }
00700 } else if (*ptr == '\r' || *ptr == '\n') {
00701
00702 } else if (isControl(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00703
00704 return false;
00705 } else {
00706
00707 query_name.push_back(*ptr);
00708 }
00709 break;
00710
00711 case QUERY_PARSE_VALUE:
00712
00713 if (*ptr == '&') {
00714
00715 dict.insert( std::make_pair(query_name, query_value) );
00716 query_name.erase();
00717 query_value.erase();
00718 parse_state = QUERY_PARSE_NAME;
00719 } else if (*ptr == '\r' || *ptr == '\n') {
00720
00721 } else if (isControl(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00722
00723 return false;
00724 } else {
00725
00726 query_value.push_back(*ptr);
00727 }
00728 break;
00729 }
00730
00731 ++ptr;
00732 }
00733
00734
00735 if (! query_name.empty())
00736 dict.insert( std::make_pair(query_name, query_value) );
00737
00738 return true;
00739 }
00740
00741 bool HTTPParser::parseCookieHeader(HTTPTypes::CookieParams& dict,
00742 const char *ptr, const size_t len,
00743 bool set_cookie_header)
00744 {
00745
00746
00747
00748
00749
00750
00751
00752 enum CookieParseState {
00753 COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
00754 } parse_state = COOKIE_PARSE_NAME;
00755
00756
00757 const char * const end = ptr + len;
00758 std::string cookie_name;
00759 std::string cookie_value;
00760 char value_quote_character = '\0';
00761
00762
00763 while (ptr < end) {
00764 switch (parse_state) {
00765
00766 case COOKIE_PARSE_NAME:
00767
00768 if (*ptr == '=') {
00769
00770 if (cookie_name.empty()) return false;
00771 value_quote_character = '\0';
00772 parse_state = COOKIE_PARSE_VALUE;
00773 } else if (*ptr == ';' || *ptr == ',') {
00774
00775
00776 if (! cookie_name.empty()) {
00777
00778 if (! isCookieAttribute(cookie_name, set_cookie_header))
00779 dict.insert( std::make_pair(cookie_name, cookie_value) );
00780 cookie_name.erase();
00781 }
00782 } else if (*ptr != ' ') {
00783
00784 if (isControl(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
00785 return false;
00786
00787 cookie_name.push_back(*ptr);
00788 }
00789 break;
00790
00791 case COOKIE_PARSE_VALUE:
00792
00793 if (value_quote_character == '\0') {
00794
00795 if (*ptr == ';' || *ptr == ',') {
00796
00797 if (! isCookieAttribute(cookie_name, set_cookie_header))
00798 dict.insert( std::make_pair(cookie_name, cookie_value) );
00799 cookie_name.erase();
00800 cookie_value.erase();
00801 parse_state = COOKIE_PARSE_NAME;
00802 } else if (*ptr == '\'' || *ptr == '"') {
00803 if (cookie_value.empty()) {
00804
00805 value_quote_character = *ptr;
00806 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
00807
00808 return false;
00809 } else {
00810
00811 cookie_value.push_back(*ptr);
00812 }
00813 } else if (*ptr != ' ') {
00814
00815 if (isControl(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
00816 return false;
00817
00818 cookie_value.push_back(*ptr);
00819 }
00820 } else {
00821
00822 if (*ptr == value_quote_character) {
00823
00824 if (! isCookieAttribute(cookie_name, set_cookie_header))
00825 dict.insert( std::make_pair(cookie_name, cookie_value) );
00826 cookie_name.erase();
00827 cookie_value.erase();
00828 parse_state = COOKIE_PARSE_IGNORE;
00829 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
00830
00831 return false;
00832 } else {
00833
00834 cookie_value.push_back(*ptr);
00835 }
00836 }
00837 break;
00838
00839 case COOKIE_PARSE_IGNORE:
00840
00841 if (*ptr == ';' || *ptr == ',')
00842 parse_state = COOKIE_PARSE_NAME;
00843 break;
00844 }
00845
00846 ++ptr;
00847 }
00848
00849
00850 if (! isCookieAttribute(cookie_name, set_cookie_header))
00851 dict.insert( std::make_pair(cookie_name, cookie_value) );
00852
00853 return true;
00854 }
00855
00856 boost::tribool HTTPParser::parseChunks(HTTPMessage::ChunkCache& chunk_cache)
00857 {
00858
00859
00860
00861
00862
00863
00864
00865 const char *read_start_ptr = m_read_ptr;
00866 m_bytes_last_read = 0;
00867 while (m_read_ptr < m_read_end_ptr) {
00868
00869 switch (m_chunked_content_parse_state) {
00870 case PARSE_CHUNK_SIZE_START:
00871
00872 if (isHexDigit(*m_read_ptr)) {
00873 m_chunk_size_str.erase();
00874 m_chunk_size_str.push_back(*m_read_ptr);
00875 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
00876 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
00877
00878
00879 break;
00880 } else {
00881 return false;
00882 }
00883 break;
00884
00885 case PARSE_CHUNK_SIZE:
00886 if (isHexDigit(*m_read_ptr)) {
00887 m_chunk_size_str.push_back(*m_read_ptr);
00888 } else if (*m_read_ptr == '\x0D') {
00889 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
00890 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
00891
00892
00893 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
00894 } else {
00895 return false;
00896 }
00897 break;
00898
00899 case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
00900 if (*m_read_ptr == '\x0D') {
00901 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
00902 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
00903
00904
00905 break;
00906 } else {
00907 return false;
00908 }
00909 break;
00910
00911 case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
00912
00913
00914 if (*m_read_ptr == '\x0A') {
00915 m_bytes_read_in_current_chunk = 0;
00916 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
00917 if (m_size_of_current_chunk == 0) {
00918 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK;
00919 } else {
00920 m_chunked_content_parse_state = PARSE_CHUNK;
00921 }
00922 } else {
00923 return false;
00924 }
00925 break;
00926
00927 case PARSE_CHUNK:
00928 if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
00929 if (chunk_cache.size() < m_max_content_length)
00930 chunk_cache.push_back(*m_read_ptr);
00931 m_bytes_read_in_current_chunk++;
00932 }
00933 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00934 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00935 }
00936 break;
00937
00938 case PARSE_EXPECTING_CR_AFTER_CHUNK:
00939
00940 if (*m_read_ptr == '\x0D') {
00941 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
00942 } else {
00943 return false;
00944 }
00945 break;
00946
00947 case PARSE_EXPECTING_LF_AFTER_CHUNK:
00948
00949 if (*m_read_ptr == '\x0A') {
00950 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00951 } else {
00952 return false;
00953 }
00954 break;
00955
00956 case PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK:
00957
00958 if (*m_read_ptr == '\x0D') {
00959 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
00960 } else {
00961 return false;
00962 }
00963 break;
00964
00965 case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
00966
00967 if (*m_read_ptr == '\x0A') {
00968 ++m_read_ptr;
00969 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00970 m_bytes_total_read += m_bytes_last_read;
00971 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
00972 return true;
00973 } else {
00974 return false;
00975 }
00976 }
00977
00978 ++m_read_ptr;
00979 }
00980
00981 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00982 m_bytes_total_read += m_bytes_last_read;
00983 m_bytes_content_read += m_bytes_last_read;
00984 return boost::indeterminate;
00985 }
00986
00987 boost::tribool HTTPParser::consumeContent(HTTPMessage& http_msg)
00988 {
00989 size_t content_bytes_to_read;
00990 size_t content_bytes_available = bytes_available();
00991 boost::tribool rc = boost::indeterminate;
00992
00993 if (m_bytes_content_remaining == 0) {
00994
00995 return true;
00996 } else {
00997 if (content_bytes_available >= m_bytes_content_remaining) {
00998
00999 rc = true;
01000 content_bytes_to_read = m_bytes_content_remaining;
01001 } else {
01002
01003 content_bytes_to_read = content_bytes_available;
01004 }
01005 m_bytes_content_remaining -= content_bytes_to_read;
01006 }
01007
01008
01009 if (m_bytes_content_read < m_max_content_length) {
01010 if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
01011
01012
01013 memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr,
01014 m_max_content_length - m_bytes_content_read);
01015 } else {
01016
01017 memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
01018 }
01019 }
01020
01021 m_read_ptr += content_bytes_to_read;
01022 m_bytes_content_read += content_bytes_to_read;
01023 m_bytes_total_read += content_bytes_to_read;
01024 m_bytes_last_read = content_bytes_to_read;
01025
01026 return rc;
01027 }
01028
01029 std::size_t HTTPParser::consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_cache)
01030 {
01031 if (bytes_available() == 0) {
01032 m_bytes_last_read = 0;
01033 } else {
01034 m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01035 while (m_read_ptr < m_read_end_ptr) {
01036 if (chunk_cache.size() < m_max_content_length)
01037 chunk_cache.push_back(*m_read_ptr);
01038 ++m_read_ptr;
01039 }
01040 m_bytes_total_read += m_bytes_last_read;
01041 m_bytes_content_read += m_bytes_last_read;
01042 }
01043 return m_bytes_last_read;
01044 }
01045
01046 void HTTPParser::finish(HTTPMessage& http_msg) const
01047 {
01048 switch (m_message_parse_state) {
01049 case PARSE_START:
01050 http_msg.setIsValid(false);
01051 http_msg.setContentLength(0);
01052 http_msg.createContentBuffer();
01053 return;
01054 case PARSE_END:
01055 http_msg.setIsValid(true);
01056 break;
01057 case PARSE_HEADERS:
01058 http_msg.setIsValid(false);
01059 updateMessageWithHeaderData(http_msg);
01060 http_msg.setContentLength(0);
01061 http_msg.createContentBuffer();
01062 break;
01063 case PARSE_CONTENT:
01064 http_msg.setIsValid(false);
01065 http_msg.setContentLength(getContentBytesRead());
01066 break;
01067 case PARSE_CHUNKS:
01068 http_msg.setIsValid(false);
01069 http_msg.concatenateChunks();
01070 break;
01071 case PARSE_CONTENT_NO_LENGTH:
01072 http_msg.setIsValid(true);
01073 http_msg.concatenateChunks();
01074 break;
01075 }
01076
01077 computeMsgStatus(http_msg, http_msg.isValid());
01078
01079 if (isParsingRequest()) {
01080
01081
01082
01083 HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
01084 const std::string& content_type_header = http_request.getHeader(HTTPTypes::HEADER_CONTENT_TYPE);
01085 if (content_type_header.compare(0, HTTPTypes::CONTENT_TYPE_URLENCODED.length(),
01086 HTTPTypes::CONTENT_TYPE_URLENCODED) == 0)
01087 {
01088 if (! parseURLEncoded(http_request.getQueryParams(),
01089 http_request.getContent(),
01090 http_request.getContentLength()))
01091 PION_LOG_WARN(m_logger, "Request query string parsing failed (POST content): \""
01092 << http_request.getContent() << "\"");
01093 }
01094 }
01095 }
01096
01097 void HTTPParser::computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok )
01098 {
01099 HTTPMessage::DataStatus st = HTTPMessage::STATUS_NONE;
01100
01101 if(http_msg.hasMissingPackets()) {
01102 st = http_msg.hasDataAfterMissingPackets() ?
01103 HTTPMessage::STATUS_PARTIAL : HTTPMessage::STATUS_TRUNCATED;
01104 http_msg.setStatus(st);
01105 } else {
01106 st = msg_parsed_ok ? HTTPMessage::STATUS_OK : HTTPMessage::STATUS_TRUNCATED;
01107 }
01108
01109 http_msg.setStatus(st);
01110 }
01111 }
01112 }
01113