diff --git a/.codespellrc b/.codespellrc index 101edae..c5233c0 100644 --- a/.codespellrc +++ b/.codespellrc @@ -4,4 +4,4 @@ ignore-words-list = , check-filenames = check-hidden = -skip = ./.git +skip = ./.git,./src/utility/URLParser diff --git a/.github/workflows/check-arduino.yml b/.github/workflows/check-arduino.yml index 3e0d26c..e818685 100644 --- a/.github/workflows/check-arduino.yml +++ b/.github/workflows/check-arduino.yml @@ -16,10 +16,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Arduino Lint - uses: arduino/arduino-lint-action@v1 + uses: arduino/arduino-lint-action@v2 with: compliance: specification library-manager: update diff --git a/.github/workflows/compile-examples.yml b/.github/workflows/compile-examples.yml index 17cbc1f..0dd5ac7 100644 --- a/.github/workflows/compile-examples.yml +++ b/.github/workflows/compile-examples.yml @@ -34,10 +34,11 @@ jobs: - fqbn: arduino:samd:mkr1000 platforms: | - name: arduino:samd + artifact-name-suffix: arduino-samd-mkr1000 steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Compile examples uses: arduino/compile-sketches@v1 @@ -55,8 +56,8 @@ jobs: sketches-report-path: ${{ env.SKETCHES_REPORTS_PATH }} - name: Save sketches report as workflow artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: if-no-files-found: error path: ${{ env.SKETCHES_REPORTS_PATH }} - name: ${{ env.SKETCHES_REPORTS_PATH }} + name: sketches-report-${{ matrix.board.artifact-name-suffix }} diff --git a/.github/workflows/report-size-deltas.yml b/.github/workflows/report-size-deltas.yml index 652be5d..39e2a0a 100644 --- a/.github/workflows/report-size-deltas.yml +++ b/.github/workflows/report-size-deltas.yml @@ -20,5 +20,5 @@ jobs: - name: Comment size deltas reports to PRs uses: arduino/report-size-deltas@v1 with: - # The name of the workflow artifact created by the sketch compilation workflow - sketches-reports-source: sketches-reports + # Regex matching the names of the workflow artifacts created by the "Compile Examples" workflow + sketches-reports-source: ^sketches-report-.+ diff --git a/.github/workflows/spell-check.yml b/.github/workflows/spell-check.yml index 3f6b03f..ef7d894 100644 --- a/.github/workflows/spell-check.yml +++ b/.github/workflows/spell-check.yml @@ -16,7 +16,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Spell check uses: codespell-project/actions-codespell@master diff --git a/.github/workflows/sync-labels.yml b/.github/workflows/sync-labels.yml index 986bda6..53a9f54 100644 --- a/.github/workflows/sync-labels.yml +++ b/.github/workflows/sync-labels.yml @@ -27,11 +27,11 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Download JSON schema for labels configuration file id: download-schema - uses: carlosperate/download-file-action@v1 + uses: carlosperate/download-file-action@v2 with: file-url: https://raw.githubusercontent.com/arduino/tooling-project-assets/main/workflow-templates/assets/sync-labels/arduino-tooling-gh-label-configuration-schema.json location: ${{ runner.temp }}/label-configuration-schema @@ -65,12 +65,12 @@ jobs: steps: - name: Download - uses: carlosperate/download-file-action@v1 + uses: carlosperate/download-file-action@v2 with: file-url: https://raw.githubusercontent.com/arduino/tooling-project-assets/main/workflow-templates/assets/sync-labels/${{ matrix.filename }} - name: Pass configuration files to next job via workflow artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: path: | *.yaml @@ -105,16 +105,16 @@ jobs: echo "::set-output name=flag::--dry-run" - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Download configuration files artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: ${{ env.CONFIGURATIONS_ARTIFACT }} path: ${{ env.CONFIGURATIONS_FOLDER }} - name: Remove unneeded artifact - uses: geekyeggo/delete-artifact@v1 + uses: geekyeggo/delete-artifact@v5 with: name: ${{ env.CONFIGURATIONS_ARTIFACT }} diff --git a/.gitignore b/.gitignore index 653acab..24a0007 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ examples/node_test_server/node_modules/ *.DS_Store */.DS_Store examples/.DS_Store +.idea/ diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/examples/ParseURL/ParseURL.ino b/examples/ParseURL/ParseURL.ino new file mode 100644 index 0000000..410ac85 --- /dev/null +++ b/examples/ParseURL/ParseURL.ino @@ -0,0 +1,29 @@ +#include "URLParser.h" + +void setup() { + + Serial.begin(9600); + + while(!Serial); + + Serial.println("starting"); + + ParsedUrl url( + "https://www.google.com/search?q=arduino" + ); + + Serial.print("parsed URL schema: \""); + Serial.print(url.schema()); + Serial.print("\"\nparsed URL host: \""); + Serial.print(url.host()); + Serial.print("\"\nparsed URL path: \""); + Serial.print(url.path()); + Serial.print("\"\nparsed URL query: \""); + Serial.print(url.query()); + Serial.print("\"\nparsed URL userinfo: \""); + Serial.print(url.userinfo()); + Serial.println("\""); + +} + +void loop() { } \ No newline at end of file diff --git a/library.properties b/library.properties index 2682599..c93d0de 100644 --- a/library.properties +++ b/library.properties @@ -1,5 +1,5 @@ name=ArduinoHttpClient -version=0.4.0 +version=0.6.1 author=Arduino maintainer=Arduino sentence=[EXPERIMENTAL] Easily interact with web servers from Arduino, using HTTP and WebSockets. diff --git a/src/HttpClient.cpp b/src/HttpClient.cpp index 1c73464..31909d9 100644 --- a/src/HttpClient.cpp +++ b/src/HttpClient.cpp @@ -40,6 +40,7 @@ void HttpClient::resetState() iIsChunked = false; iChunkLength = 0; iHttpResponseTimeout = kHttpResponseTimeout; + iHttpWaitForDataDelay = kHttpWaitForDataDelay; } void HttpClient::stop() @@ -161,7 +162,7 @@ int HttpClient::sendInitialHeaders(const char* aURLPath, const char* aHttpMethod { iClient->print("Host: "); iClient->print(iServerName); - if (iServerPort != kHttpPort) + if (iServerPort != kHttpPort && iServerPort != kHttpsPort) { iClient->print(":"); iClient->print(iServerPort); @@ -421,7 +422,7 @@ int HttpClient::responseStatusCode() { if (available()) { - c = read(); + c = HttpClient::read(); if (c != -1) { switch(iState) @@ -473,7 +474,7 @@ int HttpClient::responseStatusCode() { // We haven't got any data, so let's pause to allow some to // arrive - delay(kHttpWaitForDataDelay); + delay(iHttpWaitForDataDelay); } } if ( (c == '\n') && (iStatusCode < 200 && iStatusCode != 101) ) @@ -522,7 +523,7 @@ int HttpClient::skipResponseHeaders() { // We haven't got any data, so let's pause to allow some to // arrive - delay(kHttpWaitForDataDelay); + delay(iHttpWaitForDataDelay); } } if (endOfHeadersReached()) @@ -542,7 +543,7 @@ bool HttpClient::endOfHeadersReached() return (iState == eReadingBody || iState == eReadingChunkLength || iState == eReadingBodyChunk); }; -int HttpClient::contentLength() +long HttpClient::contentLength() { // skip the response headers, if they haven't been read already if (!endOfHeadersReached()) @@ -762,7 +763,7 @@ int HttpClient::read(uint8_t *buf, size_t size) int HttpClient::readHeader() { - char c = read(); + char c = HttpClient::read(); if (endOfHeadersReached()) { @@ -818,7 +819,11 @@ int HttpClient::readHeader() case eReadingContentLength: if (isdigit(c)) { - iContentLength = iContentLength*10 + (c - '0'); + long _iContentLength = iContentLength*10 + (c - '0'); + // Only apply if the value didn't wrap around + if (_iContentLength > iContentLength) { + iContentLength = _iContentLength; + } } else { diff --git a/src/HttpClient.h b/src/HttpClient.h index 6a7aa1d..3d404af 100644 --- a/src/HttpClient.h +++ b/src/HttpClient.h @@ -43,6 +43,7 @@ class HttpClient : public Client public: static const int kNoContentLengthHeader =-1; static const int kHttpPort =80; + static const int kHttpsPort =443; static const char* kUserAgent; // FIXME Write longer API request, using port and user-agent, example @@ -272,7 +273,7 @@ class HttpClient : public Client @return Length of the body, in bytes, or kNoContentLengthHeader if no Content-Length header was returned by the server */ - int contentLength(); + long contentLength(); /** Returns if the response body is chunked @return true if response body is chunked, false otherwise @@ -317,6 +318,8 @@ class HttpClient : public Client virtual operator bool() { return bool(iClient); }; virtual uint32_t httpResponseTimeout() { return iHttpResponseTimeout; }; virtual void setHttpResponseTimeout(uint32_t timeout) { iHttpResponseTimeout = timeout; }; + virtual uint32_t httpWaitForDataDelay() { return iHttpWaitForDataDelay; }; + virtual void setHttpWaitForDataDelay(uint32_t delay) { iHttpWaitForDataDelay = delay; }; protected: /** Reset internal state data back to the "just initialised" state */ @@ -340,7 +343,7 @@ class HttpClient : public Client // Number of milliseconds that we wait each time there isn't any data // available to be read (during status code and header processing) - static const int kHttpWaitForDataDelay = 1000; + static const int kHttpWaitForDataDelay = 100; // Number of milliseconds that we'll wait in total without receiving any // data before returning HTTP_ERROR_TIMED_OUT (during status code and header // processing) @@ -372,7 +375,7 @@ class HttpClient : public Client // Stores the status code for the response, once known int iStatusCode; // Stores the value of the Content-Length header, if present - int iContentLength; + long iContentLength; // How many bytes of the response body have been read by the user int iBodyLengthConsumed; // How far through a Content-Length header prefix we are @@ -384,6 +387,7 @@ class HttpClient : public Client // Stores the value of the current chunk length, if present int iChunkLength; uint32_t iHttpResponseTimeout; + uint32_t iHttpWaitForDataDelay; bool iConnectionClose; bool iSendDefaultRequestHeaders; String iHeaderLine; diff --git a/src/URLParser.h b/src/URLParser.h new file mode 100644 index 0000000..fd31e93 --- /dev/null +++ b/src/URLParser.h @@ -0,0 +1,108 @@ +/* + * PackageLicenseDeclared: Apache-2.0 + * Copyright (c) 2017 ARM Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * The following class is defined in mbed libraries, in case of STM32H7 include the original library + */ +#if defined __has_include +# if __has_include() +# include +# else +# define NO_HTTP_PARSED +# endif +#endif + +#ifdef NO_HTTP_PARSED +#ifndef _MBED_HTTP_PARSED_URL_H_ +#define _MBED_HTTP_PARSED_URL_H_ + +#include "utility/URLParser/http_parser.h" + +class ParsedUrl { +public: + ParsedUrl(const char* url) { + struct http_parser_url parsed_url; + http_parser_parse_url(https://github.com/url,%20strlen(url), false, &parsed_url); + + for (size_t ix = 0; ix < UF_MAX; ix++) { + char* value; + if (parsed_url.field_set & (1 << ix)) { + value = (char*)calloc(parsed_url.field_data[ix].len + 1, 1); + memcpy(value, url + parsed_url.field_data[ix].off, + parsed_url.field_data[ix].len); + } + else { + value = (char*)calloc(1, 1); + } + + switch ((http_parser_url_fields)ix) { + case UF_SCHEMA: _schema = value; break; + case UF_HOST: _host = value; break; + case UF_PATH: _path = value; break; + case UF_QUERY: _query = value; break; + case UF_USERINFO: _userinfo = value; break; + default: + // PORT is already parsed, FRAGMENT is not relevant for HTTP requests + free(value); + break; + } + } + + _port = parsed_url.port; + if (!_port) { + if (strcmp(_schema, "https") == 0 || strcmp(_schema, "wss") == 0) { + _port = 443; + } + else { + _port = 80; + } + } + + if (strcmp(_path, "") == 0) { + free(_path); + _path = (char*)calloc(2, 1); + _path[0] = '/'; + } + } + + ~ParsedUrl() { + if (_schema) free(_schema); + if (_host) free(_host); + if (_path) free(_path); + if (_query) free(_query); + if (_userinfo) free(_userinfo); + } + + uint16_t port() const { return _port; } + char* schema() const { return _schema; } + char* host() const { return _host; } + char* path() const { return _path; } + char* query() const { return _query; } + char* userinfo() const { return _userinfo; } + +private: + uint16_t _port; + char* _schema; + char* _host; + char* _path; + char* _query; + char* _userinfo; +}; + +#endif // _MBED_HTTP_PARSED_URL_H_ +#endif // NO_HTTP_PARSED +#undef NO_HTTP_PARSED \ No newline at end of file diff --git a/src/WebSocketClient.h b/src/WebSocketClient.h index 4b009e6..96eb6d2 100644 --- a/src/WebSocketClient.h +++ b/src/WebSocketClient.h @@ -8,6 +8,10 @@ #include "HttpClient.h" +#ifndef WS_TX_BUFFER_SIZE + #define WS_TX_BUFFER_SIZE 128 +#endif + static const int TYPE_CONTINUATION = 0x0; static const int TYPE_TEXT = 0x1; static const int TYPE_BINARY = 0x2; @@ -86,7 +90,7 @@ class WebSocketClient : public HttpClient private: bool iTxStarted; uint8_t iTxMessageType; - uint8_t iTxBuffer[128]; + uint8_t iTxBuffer[WS_TX_BUFFER_SIZE]; uint64_t iTxSize; uint8_t iRxOpCode; diff --git a/src/utility/URLParser/LICENSE b/src/utility/URLParser/LICENSE new file mode 100644 index 0000000..5baf7c0 --- /dev/null +++ b/src/utility/URLParser/LICENSE @@ -0,0 +1,23 @@ +http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright +Igor Sysoev. + +Additional changes are licensed under the same terms as NGINX and +copyright Joyent, Inc. and other Node contributors. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. diff --git a/src/utility/URLParser/README.md b/src/utility/URLParser/README.md new file mode 100644 index 0000000..56f8c96 --- /dev/null +++ b/src/utility/URLParser/README.md @@ -0,0 +1,5 @@ +# http_parser library + +This code is imported from: https://github.com/arduino/ArduinoCore-mbed/tree/4.1.1/libraries/SocketWrapper/src/utility/http_parser + +The code is shrinked in size by deleting all the unrelated code to url parse. diff --git a/src/utility/URLParser/http_parser.c b/src/utility/URLParser/http_parser.c new file mode 100644 index 0000000..a572a4c --- /dev/null +++ b/src/utility/URLParser/http_parser.c @@ -0,0 +1,591 @@ +#if defined __has_include +# if ! __has_include() && ! __has_include() +# define NO_HTTP_PARSER +# endif +#endif + +#ifdef NO_HTTP_PARSER +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include +#include +#include +#include +#include + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#define SET_ERRNO(e) \ +do { \ + parser->http_errno = (e); \ +} while(0) + +#if HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif + + +static const uint8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; + +#undef T + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req + + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_headers_done + }; + +enum http_host_state + { + s_http_host_dead = 1 + , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , s_http_host_v6 + , s_http_host_v6_end + , s_http_host_v6_zone_start + , s_http_host_v6_zone + , s_http_host_port_start + , s_http_host_port +}; + +/* Macros for character classes; depends on strict-mode */ +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#if HTTP_PARSER_STRICT +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* FALLTHROUGH */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* FALLTHROUGH */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* FALLTHROUGH */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + uint32_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + assert(u->field_set & (1 << UF_HOST)); + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = p - buf; + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = p - buf; + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = p - buf ; + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + +void +http_parser_url_init(struct http_parser_url *u) { + memset(u, 0, sizeof(*u)); +} + +int +http_parser_parse_url(const char *buf, uint32_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* FALLTROUGH */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + /* Don't bother with endp; we've already validated the string */ + unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + + u->port = (uint16_t) v; + } + + return 0; +} + +unsigned long +http_parser_version(void) { + return HTTP_PARSER_VERSION_MAJOR * 0x10000 | + HTTP_PARSER_VERSION_MINOR * 0x00100 | + HTTP_PARSER_VERSION_PATCH * 0x00001; +} + +#endif // NO_HTTP_PARSER \ No newline at end of file diff --git a/src/utility/URLParser/http_parser.h b/src/utility/URLParser/http_parser.h new file mode 100644 index 0000000..85a5238 --- /dev/null +++ b/src/utility/URLParser/http_parser.h @@ -0,0 +1,96 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef http_parser_h +#define http_parser_h + +#ifdef __cplusplus +extern "C" { +#endif + +/* Also update SONAME in the Makefile whenever you change these. */ +#define HTTP_PARSER_VERSION_MAJOR 2 +#define HTTP_PARSER_VERSION_MINOR 7 +#define HTTP_PARSER_VERSION_PATCH 1 + +#include + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + + +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. + */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + + +/* Returns the library version. Bits 16-23 contain the major version number, + * bits 8-15 the minor version number and bits 0-7 the patch level. + * Usage example: + * + * unsigned long version = http_parser_version(); + * unsigned major = (version >> 16) & 255; + * unsigned minor = (version >> 8) & 255; + * unsigned patch = version & 255; + * printf("http_parser v%u.%u.%u\n", major, minor, patch); + */ +unsigned long http_parser_version(void); + +/* Initialize all http_parser_url members to 0 */ +void http_parser_url_init(struct http_parser_url *u); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, uint32_t buflen, + int is_connect, + struct http_parser_url *u); + +#ifdef __cplusplus +} +#endif +#endif \ No newline at end of file