DeterministicESPAsyncWebServer 1.2.0
Zero-allocation, bounded-execution async HTTP server for ESP32
Loading...
Searching...
No Matches
http_parser.cpp
Go to the documentation of this file.
1// Copyright (C) 2026 Douglas Quigg (dstroy0) <dquigg123@gmail.com>
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
4/**
5 * @file http_parser.cpp
6 * @brief Standalone HTTP/1.1 request parser — implementation.
7 *
8 * No dependency on transport, session, or lwIP. Consumes one byte at a
9 * time via http_parser_feed(); the presentation layer is responsible for
10 * pulling bytes out of whatever transport buffer it uses.
11 */
12
13#include "http_parser.h"
14
16
// ---------------------------------------------------------------------------
// FNV-1a hash constants for HTTP version validation
// ---------------------------------------------------------------------------
// Evaluated at compile time via constexpr; zero runtime cost.
// The hash of the 8-byte version token ("HTTP/1.0" or "HTTP/1.1") is
// compared against the accumulated _version_hash when CR terminates the
// version field.

static constexpr uint32_t FNV_OFFSET = 2166136261u; // 32-bit FNV offset basis
static constexpr uint32_t FNV_PRIME = 16777619u;    // 32-bit FNV prime

/**
 * @brief Compile-time 32-bit FNV-1a hash of a NUL-terminated string.
 *
 * Kept as a single-return recursive expression so it is a valid constexpr
 * function under C++11 rules. Each step XORs one input byte into the hash
 * and then multiplies by the FNV prime (modulo 2^32).
 *
 * @param s NUL-terminated input; the terminator itself is not hashed.
 * @param h Running hash to continue from; defaults to the FNV offset basis.
 * @return The hash of @p s folded onto @p h.
 */
static constexpr uint32_t fnv1a(const char *s, uint32_t h = FNV_OFFSET)
{
    return (*s == '\0')
               ? h
               : fnv1a(s + 1, (h ^ static_cast<uint8_t>(*s)) * FNV_PRIME);
}

static constexpr uint32_t HASH_HTTP10 = fnv1a("HTTP/1.0");
static constexpr uint32_t HASH_HTTP11 = fnv1a("HTTP/1.1");

// The parser tells the two versions apart purely by hash value, so the two
// constants must differ; fail the build rather than misclassify at runtime.
static_assert(HASH_HTTP10 != HASH_HTTP11,
              "FNV-1a hashes of HTTP/1.0 and HTTP/1.1 must be distinct");
35
36// ---------------------------------------------------------------------------
37// RFC 7230 character-class helpers
38// ---------------------------------------------------------------------------
39
/**
 * @brief True for bytes that are valid HTTP "token" characters (RFC 7230 §3.2.6).
 *
 *   token = 1*tchar
 *   tchar = ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" /
 *           "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
 *
 * Used to validate method bytes and header field-name bytes.
 *
 * @param b Raw input byte.
 * @return true when @p b is one of the 77 tchar code points, false otherwise
 *         (including NUL, SP, separators such as ':' and all bytes >= 0x7F).
 */
static inline bool is_tchar(uint8_t b)
{
    const bool is_alpha = (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z');
    const bool is_digit = (b >= '0' && b <= '9');
    if (is_alpha || is_digit)
    {
        return true;
    }

    // Remaining tchar members are the fifteen punctuation marks below.
    switch (b)
    {
    case '!':
    case '#':
    case '$':
    case '%':
    case '&':
    case '\'':
    case '*':
    case '+':
    case '-':
    case '.':
    case '^':
    case '_':
    case '`':
    case '|':
    case '~':
        return true;
    default:
        return false;
    }
}
79
/**
 * @brief True for visible ASCII characters (RFC 5234 VCHAR = %x21-7E).
 *
 * Used to validate request-target (path, query) bytes. Excludes NUL,
 * control characters, SP (which is the request-line field delimiter),
 * and DEL (0x7F).
 *
 * @param b Raw input byte.
 * @return true when 0x21 <= @p b <= 0x7E, false otherwise.
 */
static inline bool is_vchar(uint8_t b)
{
    const uint8_t first_visible = 0x21; // '!'
    const uint8_t last_visible = 0x7E;  // '~'
    return first_visible <= b && b <= last_visible;
}
91
/**
 * @brief True for bytes permitted in an HTTP header field-value (RFC 7230 §3.2).
 *
 *   field-value = *( field-vchar / SP / HTAB )
 *   field-vchar = VCHAR / obs-text (%x80-FF)
 *
 * Excludes all control characters (%x00-08, %x0A-1F, %x7F).
 * SP (0x20) and HTAB (0x09) are allowed as internal whitespace.
 * obs-text (%x80-FF) is kept for legacy compatibility.
 *
 * @param b Raw input byte.
 * @return true when @p b may appear inside a field value, false otherwise.
 */
static inline bool is_field_value_char(uint8_t b)
{
    if (b == 0x09)
    {
        return true; // HTAB
    }
    if (b >= 0x80)
    {
        return true; // obs-text
    }
    return b >= 0x20 && b <= 0x7E; // SP through '~', no DEL
}
108
109/**
110 * @brief Split a raw query string into key=value pairs.
111 *
112 * Operates in-place on `req->query[]`. Pairs are `&`-separated; key and
113 * value are split on the first `=`. Keys or values longer than their
114 * respective limits are silently truncated — the path itself remains valid.
115 */
116static void parse_query_params(HttpReq *req)
117{
118 const char *qs = req->query;
119 size_t len = req->query_idx;
120 size_t i = 0;
121
122 while (i < len && req->query_count < MAX_QUERY_PARAMS)
123 {
124 QueryParam *qp = &req->query_params[req->query_count];
125 size_t key_idx = 0;
126 size_t val_idx = 0;
127 bool in_val = false;
128
129 while (i < len)
130 {
131 char c = qs[i++];
132 if (c == '&')
133 break;
134 if (c == '=' && !in_val)
135 {
136 in_val = true;
137 continue;
138 }
139 if (!in_val && key_idx < QUERY_KEY_LEN - 1)
140 qp->key[key_idx++] = c;
141 else if (in_val && val_idx < QUERY_VAL_LEN - 1)
142 qp->val[val_idx++] = c;
143 }
144
145 if (key_idx > 0)
146 req->query_count++;
147 }
148}
149
// Function body that re-initialises a parser context in place.
// NOTE(review): the signature line (gutter 150) is absent from this listing;
// presumably `void http_parser_reset(HttpReq *req)` — confirm against the
// original file. Gutter line 155 is also absent, so one statement between
// restoring slot_id and seeding the hash is not visible here.
151{
// Save the slot identity first because the whole struct is cleared next.
152 uint8_t id = req->slot_id;
153 *req = {}; // zero all fields
154 req->slot_id = id; // restore slot identity
// The version accumulator must start from the FNV offset basis, not from 0,
// so that it can be compared with the precomputed version hashes.
156 req->_version_hash = FNV_OFFSET; // seed the FNV-1a accumulator
157}
158
// Advance the request parser by exactly one input byte.
//
// p    - parser context; p->parse_state selects how the byte is handled and
//        all results (method, path, query, headers, body) are written into *p.
// byte - next raw byte from the transport.
//
// Returns nothing. Bytes that arrive while the parser is already in
// PARSE_COMPLETE or PARSE_ERROR are ignored.
//
// NOTE(review): the gutter numbering in this listing skips lines (for example
// 166-167, 181, 187, 195, 202, 206 ...), so several statements are not
// visible here — presumably the parse_state transitions. Confirm every
// branch against the original file before changing this function.
159void http_parser_feed(HttpReq *p, uint8_t byte)
160{
161 // Terminal states — do nothing
162 switch (p->parse_state)
163 {
164 case PARSE_COMPLETE:
165 case PARSE_ERROR:
168 return;
169 default:
170 break;
171 }
172
// Character view of the byte for delimiter comparisons; the unsigned `byte`
// is still used for the character-class helpers and for hashing.
173 char c = (char)byte;
174
175 switch (p->parse_state)
176 {
177
// Method: token characters up to the first SP, stored in p->method.
178 case PARSE_METHOD:
179 if (c == ' ')
180 {
182 p->current_token_idx = 0;
183 }
184 else if (!is_tchar(byte))
185 {
186 // RFC 7230 §3.1.1: method = token; any non-tchar is malformed
188 }
189 else if (p->current_token_idx < sizeof(p->method) - 1)
190 {
191 p->method[p->current_token_idx++] = c;
192 }
193 else
194 {
196 }
197 break;
198
// Path: visible ASCII up to SP or '?', stored in p->path; the last element
// of the buffer is never written by this branch.
199 case PARSE_PATH:
200 if (c == ' ')
201 {
203 }
204 else if (c == '?')
205 {
207 }
208 else if (!is_vchar(byte))
209 {
210 // RFC 3986 §3.3: path chars must be visible ASCII (or pct-encoded)
212 }
213 else if (p->path_idx < MAX_PATH_LEN - 1)
214 {
215 p->path[p->path_idx++] = c;
216 }
217 else
218 {
220 }
221 break;
222
// Query: raw bytes up to SP; the key=value split happens once, when the
// terminating SP is seen.
223 case PARSE_QUERY:
224 if (c == ' ')
225 {
226 parse_query_params(p);
228 }
229 else if (!is_vchar(byte))
230 {
231 // Control chars and NUL are not valid query-string bytes
233 }
234 else if (p->query_idx < MAX_QUERY_LEN - 1)
235 {
236 p->query[p->query_idx++] = c;
237 }
238 // Silently truncate — query overflow is a capacity limit, not a protocol error
239 break;
240
// Version: every byte before CR is folded into the FNV-1a accumulator and
// only the final hash is compared.
// NOTE(review): the token is never compared byte-for-byte, so any byte
// sequence whose 32-bit hash equals one of the two constants is accepted.
241 case PARSE_VERSION:
242 if (c == '\r')
243 {
244 if (p->_version_hash == HASH_HTTP11)
245 p->version = HTTP_11;
246 else if (p->_version_hash == HASH_HTTP10)
247 p->version = HTTP_10;
248 else
251 }
252 else
253 {
254 p->_version_hash = (p->_version_hash ^ byte) * FNV_PRIME;
255 }
256 break;
257
258 case PARSE_EXPECT_LF:
259 if (c == '\n')
260 {
262 p->current_token_idx = 0;
263 }
264 else
265 {
267 }
268 break;
269
// Header name: token characters up to ':'. A CR with no name bytes collected
// marks the blank line that ends the header block.
270 case PARSE_HEADER_KEY:
271 if (c == '\r')
272 {
273 if (p->current_token_idx == 0)
274 {
275 // Blank line — end of headers
277 }
278 else
279 {
280 // CR mid-key: malformed (RFC 7230 §3.2 requires CRLF after value)
282 }
283 }
284 else if (c == ':')
285 {
287 p->current_token_idx = 0;
288 }
289 else if (!is_tchar(byte))
290 {
291 // RFC 7230 §3.2: field-name = token; any non-tchar is malformed
293 }
294 else
295 {
296 uint8_t h = p->header_count;
297 if (h < MAX_HEADERS)
298 {
299 if (p->current_token_idx < MAX_KEY_LEN - 1)
300 p->headers[h].key[p->current_token_idx++] = c;
301 else
302 p->parse_state = PARSE_ERROR; // key too long for stored header
303 }
304 else
305 {
306 // Past MAX_HEADERS: cap counter so it never wraps to 0 (which would
307 // look like a blank line ending headers) and never OOBs key buffers.
310 }
311 }
312 break;
313
// Header value: bytes up to CR are stored in headers[header_count].val; the
// slot is committed (header_count++) only when the CR arrives.
314 case PARSE_HEADER_VAL:
315 // Strip leading OWS (SP or HTAB) after the colon — RFC 9110 §5.6.3
316 if ((c == ' ' || c == '\t') && p->current_token_idx == 0)
317 break;
318 if (c == '\r')
319 {
320 uint8_t h = p->header_count;
321 if (h < MAX_HEADERS)
322 {
// NOTE(review): atoi() reports no errors, so a non-numeric or out-of-range
// Content-Length is not rejected on this line, and a negative result becomes
// a very large size_t after the cast. The key and value are read as C strings
// without an explicit terminator being written in this function; presumably
// that relies on the context having been zeroed beforehand — confirm.
323 if (strcasecmp(p->headers[h].key, "Content-Length") == 0)
324 p->content_length = (size_t)atoi(p->headers[h].val);
325 p->header_count++;
326 }
328 p->current_token_idx = 0;
329 }
330 else if (!is_field_value_char(byte))
331 {
332 // RFC 7230 §3.2: control chars and NUL are not valid in field values
334 }
335 else if (p->current_token_idx < MAX_VAL_LEN - 1)
336 {
337 uint8_t h = p->header_count;
338 if (h < MAX_HEADERS)
339 p->headers[h].val[p->current_token_idx++] = c;
340 }
341 // Silently truncate — value overflow is a capacity limit, not a protocol error
342 break;
343
345 /*
346 * Consumes the LF of the blank-line CRLF that ends the header block.
347 * Decides the next state based on Content-Length:
348 * > BODY_BUF_SIZE → 413 Payload Too Large
349 * == 0 → PARSE_COMPLETE (no body)
350 * else → PARSE_BODY
351 */
352 if (c == '\n')
353 {
356 else if (p->content_length == 0)
357 {
358 p->body[0] = '\0';
360 }
361 else
362 {
364 }
365 }
366 else
367 {
369 }
370 break;
371
// Body: at most BODY_BUF_SIZE bytes are stored, but body_bytes_read counts
// every byte received so the end of the body is still detected.
372 case PARSE_BODY:
373 // Body is opaque data — no character validation
374 if (p->body_len < BODY_BUF_SIZE)
375 p->body[p->body_len++] = byte;
376 p->body_bytes_read++;
377 if (p->body_bytes_read >= p->content_length)
378 {
// body_len never exceeds BODY_BUF_SIZE here, so the terminator index is at
// most BODY_BUF_SIZE.
379 p->body[p->body_len] = '\0';
381 }
382 break;
383
384 default:
385 break;
386 }
387}
388
389const char *http_get_header(const HttpReq *req, const char *key)
390{
391 for (uint8_t i = 0; i < req->header_count; i++)
392 {
393 if (strcasecmp(req->headers[i].key, key) == 0)
394 return req->headers[i].val;
395 }
396 return nullptr;
397}
398
399const char *http_get_query(const HttpReq *req, const char *key)
400{
401 for (uint8_t i = 0; i < req->query_count; i++)
402 {
403 if (strcmp(req->query_params[i].key, key) == 0)
404 return req->query_params[i].val;
405 }
406 return nullptr;
407}
#define MAX_VAL_LEN
Maximum header field-value length.
#define MAX_QUERY_PARAMS
Maximum number of parsed query-string parameters.
#define MAX_QUERY_LEN
Maximum raw query-string length (everything after ?).
#define QUERY_VAL_LEN
Maximum query-parameter value length.
#define BODY_BUF_SIZE
Maximum request body bytes stored in HttpReq::body.
#define MAX_HEADERS
Maximum HTTP headers stored per request.
#define MAX_PATH_LEN
Maximum URL path length (including leading /).
#define MAX_CONNS
Maximum simultaneous TCP connections.
#define QUERY_KEY_LEN
Maximum query-parameter key length.
#define MAX_KEY_LEN
Maximum header field-name length (e.g. "Content-Type").
void http_parser_reset(HttpReq *req)
Reset a parser context to the initial (PARSE_METHOD) state.
const char * http_get_header(const HttpReq *req, const char *key)
Look up a header value by name (case-insensitive).
void http_parser_feed(HttpReq *p, uint8_t byte)
Feed one byte to the parser state machine.
const char * http_get_query(const HttpReq *req, const char *key)
Look up a query parameter value by name (case-sensitive).
HttpReq http_pool[MAX_CONNS]
Pool of parser contexts, one per transport slot.
Standalone HTTP/1.1 request parser — no transport dependency.
@ PARSE_BODY
Reading the request body.
Definition http_parser.h:62
@ PARSE_HEADER_VAL
Reading a header field value.
Definition http_parser.h:59
@ PARSE_QUERY
Reading the raw query string (after ?).
Definition http_parser.h:56
@ PARSE_COMPLETE
Full request parsed; ready for dispatch.
Definition http_parser.h:63
@ PARSE_HEADER_KEY
Reading a header field name.
Definition http_parser.h:58
@ PARSE_EXPECT_BODY_LF
Consuming the LF of the blank-line CRLF.
Definition http_parser.h:61
@ PARSE_VERSION
Accumulating HTTP/1.x — hashed for validation.
Definition http_parser.h:57
@ PARSE_URI_TOO_LONG
Path exceeds MAX_PATH_LEN → 414.
Definition http_parser.h:66
@ PARSE_EXPECT_LF
Consuming the LF of a header-line CRLF pair.
Definition http_parser.h:60
@ PARSE_ENTITY_TOO_LARGE
Content-Length > BODY_BUF_SIZE → 413.
Definition http_parser.h:65
@ PARSE_PATH
Reading the URL path component.
Definition http_parser.h:55
@ PARSE_ERROR
Unrecoverable parse failure → 400.
Definition http_parser.h:64
@ PARSE_METHOD
Reading the HTTP method (GET, POST, …).
Definition http_parser.h:54
@ HTTP_UNKNOWN
Version string did not match any known token.
Definition http_parser.h:79
@ HTTP_11
HTTP/1.1 — persistent connection by default.
Definition http_parser.h:81
@ HTTP_10
HTTP/1.0 — close semantics by default.
Definition http_parser.h:80
char val[MAX_VAL_LEN]
Field value, null-terminated.
Definition http_parser.h:92
char key[MAX_KEY_LEN]
Field name, null-terminated.
Definition http_parser.h:91
Fully-parsed HTTP/1.1 request.
QueryParam query_params[MAX_QUERY_PARAMS]
Parsed key=value pairs.
Header headers[MAX_HEADERS]
Captured header fields.
char query[MAX_QUERY_LEN]
Raw query string (after ?).
uint32_t _version_hash
FNV-1a accumulator for version validation (internal).
uint8_t body[BODY_BUF_SIZE+1]
Stored body bytes, always null-terminated.
uint8_t header_count
Valid entries in headers[].
size_t current_token_idx
Write cursor shared by key/value sub-states.
ParseState parse_state
Current parser state.
size_t path_idx
Write cursor into path[].
size_t content_length
Value of Content-Length header (0 if absent).
uint8_t slot_id
Transport slot index (set by presentation layer).
size_t body_len
Bytes stored in body[] (≤ BODY_BUF_SIZE).
HttpVersion version
Protocol version parsed from the request line.
char method[8]
HTTP method, null-terminated (max 7: OPTIONS).
char path[MAX_PATH_LEN]
URL path, null-terminated; no query string.
uint8_t query_count
Valid entries in query_params[].
size_t body_bytes_read
Body bytes received (may exceed BODY_BUF_SIZE).
size_t query_idx
Write cursor into query[].
A single parsed query-string parameter.
Definition http_parser.h:97
char val[QUERY_VAL_LEN]
Parameter value (empty string if absent).
Definition http_parser.h:99
char key[QUERY_KEY_LEN]
Parameter name, null-terminated.
Definition http_parser.h:98