Skip to content

Commit d67a184

Browse files
authored
Merge pull request #778 from mseri/parser
cohttp.headers: use faster comparison
2 parents 0187c8a + e0bd381 commit d67a184

File tree

7 files changed

+83
-90
lines changed

7 files changed

+83
-90
lines changed

.github/workflows/workflow.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
uses: actions/checkout@v2
2626

2727
- name: Use OCaml ${{ matrix.ocaml-version }}
28-
uses: actions-ml/setup-ocaml@master
28+
uses: ocaml/setup-ocaml@v2
2929
with:
3030
ocaml-version: ${{ matrix.ocaml-version }}
3131
opam-depext: false

CHANGES.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@
88
stack overflow happens in the XHR completion handler (mefyl #762).
99
- lwt_jsoo: Add test suite (mefyl #764).
1010

11-
- Cohttp.Header: new implementation (@lyrm #747)
11+
- Cohttp.Header: new implementation (lyrm #747)
1212

1313
+ New implementation of Header modules using an associative list instead of a map, with one major semantic change (function ```get```, see below), and some new functions (```clean_dup```, ```get_multi_concat```)
1414
+ More Alcotest tests as well as fuzzing tests for this particular module.
1515

16+
- Cohttp.Header: performance improvement (mseri, anuragsoni #778)
17+
**Breaking** the headers are no-longer lowercased when parsed, the headers key comparison is case insensitive instead.
18+
1619
### Purpose
1720

1821
The new header implementation uses an associative list instead of a map to represent headers and is focused on predictability and intuitivity: except for some specific and documented functions, the headers are always kept in transmission order, which makes debugging easier and is also important for [RFC7230§3.2.2](https://tools.ietf.org/html/rfc7230#section-3.2.2) that states that multiple values of a header must be kept in order.
@@ -38,6 +41,7 @@
3841

3942
+ ```clean_dup``` enables the user to clean headers that follows the {{:https://tools.ietf.org/html/rfc7230#section-3.2.2} RFC7230§3.2.2} (no duplicate, except ```set-cookie```)
4043
+ ```get_multi_concat``` has been added to get a result similar to the previous ```get``` function.
44+
- Cohttp.Header: optimize internal of cohttp.headers (mseri #778)
4145

4246
## v4.0.0 (2021-03-24)
4347

cohttp-lwt-unix/test/test_parser.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ let make_simple_req () =
246246
let open Cohttp in
247247
let open Cohttp_lwt_unix in
248248
let expected =
249-
"POST /foo/bar HTTP/1.1\r\nfoo: bar\r\nhost: localhost\r\nuser-agent: "
249+
"POST /foo/bar HTTP/1.1\r\nFoo: bar\r\nhost: localhost\r\nuser-agent: "
250250
^ user_agent
251251
^ "\r\ntransfer-encoding: chunked\r\n\r\n6\r\nfoobar\r\n0\r\n\r\n"
252252
in

cohttp/fuzz/fuzz_header.ml

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,9 @@ let is_empty_test () =
189189
let init_with_test () =
190190
Crowbar.(
191191
(* FS *)
192-
(* forall k v. to_list (init_with k v) = [String.lowercase k, v] *)
192+
(* forall k v. to_list (init_with k v) = [k, v] *)
193193
add_test ~name:"[init_list k v] is [k, v]" [ header_name_gen; word_gen ]
194-
(fun k v ->
195-
check_eq H.(to_list (init_with k v)) [ (String.lowercase_ascii k, v) ]))
194+
(fun k v -> check_eq H.(to_list (init_with k v)) [ (k, v) ]))
196195

197196
let mem_test () =
198197
Crowbar.(
@@ -201,12 +200,16 @@ let mem_test () =
201200
add_test ~name:"[mem h k] on an empty header is always false"
202201
[ header_name_gen ] (fun k -> check_eq false H.(mem (init ()) k));
203202
(* SI *)
204-
(* forall h, k. H.mem h k = List.(mem_assoc k (H.to_list h)) *)
203+
(* forall h, k. H.mem h k = List.(mem_assoc (String.lowercase_ascii x) (List.map (fun (k, v) -> String.lowercase_ascii k, v) (H.to_list h))) *)
205204
add_test ~name:"Header.mem has the same behavior than List.mem_assoc"
206205
[ headers_gen; header_name_gen ] (fun h k ->
207206
check_eq
208207
H.(mem h k)
209-
List.(mem_assoc (String.lowercase_ascii k) (H.to_list h))))
208+
List.(
209+
mem_assoc (String.lowercase_ascii k)
210+
(List.map
211+
(fun (k, v) -> (String.lowercase_ascii k, v))
212+
(H.to_list h)))))
210213

211214
let add_test () =
212215
Crowbar.(
@@ -220,9 +223,7 @@ let add_test () =
220223
(* forall h, k, v. to_list (add h k v) = to_list h @ [lowercase k, v] *)
221224
~name:"[add] adds a value at the header end"
222225
[ headers_gen; header_name_gen; word_gen ] (fun h k v ->
223-
check_eq
224-
(H.to_list h @ [ (String.lowercase_ascii k, v) ])
225-
H.(to_list (add h k v))))
226+
check_eq (H.to_list h @ [ (k, v) ]) H.(to_list (add h k v))))
226227

227228
let to_list_of_list_test () =
228229
Crowbar.(

cohttp/src/header.ml

Lines changed: 64 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -16,36 +16,37 @@
1616
*
1717
}}}*)
1818

19-
module LString : sig
20-
type t
21-
22-
val of_string : string -> t
23-
val to_string : t -> string
24-
val compare : t -> t -> int
25-
end = struct
26-
type t = string
27-
28-
let of_string x = String.lowercase_ascii x
29-
let to_string x = x
30-
let compare a b = String.compare a b
31-
end
32-
33-
type t = (LString.t * string) list
19+
let caseless_equal a b =
20+
if a == b then true
21+
else
22+
let len = String.length a in
23+
len = String.length b
24+
&&
25+
let stop = ref false in
26+
let idx = ref 0 in
27+
while (not !stop) && !idx < len do
28+
let c1 = String.unsafe_get a !idx in
29+
let c2 = String.unsafe_get b !idx in
30+
if Char.lowercase_ascii c1 <> Char.lowercase_ascii c2 then stop := true;
31+
incr idx
32+
done;
33+
not !stop
34+
35+
type t = (string * string) list
3436

3537
let compare = Stdlib.compare
3638
let init () = []
3739
let is_empty = function [] -> true | _ -> false
38-
let init_with k v = [ (LString.of_string k, v) ]
40+
let init_with k v = [ (k, v) ]
3941

4042
let mem h k =
41-
let k = LString.of_string k in
4243
let rec loop = function
4344
| [] -> false
44-
| (k', _) :: h' -> if LString.compare k k' = 0 then true else loop h'
45+
| (k', _) :: h' -> if caseless_equal k k' then true else loop h'
4546
in
4647
loop h
4748

48-
let add h k v : t = (LString.of_string k, v) :: h
49+
let add h k v : t = (k, v) :: h
4950
let add_list h l = List.fold_left (fun h (k, v) -> add h k v) h l
5051
let add_multi h k l = List.fold_left (fun h v -> add h k v) h l
5152

@@ -58,11 +59,10 @@ let add_opt_unless_exists h k v =
5859
match h with None -> init_with k v | Some h -> add_unless_exists h k v
5960

6061
let get h k =
61-
let k = LString.of_string k in
6262
let rec loop h =
6363
match h with
6464
| [] -> None
65-
| (k', v) :: h' -> if LString.compare k k' = 0 then Some v else loop h'
65+
| (k', v) :: h' -> if caseless_equal k k' then Some v else loop h'
6666
in
6767
loop h
6868

@@ -71,36 +71,32 @@ let get_multi (h : t) (k : string) =
7171
match h with
7272
| [] -> acc
7373
| (k', v) :: h' ->
74-
if LString.compare (LString.of_string k) k' = 0 then loop h' (v :: acc)
75-
else loop h' acc
74+
if caseless_equal k k' then loop h' (v :: acc) else loop h' acc
7675
in
7776
loop h []
7877

7978
let remove h k =
80-
let k = LString.of_string k in
8179
let rec loop seen = function
8280
| [] -> if seen then [] else raise Not_found
83-
| (k', _) :: h when LString.compare k k' = 0 -> loop true h
81+
| (k', _) :: h when caseless_equal k k' -> loop true h
8482
| x :: h -> x :: loop seen h
8583
in
8684
try loop false h with Not_found -> h
8785

8886
let remove_last h k =
89-
let k = LString.of_string k in
9087
let rec loop seen = function
9188
| [] -> raise Not_found
92-
| (k', _) :: h when LString.compare k k' = 0 -> h
89+
| (k', _) :: h when caseless_equal k k' -> h
9390
| x :: h -> x :: loop seen h
9491
in
9592
try loop false h with Not_found -> h
9693

9794
let replace_ last h k v =
98-
let k' = LString.of_string k in
9995
let rec loop seen = function
10096
| [] -> if seen then [] else raise Not_found
101-
| (k'', _) :: h when LString.compare k' k'' = 0 ->
97+
| (k'', _) :: h when caseless_equal k k'' ->
10298
if last then (k'', v) :: h
103-
else if not seen then (k', v) :: loop true h
99+
else if not seen then (k, v) :: loop true h
104100
else loop seen h
105101
| x :: h -> x :: loop seen h
106102
in
@@ -129,33 +125,26 @@ let update_all h k f =
129125
let map (f : string -> string -> string) (h : t) : t =
130126
List.map
131127
(fun (k, v) ->
132-
let vs' = f (LString.to_string k) v in
128+
let vs' = f k v in
133129
(k, vs'))
134130
h
135131

136132
let iter (f : string -> string -> unit) (h : t) : unit =
137-
List.iter (fun (k, v) -> f (LString.to_string k) v) h
133+
List.iter (fun (k, v) -> f k v) h
138134

139135
let fold (f : string -> string -> 'a -> 'a) (h : t) (init : 'a) : 'a =
140-
List.fold_left (fun acc (k, v) -> f (LString.to_string k) v acc) init h
141-
142-
let of_list h =
143-
List.fold_left (fun acc (k, v) -> (LString.of_string k, v) :: acc) [] h
136+
List.fold_left (fun acc (k, v) -> f k v acc) init h
144137

145-
let to_list h =
146-
List.fold_left (fun acc (k, v) -> (LString.to_string k, v) :: acc) [] h
138+
let of_list h = List.rev h
139+
let to_list h = List.rev h
147140

148141
let to_lines (h : t) =
149142
let header_line k v = Printf.sprintf "%s: %s\r\n" k v in
150-
List.fold_left
151-
(fun acc (k, v) -> header_line (LString.to_string k) v :: acc)
152-
[] h
143+
List.fold_left (fun acc (k, v) -> header_line k v :: acc) [] h
153144

154145
let to_frames h =
155146
let to_frame k v = Printf.sprintf "%s: %s" k v in
156-
List.fold_left
157-
(fun acc (k, v) -> to_frame (LString.to_string k) v :: acc)
158-
[] h
147+
List.fold_left (fun acc (k, v) -> to_frame k v :: acc) [] h
159148

160149
let to_string h =
161150
let b = Buffer.create 128 in
@@ -169,56 +158,55 @@ let to_string h =
169158
Buffer.contents b
170159

171160
let headers_with_list_values =
172-
Array.map LString.of_string
173-
[|
174-
"accept";
175-
"accept-charset";
176-
"accept-encoding";
177-
"accept-language";
178-
"accept-ranges";
179-
"allow";
180-
"cache-control";
181-
"connection";
182-
"content-encoding";
183-
"content-language";
184-
"expect";
185-
"if-match";
186-
"if-none-match";
187-
"link";
188-
"pragma";
189-
"proxy-authenticate";
190-
"te";
191-
"trailer";
192-
"transfer-encoding";
193-
"upgrade";
194-
"vary";
195-
"via";
196-
"warning";
197-
"www-authenticate";
198-
|]
161+
[|
162+
"accept";
163+
"accept-charset";
164+
"accept-encoding";
165+
"accept-language";
166+
"accept-ranges";
167+
"allow";
168+
"cache-control";
169+
"connection";
170+
"content-encoding";
171+
"content-language";
172+
"expect";
173+
"if-match";
174+
"if-none-match";
175+
"link";
176+
"pragma";
177+
"proxy-authenticate";
178+
"te";
179+
"trailer";
180+
"transfer-encoding";
181+
"upgrade";
182+
"vary";
183+
"via";
184+
"warning";
185+
"www-authenticate";
186+
|]
199187

200188
let is_header_with_list_value =
201189
let tbl = Hashtbl.create (Array.length headers_with_list_values) in
202190
headers_with_list_values |> Array.iter (fun h -> Hashtbl.add tbl h ());
203191
fun h -> Hashtbl.mem tbl h
204192

205-
let is_set_cookie k = LString.(compare k (of_string "set-cookie"))
193+
let is_set_cookie k = caseless_equal k "set-cookie"
206194

207195
(* set-cookie is an exception according to
208196
{{:https://tools.ietf.org/html/rfc7230#section-3.2.2}
209-
RFC7230§3.2.2} and can appear multiple times in a response message.
197+
RFC7230§3.2.2} and can appear multiple times in a response message.
210198
*)
211199
let clean_dup (h : t) : t =
212200
let add h k v =
213-
if is_set_cookie k = 0 then (k, v) :: h
201+
if is_set_cookie k then (k, v) :: h
214202
else
215203
let to_add = ref false in
216204
let rec loop = function
217205
| [] ->
218206
to_add := true;
219207
[]
220208
| (k', v') :: hs ->
221-
if LString.compare k k' = 0 then
209+
if caseless_equal k k' then
222210
if is_header_with_list_value k then (k, v' ^ "," ^ v) :: hs
223211
else (
224212
to_add := true;
@@ -231,8 +219,7 @@ let clean_dup (h : t) : t =
231219
List.rev h |> List.fold_left (fun acc (k, v) -> add acc k v) []
232220

233221
let get_multi_concat ?(list_value_only = false) h k : string option =
234-
if (not list_value_only) || is_header_with_list_value (LString.of_string k)
235-
then
222+
if (not list_value_only) || is_header_with_list_value k then
236223
let vs = get_multi h k in
237224
match vs with [] -> None | _ -> Some (String.concat "," vs)
238225
else get h k

cohttp/src/header.mli

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ val of_list : (string * string) list -> t
3232
is true with case insensitive comparison. *)
3333

3434
val to_list : t -> (string * string) list
35-
(** [to_list h] converts HTTP headers [h] to a list. Order is preserved.
35+
(** [to_list h] converts HTTP headers [h] to a list. Order and case is
36+
preserved.
3637
3738
{e Invariant (with case insensitive comparison):} [to_list (of_list l) = l] *)
3839

cohttp/test/unitary_test_header.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ let is_empty_tests () =
5858

5959
let init_with_tests () =
6060
aessl "init_with k v"
61-
[ ("transfer-encoding", "chunked") ]
61+
[ ("traNsfer-eNcoding", "chunked") ]
6262
H.(to_list (init_with "traNsfer-eNcoding" "chunked"))
6363

6464
let mem_tests () =

0 commit comments

Comments
 (0)