Skip to main content

form_urlencoded/
form_urlencoded.rs

1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10//! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11//! as used by HTML forms.
12//!
//! Converts between a string (such as a URL’s query string)
14//! and a sequence of (name, value) pairs.
15#![no_std]
16
17// For forwards compatibility
18#[cfg(feature = "std")]
19extern crate std as _;
20
21extern crate alloc;
22
23#[cfg(not(feature = "alloc"))]
24compile_error!("the `alloc` feature must currently be enabled");
25
26use alloc::{
27    borrow::{Borrow, Cow, ToOwned},
28    string::String,
29};
30use core::str;
31
32use percent_encoding::{percent_decode, percent_encode_byte};
33
34/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
35/// into a iterator of (name, value) pairs.
36///
37/// Use `parse(input.as_bytes())` to parse a `&str` string.
38///
39/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
40/// converted to `[("#first", "%try%")]`.
41#[inline]
42pub fn parse(input: &[u8]) -> Parse<'_> {
43    Parse {
44        input,
45    }
46}
47/// The return type of `parse()`.
48#[derive(Copy, Clone)]
49pub struct Parse<'a> {
50    input: &'a [u8],
51}
52
53impl<'a> Iterator for Parse<'a> {
54    type Item = (Cow<'a, str>, Cow<'a, str>);
55
56    fn next(&mut self) -> Option<Self::Item> {
57        loop {
58            if self.input.is_empty() {
59                return None;
60            }
61            let mut split2 = self.input.splitn(2, |&b| b == b'&');
62            let sequence = split2.next().unwrap();
63            self.input = split2.next().unwrap_or(&[][..]);
64            if sequence.is_empty() {
65                continue;
66            }
67            let mut split2 = sequence.splitn(2, |&b| b == b'=');
68            let name = split2.next().unwrap();
69            let value = split2.next().unwrap_or(&[][..]);
70            return Some((decode(name), decode(value)));
71        }
72    }
73}
74
75fn decode(input: &[u8]) -> Cow<'_, str> {
76    let replaced = replace_plus(input);
77    decode_utf8_lossy(match percent_decode(&replaced).into() {
78        Cow::Owned(vec) => Cow::Owned(vec),
79        Cow::Borrowed(_) => replaced,
80    })
81}
82
/// Replace every b'+' with b' '.
///
/// Borrows `input` unchanged when it contains no b'+'; otherwise returns
/// an owned copy with the replacements applied.
fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
    if !input.contains(&b'+') {
        return Cow::Borrowed(input);
    }
    Cow::Owned(
        input
            .iter()
            .map(|&byte| if byte == b'+' { b' ' } else { byte })
            .collect(),
    )
}
99
100impl<'a> Parse<'a> {
101    /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
102    pub fn into_owned(self) -> ParseIntoOwned<'a> {
103        ParseIntoOwned {
104            inner: self,
105        }
106    }
107}
108
109/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
110pub struct ParseIntoOwned<'a> {
111    inner: Parse<'a>,
112}
113
114impl Iterator for ParseIntoOwned<'_> {
115    type Item = (String, String);
116
117    fn next(&mut self) -> Option<Self::Item> {
118        self.inner.next().map(|(k, v)| (k.into_owned(), v.into_owned()))
119    }
120}
121
122/// The [`application/x-www-form-urlencoded` byte serializer](
123/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
124///
125/// Return an iterator of `&str` slices.
126pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
127    ByteSerialize {
128        bytes: input,
129    }
130}
131
/// Return value of `byte_serialize()`.
///
/// An iterator over `&str` pieces of the serialized form of the input bytes.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
    // Input bytes not yet serialized; `next()` consumes them from the front.
    bytes: &'a [u8],
}
137
/// Whether `byte` is emitted as-is by the byte serializer:
/// ASCII letters, ASCII digits, and `*`, `-`, `.`, `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'*' | b'-' | b'.' | b'_')
}
141
142impl<'a> Iterator for ByteSerialize<'a> {
143    type Item = &'a str;
144
145    fn next(&mut self) -> Option<&'a str> {
146        if let Some((&first, tail)) = self.bytes.split_first() {
147            if !byte_serialized_unchanged(first) {
148                self.bytes = tail;
149                return Some(if first == b' ' { "+" } else { percent_encode_byte(first) });
150            }
151            let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
152            let (unchanged_slice, remaining) = match position {
153                // 1 for first_byte + i unchanged in tail
154                Some(i) => self.bytes.split_at(1 + i),
155                None => (self.bytes, &[][..]),
156            };
157            self.bytes = remaining;
158            // This unsafe is appropriate because we have already checked these
159            // bytes in byte_serialized_unchanged, which checks for a subset
160            // of UTF-8. So we know these bytes are valid UTF-8, and doing
161            // another UTF-8 check would be wasteful.
162            Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
163        } else {
164            None
165        }
166    }
167
168    fn size_hint(&self) -> (usize, Option<usize>) {
169        if self.bytes.is_empty() {
170            (0, Some(0))
171        } else {
172            (1, Some(self.bytes.len()))
173        }
174    }
175}
176
/// The [`application/x-www-form-urlencoded` serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
///
/// Appends encoded names and values to the `String` exposed by a `Target`.
pub struct Serializer<'a, T: Target> {
    // The output target; becomes `None` once `finish()` has taken it.
    target: Option<T>,
    // Byte offset in the target string where this serializer's output starts.
    start_position: usize,
    // Optional conversion applied to names and values before percent-encoding.
    encoding: EncodingOverride<'a>,
}
184
/// An output destination for a `Serializer`.
///
/// Implemented in this file for `String` and `&mut String`.
pub trait Target {
    /// Return a mutable reference to the `String` that output is appended to.
    fn as_mut_string(&mut self) -> &mut String;
    /// Consume the target and return its finished form.
    fn finish(self) -> Self::Finished;
    /// The type returned by `finish()`.
    type Finished;
}
190
191impl Target for String {
192    fn as_mut_string(&mut self) -> &mut String {
193        self
194    }
195    fn finish(self) -> Self {
196        self
197    }
198    type Finished = Self;
199}
200
201impl Target for &mut String {
202    fn as_mut_string(&mut self) -> &mut String {
203        self
204    }
205    fn finish(self) -> Self {
206        self
207    }
208    type Finished = Self;
209}
210
211impl<'a, T: Target> Serializer<'a, T> {
212    /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
213    ///
214    /// If the target is non-empty,
215    /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
216    pub fn new(target: T) -> Self {
217        Self::for_suffix(target, 0)
218    }
219
220    /// Create a new `application/x-www-form-urlencoded` serializer
221    /// for a suffix of the given target.
222    ///
223    /// If that suffix is non-empty,
224    /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
225    pub fn for_suffix(mut target: T, start_position: usize) -> Self {
226        if target.as_mut_string().len() < start_position {
227            panic!(
228                "invalid length {} for target of length {}",
229                start_position,
230                target.as_mut_string().len()
231            );
232        }
233
234        Serializer {
235            target: Some(target),
236            start_position,
237            encoding: None,
238        }
239    }
240
241    /// Remove any existing name/value pair.
242    ///
243    /// Panics if called after `.finish()`.
244    pub fn clear(&mut self) -> &mut Self {
245        string(&mut self.target).truncate(self.start_position);
246        self
247    }
248
249    /// Set the character encoding to be used for names and values before percent-encoding.
250    pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
251        self.encoding = new;
252        self
253    }
254
255    /// Serialize and append a name/value pair.
256    ///
257    /// Panics if called after `.finish()`.
258    pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
259        append_pair(string(&mut self.target), self.start_position, self.encoding, name, value);
260        self
261    }
262
263    /// Serialize and append a name of parameter without any value.
264    ///
265    /// Panics if called after `.finish()`.
266    pub fn append_key_only(&mut self, name: &str) -> &mut Self {
267        append_key_only(string(&mut self.target), self.start_position, self.encoding, name);
268        self
269    }
270
271    /// Serialize and append a number of name/value pairs.
272    ///
273    /// This simply calls `append_pair` repeatedly.
274    /// This can be more convenient, so the user doesn’t need to introduce a block
275    /// to limit the scope of `Serializer`’s borrow of its string.
276    ///
277    /// Panics if called after `.finish()`.
278    pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
279    where
280        I: IntoIterator,
281        I::Item: Borrow<(K, V)>,
282        K: AsRef<str>,
283        V: AsRef<str>,
284    {
285        {
286            let string = string(&mut self.target);
287            for pair in iter {
288                let (k, v) = pair.borrow();
289                append_pair(string, self.start_position, self.encoding, k.as_ref(), v.as_ref());
290            }
291        }
292        self
293    }
294
295    /// Serialize and append a number of names without values.
296    ///
297    /// This simply calls `append_key_only` repeatedly.
298    /// This can be more convenient, so the user doesn’t need to introduce a block
299    /// to limit the scope of `Serializer`’s borrow of its string.
300    ///
301    /// Panics if called after `.finish()`.
302    pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
303    where
304        I: IntoIterator,
305        I::Item: Borrow<K>,
306        K: AsRef<str>,
307    {
308        {
309            let string = string(&mut self.target);
310            for key in iter {
311                let k = key.borrow().as_ref();
312                append_key_only(string, self.start_position, self.encoding, k);
313            }
314        }
315        self
316    }
317
318    /// If this serializer was constructed with a string, take and return that string.
319    ///
320    /// ```rust
321    /// use form_urlencoded;
322    /// let encoded: String = form_urlencoded::Serializer::new(String::new())
323    ///     .append_pair("foo", "bar & baz")
324    ///     .append_pair("saison", "Été+hiver")
325    ///     .finish();
326    /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
327    /// ```
328    ///
329    /// Panics if called more than once.
330    pub fn finish(&mut self) -> T::Finished {
331        self.target
332            .take()
333            .expect("url::form_urlencoded::Serializer double finish")
334            .finish()
335    }
336}
337
/// Push a `&` separator when the serializer's region of `string`
/// (everything from `start_position` onwards) already has content.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    let has_existing_output = start_position < string.len();
    if has_existing_output {
        string.push('&');
    }
}
343
344fn string<T: Target>(target: &mut Option<T>) -> &mut String {
345    target
346        .as_mut()
347        .expect("url::form_urlencoded::Serializer finished")
348        .as_mut_string()
349}
350
351fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOverride<'_>, name: &str, value: &str) {
352    append_separator_if_needed(string, start_position);
353    append_encoded(name, string, encoding);
354    string.push('=');
355    append_encoded(value, string, encoding);
356}
357
358fn append_key_only(string: &mut String, start_position: usize, encoding: EncodingOverride, name: &str) {
359    append_separator_if_needed(string, start_position);
360    append_encoded(name, string, encoding);
361}
362
363fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) {
364    string.extend(byte_serialize(&encode(encoding, s)))
365}
366
367pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> {
368    if let Some(o) = encoding_override {
369        return o(input);
370    }
371    input.as_bytes().into()
372}
373
// std::ptr::addr_eq was stabilized in rust 1.76. Once we upgrade
// the MSRV we can remove this lint override.
#[allow(ambiguous_wide_pointer_comparisons)]
pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.

    // Borrowed input can go straight through the standard lossy conversion.
    let bytes = match input {
        Cow::Borrowed(slice) => return String::from_utf8_lossy(slice),
        Cow::Owned(buffer) => buffer,
    };

    match String::from_utf8_lossy(&bytes) {
        // Invalid sequences were replaced, so a new string was built.
        Cow::Owned(repaired) => Cow::Owned(repaired),
        Cow::Borrowed(valid) => {
            // `from_utf8_lossy` only borrows when nothing needed replacing,
            // which means `bytes` was valid UTF-8 all along. Check in debug
            // builds that the borrow really points at our buffer.
            let raw_utf8: *const [u8] = valid.as_bytes();
            debug_assert!(core::ptr::eq(raw_utf8, &*bytes));

            // SAFETY: `bytes` was just validated as UTF-8 by
            // `from_utf8_lossy` returning a borrow of it, and we own it,
            // so it can be reused without copying or re-checking.
            Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
        }
    }
}
404
/// An optional function that converts a `&str` into bytes before they are
/// percent-encoded. When `None`, the string's UTF-8 bytes are used directly.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;