Skip to main content

tld/
lib.rs

1//! A native Rust library for Mozilla's Public Suffix List
2
3mod error;
4mod types;
5
6use core::str::{FromStr, from_utf8};
7use std::{collections::BTreeMap, sync::LazyLock};
8
9pub use error::Error;
10pub use types::{Domain, Info, List as Psl, Suffix, Type};
11
12/// The official URL of the list
13pub const LIST_URL: &str = "https://publicsuffix.org/list/public_suffix_list.dat";
14
/// Child nodes of a trie node, keyed by the raw bytes of a single label.
type Children = BTreeMap<Vec<u8>, Node>;

/// Label that stands in for "any label" when a lookup finds no exact child.
const WILDCARD: &str = "*";

/// List data embedded into the crate at compile time.
const PUBLIC_SUFFIX_LIST_DATA: &str = include_str!("./public_suffix_list.txt");
20
21pub static PUBLIC_SUFFIX_LIST: LazyLock<List> = LazyLock::new(|| {
22    let ret: List = PUBLIC_SUFFIX_LIST_DATA
23        .parse()
24        .unwrap_or_else(|err| panic!("tld: error parsing public suffic list: {err}"));
25    return ret;
26});
27
/// A node of the rule trie.
///
/// Rules are stored label by label starting from the rightmost label, so the
/// children of a node are the labels found immediately to its left.
#[derive(Debug, Clone, Default, Eq, PartialEq)]
struct Node {
    /// Nodes for the next label to the left, keyed by that label.
    children: Children,
    /// Set when a rule ends exactly at this node.
    leaf: Option<Leaf>,
}
33
/// Data recorded for a rule at the node where it ends.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
struct Leaf {
    /// `true` when the rule was written with a leading `!`.
    is_exception: bool,
    /// The type the rule was appended with.
    typ: Type,
}
39
/// A dynamic public suffix list
///
/// Rules are kept in a trie of labels; see `find` for how lookups walk it.
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct List {
    /// Root of the rule trie; its children are the rightmost labels of the rules.
    rules: Node,
    /// Optional type filter consulted by `find`; `None` accepts every rule type.
    typ: Option<Type>,
}
46
47impl List {
48    /// Creates a new list with default wildcard rule support
49    #[inline]
50    #[must_use]
51    pub fn new() -> Self {
52        Self::default()
53    }
54
55    /// Creates a new list from a byte slice
56    ///
57    /// # Errors
58    ///
59    /// Returns an `Err` if the list is not UTF-8 encoded
60    /// or if its format is invalid.
61    #[inline]
62    pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
63        from_utf8(bytes).map_err(|_| Error::ListNotUtf8Encoded)?.parse()
64    }
65
66    /// Checks to see if the list is empty, ignoring the wildcard rule
67    #[inline]
68    #[must_use]
69    pub fn is_empty(&self) -> bool {
70        self.rules.children.is_empty()
71    }
72
73    #[inline]
74    fn append(&mut self, mut rule: &str, typ: Type) -> Result<(), Error> {
75        let mut is_exception = false;
76        if rule.starts_with('!') {
77            if !rule.contains('.') {
78                return Err(Error::ExceptionAtFirstLabel(rule.to_owned()));
79            }
80            is_exception = true;
81            rule = &rule[1..];
82        }
83
84        let mut current = &mut self.rules;
85        for label in rule.rsplit('.') {
86            if label.is_empty() {
87                return Err(Error::EmptyLabel(rule.to_owned()));
88            }
89
90            #[cfg(not(feature = "anycase"))]
91            let key = label.as_bytes().to_owned();
92            #[cfg(feature = "anycase")]
93            let key = UniCase::new(Cow::from(label.to_owned()));
94
95            current = current.children.entry(key).or_insert_with(Default::default);
96        }
97
98        current.leaf = Some(Leaf {
99            is_exception,
100            typ,
101        });
102
103        Ok(())
104    }
105}
106
// Builds the case-insensitive lookup key for a label given as bytes.
//
// If the label is not valid UTF-8 the macro makes the *enclosing function*
// return an `Info` with `len: 0` and `typ: None`.
//
// NOTE(review): `UniCase` and `Cow` are not imported in this file — confirm
// that the `anycase` feature still builds.
#[cfg(feature = "anycase")]
macro_rules! anycase_key {
    ($label:ident) => {
        match from_utf8($label) {
            Ok(label) => UniCase::new(Cow::from(label)),
            Err(_) => {
                return Info {
                    len: 0,
                    typ: None,
                }
            }
        }
    };
}
121
122impl Psl for List {
123    #[inline]
124    fn find<'a, T>(&self, mut labels: T) -> Info
125    where
126        T: Iterator<Item = &'a [u8]>,
127    {
128        let mut rules = &self.rules;
129
130        // the first label
131        // it's special because we always need it whether or not
132        // it's in our hash map (because of the implicit wildcard)
133        let mut info = match labels.next() {
134            Some(label) => {
135                let mut info = Info {
136                    len: label.len(),
137                    typ: None,
138                };
139                #[cfg(not(feature = "anycase"))]
140                let node_opt = rules.children.get(label);
141                #[cfg(feature = "anycase")]
142                let node_opt = rules.children.get(&anycase_key!(label));
143                match node_opt {
144                    Some(node) => {
145                        info.typ = node.leaf.map(|leaf| leaf.typ);
146                        rules = node;
147                    }
148                    None => return info,
149                }
150                info
151            }
152            None => {
153                return Info {
154                    len: 0,
155                    typ: None,
156                };
157            }
158        };
159
160        // the rest of the labels
161        let mut len_so_far = info.len;
162        for label in labels {
163            #[cfg(not(feature = "anycase"))]
164            let node_opt = rules.children.get(label);
165            #[cfg(feature = "anycase")]
166            let node_opt = rules.children.get(&anycase_key!(label));
167            match node_opt {
168                Some(node) => rules = node,
169                None => {
170                    #[cfg(not(feature = "anycase"))]
171                    let node_opt = rules.children.get(WILDCARD.as_bytes());
172                    #[cfg(feature = "anycase")]
173                    let node_opt = rules.children.get(&UniCase::new(Cow::from(WILDCARD)));
174                    match node_opt {
175                        Some(node) => rules = node,
176                        None => break,
177                    }
178                }
179            }
180            let label_plus_dot = label.len() + 1;
181            if let Some(leaf) = rules.leaf {
182                if self.typ.is_none() || self.typ == Some(leaf.typ) {
183                    info.typ = Some(leaf.typ);
184                    if leaf.is_exception {
185                        info.len = len_so_far;
186                        break;
187                    }
188                    info.len = len_so_far + label_plus_dot;
189                }
190            }
191            len_so_far += label_plus_dot;
192        }
193
194        info
195    }
196}
197
198impl FromStr for List {
199    type Err = Error;
200
201    #[inline]
202    fn from_str(s: &str) -> Result<Self, Self::Err> {
203        let mut typ = None;
204        let mut list = List::new();
205        for line in s.lines() {
206            match line {
207                line if line.contains("BEGIN ICANN DOMAINS") => {
208                    typ = Some(Type::Icann);
209                }
210                line if line.contains("BEGIN PRIVATE DOMAINS") => {
211                    typ = Some(Type::Private);
212                }
213                line if line.starts_with("//") => {
214                    continue;
215                }
216                line => match typ {
217                    Some(typ) => {
218                        let rule = match line.split_whitespace().next() {
219                            Some(rule) => rule,
220                            None => continue,
221                        };
222                        list.append(rule, typ)?;
223                        #[cfg(feature = "punycode")]
224                        {
225                            let ascii = idna::domain_to_ascii(rule).map_err(|_| Error::InvalidRule(rule.to_owned()))?;
226                            list.append(&ascii, typ)?;
227                        }
228                    }
229                    None => {
230                        continue;
231                    }
232                },
233            }
234        }
235        if list.is_empty() {
236            return Err(Error::InvalidList);
237        }
238        Ok(list)
239    }
240}
241
/// A list of only ICANN suffixes
///
/// Wraps a [`List`]; converting from a [`List`] sets the wrapped list's type
/// filter to [`Type::Icann`].
// NOTE(review): the derived `Default` wraps `List::default()`, whose filter is
// `None` rather than `Some(Type::Icann)` — confirm this is intended.
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct IcannList(List);
245
246impl From<List> for IcannList {
247    #[inline]
248    fn from(mut list: List) -> Self {
249        list.typ = Some(Type::Icann);
250        Self(list)
251    }
252}
253
254impl From<IcannList> for List {
255    #[inline]
256    fn from(IcannList(mut list): IcannList) -> Self {
257        list.typ = None;
258        list
259    }
260}
261
262impl IcannList {
263    /// Creates a new list from a byte slice
264    ///
265    /// # Errors
266    ///
267    /// Returns an `Err` if the list is not UTF-8 encoded
268    /// or if its format is invalid.
269    #[inline]
270    pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
271        let list = List::from_bytes(bytes)?;
272        Ok(list.into())
273    }
274
275    /// Checks to see if the list is empty, ignoring the wildcard rule
276    #[inline]
277    #[must_use]
278    pub fn is_empty(&self) -> bool {
279        self.0.is_empty()
280    }
281}
282
283impl FromStr for IcannList {
284    type Err = Error;
285
286    #[inline]
287    fn from_str(s: &str) -> Result<Self, Self::Err> {
288        let list = List::from_str(s)?;
289        Ok(list.into())
290    }
291}
292
293impl Psl for IcannList {
294    #[inline]
295    fn find<'a, T>(&self, labels: T) -> Info
296    where
297        T: Iterator<Item = &'a [u8]>,
298    {
299        self.0.find(labels)
300    }
301}
302
/// A list of only private suffixes
///
/// Wraps a [`List`]; converting from a [`List`] sets the wrapped list's type
/// filter to [`Type::Private`].
// NOTE(review): the derived `Default` wraps `List::default()`, whose filter is
// `None` rather than `Some(Type::Private)` — confirm this is intended.
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct PrivateList(List);
306
307impl From<List> for PrivateList {
308    #[inline]
309    fn from(mut list: List) -> Self {
310        list.typ = Some(Type::Private);
311        Self(list)
312    }
313}
314
315impl From<PrivateList> for List {
316    #[inline]
317    fn from(PrivateList(mut list): PrivateList) -> Self {
318        list.typ = None;
319        list
320    }
321}
322
323impl PrivateList {
324    /// Creates a new list from a byte slice
325    ///
326    /// # Errors
327    ///
328    /// Returns an `Err` if the list is not UTF-8 encoded
329    /// or if its format is invalid.
330    #[inline]
331    pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
332        let list = List::from_bytes(bytes)?;
333        Ok(list.into())
334    }
335
336    /// Checks to see if the list is empty, ignoring the wildcard rule
337    #[inline]
338    #[must_use]
339    pub fn is_empty(&self) -> bool {
340        self.0.is_empty()
341    }
342}
343
344impl FromStr for PrivateList {
345    type Err = Error;
346
347    #[inline]
348    fn from_str(s: &str) -> Result<Self, Self::Err> {
349        let list = List::from_str(s)?;
350        Ok(list.into())
351    }
352}
353
354impl Psl for PrivateList {
355    #[inline]
356    fn find<'a, T>(&self, labels: T) -> Info
357    where
358        T: Iterator<Item = &'a [u8]>,
359    {
360        self.0.find(labels)
361    }
362}
363
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal list holding a single ICANN rule, `com.uk`.
    const LIST: &[u8] = b"
        // BEGIN ICANN DOMAINS
        com.uk
        ";

    /// Parses `source` and looks `domain` up, feeding its labels right to left.
    fn lookup(source: &[u8], domain: &[u8]) -> Info {
        let list = List::from_bytes(source).unwrap();
        list.find(domain.rsplit(|byte| *byte == b'.'))
    }

    #[test]
    fn list_construction() {
        #[cfg(not(feature = "anycase"))]
        let (uk_key, com_key) = (b"uk".to_vec(), b"com".to_vec());
        #[cfg(feature = "anycase")]
        let (uk_key, com_key) = (
            UniCase::new(Cow::from("uk")),
            UniCase::new(Cow::from("com")),
        );

        // Build the expected trie bottom-up: `com` ends the rule under `uk`.
        let com_node = Node {
            children: Default::default(),
            leaf: Some(Leaf {
                is_exception: false,
                typ: Type::Icann,
            }),
        };
        let mut uk_children = Children::default();
        uk_children.insert(com_key, com_node);
        let uk_node = Node {
            children: uk_children,
            leaf: None,
        };
        let mut root_children = Children::default();
        root_children.insert(uk_key, uk_node);
        let expected = List {
            typ: None,
            rules: Node {
                children: root_children,
                leaf: None,
            },
        };

        let list = List::from_bytes(LIST).unwrap();
        assert_eq!(list, expected);
    }

    #[test]
    fn find_localhost() {
        // Not in the list at all: only the implicit wildcard applies.
        let expected = Info { len: 9, typ: None };
        assert_eq!(lookup(LIST, b"localhost"), expected);
    }

    #[test]
    fn find_uk() {
        // `uk` is a trie node but no rule ends there.
        let expected = Info { len: 2, typ: None };
        assert_eq!(lookup(LIST, b"uk"), expected);
    }

    #[test]
    fn find_com_uk() {
        let expected = Info {
            len: 6,
            typ: Some(Type::Icann),
        };
        assert_eq!(lookup(LIST, b"com.uk"), expected);
    }

    #[test]
    fn find_ide_kyoto_jp() {
        let expected = Info {
            len: 12,
            typ: Some(Type::Icann),
        };
        assert_eq!(
            lookup(b"// BEGIN ICANN DOMAINS\nide.kyoto.jp", b"ide.kyoto.jp"),
            expected
        );
    }
}
466}