1mod error;
4mod types;
5
6use core::str::{FromStr, from_utf8};
7use std::{collections::BTreeMap, sync::LazyLock};
8
9pub use error::Error;
10pub use types::{Domain, Info, List as Psl, Suffix, Type};
11
/// URL of the Public Suffix List data file hosted on publicsuffix.org.
pub const LIST_URL: &str = "https://publicsuffix.org/list/public_suffix_list.dat";
14
15type Children = BTreeMap<Vec<u8>, Node>;
16
/// Label that stands for "any label" in a rule; lookups fall back to it when no
/// exact child matches the label being examined.
const WILDCARD: &str = "*";
18
19const PUBLIC_SUFFIX_LIST_DATA: &str = include_str!("./public_suffix_list.txt");
20
21pub static PUBLIC_SUFFIX_LIST: LazyLock<List> = LazyLock::new(|| {
22 let ret: List = PUBLIC_SUFFIX_LIST_DATA
23 .parse()
24 .unwrap_or_else(|err| panic!("tld: error parsing public suffic list: {err}"));
25 return ret;
26});
27
28#[derive(Debug, Clone, Default, Eq, PartialEq)]
29struct Node {
30 children: Children,
31 leaf: Option<Leaf>,
32}
33
34#[derive(Debug, Clone, Copy, Eq, PartialEq)]
35struct Leaf {
36 is_exception: bool,
37 typ: Type,
38}
39
40#[derive(Debug, Clone, Default, Eq, PartialEq)]
42pub struct List {
43 rules: Node,
44 typ: Option<Type>,
45}
46
47impl List {
48 #[inline]
50 #[must_use]
51 pub fn new() -> Self {
52 Self::default()
53 }
54
55 #[inline]
62 pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
63 from_utf8(bytes).map_err(|_| Error::ListNotUtf8Encoded)?.parse()
64 }
65
66 #[inline]
68 #[must_use]
69 pub fn is_empty(&self) -> bool {
70 self.rules.children.is_empty()
71 }
72
73 #[inline]
74 fn append(&mut self, mut rule: &str, typ: Type) -> Result<(), Error> {
75 let mut is_exception = false;
76 if rule.starts_with('!') {
77 if !rule.contains('.') {
78 return Err(Error::ExceptionAtFirstLabel(rule.to_owned()));
79 }
80 is_exception = true;
81 rule = &rule[1..];
82 }
83
84 let mut current = &mut self.rules;
85 for label in rule.rsplit('.') {
86 if label.is_empty() {
87 return Err(Error::EmptyLabel(rule.to_owned()));
88 }
89
90 #[cfg(not(feature = "anycase"))]
91 let key = label.as_bytes().to_owned();
92 #[cfg(feature = "anycase")]
93 let key = UniCase::new(Cow::from(label.to_owned()));
94
95 current = current.children.entry(key).or_insert_with(Default::default);
96 }
97
98 current.leaf = Some(Leaf {
99 is_exception,
100 typ,
101 });
102
103 Ok(())
104 }
105}
106
/// Builds the `UniCase` map key for a label given as a byte slice.
///
/// Expands to an expression. When the bytes are not valid UTF-8 the
/// *enclosing function* returns `Info { len: 0, typ: None }` instead.
#[cfg(feature = "anycase")]
macro_rules! anycase_key {
    ($label:ident) => {
        match from_utf8($label) {
            Err(_) => {
                return Info {
                    len: 0,
                    typ: None,
                }
            }
            Ok(text) => UniCase::new(Cow::from(text)),
        }
    };
}
121
122impl Psl for List {
123 #[inline]
124 fn find<'a, T>(&self, mut labels: T) -> Info
125 where
126 T: Iterator<Item = &'a [u8]>,
127 {
128 let mut rules = &self.rules;
129
130 let mut info = match labels.next() {
134 Some(label) => {
135 let mut info = Info {
136 len: label.len(),
137 typ: None,
138 };
139 #[cfg(not(feature = "anycase"))]
140 let node_opt = rules.children.get(label);
141 #[cfg(feature = "anycase")]
142 let node_opt = rules.children.get(&anycase_key!(label));
143 match node_opt {
144 Some(node) => {
145 info.typ = node.leaf.map(|leaf| leaf.typ);
146 rules = node;
147 }
148 None => return info,
149 }
150 info
151 }
152 None => {
153 return Info {
154 len: 0,
155 typ: None,
156 };
157 }
158 };
159
160 let mut len_so_far = info.len;
162 for label in labels {
163 #[cfg(not(feature = "anycase"))]
164 let node_opt = rules.children.get(label);
165 #[cfg(feature = "anycase")]
166 let node_opt = rules.children.get(&anycase_key!(label));
167 match node_opt {
168 Some(node) => rules = node,
169 None => {
170 #[cfg(not(feature = "anycase"))]
171 let node_opt = rules.children.get(WILDCARD.as_bytes());
172 #[cfg(feature = "anycase")]
173 let node_opt = rules.children.get(&UniCase::new(Cow::from(WILDCARD)));
174 match node_opt {
175 Some(node) => rules = node,
176 None => break,
177 }
178 }
179 }
180 let label_plus_dot = label.len() + 1;
181 if let Some(leaf) = rules.leaf {
182 if self.typ.is_none() || self.typ == Some(leaf.typ) {
183 info.typ = Some(leaf.typ);
184 if leaf.is_exception {
185 info.len = len_so_far;
186 break;
187 }
188 info.len = len_so_far + label_plus_dot;
189 }
190 }
191 len_so_far += label_plus_dot;
192 }
193
194 info
195 }
196}
197
198impl FromStr for List {
199 type Err = Error;
200
201 #[inline]
202 fn from_str(s: &str) -> Result<Self, Self::Err> {
203 let mut typ = None;
204 let mut list = List::new();
205 for line in s.lines() {
206 match line {
207 line if line.contains("BEGIN ICANN DOMAINS") => {
208 typ = Some(Type::Icann);
209 }
210 line if line.contains("BEGIN PRIVATE DOMAINS") => {
211 typ = Some(Type::Private);
212 }
213 line if line.starts_with("//") => {
214 continue;
215 }
216 line => match typ {
217 Some(typ) => {
218 let rule = match line.split_whitespace().next() {
219 Some(rule) => rule,
220 None => continue,
221 };
222 list.append(rule, typ)?;
223 #[cfg(feature = "punycode")]
224 {
225 let ascii = idna::domain_to_ascii(rule).map_err(|_| Error::InvalidRule(rule.to_owned()))?;
226 list.append(&ascii, typ)?;
227 }
228 }
229 None => {
230 continue;
231 }
232 },
233 }
234 }
235 if list.is_empty() {
236 return Err(Error::InvalidList);
237 }
238 Ok(list)
239 }
240}
241
242#[derive(Debug, Clone, Default, Eq, PartialEq)]
244pub struct IcannList(List);
245
246impl From<List> for IcannList {
247 #[inline]
248 fn from(mut list: List) -> Self {
249 list.typ = Some(Type::Icann);
250 Self(list)
251 }
252}
253
254impl From<IcannList> for List {
255 #[inline]
256 fn from(IcannList(mut list): IcannList) -> Self {
257 list.typ = None;
258 list
259 }
260}
261
262impl IcannList {
263 #[inline]
270 pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
271 let list = List::from_bytes(bytes)?;
272 Ok(list.into())
273 }
274
275 #[inline]
277 #[must_use]
278 pub fn is_empty(&self) -> bool {
279 self.0.is_empty()
280 }
281}
282
283impl FromStr for IcannList {
284 type Err = Error;
285
286 #[inline]
287 fn from_str(s: &str) -> Result<Self, Self::Err> {
288 let list = List::from_str(s)?;
289 Ok(list.into())
290 }
291}
292
293impl Psl for IcannList {
294 #[inline]
295 fn find<'a, T>(&self, labels: T) -> Info
296 where
297 T: Iterator<Item = &'a [u8]>,
298 {
299 self.0.find(labels)
300 }
301}
302
303#[derive(Debug, Clone, Default, Eq, PartialEq)]
305pub struct PrivateList(List);
306
307impl From<List> for PrivateList {
308 #[inline]
309 fn from(mut list: List) -> Self {
310 list.typ = Some(Type::Private);
311 Self(list)
312 }
313}
314
315impl From<PrivateList> for List {
316 #[inline]
317 fn from(PrivateList(mut list): PrivateList) -> Self {
318 list.typ = None;
319 list
320 }
321}
322
323impl PrivateList {
324 #[inline]
331 pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
332 let list = List::from_bytes(bytes)?;
333 Ok(list.into())
334 }
335
336 #[inline]
338 #[must_use]
339 pub fn is_empty(&self) -> bool {
340 self.0.is_empty()
341 }
342}
343
344impl FromStr for PrivateList {
345 type Err = Error;
346
347 #[inline]
348 fn from_str(s: &str) -> Result<Self, Self::Err> {
349 let list = List::from_str(s)?;
350 Ok(list.into())
351 }
352}
353
354impl Psl for PrivateList {
355 #[inline]
356 fn find<'a, T>(&self, labels: T) -> Info
357 where
358 T: Iterator<Item = &'a [u8]>,
359 {
360 self.0.find(labels)
361 }
362}
363
#[cfg(test)]
mod tests {
    use super::*;

    const LIST: &[u8] = b"
        // BEGIN ICANN DOMAINS
        com.uk
    ";

    /// Parses `rules` and looks `domain` up, feeding its labels right to left.
    fn lookup(rules: &[u8], domain: &[u8]) -> Info {
        let list = List::from_bytes(rules).unwrap();
        list.find(domain.rsplit(|byte| *byte == b'.'))
    }

    #[test]
    fn list_construction() {
        // Build the expected tree bottom-up: `com` hangs below `uk`.
        let mut below_uk = Children::default();
        below_uk.insert(
            #[cfg(not(feature = "anycase"))]
            b"com".to_vec(),
            #[cfg(feature = "anycase")]
            UniCase::new(Cow::from("com")),
            Node {
                children: Children::default(),
                leaf: Some(Leaf {
                    is_exception: false,
                    typ: Type::Icann,
                }),
            },
        );

        let mut top_level = Children::default();
        top_level.insert(
            #[cfg(not(feature = "anycase"))]
            b"uk".to_vec(),
            #[cfg(feature = "anycase")]
            UniCase::new(Cow::from("uk")),
            Node {
                children: below_uk,
                leaf: None,
            },
        );

        let expected = List {
            typ: None,
            rules: Node {
                children: top_level,
                leaf: None,
            },
        };
        assert_eq!(List::from_bytes(LIST).unwrap(), expected);
    }

    #[test]
    fn find_localhost() {
        let expected = Info { len: 9, typ: None };
        assert_eq!(lookup(LIST, b"localhost"), expected);
    }

    #[test]
    fn find_uk() {
        let expected = Info { len: 2, typ: None };
        assert_eq!(lookup(LIST, b"uk"), expected);
    }

    #[test]
    fn find_com_uk() {
        let expected = Info {
            len: 6,
            typ: Some(Type::Icann),
        };
        assert_eq!(lookup(LIST, b"com.uk"), expected);
    }

    #[test]
    fn find_ide_kyoto_jp() {
        let expected = Info {
            len: 12,
            typ: Some(Type::Icann),
        };
        assert_eq!(
            lookup(b"// BEGIN ICANN DOMAINS\nide.kyoto.jp", b"ide.kyoto.jp"),
            expected
        );
    }
}