Skip to main content

maxminddb/
maxminddb.rs

1#![deny(trivial_casts, trivial_numeric_casts, unused_import_braces)]
2
3use std::{
4    cmp::Ordering,
5    collections::BTreeMap,
6    fmt::{self, Display, Formatter},
7    io,
8    marker::PhantomData,
9    net::{IpAddr, Ipv4Addr, Ipv6Addr},
10    path::Path,
11};
12
13use ipnetwork::IpNetwork;
14use serde::{Deserialize, Serialize, de};
15
16// #[cfg(feature = "mmap")]
17// pub use memmap2::Mmap;
18// #[cfg(feature = "mmap")]
19// use memmap2::MmapOptions;
20// #[cfg(feature = "mmap")]
21// use std::fs::File;
22
23pub mod decoder;
24pub mod encoder;
25pub mod geoip2;
26pub mod writer;
27pub use encoder::Value;
28
/// Errors produced while opening, decoding, or querying a MaxMind DB.
///
/// Every variant except `AddressNotFoundError` carries a human-readable
/// message; the `Display` impl prints the variant name followed by the payload.
#[derive(Debug, PartialEq, Eq)]
pub enum MaxMindDBError {
    /// No record exists for the address. Lookups store the queried address here.
    AddressNotFoundError(IpAddr),
    /// The database contents are structurally invalid (bad node offset,
    /// unknown record size, missing metadata marker, corrupt data pointer, ...).
    InvalidDatabaseError(String),
    /// An I/O failure; built from `io::Error` via the `From` impl, keeping only its message.
    IoError(String),
    /// NOTE(review): not constructed anywhere in this file; presumably reserved
    /// for the (currently commented-out) memory-mapping support — confirm.
    MapError(String),
    /// A deserialization failure; this is what `serde::de::Error::custom` produces.
    DecodingError(String),
    /// Raw address bytes and a prefix length could not be turned into an `IpNetwork`.
    InvalidNetworkError(String),
}
38
39impl From<io::Error> for MaxMindDBError {
40    fn from(err: io::Error) -> MaxMindDBError {
41        // clean up and clean up MaxMindDBError generally
42        MaxMindDBError::IoError(err.to_string())
43    }
44}
45
46impl Display for MaxMindDBError {
47    fn fmt(&self, fmt: &mut Formatter<'_>) -> Result<(), fmt::Error> {
48        match self {
49            MaxMindDBError::AddressNotFoundError(msg) => write!(fmt, "AddressNotFoundError: {msg}")?,
50            MaxMindDBError::InvalidDatabaseError(msg) => write!(fmt, "InvalidDatabaseError: {msg}")?,
51            MaxMindDBError::IoError(msg) => write!(fmt, "IoError: {msg}")?,
52            MaxMindDBError::MapError(msg) => write!(fmt, "MapError: {msg}")?,
53            MaxMindDBError::DecodingError(msg) => write!(fmt, "DecodingError: {msg}")?,
54            MaxMindDBError::InvalidNetworkError(msg) => write!(fmt, "InvalidNetworkError: {msg}")?,
55        }
56        Ok(())
57    }
58}
59
// `MaxMindDBError` only needs to be usable as a `std::error::Error`; the
// trait's default method implementations are sufficient because the variants
// carry no underlying source error.
impl std::error::Error for MaxMindDBError {}
62
63impl de::Error for MaxMindDBError {
64    fn custom<T: Display>(msg: T) -> Self {
65        MaxMindDBError::DecodingError(format!("{msg}"))
66    }
67}
68
/// The metadata record of a MaxMind DB.
///
/// `Reader::from_source` deserializes this from the bytes that follow the
/// metadata start marker in the file.
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct Metadata {
    pub binary_format_major_version: u16,
    pub binary_format_minor_version: u16,
    // NOTE(review): presumably a Unix timestamp of the database build — confirm.
    pub build_epoch: u64,
    pub database_type: String,
    // NOTE(review): presumably keyed by language code — confirm.
    pub description: BTreeMap<String, String>,
    /// When this is `6`, the reader locates the IPv4 subtree by following 96
    /// zero bits from the root; for any other value IPv4 lookups start at node 0.
    pub ip_version: u16,
    pub languages: Vec<String>,
    /// Number of nodes in the search tree. A record value equal to this means
    /// "no data"; larger values are data pointers.
    pub node_count: u32,
    /// Size of one search-tree record in bits. The reader supports 24, 28 and 32;
    /// a node occupies `record_size / 4` bytes.
    pub record_size: u16,
}
81
/// One pending entry on the `Within` iterator's traversal stack.
#[derive(Debug)]
struct WithinNode {
    /// Record value to process: a search-tree node number when it is below the
    /// reader's node count, otherwise a "no data" marker or data pointer.
    node: usize,
    /// Network address accumulated so far (4 bytes for IPv4, 16 for IPv6).
    ip_bytes: Vec<u8>,
    /// Number of leading bits of `ip_bytes` that are fixed at this point of the walk.
    prefix_len: usize,
}
88
/// Iterator over the networks below a starting point in the search tree,
/// created by `Reader::within`.
///
/// Each item is a `Result<WithinItem<T>, MaxMindDBError>`.
#[derive(Debug)]
pub struct Within<'de, T: Deserialize<'de>, S: AsRef<[u8]>> {
    /// Reader whose search tree and data section are being walked.
    reader: &'de Reader<S>,
    /// Copy of the reader's `metadata.node_count`, used to classify record values.
    node_count: usize,
    /// Nodes still to be visited; the most recently pushed entry is processed first.
    stack: Vec<WithinNode>,
    /// Ties the iterator to the record type `T` without storing a value of it.
    phantom: PhantomData<&'de T>,
}
96
/// A single result yielded by the `Within` iterator.
#[derive(Debug)]
pub struct WithinItem<T> {
    /// The network the record applies to.
    pub ip_net: IpNetwork,
    /// The record deserialized from the data section for that network.
    pub info: T,
}
102
103impl<'de, T: Deserialize<'de>, S: AsRef<[u8]>> Iterator for Within<'de, T, S> {
104    type Item = Result<WithinItem<T>, MaxMindDBError>;
105
106    fn next(&mut self) -> Option<Self::Item> {
107        while let Some(current) = self.stack.pop() {
108            let bit_count = current.ip_bytes.len() * 8;
109
110            // Skip networks that are aliases for the IPv4 network
111            if self.reader.ipv4_start != 0
112                && current.node == self.reader.ipv4_start
113                && bit_count == 128
114                && current.ip_bytes[..12].iter().any(|&b| b != 0)
115            {
116                continue;
117            }
118
119            match current.node.cmp(&self.node_count) {
120                Ordering::Greater => {
121                    // This is a data node, emit it and we're done (until the following next call)
122                    let ip_net = match bytes_and_prefix_to_net(&current.ip_bytes, current.prefix_len as u8) {
123                        Ok(ip_net) => ip_net,
124                        Err(e) => return Some(Err(e)),
125                    };
126                    // TODO: should this block become a helper method on reader?
127                    let rec = match self.reader.resolve_data_pointer(current.node) {
128                        Ok(rec) => rec,
129                        Err(e) => return Some(Err(e)),
130                    };
131                    let mut decoder = decoder::Decoder::new(&self.reader.buf.as_ref()[self.reader.pointer_base..], rec);
132                    return match T::deserialize(&mut decoder) {
133                        Ok(info) => Some(Ok(WithinItem {
134                            ip_net,
135                            info,
136                        })),
137                        Err(e) => Some(Err(e)),
138                    };
139                }
140                Ordering::Equal => {
141                    // Dead end, nothing to do
142                }
143                Ordering::Less => {
144                    // In order traversal of our children
145                    // right/1-bit
146                    let mut right_ip_bytes = current.ip_bytes.clone();
147                    right_ip_bytes[current.prefix_len >> 3] |= 1 << ((bit_count - current.prefix_len - 1) % 8);
148                    let node = match self.reader.read_node(current.node, 1) {
149                        Ok(node) => node,
150                        Err(e) => return Some(Err(e)),
151                    };
152                    self.stack.push(WithinNode {
153                        node,
154                        ip_bytes: right_ip_bytes,
155                        prefix_len: current.prefix_len + 1,
156                    });
157                    // left/0-bit
158                    let node = match self.reader.read_node(current.node, 0) {
159                        Ok(node) => node,
160                        Err(e) => return Some(Err(e)),
161                    };
162                    self.stack.push(WithinNode {
163                        node,
164                        ip_bytes: current.ip_bytes.clone(),
165                        prefix_len: current.prefix_len + 1,
166                    });
167                }
168            }
169        }
170        None
171    }
172}
173
/// A reader for the MaxMind DB format.
///
/// `S` is the storage holding the raw contents of the database file; anything
/// that implements `AsRef<[u8]>` works (for example a `Vec<u8>`). The reader
/// owns `S`, and values borrowed during lookups live as long as the borrow of
/// the reader itself.
#[derive(Debug)]
pub struct Reader<S: AsRef<[u8]>> {
    /// Raw database bytes.
    buf: S,
    /// Metadata decoded from the file when the reader was created.
    pub metadata: Metadata,
    /// Node at which IPv4 lookups start (see `find_ipv4_start`).
    ipv4_start: usize,
    /// Byte offset of the data section: search tree size plus the 16-byte separator.
    pointer_base: usize,
}
182
183// #[cfg(feature = "mmap")]
184// impl<'de> Reader<Mmap> {
185//     /// Open a MaxMind DB database file by memory mapping it.
186//     ///
187//     /// # Example
188//     ///
189//     /// ```
190//     /// let reader = maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
191//     /// ```
192//     pub fn open_mmap<P: AsRef<Path>>(database: P) -> Result<Reader<Mmap>, MaxMindDBError> {
193//         let file_read = File::open(database)?;
194//         let mmap = unsafe { MmapOptions::new().map(&file_read) }?;
195//         Reader::from_source(mmap)
196//     }
197// }
198
199impl Reader<Vec<u8>> {
200    /// Open a MaxMind DB database file by loading it into memory.
201    ///
202    /// # Example
203    ///
204    /// ```
205    /// let reader = maxminddb::Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
206    /// ```
207    pub fn open_readfile<P: AsRef<Path>>(database: P) -> Result<Reader<Vec<u8>>, MaxMindDBError> {
208        use std::fs;
209
210        let buf: Vec<u8> = fs::read(&database)?;
211        Reader::from_source(buf)
212    }
213}
214
215impl<'de, S: AsRef<[u8]>> Reader<S> {
216    /// Open a MaxMind DB database from anything that implements AsRef<[u8]>
217    ///
218    /// # Example
219    ///
220    /// ```
221    /// use std::fs;
222    /// let buf = fs::read("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
223    /// let reader = maxminddb::Reader::from_source(buf).unwrap();
224    /// ```
225    pub fn from_source(buf: S) -> Result<Reader<S>, MaxMindDBError> {
226        let data_section_separator_size = 16;
227
228        let metadata_start = find_metadata_start(buf.as_ref())?;
229        let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0);
230        let metadata = Metadata::deserialize(&mut type_decoder)?;
231
232        let search_tree_size = (metadata.node_count as usize) * (metadata.record_size as usize) / 4;
233
234        let mut reader = Reader {
235            buf,
236            pointer_base: search_tree_size + data_section_separator_size,
237            metadata,
238            ipv4_start: 0,
239        };
240        reader.ipv4_start = reader.find_ipv4_start()?;
241
242        Ok(reader)
243    }
244
245    /// Lookup the socket address in the opened MaxMind DB
246    ///
247    /// Example:
248    ///
249    /// ```
250    /// use maxminddb::geoip2;
251    /// use std::net::IpAddr;
252    /// use std::str::FromStr;
253    ///
254    /// let reader = maxminddb::Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
255    ///
256    /// let ip: IpAddr = FromStr::from_str("89.160.20.128").unwrap();
257    /// let city: geoip2::City = reader.lookup(ip).unwrap();
258    /// print!("{:?}", city);
259    /// ```
260    pub fn lookup<T>(&'de self, address: IpAddr) -> Result<T, MaxMindDBError>
261    where
262        T: Deserialize<'de>,
263    {
264        self.lookup_prefix(address).map(|(v, _)| v)
265    }
266
267    /// Lookup the socket address in the opened MaxMind DB
268    ///
269    /// Example:
270    ///
271    /// ```
272    /// use maxminddb::geoip2;
273    /// use std::net::IpAddr;
274    /// use std::str::FromStr;
275    ///
276    /// let reader = maxminddb::Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
277    ///
278    /// let ip: IpAddr = "89.160.20.128".parse().unwrap();
279    /// let (city, prefix_len) = reader.lookup_prefix::<geoip2::City>(ip).unwrap();
280    /// print!("{:?}, prefix length: {}", city, prefix_len);
281    /// ```
282    pub fn lookup_prefix<T>(&'de self, address: IpAddr) -> Result<(T, usize), MaxMindDBError>
283    where
284        T: Deserialize<'de>,
285    {
286        let ip_bytes = ip_to_bytes(address);
287        let (pointer, prefix_len) = self.find_address_in_tree(&ip_bytes)?;
288        if pointer == 0 {
289            return Err(MaxMindDBError::AddressNotFoundError(address));
290        }
291
292        let rec = self.resolve_data_pointer(pointer)?;
293        let mut decoder = decoder::Decoder::new(&self.buf.as_ref()[self.pointer_base..], rec);
294
295        T::deserialize(&mut decoder).map(|v| (v, prefix_len))
296    }
297
298    /// Iterate over blocks of IP networks in the opened MaxMind DB
299    ///
300    /// Example:
301    ///
302    /// ```
303    /// use ipnetwork::IpNetwork;
304    /// use maxminddb::{geoip2, Within};
305    ///
306    /// let reader = maxminddb::Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
307    ///
308    /// let ip_net = IpNetwork::V6("::/0".parse().unwrap());
309    /// let mut iter: Within<geoip2::City, _> = reader.within(ip_net).unwrap();
310    /// while let Some(next) = iter.next() {
311    ///     let item = next.unwrap();
312    ///     println!("ip_net={}, city={:?}", item.ip_net, item.info);
313    /// }
314    /// ```
315    pub fn within<T>(&'de self, cidr: IpNetwork) -> Result<Within<'de, T, S>, MaxMindDBError>
316    where
317        T: Deserialize<'de>,
318    {
319        let ip_address = cidr.network();
320        let prefix_len = cidr.prefix() as usize;
321        let ip_bytes = ip_to_bytes(ip_address);
322        let bit_count = ip_bytes.len() * 8;
323
324        let mut node = self.start_node(bit_count);
325        let node_count = self.metadata.node_count as usize;
326
327        let mut stack: Vec<WithinNode> = Vec::with_capacity(bit_count - prefix_len);
328
329        // Traverse down the tree to the level that matches the cidr mark
330        let mut i = 0_usize;
331        while i < prefix_len {
332            let bit = 1 & (ip_bytes[i >> 3] >> (7 - (i % 8))) as usize;
333            node = self.read_node(node, bit)?;
334            if node >= node_count {
335                // We've hit a dead end before we exhausted our prefix
336                break;
337            }
338
339            i += 1;
340        }
341
342        if node < node_count {
343            // Ok, now anything that's below node in the tree is "within", start with the node we
344            // traversed to as our to be processed stack.
345            stack.push(WithinNode {
346                node,
347                ip_bytes,
348                prefix_len,
349            });
350        }
351        // else the stack will be empty and we'll be returning an iterator that visits nothing,
352        // which makes sense.
353
354        let within: Within<T, S> = Within {
355            reader: self,
356            node_count,
357            stack,
358            phantom: PhantomData,
359        };
360
361        Ok(within)
362    }
363
364    fn find_address_in_tree(&self, ip_address: &[u8]) -> Result<(usize, usize), MaxMindDBError> {
365        let bit_count = ip_address.len() * 8;
366        let mut node = self.start_node(bit_count);
367
368        let node_count = self.metadata.node_count as usize;
369        let mut prefix_len = bit_count;
370
371        for i in 0..bit_count {
372            if node >= node_count {
373                prefix_len = i;
374                break;
375            }
376            let bit = 1 & (ip_address[i >> 3] >> (7 - (i % 8)));
377
378            node = self.read_node(node, bit as usize)?;
379        }
380        match node_count {
381            n if n == node => Ok((0, prefix_len)),
382            n if node > n => Ok((node, prefix_len)),
383            _ => {
384                // All bits consumed but we are at a node (exact prefix match).
385                // Read the left child to get the data pointer. For an exact prefix
386                // match the writer sets both children to the same data pointer.
387                let data_ptr = self.read_node(node, 0)?;
388                if data_ptr > node_count {
389                    Ok((data_ptr, prefix_len))
390                } else {
391                    Err(MaxMindDBError::InvalidDatabaseError("invalid node in search tree".to_owned()))
392                }
393            }
394        }
395    }
396
397    fn start_node(&self, length: usize) -> usize {
398        if length == 128 { 0 } else { self.ipv4_start }
399    }
400
401    fn find_ipv4_start(&self) -> Result<usize, MaxMindDBError> {
402        if self.metadata.ip_version != 6 {
403            return Ok(0);
404        }
405
406        // We are looking up an IPv4 address in an IPv6 tree. Skip over the
407        // first 96 nodes.
408        let mut node: usize = 0_usize;
409        for _ in 0_u8..96 {
410            if node >= self.metadata.node_count as usize {
411                break;
412            }
413            node = self.read_node(node, 0)?;
414        }
415        Ok(node)
416    }
417
418    fn read_node(&self, node_number: usize, index: usize) -> Result<usize, MaxMindDBError> {
419        let buf = self.buf.as_ref();
420        let base_offset = node_number * (self.metadata.record_size as usize) / 4;
421
422        let val = match self.metadata.record_size {
423            24 => {
424                let offset = base_offset + index * 3;
425                if offset + 3 > buf.len() {
426                    return Err(MaxMindDBError::InvalidDatabaseError(
427                        "search tree node offset beyond end of file".to_owned(),
428                    ));
429                }
430                to_usize(0, &buf[offset..offset + 3])
431            }
432            28 => {
433                if base_offset + 4 > buf.len() {
434                    return Err(MaxMindDBError::InvalidDatabaseError(
435                        "search tree node offset beyond end of file".to_owned(),
436                    ));
437                }
438                let mut middle = buf[base_offset + 3];
439                if index != 0 {
440                    middle &= 0x0F
441                } else {
442                    middle = (0xF0 & middle) >> 4
443                }
444                let offset = base_offset + index * 4;
445                if offset + 3 > buf.len() {
446                    return Err(MaxMindDBError::InvalidDatabaseError(
447                        "search tree node offset beyond end of file".to_owned(),
448                    ));
449                }
450                to_usize(middle, &buf[offset..offset + 3])
451            }
452            32 => {
453                let offset = base_offset + index * 4;
454                if offset + 4 > buf.len() {
455                    return Err(MaxMindDBError::InvalidDatabaseError(
456                        "search tree node offset beyond end of file".to_owned(),
457                    ));
458                }
459                to_usize(0, &buf[offset..offset + 4])
460            }
461            s => {
462                return Err(MaxMindDBError::InvalidDatabaseError(format!(
463                    "unknown record size: \
464                     {s:?}"
465                )));
466            }
467        };
468        Ok(val)
469    }
470
471    fn resolve_data_pointer(&self, pointer: usize) -> Result<usize, MaxMindDBError> {
472        let node_count = self.metadata.node_count as usize;
473        if pointer <= node_count {
474            return Err(MaxMindDBError::AddressNotFoundError(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0))));
475        }
476        let separator_size = 16;
477        if pointer < node_count + separator_size {
478            return Err(MaxMindDBError::InvalidDatabaseError(format!(
479                "data pointer value {pointer} falls in the invalid range (node_count + 1 to node_count + 15)"
480            )));
481        }
482        let resolved = pointer - node_count - separator_size;
483
484        if self.pointer_base + resolved >= self.buf.as_ref().len() {
485            return Err(MaxMindDBError::InvalidDatabaseError(
486                "the MaxMind DB file's search tree \
487                 is corrupt"
488                    .to_owned(),
489            ));
490        }
491
492        Ok(resolved)
493    }
494}
495
// Kept as a plain function rather than a generic over integer types because
// the `FromPrimitive` trait is unstable.
/// Interpret `bytes` as a big-endian unsigned integer, with `base` supplying
/// the bits above the most significant byte.
fn to_usize(base: u8, bytes: &[u8]) -> usize {
    let mut value = usize::from(base);
    for &byte in bytes {
        value = (value << 8) | usize::from(byte);
    }
    value
}
501
/// Return the address as bytes in network order: 4 bytes for IPv4, 16 for IPv6.
fn ip_to_bytes(address: IpAddr) -> Vec<u8> {
    match address {
        IpAddr::V4(v4) => Vec::from(v4.octets()),
        IpAddr::V6(v6) => Vec::from(v6.octets()),
    }
}
508
509#[allow(clippy::many_single_char_names)]
510fn bytes_and_prefix_to_net(bytes: &[u8], prefix: u8) -> Result<IpNetwork, MaxMindDBError> {
511    let (ip, pre) = match bytes.len() {
512        4 => (IpAddr::V4(Ipv4Addr::new(bytes[0], bytes[1], bytes[2], bytes[3])), prefix),
513        16 => {
514            if bytes[0] == 0
515                && bytes[1] == 0
516                && bytes[2] == 0
517                && bytes[3] == 0
518                && bytes[4] == 0
519                && bytes[5] == 0
520                && bytes[6] == 0
521                && bytes[7] == 0
522                && bytes[8] == 0
523                && bytes[9] == 0
524                && bytes[10] == 0
525                && bytes[11] == 0
526            {
527                // It's actually v4, but in v6 form, convert would be nice if ipnetwork had this
528                // logic.
529                (
530                    IpAddr::V4(Ipv4Addr::new(bytes[12], bytes[13], bytes[14], bytes[15])),
531                    prefix - 96,
532                )
533            } else {
534                let a = u16::from(bytes[0]) << 8 | u16::from(bytes[1]);
535                let b = u16::from(bytes[2]) << 8 | u16::from(bytes[3]);
536                let c = u16::from(bytes[4]) << 8 | u16::from(bytes[5]);
537                let d = u16::from(bytes[6]) << 8 | u16::from(bytes[7]);
538                let e = u16::from(bytes[8]) << 8 | u16::from(bytes[9]);
539                let f = u16::from(bytes[10]) << 8 | u16::from(bytes[11]);
540                let g = u16::from(bytes[12]) << 8 | u16::from(bytes[13]);
541                let h = u16::from(bytes[14]) << 8 | u16::from(bytes[15]);
542                (IpAddr::V6(Ipv6Addr::new(a, b, c, d, e, f, g, h)), prefix)
543            }
544        }
545        // This should never happen
546        _ => return Err(MaxMindDBError::InvalidNetworkError("invalid address".to_owned())),
547    };
548    IpNetwork::new(ip, pre).map_err(|e| MaxMindDBError::InvalidNetworkError(e.to_string()))
549}
550
551fn find_metadata_start(buf: &[u8]) -> Result<usize, MaxMindDBError> {
552    const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com";
553
554    memchr::memmem::rfind(buf, METADATA_START_MARKER)
555        .map(|x| x + METADATA_START_MARKER.len())
556        .ok_or_else(|| MaxMindDBError::InvalidDatabaseError("Could not find MaxMind DB metadata in file.".to_owned()))
557}
558
559#[cfg(test)]
560mod reader_test;
561
#[cfg(test)]
mod tests {
    use std::net::{IpAddr, Ipv4Addr};

    use super::MaxMindDBError;

    /// Every `MaxMindDBError` variant renders as `<VariantName>: <payload>`.
    #[test]
    fn test_error_display() {
        assert_eq!(
            format!(
                "{}",
                MaxMindDBError::AddressNotFoundError(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)))
            ),
            "AddressNotFoundError: 127.0.0.1".to_owned(),
        );
        assert_eq!(
            format!("{}", MaxMindDBError::InvalidDatabaseError("something went wrong".to_owned())),
            "InvalidDatabaseError: something went wrong".to_owned(),
        );
        assert_eq!(
            format!("{}", MaxMindDBError::IoError("something went wrong".to_owned())),
            "IoError: something went wrong".to_owned(),
        );
        assert_eq!(
            format!("{}", MaxMindDBError::MapError("something went wrong".to_owned())),
            "MapError: something went wrong".to_owned(),
        );
        assert_eq!(
            format!("{}", MaxMindDBError::DecodingError("something went wrong".to_owned())),
            "DecodingError: something went wrong".to_owned(),
        );
        // This variant was previously not covered by the test.
        assert_eq!(
            format!("{}", MaxMindDBError::InvalidNetworkError("something went wrong".to_owned())),
            "InvalidNetworkError: something went wrong".to_owned(),
        );
    }
}