multihash: Use Bytes instead of Vec<u8> internally. (#1187)

multihash: Use `Bytes` instead of `Vec<u8>` internally. To improve the efficiency of cloning multi-hashes (e.g. as the representation of `PeerId`s), this PR replaces the `Vec<u8>` representation with `Bytes`. The API is kept backwards compatible and does not leak the representation type.
2025-06-25 15:51:34 +00:00 · 2019-07-03 14:27:26 +02:00
parent 404f1bdf4d
commit 8af4a28152
3 changed files with 58 additions and 74 deletions
--- a/misc/multihash/src/hashes.rs
+++ b/misc/multihash/src/hashes.rs
@ -38,7 +38,7 @@ pub enum Hash {
 impl Hash {
    /// Get the corresponding hash code.
    pub fn code(&self) -> u16 {
-        match *self {
+        match self {
            Hash::SHA1 => 0x11,
            Hash::SHA2256 => 0x12,
            Hash::SHA2512 => 0x13,
@ -59,7 +59,7 @@ impl Hash {

    /// Get the hash length in bytes.
    pub fn size(&self) -> u8 {
-        match *self {
+        match self {
            Hash::SHA1 => 20,
            Hash::SHA2256 => 32,
            Hash::SHA2512 => 64,
--- a/misc/multihash/src/lib.rs
+++ b/misc/multihash/src/lib.rs
@ -8,6 +8,8 @@
 mod errors;
 mod hashes;

+use bytes::{BufMut, Bytes, BytesMut};
+use rand::RngCore;
 use sha2::Digest;
 use std::{convert::TryFrom, fmt::Write};
 use unsigned_varint::{decode, encode};
@ -48,25 +50,15 @@ macro_rules! match_encoder {
 /// use parity_multihash::{encode, Hash};
 ///
 /// assert_eq!(
-///     encode(Hash::SHA2256, b"hello world").unwrap().into_bytes(),
+///     encode(Hash::SHA2256, b"hello world").unwrap().to_vec(),
 ///     vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196,
 ///     132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233]
 /// );
 /// ```
 ///
 pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
-    let mut buf = encode::u16_buffer();
-    let code = encode::u16(hash.code(), &mut buf);
-
-    let header_len = code.len() + 1;
-    let size = hash.size();
-
-    let mut output = Vec::new();
-    output.resize(header_len + size as usize, 0);
-    output[..code.len()].copy_from_slice(code);
-    output[code.len()] = size;
-
-    match_encoder!(hash for (input, &mut output[header_len..]) {
+    let (offset, mut output) = encode_hash(hash);
+    match_encoder!(hash for (input, &mut output[offset ..]) {
        SHA1 => sha1::Sha1,
        SHA2256 => sha2::Sha256,
        SHA2512 => sha2::Sha512,
@ -82,77 +74,71 @@ pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
        Blake2s256 => blake2::Blake2s,
    });

-    Ok(Multihash { bytes: output })
+    Ok(Multihash { bytes: output.freeze() })
+}
+
+// Encode the given [`Hash`] value and ensure the returned [`BytesMut`]
+// has enough capacity to hold the actual digest.
+fn encode_hash(hash: Hash) -> (usize, BytesMut) {
+    let mut buf = encode::u16_buffer();
+    let code = encode::u16(hash.code(), &mut buf);
+
+    let len = code.len() + 1 + usize::from(hash.size());
+
+    let mut output = BytesMut::with_capacity(len);
+    output.put_slice(code);
+    output.put_u8(hash.size());
+    output.resize(len, 0);
+
+    (code.len() + 1, output)
 }

 /// Represents a valid multihash.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct Multihash {
-    bytes: Vec<u8>,
-}
+pub struct Multihash { bytes: Bytes }

 impl Multihash {
    /// Verifies whether `bytes` contains a valid multihash, and if so returns a `Multihash`.
-    #[inline]
    pub fn from_bytes(bytes: Vec<u8>) -> Result<Multihash, DecodeOwnedError> {
        if let Err(err) = MultihashRef::from_slice(&bytes) {
-            return Err(DecodeOwnedError {
-                error: err,
-                data: bytes,
-            });
+            return Err(DecodeOwnedError { error: err, data: bytes });
        }
-
-        Ok(Multihash { bytes })
+        Ok(Multihash { bytes: Bytes::from(bytes) })
    }

    /// Generates a random `Multihash` from a cryptographically secure PRNG.
    pub fn random(hash: Hash) -> Multihash {
-        let mut buf = encode::u16_buffer();
-        let code = encode::u16(hash.code(), &mut buf);
-
-        let header_len = code.len() + 1;
-        let size = hash.size();
-
-        let mut output = Vec::new();
-        output.resize(header_len + size as usize, 0);
-        output[..code.len()].copy_from_slice(code);
-        output[code.len()] = size;
-
-        for b in output[header_len..].iter_mut() {
-            *b = rand::random();
-        }
-
-        Multihash {
-            bytes: output,
-        }
+        let (offset, mut bytes) = encode_hash(hash);
+        rand::thread_rng().fill_bytes(&mut bytes[offset ..]);
+        Multihash { bytes: bytes.freeze() }
    }

    /// Returns the bytes representation of the multihash.
-    #[inline]
    pub fn into_bytes(self) -> Vec<u8> {
-        self.bytes
+        self.to_vec()
+    }
+
+    /// Returns the bytes representation of the multihash.
+    pub fn to_vec(&self) -> Vec<u8> {
+        Vec::from(&self.bytes[..])
    }

    /// Returns the bytes representation of this multihash.
-    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        &self.bytes
    }

    /// Builds a `MultihashRef` corresponding to this `Multihash`.
-    #[inline]
    pub fn as_ref(&self) -> MultihashRef<'_> {
        MultihashRef { bytes: &self.bytes }
    }

    /// Returns which hashing algorithm is used in this multihash.
-    #[inline]
    pub fn algorithm(&self) -> Hash {
        self.as_ref().algorithm()
    }

    /// Returns the hashed data.
-    #[inline]
    pub fn digest(&self) -> &[u8] {
        self.as_ref().digest()
    }
@ -165,7 +151,6 @@ impl AsRef<[u8]> for Multihash {
 }

 impl<'a> PartialEq<MultihashRef<'a>> for Multihash {
-    #[inline]
    fn eq(&self, other: &MultihashRef<'a>) -> bool {
        &*self.bytes == other.bytes
    }
@ -181,69 +166,66 @@ impl TryFrom<Vec<u8>> for Multihash {

 /// Represents a valid multihash.
 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-pub struct MultihashRef<'a> {
-    bytes: &'a [u8],
-}
+pub struct MultihashRef<'a> { bytes: &'a [u8] }

 impl<'a> MultihashRef<'a> {
-    /// Verifies whether `bytes` contains a valid multihash, and if so returns a `MultihashRef`.
-    pub fn from_slice(input: &'a [u8]) -> Result<MultihashRef<'a>, DecodeError> {
+    /// Creates a `MultihashRef` from the given `input`.
+    pub fn from_slice(input: &'a [u8]) -> Result<Self, DecodeError> {
        if input.is_empty() {
-            return Err(DecodeError::BadInputLength);
+            return Err(DecodeError::BadInputLength)
        }

-        // NOTE: We choose u16 here because there is no hashing algorithm implemented in this crate
-        // whose length exceeds 2^16 - 1.
+        // Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct.
+        std::convert::identity::<fn(&'_ Hash) -> u16>(Hash::code);
        let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?;

        let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?;
-        let hash_len = alg.size() as usize;
+        let hash_len = usize::from(alg.size());

        // Length of input after hash code should be exactly hash_len + 1
        if bytes.len() != hash_len + 1 {
-            return Err(DecodeError::BadInputLength);
+            return Err(DecodeError::BadInputLength)
        }

-        if bytes[0] as usize != hash_len {
-            return Err(DecodeError::BadInputLength);
+        if usize::from(bytes[0]) != hash_len {
+            return Err(DecodeError::BadInputLength)
        }

        Ok(MultihashRef { bytes: input })
    }

    /// Returns which hashing algorithm is used in this multihash.
-    #[inline]
    pub fn algorithm(&self) -> Hash {
-        let (code, _) = decode::u16(&self.bytes).expect("multihash is known to be valid algorithm");
+        let code = decode::u16(&self.bytes)
+            .expect("multihash is known to be valid algorithm")
+            .0;
        Hash::from_code(code).expect("multihash is known to be valid")
    }

    /// Returns the hashed data.
-    #[inline]
    pub fn digest(&self) -> &'a [u8] {
-        let (_, bytes) = decode::u16(&self.bytes).expect("multihash is known to be valid digest");
-        &bytes[1..]
+        let bytes = decode::u16(&self.bytes)
+            .expect("multihash is known to be valid digest")
+            .1;
+        &bytes[1 ..]
    }

    /// Builds a `Multihash` that owns the data.
    ///
    /// This operation allocates.
-    #[inline]
-    pub fn into_owned(&self) -> Multihash {
+    pub fn into_owned(self) -> Multihash {
        Multihash {
-            bytes: self.bytes.to_owned(),
+            bytes: Bytes::from(self.bytes)
        }
    }

    /// Returns the bytes representation of this multihash.
-    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        &self.bytes
    }
 }

 impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
-    #[inline]
    fn eq(&self, other: &Multihash) -> bool {
        self.bytes == &*other.bytes
    }
@ -263,6 +245,7 @@ pub fn to_hex(bytes: &[u8]) -> String {
 #[cfg(test)]
 mod tests {
    use crate::{Hash, Multihash};
+    use std::convert::TryFrom;

    #[test]
    fn rand_generates_valid_multihash() {
@ -275,7 +258,7 @@ mod tests {

            for _ in 0 .. 2000 {
                let hash = Multihash::random(hash_fn);
-                assert_eq!(hash, Multihash::from_bytes(hash.clone().into_bytes()).unwrap());
+                assert_eq!(hash, Multihash::try_from(hash.to_vec()).unwrap());
            }
        }
    }