multihash: Use Bytes instead of Vec<u8> internally. (#1187)

multihash: Use `Bytes` instead of `Vec<u8>` internally.

To improve the efficiency of cloning multi-hashes (e.g. as the
representation of `PeerId`s), this PR replaces the `Vec<u8>`
representation with `Bytes`. The API is kept backwards
compatible and does not leak the representation type.
This commit is contained in:
Toralf Wittner
2019-07-03 14:27:26 +02:00
committed by GitHub
parent 404f1bdf4d
commit 8af4a28152
3 changed files with 58 additions and 74 deletions

View File

@ -38,7 +38,7 @@ pub enum Hash {
impl Hash {
/// Get the corresponding hash code.
pub fn code(&self) -> u16 {
match *self {
match self {
Hash::SHA1 => 0x11,
Hash::SHA2256 => 0x12,
Hash::SHA2512 => 0x13,
@ -59,7 +59,7 @@ impl Hash {
/// Get the hash length in bytes.
pub fn size(&self) -> u8 {
match *self {
match self {
Hash::SHA1 => 20,
Hash::SHA2256 => 32,
Hash::SHA2512 => 64,

View File

@ -8,6 +8,8 @@
mod errors;
mod hashes;
use bytes::{BufMut, Bytes, BytesMut};
use rand::RngCore;
use sha2::Digest;
use std::{convert::TryFrom, fmt::Write};
use unsigned_varint::{decode, encode};
@ -48,25 +50,15 @@ macro_rules! match_encoder {
/// use parity_multihash::{encode, Hash};
///
/// assert_eq!(
/// encode(Hash::SHA2256, b"hello world").unwrap().into_bytes(),
/// encode(Hash::SHA2256, b"hello world").unwrap().to_vec(),
/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196,
/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233]
/// );
/// ```
///
pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
let mut buf = encode::u16_buffer();
let code = encode::u16(hash.code(), &mut buf);
let header_len = code.len() + 1;
let size = hash.size();
let mut output = Vec::new();
output.resize(header_len + size as usize, 0);
output[..code.len()].copy_from_slice(code);
output[code.len()] = size;
match_encoder!(hash for (input, &mut output[header_len..]) {
let (offset, mut output) = encode_hash(hash);
match_encoder!(hash for (input, &mut output[offset ..]) {
SHA1 => sha1::Sha1,
SHA2256 => sha2::Sha256,
SHA2512 => sha2::Sha512,
@ -82,77 +74,71 @@ pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
Blake2s256 => blake2::Blake2s,
});
Ok(Multihash { bytes: output })
Ok(Multihash { bytes: output.freeze() })
}
// Encode the given [`Hash`] value and ensure the returned [`BytesMut`]
// has enough capacity to hold the actual digest.
fn encode_hash(hash: Hash) -> (usize, BytesMut) {
let mut buf = encode::u16_buffer();
let code = encode::u16(hash.code(), &mut buf);
let len = code.len() + 1 + usize::from(hash.size());
let mut output = BytesMut::with_capacity(len);
output.put_slice(code);
output.put_u8(hash.size());
output.resize(len, 0);
(code.len() + 1, output)
}
/// Represents a valid multihash.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Multihash {
bytes: Vec<u8>,
}
pub struct Multihash { bytes: Bytes }
impl Multihash {
/// Verifies whether `bytes` contains a valid multihash, and if so returns a `Multihash`.
#[inline]
pub fn from_bytes(bytes: Vec<u8>) -> Result<Multihash, DecodeOwnedError> {
if let Err(err) = MultihashRef::from_slice(&bytes) {
return Err(DecodeOwnedError {
error: err,
data: bytes,
});
return Err(DecodeOwnedError { error: err, data: bytes });
}
Ok(Multihash { bytes })
Ok(Multihash { bytes: Bytes::from(bytes) })
}
/// Generates a random `Multihash` from a cryptographically secure PRNG.
pub fn random(hash: Hash) -> Multihash {
let mut buf = encode::u16_buffer();
let code = encode::u16(hash.code(), &mut buf);
let header_len = code.len() + 1;
let size = hash.size();
let mut output = Vec::new();
output.resize(header_len + size as usize, 0);
output[..code.len()].copy_from_slice(code);
output[code.len()] = size;
for b in output[header_len..].iter_mut() {
*b = rand::random();
}
Multihash {
bytes: output,
}
let (offset, mut bytes) = encode_hash(hash);
rand::thread_rng().fill_bytes(&mut bytes[offset ..]);
Multihash { bytes: bytes.freeze() }
}
/// Returns the bytes representation of the multihash.
#[inline]
pub fn into_bytes(self) -> Vec<u8> {
self.bytes
self.to_vec()
}
/// Returns the bytes representation of the multihash.
pub fn to_vec(&self) -> Vec<u8> {
Vec::from(&self.bytes[..])
}
/// Returns the bytes representation of this multihash.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
/// Builds a `MultihashRef` corresponding to this `Multihash`.
#[inline]
pub fn as_ref(&self) -> MultihashRef<'_> {
MultihashRef { bytes: &self.bytes }
}
/// Returns which hashing algorithm is used in this multihash.
#[inline]
pub fn algorithm(&self) -> Hash {
self.as_ref().algorithm()
}
/// Returns the hashed data.
#[inline]
pub fn digest(&self) -> &[u8] {
self.as_ref().digest()
}
@ -165,7 +151,6 @@ impl AsRef<[u8]> for Multihash {
}
impl<'a> PartialEq<MultihashRef<'a>> for Multihash {
#[inline]
fn eq(&self, other: &MultihashRef<'a>) -> bool {
&*self.bytes == other.bytes
}
@ -181,69 +166,66 @@ impl TryFrom<Vec<u8>> for Multihash {
/// Represents a valid multihash.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct MultihashRef<'a> {
bytes: &'a [u8],
}
pub struct MultihashRef<'a> { bytes: &'a [u8] }
impl<'a> MultihashRef<'a> {
/// Verifies whether `bytes` contains a valid multihash, and if so returns a `MultihashRef`.
pub fn from_slice(input: &'a [u8]) -> Result<MultihashRef<'a>, DecodeError> {
/// Creates a `MultihashRef` from the given `input`.
pub fn from_slice(input: &'a [u8]) -> Result<Self, DecodeError> {
if input.is_empty() {
return Err(DecodeError::BadInputLength);
return Err(DecodeError::BadInputLength)
}
// NOTE: We choose u16 here because there is no hashing algorithm implemented in this crate
// whose length exceeds 2^16 - 1.
// Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct.
std::convert::identity::<fn(&'_ Hash) -> u16>(Hash::code);
let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?;
let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?;
let hash_len = alg.size() as usize;
let hash_len = usize::from(alg.size());
// Length of input after hash code should be exactly hash_len + 1
if bytes.len() != hash_len + 1 {
return Err(DecodeError::BadInputLength);
return Err(DecodeError::BadInputLength)
}
if bytes[0] as usize != hash_len {
return Err(DecodeError::BadInputLength);
if usize::from(bytes[0]) != hash_len {
return Err(DecodeError::BadInputLength)
}
Ok(MultihashRef { bytes: input })
}
/// Returns which hashing algorithm is used in this multihash.
#[inline]
pub fn algorithm(&self) -> Hash {
let (code, _) = decode::u16(&self.bytes).expect("multihash is known to be valid algorithm");
let code = decode::u16(&self.bytes)
.expect("multihash is known to be valid algorithm")
.0;
Hash::from_code(code).expect("multihash is known to be valid")
}
/// Returns the hashed data.
#[inline]
pub fn digest(&self) -> &'a [u8] {
let (_, bytes) = decode::u16(&self.bytes).expect("multihash is known to be valid digest");
&bytes[1..]
let bytes = decode::u16(&self.bytes)
.expect("multihash is known to be valid digest")
.1;
&bytes[1 ..]
}
/// Builds a `Multihash` that owns the data.
///
/// This operation allocates.
#[inline]
pub fn into_owned(&self) -> Multihash {
pub fn into_owned(self) -> Multihash {
Multihash {
bytes: self.bytes.to_owned(),
bytes: Bytes::from(self.bytes)
}
}
/// Returns the bytes representation of this multihash.
#[inline]
pub fn as_bytes(&self) -> &'a [u8] {
&self.bytes
}
}
impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
#[inline]
fn eq(&self, other: &Multihash) -> bool {
self.bytes == &*other.bytes
}
@ -263,6 +245,7 @@ pub fn to_hex(bytes: &[u8]) -> String {
#[cfg(test)]
mod tests {
use crate::{Hash, Multihash};
use std::convert::TryFrom;
#[test]
fn rand_generates_valid_multihash() {
@ -275,7 +258,7 @@ mod tests {
for _ in 0 .. 2000 {
let hash = Multihash::random(hash_fn);
assert_eq!(hash, Multihash::from_bytes(hash.clone().into_bytes()).unwrap());
assert_eq!(hash, Multihash::try_from(hash.to_vec()).unwrap());
}
}
}