Rework the multihash crate (#403)

* Use Multihash and MultihashRef instead

* Don't use star imports

* Split EncodeError and DecodeError

* Add DecodeOwnedError

* Some cleanup

* Remove Hash::name()

* Some crate root documentation

* Add some more methods

* Fix tests

* Add PartialEq between Multihash and MultihashRef

* Fix the rest of the repo

* Rename hash_data() to digest()

* Add comment about varint

* Remove Error::description impls
This commit is contained in:
Pierre Krieger
2018-08-09 14:51:09 +02:00
committed by GitHub
parent 486a9694d9
commit 078fa1cb33
11 changed files with 282 additions and 196 deletions

View File

@@ -44,8 +44,14 @@ impl From<multibase::Error> for Error {
}
}
impl From<multihash::Error> for Error {
fn from(_: multihash::Error) -> Error {
impl From<multihash::DecodeError> for Error {
fn from(_: multihash::DecodeError) -> Error {
Error::ParsingError
}
}
impl From<multihash::DecodeOwnedError> for Error {
fn from(_: multihash::DecodeOwnedError) -> Error {
Error::ParsingError
}
}

View File

@@ -54,7 +54,7 @@ impl Cid {
/// Create a new CID from a prefix and some data.
pub fn new_from_prefix(prefix: &Prefix, data: &[u8]) -> Cid {
let mut hash = multihash::encode(prefix.mh_type.to_owned(), data).unwrap();
let mut hash = multihash::encode(prefix.mh_type.to_owned(), data).unwrap().into_bytes();
hash.truncate(prefix.mh_len + 2);
Cid {
version: prefix.version,
@@ -110,13 +110,13 @@ impl Cid {
pub fn prefix(&self) -> Prefix {
// Unwrap is safe, as this should have been validated on creation
let mh = multihash::decode(self.hash.as_slice()).unwrap();
let mh = multihash::MultihashRef::from_slice(self.hash.as_slice()).unwrap();
Prefix {
version: self.version,
codec: self.codec.to_owned(),
mh_type: mh.alg,
mh_len: mh.digest.len(),
mh_type: mh.algorithm(),
mh_len: mh.digest().len(),
}
}
}
@@ -138,7 +138,8 @@ impl Prefix {
let version = Version::from(raw_version)?;
let codec = Codec::from(raw_codec)?;
let mh_type = multihash::Hash::from_code(raw_mh_type as u8)?;
let mh_type = multihash::Hash::from_code(raw_mh_type as u8)
.ok_or(Error::ParsingError)?;
let mh_len = cur.read_varint()?;

View File

@@ -71,7 +71,7 @@ impl ToCid for [u8] {
fn to_cid(&self) -> Result<Cid> {
if Version::is_v0_binary(self) {
// Verify that hash can be decoded, this is very cheap
multihash::decode(self)?;
multihash::MultihashRef::from_slice(self)?;
Ok(Cid::new(Codec::DagProtobuf, Version::V0, self))
} else {
@@ -85,7 +85,7 @@ impl ToCid for [u8] {
let hash = &self[cur.position() as usize..];
// Verify that hash can be decoded, this is very cheap
multihash::decode(hash)?;
multihash::MultihashRef::from_slice(hash)?;
Ok(Cid::new(codec, version, hash))
}

View File

@@ -7,7 +7,7 @@ use cid::{Cid, Version, Codec, Error, Prefix};
fn basic_marshalling() {
let h = multihash::encode(multihash::Hash::SHA2256, b"beep boop").unwrap();
let cid = Cid::new(Codec::DagProtobuf, Version::V1, &h);
let cid = Cid::new(Codec::DagProtobuf, Version::V1, h.as_bytes());
let data = cid.to_bytes();
let out = Cid::from(data).unwrap();
@@ -45,7 +45,7 @@ fn prefix_roundtrip() {
let data = b"awesome test content";
let h = multihash::encode(multihash::Hash::SHA2256, data).unwrap();
let cid = Cid::new(Codec::DagProtobuf, Version::V1, &h);
let cid = Cid::new(Codec::DagProtobuf, Version::V1, h.as_bytes());
let prefix = cid.prefix();
let cid2 = Cid::new_from_prefix(&prefix, data);

View File

@@ -29,7 +29,7 @@ use PublicKey;
// TODO: maybe keep things in decoded version?
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct PeerId {
multihash: Vec<u8>,
multihash: multihash::Multihash,
}
impl fmt::Debug for PeerId {
@@ -43,20 +43,18 @@ impl PeerId {
#[inline]
pub fn from_public_key(public_key: PublicKey) -> PeerId {
let protobuf = public_key.into_protobuf_encoding();
let data = multihash::encode(multihash::Hash::SHA2256, &protobuf)
let multihash = multihash::encode(multihash::Hash::SHA2256, &protobuf)
.expect("sha2-256 is always supported");
PeerId { multihash: data }
PeerId { multihash }
}
/// Checks whether `data` is a valid `PeerId`. If so, returns the `PeerId`. If not, returns
/// back the data as an error.
#[inline]
pub fn from_bytes(data: Vec<u8>) -> Result<PeerId, Vec<u8>> {
let is_valid = multihash::decode(&data).is_ok();
if is_valid {
Ok(PeerId { multihash: data })
} else {
Err(data)
match multihash::Multihash::from_bytes(data) {
Ok(multihash) => Ok(PeerId { multihash }),
Err(err) => Err(err.data),
}
}
@@ -65,7 +63,7 @@ impl PeerId {
/// Note that this is not the same as the public key of the peer.
#[inline]
pub fn into_bytes(self) -> Vec<u8> {
self.multihash
self.multihash.into_bytes()
}
/// Returns a raw bytes representation of this `PeerId`.
@@ -73,21 +71,19 @@ impl PeerId {
/// Note that this is not the same as the public key of the peer.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.multihash
self.multihash.as_bytes()
}
/// Returns a base-58 encoded string of this `PeerId`.
#[inline]
pub fn to_base58(&self) -> String {
bs58::encode(&self.multihash).into_string()
bs58::encode(self.multihash.as_bytes()).into_string()
}
/// Returns the raw bytes of the hash of this `PeerId`.
#[inline]
pub fn hash(&self) -> &[u8] {
multihash::decode(&self.multihash)
.expect("our inner value should always be valid")
.digest
pub fn digest(&self) -> &[u8] {
self.multihash.digest()
}
/// Checks whether the public key passed as parameter matches the public key of this `PeerId`.
@@ -95,13 +91,10 @@ impl PeerId {
/// Returns `None` if this `PeerId`s hash algorithm is not supported when encoding the
/// given public key, otherwise `Some` boolean as the result of an equality check.
pub fn is_public_key(&self, public_key: &PublicKey) -> Option<bool> {
let alg = multihash::decode(&self.multihash)
.expect("our inner value should always be valid")
.alg;
let alg = self.multihash.algorithm();
match multihash::encode(alg, &public_key.clone().into_protobuf_encoding()) {
Ok(compare) => Some(compare == self.multihash),
Err(multihash::Error::UnsupportedType) => None,
Err(_) => Some(false),
Err(multihash::EncodeError::UnsupportedType) => None,
}
}
}

View File

@@ -136,8 +136,8 @@ impl KBucketsPeerId for PeerId {
fn distance_with(&self, other: &Self) -> Self::Distance {
// Note that we don't compare the hash functions because there's no chance of collision
// of the same value hashed with two different hash functions.
let my_hash = U512::from(self.hash());
let other_hash = U512::from(other.hash());
let my_hash = U512::from(self.digest());
let other_hash = U512::from(other.digest());
my_hash ^ other_hash
}

View File

@@ -213,10 +213,10 @@ fn main() {
.map_err(|(err, _)| err)
.map(|(out, _)| out.unwrap())
.and_then(|out| {
let local_hash = U512::from(my_peer_id.hash());
let local_hash = U512::from(my_peer_id.digest());
println!("Results of peer discovery for {:?}:", my_peer_id);
for n in out {
let other_hash = U512::from(n.hash());
let other_hash = U512::from(n.digest());
let dist = 512 - (local_hash ^ other_hash).leading_zeros();
println!("* {:?} (distance bits = {:?} (lower is better))", n, dist);
}

View File

@@ -1,24 +1,63 @@
use std::{fmt, error};
#[derive(Debug)]
pub enum Error {
/// Error that can happen when encoding some bytes into a multihash.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum EncodeError {
/// The requested hash algorithm isn't supported by this library.
UnsupportedType,
BadInputLength,
UnknownCode,
}
impl fmt::Display for Error {
impl fmt::Display for EncodeError {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(error::Error::description(self))
}
}
impl error::Error for Error {
fn description(&self) -> &str {
match *self {
Error::UnsupportedType => "This type is not supported yet",
Error::BadInputLength => "Not matching input length",
Error::UnknownCode => "Found unknown code",
EncodeError::UnsupportedType => write!(f, "This type is not supported yet"),
}
}
}
impl error::Error for EncodeError {
}
/// Error that can happen when decoding some bytes into a multihash.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DecodeError {
/// The input doesn't have a correct length.
///
/// `MultihashRef::from_slice` reports this for an empty input, for an input whose total
/// length or length byte doesn't match the digest size of the algorithm, and also when one
/// of the two header bytes is 128 or more.
BadInputLength,
/// The code of the hashing algorithm is incorrect, meaning that `Hash::from_code` doesn't
/// know any algorithm with that code.
UnknownCode,
}
impl fmt::Display for DecodeError {
    /// Writes a short, human-readable description of the error into `f`.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the message first, then emit it with a single write.
        let message = match *self {
            DecodeError::BadInputLength => "Not matching input length",
            DecodeError::UnknownCode => "Found unknown code",
        };
        f.write_str(message)
    }
}
impl error::Error for DecodeError {
}
/// Error that can happen when decoding some bytes.
///
/// Same as `DecodeError`, but allows retrieving the data whose decoding was attempted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodeOwnedError {
/// The error that made the decoding fail.
pub error: DecodeError,
/// The data whose decoding was attempted.
pub data: Vec<u8>,
}
impl fmt::Display for DecodeOwnedError {
/// Displays the inner `DecodeError` only; the `data` bytes are not printed.
#[inline]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.error)
}
}
impl error::Error for DecodeOwnedError {
}

View File

@@ -1,4 +1,3 @@
use errors::Error;
/// List of types currently supported in the multihash spec.
///
@@ -34,88 +33,61 @@ pub enum Hash {
}
impl Hash {
/// Get the corresponding hash code
/// Get the corresponding hash code.
pub fn code(&self) -> u8 {
use Hash::*;
match *self {
SHA1 => 0x11,
SHA2256 => 0x12,
SHA2512 => 0x13,
SHA3224 => 0x17,
SHA3256 => 0x16,
SHA3384 => 0x15,
SHA3512 => 0x14,
Keccak224 => 0x1A,
Keccak256 => 0x1B,
Keccak384 => 0x1C,
Keccak512 => 0x1D,
Blake2b => 0x40,
Blake2s => 0x41,
Hash::SHA1 => 0x11,
Hash::SHA2256 => 0x12,
Hash::SHA2512 => 0x13,
Hash::SHA3224 => 0x17,
Hash::SHA3256 => 0x16,
Hash::SHA3384 => 0x15,
Hash::SHA3512 => 0x14,
Hash::Keccak224 => 0x1A,
Hash::Keccak256 => 0x1B,
Hash::Keccak384 => 0x1C,
Hash::Keccak512 => 0x1D,
Hash::Blake2b => 0x40,
Hash::Blake2s => 0x41,
}
}
/// Get the hash length in bytes
/// Get the hash length in bytes.
pub fn size(&self) -> u8 {
use Hash::*;
match *self {
SHA1 => 20,
SHA2256 => 32,
SHA2512 => 64,
SHA3224 => 28,
SHA3256 => 32,
SHA3384 => 48,
SHA3512 => 64,
Keccak224 => 28,
Keccak256 => 32,
Keccak384 => 48,
Keccak512 => 64,
Blake2b => 64,
Blake2s => 32,
Hash::SHA1 => 20,
Hash::SHA2256 => 32,
Hash::SHA2512 => 64,
Hash::SHA3224 => 28,
Hash::SHA3256 => 32,
Hash::SHA3384 => 48,
Hash::SHA3512 => 64,
Hash::Keccak224 => 28,
Hash::Keccak256 => 32,
Hash::Keccak384 => 48,
Hash::Keccak512 => 64,
Hash::Blake2b => 64,
Hash::Blake2s => 32,
}
}
/// Get the human readable name
pub fn name(&self) -> &str {
use Hash::*;
match *self {
SHA1 => "SHA1",
SHA2256 => "SHA2-256",
SHA2512 => "SHA2-512",
SHA3512 => "SHA3-512",
SHA3384 => "SHA3-384",
SHA3256 => "SHA3-256",
SHA3224 => "SHA3-224",
Keccak224 => "Keccak-224",
Keccak256 => "Keccak-256",
Keccak384 => "Keccak-384",
Keccak512 => "Keccak-512",
Blake2b => "Blake-2b",
Blake2s => "Blake-2s",
}
}
pub fn from_code(code: u8) -> Result<Hash, Error> {
use Hash::*;
Ok(match code {
0x11 => SHA1,
0x12 => SHA2256,
0x13 => SHA2512,
0x14 => SHA3512,
0x15 => SHA3384,
0x16 => SHA3256,
0x17 => SHA3224,
0x1A => Keccak224,
0x1B => Keccak256,
0x1C => Keccak384,
0x1D => Keccak512,
0x40 => Blake2b,
0x41 => Blake2s,
_ => return Err(Error::UnknownCode),
/// Looks up the hashing algorithm identified by `code`.
///
/// Returns `None` if no algorithm matches the code.
pub fn from_code(code: u8) -> Option<Hash> {
    match code {
        0x11 => Some(Hash::SHA1),
        0x12 => Some(Hash::SHA2256),
        0x13 => Some(Hash::SHA2512),
        0x14 => Some(Hash::SHA3512),
        0x15 => Some(Hash::SHA3384),
        0x16 => Some(Hash::SHA3256),
        0x17 => Some(Hash::SHA3224),
        0x1A => Some(Hash::Keccak224),
        0x1B => Some(Hash::Keccak256),
        0x1C => Some(Hash::Keccak384),
        0x1D => Some(Hash::Keccak512),
        0x40 => Some(Hash::Blake2b),
        0x41 => Some(Hash::Blake2s),
        _ => None,
    }
}
}

View File

@@ -1,22 +1,24 @@
/// ! # multihash
/// !
/// ! Implementation of [multihash](https://github.com/multiformats/multihash)
/// ! in Rust.
/// Representation of a Multiaddr.
//! # Multihash
//!
//! Implementation of [multihash](https://github.com/multiformats/multihash) in Rust.
//!
//! A `Multihash` is a structure that contains a hashing algorithm, plus some hashed data.
//! A `MultihashRef` is the same as a `Multihash`, except that it doesn't own its data.
//!
extern crate sha1;
extern crate sha2;
extern crate tiny_keccak;
mod errors;
mod hashes;
use std::fmt::Write;
use sha2::Digest;
use tiny_keccak::Keccak;
mod hashes;
pub use hashes::*;
mod errors;
pub use errors::*;
pub use hashes::Hash;
pub use errors::{EncodeError, DecodeError, DecodeOwnedError};
// Helper macro for encoding input into output using sha1, sha2 or tiny_keccak
macro_rules! encode {
@@ -47,20 +49,16 @@ macro_rules! match_encoder {
Hash::$hashtype => encode!($lib, $method, $input, $output),
)*
_ => return Err(Error::UnsupportedType)
_ => return Err(EncodeError::UnsupportedType)
}
})
}
/// Encodes data into a multihash.
///
/// The returned data is raw bytes. To make is more human-friendly, you can encode it (hex,
/// base58, base64, etc).
///
/// # Errors
///
/// Will return an error if the specified hash type is not supported. See the docs for `Hash`
/// Will return an error if the specified hash type is not supported. See the docs for `Hash`
/// to see what is supported.
///
/// # Examples
@@ -69,13 +67,13 @@ macro_rules! match_encoder {
/// use multihash::{encode, Hash};
///
/// assert_eq!(
/// encode(Hash::SHA2256, b"hello world").unwrap(),
/// encode(Hash::SHA2256, b"hello world").unwrap().into_bytes(),
/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196,
/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233]
/// );
/// ```
///
pub fn encode(hash: Hash, input: &[u8]) -> Result<Vec<u8>, Error> {
pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
let size = hash.size();
let mut output = Vec::new();
output.resize(2 + size as usize, 0);
@@ -96,59 +94,137 @@ pub fn encode(hash: Hash, input: &[u8]) -> Result<Vec<u8>, Error> {
Keccak512 => tiny::new_keccak512,
});
Ok(output)
Ok(Multihash { bytes: output })
}
/// Decodes bytes into a multihash
///
/// # Errors
///
/// Returns an error if the bytes are not a valid multihash.
///
/// # Examples
///
/// ```
/// use multihash::{decode, Hash, Multihash};
///
/// // use the data from the `encode` example
/// let data = vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218,
/// 125, 171, 250, 196, 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233];
///
/// assert_eq!(
/// decode(&data).unwrap(),
/// Multihash {
/// alg: Hash::SHA2256,
/// digest: &data[2..]
/// }
/// );
/// ```
///
pub fn decode(input: &[u8]) -> Result<Multihash, Error> {
if input.is_empty() {
return Err(Error::BadInputLength);
}
let code = input[0];
let alg = Hash::from_code(code)?;
let hash_len = alg.size() as usize;
// length of input should be exactly hash_len + 2
if input.len() != hash_len + 2 {
return Err(Error::BadInputLength);
}
Ok(Multihash {
alg: alg,
digest: &input[2..],
})
/// Represents a valid multihash that owns its data.
///
/// See `MultihashRef` for the equivalent that borrows its data instead.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Multihash {
// Bytes representation of the multihash. `Multihash::from_bytes` only stores bytes that
// `MultihashRef::from_slice` has accepted.
bytes: Vec<u8>
}
/// Represents a valid multihash, by associating the hash algorithm with the data
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub struct Multihash<'a> {
pub alg: Hash,
pub digest: &'a [u8],
impl Multihash {
/// Verifies whether `bytes` contains a valid multihash, and if so returns a `Multihash`.
#[inline]
pub fn from_bytes(bytes: Vec<u8>) -> Result<Multihash, DecodeOwnedError> {
if let Err(err) = MultihashRef::from_slice(&bytes) {
return Err(DecodeOwnedError {
error: err,
data: bytes,
});
}
Ok(Multihash { bytes })
}
/// Returns the bytes representation of the multihash.
#[inline]
pub fn into_bytes(self) -> Vec<u8> {
self.bytes
}
/// Returns the bytes representation of this multihash.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
/// Builds a `MultihashRef` corresponding to this `Multihash`.
#[inline]
pub fn as_ref(&self) -> MultihashRef {
MultihashRef { bytes: &self.bytes }
}
/// Returns which hashing algorithm is used in this multihash.
#[inline]
pub fn algorithm(&self) -> Hash {
self.as_ref().algorithm()
}
/// Returns the hashed data.
#[inline]
pub fn digest(&self) -> &[u8] {
self.as_ref().digest()
}
}
impl<'a> PartialEq<MultihashRef<'a>> for Multihash {
    /// An owned multihash equals a borrowed one when both hold the same bytes.
    #[inline]
    fn eq(&self, other: &MultihashRef<'a>) -> bool {
        self.bytes.as_slice() == other.bytes
    }
}
/// Represents a valid multihash that borrows its data.
///
/// See `Multihash` for the equivalent that owns its data instead.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct MultihashRef<'a> {
// Bytes representation of the multihash, borrowed for `'a`.
bytes: &'a [u8]
}
impl<'a> MultihashRef<'a> {
    /// Verifies whether `input` contains a valid multihash, and if so returns a `MultihashRef`.
    ///
    /// The expected layout is one byte holding the code of the hashing algorithm, one byte
    /// holding the length of the digest, and then the digest itself.
    ///
    /// # Errors
    ///
    /// Returns `DecodeError::BadInputLength` if `input` is too short to hold the two header
    /// bytes, if either header byte is 128 or more, or if the length of `input` or the length
    /// byte doesn't match the digest size of the algorithm. Returns `DecodeError::UnknownCode`
    /// if the first byte isn't the code of a known algorithm.
    pub fn from_slice(input: &'a [u8]) -> Result<MultihashRef<'a>, DecodeError> {
        // Both header bytes are read below, so anything shorter than two bytes has to be
        // rejected here: indexing `input[1]` on a one-byte slice would panic.
        if input.len() < 2 {
            return Err(DecodeError::BadInputLength);
        }

        // TODO: note that `input[0]` and `input[1]` are technically variable-length integers,
        // but there's no hashing algorithm implemented in this crate whose code or digest
        // length is 128 or more
        let code = input[0];
        let len_byte = input[1];
        if code >= 128 || len_byte >= 128 {
            return Err(DecodeError::BadInputLength);
        }

        let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?;
        let hash_len = alg.size() as usize;

        // length of input should be exactly hash_len + 2
        if input.len() != hash_len + 2 {
            return Err(DecodeError::BadInputLength);
        }

        if len_byte as usize != hash_len {
            return Err(DecodeError::BadInputLength);
        }

        Ok(MultihashRef { bytes: input })
    }

    /// Returns which hashing algorithm is used in this multihash.
    ///
    /// # Panics
    ///
    /// Panics if the first byte isn't a known algorithm code, which `from_slice` rules out.
    #[inline]
    pub fn algorithm(&self) -> Hash {
        Hash::from_code(self.bytes[0]).expect("multihash is known to be valid")
    }

    /// Returns the hashed data, that is everything after the two header bytes.
    #[inline]
    pub fn digest(&self) -> &'a [u8] {
        &self.bytes[2..]
    }

    /// Builds a `Multihash` that owns the data.
    ///
    /// This operation allocates.
    #[inline]
    pub fn into_owned(&self) -> Multihash {
        Multihash { bytes: self.bytes.to_owned() }
    }

    /// Returns the bytes representation of this multihash.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.bytes
    }
}
impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
    /// A borrowed multihash equals an owned one when both hold the same bytes.
    #[inline]
    fn eq(&self, other: &Multihash) -> bool {
        self.bytes == other.bytes.as_slice()
    }
}
/// Convert bytes to a hex representation

View File

@@ -18,9 +18,9 @@ macro_rules! assert_encode {
{$( $alg:ident, $data:expr, $expect:expr; )*} => {
$(
assert_eq!(
encode(Hash::$alg, $data).expect("Must be supported"),
encode(Hash::$alg, $data).expect("Must be supported").into_bytes(),
hex_to_bytes($expect),
"{} encodes correctly", Hash::$alg.name()
"{:?} encodes correctly", Hash::$alg
);
)*
}
@@ -49,9 +49,9 @@ macro_rules! assert_decode {
$(
let hash = hex_to_bytes($hash);
assert_eq!(
decode(&hash).unwrap().alg,
MultihashRef::from_slice(&hash).unwrap().algorithm(),
Hash::$alg,
"{} decodes correctly", Hash::$alg.name()
"{:?} decodes correctly", Hash::$alg
);
)*
}
@@ -79,9 +79,9 @@ macro_rules! assert_roundtrip {
($( $alg:ident ),*) => {
$(
{
let hash: Vec<u8> = encode(Hash::$alg, b"helloworld").unwrap();
let hash: Vec<u8> = encode(Hash::$alg, b"helloworld").unwrap().into_bytes();
assert_eq!(
decode(&hash).unwrap().alg,
MultihashRef::from_slice(&hash).unwrap().algorithm(),
Hash::$alg
);
}
@@ -100,5 +100,4 @@ fn assert_roundtrip() {
#[test]
fn hash_types() {
assert_eq!(Hash::SHA2256.size(), 32);
assert_eq!(Hash::SHA2256.name(), "SHA2-256");
}