< 返回版块

Borber 发表于 2021-12-23 15:51

Tags:hash

这是原crate的地址 https://github.com/BLAKE3-team/BLAKE3/blob/master/b3sum/src/main.rs

以下是我想自己简化的代码, 运行起来感觉是死循环


use anyhow::Result;
use std::fs::File;
use std::io;
use std::io::prelude::*;
use std::path::Path;

const DERIVE_KEY: &str = "BORBER";

/// A source of bytes to be hashed.
///
/// Built by `Input::open`, which picks the variant based on whether the file
/// could be memory-mapped.
enum Input {
    /// The file's contents as a memory map, wrapped in a cursor so it can
    /// also be consumed through `Read`.
    Mmap(io::Cursor<memmap::Mmap>),
    /// A plain file handle that is read incrementally.
    File(File),
}

impl Input {
    /// Opens `path` and wraps it as an `Input`.
    ///
    /// The file is memory-mapped when `maybe_memmap_file` returns a mapping;
    /// otherwise the open file handle itself is kept.
    ///
    /// # Errors
    ///
    /// Propagates any error from opening the file or from
    /// `maybe_memmap_file`.
    fn open(path: &Path) -> Result<Self> {
        let handle = File::open(path)?;
        let input = match maybe_memmap_file(&handle)? {
            Some(mapping) => Self::Mmap(io::Cursor::new(mapping)),
            None => Self::File(handle),
        };
        Ok(input)
    }

    /// Feeds this input into a BLAKE3 hasher keyed with `DERIVE_KEY` and
    /// returns the hasher's extendable-output reader.
    ///
    /// # Errors
    ///
    /// Propagates read errors from `copy_wide` for the `File` variant.
    fn hash(&mut self) -> Result<blake3::OutputReader> {
        let mut state = blake3::Hasher::new_derive_key(DERIVE_KEY);
        match self {
            Self::File(source) => {
                // Stream the file through the hasher in fixed-size chunks.
                copy_wide(source, &mut state)?;
            }
            Self::Mmap(mapped) => {
                // Hash the whole mapping at once, regardless of the cursor
                // position.
                state.update_rayon(mapped.get_ref());
            }
        }
        Ok(state.finalize_xof())
    }
}

impl Read for Input {
    /// Reads from whichever underlying source this `Input` wraps.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Pick the concrete reader once, then delegate to it.
        let source: &mut dyn Read = match self {
            Self::Mmap(cursor) => cursor,
            Self::File(file) => file,
        };
        source.read(buf)
    }
}


/// Memory-maps `file` when it is a good candidate for mapping.
///
/// Returns `Ok(None)` when the handle is not a regular file, when the file is
/// smaller than 16 KiB (which includes empty files), or when its length
/// exceeds `isize::MAX`. Otherwise returns a mapping of the whole file.
///
/// # Errors
///
/// Propagates failures from querying the metadata or creating the mapping.
fn maybe_memmap_file(file: &File) -> Result<Option<memmap::Mmap>> {
    /// Files shorter than this are not mapped.
    const MIN_MMAP_LEN: u64 = 16 * 1024;

    let info = file.metadata()?;
    let len = info.len();

    let mappable = info.is_file() && len >= MIN_MMAP_LEN && len <= isize::MAX as u64;
    if !mappable {
        return Ok(None);
    }

    // SAFETY: NOTE(review): a file mapping is presumably only sound while the
    // file is not truncated or modified by someone else; nothing in this
    // function enforces that — confirm this risk is acceptable.
    let mapping = unsafe { memmap::MmapOptions::new().len(len as usize).map(file)? };
    Ok(Some(mapping))
}

/// Streams everything from `reader` into `hasher` through a 64 KiB buffer.
///
/// Reads that fail with `ErrorKind::Interrupted` are retried. Returns the
/// total number of bytes fed to the hasher.
///
/// # Errors
///
/// Returns the first read error that is not `Interrupted`.
fn copy_wide(mut reader: impl Read, hasher: &mut blake3::Hasher) -> io::Result<u64> {
    let mut chunk = [0u8; 65536];
    let mut bytes_hashed: u64 = 0;
    loop {
        let filled = match reader.read(&mut chunk) {
            Ok(len) => len,
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        // A zero-length read marks the end of the input.
        if filled == 0 {
            return Ok(bytes_hashed);
        }
        hasher.update(&chunk[..filled]);
        bytes_hashed += filled as u64;
    }
}

pub fn hash_one(path: &Path) -> Result<String> {
    let mut s = String::new();
    let mut input = Input::open(path)?;
    let mut output = input.hash()?;
    output.read_to_string(&mut s);
    Ok(s)
}

Cargo.toml

anyhow = "1.0"
memmap = "0.7.0"
blake3 = { version = "1", features = ["rayon"] }

评论区

写评论
作者 Borber 2021-12-24 10:42

原来如此, 太感谢了.

--
👇
eric642:

// Quoted from the blake3 crate: the `Read` implementation for `OutputReader`.
impl std::io::Read for OutputReader {
    /// Fills all of `buf` via `fill` and reports `buf.len()` bytes read.
    ///
    /// For a non-empty buffer the reported count is never 0, so callers that
    /// loop until `Ok(0)` (such as `read_to_string`) never see end-of-stream.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.fill(buf);
        Ok(buf.len())
    }
}
/// Writes output bytes into `buf` until it is completely full, continuing
/// from the position where the previous call stopped.
pub fn fill(&mut self, mut buf: &mut [u8]) {
        while !buf.is_empty() {
            // NOTE(review): `root_output_block` is not shown here; presumably
            // it yields the output block for the current counter — confirm.
            let block: [u8; BLOCK_LEN] = self.inner.root_output_block();
            // Skip the part of this block that earlier calls already handed out.
            let output_bytes = &block[self.position_within_block as usize..];
            // Copy as much as fits: the rest of the block or the rest of `buf`.
            let take = cmp::min(buf.len(), output_bytes.len());
            buf[..take].copy_from_slice(&output_bytes[..take]);
            buf = &mut buf[take..];
            self.position_within_block += take as u8;
            // Block exhausted: advance the counter and restart at offset 0.
            if self.position_within_block == BLOCK_LEN as u8 {
                self.inner.counter += 1;
                self.position_within_block = 0;
            }
        }
    }

这个库的 read 实现每次调用都会把缓冲区填满并返回 buf.len()(XOF 输出是无限长的流,永远不会返回 0 来表示 EOF),导致标准库的 read_to_string 一直读下去,形成死循环。

eric642 2021-12-23 16:59
// Quoted from the blake3 crate: the `Read` implementation for `OutputReader`.
impl std::io::Read for OutputReader {
    /// Fills all of `buf` via `fill` and reports `buf.len()` bytes read.
    ///
    /// For a non-empty buffer the reported count is never 0, so callers that
    /// loop until `Ok(0)` (such as `read_to_string`) never see end-of-stream.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.fill(buf);
        Ok(buf.len())
    }
}
/// Writes output bytes into `buf` until it is completely full, continuing
/// from the position where the previous call stopped.
pub fn fill(&mut self, mut buf: &mut [u8]) {
        while !buf.is_empty() {
            // NOTE(review): `root_output_block` is not shown here; presumably
            // it yields the output block for the current counter — confirm.
            let block: [u8; BLOCK_LEN] = self.inner.root_output_block();
            // Skip the part of this block that earlier calls already handed out.
            let output_bytes = &block[self.position_within_block as usize..];
            // Copy as much as fits: the rest of the block or the rest of `buf`.
            let take = cmp::min(buf.len(), output_bytes.len());
            buf[..take].copy_from_slice(&output_bytes[..take]);
            buf = &mut buf[take..];
            self.position_within_block += take as u8;
            // Block exhausted: advance the counter and restart at offset 0.
            if self.position_within_block == BLOCK_LEN as u8 {
                self.inner.counter += 1;
                self.position_within_block = 0;
            }
        }
    }

这个库的 read 实现每次调用都会把缓冲区填满并返回 buf.len()(XOF 输出是无限长的流,永远不会返回 0 来表示 EOF),导致标准库的 read_to_string 一直读下去,形成死循环。

作者 Borber 2021-12-23 16:44

修改第33行为

Ok(hasher.finalize())

最后调用这个就好了

let s = output.to_string();
oksbsb 2021-12-23 16:26
output.read_to_string(&mut s);    <=
1 共 4 条评论, 1 页