我做了一次性能(performance)测试,发现 u32/u32 的速度竟然比 f64/f64 还慢。
有没有大佬(dalao)知道这是怎么回事?
extern crate rayon;
use rayon::prelude::*;
/// Benchmarks three ways of computing the sum of `i / j` (integer quotient)
/// over `0 <= i < 65536`, `1 <= j < 65536`, with the outer loop run in
/// parallel through rayon in chunks of 4096 values of `i`:
///
/// 1. native `u32 / u32` division,
/// 2. multiplication by a precomputed fixed-point reciprocal followed by a shift,
/// 3. `f64 / f64` division truncated back to `u32`.
///
/// The elapsed time of each pass is printed, followed by a cross-check value
/// that is `0` when the three passes produce the same total.
fn main() {
    // Pass 1: native u32 division.
    let b = std::time::Instant::now();
    let r1 = (0u32..65536)
        .into_par_iter()
        .chunks(4096)
        .map(|x| {
            x.iter()
                .map(|&i| (1u32..65536).map(|j| i / j).sum::<u32>() as usize)
                .sum::<usize>()
        })
        .sum::<usize>();
    println!("{:?}", b.elapsed());

    // Pass 2: reciprocal multiplication. Note that building the lookup table
    // happens after the timer starts, so its cost is included in this timing.
    let b = std::time::Instant::now();
    // a[x] = ceil(2^42 / x) for x >= 1 (4398046511104 == 2^42); a[0] is a
    // placeholder so that the index equals the divisor.
    let a: Vec<usize> = (0..1)
        .chain((1..2097152).map(|x| ((4398046511104_u64 + x - 1) / x as u64) as usize))
        .collect();
    let r2 = (0usize..65536)
        .into_par_iter()
        .chunks(4096)
        .map(|x| {
            x.iter()
                // `*` binds tighter than `>>`; the parentheses only make that explicit.
                .map(|&i| (1usize..65536).map(|j| (i * a[j]) >> 42).sum::<usize>())
                .sum::<usize>()
        })
        .sum::<usize>();
    println!("{:?}", b.elapsed());

    // Pass 3: f64 division, truncated back to u32.
    let b = std::time::Instant::now();
    let r3 = (0u32..65536)
        .into_par_iter()
        .chunks(4096)
        .map(|x| {
            x.iter()
                .map(|&i| {
                    (1u32..65536)
                        .map(|j| (i as f64 / j as f64) as u32)
                        .sum::<u32>() as usize
                })
                .sum::<usize>()
        })
        .sum::<usize>();
    println!("{:?}", b.elapsed());

    // Cross-check (also keeps all three results live): the average of the two
    // integer results minus the float result. The parentheses are required:
    // `-` binds tighter than `>>`, so `(r1 + r2) >> 1 - r3` would shift by
    // `1 - r3`, which underflows `usize`.
    println!("{}", ((r1 + r2) >> 1) - r3);
}
测试结果
neutron@Neutron:/me/rust/rayonops$ cargo run --release
Compiling rayonops v0.1.0 (/me/rust/rayonops)
Finished release [optimized] target(s) in 0.86s
Running `target/release/rayonops`
1.739315795s
735.013066ms
957.63335ms
0
neutron@Neutron:/me/rust/rayonops$ cargo run --release
Finished release [optimized] target(s) in 0.01s
Running `target/release/rayonops`
1.77189577s
900.427822ms
1.013179198s
0
neutron@Neutron:/me/rust/rayonops$ cargo run --release
Finished release [optimized] target(s) in 0.01s
Running `target/release/rayonops`
1.778011042s
859.130591ms
1.003195896s
0
neutron@Neutron:/me/rust/rayonops$ cargo run --release
Finished release [optimized] target(s) in 0.01s
Running `target/release/rayonops`
1.53730607s
767.199922ms
1.048112309s
0
并不是rayon的锅:
// Version without rayon.
/// Sequential variant of the benchmark: computes the sum of `i / j` (integer
/// quotient) over `0 <= i < 65536`, `1 <= j < 65536` in three ways:
///
/// 1. native `u32 / u32` division,
/// 2. multiplication by a precomputed fixed-point reciprocal followed by a shift,
/// 3. `f64 / f64` division truncated back to `u32`.
///
/// The elapsed time of each pass is printed, followed by a cross-check value
/// that is `0` when the three passes produce the same total.
fn main() {
    // Pass 1: native u32 division.
    let b = std::time::Instant::now();
    let r1 = (0u32..65536)
        .map(|i| (1u32..65536).map(|j| i / j).sum::<u32>() as usize)
        .sum::<usize>();
    println!("{:?}", b.elapsed());

    // Pass 2: reciprocal multiplication. Note that building the lookup table
    // happens after the timer starts, so its cost is included in this timing.
    let b = std::time::Instant::now();
    // a[x] = ceil(2^42 / x) for x >= 1 (4398046511104 == 2^42); a[0] is a
    // placeholder so that the index equals the divisor.
    let a: Vec<usize> = (0..1)
        .chain((1..2097152).map(|x| ((4398046511104_u64 + x - 1) / x as u64) as usize))
        .collect();
    let r2 = (0usize..65536)
        // `*` binds tighter than `>>`; the parentheses only make that explicit.
        .map(|i| (1usize..65536).map(|j| (i * a[j]) >> 42).sum::<usize>())
        .sum::<usize>();
    println!("{:?}", b.elapsed());

    // Pass 3: f64 division, truncated back to u32.
    let b = std::time::Instant::now();
    let r3 = (0u32..65536)
        .map(|i| {
            (1u32..65536)
                .map(|j| (i as f64 / j as f64) as u32)
                .sum::<u32>() as usize
        })
        .sum::<usize>();
    println!("{:?}", b.elapsed());

    // Cross-check (also keeps all three results live): the average of the two
    // integer results minus the float result. The parentheses are required:
    // `-` binds tighter than `>>`, so `(r1 + r2) >> 1 - r3` would shift by
    // `1 - r3`, which underflows `usize`.
    println!("{}", ((r1 + r2) >> 1) - r3);
}
测试结果
neutron@Neutron:/me/rust/rayonops$ ./main_speed_test
10.20379502s
3.601069229s
6.565324578s
0
1
共 1 条评论, 1 页
评论区
写评论
经人指点,是乱序执行(out-of-order execution)的缘故
强行指定计算顺序的话,不会出现类似问题
(大概是Rust对u32的优化不是太好吧)