Skip to content

Commit

Permalink
Merge pull request #6 from alecmocatta/template
Browse files Browse the repository at this point in the history
Merge alecmocatta/template-rust
  • Loading branch information
alecmocatta authored Jul 25, 2019
2 parents 725de43 + 534b236 commit f7d8794
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 65 deletions.
9 changes: 5 additions & 4 deletions .rustfmt.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
fn_args_density = "Compressed"
hard_tabs = true
imports_layout = "Horizontal"
merge_imports = true
# reorder_impl_items = true # currently nondeterministic, breaks check
fn_args_layout = "Compressed"
use_field_init_shorthand = true
use_try_shorthand = true
# wrap_comments = true

# To enable when stable
# wrap_comments = true # https://github.com/rust-lang/rustfmt/issues/3347
# reorder_impl_items = true # https://github.com/rust-lang/rustfmt/issues/3363
13 changes: 5 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "streaming_algorithms"
version = "0.1.0"
license = "Apache-2.0"
license = "MIT OR Apache-2.0"
authors = ["Alec Mocatta <alec@mocatta.net>"]
categories = ["data-structures","algorithms","science"]
keywords = ["streaming-algorithm","probabilistic","sketch","data-structure","hyperloglog"]
Expand All @@ -12,18 +12,15 @@ repository = "https://github.com/alecmocatta/streaming_algorithms"
homepage = "https://github.com/alecmocatta/streaming_algorithms"
documentation = "https://docs.rs/streaming_algorithms/0.1.0"
readme = "README.md"
edition = "2018"

[badges]
appveyor = { repository = "alecmocatta/streaming_algorithms" }
circle-ci = { repository = "alecmocatta/streaming_algorithms" }
#gitlab = { repository = "alecmocatta/streaming_algorithms" }
travis-ci = { repository = "alecmocatta/streaming_algorithms" }

azure-devops = { project = "alecmocatta/streaming_algorithms", pipeline = "tests" }
maintenance = { status = "actively-developed" }

[dependencies]
twox-hash = "1.1"
serde_derive = "1.0"
serde = "1.0"
rand = "0.5"
serde = { version = "1.0", features = ["derive"] }
rand = { version = "0.7", features = ["small_rng"] }
packed_simd = { version = "0.3", features = ["into_bits"] }
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
# streaming_algorithms

[![Crates.io](https://img.shields.io/crates/v/streaming_algorithms.svg?style=flat-square&maxAge=86400)](https://crates.io/crates/streaming_algorithms)
[![Apache-2.0 licensed](https://img.shields.io/crates/l/streaming_algorithms.svg?style=flat-square&maxAge=2592000)](LICENSE.txt)
[![Build Status](https://ci.appveyor.com/api/projects/status/github/alecmocatta/streaming_algorithms?branch=master&svg=true)](https://ci.appveyor.com/project/alecmocatta/streaming-algorithms)
[![Build Status](https://circleci.com/gh/alecmocatta/streaming_algorithms/tree/master.svg?style=shield)](https://circleci.com/gh/alecmocatta/streaming_algorithms)
[![Build Status](https://travis-ci.com/alecmocatta/streaming_algorithms.svg?branch=master)](https://travis-ci.com/alecmocatta/streaming_algorithms)
[![Crates.io](https://img.shields.io/crates/v/streaming_algorithms.svg?maxAge=86400)](https://crates.io/crates/streaming_algorithms)
[![MIT / Apache 2.0 licensed](https://img.shields.io/crates/l/streaming_algorithms.svg?maxAge=2592000)](#License)
[![Build Status](https://dev.azure.com/alecmocatta/streaming_algorithms/_apis/build/status/tests?branchName=master)](https://dev.azure.com/alecmocatta/streaming_algorithms/_build/latest?branchName=master)

[Docs](https://docs.rs/streaming_algorithms/0.1.0)

Expand All @@ -30,6 +28,11 @@ See [this gist](https://gist.github.com/debasishg/8172796) for a good list of fu
As these implementations are often in hot code paths, unsafe is used, albeit only when necessary to a) achieve the asymptotically optimal algorithm or b) mitigate an observed bottleneck.

## License
Licensed under Apache License, Version 2.0, ([LICENSE.txt](LICENSE.txt) or http://www.apache.org/licenses/LICENSE-2.0).
Licensed under either of

Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be licensed as above, without any additional terms or conditions.
* Apache License, Version 2.0, ([LICENSE-APACHE.txt](LICENSE-APACHE.txt) or http://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT.txt](LICENSE-MIT.txt) or http://opensource.org/licenses/MIT)

at your option.

Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
31 changes: 31 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
trigger: ["master"]
pr: ["master"]

resources:
repositories:
- repository: templates
type: github
name: alecmocatta/azure-pipeline-templates
endpoint: alecmocatta

jobs:
- template: rust.yml@templates
parameters:
default:
rust_toolchain: nightly
rust_lint_toolchain: nightly-2019-07-19
rust_flags: ''
rust_features: ''
rust_target_check: ''
rust_target_build: ''
rust_target_run: ''
matrix:
windows:
imageName: 'vs2017-win2016'
rust_target_run: 'x86_64-pc-windows-msvc i686-pc-windows-msvc' # currently broken building crate-type=lib: x86_64-pc-windows-gnu i686-pc-windows-gnu
mac:
imageName: 'macos-10.13'
rust_target_run: 'x86_64-apple-darwin i686-apple-darwin'
linux:
imageName: 'ubuntu-16.04'
rust_target_run: 'x86_64-unknown-linux-gnu i686-unknown-linux-gnu x86_64-unknown-linux-musl i686-unknown-linux-musl'
8 changes: 4 additions & 4 deletions src/count_min.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
// SOFTWARE.

use super::f64_to_usize;
use serde::{de::Deserialize, ser::Serialize};
use crate::traits::{Intersect, IntersectPlusUnionIsPlus, New, UnionAssign};
use serde::{Deserialize, Serialize};
use std::{
borrow::Borrow, cmp::max, convert::TryFrom, fmt, hash::{Hash, Hasher}, marker::PhantomData, ops
};
use traits::{Intersect, IntersectPlusUnionIsPlus, New, UnionAssign};
use twox_hash::XxHash;

/// An implementation of a [count-min sketch](https://en.wikipedia.org/wiki/Count–min_sketch) data structure with *conservative updating* for increased accuracy.
Expand Down Expand Up @@ -229,8 +229,8 @@ impl<K: ?Sized, C: New + Clone> Clone for CountMinSketch<K, C> {
impl<K: ?Sized, C: New> fmt::Debug for CountMinSketch<K, C> {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.debug_struct("CountMinSketch")
// .field("counters", &self.counters)
.finish()
// .field("counters", &self.counters)
.finish()
}
}

Expand Down
9 changes: 5 additions & 4 deletions src/distinct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@
// is_x86_feature_detected ?

use super::{f64_to_u8, u64_to_f64, usize_to_f64};
use crate::traits::{Intersect, IntersectPlusUnionIsPlus, New, UnionAssign};
use packed_simd::{self, Cast, FromBits, IntoBits};
use serde::{Deserialize, Serialize};
use std::{
cmp::{self, Ordering}, convert::{identity, TryFrom}, fmt, hash::{Hash, Hasher}, marker::PhantomData, ops::{self, Range}
};
use traits::{Intersect, IntersectPlusUnionIsPlus, New, UnionAssign};
use twox_hash::XxHash;

mod consts;
Expand Down Expand Up @@ -80,21 +81,21 @@ impl<V: Hash> PartialEq for HyperLogLogMagnitude<V> {
impl<V: Hash> Eq for HyperLogLogMagnitude<V> {}
impl<V: Hash> Clone for HyperLogLogMagnitude<V> {
fn clone(&self) -> Self {
HyperLogLogMagnitude(self.0.clone())
Self(self.0.clone())
}
}
impl<V: Hash> New for HyperLogLogMagnitude<V> {
type Config = f64;
fn new(config: &Self::Config) -> Self {
HyperLogLogMagnitude(New::new(config))
Self(New::new(config))
}
}
impl<V: Hash> Intersect for HyperLogLogMagnitude<V> {
fn intersect<'a>(iter: impl Iterator<Item = &'a Self>) -> Option<Self>
where
Self: Sized + 'a,
{
Intersect::intersect(iter.map(|x| &x.0)).map(HyperLogLogMagnitude)
Intersect::intersect(iter.map(|x| &x.0)).map(Self)
}
}
impl<'a, V: Hash> UnionAssign<&'a HyperLogLogMagnitude<V>> for HyperLogLogMagnitude<V> {
Expand Down
18 changes: 6 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@
//! As these implementations are often in hot code paths, unsafe is used, albeit only when necessary to a) achieve the asymptotically optimal algorithm or b) mitigate an observed bottleneck.
#![doc(html_root_url = "https://docs.rs/streaming_algorithms/0.1.0")]
#![feature(nll, specialization, convert_id, try_trait, try_from)]
#![feature(specialization, try_trait)]
#![warn(
missing_copy_implementations,
missing_debug_implementations,
missing_docs,
trivial_casts,
trivial_numeric_casts,
unused_extern_crates,
unused_import_braces,
unused_qualifications,
unused_results,
Expand All @@ -39,21 +39,14 @@
dead_code,
clippy::doc_markdown,
clippy::inline_always,
clippy::stutter,
clippy::module_name_repetitions,
clippy::if_not_else,
clippy::op_ref,
clippy::needless_pass_by_value,
clippy::suspicious_op_assign_impl,
clippy::float_cmp
)]

extern crate twox_hash;
#[macro_use]
extern crate serde_derive;
extern crate packed_simd;
extern crate rand;
extern crate serde;

mod count_min;
mod distinct;
mod linked_list;
Expand All @@ -72,7 +65,8 @@ pub use traits::*;
#[allow(
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::cast_precision_loss
clippy::cast_precision_loss,
clippy::cast_lossless
)]
fn f64_to_usize(a: f64) -> usize {
assert!(a.is_sign_positive() && a <= usize::max_value() as f64 && a.fract() == 0.0);
Expand All @@ -89,7 +83,7 @@ fn f64_to_u8(a: f64) -> u8 {
a as u8
}

#[allow(clippy::cast_precision_loss)]
#[allow(clippy::cast_precision_loss, clippy::cast_lossless)]
fn usize_to_f64(a: usize) -> f64 {
assert!(a as u64 <= 1_u64 << 53);
a as f64
Expand Down
1 change: 1 addition & 0 deletions src/linked_list.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use serde::{Deserialize, Serialize};
use std::{iter, marker, ops};

#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
Expand Down
5 changes: 3 additions & 2 deletions src/ordered_linked_list.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use linked_list::{LinkedList, LinkedListIndex};
use crate::linked_list::{LinkedList, LinkedListIndex};
use serde::{Deserialize, Serialize};
use std::{ops, ptr};

#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
Expand All @@ -14,7 +15,7 @@ impl<'a> OrderedLinkedListIndex<'a> {
pub struct OrderedLinkedList<T>(LinkedList<T>);
impl<T: Ord> OrderedLinkedList<T> {
pub fn new(cap: usize) -> Self {
OrderedLinkedList(LinkedList::new(cap))
Self(LinkedList::new(cap))
}
fn assert(&self) {
if !cfg!(feature = "assert") {
Expand Down
10 changes: 4 additions & 6 deletions src/sample.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use rand::{self, Rng, SeedableRng};
use serde::{
de::{Deserialize, Deserializer}, ser::{Serialize, Serializer}
};
use serde::{de::Deserializer, ser::Serializer, Deserialize, Serialize};
use std::{convert::TryFrom, fmt, iter, ops, vec};

/// Given population and sample sizes, returns true if this element is in the sample. Without replacement.
Expand Down Expand Up @@ -44,7 +42,7 @@ impl Drop for SampleTotal {
struct FixedCapVec<T>(Vec<T>);
impl<T> FixedCapVec<T> {
fn new(cap: usize) -> Self {
let self_ = FixedCapVec(Vec::with_capacity(cap));
let self_ = Self(Vec::with_capacity(cap));
assert_eq!(self_.capacity(), cap);
self_
}
Expand Down Expand Up @@ -117,7 +115,7 @@ where
<(usize, Vec<T>)>::deserialize(deserializer).map(|(cap, mut vec)| {
vec.reserve_exact(cap - vec.len());
assert_eq!(vec.capacity(), cap);
FixedCapVec(vec)
Self(vec)
})
}
}
Expand Down Expand Up @@ -186,7 +184,7 @@ impl<T> ops::AddAssign for SampleUnstable<T> {
assert_eq!(self.reservoir.capacity(), other.reservoir.capacity());
let mut new = FixedCapVec::new(self.reservoir.capacity());
let (m, n) = (self.i, other.i);
let mut rng = rand::prng::XorShiftRng::from_seed([
let mut rng = rand::rngs::SmallRng::from_seed([
u8::try_from(m & 0xff).unwrap(),
u8::try_from(n & 0xff).unwrap(),
u8::try_from(self.reservoir.capacity() & 0xff).unwrap(),
Expand Down
32 changes: 14 additions & 18 deletions src/top.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use count_min::CountMinSketch;
use ordered_linked_list::{OrderedLinkedList, OrderedLinkedListIndex, OrderedLinkedListIter};
use serde::{de::Deserialize, ser::Serialize};
use crate::{
count_min::CountMinSketch, ordered_linked_list::{OrderedLinkedList, OrderedLinkedListIndex, OrderedLinkedListIter}, traits::{Intersect, New, UnionAssign}
};
use serde::{Deserialize, Serialize};
use std::{
cmp, collections::{hash_map::Entry, HashMap}, fmt::{self, Debug}, hash::Hash, iter, ops
};
use traits::{Intersect, New, UnionAssign};
use twox_hash::RandomXxHashBuilder;

/// This probabilistic data structure tracks the `n` top keys given a stream of `(key,value)` tuples, ordered by the sum of the values for each key (the "aggregated value"). It uses only `O(n)` space.
Expand Down Expand Up @@ -302,16 +302,14 @@ impl<T, C: Eq> Eq for Node<T, C> {}
#[cfg(test)]
mod test {
use super::*;
use distinct::HyperLogLog;
use crate::{distinct::HyperLogLog, traits::IntersectPlusUnionIsPlus};
use rand::{self, Rng, SeedableRng};
use std::time;
use traits::IntersectPlusUnionIsPlus;

#[test]
fn abc() {
let mut rng = rand::prng::XorShiftRng::from_seed([
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
]);
let mut rng =
rand::rngs::SmallRng::from_seed([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let mut top = Top::<String, usize>::new(100, 0.99, 2.0 / 1000.0, ());
let mut x = HashMap::new();
for _ in 0..10_000 {
Expand Down Expand Up @@ -355,21 +353,21 @@ mod test {
impl<V: Hash> Eq for HLL<V> {}
impl<V: Hash> Clone for HLL<V> {
fn clone(&self) -> Self {
HLL(self.0.clone())
Self(self.0.clone())
}
}
impl<V: Hash> New for HLL<V> {
type Config = f64;
fn new(config: &Self::Config) -> Self {
HLL(New::new(config))
Self(New::new(config))
}
}
impl<V: Hash> Intersect for HLL<V> {
fn intersect<'a>(iter: impl Iterator<Item = &'a Self>) -> Option<Self>
where
Self: Sized + 'a,
{
Intersect::intersect(iter.map(|x| &x.0)).map(HLL)
Intersect::intersect(iter.map(|x| &x.0)).map(Self)
}
}
impl<'a, V: Hash> UnionAssign<&'a HLL<V>> for HLL<V> {
Expand All @@ -393,9 +391,8 @@ mod test {

#[test]
fn top_hll() {
let mut rng = rand::prng::XorShiftRng::from_seed([
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
]);
let mut rng =
rand::rngs::SmallRng::from_seed([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let mut top = Top::<String, HLL<String>>::new(1000, 0.99, 2.0 / 1000.0, 0.00408);
// let mut x = HashMap::new();
for _ in 0..5_000 {
Expand Down Expand Up @@ -423,9 +420,8 @@ mod test {
fn many() {
let start = time::Instant::now();

let mut rng = rand::prng::XorShiftRng::from_seed([
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
]);
let mut rng =
rand::rngs::SmallRng::from_seed([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let mut top = Top::<String, HLL<String>>::new(1000, 0.99, 2.0 / 1000.0, 0.05);
// let mut x = HashMap::new();
for _ in 0..5_000_000 {
Expand Down

0 comments on commit f7d8794

Please sign in to comment.