Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add a new invariant to Bitv
The length of the underlying vector must now be exactly as long as it needs to
be.
  • Loading branch information
tbu- authored and Gankra committed Dec 20, 2014
commit e84a3833077825893b5bf0fb5abf07bdbd58d988
154 changes: 84 additions & 70 deletions src/libcollections/bit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,25 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// FIXME(Gankro): Bitv and BitvSet are very tightly coupled. Ideally (for maintenance),
// they should be in separate files/modules, with BitvSet only using Bitv's public API.

// First rule of Bitv club: almost everything can actually overflow because we're working with
// bits and not bytes.
//
// Second rule of Bitv club: the last "block" of bits may be partially used. We must ensure that
// those unused bits are zeroed out, as other methods will assume this is the case. It may be
// the case that this isn't a great design, but having "undefined" bits is headache-inducing.
//
// Third rule of Bitv club: BitvSet is fairly tightly coupled to Bitv's implementation details.
// Make sure any changes to Bitv are properly addressed in BitvSet.
// FIXME(Gankro): Bitv and BitvSet are very tightly coupled. Ideally (for
// maintenance), they should be in separate files/modules, with BitvSet only
// using Bitv's public API. This will be hard for performance though, because
// `Bitv` will not want to leak its internal representation while its internal
// representation as `u32`s must be assumed for best performance.

// FIXME(tbu-): `Bitv`'s methods shouldn't be `union`, `intersection`, but
// rather `or` and `and`.

// (1) Be careful, most things can overflow here because the number of bits in
//     memory can overflow `uint`.
// (2) Make sure that the underlying vector has no excess length:
//     E.g. `nbits == 16`, `storage.len() == 2` would be excess length,
// because the last word isn't used at all. This is important because some
// methods rely on it (for *CORRECTNESS*).
// (3) Make sure that the unused bits in the last word are zeroed out, again
// other methods rely on it for *CORRECTNESS*.
// (4) `BitvSet` is tightly coupled with `Bitv`, so any changes you make in
// `Bitv` will need to be reflected in `BitvSet`.

//! Collections implemented with bit vectors.
//!
Expand Down Expand Up @@ -82,10 +89,10 @@ use core::iter::{Cloned, Chain, Enumerate, Repeat, Skip, Take};
use core::iter;
use core::num::Int;
use core::slice::{Items, MutItems};
use core::{u32, uint};
use std::hash;
use core::{u8, u32, uint};

use vec::Vec;
use hash;
use Vec;

type Blocks<'a> = Cloned<Items<'a, u32>>;
type MutBlocks<'a> = MutItems<'a, u32>;
Expand Down Expand Up @@ -181,17 +188,15 @@ fn mask_for_bits(bits: uint) -> u32 {
}

impl Bitv {
/// Applies the given operation to the blocks of self and other, and sets self to
/// be the result.
/// Applies the given operation to the blocks of self and other, and sets
/// self to be the result. This relies on the caller not to corrupt the
/// last word.
#[inline]
fn process<F>(&mut self, other: &Bitv, mut op: F) -> bool where F: FnMut(u32, u32) -> u32 {
let len = other.storage.len();
assert_eq!(self.storage.len(), len);
assert_eq!(self.len(), other.len());
// This could theoretically be a `debug_assert!`.
assert_eq!(self.storage.len(), other.storage.len());
let mut changed = false;
// Notice: `a` is *not* masked here, which is fine as long as
// `op` is a bitwise operation, since any bits that should've
// been masked were fine to change anyway. `b` is masked to
// make sure its unmasked bits do not cause damage.
for (a, b) in self.blocks_mut().zip(other.blocks()) {
let w = op(*a, b);
if *a != w {
Expand All @@ -204,21 +209,20 @@ impl Bitv {

/// Iterator over mutable refs to the underlying blocks of data.
fn blocks_mut(&mut self) -> MutBlocks {
let blocks = blocks_for_bits(self.len());
self.storage.slice_to_mut(blocks).iter_mut()
// (2)
self.storage.iter_mut()
}

/// Iterator over the underlying blocks of data
fn blocks(&self) -> Blocks {
let blocks = blocks_for_bits(self.len());
self.storage[..blocks].iter().cloned()
// (2)
self.storage.iter().cloned()
}

/// An operation might screw up the unused bits in the last block of the Bitv.
/// It's assumed to be all 0's. This fixes it up.
/// An operation might screw up the unused bits in the last block of the
/// `Bitv`. As per (3), it's assumed to be all 0s. This method fixes it up.
fn fix_last_block(&mut self) {
let len = self.len();
let extra_bits = len % u32::BITS;
let extra_bits = self.len() % u32::BITS;
if extra_bits > 0 {
let mask = (1 << extra_bits) - 1;
let storage_len = self.storage.len();
Expand Down Expand Up @@ -259,7 +263,6 @@ impl Bitv {
storage: Vec::from_elem(nblocks, if bit { !0u32 } else { 0u32 }),
nbits: nbits
};

bitv.fix_last_block();
bitv
}
Expand Down Expand Up @@ -295,15 +298,33 @@ impl Bitv {
/// false, false, true, false]));
/// ```
pub fn from_bytes(bytes: &[u8]) -> Bitv {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could consider providing FromIterator/Extend instances for bool, u8, u16, u32 and u64 instead. That would give you maximal flexibility while keeping the API surface thin and standard.

(I suppose this would put further pressure on the performance of these traits, though. Perhaps it's worth spending time on that soon.)

Bitv::from_fn(bytes.len() * 8, |i| {
let b = bytes[i / 8] as u32;
let offset = i % 8;
b >> (7 - offset) & 1 == 1
})
let len = bytes.len().checked_mul(u8::BITS).expect("capacity overflow");
let mut bitv = Bitv::with_capacity(len);
let complete_words = bytes.len() / 4;
let extra_bytes = bytes.len() % 4;

for i in range(0, complete_words) {
bitv.storage.push(
(bytes[i * 4 + 0] as u32 << 0) |
(bytes[i * 4 + 1] as u32 << 8) |
(bytes[i * 4 + 2] as u32 << 16) |
(bytes[i * 4 + 3] as u32 << 24)
);
}

if extra_bytes > 0 {
let mut last_word = 0u32;
for (i, &byte) in bytes[complete_words*4..].iter().enumerate() {
last_word |= byte as u32 << (i * 8);
}
bitv.storage.push(last_word);
}

bitv
}

/// Creates a `Bitv` of the specified length where the value at each
/// index is `f(index)`.
/// Creates a `Bitv` of the specified length where the value at each index
/// is `f(index)`.
///
/// # Examples
///
Expand Down Expand Up @@ -339,7 +360,9 @@ impl Bitv {
#[inline]
#[unstable = "panic semantics are likely to change in the future"]
pub fn get(&self, i: uint) -> Option<bool> {
assert!(i < self.nbits);
if i >= self.nbits {
return None;
}
let w = i / u32::BITS;
let b = i % u32::BITS;
self.storage.get(w).map(|&block|
Expand Down Expand Up @@ -548,7 +571,7 @@ impl Bitv {
#[inline]
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn iter<'a>(&'a self) -> Bits<'a> {
Bits {bitv: self, next_idx: 0, end_idx: self.nbits}
Bits { bitv: self, next_idx: 0, end_idx: self.nbits }
}

/// Returns `true` if all bits are 0.
Expand Down Expand Up @@ -608,7 +631,7 @@ impl Bitv {
/// assert_eq!(bv.to_bytes(), vec!(0b00100000, 0b10000000));
/// ```
pub fn to_bytes(&self) -> Vec<u8> {
fn bit (bitv: &Bitv, byte: uint, bit: uint) -> u8 {
fn bit(bitv: &Bitv, byte: uint, bit: uint) -> u8 {
let offset = byte * 8 + bit;
if offset >= bitv.nbits {
0
Expand All @@ -634,7 +657,7 @@ impl Bitv {
/// Deprecated: Use `iter().collect()`.
#[deprecated = "Use `iter().collect()`"]
pub fn to_bools(&self) -> Vec<bool> {
Vec::from_fn(self.nbits, |i| self[i])
self.iter().collect()
}

/// Compares a `Bitv` to a slice of `bool`s.
Expand All @@ -656,12 +679,7 @@ impl Bitv {
/// ```
pub fn eq_vec(&self, v: &[bool]) -> bool {
assert_eq!(self.nbits, v.len());
let mut i = 0;
while i < self.nbits {
if self[i] != v[i] { return false; }
i = i + 1;
}
true
iter::order::eq(self.iter(), v.iter().cloned())
}

/// Shortens a `Bitv`, dropping excess elements.
Expand All @@ -682,6 +700,7 @@ impl Bitv {
pub fn truncate(&mut self, len: uint) {
if len < self.len() {
self.nbits = len;
// This fixes (2).
self.storage.truncate(blocks_for_bits(len));
self.fix_last_block();
}
Expand All @@ -707,13 +726,9 @@ impl Bitv {
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn reserve(&mut self, additional: uint) {
let desired_cap = self.len().checked_add(additional).expect("capacity overflow");
match self.storage.len().checked_mul(u32::BITS) {
None => {} // Vec has more initialized capacity than we can ever use
Some(initialized_cap) => {
if desired_cap > initialized_cap {
self.storage.reserve(blocks_for_bits(desired_cap - initialized_cap));
}
}
let storage_len = self.storage.len();
if desired_cap > self.capacity() {
self.storage.reserve(blocks_for_bits(desired_cap) - storage_len);
}
}

Expand Down Expand Up @@ -741,13 +756,9 @@ impl Bitv {
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn reserve_exact(&mut self, additional: uint) {
let desired_cap = self.len().checked_add(additional).expect("capacity overflow");
match self.storage.len().checked_mul(u32::BITS) {
None => {} // Vec has more initialized capacity than we can ever use
Some(initialized_cap) => {
if desired_cap > initialized_cap {
self.storage.reserve_exact(blocks_for_bits(desired_cap - initialized_cap));
}
}
let storage_len = self.storage.len();
if desired_cap > self.capacity() {
self.storage.reserve_exact(blocks_for_bits(desired_cap) - storage_len);
}
}

Expand Down Expand Up @@ -801,8 +812,7 @@ impl Bitv {
if value {
self.storage[old_last_word] |= !mask;
} else {
// Extra bits are already supposed to be zero by invariant, but play it safe...
self.storage[old_last_word] &= mask;
// Extra bits are already zero by invariant.
}
}

Expand Down Expand Up @@ -843,9 +853,13 @@ impl Bitv {
} else {
let i = self.nbits - 1;
let ret = self[i];
// Second rule of Bitv Club
// (3)
self.set(i, false);
self.nbits = i;
if self.nbits % u32::BITS == 0 {
// (2)
self.storage.pop();
}
Some(ret)
}
}
Expand All @@ -864,11 +878,11 @@ impl Bitv {
/// ```
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn push(&mut self, elem: bool) {
let insert_pos = self.nbits;
self.nbits = self.nbits.checked_add(1).expect("Capacity overflow");
if self.storage.len().checked_mul(u32::BITS).unwrap_or(uint::MAX) < self.nbits {
if self.nbits % u32::BITS == 0 {
self.storage.push(0);
}
let insert_pos = self.nbits;
self.nbits = self.nbits.checked_add(1).expect("Capacity overflow");
self.set(insert_pos, elem);
}

Expand Down Expand Up @@ -958,7 +972,7 @@ impl Ord for Bitv {
impl fmt::Show for Bitv {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
for bit in self.iter() {
try!(write!(fmt, "{}", if bit { 1u } else { 0u }));
try!(write!(fmt, "{}", if bit { 1u32 } else { 0u32 }));
}
Ok(())
}
Expand Down