HEX

File: //usr/include/hphp/util/sparse-id-containers.h
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#ifndef incl_HPHP_SPARSE_ID_CONTAINERS_H_
#define incl_HPHP_SPARSE_ID_CONTAINERS_H_

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <new>
#include <string>
#include <type_traits>
#include <utility>

#include <folly/gen/String.h>

#include "hphp/util/compilation-flags.h"
#include "hphp/util/safe-cast.h"

namespace HPHP {

//////////////////////////////////////////////////////////////////////

/*
 * Time-efficient representations for sparse sets and sparse maps keyed with
 * integers from a known universe (i.e. values from zero to some maximum).  It
 * also has the peculiar (but maybe occasionally useful) property of iteration
 * order matching insertion order as long as you haven't erased anything yet.
 *
 * Space consumption is O(universe), where universe is the maximum value the
 * set can hold.  See the constructor for more information on this.  The set
 * version has several member functions with preconditions relating to universe
 * size.
 *
 * The datastructure implemented here is from this:
 *
 *   http://dl.acm.org/citation.cfm?id=176484
 *
 * Some notes about this implementation:
 *
 *    o The set doesn't support set complement.  It's O(universe), so if you
 *      need it it's probably better to use a bitset of some sort.
 *
 *    o Iterators are invalidated on any call to a non-const member function.
 *
 *    o Moving from a set or map leaves it in an undetermined (but valid)
 *      state.  Notably this includes potentially changing its universe size.
 *
 *    o Set operations involving multiple sets are generally only legal if both
 *      sets have the same universe size.  The exceptions are equality
 *      comparison, assignment, and swap(): these must stay legal for any pair
 *      of containers because moving from a container may change its universe
 *      size.
 *
 *    o Lookups and insertions can be done through a different type than the
 *      containers actually hold.  This is to ease using non-integer types as
 *      "keys" in these classes, as long as they can be mapped down to ids.  An
 *      'extractor' function object type can be provided as a template
 *      parameter to control how the mapping works.
 *
 * Also, note that for very small universes, even if the bits are sparse
 * there's a good chance you'll be better off with some kind of bitset than the
 * set version of this.
 *
 */

//////////////////////////////////////////////////////////////////////

namespace sparse_id_detail {

/*
 * Default extractor used by the sparse id containers: turns a Lookup value
 * into an id of type T.
 *
 * The lookup value is first implicitly converted to size_t, and the result is
 * then handed to safe_cast<T> to produce the id.
 */
template<class T, class Lookup>
struct default_extract {
  T operator()(Lookup l) const {
    // Copy-initialization on purpose: only implicit conversions to size_t are
    // accepted here.
    size_t as_index = l;
    return safe_cast<T>(as_index);
  }
};

}

//////////////////////////////////////////////////////////////////////

template<
  class T,
  class LookupT = T,
  class Extract = sparse_id_detail::default_extract<T,LookupT>
>
struct sparse_id_set {
  using value_type     = T;
  using size_type      = value_type;
  using const_iterator = const T*;

  static_assert(
    std::is_integral<T>::value && std::is_unsigned<T>::value,
    "sparse_id_set is intended for use with unsigned integer types"
  );

  /*
   * When constructing a sparse_id_set, you must provide a 'universe size' for
   * the ids.  This is one greater than the maximum value you'll insert into
   * the set.
   *
   * All functions dealing with values have a precondition that the values fit
   * in the universe size, and most functions involving multiple sparse_id_sets
   * (essentially everything except swap, assignment and equality comparison)
   * will have a precondition that the two sets have the same universe size.
   *
   * Throws: std::bad_alloc if the backing storage cannot be allocated.
   */
  explicit sparse_id_set(T universe_size)
    : m_universe_size{universe_size}
    , m_next{0}
    , m_mem{allocate(universe_size)}
  {}
  ~sparse_id_set() { if (m_universe_size) std::free(m_mem); }

  /*
   * Copy this set from `o'.
   *
   * Post: operator==(o)
   */
  sparse_id_set(const sparse_id_set& o)
    : sparse_id_set(o.m_universe_size)
  {
    *this |= o;
  }

  /*
   * Move construct a set from `o'.
   *
   * The set `o' is left empty with a universe size of zero.  Callers should
   * still only rely on it being in an unspecified but valid state; notably
   * its universe_size() is not the same after it is moved from.
   */
  sparse_id_set(sparse_id_set&& o) noexcept
    : m_universe_size{o.m_universe_size}
    , m_next{o.m_next}
    , m_mem{o.m_mem}
  {
    // Reset every field unconditionally.  Leaving a stale m_next behind would
    // make the moved-from set claim elements it has no storage for, so that
    // copying or iterating it would touch memory it no longer owns.
    o.m_universe_size = 0;
    o.m_next = 0;
    o.m_mem = nullptr;
  }

  /*
   * Copy assignment.  Self-assignment is a no-op.
   *
   * If the universe sizes match, the existing storage is reused; otherwise a
   * copy of `o' is made and swapped in.
   *
   * Post: operator==(o)
   */
  sparse_id_set& operator=(const sparse_id_set& o) {
    // Without this check the clear() below would wipe `o' as well.
    if (this == &o) return *this;
    if (m_universe_size == o.m_universe_size) {
      clear();
      *this |= o;
      return *this;
    }
    sparse_id_set tmp(o);
    swap(tmp);
    return *this;
  }

  /*
   * Move assignment.
   *
   * Leaves `o' in an unspecified, but valid state.  It may not have the same
   * universe size that it had before being moved from.
   */
  sparse_id_set& operator=(sparse_id_set&& o) noexcept {
    swap(o);
    if (debug) {
      // Make sure no one relies on the universe staying the same.
      sparse_id_set tmp(0);
      tmp.swap(o);
    }
    return *this;
  }

  /*
   * Returns the universe size of this sparse_id_set.  Once created, a set's
   * universe size can not change unless you move-construct or move-assign from
   * it (or assign to it / swap it).
   */
  size_type universe_size() const { return m_universe_size; }

  /*
   * Iteration.  Make sure you don't mutate the set while you're using its
   * iterators.
   *
   * The order of elements in the set is the order of insertion, as long as
   * nothing has been removed: erase(), operator-= and operator&= fill holes
   * with elements taken from the back, which reorders the set.
   */
  const_iterator begin() const  { return const_iterator(dense()); }
  const_iterator end() const    { return const_iterator(dense() + m_next); }
  const_iterator cbegin() const { return const_iterator(dense()); }
  const_iterator cend() const   { return const_iterator(dense() + m_next); }

  /*
   * Since we iterate in insertion order, it might be convenient to ask what's
   * at the front or the back.  This class is definitely not a full model of
   * Sequence, however.
   *
   * Pre: !empty()
   */
  T front() const { assert(!empty()); return dense()[0]; }
  T back() const { assert(!empty()); return dense()[m_next - 1]; }

  /*
   * Number of elements in this set.
   */
  size_type size() const { return m_next; }

  /*
   * Returns: size() == 0
   */
  bool empty() const { return !size(); }

  /*
   * Clear all members from the set.  O(1).
   *
   * Post: empty()
   */
  void clear() { m_next = 0; }

  /*
   * Returns: whether this sparse_id_set contains a particular value.  O(1).
   *
   * Pre: the id extracted from `lt' is less than universe_size()
   */
  bool contains(LookupT lt) const {
    return containsImpl(Extract()(lt));
  }

  /*
   * Returns: whether this sparse_id_set contains a particular value.  O(1).
   * Does not require that the id is in range.
   */
  bool contains_safe(LookupT lt) const {
    auto const t = Extract()(lt);
    return t < m_universe_size && containsImpl(t);
  }

  /*
   * Insert a new value into the set.  O(1)
   *
   * Pre: the id extracted from `lt' is less than universe_size()
   * Post: contains an element with the id of `lt'
   */
  void insert(LookupT lt) {
    insertImpl(Extract()(lt));
  }

  /*
   * Remove an element from the set, if it is a member.  (Does not assume that
   * it is.)
   *
   * Pre: the id extracted from `lt' is less than universe_size()
   * Post: !contains(lt)
   */
  void erase(LookupT lt) {
    eraseImpl(Extract()(lt));
  }

  /*
   * These sets are EqualityComparable, even if they aren't from the same
   * universe.  (Rationale: if you move construct from something it's nice that
   * it is still legal to compare to other things it was legally comparable to
   * before that.)  Sets with different universe sizes compare unequal.
   *
   * This returns whether the two sets have the same elements, regardless of
   * the order they were inserted.
   *
   * But note that it's O(size()) worst case.  You probably should just never
   * use this function.
   */
  bool operator==(const sparse_id_set& o) const {
    if (universe_size() != o.universe_size()) return false;
    if (size() != o.size()) return false;
    for (auto v : *this) if (!o.containsImpl(v)) return false;
    return true;
  }
  bool operator!=(const sparse_id_set& o) const { return !(*this == o); }

  /*
   * Union, difference and intersection operators.
   *
   * All of these operators are only provided as versions that modify the lhs
   * in place.  Idiomatic uses are going to involve updating id sets that
   * already exist, so even with our move-construction support it will tend to
   * involve allocations compared to mutation-based usage-styles.
   *
   * Union (|=) and difference (-=) are O(o.size()), while intersection (&=) is
   * O(this->size()).
   *
   * The elements of `o' are already ids, so they are applied directly rather
   * than being passed through Extract again.
   *
   * Pre: universe_size() == o.universe_size()
   */
  sparse_id_set& operator|=(const sparse_id_set& o) {
    assert(m_universe_size == o.m_universe_size);
    for (auto t : o) insertImpl(t);
    return *this;
  }
  sparse_id_set& operator-=(const sparse_id_set& o) {
    assert(m_universe_size == o.m_universe_size);
    if (this == &o) {
      // Erasing while iterating over ourselves would skip elements.
      clear();
      return *this;
    }
    for (auto t : o) eraseImpl(t);
    return *this;
  }
  sparse_id_set& operator&=(const sparse_id_set& o) {
    assert(m_universe_size == o.m_universe_size);
    // Partition the dense array in place: [0, fwd) is known to be in `o',
    // [back, m_next) has been discarded.
    auto fwd = T{0};
    auto back = m_next;
    while (fwd != back) {
      assert(fwd < back);
      if (!o.containsImpl(dense()[fwd])) {
        // Overwrite the rejected element with the last unexamined one; the
        // replacement is examined on the next iteration.
        auto const val = dense()[--back];
        sparse()[val] = fwd;
        dense()[fwd] = val;
      } else {
        ++fwd;
      }
    }
    m_next = back;
    return *this;
  }

  /*
   * Swap the contents of two sets.
   *
   * This function is unusual in that it does not have any preconditions about
   * universe sizes matching.
   */
  void swap(sparse_id_set& o) noexcept {
    std::swap(m_universe_size, o.m_universe_size);
    std::swap(m_mem, o.m_mem);
    std::swap(m_next, o.m_next);
  }

  /*
   * Convert a sparse id set to a std::string, intended for debug printing.
   */
  friend std::string show(const sparse_id_set& set) {
    using namespace folly::gen;
    return from(set)
      | eachTo<std::string>()
      | unsplit<std::string>(" ")
      ;
  }

private:
  /*
   * Allocate storage for a set with the given universe size: one array of
   * `universe_size' sparse slots followed by one array of `universe_size'
   * dense slots.  Returns nullptr for an empty universe.
   *
   * Throws: std::bad_alloc if std::malloc fails.
   */
  static T* allocate(T universe_size) {
    if (!universe_size) return nullptr;
    auto const mem = static_cast<T*>(
      std::malloc(sizeof(T) * universe_size * 2)
    );
    if (!mem) throw std::bad_alloc{};
    // Note: the sparse part of the memory is deliberately uninitialized, but
    // we do initialize it for valgrind or asan builds.
#if defined(FOLLY_SANITIZE_ADDRESS) || defined(VALGRIND)
    std::memset(mem, 0, sizeof(T) * universe_size);
#endif
    return mem;
  }

  bool containsImpl(T t) const {
    assert(t < m_universe_size);
    auto const didx = sparse()[t];  // may read uninitialized memory
    // The sparse slot is only trusted if it points at a live dense slot that
    // points back at `t'.
    return didx < m_next && dense()[didx] == t;
  }

  void insertImpl(T t) {
    assert(t < m_universe_size);
    if (containsImpl(t)) return;
    dense()[m_next] = t;
    sparse()[t] = m_next;
    ++m_next;
  }

  void eraseImpl(T t) {
    assert(t < m_universe_size);
    // Swap with back element and update sparse ptrs.
    auto const didx = sparse()[t];  // possibly reads uninitialized mem
    if (didx >= m_next || dense()[didx] != t) return;
    auto const moving = dense()[m_next - 1];
    sparse()[moving] = didx;
    dense()[didx] = moving;
    --m_next;
    // No need to write to sparse()[t].  If it's read, next and dense are
    // rechecked to ensure it's actually relevant.
  }

private:
  T* sparse()             { return m_mem; }
  T* dense()              { return m_mem + m_universe_size; }
  const T* sparse() const { return m_mem; }
  const T* dense() const  { return m_mem + m_universe_size; }

private:
  T m_universe_size;
  T m_next;
  T* m_mem;
};

//////////////////////////////////////////////////////////////////////

template<
  class K,
  class V,
  class LookupKey = K,
  class KExtract = sparse_id_detail::default_extract<K,LookupKey>
>
struct sparse_id_map {
  using value_type     = std::pair<const K,V>;
  using size_type      = K;
  using const_iterator = const value_type*;

  static_assert(
    std::is_integral<K>::value && std::is_unsigned<K>::value,
    "sparse_id_map is intended for use with unsigned integer types"
  );

  /*
   * When constructing a sparse_id_map, you must provide a 'universe size' for
   * the ids.  This is one greater than the maximum key you'll insert into the
   * map.
   *
   * Throws: std::bad_alloc if the backing storage cannot be allocated.
   */
  explicit sparse_id_map(K universe_size)
    : m_universe_size(universe_size)
    , m_next{0}
    , m_mem{allocate(universe_size)}
  {}
  ~sparse_id_map() {
    if (!m_universe_size) return;
    if (!std::is_trivially_destructible<V>::value) {
      for (auto& kv : *this) {
        kv.~value_type();
      }
    }
    std::free(m_mem);
  }

  /*
   * Copy this map from `o'.  Apart from allocation failure
   * (std::bad_alloc), this is nothrow as long as V has a nothrow copy
   * constructor.
   *
   * If copying an element throws, the elements copied so far are destroyed
   * and the storage is released before the exception propagates.
   *
   * Post: operator==(o)
   */
  sparse_id_map(const sparse_id_map& o)
    : m_universe_size{o.m_universe_size}
    , m_next{o.m_next}
    , m_mem{allocate(o.m_universe_size)}
  {
    auto idx = K{0};
    try {
      for (; idx < m_next; ++idx) {
        new (&dense()[idx]) value_type(o.dense()[idx]);
        sparse()[o.dense()[idx].first] = idx;
      }
    } catch (...) {
      // The destructor will not run for a partially constructed map, so undo
      // everything by hand: destroy [0, idx) and give the memory back.
      while (idx-- > 0) {
        dense()[idx].~value_type();
      }
      std::free(m_mem);
      throw;
    }
  }

  /*
   * Move construct a map from `o'.  Leaves `o' empty with a universe size of
   * zero; callers should still only rely on it being in an unspecified but
   * valid state.  (Notably the universe size is changed.)
   *
   * Nothrow guarantee.
   */
  sparse_id_map(sparse_id_map&& o) noexcept
    : m_universe_size{o.m_universe_size}
    , m_next{o.m_next}
    , m_mem{o.m_mem}
  {
    // Reset every field unconditionally.  Leaving a stale m_next behind would
    // make the moved-from map claim elements it has no storage for, so that
    // copying or iterating it would touch memory it no longer owns.
    o.m_universe_size = 0;
    o.m_next = 0;
    o.m_mem = nullptr;
  }

  /*
   * Copy assignment.  Make this map equivalent to `o'.
   *
   * Strong exception guarantee.
   */
  sparse_id_map& operator=(const sparse_id_map& o) {
    sparse_id_map tmp(o);
    swap(tmp);
    return *this;
  }

  /*
   * Move assign from `o'.
   *
   * Leaves `o' in an unspecified but valid state.  Notably the universe size
   * may be changed.
   *
   * Nothrow guarantee.
   */
  sparse_id_map& operator=(sparse_id_map&& o) noexcept {
    swap(o);
    return *this;
  }

  /*
   * Returns the universe size of this sparse_id_map.  Once created, a map's
   * universe size can not change unless you move-construct or move-assign from
   * it (or assign to it / swap it).
   */
  size_type universe_size() const { return m_universe_size; }

  /*
   * Iteration.  Make sure you don't mutate the map while you're using its
   * iterators.
   *
   * The order of elements in the map is the order of insertion, as long as
   * nothing has been removed: erase() and merge() fill holes with elements
   * taken from the back, which reorders the map.
   */
  const_iterator begin() const  { return const_iterator(dense()); }
  const_iterator end() const    { return const_iterator(dense() + m_next); }
  const_iterator cbegin() const { return const_iterator(dense()); }
  const_iterator cend() const   { return const_iterator(dense() + m_next); }

  /*
   * Since we iterate in insertion order, it might be convenient to ask what's
   * at the front or the back.  This class is definitely not a full model of
   * Sequence, however.
   *
   * Pre: !empty()
   */
  const value_type& front() const {
    assert(!empty());
    return dense()[0];
  }
  const value_type& back() const {
    assert(!empty());
    return dense()[m_next - 1];
  }

  /*
   * Number of elements in this map.
   */
  size_type size() const { return m_next; }

  /*
   * Returns: size() == 0
   */
  bool empty() const { return !size(); }

  /*
   * Clear all members from the map.  O(1) if V is trivially destructible,
   * O(size()) if not.
   *
   * Post: empty()
   */
  void clear() {
    if (!std::is_trivially_destructible<V>::value) {
      for (auto& kv : *this) {
        kv.~value_type();
      }
    }
    m_next = 0;
  }

  /*
   * Returns: whether this sparse_id_map contains a particular key.  O(1).
   *
   * Pre: the id extracted from `lk' is less than universe_size()
   */
  bool contains(LookupKey lk) const {
    return containsImpl(KExtract()(lk));
  }

  /*
   * Returns: whether this sparse_id_map contains a particular key.  O(1).
   * Does not require that the id is in range.
   */
  bool contains_safe(LookupKey lk) const {
    auto const k = KExtract()(lk);
    return k < m_universe_size && containsImpl(k);
  }

  /*
   * Get a reference to the value for key `k', inserting it with a default
   * constructed value if it doesn't exist.  Strong guarantee.
   *
   * Pre: the id extracted from `lk' is less than universe_size()
   */
  V& operator[](LookupKey lk) {
    auto const k = KExtract()(lk);
    if (!containsImpl(k)) insert(std::make_pair(k, V{}));
    return dense()[sparse()[k]].second;
  }

  /*
   * Insert a new value into the map.  O(1).  Strong exception guarantee.
   * Does nothing if the key is already present.
   *
   * Pre: v.first < universe_size()
   * Post: contains an element with id v.first
   */
  void insert(const value_type& v) {
    assert(v.first < m_universe_size);
    if (containsImpl(v.first)) return;
    // Construct first; bookkeeping is only updated once that has succeeded.
    new (&dense()[m_next]) value_type(v);
    sparse()[v.first] = m_next;
    ++m_next;
  }

  /*
   * Insert a new value into the map, moving it if we need it.  O(1).  Strong
   * exception guarantee.  Does nothing if the key is already present.
   *
   * Pre: v.first < universe_size()
   * Post: contains an element with id v.first
   */
  void insert(value_type&& v) {
    assert(v.first < m_universe_size);
    if (containsImpl(v.first)) return;
    new (&dense()[m_next]) value_type(std::move(v));
    // v.first is const, so it was copied (not moved) above and is still
    // readable here.
    sparse()[v.first] = m_next;
    ++m_next;
  }

  /*
   * Remove an element from the map, if it is a member.  (Does not assume that
   * it is.)  No throw as long as V has a nothrow move assignment operator.
   * Strong guarantee otherwise.
   *
   * Pre: the id extracted from `lk' is less than universe_size()
   * Post: !contains(lk)
   */
  void erase(LookupKey lk) {
    auto const key = KExtract()(lk);
    assert(key < m_universe_size);
    // Move in back element and update sparse ptrs.
    auto const didx = sparse()[key];  // possibly reads uninitialized mem
    if (didx >= m_next || dense()[didx].first != key) return;
    auto& moving = dense()[m_next - 1];
    auto const moved_key = moving.first;
    if (didx < m_next - 1) {
      // Not the last element: overwrite the erased slot with the back
      // element.  The key is rewritten in place through a const_cast.
      dense()[didx].second = std::move(moving.second);
      const_cast<K&>(dense()[didx].first) = moved_key;
    }
    // When the erased element was the last one this rewrites its own (now
    // irrelevant) sparse slot, which is harmless.
    sparse()[moved_key] = didx;
    dense()[m_next - 1].~value_type();
    --m_next;
    // No need to write to sparse()[key].  If it's read, next and dense are
    // rechecked to ensure it's actually relevant.
  }

  /*
   * Model EqualityComparable, as long as the value is EqualityComparable.
   * Note that it's O(size()) worst case.
   *
   * This returns whether the two maps have equivalent key value pairs,
   * regardless of the order they were inserted.  Maps with different universe
   * sizes compare unequal.
   */
  bool operator==(const sparse_id_map& o) const {
    if (universe_size() != o.universe_size()) return false;
    if (size() != o.size()) return false;
    for (auto& kv : *this) {
      if (!o.containsImpl(kv.first)) return false;
      if (!(o.dense()[o.sparse()[kv.first]].second == kv.second)) {
        return false;
      }
    }
    return true;
  }
  bool operator!=(const sparse_id_map& o) const { return !(*this == o); }

  /*
   * Merge a map into this one by intersecting the keys, and using a
   * user-defined function to merge values that were present in both this and
   * `o'.  The user-defined value merge function is called as
   * val_merge(our_value, their_value) and should return a bool, indicating
   * whether the value should be considered changed.
   *
   * Basic guarantee only.
   *
   * Complexity: O(size()).
   *
   * Returns: true if this map changed keys, or if the user-supplied function
   * returned true for any pair of values.
   *
   * Pre: universe_size() == o.universe_size()
   */
  template<class Fun>
  bool merge(const sparse_id_map& o, Fun val_merge) {
    assert(m_universe_size == o.m_universe_size);
    auto fwd = K{0};
    auto changed = false;
    while (fwd != m_next) {
      assert(fwd < m_next);
      auto const k = dense()[fwd].first;
      if (!o.containsImpl(k)) {
        changed = true;
        if (fwd == m_next - 1) {  // Avoid self-move assigning values.
          // Dropping the last element: it still has to be destroyed, or a
          // non-trivial V would never have its destructor run.
          dense()[fwd].~value_type();
          --m_next;
          continue;
        }
        // Order here is important for exception safety: we can't decrement
        // m_next until we've moved-from and then destroyed the old value.
        auto& tomove = dense()[m_next - 1];
        sparse()[tomove.first] = fwd;
        dense()[fwd].second = std::move(tomove.second);
        const_cast<K&>(dense()[fwd].first) = tomove.first;
        tomove.~value_type();
        --m_next;
        // fwd is not advanced: the element just moved in is examined next.
        continue;
      }
      if (val_merge(dense()[fwd].second, o.dense()[o.sparse()[k]].second)) {
        changed = true;
      }
      ++fwd;
    }
    return changed;
  }

  /*
   * Swap the contents of two maps.  Has no precondition about universe sizes
   * matching.
   */
  void swap(sparse_id_map& o) noexcept {
    std::swap(m_universe_size, o.m_universe_size);
    std::swap(m_mem, o.m_mem);
    std::swap(m_next, o.m_next);
  }

private:
  /*
   * Byte offset of the dense array inside the allocation: the size of the
   * sparse array, rounded up so that the dense value_type elements are
   * suitably aligned.
   */
  static std::size_t dense_offset(K universe_size) {
    constexpr std::size_t align = alignof(value_type);
    return (sizeof(K) * universe_size + align - 1) / align * align;
  }

  /*
   * Allocate storage for a map with the given universe size: an array of
   * `universe_size' sparse slots followed (after alignment padding) by room
   * for `universe_size' value_type elements.  Returns nullptr for an empty
   * universe.
   *
   * Throws: std::bad_alloc if std::malloc fails.
   */
  static void* allocate(K universe_size) {
    if (!universe_size) return nullptr;
    auto const mem = std::malloc(
      dense_offset(universe_size) + sizeof(value_type) * universe_size
    );
    if (!mem) throw std::bad_alloc{};
    // Note: the sparse part of the memory is deliberately uninitialized, but
    // we do initialize it for valgrind or asan builds.
#if defined(FOLLY_SANITIZE_ADDRESS) || defined(VALGRIND)
    std::memset(mem, 0, sizeof(K) * universe_size);
#endif
    return mem;
  }

  bool containsImpl(K k) const {
    assert(k < m_universe_size);
    auto const didx = sparse()[k];  // may read uninitialized memory
    // The sparse slot is only trusted if it points at a live dense slot whose
    // key is `k'.
    return didx < m_next && dense()[didx].first == k;
  }

private:
  K* sparse() { return static_cast<K*>(m_mem); }
  const K* sparse() const { return static_cast<K*>(m_mem); }
  value_type* dense() {
    void* vpDense = static_cast<char*>(m_mem) + dense_offset(m_universe_size);
    return static_cast<value_type*>(vpDense);
  }
  const value_type* dense() const {
    return const_cast<sparse_id_map*>(this)->dense();
  }

private:
  K m_universe_size;
  K m_next;
  void* m_mem;
};

//////////////////////////////////////////////////////////////////////

// Non-member swaps for ADL swap idiom.

/*
 * Non-member swap for sparse_id_set, found via ADL.  Forwards to the member
 * swap, which is noexcept and has no precondition on universe sizes.
 */
template<class T, class LT, class EX>
void swap(sparse_id_set<T,LT,EX>& a, sparse_id_set<T,LT,EX>& b) noexcept {
  a.swap(b);
}

/*
 * Non-member swap for sparse_id_map, found via ADL.  Forwards to the member
 * swap, which is noexcept.
 */
template<class K, class V, class LK, class LKE>
void swap(sparse_id_map<K,V,LK,LKE>& a,
          sparse_id_map<K,V,LK,LKE>& b) noexcept {
  a.swap(b);
}

//////////////////////////////////////////////////////////////////////

}

#endif