about summary refs log blame commit diff
path: root/absl/synchronization/internal/graphcycles.cc
blob: d3878de2b68f21929f00b8634bcb9794a0e4f086 (plain) (tree)





























                                                                           
                                 







                                                                         
                                        


















                                                                         
                                                      

















































































































































                                                                               
                                            
















































                                                                              


















                                                                            




















                                                                               
                                              
















                                                             
                                              


































































                                                                               
                                                               






























                                                                                
                                                










                                                                    
                                                




















                                         
                                                        









                                                           

                                                                      









































































































































































































































                                                                               
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// GraphCycles provides incremental cycle detection on a dynamic
// graph using the following algorithm:
//
// A dynamic topological sort algorithm for directed acyclic graphs
// David J. Pearce, Paul H. J. Kelly
// Journal of Experimental Algorithmics (JEA) JEA Homepage archive
// Volume 11, 2006, Article No. 1.7
//
// Brief summary of the algorithm:
//
// (1) Maintain a rank for each node that is consistent
//     with the topological sort of the graph. I.e., path from x to y
//     implies rank[x] < rank[y].
// (2) When a new edge (x->y) is inserted, do nothing if rank[x] < rank[y].
// (3) Otherwise: adjust ranks in the neighborhood of x and y.

#include "absl/base/attributes.h"
// This file is a no-op if the required LowLevelAlloc support is missing.
#include "absl/base/internal/low_level_alloc.h"
#ifndef ABSL_LOW_LEVEL_ALLOC_MISSING

#include "absl/synchronization/internal/graphcycles.h"

#include <algorithm>
#include <array>
#include "absl/base/internal/hide_ptr.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/spinlock.h"

// Do not use STL.   This module does not use standard memory allocation.

namespace absl {
namespace synchronization_internal {

namespace {

// Avoid LowLevelAlloc's default arena since it calls malloc hooks in
// which people are doing things like acquiring Mutexes.
static absl::base_internal::SpinLock arena_mu(
    absl::base_internal::kLinkerInitialized);
static base_internal::LowLevelAlloc::Arena* arena;

static void InitArenaIfNecessary() {
  arena_mu.Lock();
  if (arena == nullptr) {
    arena = base_internal::LowLevelAlloc::NewArena(0);
  }
  arena_mu.Unlock();
}

// Number of inlined elements in Vec.  Hash table implementation
// relies on this being a power of two.
static const uint32_t kInline = 8;

// A simple LowLevelAlloc based resizable vector with inlined storage
// for a few elements.  T must be a plain type since constructor
// and destructor are not run on elements of type T managed by Vec.
template <typename T>
class Vec {
 public:
  Vec() { Init(); }
  ~Vec() { Discard(); }

  void clear() {
    Discard();
    Init();
  }

  bool empty() const { return size_ == 0; }
  uint32_t size() const { return size_; }
  T* begin() { return ptr_; }
  T* end() { return ptr_ + size_; }
  const T& operator[](uint32_t i) const { return ptr_[i]; }
  T& operator[](uint32_t i) { return ptr_[i]; }
  const T& back() const { return ptr_[size_-1]; }
  void pop_back() { size_--; }

  void push_back(const T& v) {
    if (size_ == capacity_) Grow(size_ + 1);
    ptr_[size_] = v;
    size_++;
  }

  void resize(uint32_t n) {
    if (n > capacity_) Grow(n);
    size_ = n;
  }

  void fill(const T& val) {
    for (uint32_t i = 0; i < size(); i++) {
      ptr_[i] = val;
    }
  }

  // Guarantees src is empty at end.
  // Provided for the hash table resizing code below.
  void MoveFrom(Vec<T>* src) {
    if (src->ptr_ == src->space_) {
      // Need to actually copy
      resize(src->size_);
      std::copy(src->ptr_, src->ptr_ + src->size_, ptr_);
      src->size_ = 0;
    } else {
      Discard();
      ptr_ = src->ptr_;
      size_ = src->size_;
      capacity_ = src->capacity_;
      src->Init();
    }
  }

 private:
  T* ptr_;
  T space_[kInline];
  uint32_t size_;
  uint32_t capacity_;

  void Init() {
    ptr_ = space_;
    size_ = 0;
    capacity_ = kInline;
  }

  void Discard() {
    if (ptr_ != space_) base_internal::LowLevelAlloc::Free(ptr_);
  }

  void Grow(uint32_t n) {
    while (capacity_ < n) {
      capacity_ *= 2;
    }
    size_t request = static_cast<size_t>(capacity_) * sizeof(T);
    T* copy = static_cast<T*>(
        base_internal::LowLevelAlloc::AllocWithArena(request, arena));
    std::copy(ptr_, ptr_ + size_, copy);
    Discard();
    ptr_ = copy;
  }

  Vec(const Vec&) = delete;
  Vec& operator=(const Vec&) = delete;
};

// A hash set of non-negative int32_t that uses Vec for its underlying storage.
class NodeSet {
 public:
  NodeSet() { Init(); }

  void clear() { Init(); }
  bool contains(int32_t v) const { return table_[FindIndex(v)] == v; }

  bool insert(int32_t v) {
    uint32_t i = FindIndex(v);
    if (table_[i] == v) {
      return false;
    }
    if (table_[i] == kEmpty) {
      // Only inserting over an empty cell increases the number of occupied
      // slots.
      occupied_++;
    }
    table_[i] = v;
    // Double when 75% full.
    if (occupied_ >= table_.size() - table_.size()/4) Grow();
    return true;
  }

  void erase(uint32_t v) {
    uint32_t i = FindIndex(v);
    if (static_cast<uint32_t>(table_[i]) == v) {
      table_[i] = kDel;
    }
  }

  // Iteration: is done via HASH_FOR_EACH
  // Example:
  //    HASH_FOR_EACH(elem, node->out) { ... }
#define HASH_FOR_EACH(elem, eset) \
  for (int32_t elem, _cursor = 0; (eset).Next(&_cursor, &elem); )
  bool Next(int32_t* cursor, int32_t* elem) {
    while (static_cast<uint32_t>(*cursor) < table_.size()) {
      int32_t v = table_[*cursor];
      (*cursor)++;
      if (v >= 0) {
        *elem = v;
        return true;
      }
    }
    return false;
  }

 private:
  enum : int32_t { kEmpty = -1, kDel = -2 };
  Vec<int32_t> table_;
  uint32_t occupied_;     // Count of non-empty slots (includes deleted slots)

  static uint32_t Hash(uint32_t a) { return a * 41; }

  // Return index for storing v.  May return an empty index or deleted index
  int FindIndex(int32_t v) const {
    // Search starting at hash index.
    const uint32_t mask = table_.size() - 1;
    uint32_t i = Hash(v) & mask;
    int deleted_index = -1;  // If >= 0, index of first deleted element we see
    while (true) {
      int32_t e = table_[i];
      if (v == e) {
        return i;
      } else if (e == kEmpty) {
        // Return any previously encountered deleted slot.
        return (deleted_index >= 0) ? deleted_index : i;
      } else if (e == kDel && deleted_index < 0) {
        // Keep searching since v might be present later.
        deleted_index = i;
      }
      i = (i + 1) & mask;  // Linear probing; quadratic is slightly slower.
    }
  }

  void Init() {
    table_.clear();
    table_.resize(kInline);
    table_.fill(kEmpty);
    occupied_ = 0;
  }

  void Grow() {
    Vec<int32_t> copy;
    copy.MoveFrom(&table_);
    occupied_ = 0;
    table_.resize(copy.size() * 2);
    table_.fill(kEmpty);

    for (const auto& e : copy) {
      if (e >= 0) insert(e);
    }
  }

  NodeSet(const NodeSet&) = delete;
  NodeSet& operator=(const NodeSet&) = delete;
};

// We encode a node index and a node version in GraphId.  The version
// number is incremented when the GraphId is freed which automatically
// invalidates all copies of the GraphId.

inline GraphId MakeId(int32_t index, uint32_t version) {
  GraphId g;
  g.handle =
      (static_cast<uint64_t>(version) << 32) | static_cast<uint32_t>(index);
  return g;
}

inline int32_t NodeIndex(GraphId id) {
  return static_cast<uint32_t>(id.handle & 0xfffffffful);
}

inline uint32_t NodeVersion(GraphId id) {
  return static_cast<uint32_t>(id.handle >> 32);
}

struct Node {
  int32_t rank;               // rank number assigned by Pearce-Kelly algorithm
  uint32_t version;           // Current version number
  int32_t next_hash;          // Next entry in hash table
  bool visited;               // Temporary marker used by depth-first-search
  uintptr_t masked_ptr;       // User-supplied pointer
  NodeSet in;                 // List of immediate predecessor nodes in graph
  NodeSet out;                // List of immediate successor nodes in graph
  int priority;               // Priority of recorded stack trace.
  int nstack;                 // Depth of recorded stack trace.
  void* stack[40];            // stack[0,nstack-1] holds stack trace for node.
};

// Hash table for pointer to node index lookups.
class PointerMap {
 public:
  explicit PointerMap(const Vec<Node*>* nodes) : nodes_(nodes) {
    table_.fill(-1);
  }

  int32_t Find(void* ptr) {
    auto masked = base_internal::HidePtr(ptr);
    for (int32_t i = table_[Hash(ptr)]; i != -1;) {
      Node* n = (*nodes_)[i];
      if (n->masked_ptr == masked) return i;
      i = n->next_hash;
    }
    return -1;
  }

  void Add(void* ptr, int32_t i) {
    int32_t* head = &table_[Hash(ptr)];
    (*nodes_)[i]->next_hash = *head;
    *head = i;
  }

  int32_t Remove(void* ptr) {
    // Advance through linked list while keeping track of the
    // predecessor slot that points to the current entry.
    auto masked = base_internal::HidePtr(ptr);
    for (int32_t* slot = &table_[Hash(ptr)]; *slot != -1; ) {
      int32_t index = *slot;
      Node* n = (*nodes_)[index];
      if (n->masked_ptr == masked) {
        *slot = n->next_hash;  // Remove n from linked list
        n->next_hash = -1;
        return index;
      }
      slot = &n->next_hash;
    }
    return -1;
  }

 private:
  // Number of buckets in hash table for pointer lookups.
  static constexpr uint32_t kHashTableSize = 8171;  // should be prime

  const Vec<Node*>* nodes_;
  std::array<int32_t, kHashTableSize> table_;

  static uint32_t Hash(void* ptr) {
    return reinterpret_cast<uintptr_t>(ptr) % kHashTableSize;
  }
};

}  // namespace

struct GraphCycles::Rep {
  Vec<Node*> nodes_;
  Vec<int32_t> free_nodes_;  // Indices for unused entries in nodes_
  PointerMap ptrmap_;

  // Temporary state.
  Vec<int32_t> deltaf_;  // Results of forward DFS
  Vec<int32_t> deltab_;  // Results of backward DFS
  Vec<int32_t> list_;    // All nodes to reprocess
  Vec<int32_t> merged_;  // Rank values to assign to list_ entries
  Vec<int32_t> stack_;   // Emulates recursion stack for depth-first searches

  Rep() : ptrmap_(&nodes_) {}
};

static Node* FindNode(GraphCycles::Rep* rep, GraphId id) {
  Node* n = rep->nodes_[NodeIndex(id)];
  return (n->version == NodeVersion(id)) ? n : nullptr;
}

GraphCycles::GraphCycles() {
  InitArenaIfNecessary();
  rep_ = new (base_internal::LowLevelAlloc::AllocWithArena(sizeof(Rep), arena))
      Rep;
}

GraphCycles::~GraphCycles() {
  for (auto* node : rep_->nodes_) {
    node->Node::~Node();
    base_internal::LowLevelAlloc::Free(node);
  }
  rep_->Rep::~Rep();
  base_internal::LowLevelAlloc::Free(rep_);
}

bool GraphCycles::CheckInvariants() const {
  Rep* r = rep_;
  NodeSet ranks;  // Set of ranks seen so far.
  for (uint32_t x = 0; x < r->nodes_.size(); x++) {
    Node* nx = r->nodes_[x];
    void* ptr = base_internal::UnhidePtr<void>(nx->masked_ptr);
    if (ptr != nullptr && static_cast<uint32_t>(r->ptrmap_.Find(ptr)) != x) {
      ABSL_RAW_LOG(FATAL, "Did not find live node in hash table %u %p", x, ptr);
    }
    if (nx->visited) {
      ABSL_RAW_LOG(FATAL, "Did not clear visited marker on node %u", x);
    }
    if (!ranks.insert(nx->rank)) {
      ABSL_RAW_LOG(FATAL, "Duplicate occurrence of rank %d", nx->rank);
    }
    HASH_FOR_EACH(y, nx->out) {
      Node* ny = r->nodes_[y];
      if (nx->rank >= ny->rank) {
        ABSL_RAW_LOG(FATAL, "Edge %u->%d has bad rank assignment %d->%d", x, y,
                     nx->rank, ny->rank);
      }
    }
  }
  return true;
}

GraphId GraphCycles::GetId(void* ptr) {
  int32_t i = rep_->ptrmap_.Find(ptr);
  if (i != -1) {
    return MakeId(i, rep_->nodes_[i]->version);
  } else if (rep_->free_nodes_.empty()) {
    Node* n =
        new (base_internal::LowLevelAlloc::AllocWithArena(sizeof(Node), arena))
            Node;
    n->version = 1;  // Avoid 0 since it is used by InvalidGraphId()
    n->visited = false;
    n->rank = rep_->nodes_.size();
    n->masked_ptr = base_internal::HidePtr(ptr);
    n->nstack = 0;
    n->priority = 0;
    rep_->nodes_.push_back(n);
    rep_->ptrmap_.Add(ptr, n->rank);
    return MakeId(n->rank, n->version);
  } else {
    // Preserve preceding rank since the set of ranks in use must be
    // a permutation of [0,rep_->nodes_.size()-1].
    int32_t r = rep_->free_nodes_.back();
    rep_->free_nodes_.pop_back();
    Node* n = rep_->nodes_[r];
    n->masked_ptr = base_internal::HidePtr(ptr);
    n->nstack = 0;
    n->priority = 0;
    rep_->ptrmap_.Add(ptr, r);
    return MakeId(r, n->version);
  }
}

void GraphCycles::RemoveNode(void* ptr) {
  int32_t i = rep_->ptrmap_.Remove(ptr);
  if (i == -1) {
    return;
  }
  Node* x = rep_->nodes_[i];
  HASH_FOR_EACH(y, x->out) {
    rep_->nodes_[y]->in.erase(i);
  }
  HASH_FOR_EACH(y, x->in) {
    rep_->nodes_[y]->out.erase(i);
  }
  x->in.clear();
  x->out.clear();
  x->masked_ptr = base_internal::HidePtr<void>(nullptr);
  if (x->version == std::numeric_limits<uint32_t>::max()) {
    // Cannot use x any more
  } else {
    x->version++;  // Invalidates all copies of node.
    rep_->free_nodes_.push_back(i);
  }
}

void* GraphCycles::Ptr(GraphId id) {
  Node* n = FindNode(rep_, id);
  return n == nullptr ? nullptr
                      : base_internal::UnhidePtr<void>(n->masked_ptr);
}

bool GraphCycles::HasNode(GraphId node) {
  return FindNode(rep_, node) != nullptr;
}

bool GraphCycles::HasEdge(GraphId x, GraphId y) const {
  Node* xn = FindNode(rep_, x);
  return xn && FindNode(rep_, y) && xn->out.contains(NodeIndex(y));
}

void GraphCycles::RemoveEdge(GraphId x, GraphId y) {
  Node* xn = FindNode(rep_, x);
  Node* yn = FindNode(rep_, y);
  if (xn && yn) {
    xn->out.erase(NodeIndex(y));
    yn->in.erase(NodeIndex(x));
    // No need to update the rank assignment since a previous valid
    // rank assignment remains valid after an edge deletion.
  }
}

static bool ForwardDFS(GraphCycles::Rep* r, int32_t n, int32_t upper_bound);
static void BackwardDFS(GraphCycles::Rep* r, int32_t n, int32_t lower_bound);
static void Reorder(GraphCycles::Rep* r);
static void Sort(const Vec<Node*>&, Vec<int32_t>* delta);
static void MoveToList(
    GraphCycles::Rep* r, Vec<int32_t>* src, Vec<int32_t>* dst);

bool GraphCycles::InsertEdge(GraphId idx, GraphId idy) {
  Rep* r = rep_;
  const int32_t x = NodeIndex(idx);
  const int32_t y = NodeIndex(idy);
  Node* nx = FindNode(r, idx);
  Node* ny = FindNode(r, idy);
  if (nx == nullptr || ny == nullptr) return true;  // Expired ids

  if (nx == ny) return false;  // Self edge
  if (!nx->out.insert(y)) {
    // Edge already exists.
    return true;
  }

  ny->in.insert(x);

  if (nx->rank <= ny->rank) {
    // New edge is consistent with existing rank assignment.
    return true;
  }

  // Current rank assignments are incompatible with the new edge.  Recompute.
  // We only need to consider nodes that fall in the range [ny->rank,nx->rank].
  if (!ForwardDFS(r, y, nx->rank)) {
    // Found a cycle.  Undo the insertion and tell caller.
    nx->out.erase(y);
    ny->in.erase(x);
    // Since we do not call Reorder() on this path, clear any visited
    // markers left by ForwardDFS.
    for (const auto& d : r->deltaf_) {
      r->nodes_[d]->visited = false;
    }
    return false;
  }
  BackwardDFS(r, x, ny->rank);
  Reorder(r);
  return true;
}

static bool ForwardDFS(GraphCycles::Rep* r, int32_t n, int32_t upper_bound) {
  // Avoid recursion since stack space might be limited.
  // We instead keep a stack of nodes to visit.
  r->deltaf_.clear();
  r->stack_.clear();
  r->stack_.push_back(n);
  while (!r->stack_.empty()) {
    n = r->stack_.back();
    r->stack_.pop_back();
    Node* nn = r->nodes_[n];
    if (nn->visited) continue;

    nn->visited = true;
    r->deltaf_.push_back(n);

    HASH_FOR_EACH(w, nn->out) {
      Node* nw = r->nodes_[w];
      if (nw->rank == upper_bound) {
        return false;  // Cycle
      }
      if (!nw->visited && nw->rank < upper_bound) {
        r->stack_.push_back(w);
      }
    }
  }
  return true;
}

static void BackwardDFS(GraphCycles::Rep* r, int32_t n, int32_t lower_bound) {
  r->deltab_.clear();
  r->stack_.clear();
  r->stack_.push_back(n);
  while (!r->stack_.empty()) {
    n = r->stack_.back();
    r->stack_.pop_back();
    Node* nn = r->nodes_[n];
    if (nn->visited) continue;

    nn->visited = true;
    r->deltab_.push_back(n);

    HASH_FOR_EACH(w, nn->in) {
      Node* nw = r->nodes_[w];
      if (!nw->visited && lower_bound < nw->rank) {
        r->stack_.push_back(w);
      }
    }
  }
}

static void Reorder(GraphCycles::Rep* r) {
  Sort(r->nodes_, &r->deltab_);
  Sort(r->nodes_, &r->deltaf_);

  // Adds contents of delta lists to list_ (backwards deltas first).
  r->list_.clear();
  MoveToList(r, &r->deltab_, &r->list_);
  MoveToList(r, &r->deltaf_, &r->list_);

  // Produce sorted list of all ranks that will be reassigned.
  r->merged_.resize(r->deltab_.size() + r->deltaf_.size());
  std::merge(r->deltab_.begin(), r->deltab_.end(),
             r->deltaf_.begin(), r->deltaf_.end(),
             r->merged_.begin());

  // Assign the ranks in order to the collected list.
  for (uint32_t i = 0; i < r->list_.size(); i++) {
    r->nodes_[r->list_[i]]->rank = r->merged_[i];
  }
}

static void Sort(const Vec<Node*>& nodes, Vec<int32_t>* delta) {
  struct ByRank {
    const Vec<Node*>* nodes;
    bool operator()(int32_t a, int32_t b) const {
      return (*nodes)[a]->rank < (*nodes)[b]->rank;
    }
  };
  ByRank cmp;
  cmp.nodes = &nodes;
  std::sort(delta->begin(), delta->end(), cmp);
}

static void MoveToList(
    GraphCycles::Rep* r, Vec<int32_t>* src, Vec<int32_t>* dst) {
  for (auto& v : *src) {
    int32_t w = v;
    v = r->nodes_[w]->rank;         // Replace v entry with its rank
    r->nodes_[w]->visited = false;  // Prepare for future DFS calls
    dst->push_back(w);
  }
}

int GraphCycles::FindPath(GraphId idx, GraphId idy, int max_path_len,
                          GraphId path[]) const {
  Rep* r = rep_;
  if (FindNode(r, idx) == nullptr || FindNode(r, idy) == nullptr) return 0;
  const int32_t x = NodeIndex(idx);
  const int32_t y = NodeIndex(idy);

  // Forward depth first search starting at x until we hit y.
  // As we descend into a node, we push it onto the path.
  // As we leave a node, we remove it from the path.
  int path_len = 0;

  NodeSet seen;
  r->stack_.clear();
  r->stack_.push_back(x);
  while (!r->stack_.empty()) {
    int32_t n = r->stack_.back();
    r->stack_.pop_back();
    if (n < 0) {
      // Marker to indicate that we are leaving a node
      path_len--;
      continue;
    }

    if (path_len < max_path_len) {
      path[path_len] = MakeId(n, rep_->nodes_[n]->version);
    }
    path_len++;
    r->stack_.push_back(-1);  // Will remove tentative path entry

    if (n == y) {
      return path_len;
    }

    HASH_FOR_EACH(w, r->nodes_[n]->out) {
      if (seen.insert(w)) {
        r->stack_.push_back(w);
      }
    }
  }

  return 0;
}

bool GraphCycles::IsReachable(GraphId x, GraphId y) const {
  return FindPath(x, y, 0, nullptr) > 0;
}

void GraphCycles::UpdateStackTrace(GraphId id, int priority,
                                   int (*get_stack_trace)(void** stack, int)) {
  Node* n = FindNode(rep_, id);
  if (n == nullptr || n->priority >= priority) {
    return;
  }
  n->nstack = (*get_stack_trace)(n->stack, ABSL_ARRAYSIZE(n->stack));
  n->priority = priority;
}

int GraphCycles::GetStackTrace(GraphId id, void*** ptr) {
  Node* n = FindNode(rep_, id);
  if (n == nullptr) {
    *ptr = nullptr;
    return 0;
  } else {
    *ptr = n->stack;
    return n->nstack;
  }
}

}  // namespace synchronization_internal
}  // namespace absl

#endif  // ABSL_LOW_LEVEL_ALLOC_MISSING