Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright 2011-2016 Kwant authors.
2#
3# This file is part of Kwant. It is subject to the license terms in the file
4# LICENSE.rst found in the top-level directory of this distribution and at
5# https://kwant-project.org/license. A list of Kwant authors can be found in
6# the file AUTHORS.rst at the top-level directory of this distribution and at
7# https://kwant-project.org/authors.
9"""Directed graphs optimized for storage and runtime efficiency."""
11__all__ = ['Graph', 'CGraph']
13# In this module we manage arrays directly with malloc, realloc and free to
14# circumvent two problems with Cython:
15#
16# (1) There are no efficient arrays which allow appending (numpy.ndarray
17# doesn't).
18#
19# (2) Cython extension types cannot have typed buffers as members.
20#
21# Once these two problems are solved, the corresponding code could be
22# rewritten, probably to use Python's array.array.
24# TODO: represent all dangling nodes by -1
26# TODO (perhaps): transform Graph into something which behaves like a python
27# sequence. Allow creation of compressed graphs from any sequence.
29from libc.stdlib cimport malloc, realloc, free
30from libc.string cimport memset
31from cpython cimport array
32import array
33import numpy as np
34cimport numpy as np
35from .defs cimport gint
37cdef class Graph:
38 """An uncompressed graph. Used to make compressed graphs. (See `CGraph`.)
39 """
40 # The array edges holds `size` elements with space for `capacity`.
42 def __init__(self, allow_negative_nodes=False):
43 self.allow_negative_nodes = allow_negative_nodes
45 def __dealloc__(self):
46 free(self.edges)
48 property num_nodes:
49 def __get__(self):
50 return self._num_nodes
52 def __set__(self, value):
53 if value < self._num_nodes:
54 raise ValueError("The number of nodes cannot be decreased.")
55 self._num_nodes = value
57 cpdef reserve(self, gint capacity):
58 """Reserve space for edges.
60 Parameters
61 ----------
62 capacity : integer
63 Number of edges for which to reserve space.
65 Notes
66 -----
67 It is not necessary to call this method, but using it can speed up the
68 creation of graphs.
69 """
70 if capacity <= self.capacity:
71 return
72 self.edges = <Edge*>realloc(self.edges, capacity * sizeof(Edge))
73 if not self.edges:
74 raise MemoryError
75 self.capacity = capacity
77 cpdef gint add_edge(self, gint tail, gint head) except -1:
78 """Add the directed edge (`tail`, `head`) to the graph.
80 Parameters
81 ----------
82 tail : integer
83 head : integer
85 Raises
86 ------
87 ValueError
88 If a negative node is added when this has not been allowed
89 explicitly or if an edge is doubly-dangling.
91 Returns
92 -------
93 edge_nr : integer
94 The sequential number of the edge. This number can be used to query
95 for the edge ID of an edge in the compressed graph.
96 """
97 cdef bint neg_tail = tail < 0
98 cdef bint neg_head = head < 0
99 if neg_tail or neg_head:
100 if not self.allow_negative_nodes:
101 raise ValueError(
102 "Negative node numbers have to be allowed explicitly.")
103 if neg_tail and neg_head:
104 raise ValueError("Doubly-dangling edges are never allowed.")
105 if neg_head:
106 self.num_pn_edges += 1
107 else:
108 self.num_np_edges += 1
109 else:
110 self.num_pp_edges += 1
112 if self.size == self.capacity:
113 if self.capacity == 0:
114 self.reserve(8)
115 else:
116 self.reserve(2 * self.capacity)
117 self.edges[self.size].tail = tail
118 self.edges[self.size].head = head
119 self.size += 1
120 self._num_nodes = max(self._num_nodes, tail + 1, head + 1)
121 return self.size - 1
123 def add_edges(self, edges):
124 """Add multiple edges in one pass.
126 Parameters
127 ----------
128 edges : iterable of 2-sequences of integers
129 The parameter `edges` must be an iterable of elements which
130 describe the edges to be added. For each edge-element, edge[0] and
131 edge[1] must give, respectively, the tail and the head. Valid
132 edges are, for example, a list of 2-integer-tuples, or an
133 numpy.ndarray of integers with a shape (n, 2). The latter case is
134 optimized.
136 Returns
137 -------
138 first_edge_nr : integer
139 The sequential number of the first of the added edges. The numbers
140 of the other added edges are consecutive integers following the
141 number of the first. Edge numbers can be used to query for the edge
142 ID of an edge in the compressed graph.
143 """
144 result = self.size
145 if isinstance(edges, np.ndarray):
146 if edges.dtype == np.int64:
147 self._add_edges_ndarray_int64(edges)
148 elif edges.dtype == np.int32:
149 self._add_edges_ndarray_int32(edges)
150 else:
151 self._add_edges_ndarray_int64(edges.astype(np.int64))
152 else:
153 for edge in edges:
154 self.add_edge(*edge)
155 return result
157 cdef _add_edges_ndarray_int64(self, np.ndarray[np.int64_t, ndim=2] edges):
158 cdef int i
159 for i in range(edges.shape[0]):
160 self.add_edge(edges[i, 0], edges[i, 1])
162 cdef _add_edges_ndarray_int32(self, np.ndarray[np.int32_t, ndim=2] edges):
163 cdef int i
164 for i in range(edges.shape[0]):
165 self.add_edge(edges[i, 0], edges[i, 1])
167 def compressed(self, bint twoway=False, bint edge_nr_translation=False,
168 bint allow_lost_edges=False):
169 """Build a CGraph from this graph.
171 Parameters
172 ----------
173 twoway : boolean (default: False)
174 If set, it will be possible to query the compressed graph for
175 incoming neighbors.
176 edge_nr_translation : boolean (default: False)
177 If set, it will be possible to call the method `edge_id`.
178 allow_lost_edges : boolean (default: False)
179 If set, negative tails are accepted even with one-way compression.
181 Raises
182 ------
183 ValueError
184 When negative tails occur while `twoway` and `allow_lost_edges` are
185 both false.
187 Notes
188 -----
189 In a one-way compressed graph, an edge with a negative tail is present
190 only minimally: it is only possible to query the head of such an edge,
191 given the edge ID. This is why one-way compression of a graph with a
192 negative tail leads to a ValueError being raised, unless
193 `allow_lost_edges` is true.
194 """
195 assert (self.size ==
196 self.num_pp_edges + self.num_pn_edges + self.num_np_edges)
197 if not (twoway or allow_lost_edges or self.num_np_edges == 0):
198 raise ValueError('Edges with negative tails cannot be '
199 'represented in an one-way compressed graph.')
201 cdef gint s, tail, head, edge_nr
202 cdef CGraph_malloc result = CGraph_malloc(twoway, edge_nr_translation,
203 self._num_nodes,
204 self.num_pp_edges,
205 self.num_pn_edges,
206 self.num_np_edges)
207 cdef gint *hbuf = result.heads_idxs + 1
208 cdef gint *heads = result.heads
209 cdef gint *tbuf = result.tails_idxs + 1
210 cdef gint *tails = result.tails
211 cdef gint *edge_ids = result.edge_ids
212 cdef gint edge_id = 0, num_edges # = 0 is there to silence warning.
213 cdef Edge *edge
215 # `hbuf` is just `heads_idxs` shifted by one. We will use `hbuf` to
216 # build up `heads` and its end state will be such that `heads_idxs`
217 # will have the right content. For `tbuf`, replace "head" with tail in
218 # the previous text.
220 # Make a histogram of outgoing edges per node in `hbuf` and one of
221 # incoming edges per node in `tbuf`.
222 memset(result.heads_idxs, 0, (self._num_nodes + 1) * sizeof(gint))
223 if twoway:
224 memset(result.tails_idxs, 0, (self._num_nodes + 1) * sizeof(gint))
225 for edge in self.edges[:self.size]:
226 if edge.tail >= 0:
227 hbuf[edge.tail] += 1
228 if twoway and edge.head >= 0:
229 tbuf[edge.head] += 1
231 # Replace `hbuf` with its "antiderivative" and then subtract the
232 # original `hbuf` from it. This is done in one pass.
233 s = 0
234 for tail in range(self._num_nodes):
235 s += hbuf[tail]
236 hbuf[tail] = s - hbuf[tail]
238 # Same as before for `tbuf`.
239 if twoway:
240 s = 0
241 for head in range(self._num_nodes):
242 s += tbuf[head]
243 tbuf[head] = s - tbuf[head]
245 # Iterate through all edges and build `heads` and `tails`.
246 next_np_edge_id = result.num_px_edges
247 edge_nr = 0
248 for edge in self.edges[:self.size]:
249 if edge.tail >= 0:
250 edge_id = hbuf[edge.tail]
251 hbuf[edge.tail] += 1
252 elif twoway:
253 assert edge.head >= 0
254 edge_id = next_np_edge_id
255 next_np_edge_id += 1
256 else:
257 edge_id = -1
258 if edge_id >= 0:
259 heads[edge_id] = edge.head
260 if twoway and edge.head >= 0:
261 tails[tbuf[edge.head]] = edge.tail
262 edge_ids[tbuf[edge.head]] = edge_id
263 tbuf[edge.head] += 1
264 if edge_nr_translation:
265 result.edge_ids_by_edge_nr[edge_nr] = edge_id
266 edge_nr += 1
268 assert result.num_edges == next_np_edge_id
269 return result
271 def write_dot(self, file):
272 """Write a representation of the graph in dot format to `file`.
274 That resulting file can be visualized with dot(1) or neato(1) form the
275 graphviz package.
276 """
277 cdef Edge *edge
278 file.write("digraph g {\n")
279 for edge in self.edges[:self.size]:
280 file.write(" %d -> %d;\n" % (edge.tail, edge.head))
281 file.write("}\n")
284cdef class gintArraySlice:
285 def __len__(self):
286 return self.size
288 def __getitem__(self, gint key):
289 cdef gint index
290 if key >= 0:
291 index = key
292 else:
293 index = self.size + key
294 if index < 0 or index >= self.size:
295 raise IndexError('Index out of range.')
296 return self.data[index]
298cdef class EdgeIterator:
299 def __iter__(self):
300 return self
302 def __next__(self):
303 if self.edge_id == self.graph.num_edges:
304 raise StopIteration
305 cdef gint current_edge_id = self.edge_id
306 self.edge_id += 1
307 while current_edge_id >= self.graph.heads_idxs[self.tail + 1]:
308 self.tail += 1
309 return self.tail, self.graph.heads[current_edge_id]
312class DisabledFeatureError(RuntimeError):
313 pass
316class NodeDoesNotExistError(IndexError):
317 pass
320class EdgeDoesNotExistError(IndexError):
321 pass
324_need_twoway = 'Enable "twoway" during graph compression.'
326cdef class CGraph:
327 """A compressed graph which can be efficiently queried for the existence of
328 edges and outgoing neighbors.
330 Objects of this class do not initialize the members themselves, but expect
331 that they hold usable values. A good way to create them is by compressing
332 a `Graph`.
334 Iterating over a graph yields a sequence of (tail, head) pairs of all
335 edges. The number of an edge in this sequence equals its edge ID. The
336 built-in function `enumerate` can thus be used to easily iterate over all
337 edges along with their edge IDs.
338 """
339 def __iter__(self):
340 """Return an iterator over (tail, head) of all edges."""
341 cdef EdgeIterator result = EdgeIterator()
342 result.graph = self
343 result.edge_id = 0
344 result.tail = 0
345 return result
347 def has_dangling_edges(self):
348 return not self.num_edges == self.num_px_edges == self.num_xp_edges
350 cpdef gintArraySlice out_neighbors(self, gint node):
351 """Return the nodes a node points to.
353 Parameters
354 ----------
355 node : integer
357 Returns
358 -------
359 nodes : sequence of integers
361 Raises
362 ------
363 NodeDoesNotExistError
364 """
365 if node < 0 or node >= self.num_nodes:
366 raise NodeDoesNotExistError()
367 cdef gintArraySlice result = gintArraySlice()
368 result.data = &self.heads[self.heads_idxs[node]]
369 result.size = &self.heads[self.heads_idxs[node + 1]] - result.data
370 return result
372 def out_edge_ids(self, gint node):
373 """Return the IDs of outgoing edges of node.
375 Parameters
376 ----------
377 node : integer
379 Returns
380 -------
381 edge_ids : sequence of integers
383 Raises
384 ------
385 NodeDoesNotExistError
386 """
387 if node < 0 or node >= self.num_nodes:
388 raise NodeDoesNotExistError()
389 return iter(range(self.heads_idxs[node], self.heads_idxs[node + 1]))
391 def in_neighbors(self, gint node):
392 """Return the nodes which point to a node.
394 Parameters
395 ----------
396 node : integer
398 Returns
399 -------
400 nodes : sequence of integers
402 Raises
403 ------
404 NodeDoesNotExistError
405 DisabledFeatureError
406 If the graph is not two-way compressed.
407 """
408 if not self.twoway:
409 raise DisabledFeatureError(_need_twoway)
410 if node < 0 or node >= self.num_nodes:
411 raise NodeDoesNotExistError()
412 cdef gintArraySlice result = gintArraySlice()
413 result.data = &self.tails[self.tails_idxs[node]]
414 result.size = &self.tails[self.tails_idxs[node + 1]] - result.data
415 return result
417 def in_edge_ids(self, gint node):
418 """Return the IDs of incoming edges of a node.
420 Parameters
421 ----------
422 node : integer
424 Returns
425 -------
426 edge_ids : sequence of integers
428 Raises
429 ------
430 NodeDoesNotExistError
431 DisabledFeatureError
432 If the graph is not two-way compressed.
433 """
434 if not self.twoway:
435 raise DisabledFeatureError(_need_twoway)
436 if node < 0 or node >= self.num_nodes:
437 raise NodeDoesNotExistError()
438 cdef gintArraySlice result = gintArraySlice()
439 result.data = &self.edge_ids[self.tails_idxs[node]]
440 result.size = &self.edge_ids[self.tails_idxs[node + 1]] - result.data
441 return result
443 def has_edge(self, gint tail, gint head):
444 """Does the graph contain the edge (tail, head)?
446 Parameters
447 ----------
448 tail : integer
449 head : integer
451 Returns
452 -------
453 had_edge : boolean
455 Raises
456 ------
457 NodeDoesNotExistError
458 EdgeDoesNotExistError
459 DisabledFeatureError
460 If `tail` is negative and the graph is not two-way compressed.
461 """
462 cdef gint h, t
463 if tail >= self.num_nodes or head >= self.num_nodes:
464 raise NodeDoesNotExistError()
465 if tail >= 0:
466 for h in self.heads[self.heads_idxs[tail]
467 : self.heads_idxs[tail + 1]]:
468 if h == head: return True
469 else:
470 if not self.twoway:
471 raise DisabledFeatureError(_need_twoway)
472 if head < 0:
473 raise EdgeDoesNotExistError()
474 for t in self.tails[self.tails_idxs[head]
475 : self.tails_idxs[head + 1]]:
476 if t == tail: return True
477 return False
479 def edge_id(self, gint edge_nr):
480 """Return the edge ID of an edge given its sequential number.
482 Parameters
483 ----------
484 edge_nr : integer
486 Returns
487 -------
488 edge_id : integer
490 Raises
491 ------
492 DisabledFeatureError
493 If `edge_nr_translation` was not enabled during graph compression.
494 EdgeDoesNotExistError
495 """
496 if not self.edge_ids_by_edge_nr:
497 raise DisabledFeatureError(
498 'Enable "edge_nr_translation" during graph compression.')
499 if edge_nr < 0 or edge_nr >= self.edge_nr_end:
500 raise EdgeDoesNotExistError()
501 result = self.edge_ids_by_edge_nr[edge_nr]
502 if result < 0:
503 raise EdgeDoesNotExistError()
504 return result
506 def first_edge_id(self, gint tail, gint head):
507 """Return the edge ID of the first edge (tail, head).
509 Parameters
510 ----------
511 tail : integer
512 head : integer
514 Returns
515 -------
516 edge_id : integer
518 Raises
519 ------
520 NodeDoesNotExist
521 EdgeDoesNotExistError
522 DisabledFeatureError
523 If `tail` is negative and the graph is not two-way compressed.
525 Notes
526 -----
527 This method is useful for graphs where each edge occurs only once.
528 """
529 if tail >= self.num_nodes or head >= self.num_nodes:
530 raise NodeDoesNotExistError()
531 if tail >= 0:
532 for head_index in range(self.heads_idxs[tail],
533 self.heads_idxs[tail + 1]):
534 if self.heads[head_index] == head:
535 return head_index
536 else:
537 if not self.twoway:
538 raise DisabledFeatureError(_need_twoway)
539 for tail_index in range(self.tails_idxs[head],
540 self.tails_idxs[head + 1]):
541 if self.tails[tail_index] == tail:
542 return self.edge_ids[tail_index]
543 raise EdgeDoesNotExistError()
545 def all_edge_ids(self, gint tail, gint head):
546 """Return an iterator over all edge IDs of edges with a given tail and
547 head.
549 Parameters
550 ----------
551 tail : integer
552 head : integer
554 Returns
555 -------
556 edge_id : integer
558 Raises
559 ------
560 NodeDoesNotExist
561 EdgeDoesNotExistError
562 DisabledFeatureError
563 If `tail` is negative and the graph is not two-way compressed.
564 """
565 if tail >= self.num_nodes or head >= self.num_nodes:
566 raise NodeDoesNotExistError()
567 result = []
568 if tail >= 0:
569 for head_index in range(self.heads_idxs[tail],
570 self.heads_idxs[tail + 1]):
571 if self.heads[head_index] == head:
572 result.append(head_index)
573 else:
574 if not self.twoway:
575 raise DisabledFeatureError(_need_twoway)
576 for tail_index in range(self.tails_idxs[head],
577 self.tails_idxs[head + 1]):
578 if self.tails[tail_index] == tail:
579 result.append(self.edge_ids[tail_index])
580 return result
582 # TODO: optimize this for the case of twofold graphs and low degree.
583 def tail(self, gint edge_id):
584 """Return the tail of an edge, given its edge ID.
586 Parameters
587 ----------
588 edge_id : integer
590 Returns
591 -------
592 tail : integer
593 If the edge exists and is positive.
594 None
595 If the tail is negative.
597 Raises
598 ------
599 EdgeDoesNotExistError
601 Notes
602 -----
603 The average performance of this method is O(log num_nodes) for
604 non-negative tails and O(1) for negative ones.
605 """
606 if edge_id < 0 or edge_id >= self.num_edges:
607 raise EdgeDoesNotExistError
608 if edge_id >= self.num_px_edges:
609 assert self.twoway
610 return None
611 cdef gint lower = 0, upper = self.num_nodes, tail = 0
612 while upper - lower > 1:
613 tail = (upper + lower) // 2
614 if edge_id == self.heads_idxs[tail]:
615 return tail
616 if edge_id < self.heads_idxs[tail]:
617 upper = tail
618 else:
619 lower = tail
620 return lower
622 def head(self, gint edge_id):
623 """Return the head of an edge, given its edge ID.
625 Parameters
626 ----------
627 edge_id : integer
629 Raises
630 ------
631 EdgeDoesNotExistError
633 Notes
634 -----
635 This method executes in constant time. It works for all edge IDs,
636 returning both positive and negative heads.
637 """
638 if edge_id < 0 or edge_id >= self.num_edges:
639 raise EdgeDoesNotExistError()
640 return self.heads[edge_id]
642 def write_dot(self, file):
643 """Write a representation of the graph in dot format to `file`.
645 Parameters
646 ----------
647 file : file-like object
649 Notes
650 -----
651 That resulting file can be visualized with dot(1) or neato(1) form the
652 `graphviz <https://graphviz.org/>`_ package.
653 """
654 cdef gint tail
655 file.write("digraph g {\n")
656 for tail in range(self.num_nodes):
657 for head in self.heads[self.heads_idxs[tail]
658 : self.heads_idxs[tail + 1]]:
659 file.write(" %d -> %d;\n" % (tail, head))
660 file.write("}\n")
663cdef class CGraph_malloc(CGraph):
664 """A CGraph which allocates and frees its own memory."""
666 def __init__(self, twoway, edge_nr_translation, num_nodes,
667 num_pp_edges, num_pn_edges, num_np_edges):
668 self.twoway = twoway
669 self.edge_nr_translation = edge_nr_translation
670 self.num_nodes = num_nodes
671 self.num_px_edges = num_pp_edges + num_pn_edges
672 self.edge_nr_end = num_pp_edges + num_pn_edges + num_np_edges
673 self._heads_idxs = array.array('i', ())
674 array.resize(self._heads_idxs, (num_nodes + 1))
675 self.heads_idxs = <gint*>self._heads_idxs.data.as_ints
676 if self.twoway:
677 # The graph is two-way. n->p edges will exist in the compressed
678 # graph.
679 self.num_xp_edges = num_pp_edges + num_np_edges
680 self.num_edges = self.edge_nr_end
681 self._tails_idxs = array.array('i', ())
682 array.resize(self._tails_idxs, (num_nodes + 1))
683 self.tails_idxs = <gint*>self._tails_idxs.data.as_ints
684 self._tails = array.array('i', ())
685 array.resize(self._tails, self.num_xp_edges)
686 self.tails = <gint*>self._tails.data.as_ints
687 self._edge_ids = array.array('i', ())
688 array.resize(self._edge_ids, self.num_xp_edges)
689 self.edge_ids = <gint*>self._edge_ids.data.as_ints
690 else:
691 # The graph is one-way. n->p edges will be ignored.
692 self.num_xp_edges = num_pp_edges
693 self.num_edges = self.num_px_edges
694 self._heads = array.array('i', ())
695 array.resize(self._heads, self.num_edges)
696 self.heads = <gint*>self._heads.data.as_ints
697 if edge_nr_translation:
698 self._edge_ids_by_edge_nr = array.array('i', ())
699 array.resize(self._edge_ids_by_edge_nr, self.edge_nr_end)
700 self.edge_ids_by_edge_nr = (<gint*>self._edge_ids_by_edge_nr
701 .data.as_ints)
702 if (not self.heads_idxs or not self.heads
703 or (twoway and (not self.tails_idxs
704 or not self.tails
705 or not self.edge_ids))
706 or (edge_nr_translation and not self.edge_ids_by_edge_nr)):
707 raise MemoryError
709 def __getstate__(self):
710 twoway = self.twoway
711 edge_nr_translation = self.edge_nr_translation
712 num_nodes = self.num_nodes
713 num_np_edges = self.edge_nr_end - self.num_px_edges
714 if twoway:
715 num_pp_edges = self.num_xp_edges - num_np_edges
716 else:
717 num_pp_edges = self.num_xp_edges
718 num_pn_edges = self.num_px_edges - num_pp_edges
719 init_args = (twoway, edge_nr_translation, num_nodes,
720 num_pp_edges, num_pn_edges, num_np_edges)
722 return init_args, (self._heads_idxs, self._heads, self._tails_idxs,
723 self._tails, self._edge_ids,
724 self._edge_ids_by_edge_nr)
726 def __setstate__(self, state):
727 init_args, arrays = state
728 self.__init__(*init_args)
729 array_attributes = (self._heads_idxs, self._heads, self._tails_idxs,
730 self._tails, self._edge_ids,
731 self._edge_ids_by_edge_nr)
732 for attribute, value in zip(array_attributes, arrays):
733 if attribute is None:
734 continue
735 attribute[:] = value
737 # We are required to implement this as of Cython 0.26
738 def __reduce__(self):
739 state = init_args, _ = self.__getstate__()
740 return (CGraph_malloc, init_args, state, None, None)