Caml-list

A couple weeks ago, Kim Quyen Ly asked a question about algorithms for strongly connected components.

I answered the best known algorithms were Mehlhorn-Gabow's and Tarjan's, both linear in number of arcs.
I wrote an implementation in Caml but was unsatisfied with it because in theory these algorithms use 3 data structures in total (2 arrays + 1 stack or 1 array + 2 stacks).
However I had an extra stack because of the recursion, and couldn't figure out how to merge the call-stack with the open-node stack of the dfs.

I finally checked in the original Mehlhorn paper (Algorithmica 1996) and Sedgewick implementations, to find out that not only did they use a recursive function as well but they had MORE arrays and stacks than theoretically required. I guess I will have to wait Knuth reaches the corresponding TAOCP volume to uncompile his assembler code and finally know the truth.

So here are my implementations. The example graph was built to show the case where the call-stack and open-nodes stack differ (node 4).
I recommend potential users to prove correction before using the implementation, I am a lousy coder.

(* Make matrix from list *)
let to_matrix = function list ->
  let n = 1 + List.fold_left (fun current (i, j) -> max current (max i j)) 0 list in
  let matrix = Array.make_matrix n n 0 in
  let rec add_arc = function
    | [] -> matrix
    | (i, j) :: tail -> matrix.(i).(j) <- 1; add_arc tail
  in add_arc list

(* Example built to show the open-node stack / dfs call-stack difference *)
let example = to_matrix [(0, 1); (1, 2); (2, 3); (3, 4); (4, 2); (2, 1); (3, 5); (5, 6); (6, 5)]


(* Mehlhorn Gabow scc *)
let cmg_scc = function matrix ->

  let n = Array.length matrix in
  let 
      visited_at_depth = Array.make n max_int and
      roots = Stack.create () and
      open_nodes = Stack.create ()
  in

  let rec unstack_until = function i ->
    match Stack.pop open_nodes with
      | n when n = i -> [i]
      | n -> n :: unstack_until i
  in

  let rec dfs depth = function i ->

    let result = ref [] in
    
    (* mark *)
    Stack.push depth roots;
    Stack.push i open_nodes;
    visited_at_depth.(i) <- depth;
    
    (* dive *)
    for j = 0 to n - 1 do
      if (matrix.(i).(j) = 1) && (visited_at_depth.(j) = max_int) then 
result := dfs (depth + 1) j @ !result
    done;

    (* process reverse-arcs *)
    for j = 0 to n - 1 do
      if (matrix.(i).(j) = 1) && (visited_at_depth.(j) < depth) then 
let scc_returns_to_depth = visited_at_depth.(j) in
while Stack.top roots > scc_returns_to_depth do ignore (Stack.pop roots) done
    done;    
    
    (* emit connected component if current node is root *)
    if depth = Stack.top roots then
      (
ignore (Stack.pop roots);
unstack_until i :: !result
      )
    else
      !result
  in

  let result = ref [] in
  for i = 0 to n - 1 do 
    if (visited_at_depth.(i) = max_int) then result := (dfs 0 i) @ !result
  done;
  !result

     
(* Tarjan scc *)
let tarjan_scc = function matrix ->
  
  let n = Array.length matrix in
  let 
      visited_at_depth = Array.make n max_int and
      scc_root = Array.make n max_int and
      open_nodes = Stack.create () and
      result = ref []
  in
  
  let rec unstack_until = function i ->
    match Stack.pop open_nodes with
      | n when n = i -> [i]
      | n -> n :: unstack_until i
  in

  let rec dfs depth = function i ->
    
    (* mark *)
    visited_at_depth.(i) <- depth;
    scc_root.(i) <- depth;
    Stack.push i open_nodes;

    for j = 0 to n - 1 do
      if matrix.(i).(j) = 1 then
if visited_at_depth.(j) = max_int then
 scc_root.(i) <- min scc_root.(i) (dfs (depth + 1) j) (* dive *)  
else 
 scc_root.(i) <- min scc_root.(i) visited_at_depth.(j) (* reverse-arc *)
    done;
    
    (* emit connected component if current node is root *)
    if scc_root.(i) = visited_at_depth.(i) then 
      result := unstack_until i :: !result;
    scc_root.(i)
  in
  
  for i = 0 to n - 1 do 
    if (visited_at_depth.(i) = max_int) then ignore (dfs 0 i)
  done;
  !result

         Diego Olivier