

open Printf
open Cil
open Callgraph

module IntSet = Set.Make( 
  struct
    let compare = Pervasives.compare
    type t = int
  end )

module CSG = Graph.Imperative.Digraph.ConcreteBidirectional(struct
  type t = stmt
  let hash n = Hashtbl.hash n.sid
  let compare n1 n2 = compare n1.sid n2.sid
  let equal n1 n2 = n1.sid = n2.sid
end)

module D = Graph.Graphviz.Dot(struct
  type t = CSG.t
  module V = CSG.V
  module E = CSG.E
  let iter_vertex = CSG.iter_vertex
  let iter_edges_e = CSG.iter_edges_e
  let graph_attributes g = []
  let default_vertex_attributes g = []
  let vertex_name v =
	let loc = Cil.get_stmtLoc v.skind in
	(String.concat "" ["\"";loc.file;":";Pervasives.string_of_int loc.line; " ("; Pervasives.string_of_int v.sid; ")\"";]) 
  let vertex_attributes v = []
  let get_subgraph v = None
  let default_edge_attributes g = []
  let edge_attributes e = []
end)


(*CONFIGURATION*)
let src_filetype = Str.regexp ".c"
let is_connectIntraCSGs = true
let save_intraCSGs = false

let is_print_full_lines = false
let is_print_changed_lines = false
let is_print_interCSG_calls = false
let is_print_patch = false
let is_print_components = false

let memoize funct param memo =
  try  
    Hashtbl.find memo param  
  with Not_found ->  
  let r = funct param in ( Hashtbl.add memo param r; r)

let syscall cmd =
  let ic, oc = Unix.open_process cmd in
  let buf = Buffer.create 16 in
  (try
     while true do
       Buffer.add_channel buf ic 1
     done
   with End_of_file -> ());
  let _ = Unix.close_process (ic, oc) in
  (Buffer.contents buf)

(*Problem: Cilly seems to compile *.i indeterministically into the proj-root-dir or the src/lib dir *)
let cleanFileName fileName =
  List.hd (List.rev (Str.split (Str.regexp "/") fileName))

(** TODO Error Handling **)
(*Patch stores file names "unclean"*)
let read_patch gitdir difftool difffile = 
  let diff = 
	if (String.length difffile) = 0 or not (Sys.file_exists difffile) then (
	  let patch_id_command = ("cd " ^ gitdir ^ "; git rev-parse HEAD") in
      (*printf "Executing: %s\n" patch_id_command;*)
      let patch_id = syscall patch_id_command in
  	  (*Remove trailing \n *)
      let patch_id = Str.string_before patch_id ((String.length patch_id) - 1) in
      printf "Reading patch: %s from repo: %s\n" patch_id gitdir;
      let diff_command = ("cd " ^ gitdir ^ "; git difftool -y -x " ^ difftool ^ " " ^ patch_id ^ "^ "^ patch_id) in
      if is_print_patch then printf "Executing: %s\n" diff_command;
      let diff = syscall diff_command in
	  diff
    ) else syscall ("cat " ^ difffile)
  in
  let patch = Hashtbl.create 100 in
  let currentFile = ref "" in
  let currentChangedLines = ref [] in
  List.iter (fun line -> 
	if (Str.string_match (Str.regexp "File") line 0) 
	then begin
	  if (String.length !currentFile) <> 0 then begin
	    Hashtbl.add patch !currentFile !currentChangedLines;
	    currentChangedLines := []
	  end;
	  let changedFile = Str.string_after line 5 in
	  (*Only consider c files*)
	  if (Str.string_match src_filetype changedFile ((String.length changedFile) - 2) )
	  then currentFile := changedFile
	  else currentFile := ""
	end
	else 
	  let changedline = 
		try (int_of_string line) 
		with Failure _ -> (printf "Error parsing %s" line); 0 
	  in
	  if changedline > 0 then currentChangedLines := changedline::!currentChangedLines
  ) (Str.split (Str.regexp "\n") diff);
  if (String.length !currentFile) <> 0 then Hashtbl.add patch !currentFile !currentChangedLines;
  if is_print_patch then begin printf "Begin PATCH\n"; Hashtbl.iter (fun file lines -> printf "[%s] " file; List.iter (fun l -> printf "%d, " l) lines; printf "\n") patch;  printf "End PATCH\n" end;
  patch
  

(**TODO Error Handling **)
let prepareFile patch gitdir =
  (* Load each input file. *)
  let find file = 
	let found =
      let absolute_cilly = (Str.string_before file ((String.length file) - 2)) ^ ".i" in
	  if Sys.file_exists (gitdir  ^ "/"^  absolute_cilly) then (gitdir ^ "/"^ absolute_cilly)
	  else (
		let cilly_name = cleanFileName absolute_cilly in
		if Sys.file_exists (gitdir  ^ "/"^  cilly_name) then (gitdir  ^ "/"^  cilly_name)
		else ""
      )
    in 
	(if (String.length found) = 0 then printf "\nCHANGE ANALYSIS WILL BE INCOMPLETE:\n -- Could not find cilly-file for %s.\n\n" file else printf "Opening changed file %s\n" found);
    found
  in
  let files = 
	Hashtbl.fold (fun file _ acc -> 
	  let found = find file in 
	  if (String.length found) <> 0 then found::acc else acc
    ) patch [] 
  in
  let files =
    List.map (
      fun filename -> let f = Frontc.parse filename in f ()
    ) files in

  (* Merge them. *)
  let file = Mergecil.merge files "test" in

  (* Remove unused prototypes and variables. *)
  Rmtmps.removeUnusedTemps file;

  (* Do control-flow-graph analysis. *)
  Cfg.computeFileCFG file;

  (* Go over the internal CIL structure and print some facts. *)
  printf "CIL has loaded the files, merged them and removed unused code.\n\n";

  printf "Number of basic blocks in CFG: %d \n" (List.length (Cfg.allStmts file));

  file

let isMain func = Str.string_match (Str.regexp "main") func.svar.vname 0

let isChangedLocation loc patch = 
  let cleanLocFile = cleanFileName loc.file in
  Hashtbl.fold (fun patch_file lines found -> 
    found or 
    let cleanPatchFile = cleanFileName patch_file in (*Clean up patch_file*)
    ((String.compare cleanLocFile cleanPatchFile) = 0) && (List.mem loc.line lines)
  ) patch false

let isChangedBasicBlock_memo = Hashtbl.create 1000
let isChangedBasicBlock(s : stmt) patch = 
  let isChangedBasicBlock s =
    match s.skind with 
      Instr(slist) -> 
        List.fold_left (fun changed instr -> 
		  changed or 
		  isChangedLocation (Cil.get_instrLoc instr) patch
	    ) false slist
	  (**TODO are we missing things? try catch/ switch/ if then else/ break / continue? **)
	  | _ -> isChangedLocation (Cil.get_stmtLoc s.skind) patch  
  in (*Debug*)
  let isChangedBasicBlock s = if is_print_changed_lines then (let temp=isChangedBasicBlock s in (if temp then let loc= Cil.get_stmtLoc s.skind in printf "Marking changed [%s:%d] (%d)\n" loc.file loc.line s.sid);temp) else isChangedBasicBlock s  
  in (*Memoize*)
  memoize isChangedBasicBlock s isChangedBasicBlock_memo

(* Returns all functions that are directly changed and main *)
let isChangedFunc_memo = Hashtbl.create 1000
let isChangedFunc func patch = 
  let isChangedFunc func = 
	  List.fold_left (fun changed s -> changed or isChangedBasicBlock s patch) false func.sallstmts 
  in (*Memoize*)
  memoize isChangedFunc func isChangedFunc_memo

let findFuncDec_memo = Hashtbl.create 1000
let findFuncDec func_id file = 
  let findFuncDec func_id =
    List.fold_left (fun found global ->
      match found with 
	  | None -> 
	    begin match global with 
	    | GFun (func, _) -> if func_id = func.svar.vid then Some(func) else None
	    | _ -> None
	    end
	  | _ -> found
    ) None file.globals
  in (*Memoize*)
  memoize findFuncDec func_id findFuncDec_memo

(*TODO No need to memoize? *)
let findFunc_memo = Hashtbl.create 1000
let findFunc cg_node file = 
  let findFunc cg_node =
	let cg_node_id = 
	  match cg_node.cnInfo with 
      | NIVar (v, _) -> v.vid
	  | _ -> -1 (*NIIndirect (n, _) -> n*) 
	in
	findFuncDec cg_node_id file
  in (*Memoize*)
  memoize findFunc cg_node findFunc_memo
  

(** Find *which* changed functions *which* instruction of s calls **)
(*TODO Identify by id instead of the name (?) *)
let callsChangedFunction_memo = Hashtbl.create 1000
let callsChangedFunction (s : stmt) proxies_ToChangedFunctions =

  (*TODO Preserve Order!*)
  let callsChangedFunction s =
	let toReturn =
    match s.skind with 
    | Instr(slist) -> List.fold_left (
      fun changed instr -> 
	    match instr with
	    | Call (_, Lval (Var callee, _), _, _) -> 
		  begin
		  let found_changed = 
		    List.fold_left (fun contains (cg_node,changedFunction) -> 
		      let cg_node_name = 
		        match cg_node.cnInfo with
    		    | Callgraph.NIVar (v, _) -> v.vname
  			    | Callgraph.NIIndirect (n, _) -> n
		      in
	  	      match contains with 
			  | [] -> if (String.compare callee.vname cg_node_name) = 0 then changedFunction else []
			  | _ -> contains
		  	) [] proxies_ToChangedFunctions
	      in
		  match found_changed with
		  | [] -> changed
		  | _ -> (instr,found_changed)::changed
		  end
        (*Can only be a call from an instr*)
	    | _ -> changed 
	  ) [] slist
    | _ -> []
	in begin
	if (is_print_interCSG_calls && List.length toReturn <> 0 )
	then let loc = Cil.get_stmtLoc s.skind in
	  printf "  %s:%d\n" loc.file loc.line end ; 
	toReturn
  in (*Memoize*)
  memoize callsChangedFunction s callsChangedFunction_memo


(** Find *which* directly called functions "callee" of "func"
   transitively call a changed function. Also return
   which changed function x is called **)
let get_changeProxies_memo = Hashtbl.create 1000
let get_changeProxies func callgraph file patch =

  (*TODO Preserve Order!*)
  let reaches_changed callee = 
    let visited = ref IntSet.empty in
    let rec loop node =
      visited := IntSet.add node.cnid !visited;
	  match findFunc node file with
	  | Some(func) ->
		if (*(isMain func) or*) (isChangedFunc func patch)
        then begin
		  match node.cnInfo with
   		  | NIVar (v, _) -> v::[]
  		  | _ -> (printf "Problem: get_changeProxies\n"; []) end
        else 
		  Inthash.fold (fun _ child reached -> 
			if IntSet.mem child.cnid !visited 
			then reached 
			else List.append (loop child) reached
          ) node.cnCallees []
	  | None -> []
	in
	loop callee
  in
  let get_changeProxies func = 
    (if is_print_interCSG_calls then printf "Changed function %s (transitively) calls changed functions: " func.svar.vname);
    let cgn = Hashtbl.find callgraph func.svar.vname in
    let toReturn = 
      Inthash.fold (fun _ callee acc -> 
	    begin match reaches_changed callee with
	    | [] -> acc
	    | hd::tl -> (*Some debugging*)(if is_print_interCSG_calls then List.iter (fun x -> printf "%s" x.vname; let calleeName = match callee.cnInfo with | NIVar (v, _) -> v.vname | _ -> "<unknown>" in if (String.compare x.vname calleeName) <> 0 then printf " (via %s), " calleeName else printf ", ") (hd::tl)); 
  				  (*Real return*) (callee,hd::tl)::acc
	    end 
      ) cgn.cnCallees [] 
    in (if is_print_interCSG_calls then printf "in the following lines\n"); 
	toReturn
  in (*Memoize*)
  memoize get_changeProxies func get_changeProxies_memo



(** Construct intraCSG and also fill interCSG from func(-tion declaration) using callgraph and patch **)
let constructIntraCSG func callgraph file patch interCSG  = 
  let intraCSG = CSG.create() in
  let changeProxies = get_changeProxies func callgraph file patch in
  let isCallsChangedFunction s = (List.length (callsChangedFunction s changeProxies)) <> 0  in
  
  (* add vertices *)
  List.iter (fun s -> 
    (*Only add changed BBs*)
    if isChangedBasicBlock s patch then begin
	  CSG.add_vertex intraCSG s;
      CSG.add_vertex interCSG s
	end
  ) func.sallstmts;

  (* connect vertices within func *)
  List.iter (fun s -> 
    if isChangedBasicBlock s patch
    then begin
      let visited = ref IntSet.empty in
 	  let rec loop node isAddToInterCSG =
	    visited := IntSet.add node.sid !visited;
	    if isChangedBasicBlock node patch
	    then begin
		  CSG.add_edge intraCSG s node;
          if isAddToInterCSG then CSG.add_edge interCSG s node
		end
		(*Only recurse for interCSG if no s is no changeProxy*)
		else begin
		  let isAddToInterCSG = (if isAddToInterCSG && isCallsChangedFunction s then false else isAddToInterCSG) in
          let handle child =
            if not (IntSet.mem child.sid !visited)
      	    then loop child isAddToInterCSG 
          in
          List.iter handle node.succs 
		end 
      in
	  List.iter (fun node -> loop node true) s.succs
    end		
  ) func.sallstmts;
  intraCSG

(**
 -- Input nodes have a change-free path to the input 
 -- Output nodes have a change-free path to one of the return statements 
**)

let isEntryStmt s func = 
  (List.length s.preds) = 0 or begin
    let firstStmt = List.hd func.sbody.bstmts in
    s.sid = firstStmt.sid 
  end


let isExitStmt s func = 
  (List.length s.succs) = 0 or begin
    match s.skind with 
    | Return _ -> true 
    | Instr instructions -> 
	    List.fold_left (fun isExit instr -> 
	      isExit || begin
	        match instr with 
	        | Call (_, Lval (Var callee, _), _, _) -> callee.vname = "exit"
	        | _ -> false
	      end
        ) false instructions
    | _ -> false
  end

let constructConnectors intraCSG func callgraph file patch = 
  let changeProxies = get_changeProxies func callgraph file patch in
  let isCallsChangedFunction s = (List.length (callsChangedFunction s changeProxies)) <> 0  in
  
  let setting_Entry = 0 in
  let setting_Exit = 1 in
  let setting_Inbound = 2 in
  let setting_Outbound = 3 in

  let isEntryExitHelper s setting =
    let visited = ref IntSet.empty in
	let getChildren node = if (setting=setting_Entry or setting=setting_Inbound) then node.preds else node.succs in
	let isEntryExit node = 
	  if setting = setting_Entry then isEntryStmt node func
  	  else if setting = setting_Exit then isExitStmt node func
	  else isCallsChangedFunction node		
	in
    let rec loop (node : stmt) =
	  visited := IntSet.add node.sid !visited;
	  let children = getChildren node in
	  (*Another changed node -> this path ends in failure*)
	  if node.sid <> s.sid && isChangedBasicBlock node patch then [] 
	  (*An extry/exit -> this path was successful!*)
	  else if isEntryExit node then node::[]
      (*Visit children*)
	  else begin 
	    let handle entryExits child =
  		  if not (IntSet.mem child.sid !visited)
		  then List.append (loop child) entryExits
		  else entryExits
	    in List.fold_left handle [] children
      end
    in
    loop s
  in
  let isEntry s = 0 <> List.length (isEntryExitHelper s setting_Entry) in
  let isExit s = 0 <> List.length (isEntryExitHelper s setting_Exit) in
  
  let computeCalledChangedFunctions calls setting = 
	List.fold_left (fun ccf s -> 
	  let calledFuncProxies = callsChangedFunction s changeProxies in
	  (*Do not connect with ALL but only with first or last!*)
	  let choose_head l = List.hd l in
	  let choose_tail l = List.hd (List.rev l) in
	  let choose = if setting = setting_Inbound then choose_head else choose_tail in
	  let (_(*ignore instr*), calledChangedFuncs) = choose calledFuncProxies in
	  let calledChangedFunc = choose calledChangedFuncs in
	  calledChangedFunc::ccf
	  (*
	  List.fold_left (fun ccf (_(*ignore instr*), calledChangedFuncs) -> 
		List.fold_left (fun ccf calledChangedFunc ->
		  (*Only add once*)
		  if not (List.mem calledChangedFunc ccf) 
		  then calledChangedFunc::ccf
		  else ccf
		) ccf calledChangedFuncs
	  ) ccf calledFuncProxies *)
	) [] calls
  in
  let findInbound s = computeCalledChangedFunctions (
	if isCallsChangedFunction s then s::[] (*s can directly be inbound*)
	else isEntryExitHelper s setting_Inbound
  ) setting_Inbound in
  let findOutbound s = computeCalledChangedFunctions (
	if isCallsChangedFunction s then s::[] (*s can directly be outbound*)
	else isEntryExitHelper s setting_Outbound
  ) setting_Outbound in
  
  (*Debugging*)
  let isEntry s = if is_print_interCSG_calls then let temp = isEntry s in (if temp then let loc= Cil.get_stmtLoc s.skind in printf "Connector_Entry of %s at [%s:%d] (%d)\n" func.svar.vname loc.file loc.line s.sid); temp else isEntry s in
  let isExit s = if is_print_interCSG_calls then let temp = isExit s in (if temp then let loc= Cil.get_stmtLoc s.skind in printf "Connector_Exit of %s at [%s:%d] (%d)\n" func.svar.vname loc.file loc.line s.sid); temp else isExit s in
  let findInbound s = if is_print_interCSG_calls then let temp = findInbound s in (if (List.length temp)<>0 then let loc= Cil.get_stmtLoc s.skind in printf "Connector_Inbound of %s at [%s:%d] (%d)\n" func.svar.vname loc.file loc.line s.sid); temp else findInbound s in
  let findOutbound s = if is_print_interCSG_calls then let temp = findOutbound s in (if (List.length temp)<>0 then let loc= Cil.get_stmtLoc s.skind in printf "Connector_Outbound of %s at [%s:%d] (%d)\n" func.svar.vname loc.file loc.line s.sid); temp else findOutbound s in

  (*Warning*)
(*  let findInbound s = let temp = findInbound s in (if (List.length temp) > 1 then let loc= Cil.get_stmtLoc s.skind in printf "Warning: Multiple changeProxies (%d) in function %s at [%s:%d] (%d)\n" (List.length temp) func.svar.vname loc.file loc.line s.sid); temp in
  let findOutbound s = let temp = findOutbound s in (if (List.length temp) > 1 then let loc= Cil.get_stmtLoc s.skind in printf "Warning: Multiple changeProxies (%d) in function %s at [%s:%d] (%d)\n" (List.length temp) func.svar.vname loc.file loc.line s.sid); temp in *)

  (*Main loop*)
  CSG.fold_vertex (fun s (entry,exit,inbound,outbound) -> 
	(
    (if isEntry s then s::entry else entry),
	(if isExit s then s::exit else exit),
	(let inbound_s = findInbound s in if (List.length inbound_s)<>0 then (s,inbound_s)::inbound else inbound),
    (let outbound_s = findOutbound s in if (List.length outbound_s)<>0 then (s,outbound_s)::outbound else outbound)
    )
  ) intraCSG ([],[],[],[])



(**Connect exit with inbound and outbound with entry**)
let connectBounds func connectors interCSG =
  let (_,_,inbounds,outbounds) = Hashtbl.find connectors func.svar.vid in
  let setting_inbound = 1 in
  let setting_outbound = 2 in
  let doBound setting =
    let bounds = if setting = setting_inbound then inbounds else outbounds in
    let entryExitNodes (entryNodes,exitNodes) = if setting = setting_inbound then exitNodes else entryNodes in
    let addEdge bound entryExit boundFunc entryExitFunc = 
  	  if setting = setting_inbound && (not (CSG.mem_edge interCSG entryExit bound)) then (
	    CSG.add_edge interCSG entryExit bound;
	    if is_print_interCSG_calls then begin
		  let boundLOC= Cil.get_stmtLoc bound.skind in 
		  let entryExitLOC = Cil.get_stmtLoc entryExit.skind in 
		  printf "Connecting INBOUND [%s:%d] (%d) in %s with EXIT [%s:%d] (%d) of %s.\n" boundLOC.file boundLOC.line bound.sid boundFunc.vname entryExitLOC.file entryExitLOC.line entryExit.sid entryExitFunc.vname
		end
	  ) else if setting = setting_outbound && (not (CSG.mem_edge interCSG bound entryExit)) then (
	    CSG.add_edge interCSG bound entryExit;
	    if is_print_interCSG_calls then begin
	      let boundLOC= Cil.get_stmtLoc bound.skind in 
		  let entryExitLOC = Cil.get_stmtLoc entryExit.skind in 
		  printf "Connecting OUTBOUND [%s:%d] (%d) in %s with ENTRY [%s:%d] (%d) of %s.\n" boundLOC.file boundLOC.line bound.sid boundFunc.vname entryExitLOC.file entryExitLOC.line entryExit.sid entryExitFunc.vname
		end
	  )
	in
	List.iter (fun (bound,otherFuncs) ->
	  List.iter (fun otherFunc ->
	    if not (Hashtbl.mem connectors otherFunc.vid) then printf "Problem: Didn't find connector for changed function %s\n" otherFunc.vname
		else begin
		  let (entryNodes,exitNodes,_,_) = Hashtbl.find connectors otherFunc.vid in
		  (*Add interCSG edges.*)
		  List.iter (fun entryExit -> 
		    addEdge bound entryExit func.svar otherFunc
		  ) (entryExitNodes (entryNodes,exitNodes))
		end
	  ) otherFuncs
	) bounds
  in
  doBound setting_inbound;
  doBound setting_outbound


(** If multiple changeProxies -> Connext EXIT of one changeProxy with ENTRY of next changeProxy **)
let connectAdjacentChangeProxies func connectors interCSG callgraph file patch =
  let changeProxies = get_changeProxies func callgraph file patch in

  let connect changeProxy1 changeProxy2 =
	let choose_head l = List.hd l in
    let choose_tail l = List.hd (List.rev l) in	
	let calledFuncProxies1 = callsChangedFunction changeProxy1 changeProxies in
	(*let calledChangedFuncs1 = List.fold_left (fun acc (_(*ignore instr*), calledChangedFuncs) -> List.append calledChangedFuncs acc) [] calledFuncProxies1 in*)
    let calledFuncProxies2 = callsChangedFunction changeProxy2 changeProxies in
	(*let calledChangedFuncs2 = List.fold_left (fun acc (_(*ignore instr*), calledChangedFuncs) -> List.append calledChangedFuncs acc) [] calledFuncProxies2 in*)
	
	(*Connect tail of changeProxy1 with head of changeProxy2 *)
	let (_(*ignore instr*), calledChangedFuncs1) = choose_tail calledFuncProxies1 in
	let calledChangedFunc1 = choose_tail calledChangedFuncs1 in
	  let n_ccf1 = List.fold_left (fun n_ccf1 (_,ccf1) -> n_ccf1 + (List.length ccf1)) 0 calledFuncProxies1 in
	  if n_ccf1 > 1 then let loc = Cil.get_stmtLoc changeProxy1.skind in printf "Warning: Basic Block 1 [%s:%d] contains more than 1 change proxies: %d\n" loc.file loc.line n_ccf1;
	let (_(*ignore instr*), calledChangedFuncs2) = choose_head calledFuncProxies2 in
	let calledChangedFunc2 = choose_head calledChangedFuncs2 in
	  let n_ccf2 = List.fold_left (fun n_ccf2 (_,ccf2) -> n_ccf2 + (List.length ccf2)) 0 calledFuncProxies2 in
	  if n_ccf2 > 1 then let loc = Cil.get_stmtLoc changeProxy2.skind in printf "Warning: Basic Block 2 [%s:%d] contains more than 1 change proxies: %d\n" loc.file loc.line n_ccf2;
	
    if not (Hashtbl.mem connectors calledChangedFunc1.vid) then printf "Problem: Didn't find connector for changed function %s\n" calledChangedFunc1.vname
	else if not (Hashtbl.mem connectors calledChangedFunc2.vid) then printf "Problem: Didn't find connector for changed function %s\n" calledChangedFunc2.vname
	else begin
	  let (_,exitNodes,_,_) = Hashtbl.find connectors calledChangedFunc1.vid in
	  let (entryNodes,_,_,_) = Hashtbl.find connectors calledChangedFunc2.vid in
	  (*Add interCSG edges.*)
	  List.iter (fun exit -> 
		List.iter (fun entry ->
		  if not (CSG.mem_edge interCSG exit entry) then (
	      	CSG.add_edge interCSG exit entry;
	  	  	if is_print_interCSG_calls then begin
			  let exitLOC= Cil.get_stmtLoc exit.skind in 
			  let entryLOC = Cil.get_stmtLoc entry.skind in 
			  let cpLOC1 = Cil.get_stmtLoc changeProxy1.skind in 
			  let cpLOC2 = Cil.get_stmtLoc changeProxy2.skind in
			  (printf "Connecting EXIT [%s:%d] (%d) in %s with ENTRY [%s:%d] (%d) of %s\n    because first CHANGE_PROXY [%s:%d], then CHANGE_PROXY [%s:%d] is called in %s.\n" exitLOC.file exitLOC.line exit.sid calledChangedFunc1.vname entryLOC.file entryLOC.line entry.sid calledChangedFunc2.vname cpLOC1.file cpLOC1.line cpLOC2.file cpLOC2.line func.svar.vname)
	  	    end
		  )
		) entryNodes
	  ) exitNodes
	end
  in  
  List.iter (fun changeProxy1 ->
	let isCallsChangedFunction s = (List.length (callsChangedFunction s changeProxies)) <> 0 in
	(*only if it transitively calls a changed *)
	if isCallsChangedFunction changeProxy1 then (
	  let visited = ref IntSet.empty in
      let rec loop (node : stmt) =
	    visited := IntSet.add node.sid !visited;
	    (*Another changed node -> this path ends in failure*)
	    if isChangedBasicBlock node patch then ()
	    (*Another change proxy -> this path was successful!*)
	    else if isCallsChangedFunction node then (connect changeProxy1 node)
        (*Visit children*)
	    else List.iter (fun child -> if not (IntSet.mem child.sid !visited) then loop child) node.succs
      in
	  (*Start with children -> It could connect to itself*)
      List.iter (fun child -> loop child) changeProxy1.succs
    )
  ) func.sallstmts

(** Find number of connected components through graph coloring 
	Two nodes have the same color if there exists a transitive connection -- direction does not matter **)
let computeConnectedComponents interCSG =
  let color_nodes = Hashtbl.create 100 in
  let invalidColors = ref [] in
  let isInvalid color = List.mem color !invalidColors in
  let markInvalid color = invalidColors := color::!invalidColors in

  let seen_nodes = ref [] in
  let isSeen node = List.mem node !seen_nodes in
  let markSeen node = if not (isSeen node) then seen_nodes := node::!seen_nodes in

  let next_color = ref 0 in
  CSG.iter_vertex (fun node1 -> 
	let traverseHelper isSuccs = 
	  let traverseCSG = if isSuccs then CSG.fold_succ else CSG.fold_pred in
	  let sameColorNodes = traverseCSG (fun node2 sameColorNodes ->
	    if isSeen node2
	    then ( (*Append to sameColor and clear from hashtbl*)
		  Hashtbl.fold (fun color nodes sameColorNodes ->
		    if (not (isInvalid color)) && (List.mem node2 nodes)
		    then (
		      (*if (List.length found) <> 0 then printf "Problem: Same element should only have one valid color!\n";*)
			  markInvalid color;
			  List.append (Hashtbl.find color_nodes color) sameColorNodes
		    ) else sameColorNodes
		  ) color_nodes sameColorNodes
	    ) else (
	      markSeen node2;
	      node2::sameColorNodes
	    )
	  ) interCSG node1 [] in
	  sameColorNodes
	in
	let sameColorNodesSuccs = traverseHelper true in
	let sameColorNodesPreds = traverseHelper false in
	markSeen node1;
	next_color := !next_color + 1;
	let sameColorNodes = List.fold_left (fun sameColorNodes n -> if not (List.mem n sameColorNodes) then n::sameColorNodes else sameColorNodes) sameColorNodesPreds sameColorNodesSuccs in
	let sameColorNodes = if not (List.mem node1 sameColorNodes) then node1::sameColorNodes else sameColorNodes in
	Hashtbl.add color_nodes !next_color sameColorNodes
  ) interCSG;

  let components = Hashtbl.create 10 in
  let new_color = ref 0 in
  Hashtbl.iter (fun color colorNodes -> 
	if not (isInvalid color) 
	then (
	  if is_print_components 
	  then (
		printf "Component %d: " !new_color;
	    List.iter (fun s -> printf "%d, " s.sid) colorNodes;
		printf "\n"
	  );
	  Hashtbl.add components !new_color colorNodes;
	  new_color := !new_color + 1
	)
  ) color_nodes;
  components

let computeRootsOfComponents connectedComponents connectors file patch = 
  let rootsOfComponents = Hashtbl.create 10 in
  let functionsOfComponents = Hashtbl.create 10 in
  let notRootOfComponents = Hashtbl.create 10 in
  let addUnique hashtbl key value =
	let temp_tail = if Hashtbl.mem hashtbl key then Hashtbl.find hashtbl key else [] in
	if not (List.mem value temp_tail) then Hashtbl.add hashtbl key (value::temp_tail)
  in
  List.iter (
	function
	| GFun (func, _) ->
	  if isChangedFunc func patch 
	  then (
		(* Which colors does the changed function have? *)
		let hasAnyColor = Hashtbl.fold (fun color stmts hasAnyColor -> 
		  let hasThisColor = List.fold_left (fun hasThisColor s -> hasThisColor or (List.mem s func.sallstmts)) false stmts in
		  if hasThisColor then (
			if is_print_components then printf "Component %d contains changed function %s\n" color func.svar.vname;
			(*Mark func with this color *)
			addUnique functionsOfComponents color func;
			(*Find called funcs*)
			let (_,_,_,outbounds) = Hashtbl.find connectors func.svar.vid in
			List.iter (fun (_,otherFuncs) ->
	  		  List.iter (fun otherFunc ->
				(*Mark otherFunc as notRoot*)
				if func.svar.vid <> otherFunc.vid then (* its okay for a potential root to call itself *)
				  addUnique notRootOfComponents color otherFunc
			  ) otherFuncs
			) outbounds;
			true
		  ) else hasAnyColor
		) connectedComponents false in
		if not hasAnyColor then printf "Bug: Changed function %s does not belong to any component?\n" func.svar.vname
	  )
	| _ -> ()
  ) file.globals;
  (* Find root by filtering non-root for every color *)
  Hashtbl.iter (fun color funcs ->
	List.iter (fun func ->
	  let func = func.svar in
	  if (not (Hashtbl.mem notRootOfComponents color)) or not (List.mem func (Hashtbl.find notRootOfComponents color)) 
	  then addUnique rootsOfComponents color func
	) funcs
  ) functionsOfComponents;
  (* Reporting components without root *)
  Hashtbl.iter (fun color funcs -> if 0 = (List.length funcs) then printf "Warning: Component %d has no root!\n" color) rootsOfComponents;
  rootsOfComponents

let computeProgramEntryExits rootsOfComponents connectors callgraph patch file=
	let setting_ENTRY = 0 in
	let setting_EXIT = 1 in

	let visitedFuncs = ref IntSet.empty in
	let rec findEntryExit func setting =
	  (*Mark visited*)
	  visitedFuncs := IntSet.add func.svar.vid !visitedFuncs;
	  let this_entryExits = 
		let (entries,exits,_,_) = Hashtbl.find connectors func.svar.vid in 
		if setting = setting_ENTRY then entries else exits 
	  in
	  let isEntryExit s = 
		if setting = setting_ENTRY 
		then isEntryStmt s func
		else isExitStmt s func
	  in
	  let getChildren s = if setting = setting_ENTRY then s.preds else s.succs in

	  let changeProxies = get_changeProxies func callgraph file patch in
      let isCallsChangedFunction s = (List.length (callsChangedFunction s changeProxies)) <> 0  in
	  
	  let visitedStmts = ref IntSet.empty in
      let rec reachesEntryExit (node : stmt) =
	    visitedStmts := IntSet.add node.sid !visitedStmts;
	    (*Another changed basicBlock or changedFunction -> this path ends in failure*)
	    if (isChangedBasicBlock node patch) or isCallsChangedFunction node then false
	    else if isEntryExit node then true
        (*Do children reach entryExit?*)
	    else List.fold_left (fun childReaches child -> childReaches or if not (IntSet.mem child.sid !visitedStmts) then reachesEntryExit child else false) false (getChildren node)
      in
	  (*For each changeProxy in func check whether it reaches EntryExit*)
	  List.fold_left (fun entryExits s ->
		if isCallsChangedFunction s && (List.fold_left (fun reaches child -> reaches or reachesEntryExit child) false (getChildren s))
		then ( (*For every changed function called from s recurse*)
		  List.fold_left (fun entryExits (_,otherFuncs) ->
			List.fold_left (fun entryExits otherFunc ->
			  if not (IntSet.mem otherFunc.vid !visitedFuncs) 
			  then 
			    match findFuncDec otherFunc.vid file with
			    | Some(otherFuncDec) -> List.append (findEntryExit otherFuncDec setting) entryExits
			    | None -> (printf "Couldn't find function declaration for other_func %s\n" otherFunc.vname; entryExits)
			  else entryExits
			) entryExits otherFuncs
		  ) entryExits (callsChangedFunction s changeProxies)
		) else entryExits
	  ) this_entryExits func.sallstmts
    in
	(* Process every root only once*)
	let processed_roots = ref [] in
	let canProcess root = if List.mem root.vid !processed_roots then false else (processed_roots := root.vid :: !processed_roots; true) in
	Hashtbl.fold (fun color roots prog_entryExits -> 
	  List.fold_left (fun (prog_entries, prog_exits) root -> 
		if canProcess root then
		  match findFuncDec root.vid file with
		  | Some (rootFuncDec) ->
		    (
		    List.append (findEntryExit rootFuncDec setting_ENTRY) prog_entries,
	 	    List.append (findEntryExit rootFuncDec setting_EXIT) prog_exits
		    )
		  | None -> (printf "Couldn't find function declaration for root %s\n" root.vname; (prog_entries,prog_exits))
		else (prog_entries,prog_exits) (*Duplicated*)
      ) prog_entryExits roots
	) rootsOfComponents ([],[])
  


(**Construct the interCSG by constructing and then connecting the intraCSGs
  Both CSGs can only contain vertices of changed statements. No output. 
  If there *exists* a directed edge from c1 to c2 then c2 *may* be executed after c1
  If there *does not exist* any (transitive) connection of c1 and c2 then both *cannot* be executed together
    Example: Grep 40679f5dbbb710bc3d09e0f410379fae20ba65e7
**)
let constructInterCSG file callgraph patch = 
  (*Statistics*)
  let start_t = Sys.time() in 

  (*Main artifacts constructed*)
  let interCSG = CSG.create() in
  let intraCSGs = Hashtbl.create 100 in
  let connectors = Hashtbl.create 100 in

  (* construct intraCSGs *)
  List.iter (
	function
	| GFun (func, _) ->
	  if isChangedFunc func patch 
	  then (
	    (* construct intraCSG and add to interCSG *)
	    let intraCSG = constructIntraCSG func callgraph file patch interCSG in
	    Hashtbl.add intraCSGs func.svar.vid intraCSG;
		  
	    (* construct connector *)
	    let connector = constructConnectors intraCSG func callgraph file patch in
	    Hashtbl.add connectors func.svar.vid connector
	  )
	| _ -> ()
  ) file.globals;

  (*Connect in- and outbound with entry and exits of intraCSGs*)
  if is_connectIntraCSGs then begin
  List.iter (
	function
	| GFun (func, _) ->
	  if isChangedFunc func patch then (
	    connectBounds func connectors interCSG;
		connectAdjacentChangeProxies func connectors interCSG callgraph file patch
	  (*if main() contains no changes but calls several changed functions, then they need to be connected as well *)
      ) else if isMain func then (
		connectAdjacentChangeProxies func connectors interCSG callgraph file patch
	  ) 
	| _ -> ()
  ) file.globals;
  
  (*Compute Program Entry and Exits*)
  let connectedComponents = computeConnectedComponents interCSG in
  let rootsOfComponents = computeRootsOfComponents connectedComponents connectors file patch in
  if is_print_components then Hashtbl.iter (fun color roots -> List.iter (fun root -> printf "Root of component %d is %s\n" color root.vname) roots) rootsOfComponents;
  let (prog_entries, prog_exits) = computeProgramEntryExits rootsOfComponents connectors callgraph patch file in
  if is_print_components then (
	printf "Program Entries: ";
    List.iter (fun prog_entry -> printf "%d, " prog_entry.sid) prog_entries;
	printf "\nProgram Exits: ";
    List.iter (fun prog_exit -> printf "%d, " prog_exit.sid) prog_exits;
	printf "\n"
  );


  (*Account for dangling components!*)
  let n_components = Hashtbl.length connectedComponents in

  (*InterCSG Statistics*)
  let n_nodes = (CSG.nb_vertex interCSG) + 2 in
  let n_edges = (CSG.nb_edges interCSG) + (List.length prog_entries) + (List.length prog_exits) in
  printf "CSG Construction Time         : %fs\n" (Sys.time() -. start_t);
  printf "Number of (connected) Compon. : %d\n" n_components;
  printf "Interprocedural CSG has       : %d nodes and %d edges\n" n_nodes n_edges;
  printf "Interprocedural Change Compl. : %d\n" ((n_edges - n_nodes) + 2*n_components);

  end else (*No InterCSG computation*)
  printf "CSG Construction Time         : %fs\n" (Sys.time() -. start_t);

  (* Print statistics *)
  let (sum_v,sum_e) = Hashtbl.fold (fun _ intraCSG (sum_v,sum_e) -> (*printf "(%d,%d), " (CSG.nb_vertex intraCSG) (CSG.nb_edges intraCSG);*) (sum_v + (CSG.nb_vertex intraCSG), sum_e + (CSG.nb_edges intraCSG))) intraCSGs (0,0) in
  let (avg_v,avg_e) = let size = float_of_int (Hashtbl.length intraCSGs) in (((float_of_int sum_v) /. size), ((float_of_int sum_e) /. size)) in
  printf "Average Intraproc CSG has     : %f nodes and %f edges\n" avg_v avg_e;
  printf "Average Intraproc Change Compl: %f\n" ((avg_e -. avg_v) +. 2.0);

  let n_loadedFunctions = ref 0 in
  let n_changedFunctions = ref 0 in
  let n_loadedBasicBlocks = ref 0 in
  let n_changedBasicBlocks = ref 0 in
  let n_loadedLinesOfCode = ref 0 in
  let n_changedLinesOfCode = ref 0 in
  let cfgs_ve = ref [] in
  List.iter (
	function
	| GFun (func, _) ->
	  let cfg_vertices = ref 0 in
	  let cfg_edges = ref 0 in
	  (*funcs*)
	  if isChangedFunc func patch then (n_changedFunctions := !n_changedFunctions + 1; if save_intraCSGs then let intraCSGDot = open_out ("intracsg_"^func.svar.vname^".dot") in D.output_graph intraCSGDot (Hashtbl.find intraCSGs func.svar.vid); close_out intraCSGDot; if is_print_full_lines then printf "Changed ");
	  n_loadedFunctions := !n_loadedFunctions + 1;
	  if is_print_full_lines then printf "Function %s\n" func.svar.vname;
	  let seen_locations = ref [] in
	  List.iter (fun s -> 
		(*BB*)
		if isChangedBasicBlock s patch then n_changedBasicBlocks := !n_changedBasicBlocks + 1;
		n_loadedBasicBlocks := !n_loadedBasicBlocks + 1;
        (*LoC*)
		
		let countLoC loc = 
		  if not (List.mem loc !seen_locations) then begin
		    seen_locations := loc::!seen_locations;
			if isChangedLocation loc patch then ( n_changedLinesOfCode := !n_changedLinesOfCode + 1; if is_print_full_lines then printf "*"; ) else if is_print_full_lines then printf " ";
			if is_print_full_lines then printf "%s:%d\n" loc.file loc.line;
			n_loadedLinesOfCode := !n_loadedLinesOfCode + 1
		  end 
		in
  		begin match s.skind with 
      	|	Instr(slist) -> List.iter (fun instr -> countLoC (Cil.get_instrLoc instr)) slist
	  	| _ -> countLoC (Cil.get_stmtLoc s.skind)
		end;

		(*cfg complexity*)
		cfg_vertices := !cfg_vertices + 1;
		cfg_edges := !cfg_edges + List.length s.succs;
	  ) func.sallstmts;
	  cfgs_ve := (!cfg_vertices,!cfg_edges)::!cfgs_ve
	| _ -> ()
  ) file.globals;

  let (sum_v,sum_e) = List.fold_left (fun (sum_v, sum_e) (cfg_vertices, cfg_edges) -> (*printf "(%d,%d), " cfg_vertices cfg_edges;*) (cfg_vertices+sum_v, cfg_edges+sum_e)) (0,0) !cfgs_ve in
  let (avg_v,avg_e) = let size = float_of_int (List.length !cfgs_ve) in (((float_of_int sum_v) /. size), ((float_of_int sum_e) /. size)) in
  printf "Average Intraproc CFG has     : %f nodes and %f edges\n" avg_v avg_e;
(*  printf "Average Intraproc Cyclom Compl: %f\n" ((avg_e -. avg_v) +. 2.0);*)


  printf "Number of changed functions   : %d out of %d loaded and %d called.\n" !n_changedFunctions !n_loadedFunctions  (Hashtbl.length callgraph);
  printf "Number of changed basic blocks: %d out of %d loaded.\n" !n_changedBasicBlocks !n_loadedBasicBlocks;
  printf "Number of changed linesOfCode : %d out of %d loaded.\n" !n_changedLinesOfCode !n_loadedLinesOfCode;

  let interCSGDot = open_out ("intercsg.dot") in
  D.output_graph interCSGDot interCSG;
  close_out interCSGDot







  
