@@ -211,6 +211,7 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
211211 if (life_inputs ) {
212212 ir_remove_phis_inputs (ctx , & ctx -> use_lists [bb -> start ], insn -> inputs_count , life_inputs );
213213 ir_mem_free (life_inputs );
214+ life_inputs = NULL ;
214215 }
215216 }
216217 }
@@ -613,59 +614,64 @@ static int ir_remove_unreachable_blocks(ir_ctx *ctx)
613614 return 1 ;
614615}
615616
616- static void compute_postnum (const ir_ctx * ctx , uint32_t * cur , uint32_t b )
617- {
618- uint32_t i , * p ;
619- ir_block * bb = & ctx -> cfg_blocks [b ];
620-
621- if (bb -> postnum != 0 ) {
622- return ;
623- }
624-
625- if (bb -> successors_count ) {
626- bb -> postnum = -1 ; /* Marker for "currently visiting" */
627- p = ctx -> cfg_edges + bb -> successors ;
628- i = bb -> successors_count ;
629- do {
630- compute_postnum (ctx , cur , * p );
631- p ++ ;
632- } while (-- i );
633- }
634- bb -> postnum = (* cur )++ ;
635- }
636-
637617/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
638618 * Cooper, Harvey and Kennedy. */
639619static IR_NEVER_INLINE int ir_build_dominators_tree_slow (ir_ctx * ctx )
640620{
641- uint32_t blocks_count , b , postnum ;
621+ uint32_t blocks_count , b , postnum , i ;
642622 ir_block * blocks , * bb ;
643623 uint32_t * edges ;
624+ uint32_t * rpo = ir_mem_malloc ((ctx -> cfg_blocks_count + 1 ) * sizeof (uint32_t ));
644625 bool changed ;
645626
646627 blocks = ctx -> cfg_blocks ;
647628 edges = ctx -> cfg_edges ;
648629 blocks_count = ctx -> cfg_blocks_count ;
649630
650- /* Clear the dominators tree */
651- for (b = 0 , bb = & blocks [0 ]; b <= blocks_count ; b ++ , bb ++ ) {
652- bb -> idom = 0 ;
653- bb -> dom_depth = 0 ;
654- bb -> dom_child = 0 ;
655- bb -> dom_next_child = 0 ;
656- }
657-
658631 ctx -> flags2 &= ~IR_NO_LOOPS ;
659632
660633 postnum = 1 ;
661- compute_postnum (ctx , & postnum , 1 );
634+ ir_worklist work ;
635+ ir_worklist_init (& work , ctx -> cfg_blocks_count + 1 );
636+ ir_worklist_push (& work , 1 );
637+ IR_ASSERT (blocks [1 ].next_succ == 0 );
638+ while (ir_worklist_len (& work )) {
639+ next :
640+ b = ir_worklist_peek (& work );
641+ bb = & blocks [b ];
642+ uint32_t n = bb -> successors_count - bb -> next_succ ;
643+ if (n ) {
644+ uint32_t * p = edges + bb -> successors + bb -> next_succ ;
645+ for (; n > 0 ; p ++ , n -- ) {
646+ uint32_t succ = * p ;
647+ if (ir_worklist_push (& work , succ )) {
648+ bb -> next_succ = bb -> successors_count - n + 1 ;
649+ IR_ASSERT (blocks [succ ].next_succ == 0 );
650+ goto next ;
651+ }
652+ }
653+ }
654+
655+ /* Start from bb->idom calculated by the fast dominators algorithm */
656+ // bb->idom = 0;
657+ bb -> next_succ = 0 ;
658+ rpo [postnum ] = b ;
659+ bb -> postnum = postnum ++ ;
660+ ir_worklist_pop (& work );
661+ }
662+ ir_worklist_free (& work );
663+
664+ IR_ASSERT (rpo [blocks_count ] == 1 );
662665
663666 /* Find immediate dominators by iterative fixed-point algorithm */
664667 blocks [1 ].idom = 1 ;
665668 do {
666669 changed = 0 ;
670+
667671 /* Iterating in Reverse Post Order */
668- for (b = 2 , bb = & blocks [2 ]; b <= blocks_count ; b ++ , bb ++ ) {
672+ for (i = blocks_count - 1 ; i > 0 ; i -- ) {
673+ b = rpo [i ];
674+ bb = & blocks [b ];
669675 IR_ASSERT (!(bb -> flags & IR_BB_UNREACHABLE ));
670676 IR_ASSERT (bb -> predecessors_count > 0 );
671677 if (bb -> predecessors_count == 1 ) {
@@ -718,6 +724,8 @@ static IR_NEVER_INLINE int ir_build_dominators_tree_slow(ir_ctx *ctx)
718724 }
719725 } while (changed );
720726
727+ ir_mem_free (rpo );
728+
721729 /* Build dominators tree */
722730 blocks [1 ].idom = 0 ;
723731 blocks [1 ].dom_depth = 0 ;
@@ -771,7 +779,7 @@ int ir_build_dominators_tree(ir_ctx *ctx)
771779 blocks [1 ].idom = 1 ;
772780 blocks [1 ].dom_depth = 0 ;
773781
774- /* Iterating in Reverse Post Order */
782+ /* Iterating in Reverse Post Order (relay on existing BB order and fall-back to slow algorithm) */
775783 for (b = 2 , bb = & blocks [2 ]; b <= blocks_count ; b ++ , bb ++ ) {
776784 IR_ASSERT (!(bb -> flags & IR_BB_UNREACHABLE ));
777785 IR_ASSERT (bb -> predecessors_count > 0 );
@@ -783,8 +791,8 @@ int ir_build_dominators_tree(ir_ctx *ctx)
783791 if (UNEXPECTED (idom >= b )) {
784792 /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
785793 ctx -> flags2 &= ~IR_NO_LOOPS ;
786- // IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
787794 if (UNEXPECTED (k <= 1 )) {
795+ // IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
788796slow_case :
789797 ir_list_free (& worklist );
790798 return ir_build_dominators_tree_slow (ctx );
@@ -798,6 +806,7 @@ int ir_build_dominators_tree(ir_ctx *ctx)
798806 break ;
799807 }
800808 if (UNEXPECTED (k == 0 )) {
809+ // IR_ASSERT(0 && "Wrong blocks order: BB is before all its predecessors");
801810 goto slow_case ;
802811 }
803812 ir_list_push (& worklist , idom );
@@ -830,13 +839,6 @@ int ir_build_dominators_tree(ir_ctx *ctx)
830839 bb -> dom_depth = idom_bb -> dom_depth + 1 ;
831840 }
832841
833- /* Construct children lists sorted by block number */
834- for (b = blocks_count , bb = & blocks [b ]; b >= 2 ; b -- , bb -- ) {
835- ir_block * idom_bb = & blocks [bb -> idom ];
836- bb -> dom_next_child = idom_bb -> dom_child ;
837- idom_bb -> dom_child = b ;
838- }
839-
840842 blocks [1 ].idom = 0 ;
841843
842844 if (ir_list_len (& worklist ) != 0 ) {
@@ -874,10 +876,18 @@ int ir_build_dominators_tree(ir_ctx *ctx)
874876
875877 if (UNEXPECTED (!complete )) {
876878 ir_list_free (& worklist );
879+ // TODO: this algorithm may be incorrect. Prove and/or switch to ir_build_dominators_tree_slow() ???
877880 return ir_build_dominators_tree_iterative (ctx );
878881 }
879882 }
880883
884+ /* Construct children lists sorted by block number */
885+ for (b = blocks_count , bb = & blocks [b ]; b >= 2 ; b -- , bb -- ) {
886+ ir_block * idom_bb = & blocks [bb -> idom ];
887+ bb -> dom_next_child = idom_bb -> dom_child ;
888+ idom_bb -> dom_child = b ;
889+ }
890+
881891 ir_list_free (& worklist );
882892
883893 return 1 ;
@@ -898,8 +908,6 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
898908 /* Clear the dominators tree, but keep already found dominators */
899909 for (b = 0 , bb = & blocks [0 ]; b <= blocks_count ; b ++ , bb ++ ) {
900910 bb -> dom_depth = 0 ;
901- bb -> dom_child = 0 ;
902- bb -> dom_next_child = 0 ;
903911 }
904912
905913 /* Find immediate dominators by iterative fixed-point algorithm */
@@ -917,20 +925,20 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
917925 if (blocks [idom ].idom == 0 ) {
918926 while (1 ) {
919927 k -- ;
928+ if (UNEXPECTED (k == 0 )) break ;
920929 p ++ ;
921930 idom = * p ;
922931 if (blocks [idom ].idom > 0 ) {
923932 break ;
924933 }
925- IR_ASSERT (k > 0 );
926934 }
935+ if (UNEXPECTED (k == 0 )) continue ;
927936 }
928937 IR_ASSERT (k != 0 );
929938 while (-- k > 0 ) {
930939 uint32_t pred_b = * (++ p );
931940
932941 if (blocks [pred_b ].idom > 0 ) {
933- IR_ASSERT (blocks [pred_b ].idom > 0 );
934942 while (idom != pred_b ) {
935943 while (pred_b > idom ) {
936944 pred_b = blocks [pred_b ].idom ;
@@ -1094,35 +1102,36 @@ int ir_find_loops(ir_ctx *ctx)
10941102 times = ir_mem_malloc ((ctx -> cfg_blocks_count + 1 ) * 3 * sizeof (uint32_t ));
10951103 sorted_blocks = times + (ctx -> cfg_blocks_count + 1 ) * 2 ;
10961104
1105+ ir_bitset visited = ir_bitset_malloc (ctx -> cfg_blocks_count + 1 );
10971106 ir_worklist_push (& work , 1 );
1098- ENTRY_TIME (1 ) = time ++ ;
1099-
11001107 while (ir_worklist_len (& work )) {
1101- ir_block * bb ;
1102-
1108+ next :
11031109 b = ir_worklist_peek (& work );
1110+ ir_block * bb = & blocks [b ];
11041111
1105- /* Visit successors of "b". */
1106- next :
1107- bb = & blocks [b ];
1108- n = bb -> successors_count ;
1109- if (n ) {
1110- uint32_t * p = edges + bb -> successors ;
1112+ if (!ir_bitset_in (visited , b )) {
1113+ ir_bitset_incl (visited , b );
1114+ ENTRY_TIME (b ) = time ++ ;
1115+ }
11111116
1117+ uint32_t n = bb -> successors_count - bb -> next_succ ;
1118+ if (n ) {
1119+ uint32_t * p = edges + bb -> successors + bb -> next_succ ;
11121120 for (; n > 0 ; p ++ , n -- ) {
11131121 uint32_t succ = * p ;
1114-
11151122 if (ir_worklist_push (& work , succ )) {
1116- b = succ ;
1117- ENTRY_TIME ( b ) = time ++ ;
1123+ bb -> next_succ = bb -> successors_count - n + 1 ;
1124+ IR_ASSERT ( blocks [ succ ]. next_succ == 0 ) ;
11181125 goto next ;
11191126 }
11201127 }
11211128 }
11221129
1130+ bb -> next_succ = 0 ;
11231131 EXIT_TIME (b ) = time ++ ;
11241132 ir_worklist_pop (& work );
11251133 }
1134+ ir_mem_free (visited );
11261135
11271136 /* Sort blocks by level, which is the opposite order in which we want to process them */
11281137 /* (Breadth First Search using "sorted_blocks" as a queue) */
0 commit comments