1/* 2 * Copyright 2011 INRIA Saclay 3 * Copyright 2012-2013 Ecole Normale Superieure 4 * 5 * Use of this software is governed by the MIT license 6 * 7 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France, 8 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod, 9 * 91893 Orsay, France 10 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France 11 */ 12 13#include <isl_ctx_private.h> 14#include <isl_map_private.h> 15#include <isl_space_private.h> 16#include <isl/aff.h> 17#include <isl/hash.h> 18#include <isl/constraint.h> 19#include <isl/schedule.h> 20#include <isl_mat_private.h> 21#include <isl/set.h> 22#include <isl/seq.h> 23#include <isl_tab.h> 24#include <isl_dim_map.h> 25#include <isl_hmap_map_basic_set.h> 26#include <isl_sort.h> 27#include <isl_schedule_private.h> 28#include <isl_band_private.h> 29#include <isl_options_private.h> 30#include <isl_tarjan.h> 31 32/* 33 * The scheduling algorithm implemented in this file was inspired by 34 * Bondhugula et al., "Automatic Transformations for Communication-Minimized 35 * Parallelization and Locality Optimization in the Polyhedral Model". 36 */ 37 38 39/* Internal information about a node that is used during the construction 40 * of a schedule. 41 * dim represents the space in which the domain lives 42 * sched is a matrix representation of the schedule being constructed 43 * for this node 44 * sched_map is an isl_map representation of the same (partial) schedule 45 * sched_map may be NULL 46 * rank is the number of linearly independent rows in the linear part 47 * of sched 48 * the columns of cmap represent a change of basis for the schedule 49 * coefficients; the first rank columns span the linear part of 50 * the schedule rows 51 * cinv is the inverse of cmap. 52 * start is the first variable in the LP problem in the sequences that 53 * represents the schedule coefficients of this node 54 * nvar is the dimension of the domain 55 * nparam is the number of parameters or 0 if we are not constructing 56 * a parametric schedule 57 * 58 * scc is the index of SCC (or WCC) this node belongs to 59 * 60 * band contains the band index for each of the rows of the schedule. 61 * band_id is used to differentiate between separate bands at the same 62 * level within the same parent band, i.e., bands that are separated 63 * by the parent band or bands that are independent of each other. 64 * zero contains a boolean for each of the rows of the schedule, 65 * indicating whether the corresponding scheduling dimension results 66 * in zero dependence distances within its band and with respect 67 * to the proximity edges. 68 */ 69struct isl_sched_node { 70 isl_space *dim; 71 isl_mat *sched; 72 isl_map *sched_map; 73 int rank; 74 isl_mat *cmap; 75 isl_mat *cinv; 76 int start; 77 int nvar; 78 int nparam; 79 80 int scc; 81 82 int *band; 83 int *band_id; 84 int *zero; 85}; 86 87static int node_has_dim(const void *entry, const void *val) 88{ 89 struct isl_sched_node *node = (struct isl_sched_node *)entry; 90 isl_space *dim = (isl_space *)val; 91 92 return isl_space_is_equal(node->dim, dim); 93} 94 95/* An edge in the dependence graph. An edge may be used to 96 * ensure validity of the generated schedule, to minimize the dependence 97 * distance or both 98 * 99 * map is the dependence relation 100 * src is the source node 101 * dst is the sink node 102 * validity is set if the edge is used to ensure correctness 103 * proximity is set if the edge is used to minimize dependence distances 104 * 105 * For validity edges, start and end mark the sequence of inequality 106 * constraints in the LP problem that encode the validity constraint 107 * corresponding to this edge. 108 */ 109struct isl_sched_edge { 110 isl_map *map; 111 112 struct isl_sched_node *src; 113 struct isl_sched_node *dst; 114 115 int validity; 116 int proximity; 117 118 int start; 119 int end; 120}; 121 122enum isl_edge_type { 123 isl_edge_validity = 0, 124 isl_edge_first = isl_edge_validity, 125 isl_edge_proximity, 126 isl_edge_last = isl_edge_proximity 127}; 128 129/* Internal information about the dependence graph used during 130 * the construction of the schedule. 131 * 132 * intra_hmap is a cache, mapping dependence relations to their dual, 133 * for dependences from a node to itself 134 * inter_hmap is a cache, mapping dependence relations to their dual, 135 * for dependences between distinct nodes 136 * 137 * n is the number of nodes 138 * node is the list of nodes 139 * maxvar is the maximal number of variables over all nodes 140 * max_row is the allocated number of rows in the schedule 141 * n_row is the current (maximal) number of linearly independent 142 * rows in the node schedules 143 * n_total_row is the current number of rows in the node schedules 144 * n_band is the current number of completed bands 145 * band_start is the starting row in the node schedules of the current band 146 * root is set if this graph is the original dependence graph, 147 * without any splitting 148 * 149 * sorted contains a list of node indices sorted according to the 150 * SCC to which a node belongs 151 * 152 * n_edge is the number of edges 153 * edge is the list of edges 154 * max_edge contains the maximal number of edges of each type; 155 * in particular, it contains the number of edges in the inital graph. 156 * edge_table contains pointers into the edge array, hashed on the source 157 * and sink spaces; there is one such table for each type; 158 * a given edge may be referenced from more than one table 159 * if the corresponding relation appears in more than of the 160 * sets of dependences 161 * 162 * node_table contains pointers into the node array, hashed on the space 163 * 164 * region contains a list of variable sequences that should be non-trivial 165 * 166 * lp contains the (I)LP problem used to obtain new schedule rows 167 * 168 * src_scc and dst_scc are the source and sink SCCs of an edge with 169 * conflicting constraints 170 * 171 * scc represents the number of components 172 */ 173struct isl_sched_graph { 174 isl_hmap_map_basic_set *intra_hmap; 175 isl_hmap_map_basic_set *inter_hmap; 176 177 struct isl_sched_node *node; 178 int n; 179 int maxvar; 180 int max_row; 181 int n_row; 182 183 int *sorted; 184 185 int n_band; 186 int n_total_row; 187 int band_start; 188 189 int root; 190 191 struct isl_sched_edge *edge; 192 int n_edge; 193 int max_edge[isl_edge_last + 1]; 194 struct isl_hash_table *edge_table[isl_edge_last + 1]; 195 196 struct isl_hash_table *node_table; 197 struct isl_region *region; 198 199 isl_basic_set *lp; 200 201 int src_scc; 202 int dst_scc; 203 204 int scc; 205}; 206 207/* Initialize node_table based on the list of nodes. 208 */ 209static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph) 210{ 211 int i; 212 213 graph->node_table = isl_hash_table_alloc(ctx, graph->n); 214 if (!graph->node_table) 215 return -1; 216 217 for (i = 0; i < graph->n; ++i) { 218 struct isl_hash_table_entry *entry; 219 uint32_t hash; 220 221 hash = isl_space_get_hash(graph->node[i].dim); 222 entry = isl_hash_table_find(ctx, graph->node_table, hash, 223 &node_has_dim, 224 graph->node[i].dim, 1); 225 if (!entry) 226 return -1; 227 entry->data = &graph->node[i]; 228 } 229 230 return 0; 231} 232 233/* Return a pointer to the node that lives within the given space, 234 * or NULL if there is no such node. 235 */ 236static struct isl_sched_node *graph_find_node(isl_ctx *ctx, 237 struct isl_sched_graph *graph, __isl_keep isl_space *dim) 238{ 239 struct isl_hash_table_entry *entry; 240 uint32_t hash; 241 242 hash = isl_space_get_hash(dim); 243 entry = isl_hash_table_find(ctx, graph->node_table, hash, 244 &node_has_dim, dim, 0); 245 246 return entry ? entry->data : NULL; 247} 248 249static int edge_has_src_and_dst(const void *entry, const void *val) 250{ 251 const struct isl_sched_edge *edge = entry; 252 const struct isl_sched_edge *temp = val; 253 254 return edge->src == temp->src && edge->dst == temp->dst; 255} 256 257/* Add the given edge to graph->edge_table[type]. 258 */ 259static int graph_edge_table_add(isl_ctx *ctx, struct isl_sched_graph *graph, 260 enum isl_edge_type type, struct isl_sched_edge *edge) 261{ 262 struct isl_hash_table_entry *entry; 263 uint32_t hash; 264 265 hash = isl_hash_init(); 266 hash = isl_hash_builtin(hash, edge->src); 267 hash = isl_hash_builtin(hash, edge->dst); 268 entry = isl_hash_table_find(ctx, graph->edge_table[type], hash, 269 &edge_has_src_and_dst, edge, 1); 270 if (!entry) 271 return -1; 272 entry->data = edge; 273 274 return 0; 275} 276 277/* Allocate the edge_tables based on the maximal number of edges of 278 * each type. 279 */ 280static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph) 281{ 282 int i; 283 284 for (i = 0; i <= isl_edge_last; ++i) { 285 graph->edge_table[i] = isl_hash_table_alloc(ctx, 286 graph->max_edge[i]); 287 if (!graph->edge_table[i]) 288 return -1; 289 } 290 291 return 0; 292} 293 294/* If graph->edge_table[type] contains an edge from the given source 295 * to the given destination, then return the hash table entry of this edge. 296 * Otherwise, return NULL. 297 */ 298static struct isl_hash_table_entry *graph_find_edge_entry( 299 struct isl_sched_graph *graph, 300 enum isl_edge_type type, 301 struct isl_sched_node *src, struct isl_sched_node *dst) 302{ 303 isl_ctx *ctx = isl_space_get_ctx(src->dim); 304 uint32_t hash; 305 struct isl_sched_edge temp = { .src = src, .dst = dst }; 306 307 hash = isl_hash_init(); 308 hash = isl_hash_builtin(hash, temp.src); 309 hash = isl_hash_builtin(hash, temp.dst); 310 return isl_hash_table_find(ctx, graph->edge_table[type], hash, 311 &edge_has_src_and_dst, &temp, 0); 312} 313 314 315/* If graph->edge_table[type] contains an edge from the given source 316 * to the given destination, then return this edge. 317 * Otherwise, return NULL. 318 */ 319static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph, 320 enum isl_edge_type type, 321 struct isl_sched_node *src, struct isl_sched_node *dst) 322{ 323 struct isl_hash_table_entry *entry; 324 325 entry = graph_find_edge_entry(graph, type, src, dst); 326 if (!entry) 327 return NULL; 328 329 return entry->data; 330} 331 332/* Check whether the dependence graph has an edge of the given type 333 * between the given two nodes. 334 */ 335static int graph_has_edge(struct isl_sched_graph *graph, 336 enum isl_edge_type type, 337 struct isl_sched_node *src, struct isl_sched_node *dst) 338{ 339 struct isl_sched_edge *edge; 340 int empty; 341 342 edge = graph_find_edge(graph, type, src, dst); 343 if (!edge) 344 return 0; 345 346 empty = isl_map_plain_is_empty(edge->map); 347 if (empty < 0) 348 return -1; 349 350 return !empty; 351} 352 353/* If there is an edge from the given source to the given destination 354 * of any type then return this edge. 355 * Otherwise, return NULL. 356 */ 357static struct isl_sched_edge *graph_find_any_edge(struct isl_sched_graph *graph, 358 struct isl_sched_node *src, struct isl_sched_node *dst) 359{ 360 enum isl_edge_type i; 361 struct isl_sched_edge *edge; 362 363 for (i = isl_edge_first; i <= isl_edge_last; ++i) { 364 edge = graph_find_edge(graph, i, src, dst); 365 if (edge) 366 return edge; 367 } 368 369 return NULL; 370} 371 372/* Remove the given edge from all the edge_tables that refer to it. 373 */ 374static void graph_remove_edge(struct isl_sched_graph *graph, 375 struct isl_sched_edge *edge) 376{ 377 isl_ctx *ctx = isl_map_get_ctx(edge->map); 378 enum isl_edge_type i; 379 380 for (i = isl_edge_first; i <= isl_edge_last; ++i) { 381 struct isl_hash_table_entry *entry; 382 383 entry = graph_find_edge_entry(graph, i, edge->src, edge->dst); 384 if (!entry) 385 continue; 386 if (entry->data != edge) 387 continue; 388 isl_hash_table_remove(ctx, graph->edge_table[i], entry); 389 } 390} 391 392/* Check whether the dependence graph has any edge 393 * between the given two nodes. 394 */ 395static int graph_has_any_edge(struct isl_sched_graph *graph, 396 struct isl_sched_node *src, struct isl_sched_node *dst) 397{ 398 enum isl_edge_type i; 399 int r; 400 401 for (i = isl_edge_first; i <= isl_edge_last; ++i) { 402 r = graph_has_edge(graph, i, src, dst); 403 if (r < 0 || r) 404 return r; 405 } 406 407 return r; 408} 409 410/* Check whether the dependence graph has a validity edge 411 * between the given two nodes. 412 */ 413static int graph_has_validity_edge(struct isl_sched_graph *graph, 414 struct isl_sched_node *src, struct isl_sched_node *dst) 415{ 416 return graph_has_edge(graph, isl_edge_validity, src, dst); 417} 418 419static int graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph, 420 int n_node, int n_edge) 421{ 422 int i; 423 424 graph->n = n_node; 425 graph->n_edge = n_edge; 426 graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n); 427 graph->sorted = isl_calloc_array(ctx, int, graph->n); 428 graph->region = isl_alloc_array(ctx, struct isl_region, graph->n); 429 graph->edge = isl_calloc_array(ctx, 430 struct isl_sched_edge, graph->n_edge); 431 432 graph->intra_hmap = isl_hmap_map_basic_set_alloc(ctx, 2 * n_edge); 433 graph->inter_hmap = isl_hmap_map_basic_set_alloc(ctx, 2 * n_edge); 434 435 if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) || 436 !graph->sorted) 437 return -1; 438 439 for(i = 0; i < graph->n; ++i) 440 graph->sorted[i] = i; 441 442 return 0; 443} 444 445static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph) 446{ 447 int i; 448 449 isl_hmap_map_basic_set_free(ctx, graph->intra_hmap); 450 isl_hmap_map_basic_set_free(ctx, graph->inter_hmap); 451 452 for (i = 0; i < graph->n; ++i) { 453 isl_space_free(graph->node[i].dim); 454 isl_mat_free(graph->node[i].sched); 455 isl_map_free(graph->node[i].sched_map); 456 isl_mat_free(graph->node[i].cmap); 457 isl_mat_free(graph->node[i].cinv); 458 if (graph->root) { 459 free(graph->node[i].band); 460 free(graph->node[i].band_id); 461 free(graph->node[i].zero); 462 } 463 } 464 free(graph->node); 465 free(graph->sorted); 466 for (i = 0; i < graph->n_edge; ++i) 467 isl_map_free(graph->edge[i].map); 468 free(graph->edge); 469 free(graph->region); 470 for (i = 0; i <= isl_edge_last; ++i) 471 isl_hash_table_free(ctx, graph->edge_table[i]); 472 isl_hash_table_free(ctx, graph->node_table); 473 isl_basic_set_free(graph->lp); 474} 475 476/* For each "set" on which this function is called, increment 477 * graph->n by one and update graph->maxvar. 478 */ 479static int init_n_maxvar(__isl_take isl_set *set, void *user) 480{ 481 struct isl_sched_graph *graph = user; 482 int nvar = isl_set_dim(set, isl_dim_set); 483 484 graph->n++; 485 if (nvar > graph->maxvar) 486 graph->maxvar = nvar; 487 488 isl_set_free(set); 489 490 return 0; 491} 492 493/* Compute the number of rows that should be allocated for the schedule. 494 * The graph can be split at most "n - 1" times, there can be at most 495 * two rows for each dimension in the iteration domains (in particular, 496 * we usually have one row, but it may be split by split_scaled), 497 * and there can be one extra row for ordering the statements. 498 * Note that if we have actually split "n - 1" times, then no ordering 499 * is needed, so in principle we could use "graph->n + 2 * graph->maxvar - 1". 500 */ 501static int compute_max_row(struct isl_sched_graph *graph, 502 __isl_keep isl_union_set *domain) 503{ 504 graph->n = 0; 505 graph->maxvar = 0; 506 if (isl_union_set_foreach_set(domain, &init_n_maxvar, graph) < 0) 507 return -1; 508 graph->max_row = graph->n + 2 * graph->maxvar; 509 510 return 0; 511} 512 513/* Add a new node to the graph representing the given set. 514 */ 515static int extract_node(__isl_take isl_set *set, void *user) 516{ 517 int nvar, nparam; 518 isl_ctx *ctx; 519 isl_space *dim; 520 isl_mat *sched; 521 struct isl_sched_graph *graph = user; 522 int *band, *band_id, *zero; 523 524 ctx = isl_set_get_ctx(set); 525 dim = isl_set_get_space(set); 526 isl_set_free(set); 527 nvar = isl_space_dim(dim, isl_dim_set); 528 nparam = isl_space_dim(dim, isl_dim_param); 529 if (!ctx->opt->schedule_parametric) 530 nparam = 0; 531 sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar); 532 graph->node[graph->n].dim = dim; 533 graph->node[graph->n].nvar = nvar; 534 graph->node[graph->n].nparam = nparam; 535 graph->node[graph->n].sched = sched; 536 graph->node[graph->n].sched_map = NULL; 537 band = isl_alloc_array(ctx, int, graph->max_row); 538 graph->node[graph->n].band = band; 539 band_id = isl_calloc_array(ctx, int, graph->max_row); 540 graph->node[graph->n].band_id = band_id; 541 zero = isl_calloc_array(ctx, int, graph->max_row); 542 graph->node[graph->n].zero = zero; 543 graph->n++; 544 545 if (!sched || (graph->max_row && (!band || !band_id || !zero))) 546 return -1; 547 548 return 0; 549} 550 551struct isl_extract_edge_data { 552 enum isl_edge_type type; 553 struct isl_sched_graph *graph; 554}; 555 556/* Add a new edge to the graph based on the given map 557 * and add it to data->graph->edge_table[data->type]. 558 * If a dependence relation of a given type happens to be identical 559 * to one of the dependence relations of a type that was added before, 560 * then we don't create a new edge, but instead mark the original edge 561 * as also representing a dependence of the current type. 562 */ 563static int extract_edge(__isl_take isl_map *map, void *user) 564{ 565 isl_ctx *ctx = isl_map_get_ctx(map); 566 struct isl_extract_edge_data *data = user; 567 struct isl_sched_graph *graph = data->graph; 568 struct isl_sched_node *src, *dst; 569 isl_space *dim; 570 struct isl_sched_edge *edge; 571 int is_equal; 572 573 dim = isl_space_domain(isl_map_get_space(map)); 574 src = graph_find_node(ctx, graph, dim); 575 isl_space_free(dim); 576 dim = isl_space_range(isl_map_get_space(map)); 577 dst = graph_find_node(ctx, graph, dim); 578 isl_space_free(dim); 579 580 if (!src || !dst) { 581 isl_map_free(map); 582 return 0; 583 } 584 585 graph->edge[graph->n_edge].src = src; 586 graph->edge[graph->n_edge].dst = dst; 587 graph->edge[graph->n_edge].map = map; 588 if (data->type == isl_edge_validity) { 589 graph->edge[graph->n_edge].validity = 1; 590 graph->edge[graph->n_edge].proximity = 0; 591 } 592 if (data->type == isl_edge_proximity) { 593 graph->edge[graph->n_edge].validity = 0; 594 graph->edge[graph->n_edge].proximity = 1; 595 } 596 graph->n_edge++; 597 598 edge = graph_find_any_edge(graph, src, dst); 599 if (!edge) 600 return graph_edge_table_add(ctx, graph, data->type, 601 &graph->edge[graph->n_edge - 1]); 602 is_equal = isl_map_plain_is_equal(map, edge->map); 603 if (is_equal < 0) 604 return -1; 605 if (!is_equal) 606 return graph_edge_table_add(ctx, graph, data->type, 607 &graph->edge[graph->n_edge - 1]); 608 609 graph->n_edge--; 610 edge->validity |= graph->edge[graph->n_edge].validity; 611 edge->proximity |= graph->edge[graph->n_edge].proximity; 612 isl_map_free(map); 613 614 return graph_edge_table_add(ctx, graph, data->type, edge); 615} 616 617/* Check whether there is any dependence from node[j] to node[i] 618 * or from node[i] to node[j]. 619 */ 620static int node_follows_weak(int i, int j, void *user) 621{ 622 int f; 623 struct isl_sched_graph *graph = user; 624 625 f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]); 626 if (f < 0 || f) 627 return f; 628 return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]); 629} 630 631/* Check whether there is a validity dependence from node[j] to node[i], 632 * forcing node[i] to follow node[j]. 633 */ 634static int node_follows_strong(int i, int j, void *user) 635{ 636 struct isl_sched_graph *graph = user; 637 638 return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]); 639} 640 641/* Use Tarjan's algorithm for computing the strongly connected components 642 * in the dependence graph (only validity edges). 643 * If weak is set, we consider the graph to be undirected and 644 * we effectively compute the (weakly) connected components. 645 * Additionally, we also consider other edges when weak is set. 646 */ 647static int detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph, int weak) 648{ 649 int i, n; 650 struct isl_tarjan_graph *g = NULL; 651 652 g = isl_tarjan_graph_init(ctx, graph->n, 653 weak ? &node_follows_weak : &node_follows_strong, graph); 654 if (!g) 655 return -1; 656 657 graph->scc = 0; 658 i = 0; 659 n = graph->n; 660 while (n) { 661 while (g->order[i] != -1) { 662 graph->node[g->order[i]].scc = graph->scc; 663 --n; 664 ++i; 665 } 666 ++i; 667 graph->scc++; 668 } 669 670 isl_tarjan_graph_free(g); 671 672 return 0; 673} 674 675/* Apply Tarjan's algorithm to detect the strongly connected components 676 * in the dependence graph. 677 */ 678static int detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph) 679{ 680 return detect_ccs(ctx, graph, 0); 681} 682 683/* Apply Tarjan's algorithm to detect the (weakly) connected components 684 * in the dependence graph. 685 */ 686static int detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph) 687{ 688 return detect_ccs(ctx, graph, 1); 689} 690 691static int cmp_scc(const void *a, const void *b, void *data) 692{ 693 struct isl_sched_graph *graph = data; 694 const int *i1 = a; 695 const int *i2 = b; 696 697 return graph->node[*i1].scc - graph->node[*i2].scc; 698} 699 700/* Sort the elements of graph->sorted according to the corresponding SCCs. 701 */ 702static int sort_sccs(struct isl_sched_graph *graph) 703{ 704 return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph); 705} 706 707/* Given a dependence relation R from a node to itself, 708 * construct the set of coefficients of valid constraints for elements 709 * in that dependence relation. 710 * In particular, the result contains tuples of coefficients 711 * c_0, c_n, c_x such that 712 * 713 * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R 714 * 715 * or, equivalently, 716 * 717 * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } 718 * 719 * We choose here to compute the dual of delta R. 720 * Alternatively, we could have computed the dual of R, resulting 721 * in a set of tuples c_0, c_n, c_x, c_y, and then 722 * plugged in (c_0, c_n, c_x, -c_x). 723 */ 724static __isl_give isl_basic_set *intra_coefficients( 725 struct isl_sched_graph *graph, __isl_take isl_map *map) 726{ 727 isl_ctx *ctx = isl_map_get_ctx(map); 728 isl_set *delta; 729 isl_basic_set *coef; 730 731 if (isl_hmap_map_basic_set_has(ctx, graph->intra_hmap, map)) 732 return isl_hmap_map_basic_set_get(ctx, graph->intra_hmap, map); 733 734 delta = isl_set_remove_divs(isl_map_deltas(isl_map_copy(map))); 735 coef = isl_set_coefficients(delta); 736 isl_hmap_map_basic_set_set(ctx, graph->intra_hmap, map, 737 isl_basic_set_copy(coef)); 738 739 return coef; 740} 741 742/* Given a dependence relation R, * construct the set of coefficients 743 * of valid constraints for elements in that dependence relation. 744 * In particular, the result contains tuples of coefficients 745 * c_0, c_n, c_x, c_y such that 746 * 747 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R 748 * 749 */ 750static __isl_give isl_basic_set *inter_coefficients( 751 struct isl_sched_graph *graph, __isl_take isl_map *map) 752{ 753 isl_ctx *ctx = isl_map_get_ctx(map); 754 isl_set *set; 755 isl_basic_set *coef; 756 757 if (isl_hmap_map_basic_set_has(ctx, graph->inter_hmap, map)) 758 return isl_hmap_map_basic_set_get(ctx, graph->inter_hmap, map); 759 760 set = isl_map_wrap(isl_map_remove_divs(isl_map_copy(map))); 761 coef = isl_set_coefficients(set); 762 isl_hmap_map_basic_set_set(ctx, graph->inter_hmap, map, 763 isl_basic_set_copy(coef)); 764 765 return coef; 766} 767 768/* Add constraints to graph->lp that force validity for the given 769 * dependence from a node i to itself. 770 * That is, add constraints that enforce 771 * 772 * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x) 773 * = c_i_x (y - x) >= 0 774 * 775 * for each (x,y) in R. 776 * We obtain general constraints on coefficients (c_0, c_n, c_x) 777 * of valid constraints for (y - x) and then plug in (0, 0, c_i_x^+ - c_i_x^-), 778 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative. 779 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. 780 * 781 * Actually, we do not construct constraints for the c_i_x themselves, 782 * but for the coefficients of c_i_x written as a linear combination 783 * of the columns in node->cmap. 784 */ 785static int add_intra_validity_constraints(struct isl_sched_graph *graph, 786 struct isl_sched_edge *edge) 787{ 788 unsigned total; 789 isl_map *map = isl_map_copy(edge->map); 790 isl_ctx *ctx = isl_map_get_ctx(map); 791 isl_space *dim; 792 isl_dim_map *dim_map; 793 isl_basic_set *coef; 794 struct isl_sched_node *node = edge->src; 795 796 coef = intra_coefficients(graph, map); 797 798 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef))); 799 800 coef = isl_basic_set_transform_dims(coef, isl_dim_set, 801 isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap)); 802 if (!coef) 803 goto error; 804 805 total = isl_basic_set_total_dim(graph->lp); 806 dim_map = isl_dim_map_alloc(ctx, total); 807 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2, 808 isl_space_dim(dim, isl_dim_set), 1, 809 node->nvar, -1); 810 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2, 811 isl_space_dim(dim, isl_dim_set), 1, 812 node->nvar, 1); 813 graph->lp = isl_basic_set_extend_constraints(graph->lp, 814 coef->n_eq, coef->n_ineq); 815 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp, 816 coef, dim_map); 817 isl_space_free(dim); 818 819 return 0; 820error: 821 isl_space_free(dim); 822 return -1; 823} 824 825/* Add constraints to graph->lp that force validity for the given 826 * dependence from node i to node j. 827 * That is, add constraints that enforce 828 * 829 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0 830 * 831 * for each (x,y) in R. 832 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) 833 * of valid constraints for R and then plug in 834 * (c_j_0 - c_i_0, c_j_n^+ - c_j_n^- - (c_i_n^+ - c_i_n^-), 835 * c_j_x^+ - c_j_x^- - (c_i_x^+ - c_i_x^-)), 836 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative. 837 * In graph->lp, the c_*^- appear before their c_*^+ counterpart. 838 * 839 * Actually, we do not construct constraints for the c_*_x themselves, 840 * but for the coefficients of c_*_x written as a linear combination 841 * of the columns in node->cmap. 842 */ 843static int add_inter_validity_constraints(struct isl_sched_graph *graph, 844 struct isl_sched_edge *edge) 845{ 846 unsigned total; 847 isl_map *map = isl_map_copy(edge->map); 848 isl_ctx *ctx = isl_map_get_ctx(map); 849 isl_space *dim; 850 isl_dim_map *dim_map; 851 isl_basic_set *coef; 852 struct isl_sched_node *src = edge->src; 853 struct isl_sched_node *dst = edge->dst; 854 855 coef = inter_coefficients(graph, map); 856 857 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef))); 858 859 coef = isl_basic_set_transform_dims(coef, isl_dim_set, 860 isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap)); 861 coef = isl_basic_set_transform_dims(coef, isl_dim_set, 862 isl_space_dim(dim, isl_dim_set) + src->nvar, 863 isl_mat_copy(dst->cmap)); 864 if (!coef) 865 goto error; 866 867 total = isl_basic_set_total_dim(graph->lp); 868 dim_map = isl_dim_map_alloc(ctx, total); 869 870 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1); 871 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1); 872 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1); 873 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2, 874 isl_space_dim(dim, isl_dim_set) + src->nvar, 1, 875 dst->nvar, -1); 876 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2, 877 isl_space_dim(dim, isl_dim_set) + src->nvar, 1, 878 dst->nvar, 1); 879 880 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1); 881 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1); 882 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1); 883 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2, 884 isl_space_dim(dim, isl_dim_set), 1, 885 src->nvar, 1); 886 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2, 887 isl_space_dim(dim, isl_dim_set), 1, 888 src->nvar, -1); 889 890 edge->start = graph->lp->n_ineq; 891 graph->lp = isl_basic_set_extend_constraints(graph->lp, 892 coef->n_eq, coef->n_ineq); 893 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp, 894 coef, dim_map); 895 if (!graph->lp) 896 goto error; 897 isl_space_free(dim); 898 edge->end = graph->lp->n_ineq; 899 900 return 0; 901error: 902 isl_space_free(dim); 903 return -1; 904} 905 906/* Add constraints to graph->lp that bound the dependence distance for the given 907 * dependence from a node i to itself. 908 * If s = 1, we add the constraint 909 * 910 * c_i_x (y - x) <= m_0 + m_n n 911 * 912 * or 913 * 914 * -c_i_x (y - x) + m_0 + m_n n >= 0 915 * 916 * for each (x,y) in R. 917 * If s = -1, we add the constraint 918 * 919 * -c_i_x (y - x) <= m_0 + m_n n 920 * 921 * or 922 * 923 * c_i_x (y - x) + m_0 + m_n n >= 0 924 * 925 * for each (x,y) in R. 926 * We obtain general constraints on coefficients (c_0, c_n, c_x) 927 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x), 928 * with each coefficient (except m_0) represented as a pair of non-negative 929 * coefficients. 930 * 931 * Actually, we do not construct constraints for the c_i_x themselves, 932 * but for the coefficients of c_i_x written as a linear combination 933 * of the columns in node->cmap. 934 */ 935static int add_intra_proximity_constraints(struct isl_sched_graph *graph, 936 struct isl_sched_edge *edge, int s) 937{ 938 unsigned total; 939 unsigned nparam; 940 isl_map *map = isl_map_copy(edge->map); 941 isl_ctx *ctx = isl_map_get_ctx(map); 942 isl_space *dim; 943 isl_dim_map *dim_map; 944 isl_basic_set *coef; 945 struct isl_sched_node *node = edge->src; 946 947 coef = intra_coefficients(graph, map); 948 949 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef))); 950 951 coef = isl_basic_set_transform_dims(coef, isl_dim_set, 952 isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap)); 953 if (!coef) 954 goto error; 955 956 nparam = isl_space_dim(node->dim, isl_dim_param); 957 total = isl_basic_set_total_dim(graph->lp); 958 dim_map = isl_dim_map_alloc(ctx, total); 959 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1); 960 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1); 961 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1); 962 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2, 963 isl_space_dim(dim, isl_dim_set), 1, 964 node->nvar, s); 965 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2, 966 isl_space_dim(dim, isl_dim_set), 1, 967 node->nvar, -s); 968 graph->lp = isl_basic_set_extend_constraints(graph->lp, 969 coef->n_eq, coef->n_ineq); 970 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp, 971 coef, dim_map); 972 isl_space_free(dim); 973 974 return 0; 975error: 976 isl_space_free(dim); 977 return -1; 978} 979 980/* Add constraints to graph->lp that bound the dependence distance for the given 981 * dependence from node i to node j. 982 * If s = 1, we add the constraint 983 * 984 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) 985 * <= m_0 + m_n n 986 * 987 * or 988 * 989 * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) + 990 * m_0 + m_n n >= 0 991 * 992 * for each (x,y) in R. 993 * If s = -1, we add the constraint 994 * 995 * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) 996 * <= m_0 + m_n n 997 * 998 * or 999 * 1000 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) + 1001 * m_0 + m_n n >= 0 1002 * 1003 * for each (x,y) in R. 1004 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) 1005 * of valid constraints for R and then plug in 1006 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n, 1007 * -s*c_j_x+s*c_i_x) 1008 * with each coefficient (except m_0, c_j_0 and c_i_0) 1009 * represented as a pair of non-negative coefficients. 1010 * 1011 * Actually, we do not construct constraints for the c_*_x themselves, 1012 * but for the coefficients of c_*_x written as a linear combination 1013 * of the columns in node->cmap. 1014 */ 1015static int add_inter_proximity_constraints(struct isl_sched_graph *graph, 1016 struct isl_sched_edge *edge, int s) 1017{ 1018 unsigned total; 1019 unsigned nparam; 1020 isl_map *map = isl_map_copy(edge->map); 1021 isl_ctx *ctx = isl_map_get_ctx(map); 1022 isl_space *dim; 1023 isl_dim_map *dim_map; 1024 isl_basic_set *coef; 1025 struct isl_sched_node *src = edge->src; 1026 struct isl_sched_node *dst = edge->dst; 1027 1028 coef = inter_coefficients(graph, map); 1029 1030 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef))); 1031 1032 coef = isl_basic_set_transform_dims(coef, isl_dim_set, 1033 isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap)); 1034 coef = isl_basic_set_transform_dims(coef, isl_dim_set, 1035 isl_space_dim(dim, isl_dim_set) + src->nvar, 1036 isl_mat_copy(dst->cmap)); 1037 if (!coef) 1038 goto error; 1039 1040 nparam = isl_space_dim(src->dim, isl_dim_param); 1041 total = isl_basic_set_total_dim(graph->lp); 1042 dim_map = isl_dim_map_alloc(ctx, total); 1043 1044 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1); 1045 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1); 1046 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1); 1047 1048 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, -s); 1049 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, s); 1050 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, -s); 1051 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2, 1052 isl_space_dim(dim, isl_dim_set) + src->nvar, 1, 1053 dst->nvar, s); 1054 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2, 1055 isl_space_dim(dim, isl_dim_set) + src->nvar, 1, 1056 dst->nvar, -s); 1057 1058 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, s); 1059 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, -s); 1060 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, s); 1061 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2, 1062 isl_space_dim(dim, isl_dim_set), 1, 1063 src->nvar, -s); 1064 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2, 1065 isl_space_dim(dim, isl_dim_set), 1, 1066 src->nvar, s); 1067 1068 graph->lp = isl_basic_set_extend_constraints(graph->lp, 1069 coef->n_eq, coef->n_ineq); 1070 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp, 1071 coef, dim_map); 1072 isl_space_free(dim); 1073 1074 return 0; 1075error: 1076 isl_space_free(dim); 1077 return -1; 1078} 1079 1080static int add_all_validity_constraints(struct isl_sched_graph *graph) 1081{ 1082 int i; 1083 1084 for (i = 0; i < graph->n_edge; ++i) { 1085 struct isl_sched_edge *edge= &graph->edge[i]; 1086 if (!edge->validity) 1087 continue; 1088 if (edge->src != edge->dst) 1089 continue; 1090 if (add_intra_validity_constraints(graph, edge) < 0) 1091 return -1; 1092 } 1093 1094 for (i = 0; i < graph->n_edge; ++i) { 1095 struct isl_sched_edge *edge = &graph->edge[i]; 1096 if (!edge->validity) 1097 continue; 1098 if (edge->src == edge->dst) 1099 continue; 1100 if (add_inter_validity_constraints(graph, edge) < 0) 1101 return -1; 1102 } 1103 1104 return 0; 1105} 1106 1107/* Add constraints to graph->lp that bound the dependence distance 1108 * for all dependence relations. 1109 * If a given proximity dependence is identical to a validity 1110 * dependence, then the dependence distance is already bounded 1111 * from below (by zero), so we only need to bound the distance 1112 * from above. 1113 * Otherwise, we need to bound the distance both from above and from below. 1114 */ 1115static int add_all_proximity_constraints(struct isl_sched_graph *graph) 1116{ 1117 int i; 1118 1119 for (i = 0; i < graph->n_edge; ++i) { 1120 struct isl_sched_edge *edge= &graph->edge[i]; 1121 if (!edge->proximity) 1122 continue; 1123 if (edge->src == edge->dst && 1124 add_intra_proximity_constraints(graph, edge, 1) < 0) 1125 return -1; 1126 if (edge->src != edge->dst && 1127 add_inter_proximity_constraints(graph, edge, 1) < 0) 1128 return -1; 1129 if (edge->validity) 1130 continue; 1131 if (edge->src == edge->dst && 1132 add_intra_proximity_constraints(graph, edge, -1) < 0) 1133 return -1; 1134 if (edge->src != edge->dst && 1135 add_inter_proximity_constraints(graph, edge, -1) < 0) 1136 return -1; 1137 } 1138 1139 return 0; 1140} 1141 1142/* Compute a basis for the rows in the linear part of the schedule 1143 * and extend this basis to a full basis. The remaining rows 1144 * can then be used to force linear independence from the rows 1145 * in the schedule. 1146 * 1147 * In particular, given the schedule rows S, we compute 1148 * 1149 * S = H Q 1150 * S U = H 1151 * 1152 * with H the Hermite normal form of S. That is, all but the 1153 * first rank columns of Q are zero and so each row in S is 1154 * a linear combination of the first rank rows of Q. 1155 * The matrix Q is then transposed because we will write the 1156 * coefficients of the next schedule row as a column vector s 1157 * and express this s as a linear combination s = Q c of the 1158 * computed basis. 1159 * Similarly, the matrix U is transposed such that we can 1160 * compute the coefficients c = U s from a schedule row s. 1161 */ 1162static int node_update_cmap(struct isl_sched_node *node) 1163{ 1164 isl_mat *H, *U, *Q; 1165 int n_row = isl_mat_rows(node->sched); 1166 1167 H = isl_mat_sub_alloc(node->sched, 0, n_row, 1168 1 + node->nparam, node->nvar); 1169 1170 H = isl_mat_left_hermite(H, 0, &U, &Q); 1171 isl_mat_free(node->cmap); 1172 isl_mat_free(node->cinv); 1173 node->cmap = isl_mat_transpose(Q); 1174 node->cinv = isl_mat_transpose(U); 1175 node->rank = isl_mat_initial_non_zero_cols(H); 1176 isl_mat_free(H); 1177 1178 if (!node->cmap || !node->cinv || node->rank < 0) 1179 return -1; 1180 return 0; 1181} 1182 1183/* Count the number of equality and inequality constraints 1184 * that will be added for the given map. 1185 * If carry is set, then we are counting the number of (validity) 1186 * constraints that will be added in setup_carry_lp and we count 1187 * each edge exactly once. Otherwise, we count as follows 1188 * validity -> 1 (>= 0) 1189 * validity+proximity -> 2 (>= 0 and upper bound) 1190 * proximity -> 2 (lower and upper bound) 1191 */ 1192static int count_map_constraints(struct isl_sched_graph *graph, 1193 struct isl_sched_edge *edge, __isl_take isl_map *map, 1194 int *n_eq, int *n_ineq, int carry) 1195{ 1196 isl_basic_set *coef; 1197 int f = carry ? 1 : edge->proximity ? 2 : 1; 1198 1199 if (carry && !edge->validity) { 1200 isl_map_free(map); 1201 return 0; 1202 } 1203 1204 if (edge->src == edge->dst) 1205 coef = intra_coefficients(graph, map); 1206 else 1207 coef = inter_coefficients(graph, map); 1208 if (!coef) 1209 return -1; 1210 *n_eq += f * coef->n_eq; 1211 *n_ineq += f * coef->n_ineq; 1212 isl_basic_set_free(coef); 1213 1214 return 0; 1215} 1216 1217/* Count the number of equality and inequality constraints 1218 * that will be added to the main lp problem. 1219 * We count as follows 1220 * validity -> 1 (>= 0) 1221 * validity+proximity -> 2 (>= 0 and upper bound) 1222 * proximity -> 2 (lower and upper bound) 1223 */ 1224static int count_constraints(struct isl_sched_graph *graph, 1225 int *n_eq, int *n_ineq) 1226{ 1227 int i; 1228 1229 *n_eq = *n_ineq = 0; 1230 for (i = 0; i < graph->n_edge; ++i) { 1231 struct isl_sched_edge *edge= &graph->edge[i]; 1232 isl_map *map = isl_map_copy(edge->map); 1233 1234 if (count_map_constraints(graph, edge, map, 1235 n_eq, n_ineq, 0) < 0) 1236 return -1; 1237 } 1238 1239 return 0; 1240} 1241 1242/* Add constraints that bound the values of the variable and parameter 1243 * coefficients of the schedule. 1244 * 1245 * The maximal value of the coefficients is defined by the option 1246 * 'schedule_max_coefficient'. 1247 */ 1248static int add_bound_coefficient_constraints(isl_ctx *ctx, 1249 struct isl_sched_graph *graph) 1250{ 1251 int i, j, k; 1252 int max_coefficient; 1253 int total; 1254 1255 max_coefficient = ctx->opt->schedule_max_coefficient; 1256 1257 if (max_coefficient == -1) 1258 return 0; 1259 1260 total = isl_basic_set_total_dim(graph->lp); 1261 1262 for (i = 0; i < graph->n; ++i) { 1263 struct isl_sched_node *node = &graph->node[i]; 1264 for (j = 0; j < 2 * node->nparam + 2 * node->nvar; ++j) { 1265 int dim; 1266 k = isl_basic_set_alloc_inequality(graph->lp); 1267 if (k < 0) 1268 return -1; 1269 dim = 1 + node->start + 1 + j; 1270 isl_seq_clr(graph->lp->ineq[k], 1 + total); 1271 isl_int_set_si(graph->lp->ineq[k][dim], -1); 1272 isl_int_set_si(graph->lp->ineq[k][0], max_coefficient); 1273 } 1274 } 1275 1276 return 0; 1277} 1278 1279/* Construct an ILP problem for finding schedule coefficients 1280 * that result in non-negative, but small dependence distances 1281 * over all dependences. 1282 * In particular, the dependence distances over proximity edges 1283 * are bounded by m_0 + m_n n and we compute schedule coefficients 1284 * with small values (preferably zero) of m_n and m_0. 1285 * 1286 * All variables of the ILP are non-negative. The actual coefficients 1287 * may be negative, so each coefficient is represented as the difference 1288 * of two non-negative variables. The negative part always appears 1289 * immediately before the positive part. 1290 * Other than that, the variables have the following order 1291 * 1292 * - sum of positive and negative parts of m_n coefficients 1293 * - m_0 1294 * - sum of positive and negative parts of all c_n coefficients 1295 * (unconstrained when computing non-parametric schedules) 1296 * - sum of positive and negative parts of all c_x coefficients 1297 * - positive and negative parts of m_n coefficients 1298 * - for each node 1299 * - c_i_0 1300 * - positive and negative parts of c_i_n (if parametric) 1301 * - positive and negative parts of c_i_x 1302 * 1303 * The c_i_x are not represented directly, but through the columns of 1304 * node->cmap. That is, the computed values are for variable t_i_x 1305 * such that c_i_x = Q t_i_x with Q equal to node->cmap. 1306 * 1307 * The constraints are those from the edges plus two or three equalities 1308 * to express the sums. 1309 * 1310 * If force_zero is set, then we add equalities to ensure that 1311 * the sum of the m_n coefficients and m_0 are both zero. 1312 */ 1313static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph, 1314 int force_zero) 1315{ 1316 int i, j; 1317 int k; 1318 unsigned nparam; 1319 unsigned total; 1320 isl_space *dim; 1321 int parametric; 1322 int param_pos; 1323 int n_eq, n_ineq; 1324 int max_constant_term; 1325 int max_coefficient; 1326 1327 max_constant_term = ctx->opt->schedule_max_constant_term; 1328 max_coefficient = ctx->opt->schedule_max_coefficient; 1329 1330 parametric = ctx->opt->schedule_parametric; 1331 nparam = isl_space_dim(graph->node[0].dim, isl_dim_param); 1332 param_pos = 4; 1333 total = param_pos + 2 * nparam; 1334 for (i = 0; i < graph->n; ++i) { 1335 struct isl_sched_node *node = &graph->node[graph->sorted[i]]; 1336 if (node_update_cmap(node) < 0) 1337 return -1; 1338 node->start = total; 1339 total += 1 + 2 * (node->nparam + node->nvar); 1340 } 1341 1342 if (count_constraints(graph, &n_eq, &n_ineq) < 0) 1343 return -1; 1344 1345 dim = isl_space_set_alloc(ctx, 0, total); 1346 isl_basic_set_free(graph->lp); 1347 n_eq += 2 + parametric + force_zero; 1348 if (max_constant_term != -1) 1349 n_ineq += graph->n; 1350 if (max_coefficient != -1) 1351 for (i = 0; i < graph->n; ++i) 1352 n_ineq += 2 * graph->node[i].nparam + 1353 2 * graph->node[i].nvar; 1354 1355 graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq); 1356 1357 k = isl_basic_set_alloc_equality(graph->lp); 1358 if (k < 0) 1359 return -1; 1360 isl_seq_clr(graph->lp->eq[k], 1 + total); 1361 if (!force_zero) 1362 isl_int_set_si(graph->lp->eq[k][1], -1); 1363 for (i = 0; i < 2 * nparam; ++i) 1364 isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1); 1365 1366 if (force_zero) { 1367 k = isl_basic_set_alloc_equality(graph->lp); 1368 if (k < 0) 1369 return -1; 1370 isl_seq_clr(graph->lp->eq[k], 1 + total); 1371 isl_int_set_si(graph->lp->eq[k][2], -1); 1372 } 1373 1374 if (parametric) { 1375 k = isl_basic_set_alloc_equality(graph->lp); 1376 if (k < 0) 1377 return -1; 1378 isl_seq_clr(graph->lp->eq[k], 1 + total); 1379 isl_int_set_si(graph->lp->eq[k][3], -1); 1380 for (i = 0; i < graph->n; ++i) { 1381 int pos = 1 + graph->node[i].start + 1; 1382 1383 for (j = 0; j < 2 * graph->node[i].nparam; ++j) 1384 isl_int_set_si(graph->lp->eq[k][pos + j], 1); 1385 } 1386 } 1387 1388 k = isl_basic_set_alloc_equality(graph->lp); 1389 if (k < 0) 1390 return -1; 1391 isl_seq_clr(graph->lp->eq[k], 1 + total); 1392 isl_int_set_si(graph->lp->eq[k][4], -1); 1393 for (i = 0; i < graph->n; ++i) { 1394 struct isl_sched_node *node = &graph->node[i]; 1395 int pos = 1 + node->start + 1 + 2 * node->nparam; 1396 1397 for (j = 0; j < 2 * node->nvar; ++j) 1398 isl_int_set_si(graph->lp->eq[k][pos + j], 1); 1399 } 1400 1401 if (max_constant_term != -1) 1402 for (i = 0; i < graph->n; ++i) { 1403 struct isl_sched_node *node = &graph->node[i]; 1404 k = isl_basic_set_alloc_inequality(graph->lp); 1405 if (k < 0) 1406 return -1; 1407 isl_seq_clr(graph->lp->ineq[k], 1 + total); 1408 isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1); 1409 isl_int_set_si(graph->lp->ineq[k][0], max_constant_term); 1410 } 1411 1412 if (add_bound_coefficient_constraints(ctx, graph) < 0) 1413 return -1; 1414 if (add_all_validity_constraints(graph) < 0) 1415 return -1; 1416 if (add_all_proximity_constraints(graph) < 0) 1417 return -1; 1418 1419 return 0; 1420} 1421 1422/* Analyze the conflicting constraint found by 1423 * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity 1424 * constraint of one of the edges between distinct nodes, living, moreover 1425 * in distinct SCCs, then record the source and sink SCC as this may 1426 * be a good place to cut between SCCs. 1427 */ 1428static int check_conflict(int con, void *user) 1429{ 1430 int i; 1431 struct isl_sched_graph *graph = user; 1432 1433 if (graph->src_scc >= 0) 1434 return 0; 1435 1436 con -= graph->lp->n_eq; 1437 1438 if (con >= graph->lp->n_ineq) 1439 return 0; 1440 1441 for (i = 0; i < graph->n_edge; ++i) { 1442 if (!graph->edge[i].validity) 1443 continue; 1444 if (graph->edge[i].src == graph->edge[i].dst) 1445 continue; 1446 if (graph->edge[i].src->scc == graph->edge[i].dst->scc) 1447 continue; 1448 if (graph->edge[i].start > con) 1449 continue; 1450 if (graph->edge[i].end <= con) 1451 continue; 1452 graph->src_scc = graph->edge[i].src->scc; 1453 graph->dst_scc = graph->edge[i].dst->scc; 1454 } 1455 1456 return 0; 1457} 1458 1459/* Check whether the next schedule row of the given node needs to be 1460 * non-trivial. Lower-dimensional domains may have some trivial rows, 1461 * but as soon as the number of remaining required non-trivial rows 1462 * is as large as the number or remaining rows to be computed, 1463 * all remaining rows need to be non-trivial. 1464 */ 1465static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node) 1466{ 1467 return node->nvar - node->rank >= graph->maxvar - graph->n_row; 1468} 1469 1470/* Solve the ILP problem constructed in setup_lp. 1471 * For each node such that all the remaining rows of its schedule 1472 * need to be non-trivial, we construct a non-triviality region. 1473 * This region imposes that the next row is independent of previous rows. 1474 * In particular the coefficients c_i_x are represented by t_i_x 1475 * variables with c_i_x = Q t_i_x and Q a unimodular matrix such that 1476 * its first columns span the rows of the previously computed part 1477 * of the schedule. The non-triviality region enforces that at least 1478 * one of the remaining components of t_i_x is non-zero, i.e., 1479 * that the new schedule row depends on at least one of the remaining 1480 * columns of Q. 1481 */ 1482static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph) 1483{ 1484 int i; 1485 isl_vec *sol; 1486 isl_basic_set *lp; 1487 1488 for (i = 0; i < graph->n; ++i) { 1489 struct isl_sched_node *node = &graph->node[i]; 1490 int skip = node->rank; 1491 graph->region[i].pos = node->start + 1 + 2*(node->nparam+skip); 1492 if (needs_row(graph, node)) 1493 graph->region[i].len = 2 * (node->nvar - skip); 1494 else 1495 graph->region[i].len = 0; 1496 } 1497 lp = isl_basic_set_copy(graph->lp); 1498 sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n, 1499 graph->region, &check_conflict, graph); 1500 return sol; 1501} 1502 1503/* Update the schedules of all nodes based on the given solution 1504 * of the LP problem. 1505 * The new row is added to the current band. 1506 * All possibly negative coefficients are encoded as a difference 1507 * of two non-negative variables, so we need to perform the subtraction 1508 * here. Moreover, if use_cmap is set, then the solution does 1509 * not refer to the actual coefficients c_i_x, but instead to variables 1510 * t_i_x such that c_i_x = Q t_i_x and Q is equal to node->cmap. 1511 * In this case, we then also need to perform this multiplication 1512 * to obtain the values of c_i_x. 1513 * 1514 * If check_zero is set, then the first two coordinates of sol are 1515 * assumed to correspond to the dependence distance. If these two 1516 * coordinates are zero, then the corresponding scheduling dimension 1517 * is marked as being zero distance. 1518 */ 1519static int update_schedule(struct isl_sched_graph *graph, 1520 __isl_take isl_vec *sol, int use_cmap, int check_zero) 1521{ 1522 int i, j; 1523 int zero = 0; 1524 isl_vec *csol = NULL; 1525 1526 if (!sol) 1527 goto error; 1528 if (sol->size == 0) 1529 isl_die(sol->ctx, isl_error_internal, 1530 "no solution found", goto error); 1531 if (graph->n_total_row >= graph->max_row) 1532 isl_die(sol->ctx, isl_error_internal, 1533 "too many schedule rows", goto error); 1534 1535 if (check_zero) 1536 zero = isl_int_is_zero(sol->el[1]) && 1537 isl_int_is_zero(sol->el[2]); 1538 1539 for (i = 0; i < graph->n; ++i) { 1540 struct isl_sched_node *node = &graph->node[i]; 1541 int pos = node->start; 1542 int row = isl_mat_rows(node->sched); 1543 1544 isl_vec_free(csol); 1545 csol = isl_vec_alloc(sol->ctx, node->nvar); 1546 if (!csol) 1547 goto error; 1548 1549 isl_map_free(node->sched_map); 1550 node->sched_map = NULL; 1551 node->sched = isl_mat_add_rows(node->sched, 1); 1552 if (!node->sched) 1553 goto error; 1554 node->sched = isl_mat_set_element(node->sched, row, 0, 1555 sol->el[1 + pos]); 1556 for (j = 0; j < node->nparam + node->nvar; ++j) 1557 isl_int_sub(sol->el[1 + pos + 1 + 2 * j + 1], 1558 sol->el[1 + pos + 1 + 2 * j + 1], 1559 sol->el[1 + pos + 1 + 2 * j]); 1560 for (j = 0; j < node->nparam; ++j) 1561 node->sched = isl_mat_set_element(node->sched, 1562 row, 1 + j, sol->el[1+pos+1+2*j+1]); 1563 for (j = 0; j < node->nvar; ++j) 1564 isl_int_set(csol->el[j], 1565 sol->el[1+pos+1+2*(node->nparam+j)+1]); 1566 if (use_cmap) 1567 csol = isl_mat_vec_product(isl_mat_copy(node->cmap), 1568 csol); 1569 if (!csol) 1570 goto error; 1571 for (j = 0; j < node->nvar; ++j) 1572 node->sched = isl_mat_set_element(node->sched, 1573 row, 1 + node->nparam + j, csol->el[j]); 1574 node->band[graph->n_total_row] = graph->n_band; 1575 node->zero[graph->n_total_row] = zero; 1576 } 1577 isl_vec_free(sol); 1578 isl_vec_free(csol); 1579 1580 graph->n_row++; 1581 graph->n_total_row++; 1582 1583 return 0; 1584error: 1585 isl_vec_free(sol); 1586 isl_vec_free(csol); 1587 return -1; 1588} 1589 1590/* Convert node->sched into a multi_aff and return this multi_aff. 1591 */ 1592static __isl_give isl_multi_aff *node_extract_schedule_multi_aff( 1593 struct isl_sched_node *node) 1594{ 1595 int i, j; 1596 isl_space *space; 1597 isl_local_space *ls; 1598 isl_aff *aff; 1599 isl_multi_aff *ma; 1600 int nrow, ncol; 1601 isl_int v; 1602 1603 nrow = isl_mat_rows(node->sched); 1604 ncol = isl_mat_cols(node->sched) - 1; 1605 space = isl_space_from_domain(isl_space_copy(node->dim)); 1606 space = isl_space_add_dims(space, isl_dim_out, nrow); 1607 ma = isl_multi_aff_zero(space); 1608 ls = isl_local_space_from_space(isl_space_copy(node->dim)); 1609 1610 isl_int_init(v); 1611 1612 for (i = 0; i < nrow; ++i) { 1613 aff = isl_aff_zero_on_domain(isl_local_space_copy(ls)); 1614 isl_mat_get_element(node->sched, i, 0, &v); 1615 aff = isl_aff_set_constant(aff, v); 1616 for (j = 0; j < node->nparam; ++j) { 1617 isl_mat_get_element(node->sched, i, 1 + j, &v); 1618 aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v); 1619 } 1620 for (j = 0; j < node->nvar; ++j) { 1621 isl_mat_get_element(node->sched, 1622 i, 1 + node->nparam + j, &v); 1623 aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v); 1624 } 1625 ma = isl_multi_aff_set_aff(ma, i, aff); 1626 } 1627 1628 isl_int_clear(v); 1629 1630 isl_local_space_free(ls); 1631 1632 return ma; 1633} 1634 1635/* Convert node->sched into a map and return this map. 1636 * 1637 * The result is cached in node->sched_map, which needs to be released 1638 * whenever node->sched is updated. 1639 */ 1640static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node) 1641{ 1642 if (!node->sched_map) { 1643 isl_multi_aff *ma; 1644 1645 ma = node_extract_schedule_multi_aff(node); 1646 node->sched_map = isl_map_from_multi_aff(ma); 1647 } 1648 1649 return isl_map_copy(node->sched_map); 1650} 1651 1652/* Update the given dependence relation based on the current schedule. 1653 * That is, intersect the dependence relation with a map expressing 1654 * that source and sink are executed within the same iteration of 1655 * the current schedule. 1656 * This is not the most efficient way, but this shouldn't be a critical 1657 * operation. 1658 */ 1659static __isl_give isl_map *specialize(__isl_take isl_map *map, 1660 struct isl_sched_node *src, struct isl_sched_node *dst) 1661{ 1662 isl_map *src_sched, *dst_sched, *id; 1663 1664 src_sched = node_extract_schedule(src); 1665 dst_sched = node_extract_schedule(dst); 1666 id = isl_map_apply_range(src_sched, isl_map_reverse(dst_sched)); 1667 return isl_map_intersect(map, id); 1668} 1669 1670/* Update the dependence relations of all edges based on the current schedule. 1671 * If a dependence is carried completely by the current schedule, then 1672 * it is removed from the edge_tables. It is kept in the list of edges 1673 * as otherwise all edge_tables would have to be recomputed. 1674 */ 1675static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph) 1676{ 1677 int i; 1678 1679 for (i = graph->n_edge - 1; i >= 0; --i) { 1680 struct isl_sched_edge *edge = &graph->edge[i]; 1681 edge->map = specialize(edge->map, edge->src, edge->dst); 1682 if (!edge->map) 1683 return -1; 1684 1685 if (isl_map_plain_is_empty(edge->map)) 1686 graph_remove_edge(graph, edge); 1687 } 1688 1689 return 0; 1690} 1691 1692static void next_band(struct isl_sched_graph *graph) 1693{ 1694 graph->band_start = graph->n_total_row; 1695 graph->n_band++; 1696} 1697 1698/* Topologically sort statements mapped to the same schedule iteration 1699 * and add a row to the schedule corresponding to this order. 1700 */ 1701static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph) 1702{ 1703 int i, j; 1704 1705 if (graph->n <= 1) 1706 return 0; 1707 1708 if (update_edges(ctx, graph) < 0) 1709 return -1; 1710 1711 if (graph->n_edge == 0) 1712 return 0; 1713 1714 if (detect_sccs(ctx, graph) < 0) 1715 return -1; 1716 1717 if (graph->n_total_row >= graph->max_row) 1718 isl_die(ctx, isl_error_internal, 1719 "too many schedule rows", return -1); 1720 1721 for (i = 0; i < graph->n; ++i) { 1722 struct isl_sched_node *node = &graph->node[i]; 1723 int row = isl_mat_rows(node->sched); 1724 int cols = isl_mat_cols(node->sched); 1725 1726 isl_map_free(node->sched_map); 1727 node->sched_map = NULL; 1728 node->sched = isl_mat_add_rows(node->sched, 1); 1729 if (!node->sched) 1730 return -1; 1731 node->sched = isl_mat_set_element_si(node->sched, row, 0, 1732 node->scc); 1733 for (j = 1; j < cols; ++j) 1734 node->sched = isl_mat_set_element_si(node->sched, 1735 row, j, 0); 1736 node->band[graph->n_total_row] = graph->n_band; 1737 } 1738 1739 graph->n_total_row++; 1740 next_band(graph); 1741 1742 return 0; 1743} 1744 1745/* Construct an isl_schedule based on the computed schedule stored 1746 * in graph and with parameters specified by dim. 1747 */ 1748static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph, 1749 __isl_take isl_space *dim) 1750{ 1751 int i; 1752 isl_ctx *ctx; 1753 isl_schedule *sched = NULL; 1754 1755 if (!dim) 1756 return NULL; 1757 1758 ctx = isl_space_get_ctx(dim); 1759 sched = isl_calloc(ctx, struct isl_schedule, 1760 sizeof(struct isl_schedule) + 1761 (graph->n - 1) * sizeof(struct isl_schedule_node)); 1762 if (!sched) 1763 goto error; 1764 1765 sched->ref = 1; 1766 sched->n = graph->n; 1767 sched->n_band = graph->n_band; 1768 sched->n_total_row = graph->n_total_row; 1769 1770 for (i = 0; i < sched->n; ++i) { 1771 int r, b; 1772 int *band_end, *band_id, *zero; 1773 1774 sched->node[i].sched = 1775 node_extract_schedule_multi_aff(&graph->node[i]); 1776 if (!sched->node[i].sched) 1777 goto error; 1778 1779 sched->node[i].n_band = graph->n_band; 1780 if (graph->n_band == 0) 1781 continue; 1782 1783 band_end = isl_alloc_array(ctx, int, graph->n_band); 1784 band_id = isl_alloc_array(ctx, int, graph->n_band); 1785 zero = isl_alloc_array(ctx, int, graph->n_total_row); 1786 sched->node[i].band_end = band_end; 1787 sched->node[i].band_id = band_id; 1788 sched->node[i].zero = zero; 1789 if (!band_end || !band_id || !zero) 1790 goto error; 1791 1792 for (r = 0; r < graph->n_total_row; ++r) 1793 zero[r] = graph->node[i].zero[r]; 1794 for (r = b = 0; r < graph->n_total_row; ++r) { 1795 if (graph->node[i].band[r] == b) 1796 continue; 1797 band_end[b++] = r; 1798 if (graph->node[i].band[r] == -1) 1799 break; 1800 } 1801 if (r == graph->n_total_row) 1802 band_end[b++] = r; 1803 sched->node[i].n_band = b; 1804 for (--b; b >= 0; --b) 1805 band_id[b] = graph->node[i].band_id[b]; 1806 } 1807 1808 sched->dim = dim; 1809 1810 return sched; 1811error: 1812 isl_space_free(dim); 1813 isl_schedule_free(sched); 1814 return NULL; 1815} 1816 1817/* Copy nodes that satisfy node_pred from the src dependence graph 1818 * to the dst dependence graph. 1819 */ 1820static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src, 1821 int (*node_pred)(struct isl_sched_node *node, int data), int data) 1822{ 1823 int i; 1824 1825 dst->n = 0; 1826 for (i = 0; i < src->n; ++i) { 1827 if (!node_pred(&src->node[i], data)) 1828 continue; 1829 dst->node[dst->n].dim = isl_space_copy(src->node[i].dim); 1830 dst->node[dst->n].nvar = src->node[i].nvar; 1831 dst->node[dst->n].nparam = src->node[i].nparam; 1832 dst->node[dst->n].sched = isl_mat_copy(src->node[i].sched); 1833 dst->node[dst->n].sched_map = 1834 isl_map_copy(src->node[i].sched_map); 1835 dst->node[dst->n].band = src->node[i].band; 1836 dst->node[dst->n].band_id = src->node[i].band_id; 1837 dst->node[dst->n].zero = src->node[i].zero; 1838 dst->n++; 1839 } 1840 1841 return 0; 1842} 1843 1844/* Copy non-empty edges that satisfy edge_pred from the src dependence graph 1845 * to the dst dependence graph. 1846 * If the source or destination node of the edge is not in the destination 1847 * graph, then it must be a backward proximity edge and it should simply 1848 * be ignored. 1849 */ 1850static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst, 1851 struct isl_sched_graph *src, 1852 int (*edge_pred)(struct isl_sched_edge *edge, int data), int data) 1853{ 1854 int i; 1855 enum isl_edge_type t; 1856 1857 dst->n_edge = 0; 1858 for (i = 0; i < src->n_edge; ++i) { 1859 struct isl_sched_edge *edge = &src->edge[i]; 1860 isl_map *map; 1861 struct isl_sched_node *dst_src, *dst_dst; 1862 1863 if (!edge_pred(edge, data)) 1864 continue; 1865 1866 if (isl_map_plain_is_empty(edge->map)) 1867 continue; 1868 1869 dst_src = graph_find_node(ctx, dst, edge->src->dim); 1870 dst_dst = graph_find_node(ctx, dst, edge->dst->dim); 1871 if (!dst_src || !dst_dst) { 1872 if (edge->validity) 1873 isl_die(ctx, isl_error_internal, 1874 "backward validity edge", return -1); 1875 continue; 1876 } 1877 1878 map = isl_map_copy(edge->map); 1879 1880 dst->edge[dst->n_edge].src = dst_src; 1881 dst->edge[dst->n_edge].dst = dst_dst; 1882 dst->edge[dst->n_edge].map = map; 1883 dst->edge[dst->n_edge].validity = edge->validity; 1884 dst->edge[dst->n_edge].proximity = edge->proximity; 1885 dst->n_edge++; 1886 1887 for (t = isl_edge_first; t <= isl_edge_last; ++t) { 1888 if (edge != 1889 graph_find_edge(src, t, edge->src, edge->dst)) 1890 continue; 1891 if (graph_edge_table_add(ctx, dst, t, 1892 &dst->edge[dst->n_edge - 1]) < 0) 1893 return -1; 1894 } 1895 } 1896 1897 return 0; 1898} 1899 1900/* Given a "src" dependence graph that contains the nodes from "dst" 1901 * that satisfy node_pred, copy the schedule computed in "src" 1902 * for those nodes back to "dst". 1903 */ 1904static int copy_schedule(struct isl_sched_graph *dst, 1905 struct isl_sched_graph *src, 1906 int (*node_pred)(struct isl_sched_node *node, int data), int data) 1907{ 1908 int i; 1909 1910 src->n = 0; 1911 for (i = 0; i < dst->n; ++i) { 1912 if (!node_pred(&dst->node[i], data)) 1913 continue; 1914 isl_mat_free(dst->node[i].sched); 1915 isl_map_free(dst->node[i].sched_map); 1916 dst->node[i].sched = isl_mat_copy(src->node[src->n].sched); 1917 dst->node[i].sched_map = 1918 isl_map_copy(src->node[src->n].sched_map); 1919 src->n++; 1920 } 1921 1922 dst->max_row = src->max_row; 1923 dst->n_total_row = src->n_total_row; 1924 dst->n_band = src->n_band; 1925 1926 return 0; 1927} 1928 1929/* Compute the maximal number of variables over all nodes. 1930 * This is the maximal number of linearly independent schedule 1931 * rows that we need to compute. 1932 * Just in case we end up in a part of the dependence graph 1933 * with only lower-dimensional domains, we make sure we will 1934 * compute the required amount of extra linearly independent rows. 1935 */ 1936static int compute_maxvar(struct isl_sched_graph *graph) 1937{ 1938 int i; 1939 1940 graph->maxvar = 0; 1941 for (i = 0; i < graph->n; ++i) { 1942 struct isl_sched_node *node = &graph->node[i]; 1943 int nvar; 1944 1945 if (node_update_cmap(node) < 0) 1946 return -1; 1947 nvar = node->nvar + graph->n_row - node->rank; 1948 if (nvar > graph->maxvar) 1949 graph->maxvar = nvar; 1950 } 1951 1952 return 0; 1953} 1954 1955static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph); 1956static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph); 1957 1958/* Compute a schedule for a subgraph of "graph". In particular, for 1959 * the graph composed of nodes that satisfy node_pred and edges that 1960 * that satisfy edge_pred. The caller should precompute the number 1961 * of nodes and edges that satisfy these predicates and pass them along 1962 * as "n" and "n_edge". 1963 * If the subgraph is known to consist of a single component, then wcc should 1964 * be set and then we call compute_schedule_wcc on the constructed subgraph. 1965 * Otherwise, we call compute_schedule, which will check whether the subgraph 1966 * is connected. 1967 */ 1968static int compute_sub_schedule(isl_ctx *ctx, 1969 struct isl_sched_graph *graph, int n, int n_edge, 1970 int (*node_pred)(struct isl_sched_node *node, int data), 1971 int (*edge_pred)(struct isl_sched_edge *edge, int data), 1972 int data, int wcc) 1973{ 1974 struct isl_sched_graph split = { 0 }; 1975 int t; 1976 1977 if (graph_alloc(ctx, &split, n, n_edge) < 0) 1978 goto error; 1979 if (copy_nodes(&split, graph, node_pred, data) < 0) 1980 goto error; 1981 if (graph_init_table(ctx, &split) < 0) 1982 goto error; 1983 for (t = 0; t <= isl_edge_last; ++t) 1984 split.max_edge[t] = graph->max_edge[t]; 1985 if (graph_init_edge_tables(ctx, &split) < 0) 1986 goto error; 1987 if (copy_edges(ctx, &split, graph, edge_pred, data) < 0) 1988 goto error; 1989 split.n_row = graph->n_row; 1990 split.max_row = graph->max_row; 1991 split.n_total_row = graph->n_total_row; 1992 split.n_band = graph->n_band; 1993 split.band_start = graph->band_start; 1994 1995 if (wcc && compute_schedule_wcc(ctx, &split) < 0) 1996 goto error; 1997 if (!wcc && compute_schedule(ctx, &split) < 0) 1998 goto error; 1999 2000 copy_schedule(graph, &split, node_pred, data); 2001 2002 graph_free(ctx, &split); 2003 return 0; 2004error: 2005 graph_free(ctx, &split); 2006 return -1; 2007} 2008 2009static int node_scc_exactly(struct isl_sched_node *node, int scc) 2010{ 2011 return node->scc == scc; 2012} 2013 2014static int node_scc_at_most(struct isl_sched_node *node, int scc) 2015{ 2016 return node->scc <= scc; 2017} 2018 2019static int node_scc_at_least(struct isl_sched_node *node, int scc) 2020{ 2021 return node->scc >= scc; 2022} 2023 2024static int edge_scc_exactly(struct isl_sched_edge *edge, int scc) 2025{ 2026 return edge->src->scc == scc && edge->dst->scc == scc; 2027} 2028 2029static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc) 2030{ 2031 return edge->dst->scc <= scc; 2032} 2033 2034static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc) 2035{ 2036 return edge->src->scc >= scc; 2037} 2038 2039/* Pad the schedules of all nodes with zero rows such that in the end 2040 * they all have graph->n_total_row rows. 2041 * The extra rows don't belong to any band, so they get assigned band number -1. 2042 */ 2043static int pad_schedule(struct isl_sched_graph *graph) 2044{ 2045 int i, j; 2046 2047 for (i = 0; i < graph->n; ++i) { 2048 struct isl_sched_node *node = &graph->node[i]; 2049 int row = isl_mat_rows(node->sched); 2050 if (graph->n_total_row > row) { 2051 isl_map_free(node->sched_map); 2052 node->sched_map = NULL; 2053 } 2054 node->sched = isl_mat_add_zero_rows(node->sched, 2055 graph->n_total_row - row); 2056 if (!node->sched) 2057 return -1; 2058 for (j = row; j < graph->n_total_row; ++j) 2059 node->band[j] = -1; 2060 } 2061 2062 return 0; 2063} 2064 2065/* Split the current graph into two parts and compute a schedule for each 2066 * part individually. In particular, one part consists of all SCCs up 2067 * to and including graph->src_scc, while the other part contains the other 2068 * SCCS. 2069 * 2070 * The split is enforced in the schedule by constant rows with two different 2071 * values (0 and 1). These constant rows replace the previously computed rows 2072 * in the current band. 2073 * It would be possible to reuse them as the first rows in the next 2074 * band, but recomputing them may result in better rows as we are looking 2075 * at a smaller part of the dependence graph. 2076 * compute_split_schedule is only called when no zero-distance schedule row 2077 * could be found on the entire graph, so we wark the splitting row as 2078 * non zero-distance. 2079 * 2080 * The band_id of the second group is set to n, where n is the number 2081 * of nodes in the first group. This ensures that the band_ids over 2082 * the two groups remain disjoint, even if either or both of the two 2083 * groups contain independent components. 2084 */ 2085static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph) 2086{ 2087 int i, j, n, e1, e2; 2088 int n_total_row, orig_total_row; 2089 int n_band, orig_band; 2090 int drop; 2091 2092 if (graph->n_total_row >= graph->max_row) 2093 isl_die(ctx, isl_error_internal, 2094 "too many schedule rows", return -1); 2095 2096 drop = graph->n_total_row - graph->band_start; 2097 graph->n_total_row -= drop; 2098 graph->n_row -= drop; 2099 2100 n = 0; 2101 for (i = 0; i < graph->n; ++i) { 2102 struct isl_sched_node *node = &graph->node[i]; 2103 int row = isl_mat_rows(node->sched) - drop; 2104 int cols = isl_mat_cols(node->sched); 2105 int before = node->scc <= graph->src_scc; 2106 2107 if (before) 2108 n++; 2109 2110 isl_map_free(node->sched_map); 2111 node->sched_map = NULL; 2112 node->sched = isl_mat_drop_rows(node->sched, 2113 graph->band_start, drop); 2114 node->sched = isl_mat_add_rows(node->sched, 1); 2115 if (!node->sched) 2116 return -1; 2117 node->sched = isl_mat_set_element_si(node->sched, row, 0, 2118 !before); 2119 for (j = 1; j < cols; ++j) 2120 node->sched = isl_mat_set_element_si(node->sched, 2121 row, j, 0); 2122 node->band[graph->n_total_row] = graph->n_band; 2123 node->zero[graph->n_total_row] = 0; 2124 } 2125 2126 e1 = e2 = 0; 2127 for (i = 0; i < graph->n_edge; ++i) { 2128 if (graph->edge[i].dst->scc <= graph->src_scc) 2129 e1++; 2130 if (graph->edge[i].src->scc > graph->src_scc) 2131 e2++; 2132 } 2133 2134 graph->n_total_row++; 2135 next_band(graph); 2136 2137 for (i = 0; i < graph->n; ++i) { 2138 struct isl_sched_node *node = &graph->node[i]; 2139 if (node->scc > graph->src_scc) 2140 node->band_id[graph->n_band] = n; 2141 } 2142 2143 orig_total_row = graph->n_total_row; 2144 orig_band = graph->n_band; 2145 if (compute_sub_schedule(ctx, graph, n, e1, 2146 &node_scc_at_most, &edge_dst_scc_at_most, 2147 graph->src_scc, 0) < 0) 2148 return -1; 2149 n_total_row = graph->n_total_row; 2150 graph->n_total_row = orig_total_row; 2151 n_band = graph->n_band; 2152 graph->n_band = orig_band; 2153 if (compute_sub_schedule(ctx, graph, graph->n - n, e2, 2154 &node_scc_at_least, &edge_src_scc_at_least, 2155 graph->src_scc + 1, 0) < 0) 2156 return -1; 2157 if (n_total_row > graph->n_total_row) 2158 graph->n_total_row = n_total_row; 2159 if (n_band > graph->n_band) 2160 graph->n_band = n_band; 2161 2162 return pad_schedule(graph); 2163} 2164 2165/* Compute the next band of the schedule after updating the dependence 2166 * relations based on the the current schedule. 2167 */ 2168static int compute_next_band(isl_ctx *ctx, struct isl_sched_graph *graph) 2169{ 2170 if (update_edges(ctx, graph) < 0) 2171 return -1; 2172 next_band(graph); 2173 2174 return compute_schedule(ctx, graph); 2175} 2176 2177/* Add constraints to graph->lp that force the dependence "map" (which 2178 * is part of the dependence relation of "edge") 2179 * to be respected and attempt to carry it, where the edge is one from 2180 * a node j to itself. "pos" is the sequence number of the given map. 2181 * That is, add constraints that enforce 2182 * 2183 * (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x) 2184 * = c_j_x (y - x) >= e_i 2185 * 2186 * for each (x,y) in R. 2187 * We obtain general constraints on coefficients (c_0, c_n, c_x) 2188 * of valid constraints for (y - x) and then plug in (-e_i, 0, c_j_x), 2189 * with each coefficient in c_j_x represented as a pair of non-negative 2190 * coefficients. 2191 */ 2192static int add_intra_constraints(struct isl_sched_graph *graph, 2193 struct isl_sched_edge *edge, __isl_take isl_map *map, int pos) 2194{ 2195 unsigned total; 2196 isl_ctx *ctx = isl_map_get_ctx(map); 2197 isl_space *dim; 2198 isl_dim_map *dim_map; 2199 isl_basic_set *coef; 2200 struct isl_sched_node *node = edge->src; 2201 2202 coef = intra_coefficients(graph, map); 2203 if (!coef) 2204 return -1; 2205 2206 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef))); 2207 2208 total = isl_basic_set_total_dim(graph->lp); 2209 dim_map = isl_dim_map_alloc(ctx, total); 2210 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); 2211 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2, 2212 isl_space_dim(dim, isl_dim_set), 1, 2213 node->nvar, -1); 2214 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2, 2215 isl_space_dim(dim, isl_dim_set), 1, 2216 node->nvar, 1); 2217 graph->lp = isl_basic_set_extend_constraints(graph->lp, 2218 coef->n_eq, coef->n_ineq); 2219 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp, 2220 coef, dim_map); 2221 isl_space_free(dim); 2222 2223 return 0; 2224} 2225 2226/* Add constraints to graph->lp that force the dependence "map" (which 2227 * is part of the dependence relation of "edge") 2228 * to be respected and attempt to carry it, where the edge is one from 2229 * node j to node k. "pos" is the sequence number of the given map. 2230 * That is, add constraints that enforce 2231 * 2232 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i 2233 * 2234 * for each (x,y) in R. 2235 * We obtain general constraints on coefficients (c_0, c_n, c_x) 2236 * of valid constraints for R and then plug in 2237 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, c_k_x - c_j_x) 2238 * with each coefficient (except e_i, c_k_0 and c_j_0) 2239 * represented as a pair of non-negative coefficients. 2240 */ 2241static int add_inter_constraints(struct isl_sched_graph *graph, 2242 struct isl_sched_edge *edge, __isl_take isl_map *map, int pos) 2243{ 2244 unsigned total; 2245 isl_ctx *ctx = isl_map_get_ctx(map); 2246 isl_space *dim; 2247 isl_dim_map *dim_map; 2248 isl_basic_set *coef; 2249 struct isl_sched_node *src = edge->src; 2250 struct isl_sched_node *dst = edge->dst; 2251 2252 coef = inter_coefficients(graph, map); 2253 if (!coef) 2254 return -1; 2255 2256 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef))); 2257 2258 total = isl_basic_set_total_dim(graph->lp); 2259 dim_map = isl_dim_map_alloc(ctx, total); 2260 2261 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1); 2262 2263 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1); 2264 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1); 2265 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1); 2266 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2, 2267 isl_space_dim(dim, isl_dim_set) + src->nvar, 1, 2268 dst->nvar, -1); 2269 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2, 2270 isl_space_dim(dim, isl_dim_set) + src->nvar, 1, 2271 dst->nvar, 1); 2272 2273 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1); 2274 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1); 2275 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1); 2276 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2, 2277 isl_space_dim(dim, isl_dim_set), 1, 2278 src->nvar, 1); 2279 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2, 2280 isl_space_dim(dim, isl_dim_set), 1, 2281 src->nvar, -1); 2282 2283 graph->lp = isl_basic_set_extend_constraints(graph->lp, 2284 coef->n_eq, coef->n_ineq); 2285 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp, 2286 coef, dim_map); 2287 isl_space_free(dim); 2288 2289 return 0; 2290} 2291 2292/* Add constraints to graph->lp that force all validity dependences 2293 * to be respected and attempt to carry them. 2294 */ 2295static int add_all_constraints(struct isl_sched_graph *graph) 2296{ 2297 int i, j; 2298 int pos; 2299 2300 pos = 0; 2301 for (i = 0; i < graph->n_edge; ++i) { 2302 struct isl_sched_edge *edge= &graph->edge[i]; 2303 2304 if (!edge->validity) 2305 continue; 2306 2307 for (j = 0; j < edge->map->n; ++j) { 2308 isl_basic_map *bmap; 2309 isl_map *map; 2310 2311 bmap = isl_basic_map_copy(edge->map->p[j]); 2312 map = isl_map_from_basic_map(bmap); 2313 2314 if (edge->src == edge->dst && 2315 add_intra_constraints(graph, edge, map, pos) < 0) 2316 return -1; 2317 if (edge->src != edge->dst && 2318 add_inter_constraints(graph, edge, map, pos) < 0) 2319 return -1; 2320 ++pos; 2321 } 2322 } 2323 2324 return 0; 2325} 2326 2327/* Count the number of equality and inequality constraints 2328 * that will be added to the carry_lp problem. 2329 * We count each edge exactly once. 2330 */ 2331static int count_all_constraints(struct isl_sched_graph *graph, 2332 int *n_eq, int *n_ineq) 2333{ 2334 int i, j; 2335 2336 *n_eq = *n_ineq = 0; 2337 for (i = 0; i < graph->n_edge; ++i) { 2338 struct isl_sched_edge *edge= &graph->edge[i]; 2339 for (j = 0; j < edge->map->n; ++j) { 2340 isl_basic_map *bmap; 2341 isl_map *map; 2342 2343 bmap = isl_basic_map_copy(edge->map->p[j]); 2344 map = isl_map_from_basic_map(bmap); 2345 2346 if (count_map_constraints(graph, edge, map, 2347 n_eq, n_ineq, 1) < 0) 2348 return -1; 2349 } 2350 } 2351 2352 return 0; 2353} 2354 2355/* Construct an LP problem for finding schedule coefficients 2356 * such that the schedule carries as many dependences as possible. 2357 * In particular, for each dependence i, we bound the dependence distance 2358 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum 2359 * of all e_i's. Dependence with e_i = 0 in the solution are simply 2360 * respected, while those with e_i > 0 (in practice e_i = 1) are carried. 2361 * Note that if the dependence relation is a union of basic maps, 2362 * then we have to consider each basic map individually as it may only 2363 * be possible to carry the dependences expressed by some of those 2364 * basic maps and not all off them. 2365 * Below, we consider each of those basic maps as a separate "edge". 2366 * 2367 * All variables of the LP are non-negative. The actual coefficients 2368 * may be negative, so each coefficient is represented as the difference 2369 * of two non-negative variables. The negative part always appears 2370 * immediately before the positive part. 2371 * Other than that, the variables have the following order 2372 * 2373 * - sum of (1 - e_i) over all edges 2374 * - sum of positive and negative parts of all c_n coefficients 2375 * (unconstrained when computing non-parametric schedules) 2376 * - sum of positive and negative parts of all c_x coefficients 2377 * - for each edge 2378 * - e_i 2379 * - for each node 2380 * - c_i_0 2381 * - positive and negative parts of c_i_n (if parametric) 2382 * - positive and negative parts of c_i_x 2383 * 2384 * The constraints are those from the (validity) edges plus three equalities 2385 * to express the sums and n_edge inequalities to express e_i <= 1. 2386 */ 2387static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph) 2388{ 2389 int i, j; 2390 int k; 2391 isl_space *dim; 2392 unsigned total; 2393 int n_eq, n_ineq; 2394 int n_edge; 2395 2396 n_edge = 0; 2397 for (i = 0; i < graph->n_edge; ++i) 2398 n_edge += graph->edge[i].map->n; 2399 2400 total = 3 + n_edge; 2401 for (i = 0; i < graph->n; ++i) { 2402 struct isl_sched_node *node = &graph->node[graph->sorted[i]]; 2403 node->start = total; 2404 total += 1 + 2 * (node->nparam + node->nvar); 2405 } 2406 2407 if (count_all_constraints(graph, &n_eq, &n_ineq) < 0) 2408 return -1; 2409 2410 dim = isl_space_set_alloc(ctx, 0, total); 2411 isl_basic_set_free(graph->lp); 2412 n_eq += 3; 2413 n_ineq += n_edge; 2414 graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq); 2415 graph->lp = isl_basic_set_set_rational(graph->lp); 2416 2417 k = isl_basic_set_alloc_equality(graph->lp); 2418 if (k < 0) 2419 return -1; 2420 isl_seq_clr(graph->lp->eq[k], 1 + total); 2421 isl_int_set_si(graph->lp->eq[k][0], -n_edge); 2422 isl_int_set_si(graph->lp->eq[k][1], 1); 2423 for (i = 0; i < n_edge; ++i) 2424 isl_int_set_si(graph->lp->eq[k][4 + i], 1); 2425 2426 k = isl_basic_set_alloc_equality(graph->lp); 2427 if (k < 0) 2428 return -1; 2429 isl_seq_clr(graph->lp->eq[k], 1 + total); 2430 isl_int_set_si(graph->lp->eq[k][2], -1); 2431 for (i = 0; i < graph->n; ++i) { 2432 int pos = 1 + graph->node[i].start + 1; 2433 2434 for (j = 0; j < 2 * graph->node[i].nparam; ++j) 2435 isl_int_set_si(graph->lp->eq[k][pos + j], 1); 2436 } 2437 2438 k = isl_basic_set_alloc_equality(graph->lp); 2439 if (k < 0) 2440 return -1; 2441 isl_seq_clr(graph->lp->eq[k], 1 + total); 2442 isl_int_set_si(graph->lp->eq[k][3], -1); 2443 for (i = 0; i < graph->n; ++i) { 2444 struct isl_sched_node *node = &graph->node[i]; 2445 int pos = 1 + node->start + 1 + 2 * node->nparam; 2446 2447 for (j = 0; j < 2 * node->nvar; ++j) 2448 isl_int_set_si(graph->lp->eq[k][pos + j], 1); 2449 } 2450 2451 for (i = 0; i < n_edge; ++i) { 2452 k = isl_basic_set_alloc_inequality(graph->lp); 2453 if (k < 0) 2454 return -1; 2455 isl_seq_clr(graph->lp->ineq[k], 1 + total); 2456 isl_int_set_si(graph->lp->ineq[k][4 + i], -1); 2457 isl_int_set_si(graph->lp->ineq[k][0], 1); 2458 } 2459 2460 if (add_all_constraints(graph) < 0) 2461 return -1; 2462 2463 return 0; 2464} 2465 2466/* If the schedule_split_scaled option is set and if the linear 2467 * parts of the scheduling rows for all nodes in the graphs have 2468 * non-trivial common divisor, then split off the constant term 2469 * from the linear part. 2470 * The constant term is then placed in a separate band and 2471 * the linear part is reduced. 2472 */ 2473static int split_scaled(isl_ctx *ctx, struct isl_sched_graph *graph) 2474{ 2475 int i; 2476 int row; 2477 isl_int gcd, gcd_i; 2478 2479 if (!ctx->opt->schedule_split_scaled) 2480 return 0; 2481 if (graph->n <= 1) 2482 return 0; 2483 2484 if (graph->n_total_row >= graph->max_row) 2485 isl_die(ctx, isl_error_internal, 2486 "too many schedule rows", return -1); 2487 2488 isl_int_init(gcd); 2489 isl_int_init(gcd_i); 2490 2491 isl_int_set_si(gcd, 0); 2492 2493 row = isl_mat_rows(graph->node[0].sched) - 1; 2494 2495 for (i = 0; i < graph->n; ++i) { 2496 struct isl_sched_node *node = &graph->node[i]; 2497 int cols = isl_mat_cols(node->sched); 2498 2499 isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i); 2500 isl_int_gcd(gcd, gcd, gcd_i); 2501 } 2502 2503 isl_int_clear(gcd_i); 2504 2505 if (isl_int_cmp_si(gcd, 1) <= 0) { 2506 isl_int_clear(gcd); 2507 return 0; 2508 } 2509 2510 next_band(graph); 2511 2512 for (i = 0; i < graph->n; ++i) { 2513 struct isl_sched_node *node = &graph->node[i]; 2514 2515 isl_map_free(node->sched_map); 2516 node->sched_map = NULL; 2517 node->sched = isl_mat_add_zero_rows(node->sched, 1); 2518 if (!node->sched) 2519 goto error; 2520 isl_int_fdiv_r(node->sched->row[row + 1][0], 2521 node->sched->row[row][0], gcd); 2522 isl_int_fdiv_q(node->sched->row[row][0], 2523 node->sched->row[row][0], gcd); 2524 isl_int_mul(node->sched->row[row][0], 2525 node->sched->row[row][0], gcd); 2526 node->sched = isl_mat_scale_down_row(node->sched, row, gcd); 2527 if (!node->sched) 2528 goto error; 2529 node->band[graph->n_total_row] = graph->n_band; 2530 } 2531 2532 graph->n_total_row++; 2533 2534 isl_int_clear(gcd); 2535 return 0; 2536error: 2537 isl_int_clear(gcd); 2538 return -1; 2539} 2540 2541static int compute_component_schedule(isl_ctx *ctx, 2542 struct isl_sched_graph *graph); 2543 2544/* Is the schedule row "sol" trivial on node "node"? 2545 * That is, is the solution zero on the dimensions orthogonal to 2546 * the previously found solutions? 2547 * Return 1 if the solution is trivial, 0 if it is not and -1 on error. 2548 * 2549 * Each coefficient is represented as the difference between 2550 * two non-negative values in "sol". "sol" has been computed 2551 * in terms of the original iterators (i.e., without use of cmap). 2552 * We construct the schedule row s and write it as a linear 2553 * combination of (linear combinations of) previously computed schedule rows. 2554 * s = Q c or c = U s. 2555 * If the final entries of c are all zero, then the solution is trivial. 2556 */ 2557static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol) 2558{ 2559 int i; 2560 int pos; 2561 int trivial; 2562 isl_ctx *ctx; 2563 isl_vec *node_sol; 2564 2565 if (!sol) 2566 return -1; 2567 if (node->nvar == node->rank) 2568 return 0; 2569 2570 ctx = isl_vec_get_ctx(sol); 2571 node_sol = isl_vec_alloc(ctx, node->nvar); 2572 if (!node_sol) 2573 return -1; 2574 2575 pos = 1 + node->start + 1 + 2 * node->nparam; 2576 2577 for (i = 0; i < node->nvar; ++i) 2578 isl_int_sub(node_sol->el[i], 2579 sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]); 2580 2581 node_sol = isl_mat_vec_product(isl_mat_copy(node->cinv), node_sol); 2582 2583 if (!node_sol) 2584 return -1; 2585 2586 trivial = isl_seq_first_non_zero(node_sol->el + node->rank, 2587 node->nvar - node->rank) == -1; 2588 2589 isl_vec_free(node_sol); 2590 2591 return trivial; 2592} 2593 2594/* Is the schedule row "sol" trivial on any node where it should 2595 * not be trivial? 2596 * "sol" has been computed in terms of the original iterators 2597 * (i.e., without use of cmap). 2598 * Return 1 if any solution is trivial, 0 if they are not and -1 on error. 2599 */ 2600static int is_any_trivial(struct isl_sched_graph *graph, 2601 __isl_keep isl_vec *sol) 2602{ 2603 int i; 2604 2605 for (i = 0; i < graph->n; ++i) { 2606 struct isl_sched_node *node = &graph->node[i]; 2607 int trivial; 2608 2609 if (!needs_row(graph, node)) 2610 continue; 2611 trivial = is_trivial(node, sol); 2612 if (trivial < 0 || trivial) 2613 return trivial; 2614 } 2615 2616 return 0; 2617} 2618 2619/* Construct a schedule row for each node such that as many dependences 2620 * as possible are carried and then continue with the next band. 2621 * 2622 * If the computed schedule row turns out to be trivial on one or 2623 * more nodes where it should not be trivial, then we throw it away 2624 * and try again on each component separately. 2625 */ 2626static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph) 2627{ 2628 int i; 2629 int n_edge; 2630 int trivial; 2631 isl_vec *sol; 2632 isl_basic_set *lp; 2633 2634 n_edge = 0; 2635 for (i = 0; i < graph->n_edge; ++i) 2636 n_edge += graph->edge[i].map->n; 2637 2638 if (setup_carry_lp(ctx, graph) < 0) 2639 return -1; 2640 2641 lp = isl_basic_set_copy(graph->lp); 2642 sol = isl_tab_basic_set_non_neg_lexmin(lp); 2643 if (!sol) 2644 return -1; 2645 2646 if (sol->size == 0) { 2647 isl_vec_free(sol); 2648 isl_die(ctx, isl_error_internal, 2649 "error in schedule construction", return -1); 2650 } 2651 2652 isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]); 2653 if (isl_int_cmp_si(sol->el[1], n_edge) >= 0) { 2654 isl_vec_free(sol); 2655 isl_die(ctx, isl_error_unknown, 2656 "unable to carry dependences", return -1); 2657 } 2658 2659 trivial = is_any_trivial(graph, sol); 2660 if (trivial < 0) { 2661 sol = isl_vec_free(sol); 2662 } else if (trivial) { 2663 isl_vec_free(sol); 2664 if (graph->scc > 1) 2665 return compute_component_schedule(ctx, graph); 2666 isl_die(ctx, isl_error_unknown, 2667 "unable to construct non-trivial solution", return -1); 2668 } 2669 2670 if (update_schedule(graph, sol, 0, 0) < 0) 2671 return -1; 2672 2673 if (split_scaled(ctx, graph) < 0) 2674 return -1; 2675 2676 return compute_next_band(ctx, graph); 2677} 2678 2679/* Are there any (non-empty) validity edges in the graph? 2680 */ 2681static int has_validity_edges(struct isl_sched_graph *graph) 2682{ 2683 int i; 2684 2685 for (i = 0; i < graph->n_edge; ++i) { 2686 int empty; 2687 2688 empty = isl_map_plain_is_empty(graph->edge[i].map); 2689 if (empty < 0) 2690 return -1; 2691 if (empty) 2692 continue; 2693 if (graph->edge[i].validity) 2694 return 1; 2695 } 2696 2697 return 0; 2698} 2699 2700/* Should we apply a Feautrier step? 2701 * That is, did the user request the Feautrier algorithm and are 2702 * there any validity dependences (left)? 2703 */ 2704static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph) 2705{ 2706 if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER) 2707 return 0; 2708 2709 return has_validity_edges(graph); 2710} 2711 2712/* Compute a schedule for a connected dependence graph using Feautrier's 2713 * multi-dimensional scheduling algorithm. 2714 * The original algorithm is described in [1]. 2715 * The main idea is to minimize the number of scheduling dimensions, by 2716 * trying to satisfy as many dependences as possible per scheduling dimension. 2717 * 2718 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling 2719 * Problem, Part II: Multi-Dimensional Time. 2720 * In Intl. Journal of Parallel Programming, 1992. 2721 */ 2722static int compute_schedule_wcc_feautrier(isl_ctx *ctx, 2723 struct isl_sched_graph *graph) 2724{ 2725 return carry_dependences(ctx, graph); 2726} 2727 2728/* Compute a schedule for a connected dependence graph. 2729 * We try to find a sequence of as many schedule rows as possible that result 2730 * in non-negative dependence distances (independent of the previous rows 2731 * in the sequence, i.e., such that the sequence is tilable). 2732 * If we can't find any more rows we either 2733 * - split between SCCs and start over (assuming we found an interesting 2734 * pair of SCCs between which to split) 2735 * - continue with the next band (assuming the current band has at least 2736 * one row) 2737 * - try to carry as many dependences as possible and continue with the next 2738 * band 2739 * 2740 * If Feautrier's algorithm is selected, we first recursively try to satisfy 2741 * as many validity dependences as possible. When all validity dependences 2742 * are satisfied we extend the schedule to a full-dimensional schedule. 2743 * 2744 * If we manage to complete the schedule, we finish off by topologically 2745 * sorting the statements based on the remaining dependences. 2746 * 2747 * If ctx->opt->schedule_outer_zero_distance is set, then we force the 2748 * outermost dimension in the current band to be zero distance. If this 2749 * turns out to be impossible, we fall back on the general scheme above 2750 * and try to carry as many dependences as possible. 2751 */ 2752static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph) 2753{ 2754 int force_zero = 0; 2755 2756 if (detect_sccs(ctx, graph) < 0) 2757 return -1; 2758 if (sort_sccs(graph) < 0) 2759 return -1; 2760 2761 if (compute_maxvar(graph) < 0) 2762 return -1; 2763 2764 if (need_feautrier_step(ctx, graph)) 2765 return compute_schedule_wcc_feautrier(ctx, graph); 2766 2767 if (ctx->opt->schedule_outer_zero_distance) 2768 force_zero = 1; 2769 2770 while (graph->n_row < graph->maxvar) { 2771 isl_vec *sol; 2772 2773 graph->src_scc = -1; 2774 graph->dst_scc = -1; 2775 2776 if (setup_lp(ctx, graph, force_zero) < 0) 2777 return -1; 2778 sol = solve_lp(graph); 2779 if (!sol) 2780 return -1; 2781 if (sol->size == 0) { 2782 isl_vec_free(sol); 2783 if (!ctx->opt->schedule_maximize_band_depth && 2784 graph->n_total_row > graph->band_start) 2785 return compute_next_band(ctx, graph); 2786 if (graph->src_scc >= 0) 2787 return compute_split_schedule(ctx, graph); 2788 if (graph->n_total_row > graph->band_start) 2789 return compute_next_band(ctx, graph); 2790 return carry_dependences(ctx, graph); 2791 } 2792 if (update_schedule(graph, sol, 1, 1) < 0) 2793 return -1; 2794 force_zero = 0; 2795 } 2796 2797 if (graph->n_total_row > graph->band_start) 2798 next_band(graph); 2799 return sort_statements(ctx, graph); 2800} 2801 2802/* Add a row to the schedules that separates the SCCs and move 2803 * to the next band. 2804 */ 2805static int split_on_scc(isl_ctx *ctx, struct isl_sched_graph *graph) 2806{ 2807 int i; 2808 2809 if (graph->n_total_row >= graph->max_row) 2810 isl_die(ctx, isl_error_internal, 2811 "too many schedule rows", return -1); 2812 2813 for (i = 0; i < graph->n; ++i) { 2814 struct isl_sched_node *node = &graph->node[i]; 2815 int row = isl_mat_rows(node->sched); 2816 2817 isl_map_free(node->sched_map); 2818 node->sched_map = NULL; 2819 node->sched = isl_mat_add_zero_rows(node->sched, 1); 2820 node->sched = isl_mat_set_element_si(node->sched, row, 0, 2821 node->scc); 2822 if (!node->sched) 2823 return -1; 2824 node->band[graph->n_total_row] = graph->n_band; 2825 } 2826 2827 graph->n_total_row++; 2828 next_band(graph); 2829 2830 return 0; 2831} 2832 2833/* Compute a schedule for each component (identified by node->scc) 2834 * of the dependence graph separately and then combine the results. 2835 * Depending on the setting of schedule_fuse, a component may be 2836 * either weakly or strongly connected. 2837 * 2838 * The band_id is adjusted such that each component has a separate id. 2839 * Note that the band_id may have already been set to a value different 2840 * from zero by compute_split_schedule. 2841 */ 2842static int compute_component_schedule(isl_ctx *ctx, 2843 struct isl_sched_graph *graph) 2844{ 2845 int wcc, i; 2846 int n, n_edge; 2847 int n_total_row, orig_total_row; 2848 int n_band, orig_band; 2849 2850 if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN || 2851 ctx->opt->schedule_separate_components) 2852 if (split_on_scc(ctx, graph) < 0) 2853 return -1; 2854 2855 n_total_row = 0; 2856 orig_total_row = graph->n_total_row; 2857 n_band = 0; 2858 orig_band = graph->n_band; 2859 for (i = 0; i < graph->n; ++i) 2860 graph->node[i].band_id[graph->n_band] += graph->node[i].scc; 2861 for (wcc = 0; wcc < graph->scc; ++wcc) { 2862 n = 0; 2863 for (i = 0; i < graph->n; ++i) 2864 if (graph->node[i].scc == wcc) 2865 n++; 2866 n_edge = 0; 2867 for (i = 0; i < graph->n_edge; ++i) 2868 if (graph->edge[i].src->scc == wcc && 2869 graph->edge[i].dst->scc == wcc) 2870 n_edge++; 2871 2872 if (compute_sub_schedule(ctx, graph, n, n_edge, 2873 &node_scc_exactly, 2874 &edge_scc_exactly, wcc, 1) < 0) 2875 return -1; 2876 if (graph->n_total_row > n_total_row) 2877 n_total_row = graph->n_total_row; 2878 graph->n_total_row = orig_total_row; 2879 if (graph->n_band > n_band) 2880 n_band = graph->n_band; 2881 graph->n_band = orig_band; 2882 } 2883 2884 graph->n_total_row = n_total_row; 2885 graph->n_band = n_band; 2886 2887 return pad_schedule(graph); 2888} 2889 2890/* Compute a schedule for the given dependence graph. 2891 * We first check if the graph is connected (through validity dependences) 2892 * and, if not, compute a schedule for each component separately. 2893 * If schedule_fuse is set to minimal fusion, then we check for strongly 2894 * connected components instead and compute a separate schedule for 2895 * each such strongly connected component. 2896 */ 2897static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph) 2898{ 2899 if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN) { 2900 if (detect_sccs(ctx, graph) < 0) 2901 return -1; 2902 } else { 2903 if (detect_wccs(ctx, graph) < 0) 2904 return -1; 2905 } 2906 2907 if (graph->scc > 1) 2908 return compute_component_schedule(ctx, graph); 2909 2910 return compute_schedule_wcc(ctx, graph); 2911} 2912 2913/* Compute a schedule for the given union of domains that respects 2914 * all the validity dependences. 2915 * If the default isl scheduling algorithm is used, it tries to minimize 2916 * the dependence distances over the proximity dependences. 2917 * If Feautrier's scheduling algorithm is used, the proximity dependence 2918 * distances are only minimized during the extension to a full-dimensional 2919 * schedule. 2920 */ 2921__isl_give isl_schedule *isl_union_set_compute_schedule( 2922 __isl_take isl_union_set *domain, 2923 __isl_take isl_union_map *validity, 2924 __isl_take isl_union_map *proximity) 2925{ 2926 isl_ctx *ctx = isl_union_set_get_ctx(domain); 2927 isl_space *dim; 2928 struct isl_sched_graph graph = { 0 }; 2929 isl_schedule *sched; 2930 struct isl_extract_edge_data data; 2931 2932 domain = isl_union_set_align_params(domain, 2933 isl_union_map_get_space(validity)); 2934 domain = isl_union_set_align_params(domain, 2935 isl_union_map_get_space(proximity)); 2936 dim = isl_union_set_get_space(domain); 2937 validity = isl_union_map_align_params(validity, isl_space_copy(dim)); 2938 proximity = isl_union_map_align_params(proximity, dim); 2939 2940 if (!domain) 2941 goto error; 2942 2943 graph.n = isl_union_set_n_set(domain); 2944 if (graph.n == 0) 2945 goto empty; 2946 if (graph_alloc(ctx, &graph, graph.n, 2947 isl_union_map_n_map(validity) + isl_union_map_n_map(proximity)) < 0) 2948 goto error; 2949 if (compute_max_row(&graph, domain) < 0) 2950 goto error; 2951 graph.root = 1; 2952 graph.n = 0; 2953 if (isl_union_set_foreach_set(domain, &extract_node, &graph) < 0) 2954 goto error; 2955 if (graph_init_table(ctx, &graph) < 0) 2956 goto error; 2957 graph.max_edge[isl_edge_validity] = isl_union_map_n_map(validity); 2958 graph.max_edge[isl_edge_proximity] = isl_union_map_n_map(proximity); 2959 if (graph_init_edge_tables(ctx, &graph) < 0) 2960 goto error; 2961 graph.n_edge = 0; 2962 data.graph = &graph; 2963 data.type = isl_edge_validity; 2964 if (isl_union_map_foreach_map(validity, &extract_edge, &data) < 0) 2965 goto error; 2966 data.type = isl_edge_proximity; 2967 if (isl_union_map_foreach_map(proximity, &extract_edge, &data) < 0) 2968 goto error; 2969 2970 if (compute_schedule(ctx, &graph) < 0) 2971 goto error; 2972 2973empty: 2974 sched = extract_schedule(&graph, isl_union_set_get_space(domain)); 2975 2976 graph_free(ctx, &graph); 2977 isl_union_set_free(domain); 2978 isl_union_map_free(validity); 2979 isl_union_map_free(proximity); 2980 2981 return sched; 2982error: 2983 graph_free(ctx, &graph); 2984 isl_union_set_free(domain); 2985 isl_union_map_free(validity); 2986 isl_union_map_free(proximity); 2987 return NULL; 2988} 2989 2990void *isl_schedule_free(__isl_take isl_schedule *sched) 2991{ 2992 int i; 2993 if (!sched) 2994 return NULL; 2995 2996 if (--sched->ref > 0) 2997 return NULL; 2998 2999 for (i = 0; i < sched->n; ++i) { 3000 isl_multi_aff_free(sched->node[i].sched); 3001 free(sched->node[i].band_end); 3002 free(sched->node[i].band_id); 3003 free(sched->node[i].zero); 3004 } 3005 isl_space_free(sched->dim); 3006 isl_band_list_free(sched->band_forest); 3007 free(sched); 3008 return NULL; 3009} 3010 3011isl_ctx *isl_schedule_get_ctx(__isl_keep isl_schedule *schedule) 3012{ 3013 return schedule ? isl_space_get_ctx(schedule->dim) : NULL; 3014} 3015 3016/* Set max_out to the maximal number of output dimensions over 3017 * all maps. 3018 */ 3019static int update_max_out(__isl_take isl_map *map, void *user) 3020{ 3021 int *max_out = user; 3022 int n_out = isl_map_dim(map, isl_dim_out); 3023 3024 if (n_out > *max_out) 3025 *max_out = n_out; 3026 3027 isl_map_free(map); 3028 return 0; 3029} 3030 3031/* Internal data structure for map_pad_range. 3032 * 3033 * "max_out" is the maximal schedule dimension. 3034 * "res" collects the results. 3035 */ 3036struct isl_pad_schedule_map_data { 3037 int max_out; 3038 isl_union_map *res; 3039}; 3040 3041/* Pad the range of the given map with zeros to data->max_out and 3042 * then add the result to data->res. 3043 */ 3044static int map_pad_range(__isl_take isl_map *map, void *user) 3045{ 3046 struct isl_pad_schedule_map_data *data = user; 3047 int i; 3048 int n_out = isl_map_dim(map, isl_dim_out); 3049 3050 map = isl_map_add_dims(map, isl_dim_out, data->max_out - n_out); 3051 for (i = n_out; i < data->max_out; ++i) 3052 map = isl_map_fix_si(map, isl_dim_out, i, 0); 3053 3054 data->res = isl_union_map_add_map(data->res, map); 3055 if (!data->res) 3056 return -1; 3057 3058 return 0; 3059} 3060 3061/* Pad the ranges of the maps in the union map with zeros such they all have 3062 * the same dimension. 3063 */ 3064static __isl_give isl_union_map *pad_schedule_map( 3065 __isl_take isl_union_map *umap) 3066{ 3067 struct isl_pad_schedule_map_data data; 3068 3069 if (!umap) 3070 return NULL; 3071 if (isl_union_map_n_map(umap) <= 1) 3072 return umap; 3073 3074 data.max_out = 0; 3075 if (isl_union_map_foreach_map(umap, &update_max_out, &data.max_out) < 0) 3076 return isl_union_map_free(umap); 3077 3078 data.res = isl_union_map_empty(isl_union_map_get_space(umap)); 3079 if (isl_union_map_foreach_map(umap, &map_pad_range, &data) < 0) 3080 data.res = isl_union_map_free(data.res); 3081 3082 isl_union_map_free(umap); 3083 return data.res; 3084} 3085 3086/* Return an isl_union_map of the schedule. If we have already constructed 3087 * a band forest, then this band forest may have been modified so we need 3088 * to extract the isl_union_map from the forest rather than from 3089 * the originally computed schedule. This reconstructed schedule map 3090 * then needs to be padded with zeros to unify the schedule space 3091 * since the result of isl_band_list_get_suffix_schedule may not have 3092 * a unified schedule space. 3093 */ 3094__isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched) 3095{ 3096 int i; 3097 isl_union_map *umap; 3098 3099 if (!sched) 3100 return NULL; 3101 3102 if (sched->band_forest) { 3103 umap = isl_band_list_get_suffix_schedule(sched->band_forest); 3104 return pad_schedule_map(umap); 3105 } 3106 3107 umap = isl_union_map_empty(isl_space_copy(sched->dim)); 3108 for (i = 0; i < sched->n; ++i) { 3109 isl_multi_aff *ma; 3110 3111 ma = isl_multi_aff_copy(sched->node[i].sched); 3112 umap = isl_union_map_add_map(umap, isl_map_from_multi_aff(ma)); 3113 } 3114 3115 return umap; 3116} 3117 3118static __isl_give isl_band_list *construct_band_list( 3119 __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent, 3120 int band_nr, int *parent_active, int n_active); 3121 3122/* Construct an isl_band structure for the band in the given schedule 3123 * with sequence number band_nr for the n_active nodes marked by active. 3124 * If the nodes don't have a band with the given sequence number, 3125 * then a band without members is created. 3126 * 3127 * Because of the way the schedule is constructed, we know that 3128 * the position of the band inside the schedule of a node is the same 3129 * for all active nodes. 3130 * 3131 * The partial schedule for the band is created before the children 3132 * are created to that construct_band_list can refer to the partial 3133 * schedule of the parent. 3134 */ 3135static __isl_give isl_band *construct_band(__isl_keep isl_schedule *schedule, 3136 __isl_keep isl_band *parent, 3137 int band_nr, int *active, int n_active) 3138{ 3139 int i, j; 3140 isl_ctx *ctx = isl_schedule_get_ctx(schedule); 3141 isl_band *band; 3142 unsigned start, end; 3143 3144 band = isl_band_alloc(ctx); 3145 if (!band) 3146 return NULL; 3147 3148 band->schedule = schedule; 3149 band->parent = parent; 3150 3151 for (i = 0; i < schedule->n; ++i) 3152 if (active[i]) 3153 break; 3154 3155 if (i >= schedule->n) 3156 isl_die(ctx, isl_error_internal, 3157 "band without active statements", goto error); 3158 3159 start = band_nr ? schedule->node[i].band_end[band_nr - 1] : 0; 3160 end = band_nr < schedule->node[i].n_band ? 3161 schedule->node[i].band_end[band_nr] : start; 3162 band->n = end - start; 3163 3164 band->zero = isl_alloc_array(ctx, int, band->n); 3165 if (band->n && !band->zero) 3166 goto error; 3167 3168 for (j = 0; j < band->n; ++j) 3169 band->zero[j] = schedule->node[i].zero[start + j]; 3170 3171 band->pma = isl_union_pw_multi_aff_empty(isl_space_copy(schedule->dim)); 3172 for (i = 0; i < schedule->n; ++i) { 3173 isl_multi_aff *ma; 3174 isl_pw_multi_aff *pma; 3175 unsigned n_out; 3176 3177 if (!active[i]) 3178 continue; 3179 3180 ma = isl_multi_aff_copy(schedule->node[i].sched); 3181 n_out = isl_multi_aff_dim(ma, isl_dim_out); 3182 ma = isl_multi_aff_drop_dims(ma, isl_dim_out, end, n_out - end); 3183 ma = isl_multi_aff_drop_dims(ma, isl_dim_out, 0, start); 3184 pma = isl_pw_multi_aff_from_multi_aff(ma); 3185 band->pma = isl_union_pw_multi_aff_add_pw_multi_aff(band->pma, 3186 pma); 3187 } 3188 if (!band->pma) 3189 goto error; 3190 3191 for (i = 0; i < schedule->n; ++i) 3192 if (active[i] && schedule->node[i].n_band > band_nr + 1) 3193 break; 3194 3195 if (i < schedule->n) { 3196 band->children = construct_band_list(schedule, band, 3197 band_nr + 1, active, n_active); 3198 if (!band->children) 3199 goto error; 3200 } 3201 3202 return band; 3203error: 3204 isl_band_free(band); 3205 return NULL; 3206} 3207 3208/* Internal data structure used inside cmp_band and pw_multi_aff_extract_int. 3209 * 3210 * r is set to a negative value if anything goes wrong. 3211 * 3212 * c1 stores the result of extract_int. 3213 * c2 is a temporary value used inside cmp_band_in_ancestor. 3214 * t is a temporary value used inside extract_int. 3215 * 3216 * first and equal are used inside extract_int. 3217 * first is set if we are looking at the first isl_multi_aff inside 3218 * the isl_union_pw_multi_aff. 3219 * equal is set if all the isl_multi_affs have been equal so far. 3220 */ 3221struct isl_cmp_band_data { 3222 int r; 3223 3224 int first; 3225 int equal; 3226 3227 isl_int t; 3228 isl_int c1; 3229 isl_int c2; 3230}; 3231 3232/* Check if "ma" assigns a constant value. 3233 * Note that this function is only called on isl_multi_affs 3234 * with a single output dimension. 3235 * 3236 * If "ma" assigns a constant value then we compare it to data->c1 3237 * or assign it to data->c1 if this is the first isl_multi_aff we consider. 3238 * If "ma" does not assign a constant value or if it assigns a value 3239 * that is different from data->c1, then we set data->equal to zero 3240 * and terminate the check. 3241 */ 3242static int multi_aff_extract_int(__isl_take isl_set *set, 3243 __isl_take isl_multi_aff *ma, void *user) 3244{ 3245 isl_aff *aff; 3246 struct isl_cmp_band_data *data = user; 3247 3248 aff = isl_multi_aff_get_aff(ma, 0); 3249 data->r = isl_aff_is_cst(aff); 3250 if (data->r >= 0 && data->r) { 3251 isl_aff_get_constant(aff, &data->t); 3252 if (data->first) { 3253 isl_int_set(data->c1, data->t); 3254 data->first = 0; 3255 } else if (!isl_int_eq(data->c1, data->t)) 3256 data->equal = 0; 3257 } else if (data->r >= 0 && !data->r) 3258 data->equal = 0; 3259 3260 isl_aff_free(aff); 3261 isl_set_free(set); 3262 isl_multi_aff_free(ma); 3263 3264 if (data->r < 0) 3265 return -1; 3266 if (!data->equal) 3267 return -1; 3268 return 0; 3269} 3270 3271/* This function is called for each isl_pw_multi_aff in 3272 * the isl_union_pw_multi_aff checked by extract_int. 3273 * Check all the isl_multi_affs inside "pma". 3274 */ 3275static int pw_multi_aff_extract_int(__isl_take isl_pw_multi_aff *pma, 3276 void *user) 3277{ 3278 int r; 3279 3280 r = isl_pw_multi_aff_foreach_piece(pma, &multi_aff_extract_int, user); 3281 isl_pw_multi_aff_free(pma); 3282 3283 return r; 3284} 3285 3286/* Check if "upma" assigns a single constant value to its domain. 3287 * If so, return 1 and store the result in data->c1. 3288 * If not, return 0. 3289 * 3290 * A negative return value from isl_union_pw_multi_aff_foreach_pw_multi_aff 3291 * means that either an error occurred or that we have broken off the check 3292 * because we already know the result is going to be negative. 3293 * In the latter case, data->equal is set to zero. 3294 */ 3295static int extract_int(__isl_keep isl_union_pw_multi_aff *upma, 3296 struct isl_cmp_band_data *data) 3297{ 3298 data->first = 1; 3299 data->equal = 1; 3300 3301 if (isl_union_pw_multi_aff_foreach_pw_multi_aff(upma, 3302 &pw_multi_aff_extract_int, data) < 0) { 3303 if (!data->equal) 3304 return 0; 3305 return -1; 3306 } 3307 3308 return !data->first && data->equal; 3309} 3310 3311/* Compare "b1" and "b2" based on the parent schedule of their ancestor 3312 * "ancestor". 3313 * 3314 * If the parent of "ancestor" also has a single member, then we 3315 * first try to compare the two band based on the partial schedule 3316 * of this parent. 3317 * 3318 * Otherwise, or if the result is inconclusive, we look at the partial schedule 3319 * of "ancestor" itself. 3320 * In particular, we specialize the parent schedule based 3321 * on the domains of the child schedules, check if both assign 3322 * a single constant value and, if so, compare the two constant values. 3323 * If the specialized parent schedules do not assign a constant value, 3324 * then they cannot be used to order the two bands and so in this case 3325 * we return 0. 3326 */ 3327static int cmp_band_in_ancestor(__isl_keep isl_band *b1, 3328 __isl_keep isl_band *b2, struct isl_cmp_band_data *data, 3329 __isl_keep isl_band *ancestor) 3330{ 3331 isl_union_pw_multi_aff *upma; 3332 isl_union_set *domain; 3333 int r; 3334 3335 if (data->r < 0) 3336 return 0; 3337 3338 if (ancestor->parent && ancestor->parent->n == 1) { 3339 r = cmp_band_in_ancestor(b1, b2, data, ancestor->parent); 3340 if (data->r < 0) 3341 return 0; 3342 if (r) 3343 return r; 3344 } 3345 3346 upma = isl_union_pw_multi_aff_copy(b1->pma); 3347 domain = isl_union_pw_multi_aff_domain(upma); 3348 upma = isl_union_pw_multi_aff_copy(ancestor->pma); 3349 upma = isl_union_pw_multi_aff_intersect_domain(upma, domain); 3350 r = extract_int(upma, data); 3351 isl_union_pw_multi_aff_free(upma); 3352 3353 if (r < 0) 3354 data->r = -1; 3355 if (r < 0 || !r) 3356 return 0; 3357 3358 isl_int_set(data->c2, data->c1); 3359 3360 upma = isl_union_pw_multi_aff_copy(b2->pma); 3361 domain = isl_union_pw_multi_aff_domain(upma); 3362 upma = isl_union_pw_multi_aff_copy(ancestor->pma); 3363 upma = isl_union_pw_multi_aff_intersect_domain(upma, domain); 3364 r = extract_int(upma, data); 3365 isl_union_pw_multi_aff_free(upma); 3366 3367 if (r < 0) 3368 data->r = -1; 3369 if (r < 0 || !r) 3370 return 0; 3371 3372 return isl_int_cmp(data->c2, data->c1); 3373} 3374 3375/* Compare "a" and "b" based on the parent schedule of their parent. 3376 */ 3377static int cmp_band(const void *a, const void *b, void *user) 3378{ 3379 isl_band *b1 = *(isl_band * const *) a; 3380 isl_band *b2 = *(isl_band * const *) b; 3381 struct isl_cmp_band_data *data = user; 3382 3383 return cmp_band_in_ancestor(b1, b2, data, b1->parent); 3384} 3385 3386/* Sort the elements in "list" based on the partial schedules of its parent 3387 * (and ancestors). In particular if the parent assigns constant values 3388 * to the domains of the bands in "list", then the elements are sorted 3389 * according to that order. 3390 * This order should be a more "natural" order for the user, but otherwise 3391 * shouldn't have any effect. 3392 * If we would be constructing an isl_band forest directly in 3393 * isl_union_set_compute_schedule then there wouldn't be any need 3394 * for a reordering, since the children would be added to the list 3395 * in their natural order automatically. 3396 * 3397 * If there is only one element in the list, then there is no need to sort 3398 * anything. 3399 * If the partial schedule of the parent has more than one member 3400 * (or if there is no parent), then it's 3401 * defnitely not assigning constant values to the different children in 3402 * the list and so we wouldn't be able to use it to sort the list. 3403 */ 3404static __isl_give isl_band_list *sort_band_list(__isl_take isl_band_list *list, 3405 __isl_keep isl_band *parent) 3406{ 3407 struct isl_cmp_band_data data; 3408 3409 if (!list) 3410 return NULL; 3411 if (list->n <= 1) 3412 return list; 3413 if (!parent || parent->n != 1) 3414 return list; 3415 3416 data.r = 0; 3417 isl_int_init(data.c1); 3418 isl_int_init(data.c2); 3419 isl_int_init(data.t); 3420 isl_sort(list->p, list->n, sizeof(list->p[0]), &cmp_band, &data); 3421 if (data.r < 0) 3422 list = isl_band_list_free(list); 3423 isl_int_clear(data.c1); 3424 isl_int_clear(data.c2); 3425 isl_int_clear(data.t); 3426 3427 return list; 3428} 3429 3430/* Construct a list of bands that start at the same position (with 3431 * sequence number band_nr) in the schedules of the nodes that 3432 * were active in the parent band. 3433 * 3434 * A separate isl_band structure is created for each band_id 3435 * and for each node that does not have a band with sequence 3436 * number band_nr. In the latter case, a band without members 3437 * is created. 3438 * This ensures that if a band has any children, then each node 3439 * that was active in the band is active in exactly one of the children. 3440 */ 3441static __isl_give isl_band_list *construct_band_list( 3442 __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent, 3443 int band_nr, int *parent_active, int n_active) 3444{ 3445 int i, j; 3446 isl_ctx *ctx = isl_schedule_get_ctx(schedule); 3447 int *active; 3448 int n_band; 3449 isl_band_list *list; 3450 3451 n_band = 0; 3452 for (i = 0; i < n_active; ++i) { 3453 for (j = 0; j < schedule->n; ++j) { 3454 if (!parent_active[j]) 3455 continue; 3456 if (schedule->node[j].n_band <= band_nr) 3457 continue; 3458 if (schedule->node[j].band_id[band_nr] == i) { 3459 n_band++; 3460 break; 3461 } 3462 } 3463 } 3464 for (j = 0; j < schedule->n; ++j) 3465 if (schedule->node[j].n_band <= band_nr) 3466 n_band++; 3467 3468 if (n_band == 1) { 3469 isl_band *band; 3470 list = isl_band_list_alloc(ctx, n_band); 3471 band = construct_band(schedule, parent, band_nr, 3472 parent_active, n_active); 3473 return isl_band_list_add(list, band); 3474 } 3475 3476 active = isl_alloc_array(ctx, int, schedule->n); 3477 if (schedule->n && !active) 3478 return NULL; 3479 3480 list = isl_band_list_alloc(ctx, n_band); 3481 3482 for (i = 0; i < n_active; ++i) { 3483 int n = 0; 3484 isl_band *band; 3485 3486 for (j = 0; j < schedule->n; ++j) { 3487 active[j] = parent_active[j] && 3488 schedule->node[j].n_band > band_nr && 3489 schedule->node[j].band_id[band_nr] == i; 3490 if (active[j]) 3491 n++; 3492 } 3493 if (n == 0) 3494 continue; 3495 3496 band = construct_band(schedule, parent, band_nr, active, n); 3497 3498 list = isl_band_list_add(list, band); 3499 } 3500 for (i = 0; i < schedule->n; ++i) { 3501 isl_band *band; 3502 if (!parent_active[i]) 3503 continue; 3504 if (schedule->node[i].n_band > band_nr) 3505 continue; 3506 for (j = 0; j < schedule->n; ++j) 3507 active[j] = j == i; 3508 band = construct_band(schedule, parent, band_nr, active, 1); 3509 list = isl_band_list_add(list, band); 3510 } 3511 3512 free(active); 3513 3514 list = sort_band_list(list, parent); 3515 3516 return list; 3517} 3518 3519/* Construct a band forest representation of the schedule and 3520 * return the list of roots. 3521 */ 3522static __isl_give isl_band_list *construct_forest( 3523 __isl_keep isl_schedule *schedule) 3524{ 3525 int i; 3526 isl_ctx *ctx = isl_schedule_get_ctx(schedule); 3527 isl_band_list *forest; 3528 int *active; 3529 3530 active = isl_alloc_array(ctx, int, schedule->n); 3531 if (schedule->n && !active) 3532 return NULL; 3533 3534 for (i = 0; i < schedule->n; ++i) 3535 active[i] = 1; 3536 3537 forest = construct_band_list(schedule, NULL, 0, active, schedule->n); 3538 3539 free(active); 3540 3541 return forest; 3542} 3543 3544/* Return the roots of a band forest representation of the schedule. 3545 */ 3546__isl_give isl_band_list *isl_schedule_get_band_forest( 3547 __isl_keep isl_schedule *schedule) 3548{ 3549 if (!schedule) 3550 return NULL; 3551 if (!schedule->band_forest) 3552 schedule->band_forest = construct_forest(schedule); 3553 return isl_band_list_dup(schedule->band_forest); 3554} 3555 3556/* Call "fn" on each band in the schedule in depth-first post-order. 3557 */ 3558int isl_schedule_foreach_band(__isl_keep isl_schedule *sched, 3559 int (*fn)(__isl_keep isl_band *band, void *user), void *user) 3560{ 3561 int r; 3562 isl_band_list *forest; 3563 3564 if (!sched) 3565 return -1; 3566 3567 forest = isl_schedule_get_band_forest(sched); 3568 r = isl_band_list_foreach_band(forest, fn, user); 3569 isl_band_list_free(forest); 3570 3571 return r; 3572} 3573 3574static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p, 3575 __isl_keep isl_band_list *list); 3576 3577static __isl_give isl_printer *print_band(__isl_take isl_printer *p, 3578 __isl_keep isl_band *band) 3579{ 3580 isl_band_list *children; 3581 3582 p = isl_printer_start_line(p); 3583 p = isl_printer_print_union_pw_multi_aff(p, band->pma); 3584 p = isl_printer_end_line(p); 3585 3586 if (!isl_band_has_children(band)) 3587 return p; 3588 3589 children = isl_band_get_children(band); 3590 3591 p = isl_printer_indent(p, 4); 3592 p = print_band_list(p, children); 3593 p = isl_printer_indent(p, -4); 3594 3595 isl_band_list_free(children); 3596 3597 return p; 3598} 3599 3600static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p, 3601 __isl_keep isl_band_list *list) 3602{ 3603 int i, n; 3604 3605 n = isl_band_list_n_band(list); 3606 for (i = 0; i < n; ++i) { 3607 isl_band *band; 3608 band = isl_band_list_get_band(list, i); 3609 p = print_band(p, band); 3610 isl_band_free(band); 3611 } 3612 3613 return p; 3614} 3615 3616__isl_give isl_printer *isl_printer_print_schedule(__isl_take isl_printer *p, 3617 __isl_keep isl_schedule *schedule) 3618{ 3619 isl_band_list *forest; 3620 3621 forest = isl_schedule_get_band_forest(schedule); 3622 3623 p = print_band_list(p, forest); 3624 3625 isl_band_list_free(forest); 3626 3627 return p; 3628} 3629 3630void isl_schedule_dump(__isl_keep isl_schedule *schedule) 3631{ 3632 isl_printer *printer; 3633 3634 if (!schedule) 3635 return; 3636 3637 printer = isl_printer_to_file(isl_schedule_get_ctx(schedule), stderr); 3638 printer = isl_printer_print_schedule(printer, schedule); 3639 3640 isl_printer_free(printer); 3641} 3642