Commit 64489255 authored by Michael Kruse's avatar Michael Kruse
Browse files

[Polly] Add greedy fusion algorithm.

When the option -polly-loopfusion-greedy is set, the ScheduleOptimizer
aggressively tries to fuse every band it can without violating any
dependences.

As part of the implementation, the functionality for copying a band
into a new schedule was extracted out of the ScheduleTreeRewriter.
parent 20a0c482
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -11,3 +11,15 @@ In Polly 14 the following important changes have been incorporated.
  branch.

- Change ...

 * The command line option -polly-opt-fusion has been removed. What the
   flag does was frequently misunderstood and is rarely useful. However,
   the functionality is still accessible using
```
    -polly-isl-arg=--no-schedule-serialize-sccs
```

 * The command line option -polly-loopfusion-greedy has been added.
   This will aggressively try to fuse any loop regardless of
   profitability. This is what users might have expected
   -polly-opt-fusion=max to do.
+8 −0
Original line number Diff line number Diff line
@@ -240,6 +240,14 @@ isl::schedule_node applyRegisterTiling(isl::schedule_node Node,
                                       llvm::ArrayRef<int> TileSizes,
                                       int DefaultTileSize);

/// Apply greedy fusion. That is, fuse any loop that is possible to be fused
/// top-down.
///
/// @param Sched  Schedule tree to fuse all the loops in.
/// @param Deps   Validity constraints that must be preserved.
///
/// @returns The schedule after applying the fusion.
isl::schedule applyGreedyFusion(isl::schedule Sched,
                                const isl::union_map &Deps);

} // namespace polly

#endif // POLLY_SCHEDULETREETRANSFORM_H
+6 −0
Original line number Diff line number Diff line
@@ -231,6 +231,12 @@ ISL_DUMP_OBJECT(val)
ISL_DUMP_OBJECT(val_list)
//@}

/// Emit the equivalent of the isl_*_dump output into a raw_ostream.
///
/// Overloads are provided for the C++ wrapper object and for the raw isl
/// pointer; the pointer is only borrowed (__isl_keep).
/// @{
void dumpIslObj(const isl::schedule_node &Node, llvm::raw_ostream &OS);
void dumpIslObj(__isl_keep isl_schedule_node *node, llvm::raw_ostream &OS);
/// @}

inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                     __isl_keep isl_union_map *Map) {
  OS << polly::stringFromIslObj(Map, "null");
+21 −0
Original line number Diff line number Diff line
@@ -238,4 +238,25 @@ void neverCalled() {
  polly::dumpIslObj(isl::val());
  polly::dumpIslObj(isl::val_list());
}

/// Print the schedule node @p node to @p OS using isl's block YAML style.
///
/// Nothing is written when @p node is null. The node is only borrowed.
void polly::dumpIslObj(__isl_keep isl_schedule_node *node, raw_ostream &OS) {
  if (node == nullptr)
    return;

  // Render the node into an in-memory string printer.
  isl_printer *Printer = isl_printer_to_str(isl_schedule_node_get_ctx(node));
  Printer = isl_printer_set_yaml_style(Printer, ISL_YAML_STYLE_BLOCK);
  Printer = isl_printer_print_schedule_node(Printer, node);

  // isl_printer_get_str hands back a caller-owned copy of the text, so the
  // printer itself can be released before the text is written out.
  char *Rendered = isl_printer_get_str(Printer);
  isl_printer_free(Printer);

  OS << Rendered;
  free(Rendered);
}

/// Overload for the C++ wrapper type: forwards the wrapped isl pointer to the
/// raw-pointer overload, which does the actual printing into @p OS.
void polly::dumpIslObj(const isl::schedule_node &Node, raw_ostream &OS) {
  isl_schedule_node *RawNode = Node.get();
  dumpIslObj(RawNode, OS);
}

#endif
+12 −0
Original line number Diff line number Diff line
@@ -97,6 +97,11 @@ static cl::opt<std::string>
                      cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
                      cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory));

// Hidden command-line switch -polly-loopfusion-greedy. When set, the schedule
// optimizer passes the computed schedule through applyGreedyFusion.
static cl::opt<bool>
    GreedyFusion("polly-loopfusion-greedy",
                 cl::desc("Aggressively try to fuse everything"), cl::Hidden,
                 cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<std::string> OuterCoincidence(
    "polly-opt-outer-coincidence",
    cl::desc("Try to construct schedules where the outer member of each band "
@@ -835,6 +840,13 @@ static bool runIslScheduleOptimizer(
  if (Schedule.is_null())
    return false;

  if (GreedyFusion) {
    isl::union_map Validity = D.getDependences(
        Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW);
    Schedule = applyGreedyFusion(Schedule, Validity);
    assert(!Schedule.is_null());
  }

  // Apply post-rescheduling optimizations (if enabled) and/or prevectorization.
  const OptimizerAdditionalInfoTy OAI = {
      TTI, const_cast<Dependences *>(&D),
Loading