2587 lines
92 KiB
C++
2587 lines
92 KiB
C++
/* Vectorizer
|
|
Copyright (C) 2003-2023 Free Software Foundation, Inc.
|
|
Contributed by Dorit Naishlos <dorit@il.ibm.com>
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation; either version 3, or (at your option) any later
|
|
version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING3. If not see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#ifndef GCC_TREE_VECTORIZER_H
|
|
#define GCC_TREE_VECTORIZER_H
|
|
|
|
typedef class _stmt_vec_info *stmt_vec_info;
|
|
typedef struct _slp_tree *slp_tree;
|
|
|
|
#include "tree-data-ref.h"
|
|
#include "tree-hash-traits.h"
|
|
#include "target.h"
|
|
#include "internal-fn.h"
|
|
#include "tree-ssa-operands.h"
|
|
#include "gimple-match.h"
|
|
|
|
/* Used for naming of new temporaries. */
|
|
enum vect_var_kind {
|
|
vect_simple_var,
|
|
vect_pointer_var,
|
|
vect_scalar_var,
|
|
vect_mask_var
|
|
};
|
|
|
|
/* Defines type of operation. */
|
|
enum operation_type {
|
|
unary_op = 1,
|
|
binary_op,
|
|
ternary_op
|
|
};
|
|
|
|
/* Define type of available alignment support. */
|
|
enum dr_alignment_support {
|
|
dr_unaligned_unsupported,
|
|
dr_unaligned_supported,
|
|
dr_explicit_realign,
|
|
dr_explicit_realign_optimized,
|
|
dr_aligned
|
|
};
|
|
|
|
/* Define type of def-use cross-iteration cycle. */
|
|
enum vect_def_type {
|
|
vect_uninitialized_def = 0,
|
|
vect_constant_def = 1,
|
|
vect_external_def,
|
|
vect_internal_def,
|
|
vect_induction_def,
|
|
vect_reduction_def,
|
|
vect_double_reduction_def,
|
|
vect_nested_cycle,
|
|
vect_first_order_recurrence,
|
|
vect_unknown_def_type
|
|
};
|
|
|
|
/* Define operation type of linear/non-linear induction variable. */
|
|
enum vect_induction_op_type {
|
|
vect_step_op_add = 0,
|
|
vect_step_op_neg,
|
|
vect_step_op_mul,
|
|
vect_step_op_shl,
|
|
vect_step_op_shr
|
|
};
|
|
|
|
/* Define type of reduction. */
|
|
enum vect_reduction_type {
|
|
TREE_CODE_REDUCTION,
|
|
COND_REDUCTION,
|
|
INTEGER_INDUC_COND_REDUCTION,
|
|
CONST_COND_REDUCTION,
|
|
|
|
/* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop
|
|
to implement:
|
|
|
|
for (int i = 0; i < VF; ++i)
|
|
res = cond[i] ? val[i] : res; */
|
|
EXTRACT_LAST_REDUCTION,
|
|
|
|
/* Use a folding reduction within the loop to implement:
|
|
|
|
for (int i = 0; i < VF; ++i)
|
|
res = res OP val[i];
|
|
|
|
(with no reassocation). */
|
|
FOLD_LEFT_REDUCTION
|
|
};
|
|
|
|
#define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \
|
|
|| ((D) == vect_double_reduction_def) \
|
|
|| ((D) == vect_nested_cycle))
|
|
|
|
/* Structure to encapsulate information about a group of like
|
|
instructions to be presented to the target cost model. */
|
|
struct stmt_info_for_cost {
|
|
int count;
|
|
enum vect_cost_for_stmt kind;
|
|
enum vect_cost_model_location where;
|
|
stmt_vec_info stmt_info;
|
|
slp_tree node;
|
|
tree vectype;
|
|
int misalign;
|
|
};
|
|
|
|
typedef vec<stmt_info_for_cost> stmt_vector_for_cost;
|
|
|
|
/* Maps base addresses to an innermost_loop_behavior and the stmt it was
|
|
derived from that gives the maximum known alignment for that base. */
|
|
typedef hash_map<tree_operand_hash,
|
|
std::pair<stmt_vec_info, innermost_loop_behavior *> >
|
|
vec_base_alignments;
|
|
|
|
/* Represents elements [START, START + LENGTH) of cyclical array OPS*
|
|
(i.e. OPS repeated to give at least START + LENGTH elements) */
|
|
struct vect_scalar_ops_slice
|
|
{
|
|
tree op (unsigned int i) const;
|
|
bool all_same_p () const;
|
|
|
|
vec<tree> *ops;
|
|
unsigned int start;
|
|
unsigned int length;
|
|
};
|
|
|
|
/* Return element I of the slice. */
|
|
inline tree
|
|
vect_scalar_ops_slice::op (unsigned int i) const
|
|
{
|
|
return (*ops)[(i + start) % ops->length ()];
|
|
}
|
|
|
|
/* Hash traits for vect_scalar_ops_slice. */
|
|
struct vect_scalar_ops_slice_hash : typed_noop_remove<vect_scalar_ops_slice>
|
|
{
|
|
typedef vect_scalar_ops_slice value_type;
|
|
typedef vect_scalar_ops_slice compare_type;
|
|
|
|
static const bool empty_zero_p = true;
|
|
|
|
static void mark_deleted (value_type &s) { s.length = ~0U; }
|
|
static void mark_empty (value_type &s) { s.length = 0; }
|
|
static bool is_deleted (const value_type &s) { return s.length == ~0U; }
|
|
static bool is_empty (const value_type &s) { return s.length == 0; }
|
|
static hashval_t hash (const value_type &);
|
|
static bool equal (const value_type &, const compare_type &);
|
|
};
|
|
|
|
/************************************************************************
|
|
SLP
|
|
************************************************************************/
|
|
typedef vec<std::pair<unsigned, unsigned> > lane_permutation_t;
|
|
typedef auto_vec<std::pair<unsigned, unsigned>, 16> auto_lane_permutation_t;
|
|
typedef vec<unsigned> load_permutation_t;
|
|
typedef auto_vec<unsigned, 16> auto_load_permutation_t;
|
|
|
|
/* A computation tree of an SLP instance. Each node corresponds to a group of
|
|
stmts to be packed in a SIMD stmt. */
|
|
struct _slp_tree {
|
|
_slp_tree ();
|
|
~_slp_tree ();
|
|
|
|
/* Nodes that contain def-stmts of this node statements operands. */
|
|
vec<slp_tree> children;
|
|
|
|
/* A group of scalar stmts to be vectorized together. */
|
|
vec<stmt_vec_info> stmts;
|
|
/* A group of scalar operands to be vectorized together. */
|
|
vec<tree> ops;
|
|
/* The representative that should be used for analysis and
|
|
code generation. */
|
|
stmt_vec_info representative;
|
|
|
|
/* Load permutation relative to the stores, NULL if there is no
|
|
permutation. */
|
|
load_permutation_t load_permutation;
|
|
/* Lane permutation of the operands scalar lanes encoded as pairs
|
|
of { operand number, lane number }. The number of elements
|
|
denotes the number of output lanes. */
|
|
lane_permutation_t lane_permutation;
|
|
|
|
tree vectype;
|
|
/* Vectorized stmt/s. */
|
|
vec<gimple *> vec_stmts;
|
|
vec<tree> vec_defs;
|
|
/* Number of vector stmts that are created to replace the group of scalar
|
|
stmts. It is calculated during the transformation phase as the number of
|
|
scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
|
|
divided by vector size. */
|
|
unsigned int vec_stmts_size;
|
|
|
|
/* Reference count in the SLP graph. */
|
|
unsigned int refcnt;
|
|
/* The maximum number of vector elements for the subtree rooted
|
|
at this node. */
|
|
poly_uint64 max_nunits;
|
|
/* The DEF type of this node. */
|
|
enum vect_def_type def_type;
|
|
/* The number of scalar lanes produced by this node. */
|
|
unsigned int lanes;
|
|
/* The operation of this node. */
|
|
enum tree_code code;
|
|
|
|
int vertex;
|
|
|
|
/* If not NULL this is a cached failed SLP discovery attempt with
|
|
the lanes that failed during SLP discovery as 'false'. This is
|
|
a copy of the matches array. */
|
|
bool *failed;
|
|
|
|
/* Allocate from slp_tree_pool. */
|
|
static void *operator new (size_t);
|
|
|
|
/* Return memory to slp_tree_pool. */
|
|
static void operator delete (void *, size_t);
|
|
|
|
/* Linked list of nodes to release when we free the slp_tree_pool. */
|
|
slp_tree next_node;
|
|
slp_tree prev_node;
|
|
};
|
|
|
|
/* The enum describes the type of operations that an SLP instance
|
|
can perform. */
|
|
|
|
enum slp_instance_kind {
|
|
slp_inst_kind_store,
|
|
slp_inst_kind_reduc_group,
|
|
slp_inst_kind_reduc_chain,
|
|
slp_inst_kind_bb_reduc,
|
|
slp_inst_kind_ctor
|
|
};
|
|
|
|
/* SLP instance is a sequence of stmts in a loop that can be packed into
|
|
SIMD stmts. */
|
|
typedef class _slp_instance {
|
|
public:
|
|
/* The root of SLP tree. */
|
|
slp_tree root;
|
|
|
|
/* For vector constructors, the constructor stmt that the SLP tree is built
|
|
from, NULL otherwise. */
|
|
vec<stmt_vec_info> root_stmts;
|
|
|
|
/* The unrolling factor required to vectorized this SLP instance. */
|
|
poly_uint64 unrolling_factor;
|
|
|
|
/* The group of nodes that contain loads of this SLP instance. */
|
|
vec<slp_tree> loads;
|
|
|
|
/* The SLP node containing the reduction PHIs. */
|
|
slp_tree reduc_phis;
|
|
|
|
/* Vector cost of this entry to the SLP graph. */
|
|
stmt_vector_for_cost cost_vec;
|
|
|
|
/* If this instance is the main entry of a subgraph the set of
|
|
entries into the same subgraph, including itself. */
|
|
vec<_slp_instance *> subgraph_entries;
|
|
|
|
/* The type of operation the SLP instance is performing. */
|
|
slp_instance_kind kind;
|
|
|
|
dump_user_location_t location () const;
|
|
} *slp_instance;
|
|
|
|
|
|
/* Access Functions. */
|
|
#define SLP_INSTANCE_TREE(S) (S)->root
|
|
#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
|
|
#define SLP_INSTANCE_LOADS(S) (S)->loads
|
|
#define SLP_INSTANCE_ROOT_STMTS(S) (S)->root_stmts
|
|
#define SLP_INSTANCE_KIND(S) (S)->kind
|
|
|
|
#define SLP_TREE_CHILDREN(S) (S)->children
|
|
#define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
|
|
#define SLP_TREE_SCALAR_OPS(S) (S)->ops
|
|
#define SLP_TREE_REF_COUNT(S) (S)->refcnt
|
|
#define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts
|
|
#define SLP_TREE_VEC_DEFS(S) (S)->vec_defs
|
|
#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
|
|
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
|
|
#define SLP_TREE_LANE_PERMUTATION(S) (S)->lane_permutation
|
|
#define SLP_TREE_DEF_TYPE(S) (S)->def_type
|
|
#define SLP_TREE_VECTYPE(S) (S)->vectype
|
|
#define SLP_TREE_REPRESENTATIVE(S) (S)->representative
|
|
#define SLP_TREE_LANES(S) (S)->lanes
|
|
#define SLP_TREE_CODE(S) (S)->code
|
|
|
|
/* Key for map that records association between
|
|
scalar conditions and corresponding loop mask, and
|
|
is populated by vect_record_loop_mask. */
|
|
|
|
struct scalar_cond_masked_key
|
|
{
|
|
scalar_cond_masked_key (tree t, unsigned ncopies_)
|
|
: ncopies (ncopies_)
|
|
{
|
|
get_cond_ops_from_tree (t);
|
|
}
|
|
|
|
void get_cond_ops_from_tree (tree);
|
|
|
|
unsigned ncopies;
|
|
bool inverted_p;
|
|
tree_code code;
|
|
tree op0;
|
|
tree op1;
|
|
};
|
|
|
|
template<>
|
|
struct default_hash_traits<scalar_cond_masked_key>
|
|
{
|
|
typedef scalar_cond_masked_key compare_type;
|
|
typedef scalar_cond_masked_key value_type;
|
|
|
|
static inline hashval_t
|
|
hash (value_type v)
|
|
{
|
|
inchash::hash h;
|
|
h.add_int (v.code);
|
|
inchash::add_expr (v.op0, h, 0);
|
|
inchash::add_expr (v.op1, h, 0);
|
|
h.add_int (v.ncopies);
|
|
h.add_flag (v.inverted_p);
|
|
return h.end ();
|
|
}
|
|
|
|
static inline bool
|
|
equal (value_type existing, value_type candidate)
|
|
{
|
|
return (existing.ncopies == candidate.ncopies
|
|
&& existing.code == candidate.code
|
|
&& existing.inverted_p == candidate.inverted_p
|
|
&& operand_equal_p (existing.op0, candidate.op0, 0)
|
|
&& operand_equal_p (existing.op1, candidate.op1, 0));
|
|
}
|
|
|
|
static const bool empty_zero_p = true;
|
|
|
|
static inline void
|
|
mark_empty (value_type &v)
|
|
{
|
|
v.ncopies = 0;
|
|
v.inverted_p = false;
|
|
}
|
|
|
|
static inline bool
|
|
is_empty (value_type v)
|
|
{
|
|
return v.ncopies == 0;
|
|
}
|
|
|
|
static inline void mark_deleted (value_type &) {}
|
|
|
|
static inline bool is_deleted (const value_type &)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
static inline void remove (value_type &) {}
|
|
};
|
|
|
|
typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
|
|
|
|
/* Key and map that records association between vector conditions and
|
|
corresponding loop mask, and is populated by prepare_vec_mask. */
|
|
|
|
typedef pair_hash<tree_operand_hash, tree_operand_hash> tree_cond_mask_hash;
|
|
typedef hash_set<tree_cond_mask_hash> vec_cond_masked_set_type;
|
|
|
|
/* Describes two objects whose addresses must be unequal for the vectorized
|
|
loop to be valid. */
|
|
typedef std::pair<tree, tree> vec_object_pair;
|
|
|
|
/* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE.
|
|
UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */
|
|
class vec_lower_bound {
|
|
public:
|
|
vec_lower_bound () {}
|
|
vec_lower_bound (tree e, bool u, poly_uint64 m)
|
|
: expr (e), unsigned_p (u), min_value (m) {}
|
|
|
|
tree expr;
|
|
bool unsigned_p;
|
|
poly_uint64 min_value;
|
|
};
|
|
|
|
/* Vectorizer state shared between different analyses like vector sizes
|
|
of the same CFG region. */
|
|
class vec_info_shared {
|
|
public:
|
|
vec_info_shared();
|
|
~vec_info_shared();
|
|
|
|
void save_datarefs();
|
|
void check_datarefs();
|
|
|
|
/* The number of scalar stmts. */
|
|
unsigned n_stmts;
|
|
|
|
/* All data references. Freed by free_data_refs, so not an auto_vec. */
|
|
vec<data_reference_p> datarefs;
|
|
vec<data_reference> datarefs_copy;
|
|
|
|
/* The loop nest in which the data dependences are computed. */
|
|
auto_vec<loop_p> loop_nest;
|
|
|
|
/* All data dependences. Freed by free_dependence_relations, so not
|
|
an auto_vec. */
|
|
vec<ddr_p> ddrs;
|
|
};
|
|
|
|
/* Vectorizer state common between loop and basic-block vectorization. */
|
|
class vec_info {
|
|
public:
|
|
typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
|
|
enum vec_kind { bb, loop };
|
|
|
|
vec_info (vec_kind, vec_info_shared *);
|
|
~vec_info ();
|
|
|
|
stmt_vec_info add_stmt (gimple *);
|
|
stmt_vec_info add_pattern_stmt (gimple *, stmt_vec_info);
|
|
stmt_vec_info lookup_stmt (gimple *);
|
|
stmt_vec_info lookup_def (tree);
|
|
stmt_vec_info lookup_single_use (tree);
|
|
class dr_vec_info *lookup_dr (data_reference *);
|
|
void move_dr (stmt_vec_info, stmt_vec_info);
|
|
void remove_stmt (stmt_vec_info);
|
|
void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *);
|
|
void insert_on_entry (stmt_vec_info, gimple *);
|
|
void insert_seq_on_entry (stmt_vec_info, gimple_seq);
|
|
|
|
/* The type of vectorization. */
|
|
vec_kind kind;
|
|
|
|
/* Shared vectorizer state. */
|
|
vec_info_shared *shared;
|
|
|
|
/* The mapping of GIMPLE UID to stmt_vec_info. */
|
|
vec<stmt_vec_info> stmt_vec_infos;
|
|
/* Whether the above mapping is complete. */
|
|
bool stmt_vec_info_ro;
|
|
|
|
/* Whether we've done a transform we think OK to not update virtual
|
|
SSA form. */
|
|
bool any_known_not_updated_vssa;
|
|
|
|
/* The SLP graph. */
|
|
auto_vec<slp_instance> slp_instances;
|
|
|
|
/* Maps base addresses to an innermost_loop_behavior that gives the maximum
|
|
known alignment for that base. */
|
|
vec_base_alignments base_alignments;
|
|
|
|
/* All interleaving chains of stores, represented by the first
|
|
stmt in the chain. */
|
|
auto_vec<stmt_vec_info> grouped_stores;
|
|
|
|
/* The set of vector modes used in the vectorized region. */
|
|
mode_set used_vector_modes;
|
|
|
|
/* The argument we should pass to related_vector_mode when looking up
|
|
the vector mode for a scalar mode, or VOIDmode if we haven't yet
|
|
made any decisions about which vector modes to use. */
|
|
machine_mode vector_mode;
|
|
|
|
private:
|
|
stmt_vec_info new_stmt_vec_info (gimple *stmt);
|
|
void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
|
|
void free_stmt_vec_infos ();
|
|
void free_stmt_vec_info (stmt_vec_info);
|
|
};
|
|
|
|
class _loop_vec_info;
|
|
class _bb_vec_info;
|
|
|
|
template<>
|
|
template<>
|
|
inline bool
|
|
is_a_helper <_loop_vec_info *>::test (vec_info *i)
|
|
{
|
|
return i->kind == vec_info::loop;
|
|
}
|
|
|
|
template<>
|
|
template<>
|
|
inline bool
|
|
is_a_helper <_bb_vec_info *>::test (vec_info *i)
|
|
{
|
|
return i->kind == vec_info::bb;
|
|
}
|
|
|
|
/* In general, we can divide the vector statements in a vectorized loop
|
|
into related groups ("rgroups") and say that for each rgroup there is
|
|
some nS such that the rgroup operates on nS values from one scalar
|
|
iteration followed by nS values from the next. That is, if VF is the
|
|
vectorization factor of the loop, the rgroup operates on a sequence:
|
|
|
|
(1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS)
|
|
|
|
where (i,j) represents a scalar value with index j in a scalar
|
|
iteration with index i.
|
|
|
|
[ We use the term "rgroup" to emphasise that this grouping isn't
|
|
necessarily the same as the grouping of statements used elsewhere.
|
|
For example, if we implement a group of scalar loads using gather
|
|
loads, we'll use a separate gather load for each scalar load, and
|
|
thus each gather load will belong to its own rgroup. ]
|
|
|
|
In general this sequence will occupy nV vectors concatenated
|
|
together. If these vectors have nL lanes each, the total number
|
|
of scalar values N is given by:
|
|
|
|
N = nS * VF = nV * nL
|
|
|
|
None of nS, VF, nV and nL are required to be a power of 2. nS and nV
|
|
are compile-time constants but VF and nL can be variable (if the target
|
|
supports variable-length vectors).
|
|
|
|
In classical vectorization, each iteration of the vector loop would
|
|
handle exactly VF iterations of the original scalar loop. However,
|
|
in vector loops that are able to operate on partial vectors, a
|
|
particular iteration of the vector loop might handle fewer than VF
|
|
iterations of the scalar loop. The vector lanes that correspond to
|
|
iterations of the scalar loop are said to be "active" and the other
|
|
lanes are said to be "inactive".
|
|
|
|
In such vector loops, many rgroups need to be controlled to ensure
|
|
that they have no effect for the inactive lanes. Conceptually, each
|
|
such rgroup needs a sequence of booleans in the same order as above,
|
|
but with each (i,j) replaced by a boolean that indicates whether
|
|
iteration i is active. This sequence occupies nV vector controls
|
|
that again have nL lanes each. Thus the control sequence as a whole
|
|
consists of VF independent booleans that are each repeated nS times.
|
|
|
|
Taking mask-based approach as a partially-populated vectors example.
|
|
We make the simplifying assumption that if a sequence of nV masks is
|
|
suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by
|
|
VIEW_CONVERTing it. This holds for all current targets that support
|
|
fully-masked loops. For example, suppose the scalar loop is:
|
|
|
|
float *f;
|
|
double *d;
|
|
for (int i = 0; i < n; ++i)
|
|
{
|
|
f[i * 2 + 0] += 1.0f;
|
|
f[i * 2 + 1] += 2.0f;
|
|
d[i] += 3.0;
|
|
}
|
|
|
|
and suppose that vectors have 256 bits. The vectorized f accesses
|
|
will belong to one rgroup and the vectorized d access to another:
|
|
|
|
f rgroup: nS = 2, nV = 1, nL = 8
|
|
d rgroup: nS = 1, nV = 1, nL = 4
|
|
VF = 4
|
|
|
|
[ In this simple example the rgroups do correspond to the normal
|
|
SLP grouping scheme. ]
|
|
|
|
If only the first three lanes are active, the masks we need are:
|
|
|
|
f rgroup: 1 1 | 1 1 | 1 1 | 0 0
|
|
d rgroup: 1 | 1 | 1 | 0
|
|
|
|
Here we can use a mask calculated for f's rgroup for d's, but not
|
|
vice versa.
|
|
|
|
Thus for each value of nV, it is enough to provide nV masks, with the
|
|
mask being calculated based on the highest nL (or, equivalently, based
|
|
on the highest nS) required by any rgroup with that nV. We therefore
|
|
represent the entire collection of masks as a two-level table, with the
|
|
first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
|
|
the second being indexed by the mask index 0 <= i < nV. */
|
|
|
|
/* The controls (like masks or lengths) needed by rgroups with nV vectors,
|
|
according to the description above. */
|
|
struct rgroup_controls {
|
|
/* The largest nS for all rgroups that use these controls. */
|
|
unsigned int max_nscalars_per_iter;
|
|
|
|
/* For the largest nS recorded above, the loop controls divide each scalar
|
|
into FACTOR equal-sized pieces. This is useful if we need to split
|
|
element-based accesses into byte-based accesses. */
|
|
unsigned int factor;
|
|
|
|
/* This is a vector type with MAX_NSCALARS_PER_ITER * VF / nV elements.
|
|
For mask-based controls, it is the type of the masks in CONTROLS.
|
|
For length-based controls, it can be any vector type that has the
|
|
specified number of elements; the type of the elements doesn't matter. */
|
|
tree type;
|
|
|
|
/* A vector of nV controls, in iteration order. */
|
|
vec<tree> controls;
|
|
|
|
/* In case of len_load and len_store with a bias there is only one
|
|
rgroup. This holds the adjusted loop length for the this rgroup. */
|
|
tree bias_adjusted_ctrl;
|
|
};
|
|
|
|
typedef auto_vec<rgroup_controls> vec_loop_masks;
|
|
|
|
typedef auto_vec<rgroup_controls> vec_loop_lens;
|
|
|
|
typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
|
|
|
|
/* Information about a reduction accumulator from the main loop that could
|
|
conceivably be reused as the input to a reduction in an epilogue loop. */
|
|
struct vect_reusable_accumulator {
|
|
/* The final value of the accumulator, which forms the input to the
|
|
reduction operation. */
|
|
tree reduc_input;
|
|
|
|
/* The stmt_vec_info that describes the reduction (i.e. the one for
|
|
which is_reduc_info is true). */
|
|
stmt_vec_info reduc_info;
|
|
};
|
|
|
|
/*-----------------------------------------------------------------*/
|
|
/* Info on vectorized loops. */
|
|
/*-----------------------------------------------------------------*/
|
|
typedef class _loop_vec_info : public vec_info {
|
|
public:
|
|
_loop_vec_info (class loop *, vec_info_shared *);
|
|
~_loop_vec_info ();
|
|
|
|
/* The loop to which this info struct refers to. */
|
|
class loop *loop;
|
|
|
|
/* The loop basic blocks. */
|
|
basic_block *bbs;
|
|
|
|
/* Number of latch executions. */
|
|
tree num_itersm1;
|
|
/* Number of iterations. */
|
|
tree num_iters;
|
|
/* Number of iterations of the original loop. */
|
|
tree num_iters_unchanged;
|
|
/* Condition under which this loop is analyzed and versioned. */
|
|
tree num_iters_assumptions;
|
|
|
|
/* The cost of the vector code. */
|
|
class vector_costs *vector_costs;
|
|
|
|
/* The cost of the scalar code. */
|
|
class vector_costs *scalar_costs;
|
|
|
|
/* Threshold of number of iterations below which vectorization will not be
|
|
performed. It is calculated from MIN_PROFITABLE_ITERS and
|
|
param_min_vect_loop_bound. */
|
|
unsigned int th;
|
|
|
|
/* When applying loop versioning, the vector form should only be used
|
|
if the number of scalar iterations is >= this value, on top of all
|
|
the other requirements. Ignored when loop versioning is not being
|
|
used. */
|
|
poly_uint64 versioning_threshold;
|
|
|
|
/* Unrolling factor */
|
|
poly_uint64 vectorization_factor;
|
|
|
|
/* If this loop is an epilogue loop whose main loop can be skipped,
|
|
MAIN_LOOP_EDGE is the edge from the main loop to this loop's
|
|
preheader. SKIP_MAIN_LOOP_EDGE is then the edge that skips the
|
|
main loop and goes straight to this loop's preheader.
|
|
|
|
Both fields are null otherwise. */
|
|
edge main_loop_edge;
|
|
edge skip_main_loop_edge;
|
|
|
|
/* If this loop is an epilogue loop that might be skipped after executing
|
|
the main loop, this edge is the one that skips the epilogue. */
|
|
edge skip_this_loop_edge;
|
|
|
|
/* The vectorized form of a standard reduction replaces the original
|
|
scalar code's final result (a loop-closed SSA PHI) with the result
|
|
of a vector-to-scalar reduction operation. After vectorization,
|
|
this variable maps these vector-to-scalar results to information
|
|
about the reductions that generated them. */
|
|
hash_map<tree, vect_reusable_accumulator> reusable_accumulators;
|
|
|
|
/* The number of times that the target suggested we unroll the vector loop
|
|
in order to promote more ILP. This value will be used to re-analyze the
|
|
loop for vectorization and if successful the value will be folded into
|
|
vectorization_factor (and therefore exactly divides
|
|
vectorization_factor). */
|
|
unsigned int suggested_unroll_factor;
|
|
|
|
/* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
|
|
if there is no particular limit. */
|
|
unsigned HOST_WIDE_INT max_vectorization_factor;
|
|
|
|
/* The masks that a fully-masked loop should use to avoid operating
|
|
on inactive scalars. */
|
|
vec_loop_masks masks;
|
|
|
|
/* The lengths that a loop with length should use to avoid operating
|
|
on inactive scalars. */
|
|
vec_loop_lens lens;
|
|
|
|
/* Set of scalar conditions that have loop mask applied. */
|
|
scalar_cond_masked_set_type scalar_cond_masked_set;
|
|
|
|
/* Set of vector conditions that have loop mask applied. */
|
|
vec_cond_masked_set_type vec_cond_masked_set;
|
|
|
|
/* If we are using a loop mask to align memory addresses, this variable
|
|
contains the number of vector elements that we should skip in the
|
|
first iteration of the vector loop (i.e. the number of leading
|
|
elements that should be false in the first mask). */
|
|
tree mask_skip_niters;
|
|
|
|
/* The type that the loop control IV should be converted to before
|
|
testing which of the VF scalars are active and inactive.
|
|
Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
|
|
tree rgroup_compare_type;
|
|
|
|
/* For #pragma omp simd if (x) loops the x expression. If constant 0,
|
|
the loop should not be vectorized, if constant non-zero, simd_if_cond
|
|
shouldn't be set and loop vectorized normally, if SSA_NAME, the loop
|
|
should be versioned on that condition, using scalar loop if the condition
|
|
is false and vectorized loop otherwise. */
|
|
tree simd_if_cond;
|
|
|
|
/* The type that the vector loop control IV should have when
|
|
LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */
|
|
tree rgroup_iv_type;
|
|
|
|
/* Unknown DRs according to which loop was peeled. */
|
|
class dr_vec_info *unaligned_dr;
|
|
|
|
/* peeling_for_alignment indicates whether peeling for alignment will take
|
|
place, and what the peeling factor should be:
|
|
peeling_for_alignment = X means:
|
|
If X=0: Peeling for alignment will not be applied.
|
|
If X>0: Peel first X iterations.
|
|
If X=-1: Generate a runtime test to calculate the number of iterations
|
|
to be peeled, using the dataref recorded in the field
|
|
unaligned_dr. */
|
|
int peeling_for_alignment;
|
|
|
|
/* The mask used to check the alignment of pointers or arrays. */
|
|
int ptr_mask;
|
|
|
|
/* Data Dependence Relations defining address ranges that are candidates
|
|
for a run-time aliasing check. */
|
|
auto_vec<ddr_p> may_alias_ddrs;
|
|
|
|
/* Data Dependence Relations defining address ranges together with segment
|
|
lengths from which the run-time aliasing check is built. */
|
|
auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs;
|
|
|
|
/* Check that the addresses of each pair of objects is unequal. */
|
|
auto_vec<vec_object_pair> check_unequal_addrs;
|
|
|
|
/* List of values that are required to be nonzero. This is used to check
|
|
whether things like "x[i * n] += 1;" are safe and eventually gets added
|
|
to the checks for lower bounds below. */
|
|
auto_vec<tree> check_nonzero;
|
|
|
|
/* List of values that need to be checked for a minimum value. */
|
|
auto_vec<vec_lower_bound> lower_bounds;
|
|
|
|
/* Statements in the loop that have data references that are candidates for a
|
|
runtime (loop versioning) misalignment check. */
|
|
auto_vec<stmt_vec_info> may_misalign_stmts;
|
|
|
|
/* Reduction cycles detected in the loop. Used in loop-aware SLP. */
|
|
auto_vec<stmt_vec_info> reductions;
|
|
|
|
/* All reduction chains in the loop, represented by the first
|
|
stmt in the chain. */
|
|
auto_vec<stmt_vec_info> reduction_chains;
|
|
|
|
/* Cost vector for a single scalar iteration. */
|
|
auto_vec<stmt_info_for_cost> scalar_cost_vec;
|
|
|
|
/* Map of IV base/step expressions to inserted name in the preheader. */
|
|
hash_map<tree_operand_hash, tree> *ivexpr_map;
|
|
|
|
/* Map of OpenMP "omp simd array" scan variables to corresponding
|
|
rhs of the store of the initializer. */
|
|
hash_map<tree, tree> *scan_map;
|
|
|
|
/* The unrolling factor needed to SLP the loop. In case of that pure SLP is
|
|
applied to the loop, i.e., no unrolling is needed, this is 1. */
|
|
poly_uint64 slp_unrolling_factor;
|
|
|
|
/* The factor used to over weight those statements in an inner loop
|
|
relative to the loop being vectorized. */
|
|
unsigned int inner_loop_cost_factor;
|
|
|
|
/* Is the loop vectorizable? */
|
|
bool vectorizable;
|
|
|
|
/* Records whether we still have the option of vectorizing this loop
|
|
using partially-populated vectors; in other words, whether it is
|
|
still possible for one iteration of the vector loop to handle
|
|
fewer than VF scalars. */
|
|
bool can_use_partial_vectors_p;
|
|
|
|
/* True if we've decided to use partially-populated vectors, so that
|
|
the vector loop can handle fewer than VF scalars. */
|
|
bool using_partial_vectors_p;
|
|
|
|
/* True if we've decided to use partially-populated vectors for the
|
|
epilogue of loop. */
|
|
bool epil_using_partial_vectors_p;
|
|
|
|
/* The bias for len_load and len_store. For now, only 0 and -1 are
|
|
supported. -1 must be used when a backend does not support
|
|
len_load/len_store with a length of zero. */
|
|
signed char partial_load_store_bias;
|
|
|
|
/* When we have grouped data accesses with gaps, we may introduce invalid
|
|
memory accesses. We peel the last iteration of the loop to prevent
|
|
this. */
|
|
bool peeling_for_gaps;
|
|
|
|
/* When the number of iterations is not a multiple of the vector size
|
|
we need to peel off iterations at the end to form an epilogue loop. */
|
|
bool peeling_for_niter;
|
|
|
|
/* True if there are no loop carried data dependencies in the loop.
|
|
If loop->safelen <= 1, then this is always true, either the loop
|
|
didn't have any loop carried data dependencies, or the loop is being
|
|
vectorized guarded with some runtime alias checks, or couldn't
|
|
be vectorized at all, but then this field shouldn't be used.
|
|
For loop->safelen >= 2, the user has asserted that there are no
|
|
backward dependencies, but there still could be loop carried forward
|
|
dependencies in such loops. This flag will be false if normal
|
|
vectorizer data dependency analysis would fail or require versioning
|
|
for alias, but because of loop->safelen >= 2 it has been vectorized
|
|
even without versioning for alias. E.g. in:
|
|
#pragma omp simd
|
|
for (int i = 0; i < m; i++)
|
|
a[i] = a[i + k] * c;
|
|
(or #pragma simd or #pragma ivdep) we can vectorize this and it will
|
|
DTRT even for k > 0 && k < m, but without safelen we would not
|
|
vectorize this, so this field would be false. */
|
|
bool no_data_dependencies;
|
|
|
|
/* Mark loops having masked stores. */
|
|
bool has_mask_store;
|
|
|
|
/* Queued scaling factor for the scalar loop. */
|
|
profile_probability scalar_loop_scaling;
|
|
|
|
/* If if-conversion versioned this loop before conversion, this is the
|
|
loop version without if-conversion. */
|
|
class loop *scalar_loop;
|
|
|
|
/* For loops being epilogues of already vectorized loops
|
|
this points to the original vectorized loop. Otherwise NULL. */
|
|
_loop_vec_info *orig_loop_info;
|
|
|
|
/* Used to store loop_vec_infos of epilogues of this loop during
|
|
analysis. */
|
|
vec<_loop_vec_info *> epilogue_vinfos;
|
|
|
|
} *loop_vec_info;
|
|
|
|
/* Access Functions. */
|
|
#define LOOP_VINFO_LOOP(L) (L)->loop
|
|
#define LOOP_VINFO_BBS(L) (L)->bbs
|
|
#define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1
|
|
#define LOOP_VINFO_NITERS(L) (L)->num_iters
|
|
/* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
|
|
prologue peeling retain total unchanged scalar loop iterations for
|
|
cost model. */
|
|
#define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged
|
|
#define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions
|
|
#define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
|
|
#define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
|
|
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
|
|
#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
|
|
#define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
|
|
#define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L) \
|
|
(L)->epil_using_partial_vectors_p
|
|
#define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias
|
|
#define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
|
|
#define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor
|
|
#define LOOP_VINFO_MASKS(L) (L)->masks
|
|
#define LOOP_VINFO_LENS(L) (L)->lens
|
|
#define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters
|
|
#define LOOP_VINFO_RGROUP_COMPARE_TYPE(L) (L)->rgroup_compare_type
|
|
#define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type
|
|
#define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
|
|
#define LOOP_VINFO_N_STMTS(L) (L)->shared->n_stmts
|
|
#define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest
|
|
#define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs
|
|
#define LOOP_VINFO_DDRS(L) (L)->shared->ddrs
|
|
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
|
|
#define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
|
|
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
|
|
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
|
|
#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
|
|
#define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs
|
|
#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs
|
|
#define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero
|
|
#define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds
|
|
#define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
|
|
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
|
|
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
|
|
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
|
|
#define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
|
|
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
|
|
#define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
|
|
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
|
|
#define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop
|
|
#define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling
|
|
#define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
|
|
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
|
|
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
|
|
#define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
|
|
#define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
|
|
|
|
#define LOOP_VINFO_FULLY_MASKED_P(L) \
|
|
(LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
|
|
&& !LOOP_VINFO_MASKS (L).is_empty ())
|
|
|
|
#define LOOP_VINFO_FULLY_WITH_LENGTH_P(L) \
|
|
(LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
|
|
&& !LOOP_VINFO_LENS (L).is_empty ())
|
|
|
|
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
|
|
((L)->may_misalign_stmts.length () > 0)
|
|
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
|
|
((L)->comp_alias_ddrs.length () > 0 \
|
|
|| (L)->check_unequal_addrs.length () > 0 \
|
|
|| (L)->lower_bounds.length () > 0)
|
|
#define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \
|
|
(LOOP_VINFO_NITERS_ASSUMPTIONS (L))
|
|
#define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L) \
|
|
(LOOP_VINFO_SIMD_IF_COND (L))
|
|
#define LOOP_REQUIRES_VERSIONING(L) \
|
|
(LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \
|
|
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \
|
|
|| LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \
|
|
|| LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
|
|
|
|
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
|
|
(tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
|
|
|
|
#define LOOP_VINFO_EPILOGUE_P(L) \
|
|
(LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)
|
|
|
|
#define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \
|
|
(LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))
|
|
|
|
/* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL
|
|
value signifies success, and a NULL value signifies failure, supporting
|
|
propagating an opt_problem * describing the failure back up the call
|
|
stack. */
|
|
typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info;
|
|
|
|
inline loop_vec_info
|
|
loop_vec_info_for_loop (class loop *loop)
|
|
{
|
|
return (loop_vec_info) loop->aux;
|
|
}
|
|
|
|
struct slp_root
|
|
{
|
|
slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_,
|
|
vec<stmt_vec_info> roots_)
|
|
: kind(kind_), stmts(stmts_), roots(roots_) {}
|
|
slp_instance_kind kind;
|
|
vec<stmt_vec_info> stmts;
|
|
vec<stmt_vec_info> roots;
|
|
};
|
|
|
|
typedef class _bb_vec_info : public vec_info
|
|
{
|
|
public:
|
|
_bb_vec_info (vec<basic_block> bbs, vec_info_shared *);
|
|
~_bb_vec_info ();
|
|
|
|
/* The region we are operating on. bbs[0] is the entry, excluding
|
|
its PHI nodes. In the future we might want to track an explicit
|
|
entry edge to cover bbs[0] PHI nodes and have a region entry
|
|
insert location. */
|
|
vec<basic_block> bbs;
|
|
|
|
vec<slp_root> roots;
|
|
} *bb_vec_info;
|
|
|
|
#define BB_VINFO_BB(B) (B)->bb
|
|
#define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores
|
|
#define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances
|
|
#define BB_VINFO_DATAREFS(B) (B)->shared->datarefs
|
|
#define BB_VINFO_DDRS(B) (B)->shared->ddrs
|
|
|
|
/*-----------------------------------------------------------------*/
|
|
/* Info on vectorized defs. */
|
|
/*-----------------------------------------------------------------*/
|
|
enum stmt_vec_info_type {
|
|
undef_vec_info_type = 0,
|
|
load_vec_info_type,
|
|
store_vec_info_type,
|
|
shift_vec_info_type,
|
|
op_vec_info_type,
|
|
call_vec_info_type,
|
|
call_simd_clone_vec_info_type,
|
|
assignment_vec_info_type,
|
|
condition_vec_info_type,
|
|
comparison_vec_info_type,
|
|
reduc_vec_info_type,
|
|
induc_vec_info_type,
|
|
type_promotion_vec_info_type,
|
|
type_demotion_vec_info_type,
|
|
type_conversion_vec_info_type,
|
|
cycle_phi_info_type,
|
|
lc_phi_info_type,
|
|
phi_info_type,
|
|
recurr_info_type,
|
|
loop_exit_ctrl_vec_info_type
|
|
};
|
|
|
|
/* Indicates whether/how a variable is used in the scope of loop/basic
|
|
block. */
|
|
enum vect_relevant {
|
|
vect_unused_in_scope = 0,
|
|
|
|
/* The def is only used outside the loop. */
|
|
vect_used_only_live,
|
|
/* The def is in the inner loop, and the use is in the outer loop, and the
|
|
use is a reduction stmt. */
|
|
vect_used_in_outer_by_reduction,
|
|
/* The def is in the inner loop, and the use is in the outer loop (and is
|
|
not part of reduction). */
|
|
vect_used_in_outer,
|
|
|
|
/* defs that feed computations that end up (only) in a reduction. These
|
|
defs may be used by non-reduction stmts, but eventually, any
|
|
computations/values that are affected by these defs are used to compute
|
|
a reduction (i.e. don't get stored to memory, for example). We use this
|
|
to identify computations that we can change the order in which they are
|
|
computed. */
|
|
vect_used_by_reduction,
|
|
|
|
vect_used_in_scope
|
|
};
|
|
|
|
/* The type of vectorization that can be applied to the stmt: regular loop-based
|
|
vectorization; pure SLP - the stmt is a part of SLP instances and does not
|
|
have uses outside SLP instances; or hybrid SLP and loop-based - the stmt is
|
|
a part of SLP instance and also must be loop-based vectorized, since it has
|
|
uses outside SLP sequences.
|
|
|
|
In the loop context the meanings of pure and hybrid SLP are slightly
|
|
different. By saying that pure SLP is applied to the loop, we mean that we
|
|
exploit only intra-iteration parallelism in the loop; i.e., the loop can be
|
|
vectorized without doing any conceptual unrolling, cause we don't pack
|
|
together stmts from different iterations, only within a single iteration.
|
|
Loop hybrid SLP means that we exploit both intra-iteration and
|
|
inter-iteration parallelism (e.g., number of elements in the vector is 4
|
|
and the slp-group-size is 2, in which case we don't have enough parallelism
|
|
within an iteration, so we obtain the rest of the parallelism from subsequent
|
|
iterations by unrolling the loop by 2). */
|
|
enum slp_vect_type {
|
|
loop_vect = 0,
|
|
pure_slp,
|
|
hybrid
|
|
};
|
|
|
|
/* Says whether a statement is a load, a store of a vectorized statement
|
|
result, or a store of an invariant value. */
|
|
enum vec_load_store_type {
|
|
VLS_LOAD,
|
|
VLS_STORE,
|
|
VLS_STORE_INVARIANT
|
|
};
|
|
|
|
/* Describes how we're going to vectorize an individual load or store,
|
|
or a group of loads or stores. */
|
|
enum vect_memory_access_type {
|
|
/* An access to an invariant address. This is used only for loads. */
|
|
VMAT_INVARIANT,
|
|
|
|
/* A simple contiguous access. */
|
|
VMAT_CONTIGUOUS,
|
|
|
|
/* A contiguous access that goes down in memory rather than up,
|
|
with no additional permutation. This is used only for stores
|
|
of invariants. */
|
|
VMAT_CONTIGUOUS_DOWN,
|
|
|
|
/* A simple contiguous access in which the elements need to be permuted
|
|
after loading or before storing. Only used for loop vectorization;
|
|
SLP uses separate permutes. */
|
|
VMAT_CONTIGUOUS_PERMUTE,
|
|
|
|
/* A simple contiguous access in which the elements need to be reversed
|
|
after loading or before storing. */
|
|
VMAT_CONTIGUOUS_REVERSE,
|
|
|
|
/* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES. */
|
|
VMAT_LOAD_STORE_LANES,
|
|
|
|
/* An access in which each scalar element is loaded or stored
|
|
individually. */
|
|
VMAT_ELEMENTWISE,
|
|
|
|
/* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
|
|
SLP accesses. Each unrolled iteration uses a contiguous load
|
|
or store for the whole group, but the groups from separate iterations
|
|
are combined in the same way as for VMAT_ELEMENTWISE. */
|
|
VMAT_STRIDED_SLP,
|
|
|
|
/* The access uses gather loads or scatter stores. */
|
|
VMAT_GATHER_SCATTER
|
|
};
|
|
|
|
class dr_vec_info {
|
|
public:
|
|
/* The data reference itself. */
|
|
data_reference *dr;
|
|
/* The statement that contains the data reference. */
|
|
stmt_vec_info stmt;
|
|
/* The analysis group this DR belongs to when doing BB vectorization.
|
|
DRs of the same group belong to the same conditional execution context. */
|
|
unsigned group;
|
|
/* The misalignment in bytes of the reference, or -1 if not known. */
|
|
int misalignment;
|
|
/* The byte alignment that we'd ideally like the reference to have,
|
|
and the value that misalignment is measured against. */
|
|
poly_uint64 target_alignment;
|
|
/* If true the alignment of base_decl needs to be increased. */
|
|
bool base_misaligned;
|
|
tree base_decl;
|
|
|
|
/* Stores current vectorized loop's offset. To be added to the DR's
|
|
offset to calculate current offset of data reference. */
|
|
tree offset;
|
|
};
|
|
|
|
typedef struct data_reference *dr_p;
|
|
|
|
class _stmt_vec_info {
|
|
public:
|
|
|
|
enum stmt_vec_info_type type;
|
|
|
|
/* Indicates whether this stmts is part of a computation whose result is
|
|
used outside the loop. */
|
|
bool live;
|
|
|
|
/* Stmt is part of some pattern (computation idiom) */
|
|
bool in_pattern_p;
|
|
|
|
/* True if the statement was created during pattern recognition as
|
|
part of the replacement for RELATED_STMT. This implies that the
|
|
statement isn't part of any basic block, although for convenience
|
|
its gimple_bb is the same as for RELATED_STMT. */
|
|
bool pattern_stmt_p;
|
|
|
|
/* Is this statement vectorizable or should it be skipped in (partial)
|
|
vectorization. */
|
|
bool vectorizable;
|
|
|
|
/* The stmt to which this info struct refers to. */
|
|
gimple *stmt;
|
|
|
|
/* The vector type to be used for the LHS of this statement. */
|
|
tree vectype;
|
|
|
|
/* The vectorized stmts. */
|
|
vec<gimple *> vec_stmts;
|
|
|
|
/* The following is relevant only for stmts that contain a non-scalar
|
|
data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have
|
|
at most one such data-ref. */
|
|
|
|
dr_vec_info dr_aux;
|
|
|
|
/* Information about the data-ref relative to this loop
|
|
nest (the loop that is being considered for vectorization). */
|
|
innermost_loop_behavior dr_wrt_vec_loop;
|
|
|
|
/* For loop PHI nodes, the base and evolution part of it. This makes sure
|
|
this information is still available in vect_update_ivs_after_vectorizer
|
|
where we may not be able to re-analyze the PHI nodes evolution as
|
|
peeling for the prologue loop can make it unanalyzable. The evolution
|
|
part is still correct after peeling, but the base may have changed from
|
|
the version here. */
|
|
tree loop_phi_evolution_base_unchanged;
|
|
tree loop_phi_evolution_part;
|
|
enum vect_induction_op_type loop_phi_evolution_type;
|
|
|
|
/* Used for various bookkeeping purposes, generally holding a pointer to
|
|
some other stmt S that is in some way "related" to this stmt.
|
|
Current use of this field is:
|
|
If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is
|
|
true): S is the "pattern stmt" that represents (and replaces) the
|
|
sequence of stmts that constitutes the pattern. Similarly, the
|
|
related_stmt of the "pattern stmt" points back to this stmt (which is
|
|
the last stmt in the original sequence of stmts that constitutes the
|
|
pattern). */
|
|
stmt_vec_info related_stmt;
|
|
|
|
/* Used to keep a sequence of def stmts of a pattern stmt if such exists.
|
|
The sequence is attached to the original statement rather than the
|
|
pattern statement. */
|
|
gimple_seq pattern_def_seq;
|
|
|
|
/* Selected SIMD clone's function info. First vector element
|
|
is SIMD clone's function decl, followed by a pair of trees (base + step)
|
|
for linear arguments (pair of NULLs for other arguments). */
|
|
vec<tree> simd_clone_info;
|
|
|
|
/* Classify the def of this stmt. */
|
|
enum vect_def_type def_type;
|
|
|
|
/* Whether the stmt is SLPed, loop-based vectorized, or both. */
|
|
enum slp_vect_type slp_type;
|
|
|
|
/* Interleaving and reduction chains info. */
|
|
/* First element in the group. */
|
|
stmt_vec_info first_element;
|
|
/* Pointer to the next element in the group. */
|
|
stmt_vec_info next_element;
|
|
/* The size of the group. */
|
|
unsigned int size;
|
|
/* For stores, number of stores from this group seen. We vectorize the last
|
|
one. */
|
|
unsigned int store_count;
|
|
/* For loads only, the gap from the previous load. For consecutive loads, GAP
|
|
is 1. */
|
|
unsigned int gap;
|
|
|
|
/* The minimum negative dependence distance this stmt participates in
|
|
or zero if none. */
|
|
unsigned int min_neg_dist;
|
|
|
|
/* Not all stmts in the loop need to be vectorized. e.g, the increment
|
|
of the loop induction variable and computation of array indexes. relevant
|
|
indicates whether the stmt needs to be vectorized. */
|
|
enum vect_relevant relevant;
|
|
|
|
/* For loads if this is a gather, for stores if this is a scatter. */
|
|
bool gather_scatter_p;
|
|
|
|
/* True if this is an access with loop-invariant stride. */
|
|
bool strided_p;
|
|
|
|
/* For both loads and stores. */
|
|
unsigned simd_lane_access_p : 3;
|
|
|
|
/* Classifies how the load or store is going to be implemented
|
|
for loop vectorization. */
|
|
vect_memory_access_type memory_access_type;
|
|
|
|
/* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */
|
|
tree induc_cond_initial_val;
|
|
|
|
/* If not NULL the value to be added to compute final reduction value. */
|
|
tree reduc_epilogue_adjustment;
|
|
|
|
/* On a reduction PHI the reduction type as detected by
|
|
vect_is_simple_reduction and vectorizable_reduction. */
|
|
enum vect_reduction_type reduc_type;
|
|
|
|
/* The original reduction code, to be used in the epilogue. */
|
|
code_helper reduc_code;
|
|
/* An internal function we should use in the epilogue. */
|
|
internal_fn reduc_fn;
|
|
|
|
/* On a stmt participating in the reduction the index of the operand
|
|
on the reduction SSA cycle. */
|
|
int reduc_idx;
|
|
|
|
/* On a reduction PHI the def returned by vect_force_simple_reduction.
|
|
On the def returned by vect_force_simple_reduction the
|
|
corresponding PHI. */
|
|
stmt_vec_info reduc_def;
|
|
|
|
/* The vector input type relevant for reduction vectorization. */
|
|
tree reduc_vectype_in;
|
|
|
|
/* The vector type for performing the actual reduction. */
|
|
tree reduc_vectype;
|
|
|
|
/* If IS_REDUC_INFO is true and if the vector code is performing
|
|
N scalar reductions in parallel, this variable gives the initial
|
|
scalar values of those N reductions. */
|
|
vec<tree> reduc_initial_values;
|
|
|
|
/* If IS_REDUC_INFO is true and if the vector code is performing
|
|
N scalar reductions in parallel, this variable gives the vectorized code's
|
|
final (scalar) result for each of those N reductions. In other words,
|
|
REDUC_SCALAR_RESULTS[I] replaces the original scalar code's loop-closed
|
|
SSA PHI for reduction number I. */
|
|
vec<tree> reduc_scalar_results;
|
|
|
|
/* Only meaningful if IS_REDUC_INFO. If non-null, the reduction is
|
|
being performed by an epilogue loop and we have decided to reuse
|
|
this accumulator from the main loop. */
|
|
vect_reusable_accumulator *reused_accumulator;
|
|
|
|
/* Whether we force a single cycle PHI during reduction vectorization. */
|
|
bool force_single_cycle;
|
|
|
|
/* Whether on this stmt reduction meta is recorded. */
|
|
bool is_reduc_info;
|
|
|
|
/* If nonzero, the lhs of the statement could be truncated to this
|
|
many bits without affecting any users of the result. */
|
|
unsigned int min_output_precision;
|
|
|
|
/* If nonzero, all non-boolean input operands have the same precision,
|
|
and they could each be truncated to this many bits without changing
|
|
the result. */
|
|
unsigned int min_input_precision;
|
|
|
|
/* If OPERATION_BITS is nonzero, the statement could be performed on
|
|
an integer with the sign and number of bits given by OPERATION_SIGN
|
|
and OPERATION_BITS without changing the result. */
|
|
unsigned int operation_precision;
|
|
signop operation_sign;
|
|
|
|
/* If the statement produces a boolean result, this value describes
|
|
how we should choose the associated vector type. The possible
|
|
values are:
|
|
|
|
- an integer precision N if we should use the vector mask type
|
|
associated with N-bit integers. This is only used if all relevant
|
|
input booleans also want the vector mask type for N-bit integers,
|
|
or if we can convert them into that form by pattern-matching.
|
|
|
|
- ~0U if we considered choosing a vector mask type but decided
|
|
to treat the boolean as a normal integer type instead.
|
|
|
|
- 0 otherwise. This means either that the operation isn't one that
|
|
could have a vector mask type (and so should have a normal vector
|
|
type instead) or that we simply haven't made a choice either way. */
|
|
unsigned int mask_precision;
|
|
|
|
/* True if this is only suitable for SLP vectorization. */
|
|
bool slp_vect_only_p;
|
|
|
|
/* True if this is a pattern that can only be handled by SLP
|
|
vectorization. */
|
|
bool slp_vect_pattern_only_p;
|
|
};
|
|
|
|
/* Information about a gather/scatter call. */
|
|
struct gather_scatter_info {
|
|
/* The internal function to use for the gather/scatter operation,
|
|
or IFN_LAST if a built-in function should be used instead. */
|
|
internal_fn ifn;
|
|
|
|
/* The FUNCTION_DECL for the built-in gather/scatter function,
|
|
or null if an internal function should be used instead. */
|
|
tree decl;
|
|
|
|
/* The loop-invariant base value. */
|
|
tree base;
|
|
|
|
/* The original scalar offset, which is a non-loop-invariant SSA_NAME. */
|
|
tree offset;
|
|
|
|
/* Each offset element should be multiplied by this amount before
|
|
being added to the base. */
|
|
int scale;
|
|
|
|
/* The definition type for the vectorized offset. */
|
|
enum vect_def_type offset_dt;
|
|
|
|
/* The type of the vectorized offset. */
|
|
tree offset_vectype;
|
|
|
|
/* The type of the scalar elements after loading or before storing. */
|
|
tree element_type;
|
|
|
|
/* The type of the scalar elements being loaded or stored. */
|
|
tree memory_type;
|
|
};
|
|
|
|
/* Access Functions. */
|
|
#define STMT_VINFO_TYPE(S) (S)->type
|
|
#define STMT_VINFO_STMT(S) (S)->stmt
|
|
#define STMT_VINFO_RELEVANT(S) (S)->relevant
|
|
#define STMT_VINFO_LIVE_P(S) (S)->live
|
|
#define STMT_VINFO_VECTYPE(S) (S)->vectype
|
|
#define STMT_VINFO_VEC_STMTS(S) (S)->vec_stmts
|
|
#define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
|
|
#define STMT_VINFO_DATA_REF(S) ((S)->dr_aux.dr + 0)
|
|
#define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p
|
|
#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
|
|
#define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type
|
|
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
|
|
#define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val
|
|
#define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment
|
|
#define STMT_VINFO_REDUC_IDX(S) (S)->reduc_idx
|
|
#define STMT_VINFO_FORCE_SINGLE_CYCLE(S) (S)->force_single_cycle
|
|
|
|
#define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop
|
|
#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address
|
|
#define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init
|
|
#define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset
|
|
#define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step
|
|
#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment
|
|
#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
|
|
(S)->dr_wrt_vec_loop.base_misalignment
|
|
#define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
|
|
(S)->dr_wrt_vec_loop.offset_alignment
|
|
#define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
|
|
(S)->dr_wrt_vec_loop.step_alignment
|
|
|
|
#define STMT_VINFO_DR_INFO(S) \
|
|
(gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux)
|
|
|
|
#define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
|
|
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
|
|
#define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq
|
|
#define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info
|
|
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type
|
|
#define STMT_VINFO_GROUPED_ACCESS(S) \
|
|
((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S))
|
|
#define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged
|
|
#define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
|
|
#define STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE(S) (S)->loop_phi_evolution_type
|
|
#define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist
|
|
#define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type
|
|
#define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code
|
|
#define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn
|
|
#define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def
|
|
#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype
|
|
#define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in
|
|
#define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p
|
|
#define STMT_VINFO_SLP_VECT_ONLY_PATTERN(S) (S)->slp_vect_pattern_only_p
|
|
|
|
#define DR_GROUP_FIRST_ELEMENT(S) \
|
|
(gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
|
|
#define DR_GROUP_NEXT_ELEMENT(S) \
|
|
(gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
|
|
#define DR_GROUP_SIZE(S) \
|
|
(gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
|
|
#define DR_GROUP_STORE_COUNT(S) \
|
|
(gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
|
|
#define DR_GROUP_GAP(S) \
|
|
(gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
|
|
|
|
#define REDUC_GROUP_FIRST_ELEMENT(S) \
|
|
(gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element)
|
|
#define REDUC_GROUP_NEXT_ELEMENT(S) \
|
|
(gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element)
|
|
#define REDUC_GROUP_SIZE(S) \
|
|
(gcc_checking_assert (!(S)->dr_aux.dr), (S)->size)
|
|
|
|
#define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope)
|
|
|
|
#define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid)
|
|
#define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
|
|
#define STMT_SLP_TYPE(S) (S)->slp_type
|
|
|
|
/* Contains the scalar or vector costs for a vec_info. */
|
|
class vector_costs
|
|
{
|
|
public:
|
|
vector_costs (vec_info *, bool);
|
|
virtual ~vector_costs () {}
|
|
|
|
/* Update the costs in response to adding COUNT copies of a statement.
|
|
|
|
- WHERE specifies whether the cost occurs in the loop prologue,
|
|
the loop body, or the loop epilogue.
|
|
- KIND is the kind of statement, which is always meaningful.
|
|
- STMT_INFO or NODE, if nonnull, describe the statement that will be
|
|
vectorized.
|
|
- VECTYPE, if nonnull, is the vector type that the vectorized
|
|
statement will operate on. Note that this should be used in
|
|
preference to STMT_VINFO_VECTYPE (STMT_INFO) since the latter
|
|
is not correct for SLP.
|
|
- for unaligned_load and unaligned_store statements, MISALIGN is
|
|
the byte misalignment of the load or store relative to the target's
|
|
preferred alignment for VECTYPE, or DR_MISALIGNMENT_UNKNOWN
|
|
if the misalignment is not known.
|
|
|
|
Return the calculated cost as well as recording it. The return
|
|
value is used for dumping purposes. */
|
|
virtual unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
|
|
stmt_vec_info stmt_info,
|
|
slp_tree node,
|
|
tree vectype, int misalign,
|
|
vect_cost_model_location where);
|
|
|
|
/* Finish calculating the cost of the code. The results can be
|
|
read back using the functions below.
|
|
|
|
If the costs describe vector code, SCALAR_COSTS gives the costs
|
|
of the corresponding scalar code, otherwise it is null. */
|
|
virtual void finish_cost (const vector_costs *scalar_costs);
|
|
|
|
/* The costs in THIS and OTHER both describe ways of vectorizing
|
|
a main loop. Return true if the costs described by THIS are
|
|
cheaper than the costs described by OTHER. Return false if any
|
|
of the following are true:
|
|
|
|
- THIS and OTHER are of equal cost
|
|
- OTHER is better than THIS
|
|
- we can't be sure about the relative costs of THIS and OTHER. */
|
|
virtual bool better_main_loop_than_p (const vector_costs *other) const;
|
|
|
|
/* Likewise, but the costs in THIS and OTHER both describe ways of
|
|
vectorizing an epilogue loop of MAIN_LOOP. */
|
|
virtual bool better_epilogue_loop_than_p (const vector_costs *other,
|
|
loop_vec_info main_loop) const;
|
|
|
|
unsigned int prologue_cost () const;
|
|
unsigned int body_cost () const;
|
|
unsigned int epilogue_cost () const;
|
|
unsigned int outside_cost () const;
|
|
unsigned int total_cost () const;
|
|
unsigned int suggested_unroll_factor () const;
|
|
|
|
protected:
|
|
unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
|
|
unsigned int);
|
|
unsigned int adjust_cost_for_freq (stmt_vec_info, vect_cost_model_location,
|
|
unsigned int);
|
|
int compare_inside_loop_cost (const vector_costs *) const;
|
|
int compare_outside_loop_cost (const vector_costs *) const;
|
|
|
|
/* The region of code that we're considering vectorizing. */
|
|
vec_info *m_vinfo;
|
|
|
|
/* True if we're costing the scalar code, false if we're costing
|
|
the vector code. */
|
|
bool m_costing_for_scalar;
|
|
|
|
/* The costs of the three regions, indexed by vect_cost_model_location. */
|
|
unsigned int m_costs[3];
|
|
|
|
/* The suggested unrolling factor determined at finish_cost. */
|
|
unsigned int m_suggested_unroll_factor;
|
|
|
|
/* True if finish_cost has been called. */
|
|
bool m_finished;
|
|
};
|
|
|
|
/* Create costs for VINFO. COSTING_FOR_SCALAR is true if the costs
|
|
are for scalar code, false if they are for vector code. */
|
|
|
|
inline
|
|
vector_costs::vector_costs (vec_info *vinfo, bool costing_for_scalar)
|
|
: m_vinfo (vinfo),
|
|
m_costing_for_scalar (costing_for_scalar),
|
|
m_costs (),
|
|
m_suggested_unroll_factor(1),
|
|
m_finished (false)
|
|
{
|
|
}
|
|
|
|
/* Return the cost of the prologue code (in abstract units). */
|
|
|
|
inline unsigned int
|
|
vector_costs::prologue_cost () const
|
|
{
|
|
gcc_checking_assert (m_finished);
|
|
return m_costs[vect_prologue];
|
|
}
|
|
|
|
/* Return the cost of the body code (in abstract units). */
|
|
|
|
inline unsigned int
|
|
vector_costs::body_cost () const
|
|
{
|
|
gcc_checking_assert (m_finished);
|
|
return m_costs[vect_body];
|
|
}
|
|
|
|
/* Return the cost of the epilogue code (in abstract units). */
|
|
|
|
inline unsigned int
|
|
vector_costs::epilogue_cost () const
|
|
{
|
|
gcc_checking_assert (m_finished);
|
|
return m_costs[vect_epilogue];
|
|
}
|
|
|
|
/* Return the cost of the prologue and epilogue code (in abstract units). */
|
|
|
|
inline unsigned int
|
|
vector_costs::outside_cost () const
|
|
{
|
|
return prologue_cost () + epilogue_cost ();
|
|
}
|
|
|
|
/* Return the cost of the prologue, body and epilogue code
|
|
(in abstract units). */
|
|
|
|
inline unsigned int
|
|
vector_costs::total_cost () const
|
|
{
|
|
return body_cost () + outside_cost ();
|
|
}
|
|
|
|
/* Return the suggested unroll factor. */
|
|
|
|
inline unsigned int
|
|
vector_costs::suggested_unroll_factor () const
|
|
{
|
|
gcc_checking_assert (m_finished);
|
|
return m_suggested_unroll_factor;
|
|
}
|
|
|
|
#define VECT_MAX_COST 1000
|
|
|
|
/* The maximum number of intermediate steps required in multi-step type
|
|
conversion. */
|
|
#define MAX_INTERM_CVT_STEPS 3
|
|
|
|
#define MAX_VECTORIZATION_FACTOR INT_MAX
|
|
|
|
/* Nonzero if TYPE represents a (scalar) boolean type or type
|
|
in the middle-end compatible with it (unsigned precision 1 integral
|
|
types). Used to determine which types should be vectorized as
|
|
VECTOR_BOOLEAN_TYPE_P. */
|
|
|
|
#define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \
|
|
(TREE_CODE (TYPE) == BOOLEAN_TYPE \
|
|
|| ((TREE_CODE (TYPE) == INTEGER_TYPE \
|
|
|| TREE_CODE (TYPE) == ENUMERAL_TYPE) \
|
|
&& TYPE_PRECISION (TYPE) == 1 \
|
|
&& TYPE_UNSIGNED (TYPE)))
|
|
|
|
inline bool
|
|
nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info)
|
|
{
|
|
return (loop->inner
|
|
&& (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
|
|
}
|
|
|
|
/* PHI is either a scalar reduction phi or a scalar induction phi.
|
|
Return the initial value of the variable on entry to the containing
|
|
loop. */
|
|
|
|
inline tree
|
|
vect_phi_initial_value (gphi *phi)
|
|
{
|
|
basic_block bb = gimple_bb (phi);
|
|
edge pe = loop_preheader_edge (bb->loop_father);
|
|
gcc_assert (pe->dest == bb);
|
|
return PHI_ARG_DEF_FROM_EDGE (phi, pe);
|
|
}
|
|
|
|
/* Return true if STMT_INFO should produce a vector mask type rather than
|
|
a normal nonmask type. */
|
|
|
|
inline bool
|
|
vect_use_mask_type_p (stmt_vec_info stmt_info)
|
|
{
|
|
return stmt_info->mask_precision && stmt_info->mask_precision != ~0U;
|
|
}
|
|
|
|
/* Return TRUE if a statement represented by STMT_INFO is a part of a
|
|
pattern. */
|
|
|
|
inline bool
|
|
is_pattern_stmt_p (stmt_vec_info stmt_info)
|
|
{
|
|
return stmt_info->pattern_stmt_p;
|
|
}
|
|
|
|
/* If STMT_INFO is a pattern statement, return the statement that it
|
|
replaces, otherwise return STMT_INFO itself. */
|
|
|
|
inline stmt_vec_info
|
|
vect_orig_stmt (stmt_vec_info stmt_info)
|
|
{
|
|
if (is_pattern_stmt_p (stmt_info))
|
|
return STMT_VINFO_RELATED_STMT (stmt_info);
|
|
return stmt_info;
|
|
}
|
|
|
|
/* Return the later statement between STMT1_INFO and STMT2_INFO. */
|
|
|
|
inline stmt_vec_info
|
|
get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
|
|
{
|
|
if (gimple_uid (vect_orig_stmt (stmt1_info)->stmt)
|
|
> gimple_uid (vect_orig_stmt (stmt2_info)->stmt))
|
|
return stmt1_info;
|
|
else
|
|
return stmt2_info;
|
|
}
|
|
|
|
/* If STMT_INFO has been replaced by a pattern statement, return the
|
|
replacement statement, otherwise return STMT_INFO itself. */
|
|
|
|
inline stmt_vec_info
|
|
vect_stmt_to_vectorize (stmt_vec_info stmt_info)
|
|
{
|
|
if (STMT_VINFO_IN_PATTERN_P (stmt_info))
|
|
return STMT_VINFO_RELATED_STMT (stmt_info);
|
|
return stmt_info;
|
|
}
|
|
|
|
/* Return true if BB is a loop header. */
|
|
|
|
inline bool
|
|
is_loop_header_bb_p (basic_block bb)
|
|
{
|
|
if (bb == (bb->loop_father)->header)
|
|
return true;
|
|
gcc_checking_assert (EDGE_COUNT (bb->preds) == 1);
|
|
return false;
|
|
}
|
|
|
|
/* Return pow2 (X). */
|
|
|
|
inline int
|
|
vect_pow2 (int x)
|
|
{
|
|
int i, res = 1;
|
|
|
|
for (i = 0; i < x; i++)
|
|
res *= 2;
|
|
|
|
return res;
|
|
}
|
|
|
|
/* Alias targetm.vectorize.builtin_vectorization_cost. */
|
|
|
|
inline int
|
|
builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
|
tree vectype, int misalign)
|
|
{
|
|
return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
|
|
vectype, misalign);
|
|
}
|
|
|
|
/* Get cost by calling cost target builtin. */
|
|
|
|
inline
|
|
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
|
|
{
|
|
return builtin_vectorization_cost (type_of_cost, NULL, 0);
|
|
}
|
|
|
|
/* Alias targetm.vectorize.init_cost. */
|
|
|
|
inline vector_costs *
|
|
init_cost (vec_info *vinfo, bool costing_for_scalar)
|
|
{
|
|
return targetm.vectorize.create_costs (vinfo, costing_for_scalar);
|
|
}
|
|
|
|
extern void dump_stmt_cost (FILE *, int, enum vect_cost_for_stmt,
|
|
stmt_vec_info, slp_tree, tree, int, unsigned,
|
|
enum vect_cost_model_location);
|
|
|
|
/* Alias targetm.vectorize.add_stmt_cost. */
|
|
|
|
inline unsigned
|
|
add_stmt_cost (vector_costs *costs, int count,
|
|
enum vect_cost_for_stmt kind,
|
|
stmt_vec_info stmt_info, slp_tree node,
|
|
tree vectype, int misalign,
|
|
enum vect_cost_model_location where)
|
|
{
|
|
unsigned cost = costs->add_stmt_cost (count, kind, stmt_info, node, vectype,
|
|
misalign, where);
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
dump_stmt_cost (dump_file, count, kind, stmt_info, node, vectype, misalign,
|
|
cost, where);
|
|
return cost;
|
|
}
|
|
|
|
inline unsigned
|
|
add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind,
|
|
enum vect_cost_model_location where)
|
|
{
|
|
gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
|
|
|| kind == scalar_stmt);
|
|
return add_stmt_cost (costs, count, kind, NULL, NULL, NULL_TREE, 0, where);
|
|
}
|
|
|
|
/* Alias targetm.vectorize.add_stmt_cost. */
|
|
|
|
inline unsigned
|
|
add_stmt_cost (vector_costs *costs, stmt_info_for_cost *i)
|
|
{
|
|
return add_stmt_cost (costs, i->count, i->kind, i->stmt_info, i->node,
|
|
i->vectype, i->misalign, i->where);
|
|
}
|
|
|
|
/* Alias targetm.vectorize.finish_cost. */
|
|
|
|
inline void
|
|
finish_cost (vector_costs *costs, const vector_costs *scalar_costs,
|
|
unsigned *prologue_cost, unsigned *body_cost,
|
|
unsigned *epilogue_cost, unsigned *suggested_unroll_factor = NULL)
|
|
{
|
|
costs->finish_cost (scalar_costs);
|
|
*prologue_cost = costs->prologue_cost ();
|
|
*body_cost = costs->body_cost ();
|
|
*epilogue_cost = costs->epilogue_cost ();
|
|
if (suggested_unroll_factor)
|
|
*suggested_unroll_factor = costs->suggested_unroll_factor ();
|
|
}
|
|
|
|
inline void
|
|
add_stmt_costs (vector_costs *costs, stmt_vector_for_cost *cost_vec)
|
|
{
|
|
stmt_info_for_cost *cost;
|
|
unsigned i;
|
|
FOR_EACH_VEC_ELT (*cost_vec, i, cost)
|
|
add_stmt_cost (costs, cost->count, cost->kind, cost->stmt_info,
|
|
cost->node, cost->vectype, cost->misalign, cost->where);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------*/
|
|
/* Info on data references alignment. */
|
|
/*-----------------------------------------------------------------*/
|
|
#define DR_MISALIGNMENT_UNKNOWN (-1)
|
|
#define DR_MISALIGNMENT_UNINITIALIZED (-2)
|
|
|
|
inline void
|
|
set_dr_misalignment (dr_vec_info *dr_info, int val)
|
|
{
|
|
dr_info->misalignment = val;
|
|
}
|
|
|
|
extern int dr_misalignment (dr_vec_info *dr_info, tree vectype,
|
|
poly_int64 offset = 0);
|
|
|
|
#define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
|
|
|
|
/* Only defined once DR_MISALIGNMENT is defined. */
|
|
inline const poly_uint64
|
|
dr_target_alignment (dr_vec_info *dr_info)
|
|
{
|
|
if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
|
|
dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
|
|
return dr_info->target_alignment;
|
|
}
|
|
#define DR_TARGET_ALIGNMENT(DR) dr_target_alignment (DR)
|
|
|
|
inline void
|
|
set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val)
|
|
{
|
|
dr_info->target_alignment = val;
|
|
}
|
|
#define SET_DR_TARGET_ALIGNMENT(DR, VAL) set_dr_target_alignment (DR, VAL)
|
|
|
|
/* Return true if data access DR_INFO is aligned to the targets
|
|
preferred alignment for VECTYPE (which may be less than a full vector). */
|
|
|
|
inline bool
|
|
aligned_access_p (dr_vec_info *dr_info, tree vectype)
|
|
{
|
|
return (dr_misalignment (dr_info, vectype) == 0);
|
|
}
|
|
|
|
/* Return TRUE if the (mis-)alignment of the data access is known with
|
|
respect to the targets preferred alignment for VECTYPE, and FALSE
|
|
otherwise. */
|
|
|
|
inline bool
|
|
known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
|
|
{
|
|
return (dr_misalignment (dr_info, vectype) != DR_MISALIGNMENT_UNKNOWN);
|
|
}
|
|
|
|
/* Return the minimum alignment in bytes that the vectorized version
|
|
of DR_INFO is guaranteed to have. */
|
|
|
|
inline unsigned int
|
|
vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype)
|
|
{
|
|
int misalignment = dr_misalignment (dr_info, vectype);
|
|
if (misalignment == DR_MISALIGNMENT_UNKNOWN)
|
|
return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
|
|
else if (misalignment == 0)
|
|
return known_alignment (DR_TARGET_ALIGNMENT (dr_info));
|
|
return misalignment & -misalignment;
|
|
}
|
|
|
|
/* Return the behavior of DR_INFO with respect to the vectorization context
|
|
(which for outer loop vectorization might not be the behavior recorded
|
|
in DR_INFO itself). */
|
|
|
|
inline innermost_loop_behavior *
|
|
vect_dr_behavior (vec_info *vinfo, dr_vec_info *dr_info)
|
|
{
|
|
stmt_vec_info stmt_info = dr_info->stmt;
|
|
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
|
|
if (loop_vinfo == NULL
|
|
|| !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info))
|
|
return &DR_INNERMOST (dr_info->dr);
|
|
else
|
|
return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
|
|
}
|
|
|
|
/* Return the offset calculated by adding the offset of this DR_INFO to the
|
|
corresponding data_reference's offset. If CHECK_OUTER then use
|
|
vect_dr_behavior to select the appropriate data_reference to use. */
|
|
|
|
inline tree
|
|
get_dr_vinfo_offset (vec_info *vinfo,
|
|
dr_vec_info *dr_info, bool check_outer = false)
|
|
{
|
|
innermost_loop_behavior *base;
|
|
if (check_outer)
|
|
base = vect_dr_behavior (vinfo, dr_info);
|
|
else
|
|
base = &dr_info->dr->innermost;
|
|
|
|
tree offset = base->offset;
|
|
|
|
if (!dr_info->offset)
|
|
return offset;
|
|
|
|
offset = fold_convert (sizetype, offset);
|
|
return fold_build2 (PLUS_EXPR, TREE_TYPE (dr_info->offset), offset,
|
|
dr_info->offset);
|
|
}
|
|
|
|
|
|
/* Return the vect cost model for LOOP. */
|
|
inline enum vect_cost_model
|
|
loop_cost_model (loop_p loop)
|
|
{
|
|
if (loop != NULL
|
|
&& loop->force_vectorize
|
|
&& flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
|
|
return flag_simd_cost_model;
|
|
return flag_vect_cost_model;
|
|
}
|
|
|
|
/* Return true if the vect cost model is unlimited. */
|
|
inline bool
|
|
unlimited_cost_model (loop_p loop)
|
|
{
|
|
return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
|
|
}
|
|
|
|
/* Return true if the loop described by LOOP_VINFO is fully-masked and
|
|
if the first iteration should use a partial mask in order to achieve
|
|
alignment. */
|
|
|
|
inline bool
|
|
vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo)
|
|
{
|
|
return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
|
|
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
|
|
}
|
|
|
|
/* Return the number of vectors of type VECTYPE that are needed to get
|
|
NUNITS elements. NUNITS should be based on the vectorization factor,
|
|
so it is always a known multiple of the number of elements in VECTYPE. */
|
|
|
|
inline unsigned int
|
|
vect_get_num_vectors (poly_uint64 nunits, tree vectype)
|
|
{
|
|
return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
|
|
}
|
|
|
|
/* Return the number of copies needed for loop vectorization when
|
|
a statement operates on vectors of type VECTYPE. This is the
|
|
vectorization factor divided by the number of elements in
|
|
VECTYPE and is always known at compile time. */
|
|
|
|
inline unsigned int
|
|
vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
|
|
{
|
|
return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
|
|
}
|
|
|
|
/* Update maximum unit count *MAX_NUNITS so that it accounts for
|
|
NUNITS. *MAX_NUNITS can be 1 if we haven't yet recorded anything. */
|
|
|
|
inline void
|
|
vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
|
|
{
|
|
/* All unit counts have the form vec_info::vector_size * X for some
|
|
rational X, so two unit sizes must have a common multiple.
|
|
Everything is a multiple of the initial value of 1. */
|
|
*max_nunits = force_common_multiple (*max_nunits, nunits);
|
|
}
|
|
|
|
/* Update maximum unit count *MAX_NUNITS so that it accounts for
|
|
the number of units in vector type VECTYPE. *MAX_NUNITS can be 1
|
|
if we haven't yet recorded any vector types. */
|
|
|
|
inline void
|
|
vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
|
|
{
|
|
vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
|
|
}
|
|
|
|
/* Return the vectorization factor that should be used for costing
|
|
purposes while vectorizing the loop described by LOOP_VINFO.
|
|
Pick a reasonable estimate if the vectorization factor isn't
|
|
known at compile time. */
|
|
|
|
inline unsigned int
|
|
vect_vf_for_cost (loop_vec_info loop_vinfo)
|
|
{
|
|
return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
|
|
}
|
|
|
|
/* Estimate the number of elements in VEC_TYPE for costing purposes.
|
|
Pick a reasonable estimate if the exact number isn't known at
|
|
compile time. */
|
|
|
|
inline unsigned int
|
|
vect_nunits_for_cost (tree vec_type)
|
|
{
|
|
return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
|
|
}
|
|
|
|
/* Return the maximum possible vectorization factor for LOOP_VINFO. */
|
|
|
|
inline unsigned HOST_WIDE_INT
|
|
vect_max_vf (loop_vec_info loop_vinfo)
|
|
{
|
|
unsigned HOST_WIDE_INT vf;
|
|
if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
|
|
return vf;
|
|
return MAX_VECTORIZATION_FACTOR;
|
|
}
|
|
|
|
/* Return the size of the value accessed by unvectorized data reference
|
|
DR_INFO. This is only valid once STMT_VINFO_VECTYPE has been calculated
|
|
for the associated gimple statement, since that guarantees that DR_INFO
|
|
accesses either a scalar or a scalar equivalent. ("Scalar equivalent"
|
|
here includes things like V1SI, which can be vectorized in the same way
|
|
as a plain SI.) */
|
|
|
|
inline unsigned int
|
|
vect_get_scalar_dr_size (dr_vec_info *dr_info)
|
|
{
|
|
return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
|
|
}
|
|
|
|
/* Return true if LOOP_VINFO requires a runtime check for whether the
|
|
vector loop is profitable. */
|
|
|
|
inline bool
|
|
vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo)
|
|
{
|
|
unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
|
|
return (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
|
&& th >= vect_vf_for_cost (loop_vinfo));
|
|
}
|
|
|
|
/* Source location + hotness information. */
|
|
extern dump_user_location_t vect_location;
|
|
|
|
/* A macro for calling:
|
|
dump_begin_scope (MSG, vect_location);
|
|
via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc,
|
|
and then calling
|
|
dump_end_scope ();
|
|
once the object goes out of scope, thus capturing the nesting of
|
|
the scopes.
|
|
|
|
These scopes affect dump messages within them: dump messages at the
|
|
top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those
|
|
in a nested scope implicitly default to MSG_PRIORITY_INTERNALS. */
|
|
|
|
#define DUMP_VECT_SCOPE(MSG) \
|
|
AUTO_DUMP_SCOPE (MSG, vect_location)
|
|
|
|
/* A sentinel class for ensuring that the "vect_location" global gets
|
|
reset at the end of a scope.
|
|
|
|
The "vect_location" global is used during dumping and contains a
|
|
location_t, which could contain references to a tree block via the
|
|
ad-hoc data. This data is used for tracking inlining information,
|
|
but it's not a GC root; it's simply assumed that such locations never
|
|
get accessed if the blocks are optimized away.
|
|
|
|
Hence we need to ensure that such locations are purged at the end
|
|
of any operations using them (e.g. via this class). */
|
|
|
|
class auto_purge_vect_location
|
|
{
|
|
public:
|
|
~auto_purge_vect_location ();
|
|
};
|
|
|
|
/*-----------------------------------------------------------------*/
|
|
/* Function prototypes. */
|
|
/*-----------------------------------------------------------------*/
|
|
|
|
/* Simple loop peeling and versioning utilities for vectorizer's purposes -
|
|
in tree-vect-loop-manip.cc. */
|
|
extern void vect_set_loop_condition (class loop *, loop_vec_info,
|
|
tree, tree, tree, bool);
|
|
extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge);
|
|
class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *,
|
|
class loop *, edge);
|
|
class loop *vect_loop_versioning (loop_vec_info, gimple *);
|
|
extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
|
|
tree *, tree *, tree *, int, bool, bool,
|
|
tree *);
|
|
extern tree vect_get_main_loop_result (loop_vec_info, tree, tree);
|
|
extern void vect_prepare_for_masked_peels (loop_vec_info);
|
|
extern dump_user_location_t find_loop_location (class loop *);
|
|
extern bool vect_can_advance_ivs_p (loop_vec_info);
|
|
extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
|
|
|
|
/* In tree-vect-stmts.cc. */
|
|
extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
|
|
poly_uint64 = 0);
|
|
extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
|
|
extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
|
|
extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
|
|
extern tree get_same_sized_vectype (tree, tree);
|
|
extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
|
|
extern bool vect_get_loop_mask_type (loop_vec_info);
|
|
extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
|
|
stmt_vec_info * = NULL, gimple ** = NULL);
|
|
extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
|
|
tree *, stmt_vec_info * = NULL,
|
|
gimple ** = NULL);
|
|
extern bool vect_is_simple_use (vec_info *, stmt_vec_info, slp_tree,
|
|
unsigned, tree *, slp_tree *,
|
|
enum vect_def_type *,
|
|
tree *, stmt_vec_info * = NULL);
|
|
extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree);
|
|
extern bool supportable_widening_operation (vec_info *,
|
|
enum tree_code, stmt_vec_info,
|
|
tree, tree, enum tree_code *,
|
|
enum tree_code *, int *,
|
|
vec<tree> *);
|
|
extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
|
|
enum tree_code *, int *,
|
|
vec<tree> *);
|
|
|
|
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
|
|
enum vect_cost_for_stmt, stmt_vec_info,
|
|
tree, int, enum vect_cost_model_location);
|
|
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
|
|
enum vect_cost_for_stmt, slp_tree,
|
|
tree, int, enum vect_cost_model_location);
|
|
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
|
|
enum vect_cost_for_stmt,
|
|
enum vect_cost_model_location);
|
|
|
|
/* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO. */
|
|
|
|
inline unsigned
|
|
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
|
|
enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
|
|
int misalign, enum vect_cost_model_location where)
|
|
{
|
|
return record_stmt_cost (body_cost_vec, count, kind, stmt_info,
|
|
STMT_VINFO_VECTYPE (stmt_info), misalign, where);
|
|
}
|
|
|
|
extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
|
|
extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *,
|
|
gimple_stmt_iterator *);
|
|
extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *);
|
|
extern tree vect_get_store_rhs (stmt_vec_info);
|
|
void vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info, unsigned,
|
|
tree op, vec<tree> *, tree = NULL);
|
|
void vect_get_vec_defs (vec_info *, stmt_vec_info, slp_tree, unsigned,
|
|
tree, vec<tree> *,
|
|
tree = NULL, vec<tree> * = NULL,
|
|
tree = NULL, vec<tree> * = NULL,
|
|
tree = NULL, vec<tree> * = NULL);
|
|
void vect_get_vec_defs (vec_info *, stmt_vec_info, slp_tree, unsigned,
|
|
tree, vec<tree> *, tree,
|
|
tree = NULL, vec<tree> * = NULL, tree = NULL,
|
|
tree = NULL, vec<tree> * = NULL, tree = NULL,
|
|
tree = NULL, vec<tree> * = NULL, tree = NULL);
|
|
extern tree vect_init_vector (vec_info *, stmt_vec_info, tree, tree,
|
|
gimple_stmt_iterator *);
|
|
extern tree vect_get_slp_vect_def (slp_tree, unsigned);
|
|
extern bool vect_transform_stmt (vec_info *, stmt_vec_info,
|
|
gimple_stmt_iterator *,
|
|
slp_tree, slp_instance);
|
|
extern void vect_remove_stores (vec_info *, stmt_vec_info);
|
|
extern bool vect_nop_conversion_p (stmt_vec_info);
|
|
extern opt_result vect_analyze_stmt (vec_info *, stmt_vec_info, bool *,
|
|
slp_tree,
|
|
slp_instance, stmt_vector_for_cost *);
|
|
extern void vect_get_load_cost (vec_info *, stmt_vec_info, int,
|
|
dr_alignment_support, int, bool,
|
|
unsigned int *, unsigned int *,
|
|
stmt_vector_for_cost *,
|
|
stmt_vector_for_cost *, bool);
|
|
extern void vect_get_store_cost (vec_info *, stmt_vec_info, int,
|
|
dr_alignment_support, int,
|
|
unsigned int *, stmt_vector_for_cost *);
|
|
extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);
|
|
extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
|
|
extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
|
|
extern void optimize_mask_stores (class loop*);
|
|
extern tree vect_gen_while (gimple_seq *, tree, tree, tree,
|
|
const char * = nullptr);
|
|
extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
|
|
extern opt_result vect_get_vector_types_for_stmt (vec_info *,
|
|
stmt_vec_info, tree *,
|
|
tree *, unsigned int = 0);
|
|
extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
|
|
|
|
/* In tree-vect-data-refs.cc. */
|
|
extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
|
|
extern enum dr_alignment_support vect_supportable_dr_alignment
|
|
(vec_info *, dr_vec_info *, tree, int);
|
|
extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
|
|
extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
|
|
extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
|
|
extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
|
|
extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
|
|
extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
|
|
extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
|
|
extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
|
|
extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
|
|
tree, int, internal_fn *, tree *);
|
|
extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
|
|
gather_scatter_info *);
|
|
extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
|
|
vec<data_reference_p> *,
|
|
vec<int> *, int);
|
|
extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);
|
|
extern void vect_record_base_alignments (vec_info *);
|
|
extern tree vect_create_data_ref_ptr (vec_info *,
|
|
stmt_vec_info, tree, class loop *, tree,
|
|
tree *, gimple_stmt_iterator *,
|
|
gimple **, bool,
|
|
tree = NULL_TREE);
|
|
extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
|
|
stmt_vec_info, tree);
|
|
extern void vect_copy_ref_info (tree, tree);
|
|
extern tree vect_create_destination_var (tree, tree);
|
|
extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
|
|
extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
|
|
extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
|
|
extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
|
|
extern void vect_permute_store_chain (vec_info *, vec<tree> &,
|
|
unsigned int, stmt_vec_info,
|
|
gimple_stmt_iterator *, vec<tree> *);
|
|
extern tree vect_setup_realignment (vec_info *,
|
|
stmt_vec_info, gimple_stmt_iterator *,
|
|
tree *, enum dr_alignment_support, tree,
|
|
class loop **);
|
|
extern void vect_transform_grouped_load (vec_info *, stmt_vec_info, vec<tree>,
|
|
int, gimple_stmt_iterator *);
|
|
extern void vect_record_grouped_load_vectors (vec_info *,
|
|
stmt_vec_info, vec<tree>);
|
|
extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
|
|
extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
|
|
const char * = NULL);
|
|
extern tree vect_create_addr_base_for_vector_ref (vec_info *,
|
|
stmt_vec_info, gimple_seq *,
|
|
tree);
|
|
|
|
/* In tree-vect-loop.cc. */
|
|
extern tree neutral_op_for_reduction (tree, code_helper, tree);
|
|
extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
|
|
bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *);
|
|
/* Used in tree-vect-loop-manip.cc */
|
|
extern opt_result vect_determine_partial_vectors_and_peeling (loop_vec_info,
|
|
bool);
|
|
/* Used in gimple-loop-interchange.c and tree-parloops.cc. */
|
|
extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
|
|
enum tree_code);
|
|
extern bool needs_fold_left_reduction_p (tree, code_helper);
|
|
/* Drive for loop analysis stage. */
|
|
extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *);
|
|
extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
|
|
extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
|
|
tree *, bool);
|
|
extern tree vect_halve_mask_nunits (tree, machine_mode);
|
|
extern tree vect_double_mask_nunits (tree, machine_mode);
|
|
extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
|
|
unsigned int, tree, tree);
|
|
extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
|
|
unsigned int, tree, unsigned int);
|
|
extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
|
|
tree, unsigned int);
|
|
extern tree vect_get_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
|
|
unsigned int);
|
|
extern gimple_seq vect_gen_len (tree, tree, tree, tree);
|
|
extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
|
|
extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
|
|
|
|
/* Drive for loop transformation stage. */
|
|
extern class loop *vect_transform_loop (loop_vec_info, gimple *);
|
|
struct vect_loop_form_info
|
|
{
|
|
tree number_of_iterations;
|
|
tree number_of_iterationsm1;
|
|
tree assumptions;
|
|
gcond *loop_cond;
|
|
gcond *inner_loop_cond;
|
|
};
|
|
extern opt_result vect_analyze_loop_form (class loop *, vect_loop_form_info *);
|
|
extern loop_vec_info vect_create_loop_vinfo (class loop *, vec_info_shared *,
|
|
const vect_loop_form_info *,
|
|
loop_vec_info = nullptr);
|
|
extern bool vectorizable_live_operation (vec_info *,
|
|
stmt_vec_info, gimple_stmt_iterator *,
|
|
slp_tree, slp_instance, int,
|
|
bool, stmt_vector_for_cost *);
|
|
extern bool vectorizable_reduction (loop_vec_info, stmt_vec_info,
|
|
slp_tree, slp_instance,
|
|
stmt_vector_for_cost *);
|
|
extern bool vectorizable_induction (loop_vec_info, stmt_vec_info,
|
|
gimple **, slp_tree,
|
|
stmt_vector_for_cost *);
|
|
extern bool vect_transform_reduction (loop_vec_info, stmt_vec_info,
|
|
gimple_stmt_iterator *,
|
|
gimple **, slp_tree);
|
|
extern bool vect_transform_cycle_phi (loop_vec_info, stmt_vec_info,
|
|
gimple **,
|
|
slp_tree, slp_instance);
|
|
extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
|
|
gimple **, slp_tree);
|
|
extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
|
|
stmt_vector_for_cost *);
|
|
extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
|
|
gimple **, slp_tree, stmt_vector_for_cost *);
|
|
extern bool vect_emulated_vector_p (tree);
|
|
extern bool vect_can_vectorize_without_simd_p (tree_code);
|
|
extern bool vect_can_vectorize_without_simd_p (code_helper);
|
|
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
|
|
stmt_vector_for_cost *,
|
|
stmt_vector_for_cost *,
|
|
stmt_vector_for_cost *);
|
|
extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
|
|
|
|
/* Nonlinear induction. */
|
|
extern tree vect_peel_nonlinear_iv_init (gimple_seq*, tree, tree,
|
|
tree, enum vect_induction_op_type);
|
|
|
|
/* In tree-vect-slp.cc. */
|
|
extern void vect_slp_init (void);
|
|
extern void vect_slp_fini (void);
|
|
extern void vect_free_slp_instance (slp_instance);
|
|
extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree> &,
|
|
gimple_stmt_iterator *, poly_uint64,
|
|
bool, unsigned *,
|
|
unsigned * = nullptr, bool = false);
|
|
extern bool vect_slp_analyze_operations (vec_info *);
|
|
extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
|
|
extern opt_result vect_analyze_slp (vec_info *, unsigned);
|
|
extern bool vect_make_slp_decision (loop_vec_info);
|
|
extern void vect_detect_hybrid_slp (loop_vec_info);
|
|
extern void vect_optimize_slp (vec_info *);
|
|
extern void vect_gather_slp_loads (vec_info *);
|
|
extern void vect_get_slp_defs (slp_tree, vec<tree> *);
|
|
extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
|
|
unsigned n = -1U);
|
|
extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
|
|
extern bool vect_slp_function (function *);
|
|
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
|
|
extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
|
|
extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
|
|
extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
|
|
unsigned int * = NULL,
|
|
tree * = NULL, tree * = NULL);
|
|
extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
|
|
const vec<tree> &, unsigned int, vec<tree> &);
|
|
extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
|
|
extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
|
|
extern void vect_free_slp_tree (slp_tree);
|
|
extern bool compatible_calls_p (gcall *, gcall *);
|
|
|
|
/* In tree-vect-patterns.cc. */
|
|
extern void
|
|
vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
|
|
|
|
/* Pattern recognition functions.
|
|
Additional pattern recognition functions can (and will) be added
|
|
in the future. */
|
|
void vect_pattern_recog (vec_info *);
|
|
|
|
/* In tree-vectorizer.cc. */
|
|
unsigned vectorize_loops (void);
|
|
void vect_free_loop_info_assumptions (class loop *);
|
|
gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);
|
|
bool vect_stmt_dominates_stmt_p (gimple *, gimple *);
|
|
|
|
/* SLP Pattern matcher types, tree-vect-slp-patterns.cc. */
|
|
|
|
/* Forward declaration of possible two operands operation that can be matched
|
|
by the complex numbers pattern matchers. */
|
|
enum _complex_operation : unsigned;
|
|
|
|
/* All possible load permute values that could result from the partial data-flow
|
|
analysis. */
|
|
typedef enum _complex_perm_kinds {
|
|
PERM_UNKNOWN,
|
|
PERM_EVENODD,
|
|
PERM_ODDEVEN,
|
|
PERM_ODDODD,
|
|
PERM_EVENEVEN,
|
|
/* Can be combined with any other PERM values. */
|
|
PERM_TOP
|
|
} complex_perm_kinds_t;
|
|
|
|
/* Cache from nodes to the load permutation they represent. */
|
|
typedef hash_map <slp_tree, complex_perm_kinds_t>
|
|
slp_tree_to_load_perm_map_t;
|
|
|
|
/* Cache from nodes pair to being compatible or not. */
|
|
typedef pair_hash <nofree_ptr_hash <_slp_tree>,
|
|
nofree_ptr_hash <_slp_tree>> slp_node_hash;
|
|
typedef hash_map <slp_node_hash, bool> slp_compat_nodes_map_t;
|
|
|
|
|
|
/* Vector pattern matcher base class. All SLP pattern matchers must inherit
|
|
from this type. */
|
|
|
|
class vect_pattern
|
|
{
|
|
protected:
|
|
/* The number of arguments that the IFN requires. */
|
|
unsigned m_num_args;
|
|
|
|
/* The internal function that will be used when a pattern is created. */
|
|
internal_fn m_ifn;
|
|
|
|
/* The current node being inspected. */
|
|
slp_tree *m_node;
|
|
|
|
/* The list of operands to be the children for the node produced when the
|
|
internal function is created. */
|
|
vec<slp_tree> m_ops;
|
|
|
|
/* Default constructor where NODE is the root of the tree to inspect. */
|
|
vect_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
|
{
|
|
this->m_ifn = ifn;
|
|
this->m_node = node;
|
|
this->m_ops.create (0);
|
|
if (m_ops)
|
|
this->m_ops.safe_splice (*m_ops);
|
|
}
|
|
|
|
public:
|
|
|
|
/* Create a new instance of the pattern matcher class of the given type. */
|
|
static vect_pattern* recognize (slp_tree_to_load_perm_map_t *,
|
|
slp_compat_nodes_map_t *, slp_tree *);
|
|
|
|
/* Build the pattern from the data collected so far. */
|
|
virtual void build (vec_info *) = 0;
|
|
|
|
/* Default destructor. */
|
|
virtual ~vect_pattern ()
|
|
{
|
|
this->m_ops.release ();
|
|
}
|
|
};
|
|
|
|
/* Function pointer to create a new pattern matcher from a generic type. */
|
|
typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree_to_load_perm_map_t *,
|
|
slp_compat_nodes_map_t *,
|
|
slp_tree *);
|
|
|
|
/* List of supported pattern matchers. */
|
|
extern vect_pattern_decl_t slp_patterns[];
|
|
|
|
/* Number of supported pattern matchers. */
|
|
extern size_t num__slp_patterns;
|
|
|
|
/* ----------------------------------------------------------------------
|
|
Target support routines
|
|
-----------------------------------------------------------------------
|
|
The following routines are provided to simplify costing decisions in
|
|
target code. Please add more as needed. */
|
|
|
|
/* Return true if an operaton of kind KIND for STMT_INFO represents
|
|
the extraction of an element from a vector in preparation for
|
|
storing the element to memory. */
|
|
inline bool
|
|
vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info)
|
|
{
|
|
return (kind == vec_to_scalar
|
|
&& STMT_VINFO_DATA_REF (stmt_info)
|
|
&& DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)));
|
|
}
|
|
|
|
/* Return true if STMT_INFO represents part of a reduction. */
|
|
inline bool
|
|
vect_is_reduction (stmt_vec_info stmt_info)
|
|
{
|
|
return STMT_VINFO_REDUC_IDX (stmt_info) >= 0;
|
|
}
|
|
|
|
/* If STMT_INFO describes a reduction, return the vect_reduction_type
|
|
of the reduction it describes, otherwise return -1. */
|
|
inline int
|
|
vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info)
|
|
{
|
|
if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
|
|
if (STMT_VINFO_REDUC_DEF (stmt_info))
|
|
{
|
|
stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
|
|
return int (STMT_VINFO_REDUC_TYPE (reduc_info));
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the
|
|
scalar type of the values being compared. Return null otherwise. */
|
|
inline tree
|
|
vect_embedded_comparison_type (stmt_vec_info stmt_info)
|
|
{
|
|
if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
|
|
if (gimple_assign_rhs_code (assign) == COND_EXPR)
|
|
{
|
|
tree cond = gimple_assign_rhs1 (assign);
|
|
if (COMPARISON_CLASS_P (cond))
|
|
return TREE_TYPE (TREE_OPERAND (cond, 0));
|
|
}
|
|
return NULL_TREE;
|
|
}
|
|
|
|
/* If STMT_INFO is a comparison or contains an embedded comparison, return the
|
|
scalar type of the values being compared. Return null otherwise. */
|
|
inline tree
|
|
vect_comparison_type (stmt_vec_info stmt_info)
|
|
{
|
|
if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
|
|
if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
|
|
return TREE_TYPE (gimple_assign_rhs1 (assign));
|
|
return vect_embedded_comparison_type (stmt_info);
|
|
}
|
|
|
|
/* Return true if STMT_INFO extends the result of a load. */
|
|
inline bool
|
|
vect_is_extending_load (class vec_info *vinfo, stmt_vec_info stmt_info)
|
|
{
|
|
/* Although this is quite large for an inline function, this part
|
|
at least should be inline. */
|
|
gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
|
|
if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
|
|
return false;
|
|
|
|
tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
|
|
tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
|
|
tree rhs_type = TREE_TYPE (rhs);
|
|
if (!INTEGRAL_TYPE_P (lhs_type)
|
|
|| !INTEGRAL_TYPE_P (rhs_type)
|
|
|| TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
|
|
return false;
|
|
|
|
stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
|
|
return (def_stmt_info
|
|
&& STMT_VINFO_DATA_REF (def_stmt_info)
|
|
&& DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
|
|
}
|
|
|
|
/* Return true if STMT_INFO is an integer truncation. */
|
|
inline bool
|
|
vect_is_integer_truncation (stmt_vec_info stmt_info)
|
|
{
|
|
gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
|
|
if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
|
|
return false;
|
|
|
|
tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
|
|
tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
|
|
return (INTEGRAL_TYPE_P (lhs_type)
|
|
&& INTEGRAL_TYPE_P (rhs_type)
|
|
&& TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
|
|
}
|
|
|
|
#endif /* GCC_TREE_VECTORIZER_H */
|