613 lines
20 KiB
C
613 lines
20 KiB
C
|
/* A class for building vector constant patterns.
|
||
|
Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||
|
|
||
|
This file is part of GCC.
|
||
|
|
||
|
GCC is free software; you can redistribute it and/or modify it under
|
||
|
the terms of the GNU General Public License as published by the Free
|
||
|
Software Foundation; either version 3, or (at your option) any later
|
||
|
version.
|
||
|
|
||
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with GCC; see the file COPYING3. If not see
|
||
|
<http://www.gnu.org/licenses/>. */
|
||
|
|
||
|
#ifndef GCC_VECTOR_BUILDER_H
|
||
|
#define GCC_VECTOR_BUILDER_H
|
||
|
|
||
|
/* This class is a wrapper around auto_vec<T> for building vectors of T.
|
||
|
It aims to encode each vector as npatterns interleaved patterns,
|
||
|
where each pattern represents a sequence:
|
||
|
|
||
|
{ BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
|
||
|
|
||
|
The first three elements in each pattern provide enough information
|
||
|
to derive the other elements. If all patterns have a STEP of zero,
|
||
|
we only need to encode the first two elements in each pattern.
|
||
|
If BASE1 is also equal to BASE0 for all patterns, we only need to
|
||
|
encode the first element in each pattern. The number of encoded
|
||
|
elements per pattern is given by nelts_per_pattern.
|
||
|
|
||
|
The class can be used in two ways:
|
||
|
|
||
|
1. It can be used to build a full image of the vector, which is then
|
||
|
canonicalized by finalize (). In this case npatterns is initially
|
||
|
the number of elements in the vector and nelts_per_pattern is
|
||
|
initially 1.
|
||
|
|
||
|
2. It can be used to build a vector that already has a known encoding.
|
||
|
This is preferred since it is more efficient and copes with
|
||
|
variable-length vectors. finalize () then canonicalizes the encoding
|
||
|
to a simpler form if possible.
|
||
|
|
||
|
Shape is the type that specifies the number of elements in the vector
|
||
|
and (where relevant) the type of each element.
|
||
|
|
||
|
The derived class Derived provides the functionality of this class
|
||
|
for specific Ts. Derived needs to provide the following interface:
|
||
|
|
||
|
bool equal_p (T elt1, T elt2) const;
|
||
|
|
||
|
Return true if elements ELT1 and ELT2 are equal.
|
||
|
|
||
|
bool allow_steps_p () const;
|
||
|
|
||
|
Return true if a stepped representation is OK. We don't allow
|
||
|
linear series for anything other than integers, to avoid problems
|
||
|
with rounding.
|
||
|
|
||
|
bool integral_p (T elt) const;
|
||
|
|
||
|
Return true if element ELT can be interpreted as an integer.
|
||
|
|
||
|
StepType step (T elt1, T elt2) const;
|
||
|
|
||
|
Return the value of element ELT2 minus the value of element ELT1,
|
||
|
given integral_p (ELT1) && integral_p (ELT2). There is no fixed
|
||
|
choice of StepType.
|
||
|
|
||
|
T apply_step (T base, unsigned int factor, StepType step) const;
|
||
|
|
||
|
Return a vector element with the value BASE + FACTOR * STEP.
|
||
|
|
||
|
bool can_elide_p (T elt) const;
|
||
|
|
||
|
Return true if we can drop element ELT, even if the retained
|
||
|
elements are different. This is provided for TREE_OVERFLOW
|
||
|
handling.
|
||
|
|
||
|
void note_representative (T *elt1_ptr, T elt2);
|
||
|
|
||
|
Record that ELT2 is being elided, given that ELT1_PTR points to
|
||
|
the last encoded element for the containing pattern. This is
|
||
|
again provided for TREE_OVERFLOW handling.
|
||
|
|
||
|
static poly_uint64 shape_nelts (Shape shape);
|
||
|
|
||
|
Return the number of elements in SHAPE.
|
||
|
|
||
|
The class provides additional functionality for the case in which
|
||
|
T can describe a vector constant as well as an individual element.
|
||
|
This functionality requires:
|
||
|
|
||
|
static poly_uint64 nelts_of (T x);
|
||
|
|
||
|
Return the number of elements in vector constant X.
|
||
|
|
||
|
static unsigned int npatterns_of (T x);
|
||
|
|
||
|
Return the number of patterns used to encode vector constant X.
|
||
|
|
||
|
static unsigned int nelts_per_pattern_of (T x);
|
||
|
|
||
|
Return the number of elements used to encode each pattern
|
||
|
in vector constant X. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
class vector_builder : public auto_vec<T, 32>
|
||
|
{
|
||
|
public:
|
||
|
vector_builder ();
|
||
|
|
||
|
poly_uint64 full_nelts () const { return m_full_nelts; }
|
||
|
unsigned int npatterns () const { return m_npatterns; }
|
||
|
unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
|
||
|
unsigned int encoded_nelts () const;
|
||
|
bool encoded_full_vector_p () const;
|
||
|
T elt (unsigned int) const;
|
||
|
unsigned int count_dups (int, int, int) const;
|
||
|
|
||
|
bool operator == (const Derived &) const;
|
||
|
bool operator != (const Derived &x) const { return !operator == (x); }
|
||
|
|
||
|
bool new_unary_operation (Shape, T, bool);
|
||
|
bool new_binary_operation (Shape, T, T, bool);
|
||
|
|
||
|
void finalize ();
|
||
|
|
||
|
static unsigned int binary_encoded_nelts (T, T);
|
||
|
|
||
|
protected:
|
||
|
void new_vector (poly_uint64, unsigned int, unsigned int);
|
||
|
void reshape (unsigned int, unsigned int);
|
||
|
bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
|
||
|
bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
|
||
|
bool try_npatterns (unsigned int);
|
||
|
|
||
|
private:
|
||
|
vector_builder (const vector_builder &);
|
||
|
vector_builder &operator= (const vector_builder &);
|
||
|
Derived *derived () { return static_cast<Derived *> (this); }
|
||
|
const Derived *derived () const;
|
||
|
|
||
|
poly_uint64 m_full_nelts;
|
||
|
unsigned int m_npatterns;
|
||
|
unsigned int m_nelts_per_pattern;
|
||
|
};
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
inline const Derived *
|
||
|
vector_builder<T, Shape, Derived>::derived () const
|
||
|
{
|
||
|
return static_cast<const Derived *> (this);
|
||
|
}
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
inline
|
||
|
vector_builder<T, Shape, Derived>::vector_builder ()
|
||
|
: m_full_nelts (0),
|
||
|
m_npatterns (0),
|
||
|
m_nelts_per_pattern (0)
|
||
|
{}
|
||
|
|
||
|
/* Return the number of elements that are explicitly encoded. The vec
|
||
|
starts with these explicitly-encoded elements and may contain additional
|
||
|
elided elements. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
inline unsigned int
|
||
|
vector_builder<T, Shape, Derived>::encoded_nelts () const
|
||
|
{
|
||
|
return m_npatterns * m_nelts_per_pattern;
|
||
|
}
|
||
|
|
||
|
/* Return true if every element of the vector is explicitly encoded. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
inline bool
|
||
|
vector_builder<T, Shape, Derived>::encoded_full_vector_p () const
|
||
|
{
|
||
|
return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
|
||
|
}
|
||
|
|
||
|
/* Start building a vector that has FULL_NELTS elements. Initially
|
||
|
encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
void
|
||
|
vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts,
|
||
|
unsigned int npatterns,
|
||
|
unsigned int nelts_per_pattern)
|
||
|
{
|
||
|
m_full_nelts = full_nelts;
|
||
|
m_npatterns = npatterns;
|
||
|
m_nelts_per_pattern = nelts_per_pattern;
|
||
|
this->reserve (encoded_nelts ());
|
||
|
this->truncate (0);
|
||
|
}
|
||
|
|
||
|
/* Return true if this vector and OTHER have the same elements and
|
||
|
are encoded in the same way. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
bool
|
||
|
vector_builder<T, Shape, Derived>::operator == (const Derived &other) const
|
||
|
{
|
||
|
if (maybe_ne (m_full_nelts, other.m_full_nelts)
|
||
|
|| m_npatterns != other.m_npatterns
|
||
|
|| m_nelts_per_pattern != other.m_nelts_per_pattern)
|
||
|
return false;
|
||
|
|
||
|
unsigned int nelts = encoded_nelts ();
|
||
|
for (unsigned int i = 0; i < nelts; ++i)
|
||
|
if (!derived ()->equal_p ((*this)[i], other[i]))
|
||
|
return false;
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/* Return the value of vector element I, which might or might not be
|
||
|
encoded explicitly. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
T
|
||
|
vector_builder<T, Shape, Derived>::elt (unsigned int i) const
|
||
|
{
|
||
|
/* First handle elements that are already present in the underlying
|
||
|
vector, regardless of whether they're part of the encoding or not. */
|
||
|
if (i < this->length ())
|
||
|
return (*this)[i];
|
||
|
|
||
|
/* Extrapolation is only possible if the encoding has been fully
|
||
|
populated. */
|
||
|
gcc_checking_assert (encoded_nelts () <= this->length ());
|
||
|
|
||
|
/* Identify the pattern that contains element I and work out the index of
|
||
|
the last encoded element for that pattern. */
|
||
|
unsigned int pattern = i % m_npatterns;
|
||
|
unsigned int count = i / m_npatterns;
|
||
|
unsigned int final_i = encoded_nelts () - m_npatterns + pattern;
|
||
|
T final = (*this)[final_i];
|
||
|
|
||
|
/* If there are no steps, the final encoded value is the right one. */
|
||
|
if (m_nelts_per_pattern <= 2)
|
||
|
return final;
|
||
|
|
||
|
/* Otherwise work out the value from the last two encoded elements. */
|
||
|
T prev = (*this)[final_i - m_npatterns];
|
||
|
return derived ()->apply_step (final, count - 2,
|
||
|
derived ()->step (prev, final));
|
||
|
}
|
||
|
|
||
|
/* Try to start building a new vector of shape SHAPE that holds the result of
|
||
|
a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the
|
||
|
operation can handle stepped encodings directly, without having to expand
|
||
|
the full sequence.
|
||
|
|
||
|
Return true if the operation is possible, which it always is when
|
||
|
ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
bool
|
||
|
vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec,
|
||
|
bool allow_stepped_p)
|
||
|
{
|
||
|
poly_uint64 full_nelts = Derived::shape_nelts (shape);
|
||
|
gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec)));
|
||
|
unsigned int npatterns = Derived::npatterns_of (vec);
|
||
|
unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec);
|
||
|
if (!allow_stepped_p && nelts_per_pattern > 2)
|
||
|
{
|
||
|
if (!full_nelts.is_constant ())
|
||
|
return false;
|
||
|
npatterns = full_nelts.to_constant ();
|
||
|
nelts_per_pattern = 1;
|
||
|
}
|
||
|
derived ()->new_vector (shape, npatterns, nelts_per_pattern);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/* Try to start building a new vector of shape SHAPE that holds the result of
|
||
|
a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is
|
||
|
true if the operation can handle stepped encodings directly, without
|
||
|
having to expand the full sequence.
|
||
|
|
||
|
Return true if the operation is possible. Leave the builder unchanged
|
||
|
otherwise. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
bool
|
||
|
vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape,
|
||
|
T vec1, T vec2,
|
||
|
bool allow_stepped_p)
|
||
|
{
|
||
|
poly_uint64 full_nelts = Derived::shape_nelts (shape);
|
||
|
gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1))
|
||
|
&& known_eq (full_nelts, Derived::nelts_of (vec2)));
|
||
|
/* Conceptually we split the patterns in VEC1 and VEC2 until we have
|
||
|
an equal number for both. Each split pattern requires the same
|
||
|
number of elements per pattern as the original. E.g. splitting:
|
||
|
|
||
|
{ 1, 2, 3, ... }
|
||
|
|
||
|
into two gives:
|
||
|
|
||
|
{ 1, 3, 5, ... }
|
||
|
{ 2, 4, 6, ... }
|
||
|
|
||
|
while splitting:
|
||
|
|
||
|
{ 1, 0, ... }
|
||
|
|
||
|
into two gives:
|
||
|
|
||
|
{ 1, 0, ... }
|
||
|
{ 0, 0, ... }. */
|
||
|
unsigned int npatterns
|
||
|
= least_common_multiple (Derived::npatterns_of (vec1),
|
||
|
Derived::npatterns_of (vec2));
|
||
|
unsigned int nelts_per_pattern
|
||
|
= MAX (Derived::nelts_per_pattern_of (vec1),
|
||
|
Derived::nelts_per_pattern_of (vec2));
|
||
|
if (!allow_stepped_p && nelts_per_pattern > 2)
|
||
|
{
|
||
|
if (!full_nelts.is_constant ())
|
||
|
return false;
|
||
|
npatterns = full_nelts.to_constant ();
|
||
|
nelts_per_pattern = 1;
|
||
|
}
|
||
|
derived ()->new_vector (shape, npatterns, nelts_per_pattern);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/* Return the number of elements that the caller needs to operate on in
|
||
|
order to handle a binary operation on vector constants VEC1 and VEC2.
|
||
|
This static function is used instead of new_binary_operation if the
|
||
|
result of the operation is not a constant vector. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
unsigned int
|
||
|
vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2)
|
||
|
{
|
||
|
poly_uint64 nelts = Derived::nelts_of (vec1);
|
||
|
gcc_assert (known_eq (nelts, Derived::nelts_of (vec2)));
|
||
|
/* See new_binary_operation for details. */
|
||
|
unsigned int npatterns
|
||
|
= least_common_multiple (Derived::npatterns_of (vec1),
|
||
|
Derived::npatterns_of (vec2));
|
||
|
unsigned int nelts_per_pattern
|
||
|
= MAX (Derived::nelts_per_pattern_of (vec1),
|
||
|
Derived::nelts_per_pattern_of (vec2));
|
||
|
unsigned HOST_WIDE_INT const_nelts;
|
||
|
if (nelts.is_constant (&const_nelts))
|
||
|
return MIN (npatterns * nelts_per_pattern, const_nelts);
|
||
|
return npatterns * nelts_per_pattern;
|
||
|
}
|
||
|
|
||
|
/* Return the number of leading duplicate elements in the range
|
||
|
[START:END:STEP]. The value is always at least 1. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
unsigned int
|
||
|
vector_builder<T, Shape, Derived>::count_dups (int start, int end,
|
||
|
int step) const
|
||
|
{
|
||
|
gcc_assert ((end - start) % step == 0);
|
||
|
|
||
|
unsigned int ndups = 1;
|
||
|
for (int i = start + step;
|
||
|
i != end && derived ()->equal_p (elt (i), elt (start));
|
||
|
i += step)
|
||
|
ndups++;
|
||
|
return ndups;
|
||
|
}
|
||
|
|
||
|
/* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
|
||
|
but without changing the underlying vector. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
void
|
||
|
vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns,
|
||
|
unsigned int nelts_per_pattern)
|
||
|
{
|
||
|
unsigned int old_encoded_nelts = encoded_nelts ();
|
||
|
unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
|
||
|
gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts);
|
||
|
unsigned int next = new_encoded_nelts - npatterns;
|
||
|
for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i)
|
||
|
{
|
||
|
derived ()->note_representative (&(*this)[next], (*this)[i]);
|
||
|
next += 1;
|
||
|
if (next == new_encoded_nelts)
|
||
|
next -= npatterns;
|
||
|
}
|
||
|
m_npatterns = npatterns;
|
||
|
m_nelts_per_pattern = nelts_per_pattern;
|
||
|
}
|
||
|
|
||
|
/* Return true if elements [START, END) contain a repeating sequence of
|
||
|
STEP elements. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
bool
|
||
|
vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start,
|
||
|
unsigned int end,
|
||
|
unsigned int step)
|
||
|
{
|
||
|
for (unsigned int i = start; i < end - step; ++i)
|
||
|
if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
|
||
|
return false;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/* Return true if elements [START, END) contain STEP interleaved linear
|
||
|
series. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
bool
|
||
|
vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start,
|
||
|
unsigned int end,
|
||
|
unsigned int step)
|
||
|
{
|
||
|
if (!derived ()->allow_steps_p ())
|
||
|
return false;
|
||
|
|
||
|
for (unsigned int i = start + step * 2; i < end; ++i)
|
||
|
{
|
||
|
T elt1 = (*this)[i - step * 2];
|
||
|
T elt2 = (*this)[i - step];
|
||
|
T elt3 = (*this)[i];
|
||
|
|
||
|
if (!derived ()->integral_p (elt1)
|
||
|
|| !derived ()->integral_p (elt2)
|
||
|
|| !derived ()->integral_p (elt3))
|
||
|
return false;
|
||
|
|
||
|
if (maybe_ne (derived ()->step (elt1, elt2),
|
||
|
derived ()->step (elt2, elt3)))
|
||
|
return false;
|
||
|
|
||
|
if (!derived ()->can_elide_p (elt3))
|
||
|
return false;
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/* Try to change the number of encoded patterns to NPATTERNS, returning
|
||
|
true on success. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
bool
|
||
|
vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns)
|
||
|
{
|
||
|
if (m_nelts_per_pattern == 1)
|
||
|
{
|
||
|
/* See whether NPATTERNS is valid with the current 1-element-per-pattern
|
||
|
encoding. */
|
||
|
if (repeating_sequence_p (0, encoded_nelts (), npatterns))
|
||
|
{
|
||
|
reshape (npatterns, 1);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/* We can only increase the number of elements per pattern if all
|
||
|
elements are still encoded explicitly. */
|
||
|
if (!encoded_full_vector_p ())
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (m_nelts_per_pattern <= 2)
|
||
|
{
|
||
|
/* See whether NPATTERNS is valid with a 2-element-per-pattern
|
||
|
encoding. */
|
||
|
if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns))
|
||
|
{
|
||
|
reshape (npatterns, 2);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/* We can only increase the number of elements per pattern if all
|
||
|
elements are still encoded explicitly. */
|
||
|
if (!encoded_full_vector_p ())
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (m_nelts_per_pattern <= 3)
|
||
|
{
|
||
|
/* See whether we have NPATTERNS interleaved linear series,
|
||
|
giving a 3-element-per-pattern encoding. */
|
||
|
if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns))
|
||
|
{
|
||
|
reshape (npatterns, 3);
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
gcc_unreachable ();
|
||
|
}
|
||
|
|
||
|
/* Replace the current encoding with the canonical form. */
|
||
|
|
||
|
template<typename T, typename Shape, typename Derived>
|
||
|
void
|
||
|
vector_builder<T, Shape, Derived>::finalize ()
|
||
|
{
|
||
|
/* The encoding requires the same number of elements to come from each
|
||
|
pattern. */
|
||
|
gcc_assert (multiple_p (m_full_nelts, m_npatterns));
|
||
|
|
||
|
/* Allow the caller to build more elements than necessary. For example,
|
||
|
it's often convenient to build a stepped vector from the natural
|
||
|
encoding of three elements even if the vector itself only has two. */
|
||
|
unsigned HOST_WIDE_INT const_full_nelts;
|
||
|
if (m_full_nelts.is_constant (&const_full_nelts)
|
||
|
&& const_full_nelts <= encoded_nelts ())
|
||
|
{
|
||
|
m_npatterns = const_full_nelts;
|
||
|
m_nelts_per_pattern = 1;
|
||
|
}
|
||
|
|
||
|
/* Try to whittle down the number of elements per pattern. That is:
|
||
|
|
||
|
1. If we have stepped patterns whose steps are all 0, reduce the
|
||
|
number of elements per pattern from 3 to 2.
|
||
|
|
||
|
2. If we have background fill values that are the same as the
|
||
|
foreground values, reduce the number of elements per pattern
|
||
|
from 2 to 1. */
|
||
|
while (m_nelts_per_pattern > 1
|
||
|
&& repeating_sequence_p (encoded_nelts () - m_npatterns * 2,
|
||
|
encoded_nelts (), m_npatterns))
|
||
|
/* The last two sequences of M_NPATTERNS elements are equal,
|
||
|
so remove the last one. */
|
||
|
reshape (m_npatterns, m_nelts_per_pattern - 1);
|
||
|
|
||
|
if (pow2p_hwi (m_npatterns))
|
||
|
{
|
||
|
/* Try to halve the number of patterns while doing so gives a
|
||
|
valid pattern. This approach is linear in the number of
|
||
|
elements, whereas searcing from 1 up would be O(n*log(n)).
|
||
|
|
||
|
Each halving step tries to keep the number of elements per pattern
|
||
|
the same. If that isn't possible, and if all elements are still
|
||
|
explicitly encoded, the halving step can instead increase the number
|
||
|
of elements per pattern.
|
||
|
|
||
|
E.g. for:
|
||
|
|
||
|
{ 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8
|
||
|
|
||
|
we first realize that the second half of the sequence is not
|
||
|
equal to the first, so we cannot maintain 1 element per pattern
|
||
|
for npatterns == 4. Instead we halve the number of patterns
|
||
|
and double the number of elements per pattern, treating this
|
||
|
as a "foreground" { 0, 2, 3, 4 } against a "background" of
|
||
|
{ 5, 6, 7, 8 | 5, 6, 7, 8 ... }:
|
||
|
|
||
|
{ 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4
|
||
|
|
||
|
Next we realize that this is *not* a foreround of { 0, 2 }
|
||
|
against a background of { 3, 4 | 3, 4 ... }, so the only
|
||
|
remaining option for reducing the number of patterns is
|
||
|
to use a foreground of { 0, 2 } against a stepped background
|
||
|
of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still
|
||
|
haven't elided any elements:
|
||
|
|
||
|
{ 0, 2 | 3, 4 | 5, 6 } npatterns == 2
|
||
|
|
||
|
This in turn can be reduced to a foreground of { 0 } against a
|
||
|
stepped background of { 1 | 2 | 3 ... }:
|
||
|
|
||
|
{ 0 | 2 | 3 } npatterns == 1
|
||
|
|
||
|
This last step would not have been possible for:
|
||
|
|
||
|
{ 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */
|
||
|
while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
|
||
|
continue;
|
||
|
|
||
|
/* Builders of arbitrary fixed-length vectors can use:
|
||
|
|
||
|
new_vector (x, x, 1)
|
||
|
|
||
|
so that every element is specified explicitly. Handle cases
|
||
|
that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 }
|
||
|
would be for 2-bit elements. We'll have treated them as
|
||
|
duplicates in the loop above. */
|
||
|
if (m_nelts_per_pattern == 1
|
||
|
&& m_full_nelts.is_constant (&const_full_nelts)
|
||
|
&& this->length () >= const_full_nelts
|
||
|
&& (m_npatterns & 3) == 0
|
||
|
&& stepped_sequence_p (m_npatterns / 4, const_full_nelts,
|
||
|
m_npatterns / 4))
|
||
|
{
|
||
|
reshape (m_npatterns / 4, 3);
|
||
|
while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
|
||
|
continue;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
/* For the non-power-of-2 case, do a simple search up from 1. */
|
||
|
for (unsigned int i = 1; i <= m_npatterns / 2; ++i)
|
||
|
if (m_npatterns % i == 0 && try_npatterns (i))
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
#endif
|