FAQ
In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/c29dfc6a6c45f86648c51f961304254cc3c449b9?hp=e67bc19562c85b51b0d54a3997beeb3ceee2447a>

- Log -----------------------------------------------------------------
commit c29dfc6a6c45f86648c51f961304254cc3c449b9
Author: Karl Williamson <khw@cpan.org>
Date: Thu Mar 3 14:24:39 2016 -0700

     regcomp.c: Add shortcuts to some inversion list ops

     When taking the union or intersection of two inversion lists, not
     infrequently the result is the same as one of the inputs. An example is
     the union of a set that contains all code points with any other set.
     The result is the set that contains all code points. Also, not
     infrequently, the result is to overwrite one of the inputs. If the
     other input contributed nothing to the result, the whole operation is
     effectively a no-op, as the result has the same contents as the input it
     overwrites.

     It turns out that it is cheap to keep track of if just one input
     contributes to the result. This commit does that, and short-circuits
     the rest of the code if it is going to turn out to be a no-op.
-----------------------------------------------------------------------

Summary of changes:
  regcomp.c | 150 ++++++++++++++++++++++++++++++++++++++++++++------------------
  1 file changed, 106 insertions(+), 44 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index e6b352b..0b1e606 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -8931,13 +8931,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       * length there. The preface says to incorporate its examples into your
       * code at your own risk.
       *
- * The algorithm is like a merge sort.
- *
- * XXX A potential performance improvement is to keep track as we go along
- * if only one of the inputs contributes to the result, meaning the other
- * is a subset of that one. In that case, we can skip the final copy and
- * return the larger of the input lists, but then outside code might need
- * to keep track of whether to free the input list or not */
+ * The algorithm is like a merge sort. */

      const UV* array_a; /* a's array */
      const UV* array_b;
@@ -8952,6 +8946,10 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      UV i_b = 0;
      UV i_u = 0;

+ bool has_something_from_a = FALSE;
+ bool has_something_from_b = FALSE;
+
+
      /* running count, as explained in the algorithm source book; items are
       * stopped accumulating and are output when the count changes to/from 0.
       * The count is incremented when we start a range that's in the set, and
@@ -9117,10 +9115,12 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
   {
       cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
       cp= array_a[i_a++];
+ has_something_from_a = TRUE;
   }
   else {
       cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
       cp = array_b[i_b++];
+ has_something_from_b = TRUE;
   }

   /* Here, have chosen which of the two inputs to look at. Only output
@@ -9169,10 +9169,54 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       * be output. (If 'count' is non-zero, then the input list we exhausted
       * has everything remaining up to the machine's limit in its set, and hence
       * in the union, so there will be no further output. */
- len_u = i_u;
- if (count == 0) {
- /* At most one of the subexpressions will be non-zero */
- len_u += (len_a - i_a) + (len_b - i_b);
+ if (count != 0) {
+
+ /* Here, there is nothing left to put in the union. If the union came
+ * only from the input that it is to overwrite, this whole operation is
+ * a no-op */
+ if ( UNLIKELY(! has_something_from_b && *output == a)
+ || UNLIKELY(! has_something_from_a && *output == b))
+ {
+ SvREFCNT_dec_NN(u);
+ return;
+ }
+
+ len_u = i_u;
+ }
+ else {
+ /* When 'count' is 0, the list that was exhausted (if one was shorter
+ * than the other) ended with everything above it not in its set. That
+ * means that the remaining part of the union is precisely the same as
+ * the non-exhausted list, so can just copy it unchanged. If only one
+ * of the inputs contributes to the union, and the output is to
+ * overwite that particular input, then this whole operation was a
+ * no-op. */
+
+ IV copy_count = len_a - i_a;
+ if (copy_count > 0) {
+ if (UNLIKELY(! has_something_from_b && *output == a)) {
+ SvREFCNT_dec_NN(u);
+ return;
+ }
+ Copy(array_a + i_a, array_u + i_u, copy_count, UV);
+ len_u = i_u + copy_count;
+ }
+ else if ((copy_count = len_b - i_b) > 0) {
+ if (UNLIKELY(! has_something_from_a && *output == b)) {
+ SvREFCNT_dec_NN(u);
+ return;
+ }
+ Copy(array_b + i_b, array_u + i_u, copy_count, UV);
+ len_u = i_u + copy_count;
+ } else if ( UNLIKELY(! has_something_from_b && *output == a)
+ || UNLIKELY(! has_something_from_a && *output == b))
+ {
+ /* Here, both arrays are exhausted, so no need to do any additional
+ * copying. Also here, the union came only from the input that it is
+ * to overwrite, so this whole operation is a no-op */
+ SvREFCNT_dec_NN(u);
+ return;
+ }
      }

      /* Set the result to the final length, which can change the pointer to
@@ -9184,22 +9228,6 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
   array_u = invlist_array(u);
      }

- /* When 'count' is 0, the list that was exhausted (if one was shorter than
- * the other) ended with everything above it not in its set. That means
- * that the remaining part of the union is precisely the same as the
- * non-exhausted list, so can just copy it unchanged. (If both lists were
- * exhausted at the same time, then the operations below will be both 0.)
- */
- if (count == 0) {
- IV copy_count; /* At most one will have a non-zero copy count */
- if ((copy_count = len_a - i_a) > 0) {
- Copy(array_a + i_a, array_u + i_u, copy_count, UV);
- }
- else if ((copy_count = len_b - i_b) > 0) {
- Copy(array_b + i_b, array_u + i_u, copy_count, UV);
- }
- }
-
      /* If the output is not to overwrite either of the inputs, just return the
       * calculated union */
      if (a != *output && b != *output) {
@@ -9272,6 +9300,9 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       */
      UV count = 0;

+ bool has_something_from_a = FALSE;
+ bool has_something_from_b = FALSE;
+
      PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
      assert(a != b);

@@ -9368,10 +9399,12 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
   {
       cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
       cp= array_a[i_a++];
+ has_something_from_a = TRUE;
   }
   else {
       cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
       cp= array_b[i_b++];
+ has_something_from_b = TRUE;
   }

   /* Here, have chosen which of the two inputs to look at. Only output
@@ -9413,12 +9446,52 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
   count++;
      }

- /* The final length is what we've output so far plus what else is in the
- * intersection. At most one of the subexpressions below will be non-zero
- * */
- len_r = i_r;
- if (count >= 2) {
- len_r += (len_a - i_a) + (len_b - i_b);
+ if (count < 2) {
+
+ /* Here, there is nothing left to put in the intersection. If the
+ * intersection came only from the input that it is to overwrite, this
+ * whole operation is a no-op */
+ if ( UNLIKELY(! has_something_from_b && *i == a)
+ || UNLIKELY(! has_something_from_a && *i == b))
+ {
+ SvREFCNT_dec_NN(r);
+ return;
+ }
+
+ len_r = i_r;
+ }
+ else {
+ /* When 'count' is 2 or more, the list that was exhausted, what remains
+ * in the intersection is precisely the same as the non-exhausted list,
+ * so can just copy it unchanged. If only one of the inputs
+ * contributes to the intersection, and the output is to overwite that
+ * particular input, then this whole operation was a no-op. */
+
+ IV copy_count = len_a - i_a;
+ if (copy_count > 0) {
+ if (UNLIKELY(! has_something_from_b && *i == a)) {
+ SvREFCNT_dec_NN(r);
+ return;
+ }
+ Copy(array_a + i_a, array_r + i_r, copy_count, UV);
+ len_r = i_r + copy_count;
+ }
+ else if ((copy_count = len_b - i_b) > 0) {
+ if (UNLIKELY(! has_something_from_a && *i == b)) {
+ SvREFCNT_dec_NN(r);
+ return;
+ }
+ Copy(array_b + i_b, array_r + i_r, copy_count, UV);
+ len_r = i_r + copy_count;
+ } else if ( UNLIKELY(! has_something_from_b && *i == a)
+ || UNLIKELY(! has_something_from_a && *i == b))
+ {
+ /* Here, both arrays are exhausted, so no need to do any additional
+ * copying. Also here, the intersection came only from the input
+ * that it is to overwrite, so this whole operation is a no-op */
+ SvREFCNT_dec_NN(r);
+ return;
+ }
      }

      /* Set the result to the final length, which can change the pointer to
@@ -9430,17 +9503,6 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
   array_r = invlist_array(r);
      }

- /* Finish outputting any remaining */
- if (count >= 2) { /* At most one will have a non-zero copy count */
- IV copy_count;
- if ((copy_count = len_a - i_a) > 0) {
- Copy(array_a + i_a, array_r + i_r, copy_count, UV);
- }
- else if ((copy_count = len_b - i_b) > 0) {
- Copy(array_b + i_b, array_r + i_r, copy_count, UV);
- }
- }
-
      /* If the output is not to overwrite either of the inputs, just return the
       * calculated intersection */
      if (a != *i && b != *i) {

--
Perl5 Master Repository

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupperl5-changes @
categoriesperl
postedMay 26, '16 at 4:20a
activeMay 26, '16 at 4:20a
posts1
users1
websiteperl.org

1 user in discussion

Karl Williamson: 1 post

People

Translate

site design / logo © 2018 Grokbase