FAQ
Added: hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/SqlMathUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/SqlMathUtil.java?rev=1554960&view=auto
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/SqlMathUtil.java (added)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/SqlMathUtil.java Thu Jan 2 23:20:09 2014
@@ -0,0 +1,607 @@
+/**
+ * Copyright (c) Microsoft Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import java.util.Arrays;
+
+/**
+ * This code was originally written for Microsoft PolyBase.
+ *
+ * Misc utilities used in this package.
+ */
+public final class SqlMathUtil {
+
+ /** Mask to convert a long to a negative long. */
+ public static final long NEGATIVE_LONG_MASK = 0x8000000000000000L;
+
+ /** Mask to convert a long to an unsigned long. */
+ public static final long FULLBITS_63 = 0x7FFFFFFFFFFFFFFFL;
+
+ /** Mask to convert an int to a negative int. */
+ public static final int NEGATIVE_INT_MASK = 0x80000000;
+
+ /** Mask to convert signed integer to unsigned long. */
+ public static final long LONG_MASK = 0xFFFFFFFFL;
+
+ /** Mask to convert an int to an unsigned int. */
+ public static final int FULLBITS_31 = 0x7FFFFFFF;
+
+ /** Max unsigned integer. */
+ public static final int FULLBITS_32 = 0xFFFFFFFF;
+
+ /** 5^13 fits in 2^31. */
+ public static final int MAX_POWER_FIVE_INT31 = 13;
+
+ /** 5^x. All unsigned values. */
+ public static final int[] POWER_FIVES_INT31 = new int[MAX_POWER_FIVE_INT31 + 1];
+
+ /** 5^27 fits in 2^63. */
+ public static final int MAX_POWER_FIVE_INT63 = 27;
+
+ /** 5^x. All unsigned values. */
+ public static final long[] POWER_FIVES_INT63 = new long[MAX_POWER_FIVE_INT63 + 1];
+
+ /** 5^55 fits in 2^128. */
+ public static final int MAX_POWER_FIVE_INT128 = 55;
+
+ /** 5^x. */
+ public static final UnsignedInt128[] POWER_FIVES_INT128 = new UnsignedInt128[MAX_POWER_FIVE_INT128 + 1];
+
+ /**
+ * 1/5^x, scaled to 128bits (in other words, 2^128/5^x). Because of flooring,
+ * this is same or smaller than real value.
+ */
+ public static final UnsignedInt128[] INVERSE_POWER_FIVES_INT128 = new UnsignedInt128[MAX_POWER_FIVE_INT128 + 1];
+
+ /** 10^9 fits in 2^31. */
+ public static final int MAX_POWER_TEN_INT31 = 9;
+
+ /** 10^x. All unsigned values. */
+ public static final int[] POWER_TENS_INT31 = new int[MAX_POWER_TEN_INT31 + 1];
+
+ /** 5 * 10^(x-1). */
+ public static final int[] ROUND_POWER_TENS_INT31 = new int[MAX_POWER_TEN_INT31 + 1];
+
+ /** 10^38 fits in UnsignedInt128. */
+ public static final int MAX_POWER_TEN_INT128 = 38;
+
+ /** 10^x. */
+ public static final UnsignedInt128[] POWER_TENS_INT128 = new UnsignedInt128[MAX_POWER_TEN_INT128 + 1];
+
+ /** 5 * 10^(x-1). */
+ public static final UnsignedInt128[] ROUND_POWER_TENS_INT128 = new UnsignedInt128[MAX_POWER_TEN_INT128 + 1];
+
+ /**
+ * 1/10^x, scaled to 128bits, also word-shifted for better accuracy. Because
+ * of flooring, this is same or smaller than real value.
+ */
+ public static final UnsignedInt128[] INVERSE_POWER_TENS_INT128 = new UnsignedInt128[MAX_POWER_TEN_INT128 + 1];
+
+ /** number of words shifted up in each INVERSE_POWER_TENS_INT128. */
+ public static final int[] INVERSE_POWER_TENS_INT128_WORD_SHIFTS = new int[MAX_POWER_TEN_INT128 + 1];
+
+ /** To quickly calculate bit length for up to 256. */
+ private static final byte[] BIT_LENGTH;
+
+ /** Used in division. */
+ private static final long BASE = (1L << 32);
+
+ /**
+ * Turn on or off the highest bit of an int value.
+ *
+ * @param val
+ * the value to modify
+ * @param positive
+ * whether to turn off (positive) or on (negative).
+ * @return unsigned int value
+ */
+ public static int setSignBitInt(int val, boolean positive) {
+ if (positive) {
+ return val & FULLBITS_31;
+ }
+ return val | NEGATIVE_INT_MASK;
+ }
+
+ /**
+ * Turn on or off the highest bit of a long value.
+ *
+ * @param val
+ * the value to modify
+ * @param positive
+ * whether to turn off (positive) or on (negative).
+ * @return unsigned long value
+ */
+ public static long setSignBitLong(long val, boolean positive) {
+ if (positive) {
+ return val & FULLBITS_63;
+ }
+ return val | NEGATIVE_LONG_MASK;
+ }
+
+ /**
+ * Returns the minimal number of bits to represent the given integer value.
+ *
+ * @param word
+ * int32 value
+ * @return the minimal number of bits to represent the given integer value
+ */
+ public static short bitLengthInWord(int word) {
+ if (word < 0) {
+ return 32;
+ }
+ if (word < (1 << 16)) {
+ if (word < 1 << 8) {
+ return BIT_LENGTH[word];
+ } else {
+ return (short) (BIT_LENGTH[word >>> 8] + 8);
+ }
+ } else {
+ if (word < (1 << 24)) {
+ return (short) (BIT_LENGTH[word >>> 16] + 16);
+ } else {
+ return (short) (BIT_LENGTH[word >>> 24] + 24);
+ }
+ }
+ }
+
+ /**
+ * Returns the minimal number of bits to represent the words.
+ *
+ * @param v0
+ * v0
+ * @param v1
+ * v1
+ * @param v2
+ * v2
+ * @param v3
+ * v3
+ * @return the minimal number of bits to represent the words
+ */
+ public static short bitLength(int v0, int v1, int v2, int v3) {
+ if (v3 != 0) {
+ return (short) (bitLengthInWord(v3) + 96);
+ }
+ if (v2 != 0) {
+ return (short) (bitLengthInWord(v2) + 64);
+ }
+ if (v1 != 0) {
+ return (short) (bitLengthInWord(v1) + 32);
+ }
+ return bitLengthInWord(v0);
+ }
+
+ /**
+ * If we can assume JDK 1.8, this should use
+ * java.lang.Integer.compareUnsigned(), which will be replaced with intrinsics
+ * in JVM.
+ *
+ * @param x
+ * the first {@code int} to compare
+ * @param y
+ * the second {@code int} to compare
+ * @return the value {@code 0} if {@code x == y}; a value less than {@code 0}
+ * if {@code x < y} as unsigned values; and a value greater than
+ * {@code 0} if {@code x > y} as unsigned values
+ * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524"
+ */
+ public static int compareUnsignedInt(int x, int y) {
+
+ // Can't assume JDK 1.8, so implementing this explicitly.
+ // return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE);
+ if (x == y) {
+ return 0;
+ }
+ if (x + Integer.MIN_VALUE < y + Integer.MIN_VALUE) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+
+ /**
+ * If we can assume JDK 1.8, this should use java.lang.Long.compareUnsigned(),
+ * which will be replaced with intrinsics in JVM.
+ *
+ * @param x
+ * the first {@code int} to compare
+ * @param y
+ * the second {@code int} to compare
+ * @return the value {@code 0} if {@code x == y}; a value less than {@code 0}
+ * if {@code x < y} as unsigned values; and a value greater than
+ * {@code 0} if {@code x > y} as unsigned values
+ * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524"
+ */
+ public static int compareUnsignedLong(long x, long y) {
+
+ // Can't assume JDK 1.8, so implementing this explicitly.
+ // return Long.compare(x + Long.MIN_VALUE, y + Long.MIN_VALUE);
+ if (x == y) {
+ return 0;
+ }
+ if (x + Long.MIN_VALUE < y + Long.MIN_VALUE) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+
+ /**
+ * If we can assume JDK 1.8, this should use java.lang.Long.divideUnsigned(),
+ * which will be replaced with intrinsics in JVM.
+ *
+ * @param dividend
+ * the value to be divided
+ * @param divisor
+ * the value doing the dividing
+ * @return the unsigned quotient of the first argument divided by the second
+ * argument
+ * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524"
+ */
+ public static long divideUnsignedLong(long dividend, long divisor) {
+ if (divisor < 0L) {
+
+ // Answer must be 0 or 1 depending on relative magnitude
+ // of dividend and divisor.
+ return (compareUnsignedLong(dividend, divisor)) < 0 ? 0L : 1L;
+ }
+
+ if (dividend >= 0) { // Both inputs non-negative
+ return dividend / divisor;
+ } else {
+
+ // simple division.
+ // Yes, we should do something like this:
+ // http://www.hackersdelight.org/divcMore.pdf
+ // but later... (anyway this will be eventually replaced by
+ // intrinsics in Java 8)
+
+ // an equivalent algorithm exists in
+ // com.google.common.primitives.UnsingedLongs
+ long quotient = ((dividend >>> 1L) / divisor) << 1L;
+ long remainder = dividend - quotient * divisor;
+ if (compareUnsignedLong(remainder, divisor) >= 0) {
+ return quotient + 1;
+ }
+ return quotient;
+ }
+ }
+
+ /**
+ * If we can assume JDK 1.8, this should use
+ * java.lang.Long.remainderUnsigned(), which will be replaced with intrinsics
+ * in JVM.
+ *
+ * @param dividend
+ * the value to be divided
+ * @param divisor
+ * the value doing the dividing
+ * @return the unsigned remainder of the first argument divided by the second
+ * argument
+ * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524"
+ */
+ public static long remainderUnsignedLong(long dividend, long divisor) {
+ if (divisor < 0L) {
+
+ // because divisor is negative, quotient is at most 1.
+ // remainder must be dividend itself (quotient=0), or dividend -
+ // divisor
+ return (compareUnsignedLong(dividend, divisor)) < 0 ? dividend : dividend
+ - divisor;
+ }
+
+ if (dividend >= 0L) { // signed comparisons
+ return dividend % divisor;
+ } else {
+ // same above
+ long quotient = ((dividend >>> 1L) / divisor) << 1L;
+ long remainder = dividend - quotient * divisor;
+ if (compareUnsignedLong(remainder, divisor) >= 0) {
+ return remainder - divisor;
+ }
+ return remainder;
+ }
+ }
+
+ /**
+ * @param lo
+ * low 32bit
+ * @param hi
+ * high 32bit
+ * @return long value that combines the two integers
+ */
+ public static long combineInts(int lo, int hi) {
+ return ((hi & LONG_MASK) << 32L) | (lo & LONG_MASK);
+ }
+
+ /**
+ * @param val
+ * long value
+ * @return high 32bit of the given value
+ */
+ public static int extractHiInt(long val) {
+ return (int) (val >> 32);
+ }
+
+ /**
+ * @param val
+ * long value
+ * @return low 32bit of the given value
+ */
+ public static int extractLowInt(long val) {
+ return (int) val;
+ }
+
+ /** Throws an overflow exception. */
+ static void throwOverflowException() {
+ throw new ArithmeticException("Overflow");
+ }
+
+ /** Throws a divide-by-zero exception. */
+ static void throwZeroDivisionException() {
+ throw new ArithmeticException("Divide by zero");
+ }
+
+ /**
+ * Multi-precision one super-digit multiply in place.
+ *
+ * @param inOut
+ * @param multiplier
+ */
+ private static void multiplyMultiPrecision(int[] inOut, int multiplier) {
+ long multiplierUnsigned = multiplier & SqlMathUtil.LONG_MASK;
+ long product = 0L;
+ for (int i = 0; i < inOut.length; ++i) {
+ product = (inOut[i] & SqlMathUtil.LONG_MASK) * multiplierUnsigned
+ + (product >>> 32);
+ inOut[i] = (int) product;
+ }
+ if ((product >> 32) != 0) {
+ SqlMathUtil.throwOverflowException();
+ }
+ }
+
+ /**
+ * Multi-precision one super-digit divide in place.
+ *
+ * @param inOut
+ * @param divisor
+ * @return
+ */
+ private static int divideMultiPrecision(int[] inOut, int divisor) {
+ long divisorUnsigned = divisor & SqlMathUtil.LONG_MASK;
+ long quotient;
+ long remainder = 0;
+ for (int i = inOut.length - 1; i >= 0; --i) {
+ remainder = (inOut[i] & SqlMathUtil.LONG_MASK) + (remainder << 32);
+ quotient = remainder / divisorUnsigned;
+ inOut[i] = (int) quotient;
+ remainder %= divisorUnsigned;
+ }
+ return (int) remainder;
+ }
+
+ private static int arrayValidLength(int[] array) {
+ int len = array.length;
+ while (len >= 0 && array[len - 1] == 0) {
+ --len;
+ }
+ return len < 0 ? 0 : len;
+ }
+
+ /**
+ * Multi-precision divide. dividend and divisor not changed. Assumes that
+ * there is enough room in quotient for results. Drawbacks of this
+ * implementation: 1) Need one extra super-digit in R 2) As it modifies D
+ * during work, then it restores it back (this is necessary because the caller
+ * doesn't expect D to change) 3) Always get Q and R - if R is unnecessary,
+ * can be slightly faster.
+ *
+ * @param dividend
+ * dividend. in.
+ * @param divisor
+ * divisor. in.
+ * @param quotient
+ * quotient. out.
+ * @return remainder
+ */
+ public static int[] divideMultiPrecision(int[] dividend, int[] divisor,
+ int[] quotient) {
+ final int dividendLength = arrayValidLength(dividend);
+ final int divisorLength = arrayValidLength(divisor);
+ Arrays.fill(quotient, 0);
+
+ // Remainder := Dividend
+ int[] remainder = new int[dividend.length + 1];
+ System.arraycopy(dividend, 0, remainder, 0, dividend.length);
+ remainder[remainder.length - 1] = 0;
+
+ if (divisorLength == 0) {
+ throwZeroDivisionException();
+ }
+ if (dividendLength < divisorLength) {
+ return remainder;
+ }
+ if (divisorLength == 1) {
+ int rem = divideMultiPrecision(remainder, divisor[0]);
+ System.arraycopy(remainder, 0, quotient, 0, quotient.length);
+ Arrays.fill(remainder, 0);
+ remainder[0] = rem;
+ return remainder;
+ }
+
+ // Knuth, "The Art of Computer Programming", 3rd edition, vol.II, Alg.D,
+ // pg 272
+ // D1. Normalize so high digit of D >= BASE/2 - that guarantee
+ // that QH will not be too far from the correct digit later in D3
+ int d1 = (int) (BASE / ((divisor[divisorLength - 1] & LONG_MASK) + 1L));
+ if (d1 > 1) {
+
+ // We are modifying divisor here, so make a local copy.
+ int[] newDivisor = new int[divisorLength];
+ System.arraycopy(divisor, 0, newDivisor, 0, divisorLength);
+ multiplyMultiPrecision(newDivisor, d1);
+ divisor = newDivisor;
+ multiplyMultiPrecision(remainder, d1);
+ }
+
+ // only 32bits, but long to behave as unsigned
+ long dHigh = (divisor[divisorLength - 1] & LONG_MASK);
+ long dLow = (divisor[divisorLength - 2] & LONG_MASK);
+
+ // D2 already done - iulRindex initialized before normalization of R.
+ // D3-D7. Loop on iulRindex - obtaining digits one-by-one, as "in paper"
+ for (int rIndex = remainder.length - 1; rIndex >= divisorLength; --rIndex) {
+
+ // D3. Calculate Q hat - estimation of the next digit
+ long accum = combineInts(remainder[rIndex - 1], remainder[rIndex]);
+ int qhat;
+ if (dHigh == (remainder[rIndex] & LONG_MASK)) {
+ qhat = (int) (BASE - 1);
+ } else {
+ qhat = (int) divideUnsignedLong(accum, dHigh);
+ }
+
+ int rhat = (int) (accum - (qhat & LONG_MASK) * dHigh);
+ while (compareUnsignedLong(dLow * (qhat & LONG_MASK),
+ combineInts(remainder[rIndex - 2], rhat)) > 0) {
+ qhat--;
+ if ((rhat & LONG_MASK) >= -((int) dHigh)) {
+ break;
+ }
+ rhat += dHigh;
+ }
+
+ // D4. Multiply and subtract: (some digits of) R -= D * QH
+ long dwlMulAccum = 0;
+ accum = BASE;
+ int iulRwork = rIndex - divisorLength;
+ for (int dIndex = 0; dIndex < divisorLength; dIndex++, iulRwork++) {
+ dwlMulAccum += (qhat & LONG_MASK) * (divisor[dIndex] & LONG_MASK);
+ accum += (remainder[iulRwork] & LONG_MASK)
+ - (extractLowInt(dwlMulAccum) & LONG_MASK);
+ dwlMulAccum = (extractHiInt(dwlMulAccum) & LONG_MASK);
+ remainder[iulRwork] = extractLowInt(accum);
+ accum = (extractHiInt(accum) & LONG_MASK) + BASE - 1;
+ }
+ accum += (remainder[iulRwork] & LONG_MASK) - dwlMulAccum;
+ remainder[iulRwork] = extractLowInt(accum);
+ quotient[rIndex - divisorLength] = qhat;
+
+ // D5. Test remainder. Carry indicates result<0, therefore QH 1 too
+ // large
+ if (extractHiInt(accum) == 0) {
+
+ // D6. Add back - probability is 2**(-31). R += D. Q[digit] -= 1
+ quotient[rIndex - divisorLength] = qhat - 1;
+ int carry = 0;
+ int dIndex = 0;
+ for (iulRwork = rIndex - divisorLength; dIndex < divisorLength; dIndex++, iulRwork++) {
+ long accum2 = (divisor[dIndex] & LONG_MASK)
+ + (remainder[iulRwork] & LONG_MASK) + (carry & LONG_MASK);
+ carry = extractHiInt(accum2);
+ remainder[iulRwork] = extractLowInt(accum2);
+ }
+ remainder[iulRwork] += carry;
+ }
+ }
+
+ // D8. Unnormalize: Divide R to get result
+ if (d1 > 1) {
+ divideMultiPrecision(remainder, d1);
+ }
+
+ return remainder;
+ }
+
+ static {
+ BIT_LENGTH = new byte[256];
+ BIT_LENGTH[0] = 0;
+ for (int i = 1; i < 8; ++i) {
+ for (int j = 1 << (i - 1); j < 1 << i; ++j) {
+ BIT_LENGTH[j] = (byte) i;
+ }
+ }
+
+ POWER_FIVES_INT31[0] = 1;
+ for (int i = 1; i < POWER_FIVES_INT31.length; ++i) {
+ POWER_FIVES_INT31[i] = POWER_FIVES_INT31[i - 1] * 5;
+ assert (POWER_FIVES_INT31[i] > 0);
+ }
+
+ POWER_FIVES_INT63[0] = 1L;
+ for (int i = 1; i < POWER_FIVES_INT63.length; ++i) {
+ POWER_FIVES_INT63[i] = POWER_FIVES_INT63[i - 1] * 5L;
+ assert (POWER_FIVES_INT63[i] > 0L);
+ }
+
+ POWER_TENS_INT31[0] = 1;
+ ROUND_POWER_TENS_INT31[0] = 0;
+ for (int i = 1; i < POWER_TENS_INT31.length; ++i) {
+ POWER_TENS_INT31[i] = POWER_TENS_INT31[i - 1] * 10;
+ assert (POWER_TENS_INT31[i] > 0);
+ ROUND_POWER_TENS_INT31[i] = POWER_TENS_INT31[i] >> 1;
+ }
+
+ POWER_FIVES_INT128[0] = new UnsignedInt128(1);
+ INVERSE_POWER_FIVES_INT128[0] = new UnsignedInt128(0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF);
+ for (int i = 1; i < POWER_FIVES_INT128.length; ++i) {
+ POWER_FIVES_INT128[i] = new UnsignedInt128(POWER_FIVES_INT128[i - 1]);
+ POWER_FIVES_INT128[i].multiplyDestructive(5);
+ INVERSE_POWER_FIVES_INT128[i] = new UnsignedInt128(
+ INVERSE_POWER_FIVES_INT128[i - 1]);
+ INVERSE_POWER_FIVES_INT128[i].divideDestructive(5);
+ }
+
+ POWER_TENS_INT128[0] = new UnsignedInt128(1);
+ ROUND_POWER_TENS_INT128[0] = new UnsignedInt128(0);
+ INVERSE_POWER_TENS_INT128[0] = new UnsignedInt128(0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF);
+ INVERSE_POWER_TENS_INT128_WORD_SHIFTS[0] = 0;
+ int[] inverseTens = new int[8];
+ Arrays.fill(inverseTens, 0xFFFFFFFF);
+ for (int i = 1; i < POWER_TENS_INT128.length; ++i) {
+ final int divisor = 10;
+ POWER_TENS_INT128[i] = new UnsignedInt128(POWER_TENS_INT128[i - 1]);
+ POWER_TENS_INT128[i].multiplyDestructive(divisor);
+ ROUND_POWER_TENS_INT128[i] = POWER_TENS_INT128[i].shiftRightConstructive(
+ 1, false);
+
+ long quotient;
+ long remainder = 0;
+ for (int j = inverseTens.length - 1; j >= 0; --j) {
+ quotient = ((inverseTens[j] & SqlMathUtil.LONG_MASK) + (remainder << 32))
+ / divisor;
+ remainder = ((inverseTens[j] & SqlMathUtil.LONG_MASK) + (remainder << 32))
+ % divisor;
+ inverseTens[j] = (int) quotient;
+ }
+ int wordShifts = 0;
+ for (int j = inverseTens.length - 1; j >= 4 && inverseTens[j] == 0; --j) {
+ ++wordShifts;
+ }
+ INVERSE_POWER_TENS_INT128_WORD_SHIFTS[i] = wordShifts;
+ INVERSE_POWER_TENS_INT128[i] = new UnsignedInt128(
+ inverseTens[inverseTens.length - 4 - wordShifts],
+ inverseTens[inverseTens.length - 3 - wordShifts],
+ inverseTens[inverseTens.length - 2 - wordShifts],
+ inverseTens[inverseTens.length - 1 - wordShifts]);
+ }
+ }
+
+ private SqlMathUtil() {
+ }
+}

Search Discussions

Discussion Posts

Previous

Follow ups

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 2 of 4 | next ›
Discussion Overview
groupcommits @
categorieshive, hadoop
postedJan 2, '14 at 11:20p
activeJan 2, '14 at 11:20p
posts4
users1
websitehive.apache.org

1 user in discussion

Ehans: 4 posts

People

Translate

site design / logo © 2021 Grokbase