public class HyperLogLogPlusPlus
extends org.apache.pig.AlgebraicEvalFunc<java.lang.Long>
This class uses the HyperLogLog++ implementation from stream-lib. The HyperLogLog++ algorithm is an enhanced version of HyperLogLog, as described here.
This is a streaming implementation, and therefore the input data does not need to be sorted.
Modifier and Type | Class and Description |
---|---|
static class |
HyperLogLogPlusPlus.Final |
static class |
HyperLogLogPlusPlus.Initial |
static class |
HyperLogLogPlusPlus.Intermediate |
Constructor and Description |
---|
HyperLogLogPlusPlus()
Constructs a HyperLogLog++ estimator.
|
HyperLogLogPlusPlus(java.lang.String p)
Constructs a HyperLogLog++ estimator.
|
Modifier and Type | Method and Description |
---|---|
protected static com.clearspring.analytics.stream.cardinality.HyperLogLogPlus |
countDisctinct(org.apache.pig.data.Tuple input,
int p) |
java.lang.String |
getFinal() |
java.lang.String |
getInitial() |
java.lang.String |
getIntermed() |
org.apache.pig.impl.logicalLayer.schema.Schema |
outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input) |
allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn
public HyperLogLogPlusPlus()
public HyperLogLogPlusPlus(java.lang.String p)
p
- precision value
public org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
outputSchema
in class org.apache.pig.EvalFunc<java.lang.Long>
public java.lang.String getFinal()
getFinal
in interface org.apache.pig.Algebraic
getFinal
in class org.apache.pig.AlgebraicEvalFunc<java.lang.Long>
public java.lang.String getInitial()
getInitial
in interface org.apache.pig.Algebraic
getInitial
in class org.apache.pig.AlgebraicEvalFunc<java.lang.Long>
public java.lang.String getIntermed()
getIntermed
in interface org.apache.pig.Algebraic
getIntermed
in class org.apache.pig.AlgebraicEvalFunc<java.lang.Long>
protected static com.clearspring.analytics.stream.cardinality.HyperLogLogPlus countDisctinct(org.apache.pig.data.Tuple input, int p) throws java.lang.NumberFormatException, java.io.IOException
java.lang.NumberFormatException
java.io.IOException