public class TupleFromBag
extends org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.Tuple>
This UDF will extract a tuple from a bag based on a specified index.
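It takes three input parameters:
1. the bag to extract from,
2. the zero-based index of the tuple to extract,
3. (optional) a default tuple, returned when the bag has no tuple at that index; if omitted, the result is null in that case.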
Example:
define TupleFromBag datafu.pig.bags.TupleFromBag();
%declare defaultTuple TOTUPLE(0,'NO_NUMBER')
data = LOAD 'input' using PigStorage(',') AS (a:INT,b:CHARARRAY);
-- input:
(1,a)
(1,b)
(1,c)
(2,d)
(2,e)
(2,f)
(3,g)
(3,h)
(3,i)
grouped = GROUP data BY a;
--output:
{group: int,data: {(a: int,b: chararray)}}
(1,{(1,c),(1,b),(1,a)})
(2,{(2,f),(2,e),(2,d)})
(3,{(3,i),(3,h),(3,g)})
result1 = FOREACH grouped GENERATE
group AS a,
TupleFromBag(data, 0);
--output:
{a: int,(a: int,b: chararray)}
(1,(1,c))
(2,(2,f))
(3,(3,i))
result2 = FOREACH grouped GENERATE
group AS a,
TupleFromBag(data,0).b as first_b,
TupleFromBag(data,1).b as second_b;
--output:
{a: int,first_b: chararray,second_b: chararray}
(1,c,b)
(2,f,e)
(3,i,h)
result3 = FOREACH grouped GENERATE
group AS a,
TupleFromBag(data,0).b as first_b,
TupleFromBag(data,3).b as fourth_b;
--output:
{a: int,first_b: chararray,fourth_b: chararray}
(1,c,)
(2,f,)
(3,i,)
result4 = FOREACH grouped GENERATE
group AS a,
TupleFromBag(data,0,$defaultTuple).b as first_b,
TupleFromBag(data,3,$defaultTuple).b as fourth_b;
--output:
{a: int,first_b: chararray,fourth_b: chararray}
(1,c,NO_NUMBER)
(2,f,NO_NUMBER)
(3,i,NO_NUMBER)
Constructor and Description |
---|
TupleFromBag() |
Modifier and Type | Method and Description |
---|---|
void |
accumulate(org.apache.pig.data.Tuple tinput) |
void |
cleanup() |
org.apache.pig.data.Tuple |
getValue() |
org.apache.pig.impl.logicalLayer.schema.Schema |
outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input) |
Methods inherited from class org.apache.pig.EvalFunc: allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn
public void accumulate(org.apache.pig.data.Tuple tinput) throws java.io.IOException
accumulate
in interface org.apache.pig.Accumulator<org.apache.pig.data.Tuple>
accumulate
in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.Tuple>
java.io.IOException
public org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
outputSchema
in class org.apache.pig.EvalFunc<org.apache.pig.data.Tuple>
public void cleanup()
cleanup
in interface org.apache.pig.Accumulator<org.apache.pig.data.Tuple>
cleanup
in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.Tuple>
public org.apache.pig.data.Tuple getValue()
getValue
in interface org.apache.pig.Accumulator<org.apache.pig.data.Tuple>
getValue
in class org.apache.pig.AccumulatorEvalFunc<org.apache.pig.data.Tuple>