public class TokenizeSimple
extends org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>
Example:
define TokenizeSimple datafu.pig.text.opennlp.TokenizeSimple();
-- input:
-- ("I believe the Masons have infiltrated the Apache PMC.")
infoo = LOAD 'input' AS (text:chararray);
-- output:
-- ({(I),(believe),(the),(Masons),(have),(infiltrated),(the),(Apache),(PMC),(.)})
outfoo = FOREACH infoo GENERATE TokenizeSimple(text) as tokens;
Constructor and Description |
---|
TokenizeSimple() |
Modifier and Type | Method and Description |
---|---|
org.apache.pig.data.DataBag | exec(org.apache.pig.data.Tuple input) |
org.apache.pig.impl.logicalLayer.schema.Schema | outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input) |
Methods inherited from class org.apache.pig.EvalFunc: allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn
public org.apache.pig.data.DataBag exec(org.apache.pig.data.Tuple input) throws java.io.IOException
Specified by: exec in class org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>
Throws: java.io.IOException
public org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
Overrides: outputSchema in class org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>