public class POSTag
extends org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>
Example:

```pig
define TokenizeME datafu.pig.text.opennlp.TokenizeME('data/en-token.bin');
define POSTag datafu.pig.text.opennlp.POSTag('data/en-pos-maxent.bin');
-- input:
-- (Appetizers during happy hour range from low to high.)
infoo = LOAD 'input' AS (text:chararray);
--
-- ({(Appetizers),(during),(happy),(hour),(range),(from),(low),(to),(high),(.)})
tokenized = FOREACH infoo GENERATE TokenizeME(text) AS tokens;
--
-- output:
-- Tuple schema is: (word, tag, confidence)
-- ({(Appetizers,NNP,0.3619277937390988),(during,IN,0.7945543860326094),(happy,JJ,0.9888504792754391),
-- (hour,NN,0.9427455123502427),(range,NN,0.7335527963654751),(from,IN,0.9911576465589752),(low,JJ,0.9652034031895174),
-- (to,IN,0.7005347487371849),(high,JJ,0.8227771746247106),(.,.,0.9900983495480891)})
outfoo = FOREACH tokenized GENERATE POSTag(tokens) AS tagged;
```
Constructor and Description |
---|
POSTag(java.lang.String modelPath) |
Modifier and Type | Method and Description |
---|---|
org.apache.pig.data.DataBag | exec(org.apache.pig.data.Tuple input) |
java.util.List<java.lang.String> | getCacheFiles() |
org.apache.pig.impl.logicalLayer.schema.Schema | outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input) |
Methods inherited from class org.apache.pig.EvalFunc: allowCompileTimeCalculation, finish, getArgToFuncMapping, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn
public java.util.List<java.lang.String> getCacheFiles()
Overrides: getCacheFiles in class org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>
public org.apache.pig.data.DataBag exec(org.apache.pig.data.Tuple input) throws java.io.IOException
Specified by: exec in class org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>
Throws: java.io.IOException
public org.apache.pig.impl.logicalLayer.schema.Schema outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema input)
Overrides: outputSchema in class org.apache.pig.EvalFunc<org.apache.pig.data.DataBag>