|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object datafu.hourglass.jobs.ExecutionPlanner
public abstract class ExecutionPlanner
Base class for execution planners. An execution planner determines which files should be processed for a particular run.
Constructor Summary | |
---|---|
ExecutionPlanner(org.apache.hadoop.fs.FileSystem fs,
java.util.Properties props)
Initializes the execution planner. |
Method Summary | |
---|---|
protected void |
determineAvailableInputDates()
Determines what input data is available. |
protected void |
determineDateRange()
Determine the date range for inputs to process based on the configuration and available inputs. |
protected java.util.Map<java.util.Date,java.util.List<DatePath>> |
getAvailableInputsByDate()
Gets a map from date to available input data. |
protected java.util.SortedMap<java.util.Date,DatePath> |
getDailyData(org.apache.hadoop.fs.Path path)
Get a map from date to path for all paths matching yyyy/MM/dd under the given path. |
protected java.util.SortedMap<java.util.Date,DatePath> |
getDatedData(org.apache.hadoop.fs.Path path)
Get a map from date to path for all paths matching yyyyMMdd under the given path. |
DateRange |
getDateRange()
Gets the desired input date range to process based on the configuration and available inputs. |
java.lang.Integer |
getDaysAgo()
Gets the number of days to subtract off the end date. |
java.util.Date |
getEndDate()
Gets the end date. |
protected org.apache.hadoop.fs.FileSystem |
getFileSystem()
Gets the file system. |
java.util.List<org.apache.hadoop.fs.Path> |
getInputPaths()
Gets the input paths. |
java.lang.Integer |
getMaxToProcess()
Gets the maximum number of days to process at a time. |
java.lang.Integer |
getNumDays()
Gets the number of days to process. |
org.apache.hadoop.fs.Path |
getOutputPath()
Gets the output path. |
protected java.util.Properties |
getProps()
Gets the configuration properties. |
java.util.Date |
getStartDate()
Gets the start date. |
boolean |
isFailOnMissing()
Gets whether the job should fail if data is missing within the desired date range. |
protected void |
loadInputData()
Determine what input data is available. |
void |
setDaysAgo(java.lang.Integer daysAgo)
Sets the number of days to subtract off the end date. |
void |
setEndDate(java.util.Date endDate)
Sets the end date. |
void |
setFailOnMissing(boolean failOnMissing)
Sets whether the job should fail if data is missing within the desired date range. |
void |
setInputPaths(java.util.List<org.apache.hadoop.fs.Path> inputPaths)
Sets the input paths. |
void |
setMaxToProcess(java.lang.Integer maxToProcess)
Sets the maximum number of days to process at a time. |
void |
setNumDays(java.lang.Integer numDays)
Sets the number of days to process. |
void |
setOutputPath(org.apache.hadoop.fs.Path outputPath)
Sets the output path. |
void |
setStartDate(java.util.Date startDate)
Sets the start date. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public ExecutionPlanner(org.apache.hadoop.fs.FileSystem fs, java.util.Properties props)
fs
- file system to use
props
- configuration properties
Method Detail |
---|
public org.apache.hadoop.fs.Path getOutputPath()
public java.util.List<org.apache.hadoop.fs.Path> getInputPaths()
public void setOutputPath(org.apache.hadoop.fs.Path outputPath)
outputPath
- output path
public void setInputPaths(java.util.List<org.apache.hadoop.fs.Path> inputPaths)
inputPaths
- input paths
public void setStartDate(java.util.Date startDate)
startDate
- start date
public java.util.Date getStartDate()
public void setEndDate(java.util.Date endDate)
endDate
- end date
public java.util.Date getEndDate()
public void setDaysAgo(java.lang.Integer daysAgo)
daysAgo
- days ago
public java.lang.Integer getDaysAgo()
public void setNumDays(java.lang.Integer numDays)
numDays
- number of days to process
public java.lang.Integer getNumDays()
public void setMaxToProcess(java.lang.Integer maxToProcess)
maxToProcess
- maximum number of days
public java.lang.Integer getMaxToProcess()
public boolean isFailOnMissing()
public void setFailOnMissing(boolean failOnMissing)
failOnMissing
- true if the job should fail on missing data
public DateRange getDateRange()
protected org.apache.hadoop.fs.FileSystem getFileSystem()
protected java.util.Properties getProps()
protected java.util.Map<java.util.Date,java.util.List<DatePath>> getAvailableInputsByDate()
protected java.util.SortedMap<java.util.Date,DatePath> getDailyData(org.apache.hadoop.fs.Path path) throws java.io.IOException
path
- path to search under
java.io.IOException
protected java.util.SortedMap<java.util.Date,DatePath> getDatedData(org.apache.hadoop.fs.Path path) throws java.io.IOException
path
- path to search under
java.io.IOException
protected void loadInputData() throws java.io.IOException
java.io.IOException
protected void determineAvailableInputDates()
protected void determineDateRange()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |