public class PathUtils
extends java.lang.Object
| Modifier and Type | Field and Description |
|---|---|
| static java.text.SimpleDateFormat | datedPathFormat |
| static java.text.SimpleDateFormat | nestedDatedPathFormat |
| static org.apache.hadoop.fs.PathFilter | nonHiddenPathFilter - Filters out paths starting with "." and "_". |
| static java.util.TimeZone | timeZone |
| Constructor and Description |
|---|
| PathUtils() |
| Modifier and Type | Method and Description |
|---|---|
| static long | countBytes(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) - Sums the size of all files listed under a given path. |
| static java.util.List<DatePath> | findDatedPaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) - List all paths matching the "yyyyMMdd" format under a given path. |
| static java.util.List<DatePath> | findNestedDatedPaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path input) - List all paths matching the "yyyy/MM/dd" format under a given path. |
| static java.util.Date | getDateForDatedPath(org.apache.hadoop.fs.Path path) - Gets the date for a path in the "yyyyMMdd" format. |
| static java.util.Date | getDateForNestedDatedPath(org.apache.hadoop.fs.Path path) - Gets the date for a path in the "yyyy/MM/dd" format. |
| static org.apache.hadoop.fs.Path | getNestedPathRoot(org.apache.hadoop.fs.Path path) - Gets the root path for a path in the "yyyy/MM/dd" format. |
| static org.apache.avro.Schema | getSchemaFromFile(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) - Gets the schema from a given Avro data file. |
| static org.apache.avro.Schema | getSchemaFromPath(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) - Gets the schema for the first Avro file under the given path. |
| static void | keepLatestDatedPaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, int retentionCount) - Delete all but the last N days of paths matching the "yyyyMMdd" format. |
| static void | keepLatestNestedDatedPaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, int retentionCount) - Delete all but the last N days of paths matching the "yyyy/MM/dd" format. |
public static final java.util.TimeZone timeZone
public static final java.text.SimpleDateFormat datedPathFormat
public static final java.text.SimpleDateFormat nestedDatedPathFormat
public static final org.apache.hadoop.fs.PathFilter nonHiddenPathFilter
public static void keepLatestDatedPaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, int retentionCount) throws java.io.IOException
Parameters:
fs - the filesystem
path - path to delete files from
retentionCount - how many days to keep
Throws:
java.io.IOException - IOException

public static void keepLatestNestedDatedPaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, int retentionCount) throws java.io.IOException
Parameters:
fs - the filesystem
path - path to delete files from
retentionCount - how many days to keep
Throws:
java.io.IOException - IOException

public static java.util.List<DatePath> findNestedDatedPaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path input) throws java.io.IOException
Parameters:
fs - file system
input - path to search under
Throws:
java.io.IOException - IOException

public static java.util.List<DatePath> findDatedPaths(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws java.io.IOException
Parameters:
fs - file system
path - path to search under
Throws:
java.io.IOException - IOException

public static org.apache.avro.Schema getSchemaFromFile(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws java.io.IOException
Parameters:
fs - the filesystem
path - path to get schema from
Throws:
java.io.IOException - IOException

public static org.apache.avro.Schema getSchemaFromPath(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws java.io.IOException
Parameters:
fs - the filesystem
path - path to fetch schema for
Throws:
java.io.IOException - IOException

public static long countBytes(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws java.io.IOException
Parameters:
fs - file system
path - path to count bytes for
Throws:
java.io.IOException - IOException

public static java.util.Date getDateForDatedPath(org.apache.hadoop.fs.Path path)
Parameters:
path - path to check

public static java.util.Date getDateForNestedDatedPath(org.apache.hadoop.fs.Path path)
Parameters:
path - path to check

public static org.apache.hadoop.fs.Path getNestedPathRoot(org.apache.hadoop.fs.Path path)
Parameters:
path - in "yyyy/MM/dd" format