com.nhncorp.neptune.parallel.hadoop
Class TableJoinInputFormat

java.lang.Object
  extended by com.nhncorp.neptune.parallel.hadoop.TableJoinInputFormat
All Implemented Interfaces:
org.apache.hadoop.mapred.InputFormat, org.apache.hadoop.mapred.JobConfigurable

public abstract class TableJoinInputFormat
extends java.lang.Object
implements org.apache.hadoop.mapred.InputFormat, org.apache.hadoop.mapred.JobConfigurable

두 개의 테이블을 JOIN 하면서 MapTask를 수행할 때 사용하는 InputFormat. JOIN은 두 테이블의 rowkey를 이용하여 Outer Join으로 처리된다.

Author:
nhn

Field Summary
static java.lang.String PIVOT_TABLE
           
static java.lang.String TARGET_TABLE
           
 
Constructor Summary
TableJoinInputFormat()
           
 
Method Summary
 void configure(org.apache.hadoop.mapred.JobConf jobConf)
           
abstract  RowFilter getPivotScanFilter(org.apache.hadoop.mapred.JobConf jobConf)
          JOIN에서 기준이 되는 테이블에 대한 RowFilter를 지정한다.
abstract  java.lang.String getPivotTableName(org.apache.hadoop.mapred.JobConf jobConf)
          JOIN에서 기준이 되는 테이블명을 지정한다.
 org.apache.hadoop.mapred.RecordReader<Row.Key,com.nhncorp.neptune.client.scanner.MergeScanner.RowArray> getRecordReader(org.apache.hadoop.mapred.InputSplit split, org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.mapred.Reporter reporter)
           
 org.apache.hadoop.mapred.InputSplit[] getSplits(org.apache.hadoop.mapred.JobConf job, int numSplits)
           
abstract  RowFilter getTargetScanFilter(org.apache.hadoop.mapred.JobConf jobConf)
          JOIN에서 상대편 테이블의 RowFilter를 지정한다.
abstract  java.lang.String getTargetTableName(org.apache.hadoop.mapred.JobConf jobConf)
          JOIN에서 상대편 테이블의 테이블명을 지정한다.
 void validateInput(org.apache.hadoop.mapred.JobConf job)
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

PIVOT_TABLE

public static final java.lang.String PIVOT_TABLE
See Also:
Constant Field Values

TARGET_TABLE

public static final java.lang.String TARGET_TABLE
See Also:
Constant Field Values
Constructor Detail

TableJoinInputFormat

public TableJoinInputFormat()
                     throws java.io.IOException
Throws:
java.io.IOException
Method Detail

getPivotScanFilter

public abstract RowFilter getPivotScanFilter(org.apache.hadoop.mapred.JobConf jobConf)
JOIN에서 기준이 되는 테이블에 대한 RowFilter를 지정한다.

Parameters:
jobConf - Hadoop 작업 설정(JobConf)
Returns:
JOIN에서 기준이 되는 테이블에 적용할 RowFilter

getTargetScanFilter

public abstract RowFilter getTargetScanFilter(org.apache.hadoop.mapred.JobConf jobConf)
JOIN에서 상대편 테이블의 RowFilter를 지정한다.

Parameters:
jobConf - Hadoop 작업 설정(JobConf)
Returns:
JOIN에서 상대편 테이블에 적용할 RowFilter

getPivotTableName

public abstract java.lang.String getPivotTableName(org.apache.hadoop.mapred.JobConf jobConf)
JOIN에서 기준이 되는 테이블명을 지정한다.

Parameters:
jobConf - Hadoop 작업 설정(JobConf)
Returns:
JOIN에서 기준이 되는 테이블의 테이블명

getTargetTableName

public abstract java.lang.String getTargetTableName(org.apache.hadoop.mapred.JobConf jobConf)
JOIN에서 상대편 테이블의 테이블명을 지정한다.

Parameters:
jobConf - Hadoop 작업 설정(JobConf)
Returns:
JOIN에서 상대편 테이블의 테이블명

getRecordReader

public org.apache.hadoop.mapred.RecordReader<Row.Key,com.nhncorp.neptune.client.scanner.MergeScanner.RowArray> getRecordReader(org.apache.hadoop.mapred.InputSplit split,
                                                                                                                               org.apache.hadoop.mapred.JobConf job,
                                                                                                                               org.apache.hadoop.mapred.Reporter reporter)
                                                                                                                        throws java.io.IOException
Specified by:
getRecordReader in interface org.apache.hadoop.mapred.InputFormat
Throws:
java.io.IOException

getSplits

public org.apache.hadoop.mapred.InputSplit[] getSplits(org.apache.hadoop.mapred.JobConf job,
                                                       int numSplits)
                                                throws java.io.IOException
Specified by:
getSplits in interface org.apache.hadoop.mapred.InputFormat
Throws:
java.io.IOException

configure

public void configure(org.apache.hadoop.mapred.JobConf jobConf)
Specified by:
configure in interface org.apache.hadoop.mapred.JobConfigurable

validateInput

public void validateInput(org.apache.hadoop.mapred.JobConf job)
                   throws java.io.IOException
Specified by:
validateInput in interface org.apache.hadoop.mapred.InputFormat
Throws:
java.io.IOException