VALUEIN
- Currently only supports ForestDocument,
but other types like Text or BytesWritable are possible candidates
to be added.

public class ForestReader<VALUEIN> extends org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN> implements MarkLogicConstants
Modifier and Type | Field and Description |
---|---|
protected long |
bytesRead |
protected Collection<String> |
colFilters |
protected
org.apache.hadoop.conf.Configuration |
conf |
protected BiendianDataInputStream |
dataIs |
protected int |
deletedCnt |
protected Collection<String> |
dirFilters |
protected boolean |
done |
protected int |
fragCnt |
protected DocumentURIWithSourceInfo |
key |
protected
org.apache.hadoop.fs.Path |
largeForestDir |
static
org.apache.commons.logging.Log |
LOG |
protected int |
nascentCnt |
protected BiendianDataInputStream |
ordIs |
protected int |
position |
protected int |
prevDocid |
protected BiendianDataInputStream |
qualIs |
protected
org.apache.hadoop.mapreduce.lib.input.FileSplit |
split |
protected String |
srcId |
protected BiendianDataInputStream |
tsIs |
protected Collection<String> |
typeFilters |
protected VALUEIN |
value |
protected Class<? extends
org.apache.hadoop.io.Writable> |
valueClass |
ADVANCED_MODE,
ASSIGNMENT_POLICY,
BASIC_MODE,
BATCH_SIZE,
BIND_SPLIT_RANGE,
COLLECTION_FILTER,
CONTENT_TYPE,
COPY_COLLECTIONS,
COPY_METADATA,
COPY_QUALITY,
DEFAULT_BATCH_SIZE,
DEFAULT_CONTENT_TYPE,
DEFAULT_LOCAL_MAX_SPLIT_SIZE,
DEFAULT_MAX_SPLIT_SIZE,
DEFAULT_OUTPUT_CONTENT_ENCODING,
DEFAULT_OUTPUT_XML_REPAIR_LEVEL,
DEFAULT_PROPERTY_OPERATION_TYPE,
DEFAULT_TXN_SIZE,
DIRECTORY_FILTER,
DOCUMENT_SELECTOR,
EXECUTION_MODE,
EXTRACT_URI,
INDENTED,
INPUT_DATABASE_NAME,
INPUT_HOST,
INPUT_KEY_CLASS,
INPUT_LEXICON_FUNCTION_CLASS,
INPUT_MODE,
INPUT_PASSWORD,
INPUT_PORT,
INPUT_QUERY,
INPUT_QUERY_LANGUAGE,
INPUT_QUERY_TIMESTAMP,
INPUT_RESTRICT_HOSTS,
INPUT_SSL_OPTIONS_CLASS,
INPUT_SSL_PROTOCOL,
INPUT_USE_SSL,
INPUT_USERNAME,
INPUT_VALUE_CLASS,
MAX_SPLIT_SIZE,
MIN_NODEUPDATE_VERSION,
MODE_DISTRIBUTED,
MODE_LOCAL,
MR_NAMESPACE,
NODE_OPERATION_TYPE,
OUTPUT_CLEAN_DIR,
OUTPUT_COLLECTION,
OUTPUT_CONTENT_ENCODING,
OUTPUT_CONTENT_LANGUAGE,
OUTPUT_CONTENT_NAMESPACE,
OUTPUT_DATABASE_NAME,
OUTPUT_DIRECTORY,
OUTPUT_FAST_LOAD,
OUTPUT_FOREST_HOST,
OUTPUT_GRAPH,
OUTPUT_HOST,
OUTPUT_KEY_TYPE,
OUTPUT_KEY_VARNAME,
OUTPUT_NAMESPACE,
OUTPUT_OVERRIDE_GRAPH,
OUTPUT_PARTITION,
OUTPUT_PASSWORD,
OUTPUT_PERMISSION,
OUTPUT_PORT,
OUTPUT_PROPERTY_ALWAYS_CREATE,
OUTPUT_QUALITY,
OUTPUT_QUERY,
OUTPUT_QUERY_LANGUAGE,
OUTPUT_RESTRICT_HOSTS,
OUTPUT_SSL_OPTIONS_CLASS,
OUTPUT_SSL_PROTOCOL,
OUTPUT_STREAMING,
OUTPUT_URI_PREFIX,
OUTPUT_URI_REPLACE,
OUTPUT_URI_SUFFIX,
OUTPUT_USE_SSL,
OUTPUT_USERNAME,
OUTPUT_VALUE_TYPE,
OUTPUT_VALUE_VARNAME,
OUTPUT_XML_REPAIR_LEVEL,
PATH_NAMESPACE,
PROPERTY_OPERATION_TYPE,
QUERY_FILTER,
RECORD_TO_FRAGMENT_RATIO,
REDACTION_RULE_COLLECTION,
SPLIT_END_VARNAME,
SPLIT_QUERY,
SPLIT_START_VARNAME,
SUBDOCUMENT_EXPRESSION,
TEMPORAL_COLLECTION,
TXN_SIZE,
TYPE_FILTER
Constructor and Description |
---|
ForestReader() |
Modifier and Type | Method and Description |
---|---|
protected boolean |
applyFilter(String uri,
ExpandedTree tree) |
void |
close() |
DocumentURIWithSourceInfo |
getCurrentKey() |
VALUEIN |
getCurrentValue() |
float |
getProgress() |
void |
initialize(org.apache.hadoop.mapreduce.InputSplit split,
org.apache.hadoop.mapreduce.TaskAttemptContext context) |
boolean |
nextKeyValue() |
protected void |
setKey(String uri,
String sub,
int line, int col)
Apply URI prefix and suffix configuration
options and set the result as DocumentURI key.
|
protected void |
setSkipKey(String sub,
int line, int col, String reason)
Set the result as DocumentURI key.
|
public static final org.apache.commons.logging.Log LOG
protected org.apache.hadoop.mapreduce.lib.input.FileSplit split
protected long bytesRead
protected org.apache.hadoop.conf.Configuration conf
protected BiendianDataInputStream dataIs
protected BiendianDataInputStream ordIs
protected BiendianDataInputStream tsIs
protected BiendianDataInputStream qualIs
protected DocumentURIWithSourceInfo key
protected VALUEIN value
protected Class<? extends org.apache.hadoop.io.Writable> valueClass
protected int position
protected int prevDocid
protected boolean done
protected org.apache.hadoop.fs.Path largeForestDir
protected int nascentCnt
protected int deletedCnt
protected int fragCnt
protected Collection<String> colFilters
protected Collection<String> dirFilters
protected Collection<String> typeFilters
protected String srcId
public void close() throws IOException
close
in
interface Closeable
close
in
interface AutoCloseable
close
in
class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
public DocumentURIWithSourceInfo getCurrentKey() throws IOException, InterruptedException
getCurrentKey
in
class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public VALUEIN getCurrentValue() throws IOException, InterruptedException
getCurrentValue
in
class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public float getProgress() throws IOException, InterruptedException
getProgress
in
class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
initialize
in
class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
public boolean nextKeyValue() throws IOException, InterruptedException
nextKeyValue
in
class org.apache.hadoop.mapreduce.RecordReader<DocumentURIWithSourceInfo,VALUEIN>
IOException
InterruptedException
protected void setKey(String uri, String sub, int line, int col)
uri
- Source string of document URI.
sub
- Sub-entry of the source of the document origin.
line
- Line number in the source if applicable; -1 otherwise.
col
- Column number in the source if applicable; -1 otherwise.

protected void setSkipKey(String sub, int line, int col, String reason)
sub
- Sub-entry of the source of the document origin.
line
- Line number in the source if applicable; -1 otherwise.
col
- Column number in the source if applicable; -1 otherwise.
reason
- Reason for skipping.

protected boolean applyFilter(String uri, ExpandedTree tree)
Copyright © 2020 MarkLogic
Corporation. All Rights Reserved.
Complete online documentation for MarkLogic Server,
XQuery and related components may be found at
developer.marklogic.com