32#ifndef _MARKLOGIC_MARKLOGIC_H_
33#define _MARKLOGIC_MARKLOGIC_H_
38typedef __int32 int32_t;
39typedef unsigned __int32 uint32_t;
40typedef __int64 int64_t;
41typedef unsigned __int64 uint64_t;
43#define MLDLL __declspec(dllimport)
50#define MARKLOGIC_API_VERSION 3
55class CodePointStringImpl;
103template<
class T,
typename PT>
113 operator PT&() {
return val; }
114 operator const PT&()
const {
return val; }
116 bool operator<(T o)
const {
return val < o.val; }
117 bool operator<=(T o)
const {
return val <= o.val; }
118 bool operator>(T o)
const {
return val > o.val; }
119 bool operator>=(T o)
const {
return val >= o.val; }
120 bool operator==(T o)
const {
return val == o.val; }
121 bool operator!=(T o)
const {
return val != o.val; }
123 T operator-(T o)
const {
return val - o.val; }
124 T operator+(T o)
const {
return val + o.val; }
125 T operator*(T o)
const {
return val * o.val; }
126 T operator/(T o)
const {
return val / o.val; }
128 T operator--(
int) {
return val--; }
129 T operator++(
int) {
return val++; }
130 T& operator--() { --val;
return (T&)*
this; }
131 T& operator++() { ++val;
return (T&)*
this; }
132 T& operator-=(T o) { val -= o.val;
return (T&)*
this; }
133 T& operator+=(T o) { val += o.val;
return (T&)*
this; }
134 T& operator*=(T o) { val *= o.val;
return (T&)*
this; }
135 T& operator/=(T o) { val /= o.val;
return (T&)*
this; }
138template<
class T,
typename PT>
148 T operator-()
const {
return -this->val; }
149 T operator+()
const {
return +this->val; }
152template<
class T,
typename PT>
162 T operator%(T o)
const {
return this->val % o.val; }
163 T& operator%=(T o) { this->val %= o.val;
return (T&)*
this; }
166template<
class T,
typename PT>
176 T operator-()
const {
return -this->val; }
177 T operator+()
const {
return +this->val; }
182typedef double DecimalType;
184typedef long double DecimalType;
272 String(
const char* _str,
const char* _collation = 0);
277 operator bool()
const {
return str!=0; }
279 operator const char*()
const;
280 const char* get()
const;
281 size_t length()
const;
282 const char* collation()
const;
284 bool operator<(
const String& o)
const;
285 bool operator<=(
const String& o)
const;
286 bool operator>(
const String& o)
const;
287 bool operator>=(
const String& o)
const;
288 bool operator==(
const String& o)
const;
289 bool operator!=(
const String& o)
const;
301 LangString(
const char* _str,
const char* _language = 0);
306 operator bool ()
const {
return str!=0; }
308 operator const char*()
const;
309 const char* get()
const;
310 size_t length()
const;
311 const char* language()
const;
334 Point(
double _lat,
double _lng,
const char* _coordinateSystem =
"wgs84");
336 double latitude()
const;
337 double longitude()
const;
338 const char* coordinateSystem()
const;
340 bool operator<(
const Point& o)
const;
341 bool operator<=(
const Point& o)
const;
342 bool operator>(
const Point& o)
const;
343 bool operator>=(
const Point& o)
const;
344 bool operator==(
const Point& o)
const;
345 bool operator!=(
const Point& o)
const;
408 virtual bool null(
size_t i)
const =0;
421 virtual void value(
size_t i,
int&)
const =0;
422 virtual void value(
size_t i,
unsigned&)
const =0;
423 virtual void value(
size_t i, int64_t&)
const =0;
424 virtual void value(
size_t i, uint64_t&)
const =0;
425 virtual void value(
size_t i,
float&)
const =0;
426 virtual void value(
size_t i,
double&)
const =0;
427 virtual void value(
size_t i,
Decimal&)
const =0;
428 virtual void value(
size_t i,
DateTime&)
const =0;
429 virtual void value(
size_t i,
Date&)
const =0;
430 virtual void value(
size_t i,
Time&)
const =0;
431 virtual void value(
size_t i,
GYearMonth&)
const =0;
432 virtual void value(
size_t i,
GYear&)
const =0;
433 virtual void value(
size_t i,
GMonth&)
const =0;
434 virtual void value(
size_t i,
GDay&)
const =0;
437 virtual void value(
size_t i,
String&)
const =0;
438 virtual void value(
size_t i,
Point&)
const =0;
439 virtual void value(
size_t i,
LangString&)
const =0;
469 virtual void writeValue(
int) =0;
470 virtual void writeValue(
unsigned) =0;
471 virtual void writeValue(int64_t) =0;
472 virtual void writeValue(uint64_t) =0;
473 virtual void writeValue(
float) =0;
474 virtual void writeValue(
double) =0;
475 virtual void writeValue(
Decimal) =0;
476 virtual void writeValue(
DateTime) =0;
477 virtual void writeValue(
Date) =0;
478 virtual void writeValue(
Time) =0;
480 virtual void writeValue(
GYear) =0;
481 virtual void writeValue(
GMonth) =0;
482 virtual void writeValue(
GDay) =0;
485 virtual void writeValue(
const String&) =0;
486 virtual void writeValue(
Point) =0;
487 virtual void writeValue(
bool) =0;
488 virtual void writeValue(
const LangString&) =0;
542 virtual void encode(
const void*,
size_t) =0;
544 virtual void encode(
int) =0;
545 virtual void encode(
unsigned) =0;
546 virtual void encode(int64_t) =0;
547 virtual void encode(uint64_t) =0;
548 virtual void encode(
float) =0;
549 virtual void encode(
double) =0;
550 virtual void encode(
Decimal) =0;
552 virtual void encode(
Date) =0;
553 virtual void encode(
Time) =0;
555 virtual void encode(
GYear) =0;
556 virtual void encode(
GMonth) =0;
557 virtual void encode(
GDay) =0;
560 virtual void encode(
const String&) =0;
561 virtual void encode(
Point) =0;
562 virtual void encode(
bool) =0;
582 virtual void decode(
const void*&,
size_t&) =0;
583 virtual void decode(
void*,
size_t) =0;
585 virtual void decode(
int&) =0;
586 virtual void decode(
unsigned&) =0;
587 virtual void decode(int64_t&) =0;
588 virtual void decode(uint64_t&) =0;
589 virtual void decode(
float&) =0;
590 virtual void decode(
double&) =0;
591 virtual void decode(
Decimal&) =0;
593 virtual void decode(
Date&) =0;
594 virtual void decode(
Time&) =0;
596 virtual void decode(
GYear&) =0;
597 virtual void decode(
GMonth&) =0;
598 virtual void decode(
GDay&) =0;
601 virtual void decode(
String&) =0;
602 virtual void decode(
Point&) =0;
603 virtual void decode(
bool&) =0;
658 virtual void error(
const char* message) =0;
758 operator bool()
const {
return seq!=0; }
777 void value(
int&)
const;
778 void value(
unsigned&)
const;
779 void value(int64_t&)
const;
780 void value(uint64_t&)
const;
781 void value(
float&)
const;
782 void value(
double&)
const;
785 void value(
Date&)
const;
786 void value(
Time&)
const;
788 void value(
GYear&)
const;
789 void value(
GMonth&)
const;
790 void value(
GDay&)
const;
793 void value(
String&)
const;
794 void value(
bool&)
const;
796 void value(
Point&)
const;
797 void value(
Map&)
const;
811 Map& operator=(
const Map&);
815 operator bool()
const {
return map!=0; }
1067 begin(_begin), end(_end), type(_type), pos(_pos) {}
1068 Token(
unsigned _begin,
unsigned _end, TokenType _type):
1069 begin(_begin), end(_end), type(_type), pos(UNSPECIFIED_POS) {}
1070 Token(): begin(0), end(0), type(SPACE), pos(UNSPECIFIED_POS) {}
1071 Token(
const Token& t):
1072 begin(t.begin), end(t.end), type(t.type), pos(t.pos) {}
1079typedef unsigned CodePoint;
1091 operator bool()
const {
return cpstr!=0; };
1101 void append(
const CodePoint* ptr,
unsigned siz);
1110 CodePointStringImpl* cpstr;
1178 int argc,
const char** argv,
1244typedef LexerUDF*(*LexerFunction)();
1322 int argc,
const char** argv,
1442 void version(
unsigned pluginVersion = makeVersion(__DATE__,__TIME__),
1443 unsigned marklogicVersion = MARKLOGIC_API_VERSION);
1461 template<
class T>
void registerAggregate(
const char* name);
1498 template<
class T>
void registerLexer(
const char* name);
1534 template<
class T>
void registerStemmer(
const char* name);
1561 static unsigned makeVersion(
const char*,
const char*);
1563 unsigned pluginVersion;
Encapsulation of a User Defined Function for performing aggregate analysis across co-occurrences in r...
Definition: MarkLogic.h:856
virtual void map(TupleIterator &values, Reporter &r)=0
Entry point for performing map analysis. MarkLogic Server calls this method at least once per stand.
virtual void finish(OutputSequence &os, Reporter &r)=0
Finalize the results of an aggregate MapReduce job and prepare them for return to the calling applica...
AggregateUDF(unsigned version=MARKLOGIC_API_VERSION)
Construct an object compatible with a specific MarkLogic Native Plugin API version.
virtual RangeIndex::Order getOrder() const
Determine the order of range index input values.
virtual void encode(Encoder &e, Reporter &r)=0
Serialize this object's state so the object can be distributed across a MarkLogic Server cluster.
virtual void start(Sequence &arg, Reporter &r)=0
Initialize an aggregate MapReduce job.
virtual AggregateUDF * clone() const =0
Create a copy of an AggregateUDF.
virtual void decode(Decoder &d, Reporter &r)=0
De-serialize this object's state so the object can be reconstituted on a remote host.
virtual void close()=0
Release an AggregateUDF clone.
virtual void reduce(const AggregateUDF *o, Reporter &r)=0
Reduce the intermediate results of map analysis to a final result.
Definition: MarkLogic.h:105
Definition: MarkLogic.h:140
Definition: MarkLogic.h:154
Definition: MarkLogic.h:168
Definition: MarkLogic.h:1082
void push_back(CodePoint cp)
Append a single codepoint.
unsigned length() const
Number of codepoints.
void append(const CodePoint *ptr, unsigned siz)
Append a certain number of codepoints.
void reserve(size_t sz)
Reserve a certain buffer size.
void clear()
Clear out the codepoints.
const CodePoint * data() const
Get handle to codepoints.
void appendUTF8(const char *utf8)
Append a UTF8 encoded character string.
C++ representation of the XQuery type xs:dateTime.
Definition: MarkLogic.h:196
C++ representation of the XQuery type xs:date.
Definition: MarkLogic.h:204
C++ representation of the XQuery type xs:dayTimeDuration.
Definition: MarkLogic.h:260
C++ representation of the XQuery type xs:decimal.
Definition: MarkLogic.h:188
De-serialize values.
Definition: MarkLogic.h:580
Serialize values.
Definition: MarkLogic.h:540
C++ representation of the XQuery type xs:gDay.
Definition: MarkLogic.h:244
C++ representation of the XQuery type xs:gMonth.
Definition: MarkLogic.h:236
C++ representation of the XQuery type xs:gYearMonth.
Definition: MarkLogic.h:220
C++ representation of the XQuery type xs:gYear.
Definition: MarkLogic.h:228
Encapsulation of XQuery sequence item value type.
Definition: MarkLogic.h:670
Type
The types of item values that can occur in a Sequence.
Definition: MarkLogic.h:679
@ DAY_TIME_DURATION
xs:dayTimeDuration
Definition: MarkLogic.h:696
@ YEAR_MONTH_DURATION
xs:yearMonthDuration
Definition: MarkLogic.h:695
@ DECIMAL
xs:decimal
Definition: MarkLogic.h:687
@ MAP
map:map (MarkLogic XQuery dialect)
Definition: MarkLogic.h:711
@ TIME
xs:time
Definition: MarkLogic.h:689
@ ANY_URI
xs:anyURI
Definition: MarkLogic.h:698
@ DOUBLE
xs:double
Definition: MarkLogic.h:686
@ G_YEAR_MONTH
xs:gYearMonth
Definition: MarkLogic.h:691
@ G_MONTH
xs:gMonth
Definition: MarkLogic.h:693
@ INT
xs:int
Definition: MarkLogic.h:681
@ STRING
xs:string
Definition: MarkLogic.h:697
@ G_YEAR
xs:gYear
Definition: MarkLogic.h:692
@ G_DAY
xs:gDay
Definition: MarkLogic.h:694
@ DATE
xs:date
Definition: MarkLogic.h:690
@ LONG
xs:long
Definition: MarkLogic.h:683
@ DATE_TIME
xs:dateTime
Definition: MarkLogic.h:688
@ FLOAT
xs:float
Definition: MarkLogic.h:685
@ BOOLEAN
xs:boolean
Definition: MarkLogic.h:699
@ POINT
cts:point (MarkLogic XQuery dialect)
Definition: MarkLogic.h:710
@ UNSIGNED_LONG
xs:unsignedLong
Definition: MarkLogic.h:684
@ UNSIGNED_INT
xs:unsignedInt
Definition: MarkLogic.h:682
@ LANG_STRING
rdf:langString (SPARQL)
Definition: MarkLogic.h:714
C++ representation of the RDF type rdf:langString, encapsulating a string value with its language.
Definition: MarkLogic.h:298
Encapsulation of a User Defined Function for performing tokenization of text runs.
Definition: MarkLogic.h:1153
virtual bool next(Reporter &r)=0
Advance to the next token. Return true if there is another token to fetch.
LexerUDF(unsigned version=MARKLOGIC_API_VERSION)
Construct an object compatible with a specific MarkLogic Native Plugin API version.
virtual bool isStale() const =0
Return true if the LexerUDF instance can no longer be reused.
virtual void initialize(const char *lang, int argc, const char **argv, Reporter &r)=0
Initialize a lexer. This method is called once after the lexer is constructed.
virtual void reset(const CodePoint *cp, unsigned sz, Reporter &r)=0
Initiate a new tokenization episode on a new text run.
virtual void finish(Reporter &r)=0
Clean up from tokenizing a text run.
virtual const Token * token() const =0
Return the current token.
virtual void close()=0
Release a LexerUDF instance.
A key-value map, equivalent to the MarkLogic XQuery type map:map.
Definition: MarkLogic.h:807
Sequence get(const char *key) const
Extract the value(s) associated with a given key.
A sequence of values produced by your AggregateUDF::finish implementation.
Definition: MarkLogic.h:464
virtual void writeMapKey(const char *)=0
Add a key to the map. Use OutputSequence::writeValue to add the value.
virtual void startMap()=0
Begin adding a map value to the sequence.
virtual void endMap()=0
Close off the current map.
C++ representation of the MarkLogic XQuery cts:point.
Definition: MarkLogic.h:326
Encapsulation of the configuration of a range index.
Definition: MarkLogic.h:64
Type
The type of the values in a range index.
Definition: MarkLogic.h:74
@ ANY_URI
xs:anyURI
Definition: MarkLogic.h:92
@ G_YEAR_MONTH
xs:gYearMonth
Definition: MarkLogic.h:85
@ DECIMAL
xs:decimal
Definition: MarkLogic.h:81
@ DATE_TIME
xs:dateTime
Definition: MarkLogic.h:82
@ G_DAY
xs:gDay
Definition: MarkLogic.h:88
@ YEAR_MONTH_DURATION
xs:yearMonthDuration
Definition: MarkLogic.h:89
@ G_MONTH
xs:gMonth
Definition: MarkLogic.h:87
@ TIME
xs:time
Definition: MarkLogic.h:83
@ G_YEAR
xs:gYear
Definition: MarkLogic.h:86
@ INT
xs:int
Definition: MarkLogic.h:75
@ DAY_TIME_DURATION
xs:dayTimeDuration
Definition: MarkLogic.h:90
@ UNSIGNED_LONG
xs:unsignedLong
Definition: MarkLogic.h:78
@ STRING
xs:string
Definition: MarkLogic.h:91
@ FLOAT
xs:float
Definition: MarkLogic.h:79
@ UNSIGNED_INT
xs:unsignedInt
Definition: MarkLogic.h:76
@ LONG
xs:long
Definition: MarkLogic.h:77
@ DATE
xs:date
Definition: MarkLogic.h:84
@ DOUBLE
xs:double
Definition: MarkLogic.h:80
Order
Whether the index values are in ascending or descending order.
Definition: MarkLogic.h:97
The MarkLogic Server native plugin registry.
Definition: MarkLogic.h:1424
void registerStemmer(const char *name)
Register a StemmerUDF implementation with MarkLogic Server.
Definition: MarkLogic.h:1603
void version(unsigned pluginVersion=makeVersion(__DATE__, __TIME__), unsigned marklogicVersion=MARKLOGIC_API_VERSION)
Register the version of your plugin library and the version of the MarkLogic Server Native Plugin API...
virtual void registerStemmer(const char *name, StemmerFunction f)=0
Register a StemmerUDF implementation with MarkLogic Server.
virtual void registerLexer(const char *name, LexerFunction f)=0
Register a LexerUDF implementation with MarkLogic Server.
virtual void registerAggregate(const char *name, AggregateFunction f)=0
Register an AggregateUDF implementation with MarkLogic Server.
void registerAggregate(const char *name)
Register an AggregateUDF implementation with MarkLogic Server.
Definition: MarkLogic.h:1589
void registerLexer(const char *name)
Register a LexerUDF implementation with MarkLogic Server.
Definition: MarkLogic.h:1596
Log messages and report errors to MarkLogic Server. You do not need to subclass this class.
Definition: MarkLogic.h:626
virtual void log(LogLevel level, const char *message)=0
Log a message at a particular log level.
virtual void error(const char *message)=0
Log an error and cancel the current job.
LogLevel
Available log levels.
Definition: MarkLogic.h:629
virtual LogLevel logLevel() const =0
Determine the current log level.
A sequence of Item values.
Definition: MarkLogic.h:750
void next()
Advance to the next item in the sequence. If the sequence is exhausted, XDMP-UDFSEQEND is raised.
uint64_t frequency() const
Determine the number of times the current value occurs in the sequence.
Item::Type type() const
Determine the type of the current item.
bool done() const
Test whether or not there are more items in the sequence.
Encapsulation of a User Defined Function for performing stemming of individual words.
Definition: MarkLogic.h:1297
virtual bool next(Reporter &r)=0
Advance to the next stem. Return true if there is another stem to fetch.
virtual void initialize(const char *lang, int argc, const char **argv, Reporter &r)=0
Initialize a stemmer. This method is called once after the stemmer is constructed.
StemmerUDF(unsigned version=MARKLOGIC_API_VERSION)
Construct an object compatible with a specific MarkLogic Native Plugin API version.
virtual const CodePointString * stem()=0
Return the current stem.
virtual void start(Reporter &r)=0
Start iteration of stems. Normal stemming operations may require the iteration to be performed more t...
virtual void reset(const CodePoint *cp, const unsigned sz, Token::PartOfSpeech pos, Reporter &r)=0
Get stems for the input word.
virtual void close()=0
Release a StemmerUDF instance.
virtual bool delegate() const =0
Delegate to base stemmer. Return true if the base stemmer should be asked to provide stems for the in...
virtual bool isStale() const =0
Return true if the StemmerUDF instance can no longer be reused.
C++ representation of the XQuery type xs:string, encapsulating a string value with its collation.
Definition: MarkLogic.h:269
C++ representation of the XQuery type xs:time.
Definition: MarkLogic.h:212
C++ representation of the MarkLogic token.
Definition: MarkLogic.h:1010
unsigned char PartOfSpeech
The part of speech of the token being returned. Part of speech is only relevant for word tokens.
Definition: MarkLogic.h:1048
unsigned char TokenType
The kind of token being returned.
Definition: MarkLogic.h:1021
Iterator over a sequence of N-way co-occurrences of range index values from a single stand....
Definition: MarkLogic.h:374
virtual const char * coordinateSystem(size_t i) const =0
If the Ith value in each tuple is a Point (cts:point), determine its coordinate system.
virtual void next()=0
Advance to the next tuple in the sequence.
virtual const char * collation(size_t i) const =0
If the Ith value in each tuple is a string, determine its collation.
virtual void value(size_t i, int &) const =0
Extract the Ith value in the current tuple.
virtual size_t width() const =0
Determine the number of values in each tuple.
virtual bool done() const =0
Test whether or not there are more tuples in the sequence.
virtual RangeIndex::Type type(size_t i) const =0
Determine the type of the Ith value in each tuple.
virtual uint64_t frequency() const =0
Determine the number of times the current tuple occurs.
virtual bool descending() const =0
Test whether tuples are in descending order in the sequence.
C++ representation of the XQuery type xs:yearMonthDuration.
Definition: MarkLogic.h:252