tde.nodeDataExtract

tde.nodeDataExtract(
   documents as Node[],
   [templates as Node[]]
) as Object

Summary

Extracts row or triple data from a list of specified documents by applying extraction templates that are either stored in the schema database or provided as a second argument.

Parameters
documents The array of input nodes from which row and triple data is extracted.
templates The TDE templates to apply to the documents. If not specified or if an empty array is provided, stored templates in the schema database are used.

Example

var doc1 = xdmp.unquote(`
<Citation>
  <ID>69152893</ID>
  <Article>
    <Journal>
      <ISSN>0123-4567</ISSN>
      <Details>
        <Volume>118-119</Volume>
        <PubDate>
          <Year>1968</Year>
          <Month>Dec</Month>
          <Day>7</Day>
        </PubDate>
      </Details>
    </Journal>
    <Authors>
      <Author>
       <LastName>Doe</LastName>
       <ForeName>John</ForeName>
      </Author>
      <Author>
        <LastName>Smith</LastName>
        <ForeName>Jane</ForeName>
      </Author>
    </Authors>
  </Article>
</Citation>
`);

var rowtde1 = xdmp.toJSON(
{
  "template":{
    "context":"/Citation/Article/Journal/Details",
    "rows":[
      {
        "schemaName":"Medical",
        "viewName":"Publications",
        "columns":[
          {
            "name":"ID",
            "scalarType":"long",
            "val":"../../../ID"
          },
          {
            "name":"ISSN",
            "scalarType":"string",
            "val":"../ISSN"
          },
          {
            "name":"Volume",
            "scalarType":"string",
            "val":"Volume"
          },
          {
            "name":"Date",
            "scalarType":"string",
            "val":"PubDate/Year||'-'||PubDate/Month||'-'||PubDate/Day"
          }
        ]
      }
    ]
  }
}
);

var tripletde1 = xdmp.toJSON(
{
  "template":{
    "context":"//Authors/Author",
    "vars":[
      {
        "name":"prefix1",
        "val":"\"http://marklogic.com/example/pubs/\""
      }
    ],
    "triples":[
      {
        "subject":{
          "val":"sem:iri($prefix1||'person/'||./ForeName||'_'||./LastName)",
          "invalidValues":"reject"
        },
        "predicate":{
          "val":"sem:iri($prefix1||'authored')",
          "invalidValues":"reject"
        },
        "object":{
          "val":"xs:string(../../Journal/ISSN)",
          "invalidValues":"reject"
        }
      }
    ]
  }
}
);

tde.nodeDataExtract([doc1],[rowtde1, tripletde1]);

=>

{
  "document1":[
    {
      "row":{
        "schema":"Medical",
        "view":"Publications",
        "data":{
          "rownum":"1",
          "ID":69152893,
          "ISSN":"0123-4567",
          "Volume":"118-119",
          "Date":"1968-Dec-7"
        }
      }
    },
    {
      "triple":{
        "subject":"http://marklogic.com/example/pubs/person/John_Doe",
        "predicate":"http://marklogic.com/example/pubs/authored",
        "object":{
          "datatype":"http://www.w3.org/2001/XMLSchema#string",
          "value":"0123-4567"
        }
      }
    },
    {
      "triple":{
        "subject":"http://marklogic.com/example/pubs/person/Jane_Smith",
        "predicate":"http://marklogic.com/example/pubs/authored",
        "object":{
          "datatype":"http://www.w3.org/2001/XMLSchema#string",
          "value":"0123-4567"
        }
      }
    }
  ]
}

Powered by MarkLogic Server | Terms of Use | Privacy Policy