Loading TOC...

MarkLogic 12 Product Documentation
op:ann-top-k

op:ann-top-k(
   $plan as map:map,
   $k as xs:unsignedInt,
   $vector-column as item(),
   $query-vector as vec:vector,
   [$distance-column as columnIdentifier?],
   [$options as xs:string*|map:map?]
) as map:map

Summary

This method searches against vector data using a query vector, selecting and returning the top K nearest vectors from the column along with the data associated with each vector (for example, a document, node, or row).

Parameters
$plan The Optic Plan. You can either use the XQuery => chaining operator or specify the variable that captures the return value from the previous operation.
$k This positive integer k is the number of nearest neighbour results to return. It can be passed in as an external parameter using op:param.
$vector-column The column in the input table to perform vector distance calculations against. The column can be named with a string or a column function such as op:col, op:view-col, or op:schema-col, or constructed from an expression with the op:as function.
$query-vector This is the query vector to compare with the vectors from the vector-column column. It can be passed in as an external parameter using op:param.
$distance-column This is an optional output column that returns the value of the distance metric for each output row. The column can be named with a string or a column function such as op:col, op:view-col, or op:schema-col.
$options This is either a sequence of strings or a map containing keys and values for the options to this operator. Options include:
  • max-distance

    This option is a number that determines the maximum distance for a returned result. For cosine distance, the default is float max. Rows with a distance greater than this value will not be returned.

  • search-factor

    This option can be used to increase or decrease the number of candidate vectors found from the index, and defaults to 1.0. Higher values will result in slower searches that may provide higher result accuracy. Lower values will result in faster searches that may give lower accuracy.

  • distance

    The distance metric to use. Currently the only supported value is cosine, which is also the default.

Example

(: Insert template:
   Projects each document's 'emb' array into the vecs.vector_api view as a
   10-dimension vector column named 'embedding'. :)
xquery version "1.0-ml";
(: Bind the tde prefix explicitly so the tde:template-insert call below does not
   rely on an implicitly declared prefix. :)
import module namespace tde="http://marklogic.com/xdmp/tde" at "/MarkLogic/tde.xqy";

let $template :=
<template xmlns="http://marklogic.com/xdmp/tde">
  <context>/array-node('emb')</context>
  <rows>
    <row>
      <schema-name>vecs</schema-name>
      <view-name>vector_api</view-name>
      <view-layout>sparse</view-layout>
      <columns>
        <column>
          <name>embedding</name>
          <scalar-type>vector</scalar-type>
          <val>vec:vector(.)</val>
          <dimension>10</dimension>
        </column>
      </columns>
    </row>
  </rows>
</template>

return
tde:template-insert("vector_api.xml", $template)

(: Insert data in JavaScript
declareUpdate();
const doc1 = {"id":105966, "title":"Infinitive", "text":"After a modal verb you must use an infinitive. For example, \"I must go\", \"he must go\" (\"he must goes\" is not correct)", "url":"https://simple.wikipedia.org/wiki?curid=18194", "wiki_id":18194, "views":26.6284027099609, "paragraph_id":0, "langs":63, "emb":[0.0119949243962765, -0.00765570625662804, -0.177374422550201, 0.286126345396042, 0.23762883245945, 0.378793567419052, 0.201189681887627, -0.438914775848389, -0.130981057882309, 0.167418643832207]}
const doc2 = {"id":106071, "title":"History of the United Kingdom", "text":"The United Kingdom of Great Britain and Northern Ireland is a sovereign state. England, Scotland, Wales (together: Great Britain) and Northern Ireland are parts of this state.", "url":"https://simple.wikipedia.org/wiki?curid=100992", "wiki_id":100992, "views":26.575159072876, "paragraph_id":0, "langs":57, "emb":[0.24431237578392, 0.0295729525387287, 0.0300141926854849, -0.0831106305122376, -0.253710359334946, 0.705469906330109, 0.372012823820114, -0.305916726589203, 0.177077278494835, 0.420972257852554]}
const doc3 = {"id":106180, "title":"Einsatzgruppen", "text":"\"Einsatzgruppe A\" worked in the Baltic States of Estonia, Latvia, and Lithuania. These countries had been occupied by the Soviet Union before the Nazis took them over. \"Einsatzgruppe A\" killed about 140,000 people between June and November 1941.", "url":"https://simple.wikipedia.org/wiki?curid=97438", "wiki_id":97438, "views":26.575159072876, "paragraph_id":17, "langs":40, "emb":[0.495228171348572, -0.301874876022339, 0.318448722362518, -0.183927640318871, 0.111921690404415, 0.0994419455528259, 0.528311431407928, -0.436288177967072, 0.301553636789322, 0.538765072822571]}
const insert1 = xdmp.documentInsert('embedding105966.json', doc1)
const insert2 = xdmp.documentInsert('embedding106071.json', doc2)
xdmp.documentInsert('embedding106180.json', doc3)
:)

(: Calculate the top k nearest neighbor vectors:
   The query vector is the embedding stored in embedding105966.json, and the
   options are supplied as a map. k is 10, which is more than the three
   documents inserted above. :)
xquery version "1.0-ml";
import module namespace op="http://marklogic.com/optic"
     at "/MarkLogic/optic.xqy";

let $query-vector := vec:vector(fn:doc('embedding105966.json')/emb)
(: Name the output distance column with the op:col column function. :)
let $distance-column := op:col('distance')
let $options := map:entry('max-distance',0.2)=>map:with('search-factor', 1.0)=>map:with('distance', 'cosine')
return op:from-view('vecs','vector_api')
=>op:ann-top-k(10, op:view-col('vector_api','embedding'), $query-vector, $distance-column, $options)
=>op:select($distance-column)
=>op:result()

(: Results as below
| distance |
| -1.19209289550781e-7 |
| 0.650373935699463 |

The first row is presumably the query document matching itself: a distance of
zero, up to floating-point rounding.
NOTE(review): the second row (about 0.65) is greater than the max-distance of
0.2 set in $options, so it would be expected to be filtered out. Confirm this
listing against a live run.
:)

  

Example

(: Same setup as above. Use string-format options:)
(: Calculate the top k nearest neighbor vectors:
   The options are supplied as a sequence of 'name=value' strings instead of
   a map. :)
xquery version "1.0-ml";
import module namespace op="http://marklogic.com/optic"
     at "/MarkLogic/optic.xqy";

let $query-vector := vec:vector(fn:doc('embedding105966.json')/emb)
(: Name the output distance column with the op:col column function. :)
let $distance-column := op:col('distance')
let $options := ('max-distance=0.2', 'search-factor=1.0', 'distance=cosine')
return op:from-view('vecs','vector_api')
=>op:ann-top-k(10, op:view-col('vector_api','embedding'), $query-vector, $distance-column, $options)
=>op:select($distance-column)
=>op:result()

(: Results as below
| distance |
| -1.19209289550781e-7 |
| 0.650373935699463 |

The first row is presumably the query document matching itself: a distance of
zero, up to floating-point rounding.
NOTE(review): the second row (about 0.65) is greater than the max-distance of
0.2 set in $options, so it would be expected to be filtered out. Confirm this
listing against a live run.
:)
    

Stack Overflow: Get the most useful answers to questions from the MarkLogic community, or ask your own question.