Loading TOC...

MarkLogic 12 Product Documentation
ModifyPlan.prototype.annTopK

ModifyPlan.prototype.annTopK(
   k as Number,
   vectorColumn as ColumnIdentifier,
   queryVector as vec.vector,
   [distanceColumn as columnIdentifier?],
   [options as objectLiteral?]
) as ModifyPlan

Summary

This method searches against vector data using a query vector, selecting and returning the top K nearest vectors from the column along with the data associated with each vector (for example, a document, node, or row).

Parameters
k This positive integer k is the number of nearest neighbour results to return. It can be passed in as an external parameter using op.param.
vectorColumn This is the column of the input table against which vector distance calculations are performed. The column can be named with a string or a column function such as op.col, op.viewCol, or op.schemaCol, or constructed from an expression with the op.as function.
queryVector This is the query vector to compare with the vectors from the vectorColumn column. It can be passed in as an external parameter using op.param.
distanceColumn This is an optional output column that returns the value of the distance metric for each output row. The column can be named with a string or a column function such as op.col, op.viewCol, or op.schemaCol.
options This is either an array of strings or an object containing keys and values for the options to this operator. Options include:
  • maxDistance

    This option is a number that determines the maximum distance for a returned result. Rows with a distance greater than this value will not be returned. For cosine distance, the default is float max.

  • searchFactor

    This option can be used to increase or decrease the number of candidate vectors found from the index, and defaults to 1.0. Higher values result in slower searches that may provide more accurate results. Lower values result in faster searches that may provide less accurate results.

  • distance

    The distance metric to use. Currently the only supported value is cosine, which is also the default.

Usage Notes

annTopK is a method of the following classes:

Example

/* Insert template in XQuery
xquery version "1.0-ml";
import module "http://marklogic.com/xdmp/tde" at "/MarkLogic/tde.xqy";

let $template :=
<template xmlns="http://marklogic.com/xdmp/tde">
<context>/array-node('emb')</context>
<rows>
    <row>
    <schema-name>vecs</schema-name>
    <view-name>vector_api</view-name>
    <view-layout>sparse</view-layout>
    <columns>
        <column>
        <name>embedding</name>
        <scalar-type>vector</scalar-type>
        <val>vec:vector(.)</val>
        <dimension>10</dimension>
        </column>
    </columns>
    </row>
</rows>
</template>

return
tde:template-insert("vector_api.xml", $template)
*/

// ---- Request 1: insert 3 documents ----
// NOTE: run this insert step and the query step below as separate requests.
// Documents written in an update transaction are not visible until that
// transaction commits, so fn.doc() in the same request would not find them.
declareUpdate();
const doc1 = {"id":105966, "title":"Infinitive", "text":"After a modal verb you must use an infinitive. For example, \"I must go\", \"he must go\" (\"he must goes\" is not correct)", "url":"https://simple.wikipedia.org/wiki?curid=18194", "wiki_id":18194, "views":26.6284027099609, "paragraph_id":0, "langs":63, "emb":[0.0119949243962765, -0.00765570625662804, -0.177374422550201, 0.286126345396042, 0.23762883245945, 0.378793567419052, 0.201189681887627, -0.438914775848389, -0.130981057882309, 0.167418643832207]};
const doc2 = {"id":106071, "title":"History of the United Kingdom", "text":"The United Kingdom of Great Britain and Northern Ireland is a sovereign state. England, Scotland, Wales (together: Great Britain) and Northern Ireland are parts of this state.", "url":"https://simple.wikipedia.org/wiki?curid=100992", "wiki_id":100992, "views":26.575159072876, "paragraph_id":0, "langs":57, "emb":[0.24431237578392, 0.0295729525387287, 0.0300141926854849, -0.0831106305122376, -0.253710359334946, 0.705469906330109, 0.372012823820114, -0.305916726589203, 0.177077278494835, 0.420972257852554]};
const doc3 = {"id":106180, "title":"Einsatzgruppen", "text":"\"Einsatzgruppe A\" worked in the Baltic States of Estonia, Latvia, and Lithuania. These countries had been occupied by the Soviet Union before the Nazis took them over. \"Einsatzgruppe A\" killed about 140,000 people between June and November 1941.", "url":"https://simple.wikipedia.org/wiki?curid=97438", "wiki_id":97438, "views":26.575159072876, "paragraph_id":17, "langs":40, "emb":[0.495228171348572, -0.301874876022339, 0.318448722362518, -0.183927640318871, 0.111921690404415, 0.0994419455528259, 0.528311431407928, -0.436288177967072, 0.301553636789322, 0.538765072822571]};
// Each document carries a 10-element "emb" array, matching the template's
// <dimension>10</dimension> vector column.
xdmp.documentInsert('embedding105966.json', doc1);
xdmp.documentInsert('embedding106071.json', doc2);
xdmp.documentInsert('embedding106180.json', doc3);

// ---- Request 2: calculate the top k nearest neighbor vectors ----
const op = require('/MarkLogic/optic');

// Use the embedding stored in embedding105966.json as the query vector.
const queryVector = vec.vector(fn.head(fn.doc('embedding105966.json')).toObject().emb);
// Output column that receives the distance metric for each returned row.
const distanceColumn = op.col('distance');
// Vector column of the vecs.vector_api view to search against.
const vectorColumn = op.viewCol('vector_api','embedding');
// Options are passed here as an object literal.
// NOTE(review): the sample output documented for this query includes a
// distance of about 0.45, which exceeds maxDistance 0.2 -- confirm the
// expected output against a live server.
op.fromView('vecs','vector_api')
  .annTopK(2, vectorColumn, queryVector, distanceColumn, {"maxDistance": 0.2,"searchFactor": 1.0, "distance": "cosine"})
  .select(distanceColumn)
  .result();

/* It returns
{"distance":-1.19209289550781e-7}
{"distance":0.451159417629242}
*/
  

Example

// Same setup as above, but the options are passed as an array of
// "name=value" strings instead of an object literal.
// Calculate the top k nearest neighbor vectors.
const op = require('/MarkLogic/optic');

// Use the embedding stored in embedding105966.json as the query vector.
const sourceDoc = fn.head(fn.doc('embedding105966.json')).toObject();
const queryVector = vec.vector(sourceDoc.emb);

// Output column for the distance metric, and the view column to search.
const distanceColumn = op.col('distance');
const vectorColumn = op.viewCol('vector_api','embedding');

// String-format options.
// NOTE(review): the sample output documented for this query includes a
// distance of about 0.45, which exceeds maxDistance=0.2 -- confirm the
// expected output against a live server.
const annOptions = ['maxDistance=0.2', 'searchFactor=1.0', 'distance=cosine'];

op.fromView('vecs','vector_api')
  .annTopK(2, vectorColumn, queryVector, distanceColumn, annOptions)
  .select(distanceColumn)
  .result();

/* It returns
{"distance":-1.19209289550781e-7}
{"distance":0.451159417629242}
*/
    

Stack Overflow iconStack Overflow: Get the most useful answers to questions from the MarkLogic community, or ask your own question.