AccessPlan.prototype.annTopK( k as Number, vectorColumn as ColumnIdentifier, queryVector as vec.vector, [distanceColumn as ColumnIdentifier?], [options as objectLiteral?] ) as ModifyPlan
This method searches against vector data, using a query vector, selecting and returning the top K nearest vectors from the column along with the data associated with each vector, for example, a document, node, or row.
Parameters | |
---|---|
k | This positive integer k is the number of nearest neighbour results to return. It can be passed in as an external parameter using op.param. |
vectorColumn | The column in the input table to perform vector distance calculations against. The column can be named with a string or a column function such as op.col, op.viewCol, or op.schemaCol, or constructed from an expression with the op.as function. |
queryVector | This is the query vector to compare with the vectors from the vectorColumn column. It can be passed in as an external parameter using op.param. |
distanceColumn | An optional output column that returns the value of the distance metric for that output row. The column can be named with a string or a column function such as op.col, op.viewCol, or op.schemaCol. |
options |
This is either an array of strings or an object containing keys and values for the options to this operator.
Options include:
|
annTopK
is a method of the following classes:
/*
Insert template in XQuery.
The template maps each document's "emb" array to the "embedding" vector
column (dimension 10) of the vecs.vector_api view.

xquery version "1.0-ml";
import module "http://marklogic.com/xdmp/tde" at "/MarkLogic/tde.xqy";
let $template :=
<template xmlns="http://marklogic.com/xdmp/tde">
  <context>/array-node('emb')</context>
  <rows>
    <row>
      <schema-name>vecs</schema-name>
      <view-name>vector_api</view-name>
      <view-layout>sparse</view-layout>
      <columns>
        <column>
          <name>embedding</name>
          <scalar-type>vector</scalar-type>
          <val>vec:vector(.)</val>
          <dimension>10</dimension>
        </column>
      </columns>
    </row>
  </rows>
</template>
return tde:template-insert("vector_api.xml", $template)
*/

// Insert 3 documents
declareUpdate();

const doc1 = {
  "id": 105966,
  "title": "Infinitive",
  "text": "After a modal verb you must use an infinitive. For example, \"I must go\", \"he must go\" (\"he must goes\" is not correct)",
  "url": "https://simple.wikipedia.org/wiki?curid=18194",
  "wiki_id": 18194,
  "views": 26.6284027099609,
  "paragraph_id": 0,
  "langs": 63,
  "emb": [0.0119949243962765, -0.00765570625662804, -0.177374422550201, 0.286126345396042, 0.23762883245945, 0.378793567419052, 0.201189681887627, -0.438914775848389, -0.130981057882309, 0.167418643832207]
};

const doc2 = {
  "id": 106071,
  "title": "History of the United Kingdom",
  "text": "The United Kingdom of Great Britain and Northern Ireland is a sovereign state. England, Scotland, Wales (together: Great Britain) and Northern Ireland are parts of this state.",
  "url": "https://simple.wikipedia.org/wiki?curid=100992",
  "wiki_id": 100992,
  "views": 26.575159072876,
  "paragraph_id": 0,
  "langs": 57,
  "emb": [0.24431237578392, 0.0295729525387287, 0.0300141926854849, -0.0831106305122376, -0.253710359334946, 0.705469906330109, 0.372012823820114, -0.305916726589203, 0.177077278494835, 0.420972257852554]
};

const doc3 = {
  "id": 106180,
  "title": "Einsatzgruppen",
  // The paragraph break inside the text is written as \n: a raw line break
  // is not allowed inside a quoted JavaScript string literal.
  "text": "\"Einsatzgruppe A\" worked in the Baltic States of Estonia, Latvia, and Lithuania. These countries had been occupied by the Soviet Union before the Nazis took them over. \n\"Einsatzgruppe A\" killed about 140,000 people between June and November 1941.",
  "url": "https://simple.wikipedia.org/wiki?curid=97438",
  "wiki_id": 97438,
  "views": 26.575159072876,
  "paragraph_id": 17,
  "langs": 40,
  "emb": [0.495228171348572, -0.301874876022339, 0.318448722362518, -0.183927640318871, 0.111921690404415, 0.0994419455528259, 0.528311431407928, -0.436288177967072, 0.301553636789322, 0.538765072822571]
};

xdmp.documentInsert('embedding105966.json', doc1);
xdmp.documentInsert('embedding106071.json', doc2);
xdmp.documentInsert('embedding106180.json', doc3);

// Calculate the top k nearest neighbor vector
// NOTE(review): updates are presumably not visible until the inserting
// statement commits, so the query below likely has to be run as a separate
// request after the inserts above - confirm.
const op = require('/MarkLogic/optic');

// Use the embedding of the first inserted document as the query vector.
const queryVector = vec.vector(fn.head(fn.doc('embedding105966.json')).toObject().emb);
const distanceColumn = op.col('distance');
const vectorColumn = op.viewCol('vector_api', 'embedding');

op.fromView('vecs', 'vector_api')
  .annTopK(2, vectorColumn, queryVector, distanceColumn, {"maxDistance": 0.2,"searchFactor": 1.0, "distance": "cosine"})
  .select(distanceColumn)
  .result();

/*
It returns
{"distance":-1.19209289550781e-7}
{"distance":0.451159417629242}
*/
// Same setup as above, but the options are passed as an array of
// "name=value" strings instead of an object literal.
// Calculate the top k nearest neighbor vector.
const op = require('/MarkLogic/optic');

// Read the stored document and turn its embedding into the query vector.
const sourceDoc = fn.head(fn.doc('embedding105966.json')).toObject();
const queryVector = vec.vector(sourceDoc.emb);

const distanceColumn = op.col('distance');
const vectorColumn = op.viewCol('vector_api', 'embedding');
const stringOptions = ['maxDistance=0.2', 'searchFactor=1.0', 'distance=cosine'];

const nearestPlan = op
  .fromView('vecs', 'vector_api')
  .annTopK(2, vectorColumn, queryVector, distanceColumn, stringOptions);

nearestPlan.select(distanceColumn).result();

/*
It returns
{"distance":-1.19209289550781e-7}
{"distance":0.451159417629242}
*/
Stack Overflow: Get the most useful answers to questions from the MarkLogic community, or ask your own question.