op:ann-top-k( $plan as map:map, $k as xs:unsignedInt, $vector-column as item(), $query-vector as vec:vector, [$distance-column as columnIdentifier?], [$options as xs:string*|map:map?] ) as map:map
This method searches vector data using a query vector, selecting and returning the top K nearest vectors from the column along with the data associated with each vector (for example, the document, node, or row).
Parameters | |
---|---|
$plan | The Optic Plan. You can either use the XQuery => chaining operator or specify the variable that captures the return value from the previous operation. |
$k | This positive integer k is the number of nearest neighbour results to return. It can be passed in as an external parameter using op:param. |
$vector-column | The column in the input table to perform vector distance calculations against. The column can be named with a string or a column function such as op:col, op:view-col, or op:schema-col, or constructed from an expression with the op:as function. |
$query-vector | This is the query vector to compare with the vectors from the vector-column column. It can be passed in as an external parameter using op:param. |
$distance-column | An optional output column that returns the value of the distance metric for each output row. The column can be named with a string or a column function such as op:col, op:view-col, or op:schema-col. |
$options |
This is either a sequence of strings or a map containing keys and values for the options to this operator.
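Options include max-distance, search-factor, and distance. The examples below show them passed both as map entries and as 'name=value' strings.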
|
(: Insert template:) xquery version "1.0-ml"; import module "http://marklogic.com/xdmp/tde" at "/MarkLogic/tde.xqy"; let $template := <template xmlns="http://marklogic.com/xdmp/tde"> <context>/array-node('emb')</context> <rows> <row> <schema-name>vecs</schema-name> <view-name>vector_api</view-name> <view-layout>sparse</view-layout> <columns> <column> <name>embedding</name> <scalar-type>vector</scalar-type> <val>vec:vector(.)</val> <dimension>10</dimension> </column> </columns> </row> </rows> </template> return tde:template-insert("vector_api.xml", $template) (: Insert data in JavaScript declareUpdate(); const doc1 = {"id":105966, "title":"Infinitive", "text":"After a modal verb you must use an infinitive. For example, \"I must go\", \"he must go\" (\"he must goes\" is not correct)", "url":"https://simple.wikipedia.org/wiki?curid=18194", "wiki_id":18194, "views":26.6284027099609, "paragraph_id":0, "langs":63, "emb":[0.0119949243962765, -0.00765570625662804, -0.177374422550201, 0.286126345396042, 0.23762883245945, 0.378793567419052, 0.201189681887627, -0.438914775848389, -0.130981057882309, 0.167418643832207]} const doc2 = {"id":106071, "title":"History of the United Kingdom", "text":"The United Kingdom of Great Britain and Northern Ireland is a sovereign state. England, Scotland, Wales (together: Great Britain) and Northern Ireland are parts of this state.", "url":"https://simple.wikipedia.org/wiki?curid=100992", "wiki_id":100992, "views":26.575159072876, "paragraph_id":0, "langs":57, "emb":[0.24431237578392, 0.0295729525387287, 0.0300141926854849, -0.0831106305122376, -0.253710359334946, 0.705469906330109, 0.372012823820114, -0.305916726589203, 0.177077278494835, 0.420972257852554]} const doc3 = {"id":106180, "title":"Einsatzgruppen", "text":"\"Einsatzgruppe A\" worked in the Baltic States of Estonia, Latvia, and Lithuania. These countries had been occupied by the Soviet Union before the Nazis took them over. 
\"Einsatzgruppe A\" killed about 140,000 people between June and November 1941.", "url":"https://simple.wikipedia.org/wiki?curid=97438", "wiki_id":97438, "views":26.575159072876, "paragraph_id":17, "langs":40, "emb":[0.495228171348572, -0.301874876022339, 0.318448722362518, -0.183927640318871, 0.111921690404415, 0.0994419455528259, 0.528311431407928, -0.436288177967072, 0.301553636789322, 0.538765072822571]} const insert1 = xdmp.documentInsert('embedding105966.json', doc1) const insert2 = xdmp.documentInsert('embedding106071.json', doc2) xdmp.documentInsert('embedding106180.json', doc3) :) (: Calculate the top k nearest neighbor vectors:) xquery version "1.0-ml"; import module namespace op="http://marklogic.com/optic" at "/MarkLogic/optic.xqy"; let $query-vector := vec:vector(fn:doc('embedding105966.json')/emb) let $distance-column := plan:column('distance') let $options := map:entry('max-distance',0.2)=>map:with('search-factor', 1.0)=>map:with('distance', 'cosine') return op:from-view('vecs','vector_api') =>op:ann-top-k(10, op:view-col('vector_api','embedding'), $query-vector, $distance-column, $options) =>op:select($distance-column) =>op:result() (: Results as below | distance | | -1.19209289550781e-7 | | 0.650373935699463 | :)
(: Same setup as above. Use string-format options:) (: Calculate the top k nearest neighbor vectors:) xquery version "1.0-ml"; import module namespace op="http://marklogic.com/optic" at "/MarkLogic/optic.xqy"; let $query-vector := vec:vector(fn:doc('embedding105966.json')/emb) let $distance-column := plan:column('distance') let $options := ('max-distance=0.2', 'search-factor=1.0', 'distance=cosine') return op:from-view('vecs','vector_api') =>op:ann-top-k(10, op:view-col('vector_api','embedding'), $query-vector, $distance-column, $options) =>op:select($distance-column) =>op:result() (: Results as below | distance | | -1.19209289550781e-7 | | 0.650373935699463 | :)
Stack Overflow: Get the most useful answers to questions from the MarkLogic community, or ask your own question.