Skip to main content

Vector similarity search

Vector search returns the objects whose vectors are most similar to the vector of the query.

Search with text

Use the Near Text operator to find objects with the nearest vector to an input text.

from weaviate.classes.query import MetadataQuery

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.near_text(
query="animals in movies",
limit=2,
return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
print(o.properties)
print(o.metadata.distance)
Example response

The output is like this:

{
"data": {
"Get": {
"JeopardyQuestion": [
{
"answer": "meerkats",
"question": "Group of mammals seen <a href=\"http://www.j-archive.com/media/1998-06-01_J_28.jpg\" target=\"_blank\">here</a>: [like Timon in <i>The Lion King</i>]",
"_additional": { "distance": 0.17602634 }
},
{
"answer": "dogs",
"question": "Scooby-Doo, Goofy & Pluto are cartoon versions",
"_additional": { "distance": 0.17842108 }
}
]
}
}
}

Search with image

Use the Near Image operator to find objects with the nearest vector to an image.
This example uses a base64 representation of an image.

base64_string="SOME_BASE_64_REPRESENTATION"

# Get the collection containing images
dogs = client.collections.get("Dog")

# Perform query
response = dogs.query.near_image(
near_image=base64_string,
return_properties=["breed"],
limit=1,
# target_vector="vector_name",  # required when using multiple named vectors
)

print(response.objects[0])

client.close()

See Image search for more information.

Search with an existing object

If you have an object ID, use the Near Object operator to find objects similar to that object.

from weaviate.classes.query import MetadataQuery

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.near_object(
near_object=uuid, # A UUID of an object (e.g. "56b9449e-65db-5df4-887b-0a4773f52aa7")
limit=2,
return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
print(o.properties)
print(o.metadata.distance)
Additional information
To get the object ID, see [Retrieve the object ID](/developers/weaviate/search/basics#retrieve-the-object-id).

Search with a vector

If you have an input vector, use the Near Vector operator to find objects with similar vectors.

from weaviate.classes.query import MetadataQuery

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.near_vector(
near_vector=query_vector, # your query vector goes here
limit=2,
return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
print(o.properties)
print(o.metadata.distance)

Named vectors

Added in v1.24

To search a collection that has named vectors, use the target vector field to specify which named vector to search.

from weaviate.classes.query import MetadataQuery

reviews = client.collections.get("WineReviewNV")
response = reviews.query.near_text(
query="a sweet German white wine",
limit=2,
target_vector="title_country", # Specify the target vector for named vector collections
return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
print(o.properties)
print(o.metadata.distance)
Example response

The output is like this:

{
"WineReviewNV": [
{
"country": "Austria",
"review_body": "With notions of cherry and cinnamon on the nose and just slight fizz, this is a refreshing, fruit-driven sparkling ros\u00e9 that's full of strawberry and cherry notes\u2014it might just be the very definition of easy summer wine. It ends dry, yet refreshing.",
"title": "Gebeshuber 2013 Frizzante Ros\u00e9 Pinot Noir (\u00d6sterreichischer Perlwein)"
},
{
"country": "Austria",
"review_body": "Beautifully perfumed, with acidity, white fruits and a mineral context. The wine is layered with citrus and lime, hints of fresh pineapple acidity. Screw cap.",
"title": "Stadt Krems 2009 Steinterrassen Riesling (Kremstal)"
}
]
}

Set a similarity threshold

To set a similarity threshold between the search and target vectors, define a maximum distance (or certainty).

from weaviate.classes.query import MetadataQuery

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.near_text(
query="animals in movies",
distance=0.25, # max accepted distance
return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
print(o.properties)
print(o.metadata.distance)
Additional information
  • The distance value depends on many factors, including the vectorization model you use. Experiment with your data to find a value that works for you.
  • certainty is only available with cosine distance.
  • To find the least similar objects, use the negative cosine distance with nearVector search.

limit & offset

Use limit to set a fixed maximum number of objects to return.

Optionally, use offset to paginate the results.

from weaviate.classes.query import MetadataQuery

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.near_text(
query="animals in movies",
limit=2, # return 2 objects
offset=1, # With an offset of 1
return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
print(o.properties)
print(o.metadata.distance)

Limit result groups

To limit results to groups of objects with similar distances from the query, use the autocut filter to set the number of groups to return.

from weaviate.classes.query import MetadataQuery

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.near_text(
query="animals in movies",
auto_limit=1, # number of close groups
return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
print(o.properties)
print(o.metadata.distance)
Example response

The output is like this:

{
"data": {
"Get": {
"JeopardyQuestion": [
{
"answer": "meerkats",
"question": "Group of mammals seen <a href=\"http://www.j-archive.com/media/1998-06-01_J_28.jpg\" target=\"_blank\">here</a>: [like Timon in <i>The Lion King</i>]",
"_additional": { "distance": 0.17602634 }
},
{
"answer": "dogs",
"question": "Scooby-Doo, Goofy & Pluto are cartoon versions",
"_additional": { "distance": 0.17842108 }
}
]
}
}
}

Group results

Use a property or a cross-reference to group results. To group returned objects, the query must include a Near search operator, such as Near Text or Near Object.

from weaviate.classes.query import MetadataQuery, GroupBy

jeopardy = client.collections.get("JeopardyQuestion")

group_by = GroupBy(
prop="round", # group by this property
objects_per_group=2, # maximum objects per group
number_of_groups=2, # maximum number of groups
)

response = jeopardy.query.near_text(
query="animals in movies", # find object based on this query
limit=10, # maximum total objects
return_metadata=MetadataQuery(distance=True),
group_by=group_by
)


for o in response.objects:
print(o.uuid)
print(o.belongs_to_group)
print(o.metadata.distance)

for grp, grp_items in response.groups.items():
print("=" * 10 + grp_items.name + "=" * 10)
print(grp_items.number_of_objects)
for o in grp_items.objects:
print(o.properties)
print(o.metadata)
Example response

The output is like this:

{
"data": {
"Get": {
"JeopardyQuestion": [
{
"_additional": {
"group": {
"count": 2,
"groupedBy": {
"path": [
"round"
],
"value": "Jeopardy!"
},
"hits": [
{
"answer": "meerkats",
"question": "Group of mammals seen <a href=\"http://www.j-archive.com/media/1998-06-01_J_28.jpg\" target=\"_blank\">here</a>: [like Timon in <i>The Lion King</i>]"
},
{
"answer": "dogs",
"question": "Scooby-Doo, Goofy & Pluto are cartoon versions"
}
],
"id": 0,
"maxDistance": 0.17842054,
"minDistance": 0.17602539
}
}
},
{
"_additional": {
"group": {
"count": 1,
"groupedBy": {
"path": [
"round"
],
"value": "Double Jeopardy!"
},
"hits": [
{
"answer": "fox",
"question": "In titles, animal associated with both Volpone and Reynard"
}
],
"id": 1,
"maxDistance": 0.18770188,
"minDistance": 0.18770188
}
}
}
]
}
}
}

Filter results

For more specific results, use a filter to narrow your search.

from weaviate.classes.query import MetadataQuery, Filter

jeopardy = client.collections.get("JeopardyQuestion")
response = jeopardy.query.near_text(
query="animals in movies",
filters=Filter.by_property("round").equal("Double Jeopardy!"),
limit=2,
return_metadata=MetadataQuery(distance=True),
)

for o in response.objects:
print(o.properties)
print(o.metadata.distance)
Example response

The output is like this:

{
"data": {
"Get": {
"JeopardyQuestion": [
{
"_additional": {
"distance": 0.18759078
},
"answer": "fox",
"question": "In titles, animal associated with both Volpone and Reynard",
"round": "Double Jeopardy!"
},
{
"_additional": {
"distance": 0.19532347
},
"answer": "Swan",
"question": "In a Tchaikovsky ballet, Prince Siegfried goes hunting for these animals & falls in love with 1 of them",
"round": "Double Jeopardy!"
}
]
}
}
}

Questions and feedback

If you have any questions or feedback, let us know in the user forum.