Skip to main content
Version: User Guides (BYOC)

Get & Scalar Query

This guide demonstrates how to get entities by ID and conduct scalar filtering. Scalar filtering retrieves the entities that match the specified filtering conditions.

Overview

A scalar query filters entities in a collection based on a defined condition using boolean expressions. The query result is a set of entities that match the defined condition. Unlike a vector search, which identifies the closest vector to a given vector in a collection, queries filter entities based on specific criteria.

On Zilliz Cloud, a filter is always a string comprising field names joined by operators. In this guide, you will find various filter examples. To learn more about the operator details, go to the Reference section.

Preparations

The following steps repurpose the code to connect to a Zilliz Cloud cluster, quickly set up a collection, and insert over 1,000 randomly generated entities into the collection.

Step 1: Create a collection

import random, time
from pymilvus import MilvusClient

CLUSTER_ENDPOINT = "YOUR_CLUSTER_ENDPOINT"
TOKEN = "YOUR_CLUSTER_TOKEN"

# 1. Set up a Milvus client
# The endpoint and token come from the placeholders defined above.
client = MilvusClient(
    uri=CLUSTER_ENDPOINT,
    token=TOKEN,
)

# 2. Create a collection
client.create_collection(
    collection_name="quick_setup",
    dimension=5,  # The dimension value should be greater than 1
)

Step 2: Insert randomly generated entities

# 3. Insert randomly generated vectors
# Build 1,000 entities with IDs 0-999. Each entity carries a 5-dimensional
# random vector, a random color, a random 4-digit tag, and a "color_tag"
# string that joins the two with an underscore.
colors = ["green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey"]
data = []

for i in range(1000):
    # Draw a fresh color and tag for every entity.
    current_color = random.choice(colors)
    current_tag = random.randint(1000, 9999)
    data.append({
        "id": i,
        "vector": [random.uniform(-1, 1) for _ in range(5)],
        "color": current_color,
        "tag": current_tag,
        "color_tag": f"{current_color}_{current_tag}",
    })

print(data[0])

# Output
#
# {
# "id": 0,
# "vector": [
# 0.7371107800002366,
# -0.7290389773227746,
# 0.38367002049157417,
# 0.36996000494220627,
# -0.3641898951462792
# ],
# "color": "yellow",
# "tag": 6781,
# "color_tag": "yellow_6781"
# }

# Send the generated entities to the collection in a single insert call.
res = client.insert(
    collection_name="quick_setup",
    data=data,
)

print(res)

# Output
#
# {
# "insert_count": 1000,
# "ids": [
# 0,
# 1,
# 2,
# 3,
# 4,
# 5,
# 6,
# 7,
# 8,
# 9,
# "(990 more items hidden)"
# ]
# }

Step 3: Create partitions and insert more entities

# 4. Create partitions and insert more entities
# Create partitionA first, then partitionB, in the same collection.
for partition_name in ("partitionA", "partitionB"):
    client.create_partition(
        collection_name="quick_setup",
        partition_name=partition_name,
    )

# Build 500 more entities (IDs 1000-1499) and insert them into partitionA.
data = []

for i in range(1000, 1500):
    current_color = random.choice(colors)
    # Draw a fresh tag per entity; without this line every entity would
    # reuse whatever value current_tag held from an earlier loop.
    current_tag = random.randint(1000, 9999)
    data.append({
        "id": i,
        "vector": [random.uniform(-1, 1) for _ in range(5)],
        "color": current_color,
        "tag": current_tag,
        "color_tag": f"{current_color}_{current_tag}",
    })

res = client.insert(
    collection_name="quick_setup",
    data=data,
    partition_name="partitionA",
)

print(res)

# Output
#
# {
# "insert_count": 500,
# "ids": [
# 1000,
# 1001,
# 1002,
# 1003,
# 1004,
# 1005,
# 1006,
# 1007,
# 1008,
# 1009,
# "(490 more items hidden)"
# ]
# }

# Build 500 more entities (IDs 1500-1999) and insert them into partitionB.
data = []

for i in range(1500, 2000):
    current_color = random.choice(colors)
    # Draw a fresh tag per entity; without this line every entity would
    # reuse whatever value current_tag held from an earlier loop.
    current_tag = random.randint(1000, 9999)
    data.append({
        "id": i,
        "vector": [random.uniform(-1, 1) for _ in range(5)],
        "color": current_color,
        "tag": current_tag,
        "color_tag": f"{current_color}_{current_tag}",
    })

res = client.insert(
    collection_name="quick_setup",
    data=data,
    partition_name="partitionB",
)

print(res)

# Output
#
# {
# "insert_count": 500,
# "ids": [
# 1500,
# 1501,
# 1502,
# 1503,
# 1504,
# 1505,
# 1506,
# 1507,
# 1508,
# 1509,
# "(490 more items hidden)"
# ]
# }

Get Entities by ID

If you know the IDs of the entities you are interested in, you can use the get() method.

# 5. Get entities by ID
# Look up the entities whose primary keys are 0, 1, and 2.
res = client.get(
    collection_name="quick_setup",
    ids=[0, 1, 2],
)

print(res)

# Output
#
# [
# {
# "id": 0,
# "vector": [
# 0.7371108,
# -0.72903895,
# 0.38367003,
# 0.36996,
# -0.3641899
# ],
# "color": "yellow",
# "tag": 6781,
# "color_tag": "yellow_6781"
# },
# {
# "id": 1,
# "vector": [
# -0.10924426,
# -0.7659806,
# 0.8613359,
# 0.65219676,
# -0.06385158
# ],
# "color": "pink",
# "tag": 1023,
# "color_tag": "pink_1023"
# },
# {
# "id": 2,
# "vector": [
# 0.402096,
# -0.74742633,
# -0.901683,
# 0.6292514,
# 0.77286446
# ],
# "color": "blue",
# "tag": 3972,
# "color_tag": "blue_3972"
# }
# ]

Get entities from partitions

You can also get entities from specific partitions.

# 5. Get entities from partitions
# Same lookup by ID, restricted to the partitions listed in partition_names.
res = client.get(
    collection_name="quick_setup",
    ids=[1000, 1001, 1002],
    partition_names=["partitionA"],
)

print(res)

# Output
#
# [
# {
# "color": "green",
# "tag": 1995,
# "color_tag": "green_1995",
# "id": 1000,
# "vector": [
# 0.7807706,
# 0.8083741,
# 0.17276904,
# -0.8580777,
# 0.024156934
# ]
# },
# {
# "color": "red",
# "tag": 1995,
# "color_tag": "red_1995",
# "id": 1001,
# "vector": [
# 0.065074645,
# -0.44882354,
# -0.29479212,
# -0.19798489,
# -0.77542555
# ]
# },
# {
# "color": "green",
# "tag": 1995,
# "color_tag": "green_1995",
# "id": 1002,
# "vector": [
# 0.027934508,
# -0.44199976,
# -0.40262738,
# -0.041511405,
# 0.024782438
# ]
# }
# ]

Use Basic Operators

In this section, you will find examples of how to use basic operators in scalar filtering. You can apply these filters to vector searches and data deletions too.

  • Filter entities with their tag values falling between 1,000 and 1,500.

    # 6. Use basic operators

    # Range condition on the "tag" field; at most three entities are returned.
    res = client.query(
        collection_name="quick_setup",
        filter="1000 < tag < 1500",
        output_fields=["color_tag"],
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "id": 1,
    # "color_tag": "pink_1023"
    # },
    # {
    # "id": 41,
    # "color_tag": "red_1483"
    # },
    # {
    # "id": 44,
    # "color_tag": "grey_1146"
    # }
    # ]
  • Filter entities with their color values set to brown.

    # Equality condition on the "color" field; at most three entities are returned.
    res = client.query(
        collection_name="quick_setup",
        filter='color == "brown"',
        output_fields=["color_tag"],
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "color_tag": "brown_5343",
    # "id": 15
    # },
    # {
    # "color_tag": "brown_3167",
    # "id": 27
    # },
    # {
    # "color_tag": "brown_3100",
    # "id": 30
    # }
    # ]
  • Filter entities with their color values set to neither green nor purple.

    # Exclusion condition: "color" must not be any value in the given list.
    res = client.query(
        collection_name="quick_setup",
        filter='color not in ["green", "purple"]',
        output_fields=["color_tag"],
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "color_tag": "yellow_6781",
    # "id": 0
    # },
    # {
    # "color_tag": "pink_1023",
    # "id": 1
    # },
    # {
    # "color_tag": "blue_3972",
    # "id": 2
    # }
    # ]
  • Filter entities whose color tags start with red.

    # Prefix match on "color_tag" using the like operator with a trailing % wildcard.
    res = client.query(
        collection_name="quick_setup",
        filter='color_tag like "red%"',
        output_fields=["color_tag"],
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "color_tag": "red_6443",
    # "id": 17
    # },
    # {
    # "color_tag": "red_1483",
    # "id": 41
    # },
    # {
    # "color_tag": "red_4348",
    # "id": 47
    # }
    # ]
  • Filter entities with their colors set to red and tag values within the range from 1,000 to 1,500.

    # Two conditions joined with "and": a color match plus a tag range.
    res = client.query(
        collection_name="quick_setup",
        filter='(color == "red") and (1000 < tag < 1500)',
        output_fields=["color_tag"],
        limit=3,
    )

    print(res)

    # Output
    #
    # [
    # {
    # "color_tag": "red_1483",
    # "id": 41
    # },
    # {
    # "color_tag": "red_1100",
    # "id": 94
    # },
    # {
    # "color_tag": "red_1343",
    # "id": 526
    # }
    # ]

Use Advanced Operators

In this section, you will find examples of how to use advanced operators in scalar filtering. You can apply these filters to vector searches and data deletions too.

Count entities

  • Counts the total number of entities in a collection.

    # 7. Use advanced operators

    # Request count(*) as the only output field, with no filter and no partition.
    res = client.query(
        collection_name="quick_setup",
        output_fields=["count(*)"],
    )

    print(res)

    # Output
    #
    # [
    # {
    # "count(*)": 2000
    # }
    # ]
  • Counts the total number of entities in specific partitions.

    # Request count(*) again, this time limited to the listed partitions.
    res = client.query(
        collection_name="quick_setup",
        output_fields=["count(*)"],
        partition_names=["partitionA"],
    )

    print(res)

    # Output
    #
    # [
    # {
    # "count(*)": 500
    # }
    # ]
  • Counts the number of entities that match a filtering condition.

    # Combine a filter expression with count(*) to count only matching entities.
    res = client.query(
        collection_name="quick_setup",
        filter='(color == "red") and (1000 < tag < 1500)',
        output_fields=["count(*)"],
    )

    print(res)

    # Output
    #
    # [
    # {
    # "count(*)": 3
    # }
    # ]

Reference on scalar filters

Basic Operators

A boolean expression is always a string comprising field names joined by operators. In this section, you will learn more about basic operators.

Operator

Description

and (&&)

True if both operands are true

or (||)

True if either operand is true

+, -, *, /

Addition, subtraction, multiplication, and division

**

Exponent

%

Modulus

<, >

Less than, greater than

==, !=

Equal to, not equal to

<=, >=

Less than or equal to, greater than or equal to

not

Reverses the result of a given condition.

like

Compares a value to similar values using wildcard operators.

For example, like "prefix%" matches strings that begin with "prefix".

in

Tests if an expression matches any value in a list of values.

Advanced operators

  • count(*)

    Counts the exact number of entities in the collection. Use this as an output field to get the exact number of entities in a collection or partition.

    📘Notes

    This applies to loaded collections. You should use it as the only output field.