JSON Field
A JSON field is a scalar field that stores additional information along with vector embeddings, in key-value pairs. Here's an example of how data is stored in JSON format:
{
"metadata": {
"product_info": {
"category": "electronics",
"brand": "BrandA"
},
"price": 99.99,
"in_stock": true,
"tags": ["summer_sale", "clearance"]
}
}
Limits
-
Field Size: JSON fields are limited to 65,536 bytes in size.
-
Nested Dictionaries: Any nested dictionaries within JSON field values are treated as plain strings for storage.
-
Default Values: JSON fields do not support default values. However, you can set the `nullable` attribute to `True` to allow null values. For details, refer to Nullable & Default.
-
Type Matching: If a JSON field’s key value is an integer or float, it can only be compared (via expression filters) with another numeric key of the same type.
-
Naming: When naming JSON keys, it is recommended to use only letters, numbers, and underscores. Using other characters may cause issues when filtering or searching.
-
String Handling: Milvus stores string values in JSON fields as entered, without semantic conversion. For example:
- `'a"b'`, `"a'b"`, `'a\'b'`, and `"a\"b"` are stored exactly as they are.
- `'a'b'` and `"a"b"` are considered invalid.
-
-
JSON Indexing: When indexing a JSON field, you can specify one or more paths in the JSON field to accelerate filtering. Each additional path increases indexing overhead, so plan your indexing strategy carefully. For more considerations on indexing a JSON field, refer to Considerations on JSON indexing.
Add JSON field
To add a JSON field named `metadata` to your collection schema, set its data type to `DataType.JSON`. The example below defines a JSON field `metadata` that allows null values:
- Python
- Java
- Go
- NodeJS
- cURL
# Import necessary libraries
from pymilvus import MilvusClient, DataType

# Define server address
SERVER_ADDR = "YOUR_CLUSTER_ENDPOINT"

# Create a MilvusClient instance
client = MilvusClient(uri=SERVER_ADDR)

# Define the collection schema.
# The keyword is `enable_dynamic_field` (singular); a misspelled keyword is
# not applied, which would leave dynamic fields disabled.
schema = client.create_schema(
    auto_id=False,
    enable_dynamic_field=True,
)

# Add a JSON field that supports null values
schema.add_field(field_name="metadata", datatype=DataType.JSON, nullable=True)
# Primary key supplied by the caller (auto_id is False above)
schema.add_field(field_name="pk", datatype=DataType.INT64, is_primary=True)
# 3-dimensional float vector used for similarity search
schema.add_field(field_name="embedding", datatype=DataType.FLOAT_VECTOR, dim=3)
import io.milvus.v2.client.ConnectConfig;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.DataType;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
// Connect to the cluster
MilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()
.uri("YOUR_CLUSTER_ENDPOINT")
.build());
// Create an empty schema and enable dynamic fields on it
CreateCollectionReq.CollectionSchema schema = client.createSchema();
schema.setEnableDynamicField(true);
// JSON field "metadata" that accepts null values
schema.addField(AddFieldReq.builder()
.fieldName("metadata")
.dataType(DataType.JSON)
.isNullable(true)
.build());
// Int64 primary key "pk"
schema.addField(AddFieldReq.builder()
.fieldName("pk")
.dataType(DataType.Int64)
.isPrimaryKey(true)
.build());
// 3-dimensional float vector "embedding"
schema.addField(AddFieldReq.builder()
.fieldName("embedding")
.dataType(DataType.FloatVector)
.dimension(3)
.build());
import "github.com/milvus-io/milvus/client/v2/entity"
schema := entity.NewSchema()
schema.WithField(entity.NewField().
WithName("pk").
WithDataType(entity.FieldTypeInt64).
WithIsAutoID(true),
).WithField(entity.NewField().
WithName("embedding").
WithDataType(entity.FieldTypeFloatVector).
WithDim(3),
).WithField(entity.NewField().
WithName("metadata").
WithDataType(entity.FieldTypeJSON),
)
import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";

// Field definitions for the collection
const schema = [
  {
    // JSON field that allows null values
    name: "metadata",
    data_type: DataType.JSON,
    nullable: true,
  },
  {
    // Primary key supplied by the caller
    name: "pk",
    data_type: DataType.Int64,
    is_primary_key: true,
  },
  {
    // 3-dimensional float vector used for similarity search
    name: "embedding",
    data_type: DataType.FloatVector,
    dim: 3,
  },
];
# JSON field that allows null values
export jsonField='{
    "fieldName": "metadata",
    "dataType": "JSON",
    "nullable": true
}'

# Int64 primary key
export pkField='{
    "fieldName": "pk",
    "dataType": "Int64",
    "isPrimary": true
}'

# 3-dimensional float vector
export vectorField='{
    "fieldName": "embedding",
    "dataType": "FloatVector",
    "elementTypeParams": {
        "dim": 3
    }
}'

# Assemble the schema; double quotes so the field variables are expanded
export schema="{
    \"autoID\": false,
    \"enableDynamicField\": true,
    \"fields\": [
        $jsonField,
        $pkField,
        $vectorField
    ]
}"
Set `enable_dynamic_field=True` if you need to insert additional, undefined fields in the future.
Use `nullable=True` to allow missing or null JSON objects.
Set index params
Indexing helps Milvus quickly filter or search across large volumes of data. In Milvus, indexing is:
- Mandatory for vector fields (to efficiently run similarity searches).
Index a vector field
The following example creates an index on the vector field `embedding`, using the `AUTOINDEX` index type. With this type, Milvus automatically selects the most suitable index based on the data type.
- Python
- Java
- Go
- NodeJS
- cURL
# Set index params
# (`client` is the MilvusClient instance created in the schema example)
index_params = client.prepare_index_params()
# Index `embedding` with AUTOINDEX and specify similarity metric type
# The index is registered under the name "vector_index"
index_params.add_index(
field_name="embedding",
index_name="vector_index",
index_type="AUTOINDEX", # Use automatic indexing to simplify complex index settings
metric_type="COSINE" # Specify similarity metric type, options include L2, COSINE, or IP
)
import io.milvus.v2.common.IndexParam;
import java.util.*;
// Collect index definitions; here a single AUTOINDEX on "embedding" using the COSINE metric
List<IndexParam> indexes = new ArrayList<>();
indexes.add(IndexParam.builder()
.fieldName("embedding")
.indexName("vector_index")
.indexType(IndexParam.IndexType.AUTOINDEX)
.metricType(IndexParam.MetricType.COSINE)
.build());
// AUTOINDEX on the vector field, using the COSINE metric
vectorIndex := index.NewAutoIndex(entity.COSINE)
// Index option targeting field "embedding" of collection "my_json_collection"
indexOpt := milvusclient.NewCreateIndexOption("my_json_collection", "embedding", vectorIndex)
import { MetricType, IndexType } from "@zilliz/milvus2-sdk-node";

// Index definitions passed to collection creation:
// AUTOINDEX on the vector field `embedding` with the COSINE metric.
const indexParams = [
  {
    field_name: 'embedding',
    index_name: 'vector_index',
    metric_type: MetricType.COSINE,
    index_type: IndexType.AUTOINDEX,
  },
];
# Index definition: AUTOINDEX on the vector field "embedding" with the COSINE metric
export indexParams='[
{
"fieldName": "embedding",
"indexName": "vector_index",
"metricType": "COSINE",
"indexType": "AUTOINDEX"
}
]'
Create collection
Once the schema and index are defined, create a collection that includes the JSON field.
- Python
- Java
- Go
- NodeJS
- cURL
# Create the collection from the `schema` and `index_params` defined above
client.create_collection(
collection_name="my_json_collection",
schema=schema,
index_params=index_params
)
// Build the create request from the schema and index list defined above, then send it
CreateCollectionReq requestCreate = CreateCollectionReq.builder()
.collectionName("my_json_collection")
.collectionSchema(schema)
.indexParams(indexes)
.build();
client.createCollection(requestCreate);
// Create the collection with the schema and the vector index option defined above.
// Only `indexOpt` is defined in the index example, so it is the only option passed.
err = cli.CreateCollection(ctx, milvusclient.NewCreateCollectionOption("my_json_collection", schema).
	WithIndexOptions(indexOpt))
if err != nil {
	// handle err
}
// Create the collection from the field definitions and index parameters defined above.
// The Node.js SDK method is `createCollection` (camelCase).
await client.createCollection({
  collection_name: "my_json_collection",
  schema: schema,
  index_params: indexParams,
});
# Create the collection; $schema and $indexParams are the variables exported above
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d "{
\"collectionName\": \"my_json_collection\",
\"schema\": $schema,
\"indexParams\": $indexParams
}"
Insert data
After creating the collection, insert entities that match the schema.
- Python
- Java
- Go
- NodeJS
- cURL
# Sample data
# Four rows covering: a full JSON object, an explicit null, a missing field,
# and a JSON object whose sub-keys are null
data = [
{
"metadata": {
"product_info": {"category": "electronics", "brand": "BrandA"},
"price": 99.99,
"in_stock": True,
"tags": ["summer_sale"]
},
"pk": 1,
"embedding": [0.12, 0.34, 0.56]
},
{
"metadata": None, # Entire JSON object is null
"pk": 2,
"embedding": [0.56, 0.78, 0.90]
},
{
# JSON field is completely missing
"pk": 3,
"embedding": [0.91, 0.18, 0.23]
},
{
# Some sub-keys are null
"metadata": {
"product_info": {"category": None, "brand": "BrandB"},
"price": 59.99,
"in_stock": None
},
"pk": 4,
"embedding": [0.56, 0.38, 0.21]
}
]
# Insert the rows into the collection created earlier
client.insert(
collection_name="my_json_collection",
data=data
)
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.response.InsertResp;

// Each row is parsed from a JSON string; JSON booleans must be lowercase (`true`).
List<JsonObject> rows = new ArrayList<>();
Gson gson = new Gson();
// Row 1: full JSON object
rows.add(gson.fromJson("{\"metadata\":{\"product_info\":{\"category\":\"electronics\",\"brand\":\"BrandA\"},\"price\":99.99,\"in_stock\":true,\"tags\":[\"summer_sale\"]},\"pk\":1,\"embedding\":[0.12,0.34,0.56]}", JsonObject.class));
// Row 2: entire JSON object is null
rows.add(gson.fromJson("{\"metadata\":null,\"pk\":2,\"embedding\":[0.56,0.78,0.90]}", JsonObject.class));
// Row 3: JSON field is completely missing
rows.add(gson.fromJson("{\"pk\":3,\"embedding\":[0.91,0.18,0.23]}", JsonObject.class));
// Row 4: some sub-keys are null
rows.add(gson.fromJson("{\"metadata\":{\"product_info\":{\"category\":null,\"brand\":\"BrandB\"},\"price\":59.99,\"in_stock\":null},\"pk\":4,\"embedding\":[0.56,0.38,0.21]}", JsonObject.class));

InsertResp insertR = client.insert(InsertReq.builder()
        .collectionName("my_json_collection")
        .data(rows)
        .build());
import (
"github.com/milvus-io/milvus/client/v2/column"
"github.com/milvus-io/milvus/client/v2/milvusclient"
)
resp, err := cli.Insert(ctx, milvusclient.NewColumnBasedInsertOption("my_json_collection").
WithInt64Column("pk", []int64{1, 2, 3, 4}).
WithFloatVectorColumn("embedding", 3, [][]float32{
{0.12, 0.34, 0.56},
{0.56, 0.78, 0.90},
{0.91, 0.18, 0.23},
{0.56, 0.38, 0.21},
}).WithColumns(
column.NewColumnJSONBytes("metadata", [][]byte{
[]byte(`{
"product_info": {"category": "electronics", "brand": "BrandA"},
"price": 99.99,
"in_stock": True,
"tags": ["summer_sale"]
}`),
[]byte(`null`),
[]byte(`null`),
[]byte(`"metadata": {
"product_info": {"category": None, "brand": "BrandB"},
"price": 59.99,
"in_stock": None
}`),
}),
))
if err != nil {
// handle err
}
fmt.Println(resp)
// Sample rows: a full JSON object, an explicit null, a missing JSON field,
// and a JSON object whose sub-keys are null.
const data = [
  {
    metadata: {
      product_info: { category: "electronics", brand: "BrandA" },
      price: 99.99,
      in_stock: true,
      tags: ["summer_sale"],
    },
    pk: 1,
    embedding: [0.12, 0.34, 0.56],
  },
  {
    metadata: null, // Entire JSON object is null
    pk: 2,
    embedding: [0.56, 0.78, 0.90],
  },
  {
    // JSON field is completely missing
    pk: 3,
    embedding: [0.91, 0.18, 0.23],
  },
  {
    // Some sub-keys are null
    metadata: {
      product_info: { category: null, brand: "BrandB" },
      price: 59.99,
      in_stock: null,
    },
    pk: 4,
    embedding: [0.56, 0.38, 0.21],
  },
];
// Insert the sample rows into the collection
await client.insert({
collection_name: "my_json_collection",
data: data
});
# Insert four rows: a full JSON object, an explicit null, a missing JSON field,
# and a JSON object whose sub-keys are null (same data as the other language tabs).
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/insert" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
--data '{
    "data": [
        {
            "metadata": {
                "product_info": {"category": "electronics", "brand": "BrandA"},
                "price": 99.99,
                "in_stock": true,
                "tags": ["summer_sale"]
            },
            "pk": 1,
            "embedding": [0.12, 0.34, 0.56]
        },
        {
            "metadata": null,
            "pk": 2,
            "embedding": [0.56, 0.78, 0.90]
        },
        {
            "pk": 3,
            "embedding": [0.91, 0.18, 0.23]
        },
        {
            "metadata": {
                "product_info": {"category": null, "brand": "BrandB"},
                "price": 59.99,
                "in_stock": null
            },
            "pk": 4,
            "embedding": [0.56, 0.38, 0.21]
        }
    ],
    "collectionName": "my_json_collection"
}'
Query with filter expressions
After inserting entities, use the `query` method to retrieve entities that match the specified filter expressions.
For JSON fields that allow null values, the field is treated as null if the entire JSON object is missing or set to `None`. For more information, refer to JSON Fields with Null Values.
To retrieve entities where `metadata` is not null:
- Python
- Java
- Go
- NodeJS
- cURL
# Query to filter out records with null metadata
filter = 'metadata is not null'
# Run the query and return the `metadata` and `pk` fields
res = client.query(
collection_name="my_json_collection",
filter=filter,
output_fields=["metadata", "pk"]
)
# Expected result:
# Rows with pk=1 and pk=4 have valid, non-null metadata.
# Rows with pk=2 (metadata=None) and pk=3 (no metadata key) are excluded.
print(res)
# Output:
# data: [
# "{'metadata': {'product_info': {'category': 'electronics', 'brand': 'BrandA'}, 'price': 99.99, 'in_stock': True, 'tags': ['summer_sale']}, 'pk': 1}",
# "{'metadata': {'product_info': {'category': None, 'brand': 'BrandB'}, 'price': 59.99, 'in_stock': None}, 'pk': 4}"
# ]
import io.milvus.v2.service.vector.request.QueryReq;
import io.milvus.v2.service.vector.response.QueryResp;
// Keep only entities whose metadata field is not null
String filter = "metadata is not null";
// Return the metadata and pk fields of the matching entities
QueryResp resp = client.query(QueryReq.builder()
.collectionName("my_json_collection")
.filter(filter)
.outputFields(Arrays.asList("metadata", "pk"))
.build());
System.out.println(resp.getQueryResults());
// Output
//
// [
// QueryResp.QueryResult(entity={metadata={"product_info":{"category":"electronics","brand":"BrandA"},"price":99.99,"in_stock":true,"tags":["summer_sale"]}, pk=1}),
// QueryResp.QueryResult(entity={metadata={"product_info":{"category":null,"brand":"BrandB"},"price":59.99,"in_stock":null}, pk=4})
// ]
// Query entities whose metadata field is not null, returning metadata and pk
rs, err := cli.Query(ctx, milvusclient.NewQueryOption("my_json_collection").
WithFilter("metadata is not null").
WithOutputFields("metadata", "pk"))
if err != nil {
// handle error
}
// Print the returned columns
fmt.Println(rs.GetColumn("pk"))
fmt.Println(rs.GetColumn("metadata"))
// Query entities whose `metadata` field is not null, returning `metadata` and `pk`.
await client.query({
  collection_name: 'my_json_collection',
  filter: 'metadata is not null',
  output_fields: ['metadata', 'pk'],
});
# Query entities whose metadata field is not null, returning metadata and pk
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d '{
    "collectionName": "my_json_collection",
    "filter": "metadata is not null",
    "outputFields": ["metadata", "pk"]
}'

# Expected output: only pk=1 and pk=4 have non-null metadata
#{"code":0,"cost":0,"data":[{"metadata":"{\"product_info\": {\"category\": \"electronics\", \"brand\": \"BrandA\"}, \"price\": 99.99, \"in_stock\": true, \"tags\": [\"summer_sale\"]}","pk":1},{"metadata":"{\"product_info\": {\"category\": null, \"brand\": \"BrandB\"}, \"price\": 59.99, \"in_stock\": null}","pk":4}]}
To retrieve entities where `metadata["product_info"]["category"]` is `"electronics"`:
- Python
- Java
- Go
- NodeJS
- cURL
# Filter on a nested JSON key using bracket notation
filter = 'metadata["product_info"]["category"] == "electronics"'
# Run the query and return the `metadata` and `pk` fields
res = client.query(
collection_name="my_json_collection",
filter=filter,
output_fields=["metadata", "pk"]
)
# Expected result:
# - Only pk=1 has "category": "electronics".
# - pk=4 has "category": None, so it doesn't match.
# - pk=2 and pk=3 have no valid metadata.
print(res)
# Output:
# data: [
# "{'pk': 1, 'metadata': {'product_info': {'category': 'electronics', 'brand': 'BrandA'}, 'price': 99.99, 'in_stock': True, 'tags': ['summer_sale']}}"
# ]
// Filter on the nested key product_info -> category inside the JSON field
String filter = "metadata[\"product_info\"][\"category\"] == \"electronics\"";
// Return the metadata and pk fields of the matching entities
QueryResp resp = client.query(QueryReq.builder()
.collectionName("my_json_collection")
.filter(filter)
.outputFields(Arrays.asList("metadata", "pk"))
.build());
System.out.println(resp.getQueryResults());
// Output
// [QueryResp.QueryResult(entity={metadata={"product_info":{"category":"electronics","brand":"BrandA"},"price":99.99,"in_stock":true,"tags":["summer_sale"]}, pk=1})]
// Query entities whose nested key product_info -> category equals "electronics"
rs, err := cli.Query(ctx, milvusclient.NewQueryOption("my_json_collection").
WithFilter(`metadata["product_info"]["category"] == "electronics"`).
WithOutputFields("metadata", "pk"))
if err != nil {
// handle error
}
// Print the returned columns
fmt.Println(rs.GetColumn("pk"))
fmt.Println(rs.GetColumn("metadata"))
// Filter on the nested key product_info -> category inside the JSON field.
// `category` is nested under `product_info` in the inserted data.
const filter = 'metadata["product_info"]["category"] == "electronics"';
const res = await client.query({
  collection_name: "my_json_collection",
  filter: filter,
  output_fields: ["metadata", "pk"],
});

// Example output (only pk=1 has "category": "electronics"):
// {
//   data: [
//     { pk: 1, metadata: { product_info: { category: 'electronics', brand: 'BrandA' }, price: 99.99, in_stock: true, tags: ['summer_sale'] } }
//   ]
// }
# RESTful API: query entities whose nested key product_info -> category equals "electronics"
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/query" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d '{
"collectionName": "my_json_collection",
"filter": "metadata[\"product_info\"][\"category\"] == \"electronics\"",
"outputFields": ["metadata", "pk"]
}'
#{"code":0,"cost":0,"data":[{"metadata":"{\"product_info\": {\"category\": \"electronics\", \"brand\": \"BrandA\"}, \"price\": 99.99, \"in_stock\": true, \"tags\": [\"summer_sale\"]}","pk":1}]}
Vector search with filter expressions
In addition to basic scalar field filtering, you can combine vector similarity searches with scalar field filters. For example, the following code shows how to add a scalar field filter to a vector search:
- Python
- Java
- Go
- NodeJS
- cURL
# Restrict the vector search to entities whose nested brand is "BrandA"
filter = 'metadata["product_info"]["brand"] == "BrandA"'
# Search with one query vector, return at most 5 hits and include `metadata` in the results
res = client.search(
collection_name="my_json_collection",
data=[[0.3, -0.6, 0.1]],
limit=5,
search_params={"params": {"nprobe": 10}},
output_fields=["metadata"],
filter=filter
)
# Expected result:
# - Only pk=1 has "brand": "BrandA" in metadata["product_info"].
# - pk=4 has "brand": "BrandB".
# - pk=2 and pk=3 have no valid metadata.
# Hence, only pk=1 matches the filter.
print(res)
# Output:
# data: [
# "[{'id': 1, 'distance': -0.2479381263256073, 'entity': {'metadata': {'product_info': {'category': 'electronics', 'brand': 'BrandA'}, 'price': 99.99, 'in_stock': True, 'tags': ['summer_sale']}}}]"
# ]
import io.milvus.v2.service.vector.request.SearchReq;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.response.SearchResp;

import java.util.Collections;

// Restrict the vector search to entities whose nested brand is "BrandA"
String filter = "metadata[\"product_info\"][\"brand\"] == \"BrandA\"";

// Search the "embedding" field with one query vector, returning at most 5 hits
SearchResp resp = client.search(SearchReq.builder()
        .collectionName("my_json_collection")
        .annsField("embedding")
        .data(Collections.singletonList(new FloatVec(new float[]{0.3f, -0.6f, 0.1f})))
        .topK(5)
        .outputFields(Collections.singletonList("metadata"))
        .filter(filter)
        .build());

System.out.println(resp.getSearchResults());

// Output
//
// [
//   [
//     SearchResp.SearchResult(entity={metadata={"product_info":{"category":"electronics","brand":"BrandA"},"price":99.99,"in_stock":true,"tags":["summer_sale"]}}, score=-0.24793813, id=1)
//   ]
// ]
// Same query vector as the other language examples
queryVector := []float32{0.3, -0.6, 0.1}

annParam := index.NewCustomAnnParam()
annParam.WithExtraParam("nprobe", 10)

// Vector search on "embedding", restricted to entities whose nested brand is "BrandA"
resultSets, err := cli.Search(ctx, milvusclient.NewSearchOption(
	"my_json_collection", // collectionName
	5,                    // limit
	[]entity.Vector{entity.FloatVector(queryVector)},
).WithANNSField("embedding").
	WithFilter(`metadata["product_info"]["brand"] == "BrandA"`).
	WithOutputFields("metadata").
	WithAnnParam(annParam))
if err != nil {
	log.Fatal("failed to perform basic ANN search collection: ", err.Error())
}

for _, resultSet := range resultSets {
	log.Println("IDs: ", resultSet.IDs)
	log.Println("Scores: ", resultSet.Scores)
	log.Println("metadata: ", resultSet.GetColumn("metadata"))
}
// Vector search restricted to entities whose nested brand is "BrandA".
// The filter uses keys that exist in the inserted data (`product_info` -> `brand`).
await client.search({
  collection_name: 'my_json_collection',
  data: [0.3, -0.6, 0.1],
  limit: 5,
  output_fields: ['metadata'],
  filter: 'metadata["product_info"]["brand"] == "BrandA"',
});
# Vector search restricted to entities whose nested brand is "BrandA".
# Vector searches go to the /entities/search endpoint (not /entities/query).
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/entities/search" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d '{
    "collectionName": "my_json_collection",
    "data": [
        [0.3, -0.6, 0.1]
    ],
    "annsField": "embedding",
    "limit": 5,
    "searchParams": {
        "params": {
            "nprobe": 10
        }
    },
    "outputFields": ["metadata"],
    "filter": "metadata[\"product_info\"][\"brand\"] == \"BrandA\""
}'

##{"code":0,"cost":0,"data":[{"metadata":"{\"product_info\": {\"category\": \"electronics\", \"brand\": \"BrandA\"}, \"price\": 99.99, \"in_stock\": true, \"tags\": [\"summer_sale\"]}","pk":1}]}
Additionally, Zilliz Cloud supports advanced JSON filtering operators such as `JSON_CONTAINS`, `JSON_CONTAINS_ALL`, and `JSON_CONTAINS_ANY`, which can further enhance query capabilities. For more details, refer to JSON Operators.