Enable Dynamic Field
This page explains how to use the dynamic field in a collection for flexible data insertion and retrieval.
Overview
Zilliz Cloud allows you to define the schema of a collection by setting the name and the data type of each field, so that you can create indexes on these fields for improved search performance.
Once a field is defined, you need to include this field when you insert data. What if some fields are not always present in all your data entries? This is where the dynamic field comes in.
The dynamic field in a collection is a reserved JSON field named $meta. It can hold non-schema-defined fields and their values as key-value pairs. Using the dynamic field, you can search and query both the schema-defined fields and any non-schema-defined fields that your entities may have.
Enable dynamic field
When defining a schema for a collection, you can set enable_dynamic_field to True to enable the reserved dynamic field, indicating that any non-schema-defined fields and their values inserted later on will be saved as key-value pairs in the reserved dynamic field.
The following snippet creates a collection with two schema-defined fields, namely id and vector, and enables the dynamic field.
- Python
- Java
- NodeJS
import random
import time

from pymilvus import connections, MilvusClient, DataType

CLUSTER_ENDPOINT = "YOUR_CLUSTER_ENDPOINT"
TOKEN = "YOUR_CLUSTER_TOKEN"

# 1. Connect to the cluster
client = MilvusClient(uri=CLUSTER_ENDPOINT, token=TOKEN)

# 2. Build a schema with the dynamic field switched on, then create the collection
schema = MilvusClient.create_schema(
    auto_id=False,
    enable_dynamic_field=True,
)

# Primary key supplied by the caller (auto_id is off)
schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
# The dim value should be greater than 1
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=5)

# One index per schema-defined field
index_params = MilvusClient.prepare_index_params()
index_params.add_index(field_name="id", index_type="STL_SORT")
index_params.add_index(
    field_name="vector",
    index_type="AUTOINDEX",
    metric_type="L2",
)

client.create_collection(
    collection_name="test_collection",
    schema=schema,
    index_params=index_params,
)

# Check the load state of the collection that was just created
res = client.get_load_state(collection_name="test_collection")

print(res)

# Output
#
# {
#     "state": "<LoadState: Loaded>"
# }
import java.util.ArrayList;
import java.util.List;

import io.milvus.v2.client.ConnectConfig;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.GetLoadStateReq;

String CLUSTER_ENDPOINT = "YOUR_CLUSTER_ENDPOINT";
String TOKEN = "YOUR_CLUSTER_TOKEN";

// 1. Connect to Milvus server
ConnectConfig connectConfig = ConnectConfig.builder()
    .uri(CLUSTER_ENDPOINT)
    .token(TOKEN)
    .build();

MilvusClientV2 client = new MilvusClientV2(connectConfig);

// 2. Create a collection in customized setup mode

// 2.1 Create schema
CreateCollectionReq.CollectionSchema schema = client.createSchema();

// 2.2 Add fields to schema
schema.addField(AddFieldReq.builder()
    .fieldName("id")
    .dataType(DataType.Int64)
    .isPrimaryKey(true)
    .autoID(false)
    .build());

// The dimension value should be greater than 1
schema.addField(AddFieldReq.builder()
    .fieldName("vector")
    .dataType(DataType.FloatVector)
    .dimension(5)
    .build());

// 2.3 Prepare index parameters (one index per schema-defined field)
IndexParam indexParamForIdField = IndexParam.builder()
    .fieldName("id")
    .indexType(IndexParam.IndexType.STL_SORT)
    .build();

IndexParam indexParamForVectorField = IndexParam.builder()
    .fieldName("vector")
    .indexType(IndexParam.IndexType.AUTOINDEX)
    .metricType(IndexParam.MetricType.IP)
    .build();

List<IndexParam> indexParams = new ArrayList<>();
indexParams.add(indexParamForIdField);
indexParams.add(indexParamForVectorField);

// 2.4 Create a collection with schema and index parameters,
//     with the reserved dynamic field enabled
CreateCollectionReq customizedSetupReq = CreateCollectionReq.builder()
    .collectionName("customized_setup")
    .collectionSchema(schema)
    .indexParams(indexParams)
    .enableDynamicField(true)
    .build();

client.createCollection(customizedSetupReq);

// Pause before checking the load state
Thread.sleep(5000);

// 2.5 Get load state of the collection
GetLoadStateReq customSetupLoadStateReq1 = GetLoadStateReq.builder()
    .collectionName("customized_setup")
    .build();

boolean res = client.getLoadState(customSetupLoadStateReq1);

System.out.println(res);

// Output:
// true
const { MilvusClient, DataType, sleep } = require("@zilliz/milvus2-sdk-node")
const address = "YOUR_CLUSTER_ENDPOINT"
const token = "YOUR_CLUSTER_TOKEN"
async function main() {
// 1. Set up a Milvus Client
client = new MilvusClient({address, token});
// 2. Create a collection
// 2.1 Define fields
const fields = [
{
name: "id",
data_type: DataType.Int64,
is_primary_key: true,
auto_id: false
},
{
name: "vector",
data_type: DataType.FloatVector,
dim: 5 // The dim value should be greater than 1
},
]
// 2.2 Prepare index parameters
const index_params = [{
field_name: "id",
index_type: "STL_SORT"
},{
field_name: "vector",
index_type: "AUTOINDEX",
metric_type: "IP"
}]
// 2.3 Create a collection with fields and index parameters
res = await client.createCollection({
collection_name: "test_collection",
fields: fields,
index_params: index_params,
enable_dynamic_field: true
})
console.log(res.error_code)
// Output
//
// Success
//
res = await client.getLoadState({
collection_name: "test_collection",
})
console.log(res.state)
// Output
//
// LoadStateLoaded
//
Insert dynamic data
Once the collection is created with the dynamic field enabled, you can start inserting data, including any non-schema-defined fields and their values.
Prepare data
In this section, you need to prepare some randomly generated data for the insertion later on.
- Python
- Java
- NodeJS
# Candidate values for the non-schema-defined "color" field
colors = ["green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey"]
data = []

# Build 1,000 entities. "id" and "vector" are schema-defined; "color", "tag",
# and "color_tag" are not in the schema and are the values destined for the
# dynamic field.
for i in range(1000):
    current_color = random.choice(colors)
    current_tag = random.randint(1000, 9999)  # inclusive on both ends
    data.append({
        "id": i,
        "vector": [random.uniform(-1, 1) for _ in range(5)],
        "color": current_color,
        "tag": current_tag,
        "color_tag": f"{current_color}_{current_tag}"
    })

print(data[0])
// Candidate values for the non-schema-defined "color" field
List<String> colors = Arrays.asList("green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey");
List<JSONObject> data = new ArrayList<>();

// One generator is reused for all rows instead of allocating a new one per iteration.
Random rand = new Random();

for (int i = 0; i < 1000; i++) {
    // nextInt(bound) excludes the bound itself, so passing colors.size()
    // keeps the last color ("grey") selectable.
    String currentColor = colors.get(rand.nextInt(colors.size()));
    // Four-digit tag in the range 1000..9999 (both ends reachable).
    int currentTag = rand.nextInt(9000) + 1000;

    JSONObject row = new JSONObject();
    // Schema-defined fields
    row.put("id", Long.valueOf(i));
    row.put("vector", Arrays.asList(rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat()));
    // Non-schema-defined fields, destined for the dynamic field
    row.put("color", currentColor);
    row.put("tag", currentTag);
    // Built from the same color and tag stored above so the three fields agree.
    row.put("color_tag", currentColor + "_" + currentTag);
    data.add(row);
}

System.out.println(JSONObject.toJSON(data.get(0)));
// Candidate values for the non-schema-defined "color" field
const colors = ["green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey"]
const data = []

// Build 1,000 entities. "id" and "vector" are schema-defined; "color", "tag",
// and "color_tag" are not in the schema and are the values destined for the
// dynamic field.
for (let i = 0; i < 1000; i++) {
    const current_color = colors[Math.floor(Math.random() * colors.length)]
    // Math.random() is in [0, 1), so a multiplier of 9000 yields 1000..9999 inclusive.
    const current_tag = Math.floor(Math.random() * 9000 + 1000)
    data.push({
        id: i,
        vector: [Math.random(), Math.random(), Math.random(), Math.random(), Math.random()],
        color: current_color,
        tag: current_tag,
        color_tag: `${current_color}_${current_tag}`
    })
}

console.log(data[0])
You can view the structure of the generated data by checking its first entry.
{
id: 0,
vector: [
0.1275656405044483,
0.47417858592773277,
0.13858264437643286,
0.2390904907020377,
0.8447862593689635
],
color: 'blue',
tag: 2064,
color_tag: 'blue_2064'
}
Insert data
Then you can safely insert the data into the collection.
- Python
- Java
- NodeJS
# 3. Insert the prepared entities, dynamic-field values included
res = client.insert(collection_name="test_collection", data=data)

print(res)

# Output
#
# {
#     "insert_count": 1000,
#     "ids": [
#         0,
#         1,
#         2,
#         3,
#         4,
#         5,
#         6,
#         7,
#         8,
#         9,
#         "(990 more items hidden)"
#     ]
# }

# Pause for a few seconds before moving on to the search
time.sleep(5)
// 3.1 Insert the prepared rows (dynamic-field values included) into the collection
InsertResp insertResp = client.insert(
    InsertReq.builder()
        .collectionName("customized_setup")
        .data(data)
        .build());

System.out.println(JSONObject.toJSON(insertResp));

// Output:
// {"insertCnt": 1000}

// Pause for a few seconds before moving on to the search
Thread.sleep(5000);
res = await client.insert({
collection_name: "test_collection",
data: data,
})
console.log(res.insert_cnt)
// Output
//
// 1000
//
await sleep(5000)
Search with dynamic fields
If you have created the collection with the dynamic field enabled and inserted non-schema-defined fields, you can use these fields in the filter expression of a search or a query as follows:
- Python
- Java
- NodeJS
# 4. Search with dynamic fields
query_vectors = [[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]]

# "color" is not defined in the schema; it is stored in the dynamic field and
# can still be referenced directly in the filter expression.
res = client.search(
    collection_name="test_collection",
    data=query_vectors,
    filter="color in [\"red\", \"green\"]",
    search_params={"metric_type": "L2", "params": {"nprobe": 10}},
    # Request the fields shown in the output below; "color_tag" is another
    # non-schema-defined field.
    output_fields=["id", "color_tag"],
    limit=3
)

print(res)

# Output
#
# [
#     [
#         {
#             "id": 863,
#             "distance": 0.188413605093956,
#             "entity": {
#                 "id": 863,
#                 "color_tag": "red_2371"
#             }
#         },
#         {
#             "id": 799,
#             "distance": 0.29188022017478943,
#             "entity": {
#                 "id": 799,
#                 "color_tag": "red_2235"
#             }
#         },
#         {
#             "id": 564,
#             "distance": 0.3492690920829773,
#             "entity": {
#                 "id": 564,
#                 "color_tag": "red_9186"
#             }
#         }
#     ]
# ]
// 4. Search with non-schema-defined fields
List<List<Float>> queryVectors = Arrays.asList(
    Arrays.asList(0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f));

// "color" and "color_tag" are not defined in the schema. The filter addresses
// "color" through $meta, and "color_tag" is requested as an output field.
SearchResp searchResp = client.search(
    SearchReq.builder()
        .collectionName("customized_setup")
        .data(queryVectors)
        .filter("$meta[\"color\"] in [\"red\", \"green\"]")
        .outputFields(List.of("id", "color_tag"))
        .topK(3)
        .build());

System.out.println(JSONObject.toJSON(searchResp));

// Output:
// {"searchResults": [[
//     {
//         "distance": 1.3159835,
//         "id": 979,
//         "entity": {
//             "color_tag": "red_7155",
//             "id": 979
//         }
//     },
//     {
//         "distance": 1.0744804,
//         "id": 44,
//         "entity": {
//             "color_tag": "green_8006",
//             "id": 44
//         }
//     },
//     {
//         "distance": 1.0060014,
//         "id": 617,
//         "entity": {
//             "color_tag": "red_4056",
//             "id": 617
//         }
//     }
// ]]}
// 4. Search with non-schema-defined fields
const query_vectors = [[0.1, 0.2, 0.3, 0.4, 0.5]]
res = await client.search({
collection_name: "test_collection",
data: query_vectors,
filter: "color in [\"red\", \"green\"]",
output_fields: ["color_tag"],
limit: 3
})
console.log(res.results)
// Output
//
// [
// { score: 1.2284551858901978, id: '301', color_tag: 'red_1270' },
// { score: 1.2195171117782593, id: '205', color_tag: 'red_2780' },
// { score: 1.2055039405822754, id: '487', color_tag: 'red_6653' }
// ]
//
Recap
It is worth noting that color, tag, and color_tag are not present when you define the collection schema, but you can use them as if they were schema-defined fields when you conduct searches and queries.
If the name of a non-schema-defined field contains characters other than digits, letters, and underscores, such as plus signs (+), asterisks (*), or dollar signs ($), you have to include the key within $meta[] as shown in the following code snippet when using it in a boolean expression or including it in the output fields.
...
# Keys containing characters other than digits, letters, and underscores
# must be wrapped in $meta[].
filter='$meta["$key"] in ["a", "b", "c"]',
# output_fields takes a list of field names, even for a single field.
output_fields=['$meta["$key"]']
...