Primary Field & AutoID
Every collection in Zilliz Cloud must have a primary field to uniquely identify each entity. This field ensures that every entity can be inserted, updated, queried, or deleted without ambiguity.
Depending on your use case, you can either let Zilliz Cloud automatically generate IDs (AutoID) or assign your own IDs manually.
What is a primary field?
A primary field acts as the unique key for each entity in a collection, similar to a primary key in a traditional database. Zilliz Cloud uses the primary field to manage entities during insert, upsert, delete, and query operations.
Key requirements:
-
Each collection must have exactly one primary field.
-
Primary field values cannot be null.
-
The data type must be specified at creation and cannot be changed later.
Supported data types
The primary field must use a supported scalar data type that can uniquely identify entities.
Data Type | Description |
|---|---|
| 64-bit integer type, commonly used with AutoID. This is the recommended option for most use cases. |
| Variable-length string type. Use this when entity identifiers come from external systems (for example, product codes or user IDs). Requires the |
Choose between AutoID and Manual IDs
Zilliz Cloud supports two modes for assigning primary key values.
Mode | Description | Recommended For |
|---|---|---|
AutoID | Zilliz Cloud automatically generates unique identifiers for inserted or imported entities. | Most scenarios where you don’t need to manage IDs manually. |
Manual ID | You provide unique IDs yourself when inserting or importing data. | When IDs must align with external systems or pre-existing datasets. |
If you are unsure which mode to choose, start with AutoID for simpler ingestion and guaranteed uniqueness.
Quickstart: Use AutoID
You can let Zilliz Cloud handle ID generation automatically.
Step 1: Create a collection with AutoID
Enable auto_id=True in your primary field definition. Zilliz Cloud will handle ID generation automatically.
- Python
- Java
- NodeJS
- Go
- cURL
from pymilvus import MilvusClient, DataType
client = MilvusClient(uri="YOUR_CLUSTER_ENDPOINT")
schema = client.create_schema()
# Define primary field with AutoID enabled
schema.add_field(
field_name="id", # Primary field name
is_primary=True,
auto_id=True, # Milvus generates IDs automatically; Defaults to False
datatype=DataType.INT64
)
# Define the other fields
schema.add_field(field_name="embedding", datatype=DataType.FLOAT_VECTOR, dim=4) # Vector field
schema.add_field(field_name="category", datatype=DataType.VARCHAR, max_length=1000) # Scalar field of the VARCHAR type
# Create the collection
if client.has_collection("demo_autoid"):
client.drop_collection("demo_autoid")
client.create_collection(collection_name="demo_autoid", schema=schema)
import io.milvus.v2.client.ConnectConfig;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.DropCollectionReq;
MilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()
.uri("YOUR_CLUSTER_ENDPOINT")
.build());
CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
.build();
collectionSchema.addField(AddFieldReq.builder()
.fieldName("id")
.dataType(DataType.Int64)
.isPrimaryKey(true)
.autoID(true)
.build());
collectionSchema.addField(AddFieldReq.builder()
.fieldName("embedding")
.dataType(DataType.FloatVector)
.dimension(4)
.build());
collectionSchema.addField(AddFieldReq.builder()
.fieldName("category")
.dataType(DataType.VarChar)
.maxLength(1000)
.build());
client.dropCollection(DropCollectionReq.builder()
.collectionName("demo_autoid")
.build());
CreateCollectionReq requestCreate = CreateCollectionReq.builder()
.collectionName("demo_autoid")
.collectionSchema(collectionSchema)
.build();
client.createCollection(requestCreate);
import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
const client = new MilvusClient({
address: "YOUR_CLUSTER_ENDPOINT",
});
// Define schema fields
const schema = [
{
name: "id",
description: "Primary field",
data_type: DataType.Int64,
is_primary_key: true,
autoID: true, // Milvus generates IDs automatically
},
{
name: "embedding",
description: "Vector field",
data_type: DataType.FloatVector,
dim: 4,
},
{
name: "category",
description: "Scalar field",
data_type: DataType.VarChar,
max_length: 1000,
},
];
// Create the collection
await client.createCollection({
collection_name: "demo_autoid",
fields: schema,
});
// go
# restful
export SCHEMA='{
"autoID": true,
"fields": [
{
"fieldName": "id",
"dataType": "Int64",
"isPrimary": true,
"elementTypeParams": {}
},
{
"fieldName": "embedding",
"dataType": "FloatVector",
"isPrimary": false,
"elementTypeParams": {
"dim": "4"
}
},
{
"fieldName": "category",
"dataType": "VarChar",
"isPrimary": false,
"elementTypeParams": {
"max_length": "1000"
}
}
]
}'
curl -X POST 'YOUR_CLUSTER_ENDPOINT/v2/vectordb/collections/create' \
-H 'Content-Type: application/json' \
-d "{
\"collectionName\": \"demo_autoid\",
\"schema\": $SCHEMA
}"
Step 2: Insert Data
Important: Do not include the primary field column in your data. Zilliz Cloud generates IDs automatically.
- Python
- Java
- NodeJS
- Go
- cURL
data = [
{"embedding": [0.1, 0.2, 0.3, 0.4], "category": "book"},
{"embedding": [0.2, 0.3, 0.4, 0.5], "category": "toy"},
]
res = client.insert(collection_name="demo_autoid", data=data)
print("Generated IDs:", res.get("ids"))
# Output example:
# Generated IDs: [461526052788333649, 461526052788333650]
import com.google.gson.*;
import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.response.InsertResp;
List<JsonObject> rows = new ArrayList<>();
Gson gson = new Gson();
JsonObject row1 = new JsonObject();
row1.add("embedding", gson.toJsonTree(new float[]{0.1f, 0.2f, 0.3f, 0.4f}));
row1.addProperty("category", "book");
rows.add(row1);
JsonObject row2 = new JsonObject();
row2.add("embedding", gson.toJsonTree(new float[]{0.2f, 0.3f, 0.4f, 0.5f}));
row2.addProperty("category", "toy");
rows.add(row2);
InsertResp insertR = client.insert(InsertReq.builder()
.collectionName("demo_autoid")
.data(rows)
.build());
System.out.printf("Generated IDs: %s\n", insertR.getPrimaryKeys());
const data = [
{"embedding": [0.1, 0.2, 0.3, 0.4], "category": "book"},
{"embedding": [0.2, 0.3, 0.4, 0.5], "category": "toy"},
];
const res = await client.insert({
collection_name: "demo_autoid",
fields_data: data,
});
console.log(res);
// go
# restful
export INSERT_DATA='[
{
"embedding": [0.1, 0.2, 0.3, 0.4],
"category": "book"
},
{
"embedding": [0.2, 0.3, 0.4, 0.5],
"category": "toy"
}
]'
curl -X POST 'YOUR_CLUSTER_ENDPOINT/v2/vectordb/entities/insert' \
-H 'Content-Type: application/json' \
-d "{
\"collectionName\": \"demo_autoid\",
\"data\": $INSERT_DATA
}"
Use upsert() instead of insert() when working with existing entities to avoid duplicate ID errors.
Use manual IDs
If you need to control IDs manually, disable AutoID and provide your own values.
Step 1: Create a collection without AutoID
- Python
- Java
- NodeJS
- Go
- cURL
from pymilvus import MilvusClient, DataType
client = MilvusClient(uri="YOUR_CLUSTER_ENDPOINT")
schema = client.create_schema()
# Define the primary field without AutoID
schema.add_field(
field_name="product_id",
is_primary=True,
auto_id=False, # You'll provide IDs manually at data ingestion
datatype=DataType.VARCHAR,
max_length=100 # Required when datatype is VARCHAR
)
# Define the other fields
schema.add_field(field_name="embedding", datatype=DataType.FLOAT_VECTOR, dim=4) # Vector field
schema.add_field(field_name="category", datatype=DataType.VARCHAR, max_length=1000) # Scalar field of the VARCHAR type
# Create the collection
if client.has_collection("demo_manual_ids"):
client.drop_collection("demo_manual_ids")
client.create_collection(collection_name="demo_manual_ids", schema=schema)
import io.milvus.v2.client.ConnectConfig;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.DropCollectionReq;
MilvusClientV2 client = new MilvusClientV2(ConnectConfig.builder()
.uri("YOUR_CLUSTER_ENDPOINT")
.build());
CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
.build();
collectionSchema.addField(AddFieldReq.builder()
.fieldName("product_id")
.dataType(DataType.VarChar)
.isPrimaryKey(true)
.autoID(false)
.maxLength(100)
.build());
collectionSchema.addField(AddFieldReq.builder()
.fieldName("embedding")
.dataType(DataType.FloatVector)
.dimension(4)
.build());
collectionSchema.addField(AddFieldReq.builder()
.fieldName("category")
.dataType(DataType.VarChar)
.maxLength(1000)
.build());
client.dropCollection(DropCollectionReq.builder()
.collectionName("demo_manual_ids")
.build());
CreateCollectionReq requestCreate = CreateCollectionReq.builder()
.collectionName("demo_manual_ids")
.collectionSchema(collectionSchema)
.build();
client.createCollection(requestCreate);
import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
const client = new MilvusClient({
address: "YOUR_CLUSTER_ENDPOINT",
username: "username",
password: "Aa12345!!",
});
const schema = [
{
name: "product_id",
data_type: DataType.VARCHAR,
is_primary_key: true,
autoID: false,
},
{
name: "embedding",
data_type: DataType.FLOAT_VECTOR,
dim: 4,
},
{
name: "category",
data_type: DataType.VARCHAR,
max_length: 1000,
},
];
const res = await client.createCollection({
collection_name: "demo_autoid",
schema: schema,
});
// go
# restful
export SCHEMA='{
"autoID": false,
"fields": [
{
"fieldName": "product_id",
"dataType": "VarChar",
"isPrimary": true,
"elementTypeParams": {
"max_length": "100"
}
},
{
"fieldName": "embedding",
"dataType": "FloatVector",
"isPrimary": false,
"elementTypeParams": {
"dim": "4"
}
},
{
"fieldName": "category",
"dataType": "VarChar",
"isPrimary": false,
"elementTypeParams": {
"max_length": "1000"
}
}
]
}'
curl -X POST 'YOUR_CLUSTER_ENDPOINT/v2/vectordb/collections/create' \
-H 'Content-Type: application/json' \
-d "{
\"collectionName\": \"demo_manual_ids\",
\"schema\": $SCHEMA
}"
Step 2: Insert data with your IDs
You must include the primary field column in every insert operation.
- Python
- Java
- NodeJS
- Go
- cURL
# Each entity must contain the primary field `product_id`
data = [
{"product_id": "PROD-001", "embedding": [0.1, 0.2, 0.3, 0.4], "category": "book"},
{"product_id": "PROD-002", "embedding": [0.2, 0.3, 0.4, 0.5], "category": "toy"},
]
res = client.insert(collection_name="demo_manual_ids", data=data)
print("Generated IDs:", res.get("ids"))
# Output example:
# Generated IDs: ['PROD-001', 'PROD-002']
import com.google.gson.*;
import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.response.InsertResp;
List<JsonObject> rows = new ArrayList<>();
Gson gson = new Gson();
JsonObject row1 = new JsonObject();
row1.addProperty("product_id", "PROD-001");
row1.add("embedding", gson.toJsonTree(new float[]{0.1f, 0.2f, 0.3f, 0.4f}));
row1.addProperty("category", "book");
rows.add(row1);
JsonObject row2 = new JsonObject();
row2.addProperty("product_id", "PROD-002");
row2.add("embedding", gson.toJsonTree(new float[]{0.2f, 0.3f, 0.4f, 0.5f}));
row2.addProperty("category", "toy");
rows.add(row2);
InsertResp insertR = client.insert(InsertReq.builder()
.collectionName("demo_manual_ids")
.data(rows)
.build());
System.out.printf("Generated IDs: %s\n", insertR.getPrimaryKeys());
const data = [
{"product_id": "PROD-001", "embedding": [0.1, 0.2, 0.3, 0.4], "category": "book"},
{"product_id": "PROD-002", "embedding": [0.2, 0.3, 0.4, 0.5], "category": "toy"},
];
const insert = await client.insert({
collection_name: "demo_autoid",
fields_data: data,
});
console.log(insert);
// go
# restful
export INSERT_DATA='[
{
"product_id": "PROD-001",
"embedding": [0.1, 0.2, 0.3, 0.4],
"category": "book"
},
{
"product_id": "PROD-002",
"embedding": [0.2, 0.3, 0.4, 0.5],
"category": "toy"
}
]'
# 插入数据
curl -X POST 'YOUR_CLUSTER_ENDPOINT/v2/vectordb/entities/insert' \
-H 'Content-Type: application/json' \
-d "{
\"collectionName\": \"demo_manual_ids\",
\"data\": $INSERT_DATA
}"
Your responsibilities:
-
Ensure all IDs are unique across all entities
-
Include the primary field in every insert/import operation
-
Handle ID conflicts and duplicate detection yourself