Product Quantization (PQ)
Starting in v1.23, AutoPQ simplifies configuring PQ on new collections.
Product quantization (PQ) is a form of data compression for vectors. PQ reduces the HNSW index's memory footprint so you can work with larger datasets. For a discussion of how PQ saves memory, see Product quantization.
PQ makes trade-offs between recall, performance, and memory usage. This means a PQ configuration that reduces memory may also reduce recall. There are similar trade-offs when you use HNSW without PQ. If you use PQ compression, you should also tune HNSW so that they complement each other.
To configure HNSW, see Configuration: Vector index.
Enable PQ compression
PQ is configured at a collection level. There are two ways to enable PQ compression:
Configure AutoPQ
For new collections, use AutoPQ. AutoPQ automates triggering of the PQ training step based on the size of the collection.
1. Set the environment variable
AutoPQ requires asynchronous indexing.
- Open-source Weaviate users: To enable AutoPQ, set the environment variable `ASYNC_INDEXING=true` and restart your Weaviate instance.
- Weaviate Cloud (WCD) users: Enable async indexing through the WCD Console and restart your Weaviate instance.
2. Configure PQ
To configure PQ in a collection, use the PQ parameters.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
# Create the "Question" collection with an HNSW vector index that uses a PQ quantizer.
# NOTE: `client` is expected to be an already-connected client; it is not created in this snippet.
import weaviate.classes.config as wc
client.collections.create(
name="Question",
vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(),
vector_index_config=wc.Configure.VectorIndex.hnsw(
quantizer=wc.Configure.VectorIndex.Quantizer.pq(training_limit=50000) # Set the threshold to begin training
),
properties=[
wc.Property(name="question", data_type=wc.DataType.TEXT),
wc.Property(name="answer", data_type=wc.DataType.TEXT),
],
)
# Definition of the "Question" class with PQ switched on in the vector index config.
class_definition = {
"class": "Question",
"vectorizer": "text2vec-openai",
"vectorIndexConfig": {
"pq": {
"enabled": True, # Enable PQ
"trainingLimit": 50000, # Set the threshold to begin training
}
},
}
# Create the class from the definition above.
# NOTE: `client` is expected to be an already-connected client; it is not created in this snippet.
client.schema.create_class(class_definition)
import { configure } from 'weaviate-client';
// Create the "Question" collection with an HNSW index that uses a PQ quantizer.
// Uses the `configure` helper imported from 'weaviate-client'; `client` must be
// an already-connected client defined by the surrounding code.
const collection = await client.collections.create({
  name: 'Question',
  vectorizers: configure.vectorizer.text2VecOpenAI({
    vectorIndexConfig: configure.vectorIndex.hnsw({
      quantizer: configure.vectorIndex.quantizer.pq({
        trainingLimit: 50000, // Set the threshold to begin training
      }),
    }),
  }),
});
/**
 * Creates the `Question` class with PQ enabled in its vector index
 * configuration, then logs the value returned by the class creator.
 *
 * Relies on a `client` object that is in scope outside this function.
 *
 * @returns {Promise<void>}
 */
async function addSchemaAutoPQ() {
  // PQ settings: turn compression on and set the training limit.
  const pqSettings = { enabled: true, trainingLimit: 50000 };

  // Both properties are plain text fields.
  const textProperty = (name) => ({ name, dataType: ['text'] });

  const questionClass = {
    class: 'Question',
    vectorizer: 'text2vec-openai',
    vectorIndexConfig: { pq: pqSettings },
    properties: [textProperty('question'), textProperty('answer')],
  };

  const created = await client.schema
    .classCreator()
    .withClass(questionClass)
    .do();
  console.log(created);
}
await addSchemaAutoPQ();
3. Load your data
Load your data. You do not have to load an initial set of training data.
AutoPQ creates the PQ codebook when the object count reaches the training limit. By default, the training limit is 100,000 objects per shard.
Manually configure PQ
You can manually enable PQ on an existing collection. After PQ is enabled, Weaviate trains the PQ codebook. Before you enable PQ, verify that the collection already holds a large enough training set; we recommend 10,000 to 100,000 objects per shard.
To manually enable PQ, follow these steps:
Phase One: Create a codebook
Phase Two: Load the rest of your data
We suggest 10,000 to 100,000 objects per shard.
Weaviate logs a message when PQ is enabled and another message when vector compression is complete. Do not import the rest of your data until the initial training step is complete.
Follow these steps to manually enable PQ.
1. Define a collection without PQ
Create a collection without specifying a quantizer.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
# Create the "Question" collection without specifying a quantizer.
# NOTE: `client` and the `wc` alias (weaviate.classes.config) must already be in scope.
client.collections.create(
name="Question",
description="A Jeopardy! question",
vectorizer_config=wc.Configure.Vectorizer.text2vec_openai(),
generative_config=wc.Configure.Generative.openai(),
properties=[
wc.Property(name="question", data_type=wc.DataType.TEXT),
wc.Property(name="answer", data_type=wc.DataType.TEXT),
],
)
# Definition of the "Question" class with three text properties and no PQ settings.
class_definition = {
"class": "Question",
"vectorizer": "text2vec-openai",
"properties": [
{"name": "question", "dataType": ["text"]},
{"name": "answer", "dataType": ["text"]},
{"name": "round", "dataType": ["text"]},
],
}
# Create the class from the definition above.
# NOTE: `client` is expected to be an already-connected client; it is not created in this snippet.
client.schema.create_class(class_definition)
// Create the "Question" collection without a quantizer.
// `client` and `weaviate` must be provided by the surrounding code.
const collection = await client.collections.create({
  name: 'Question',
  // Key is `vectorizers`, matching the other JS/TS v3 snippets on this page.
  vectorizers: weaviate.configure.vectorizer.text2VecOpenAI({
    // NOTE(review): the other examples define `question` and `answer`
    // properties, not `title` — confirm this is the intended source property.
    sourceProperties: ['title'],
  }),
});
/**
 * Creates the `Question` class with two text properties and no PQ settings,
 * then logs the value returned by the class creator.
 *
 * Relies on a `client` object that is in scope outside this function.
 *
 * @returns {Promise<void>}
 */
async function addSchema() {
  // Both properties are plain text fields.
  const textProperty = (name) => ({ name, dataType: ['text'] });

  const questionClass = {
    class: 'Question',
    vectorizer: 'text2vec-openai',
    properties: [textProperty('question'), textProperty('answer')],
  };

  const created = await client.schema
    .classCreator()
    .withClass(questionClass)
    .do();
  console.log(created);
}
await addSchema();
// Define the "Question" class with two text properties and no PQ settings.
// NOTE(review): the property names are capitalized here ("Question", "Answer"),
// while the other language examples use lowercase names — confirm this is intended.
class := &models.Class{
Class: "Question",
Vectorizer: "text2vec-openai",
Properties: []*models.Property{
{Name: "Question", DataType: []string{"text"}},
{Name: "Answer", DataType: []string{"text"}},
},
}
// Create the class; `err` and `client` must already be declared by the surrounding code.
err = client.Schema().ClassCreator().
WithClass(class).Do(context.Background())
if err != nil {
log.Fatalf("create class: %v", err)
}
// Property list for the "Question" class: one date, one integer and four text properties.
List<Property> properties = Arrays.asList(
Property.builder()
.name("airDate")
.dataType(Arrays.asList(DataType.DATE))
.build(),
Property.builder()
.name("round")
.dataType(Arrays.asList(DataType.TEXT))
.build(),
Property.builder()
.name("value")
.dataType(Arrays.asList(DataType.INT))
.build(),
Property.builder()
.name("category")
.dataType(Arrays.asList(DataType.TEXT))
.build(),
Property.builder()
.name("question")
.dataType(Arrays.asList(DataType.TEXT))
.build(),
Property.builder()
.name("answer")
.dataType(Arrays.asList(DataType.TEXT))
.build()
);
// Class definition without any vector index (PQ) configuration.
WeaviateClass jeopardyClass = WeaviateClass.builder()
.className("Question")
.description("A Jeopardy! question")
.properties(properties)
.vectorizer("text2vec-openai")
.build();
// Create the class; `client` must be provided by the surrounding code.
Result<Boolean> createResult = client.schema().classCreator()
.withClass(jeopardyClass)
.run();
2. Load training data
Add objects that will be used to train PQ. Weaviate uses the smaller of the training limit or the collection size to train PQ, because the training limit is the maximum number of objects, per shard, used to fit the centroids.
We recommend loading a representative sample such that the trained centroids are representative of the entire dataset.
From `v1.27.0`, Weaviate uses a sparse Fisher-Yates algorithm to select the training set from the available objects when PQ is enabled manually. Nonetheless, it is still recommended to load a representative sample of the data so that the trained centroids are representative of the entire dataset.
3. Enable PQ and create the codebook
Update your collection definition to enable PQ. Once PQ is enabled, Weaviate trains the codebook using the training data.
PQ relies on a codebook to compress the original vectors. The codebook defines "centroids" that are used to calculate the compressed vector. If you are not using AutoPQ, you must have some vectors loaded before you enable PQ so Weaviate can define the centroids. We recommend a training set size of between 10,000 and 100,000 for each shard.
To enable PQ, update your collection definition as shown below. For additional configuration options, see the PQ parameter table.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
# Enable PQ on the existing "Question" collection by reconfiguring its HNSW index
# with a PQ quantizer.
# NOTE: `client` is expected to be an already-connected client; it is not created in this snippet.
import weaviate.classes.config as wc
jeopardy = client.collections.get("Question")
jeopardy.config.update(
vector_index_config=wc.Reconfigure.VectorIndex.hnsw(
quantizer=wc.Reconfigure.VectorIndex.Quantizer.pq(
training_limit=50000 # Default: 100000
)
)
)
# Update the "Question" class configuration so that PQ is enabled,
# with an explicit training limit and segment count.
# NOTE: `client` is expected to be an already-connected client; it is not created in this snippet.
client.schema.update_config(
"Question",
{
"vectorIndexConfig": {
"pq": {
"enabled": True, # Enable PQ
"trainingLimit": 100000,
"segments": 96
}
}
},
)
// Look up the existing collection. `client` and `collectionName` must be
// defined by the surrounding code; they are not set in this snippet.
const collection = client.collections.get(collectionName);
// Reconfigure the HNSW index to use a PQ quantizer with a training limit of 50000.
await collection.config.update({
vectorizers: weaviate.reconfigure.vectorizer.update({
vectorIndexConfig: weaviate.reconfigure.vectorIndex.hnsw({
quantizer: weaviate.reconfigure.vectorIndex.quantizer.pq({
trainingLimit: 50000
})
})
})
})
// Note: The update is sent to the REST endpoint directly,
// as the JS/TS client cannot currently update the schema.
/**
 * Sends a PUT request for the `Question` class with PQ enabled in its
 * vector index configuration, then logs the parsed JSON response.
 *
 * @returns {Promise<void>}
 */
async function updateSchema() {
  const endpoint = 'http://localhost:8080/v1/schema/Question';

  // PQ settings applied to the class's vector index.
  const pqSettings = { enabled: true, trainingLimit: 100000, segments: 96 };

  // Both properties are plain text fields.
  const textProperty = (name) => ({ name, dataType: ['text'] });

  const payload = {
    class: 'Question',
    vectorizer: 'text2vec-openai',
    properties: [textProperty('question'), textProperty('answer')],
    vectorIndexConfig: { pq: pqSettings },
  };

  const reply = await fetch(endpoint, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });

  const parsed = await reply.json();
  console.log(parsed);
}
await updateSchema();
// Fetch the current definition of the "Question" class.
// `class`, `err` and `client` must already be declared by the surrounding code.
class, err = client.Schema().ClassGetter().
WithClassName("Question").Do(context.Background())
if err != nil {
log.Fatalf("get class for vec idx cfg update: %v", err)
}
// Set the "pq" entry of the vector index config to enable PQ.
// The type assertion panics if VectorIndexConfig is not a map[string]interface{}.
cfg := class.VectorIndexConfig.(map[string]interface{})
cfg["pq"] = map[string]interface{}{
"enabled": true,
"trainingLimit": 100_000,
"segments": 96,
}
class.VectorIndexConfig = cfg
// Send the modified class definition back to the server.
err = client.Schema().ClassUpdater().
WithClass(class).Do(context.Background())
if err != nil {
log.Fatalf("update class to use pq: %v", err)
}
// Rebuild the "Question" class definition, this time with PQ enabled in the vector index config.
// `properties` and `client` must be provided by the surrounding code; they are not defined in this snippet.
WeaviateClass updatedJeopardyClass = WeaviateClass.builder()
.className("Question")
.description("A Jeopardy! question")
.properties(properties)
.vectorizer("text2vec-openai")
.vectorIndexConfig(VectorIndexConfig.builder()
.pq(PQConfig.builder()
.enabled(true)
.trainingLimit(100_000)
.segments(96)
.build())
.build())
.build();
// Send the updated class definition to the server.
Result<Boolean> updateResult = client.schema().classUpdater()
.withClass(updatedJeopardyClass)
.run();
4. Load the rest of your data
Once the codebook has been trained, you may continue to add data as normal. Weaviate compresses the new data when it adds it to the database.
If you already have data in your Weaviate instance when you create the codebook, Weaviate automatically compresses the remaining objects (the ones after the initial training set).
PQ Parameters
You can configure PQ compression by setting the following parameters at the collection level.
Parameter | Type | Default | Details |
---|---|---|---|
enabled | boolean | false | Enable PQ when `true`. The Python client v4 does not use the `enabled` parameter. To enable PQ with the v4 client, set a `quantizer` in the collection definition. |
trainingLimit | integer | 100000 | The maximum number of objects, per shard, used to fit the centroids. Larger values increase the time it takes to fit the centroids. Larger values also require more memory. |
segments | integer | -- | The number of segments to use. The number of vector dimensions must be evenly divisible by the number of segments. Starting in `v1.23`, Weaviate uses the number of dimensions to optimize the number of segments. |
centroids | integer | 256 | The number of centroids to use (max: 256). We generally recommend you do not change this value. Due to the data structure used, a smaller centroid value will not result in smaller vectors, but may result in faster compression at the cost of recall. |
encoder | string | kmeans | Encoder specification. There are two encoders. You can specify the type of encoder as either `kmeans` (default) or `tile`. |
distribution | string | log-normal | Encoder distribution type. Only used with the `tile` encoder. If you use the `tile` encoder, you can specify the distribution as `log-normal` (default) or `normal`. |
Additional tools and considerations
Change the codebook training limit
For most use cases, 100,000 objects is an optimal training size. There is little benefit to increasing `trainingLimit`. If you do increase `trainingLimit`, the training period will take longer. You could also have memory problems if you set a high `trainingLimit`.
If you have a small dataset and wish to enable compression, consider using binary quantization (BQ). BQ is a simpler compression method that does not require training.
Check the system logs
When compression is enabled, Weaviate logs diagnostic messages like these.
pq-conf-demo-1 | {"action":"compress","level":"info","msg":"switching to compressed vectors","time":"2023-11-13T21:10:52Z"}
pq-conf-demo-1 | {"action":"compress","level":"info","msg":"vector compression complete","time":"2023-11-13T21:10:53Z"}
If you use `docker-compose` to run Weaviate, you can get the logs on the system console.
docker compose logs -f --tail 10 weaviate
You can also view the log file directly. Check `docker` to get the file location.
docker inspect --format='{{.LogPath}}' <your-weaviate-container-id>
Review the current `pq` configuration
To review the current `pq` configuration, you can retrieve it as shown below.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
# Read the "Question" collection's configuration and pull out the quantizer settings
# from its vector index config.
# NOTE: `client` is expected to be an already-connected client; it is not created in this snippet.
jeopardy = client.collections.get("Question")
config = jeopardy.config.get()
pq_config = config.vector_index_config.quantizer
# print some of the config properties
print(f"Encoder: { pq_config.encoder }")
print(f"Training: { pq_config.training_limit }")
print(f"Segments: { pq_config.segments }")
print(f"Centroids: { pq_config.centroids }")
# Fetch the "Question" class definition and pretty-print its "pq" settings.
# NOTE: `client` and the `json` module must already be in scope; neither is set up in this snippet.
schema = client.schema.get("Question")
print(json.dumps(schema["vectorIndexConfig"]["pq"], indent=2))
// Look up the collection. `client` and `collectionName` must be defined by
// the surrounding code; they are not set in this snippet.
const collection = client.collections.get(collectionName);
const collectionConfig = await collection.config.get();
// Print the quantizer settings of the default vector's index configuration.
// The key is `vectorizers`, matching the create/update snippets on this page.
console.log(collectionConfig.vectorizers.default.indexConfig.quantizer);
// Fetch the "Question" class definition. `client` must be provided by the surrounding code.
const schema = await client.schema.classGetter().withClassName('Question').do();
// Inspect the PQ configuration
console.log(JSON.stringify(schema.vectorIndexConfig.pq, null, 2));
// Fetch the "Question" class definition again and log its "pq" settings.
// `class`, `err`, `cfg` and `client` must already be declared by the surrounding code.
class, err = client.Schema().ClassGetter().
WithClassName("Question").Do(context.Background())
if err != nil {
log.Fatalf("get class to verify vec idx cfg changes: %v", err)
}
// The type assertion panics if VectorIndexConfig is not a map[string]interface{}.
cfg = class.VectorIndexConfig.(map[string]interface{})
log.Printf("pq config: %v", cfg["pq"])
// Fetch the "Question" class definition. `client` must be provided by the surrounding code.
Result<WeaviateClass> getResult = client.schema().classGetter()
.withClassName("Question")
.run();
// Extract the PQ settings from the vector index config and print them as pretty-printed JSON.
PQConfig pqConfig = getResult.getResult().getVectorIndexConfig().getPq();
String json = new GsonBuilder().setPrettyPrinting().create().toJson(pqConfig);
System.out.println(json);
Multiple vectors (named vectors)
Collections support multiple named vectors.
Collections can have multiple named vectors. The vectors in a collection can have their own configurations, and compression must be enabled independently for each vector. Every vector is independent and can use PQ, BQ, SQ, or no compression.
Related pages
- Configuration: Vector index
- Concepts: Vector index
- Concepts: Vector quantization
- Guide: Schemas and collection definitions
Questions and feedback
If you have any questions or feedback, let us know in the user forum.