Manage collections
Every object in Weaviate belongs to exactly one collection. Use the examples on this page to manage your collections.
Newer Weaviate documentation discusses "collections." Older Weaviate documentation refers to "classes" instead. Expect to see both terms throughout the documentation.
Create a collection
To create a collection, specify at least the collection name. If you don't specify any properties, auto-schema creates them.
Weaviate follows GraphQL naming conventions.
- Start collection names with an upper case letter.
- Start property names with a lower case letter.
If you use an initial upper case letter to define a property name, Weaviate changes it to a lower case letter internally.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
client.collections.create("Article")
class_name = "Article"
class_obj = {"class": class_name}
client.schema.create_class(class_obj) # returns null on success
const newCollection = await client.collections.create({
name: 'Article'
})
// The returned value is the full collection definition, showing all defaults
console.log(JSON.stringify(newCollection, null, 2));
const className = 'Article';
const emptyClassDefinition = {
class: className,
};
// Add the class to the schema
let result = await client.schema
.classCreator()
.withClass(emptyClassDefinition)
.do();
String collectionName = "Article";
WeaviateClass emptyClass = WeaviateClass.builder()
.className(collectionName)
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(emptyClass)
.run();
className := "Article"
emptyClass := &models.Class{
Class: className,
}
// Create the collection (also called class)
err := client.Schema().ClassCreator().
WithClass(emptyClass).
Do(ctx)
Using too many collections can lead to scalability issues like high memory usage and degraded query performance. Instead, consider using multi-tenancy, where a single collection is subdivided into multiple tenants.
For more details, see Starter Guides: Scaling limits with collections.
Create a collection and define properties
Properties are the data fields in your collection. Each property has a name and a data type.
Additional information
Use properties to configure additional parameters such as data type, index characteristics, or tokenization.
For details, see:
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Property, DataType
# Note that you can use `client.collections.create_from_dict()` to create a collection from a v3-client-style JSON object
client.collections.create(
"Article",
properties=[
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
]
)
class_obj = {
"class": "Article",
"properties": [
{
"name": "title",
"dataType": ["text"],
},
{
"name": "body",
"dataType": ["text"],
},
],
}
client.schema.create_class(class_obj) # returns null on success
import { dataType } from 'weaviate-client';
await client.collections.create({
name: 'Article',
properties: [
{
name: 'title',
dataType: dataType.TEXT,
},
{
name: 'body',
dataType: dataType.TEXT,
},
],
})
const classWithProps = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
{
name: 'body',
dataType: ['text'],
},
],
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithProps).do();
// Define collection properties
Property titleProperty = Property.builder()
.name("title")
.description("Title Property Description...")
.dataType(Arrays.asList(DataType.TEXT))
.build();
Property bodyProperty = Property.builder()
.name("body")
.description("Body Property Description...")
.dataType(Arrays.asList(DataType.TEXT))
.build();
// Add the defined properties to the collection
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.description("Article collection Description...")
.properties(Arrays.asList(titleProperty, bodyProperty))
.build();
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "body",
DataType: schema.DataTypeText.PropString(),
},
},
}
Disable auto-schema
By default, Weaviate creates missing collections and missing properties. When you configure collections manually, you have more precise control of the collection settings.
To disable auto-schema, set `AUTOSCHEMA_ENABLED: 'false'` in your system configuration file.
Specify a vectorizer
Specify a vectorizer for a collection.
Additional information
Collection level settings override default values and general configuration parameters such as environment variables.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
properties=[ # properties configuration is optional
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
]
)
class_obj = {
"class": "Article",
"properties": [
{
"name": "title",
"dataType": ["text"],
},
],
"vectorizer": "text2vec-openai" # this could be any vectorizer
}
client.schema.create_class(class_obj)
import { vectorizer, dataType } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOpenAI(),
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'body', dataType: dataType.TEXT },
],
})
const classWithVectorizer = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithVectorizer).do();
// Additional configuration not shown
// Define the vectorizer in the WeaviateClass Builder
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorizer("text2vec-openai") // Vectorizer of your choice, e.g. text2vec-openai or text2vec-cohere
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Vectorizer: "text2vec-openai",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "body",
DataType: schema.DataTypeText.PropString(),
},
},
}
Specify vectorizer settings
To configure how a vectorizer works (e.g. which model to use) with a specific collection, set the vectorizer parameters.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_cohere(
model="embed-multilingual-v2.0",
vectorize_collection_name=True
),
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-cohere", # this could be any vectorizer
"moduleConfig": {
"text2vec-cohere": { # this must match the vectorizer used
"vectorizeClassName": True,
"model": "embed-multilingual-v2.0",
}
}
}
client.schema.create_class(class_obj)
import { vectorizer } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecCohere({
model: 'embed-multilingual-v2.0',
vectorizeCollectionName: true,
}),
})
const classWithModuleSettings = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-cohere', // this could be any vectorizer
moduleConfig: {
'text2vec-cohere': {
// this must match the vectorizer used
vectorizeClassName: true,
model: 'embed-multilingual-v2.0',
},
},
};
// Add the class to the schema
result = await client.schema
.classCreator()
.withClass(classWithModuleSettings)
.do();
// Additional configuration not shown
// Define the module settings
Map<String, Object> text2vecOpenAI = new HashMap<>();
Map<String, Object> text2vecOpenAISettings = new HashMap<>();
text2vecOpenAISettings.put("vectorizePropertyName", false);
text2vecOpenAISettings.put("model", "text-embedding-3-small"); // Set the model of your choice, e.g. text-embedding-3-small
text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings);
Map<Object, Object> moduleConfig = new HashMap<>();
moduleConfig.put("text2vec-openai", text2vecOpenAI);
// Set the module config in the WeaviateClass Builder
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.moduleConfig(moduleConfig) // Set the module config
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Vectorizer: "text2vec-cohere",
ModuleConfig: map[string]interface{}{
"text2vec-cohere": map[string]interface{}{
"model": "embed-multilingual-v2.0",
"vectorizeClassName": true,
},
},
}
Define named vectors
v1.24
You can define multiple named vectors per collection. This allows each object to be represented by multiple vector embeddings, each with its own vector index.
As such, each named vector configuration can include its own vectorizer and vector index settings.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"ArticleNV",
vectorizer_config=[
# Set a named vector with the "text2vec-cohere" vectorizer
Configure.NamedVectors.text2vec_cohere(
name="title",
source_properties=["title"], # (Optional) Set the source property(ies)
vector_index_config=Configure.VectorIndex.hnsw() # (Optional) Set vector index options
),
# Set another named vector with the "text2vec-openai" vectorizer
Configure.NamedVectors.text2vec_openai(
name="title_country",
source_properties=["title", "country"], # (Optional) Set the source property(ies)
vector_index_config=Configure.VectorIndex.hnsw() # (Optional) Set vector index options
),
# Set a named vector for your own uploaded vectors
Configure.NamedVectors.none(
name="custom_vector",
vector_index_config=Configure.VectorIndex.hnsw() # (Optional) Set vector index options
)
],
properties=[ # Define properties
Property(name="title", data_type=DataType.TEXT),
Property(name="country", data_type=DataType.TEXT),
],
)
# Unfortunately, named vectors are not supported in the v3 API / Python client.
# Please upgrade to the v4 API / Python client to use named vectors.
import { vectorizer, dataType, configure } from 'weaviate-client';
await client.collections.create({
name: 'ArticleNV',
vectorizers: [
// Set a named vector with the "text2vec-cohere" vectorizer
vectorizer.text2VecCohere({
name: 'title',
sourceProperties: ['title'], // (Optional) Set the source property(ies)
vectorIndexConfig: configure.vectorIndex.hnsw() // (Optional) Set the vector index configuration
}),
// Set a named vector with the "text2vec-openai" vectorizer
vectorizer.text2VecOpenAI({
name: 'title_country',
sourceProperties: ['title','country'], // (Optional) Set the source property(ies)
vectorIndexConfig: configure.vectorIndex.hnsw() // (Optional) Set the vector index configuration
}),
// Set a named vector for your own uploaded vectors
vectorizer.none({
name: 'custom_vector',
vectorIndexConfig: configure.vectorIndex.hnsw() // (Optional) Set the vector index configuration
})
],
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'country', dataType: dataType.TEXT },
],
})
const classWithNamedVectors = {
class: 'ArticleNV',
vectorConfig: {
// Set a named vector with the "text2vec-cohere" vectorizer
title: {
vectorizer: {
'text2vec-cohere': {
properties: ['title'], // (Optional) Set the source property(ies)
},
},
vectorIndexType: 'hnsw', // (Optional) Set the vector index type
vectorIndexConfig: {} // (Optional) Set the vector index configuration
},
// Set a named vector with the "text2vec-openai" vectorizer
title_country: {
vectorizer: {
'text2vec-openai': {
properties: ['title','country'], // (Optional) Set the source property(ies)
},
},
vectorIndexType: 'hnsw', // (Optional) Set the vector index type
vectorIndexConfig: {} // (Optional) Set the vector index configuration
},
// Set a named vector for your own uploaded vectors
custom_vector: {
vectorizer: {
'none': {}
},
vectorIndexType: 'hnsw', // (Optional) Set the vector index type
vectorIndexConfig: {} // (Optional) Set the vector index configuration
},
},
properties: [
{
name: 'title',
dataType: ['text'],
},
{
name: 'country',
dataType: ['text'],
},
],
};
// Add the class to the schema
result = await client.schema
.classCreator()
.withClass(classWithNamedVectors)
.do();
// Additional configuration not shown
// Define the vectorizers configurations
Map<String, Object> text2vecOpenAI = new HashMap<>();
Map<String, Object> text2vecOpenAISettings = new HashMap<>();
text2vecOpenAISettings.put("properties", new String[] { "name" });
text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings);
Map<String, Object> text2vecCohere = new HashMap<>();
Map<String, Object> text2vecCohereSettings = new HashMap<>();
text2vecCohereSettings.put("properties", new String[] { "body" });
text2vecCohere.put("text2vec-cohere", text2vecCohereSettings);
// Define the vector configurations
Map<String, WeaviateClass.VectorConfig> vectorConfig = new HashMap<>();
vectorConfig.put("name_vector", WeaviateClass.VectorConfig.builder()
.vectorIndexType("hnsw")
.vectorizer(text2vecOpenAI)
.build());
vectorConfig.put("body_vector", WeaviateClass.VectorConfig.builder()
.vectorIndexType("hnsw")
.vectorizer(text2vecCohere)
.build());
// Define the vectorizers in the WeaviateClass Builder
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorConfig(vectorConfig)
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "ArticleNV",
Description: "Collection of articles with named vectors",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "country",
DataType: schema.DataTypeText.PropString(),
},
},
VectorConfig: map[string]models.VectorConfig{
"title": {
Vectorizer: map[string]interface{}{
"text2vec-openai": map[string]interface{}{
"sourceProperties": []string{"title"},
},
},
VectorIndexType: "hnsw",
},
"title_country": {
Vectorizer: map[string]interface{}{
"text2vec-openai": map[string]interface{}{
"sourceProperties": []string{"title", "country"},
},
},
VectorIndexType: "hnsw",
},
"custom_vector": {
Vectorizer: map[string]interface{}{
"none": map[string]interface{}{},
},
VectorIndexType: "hnsw",
},
},
}
Add new named vectors
v1.31
Named vectors can be added to existing collection definitions with named vectors. (This is not possible for collections without named vectors.)
- Python Client v4
- JS/TS Client v3
- Java
- Go
from weaviate.classes.config import Configure
articles = client.collections.get("Article")
articles.config.add_vector(
vector_config=Configure.NamedVectors.text2vec_cohere(
name="body_vector",
source_properties=["body"],
)
)
// TS support coming soon
// Java support coming soon
// Go support coming soon
Adding a new named vector to the collection definition won't trigger vectorization for existing objects. Only new or updated objects will receive embeddings for the newly added named vector definition.
Define multi-vector embeddings (e.g. ColBERT, ColPali)
v1.29
, v1.30
Multi-vector embeddings, also known as multi-vectors, represent a single object with multiple vectors, i.e. a 2-dimensional matrix. Multi-vectors are currently only available for HNSW indexes for named vectors. To use multi-vectors, enable them for the appropriate named vector.
- Python Client v4
- Java
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"DemoCollection",
vectorizer_config=[
# Example 1 - Use a model integration
# The factory function will automatically enable multi-vector support for the HNSW index
Configure.NamedVectors.text2colbert_jinaai(
name="jina_colbert",
source_properties=["text"],
),
# Example 2 - User-provided multi-vector representations
# Must explicitly enable multi-vector support for the HNSW index
Configure.NamedVectors.none(
name="custom_multi_vector",
vector_index_config=Configure.VectorIndex.hnsw(
multi_vector=Configure.VectorIndex.MultiVector.multi_vector()
),
),
],
properties=[
Property(name="text", data_type=DataType.TEXT)
]
# Additional parameters not shown
)
// Define collection properties
Property textProperty = Property.builder()
.name("text")
.description("Text content for ColBERT vectorization")
.dataType(Arrays.asList(DataType.TEXT))
.build();
// Define the vectorizers configurations for named vectors
Map<String, Object> text2colbertJinaAI = new HashMap<>();
Map<String, Object> text2colbertSettings = new HashMap<>();
text2colbertSettings.put("properties", new String[] { "text" });
text2colbertJinaAI.put("text2colbert-jinaai", text2colbertSettings);
// Configure multi-vector for custom vectors
Map<String, Object> noneVectorizer = new HashMap<>();
noneVectorizer.put("none", new Object());
// Create multi-vector config for custom vectors
VectorIndexConfig customMultiVectorConfig = VectorIndexConfig.builder()
.multiVector(MultiVectorConfig.builder().build()) // Enable multi-vector with default settings
.build();
// Define the vector configurations
Map<String, WeaviateClass.VectorConfig> vectorConfig = new HashMap<>();
// Example 1: ColBERT vectorizer
vectorConfig.put("jina_colbert", WeaviateClass.VectorConfig.builder()
.vectorIndexType("hnsw")
.vectorizer(text2colbertJinaAI)
.build());
// Example 2: User-provided multi-vector representations
vectorConfig.put("custom_multi_vector", WeaviateClass.VectorConfig.builder()
.vectorIndexType("hnsw")
.vectorizer(noneVectorizer)
.vectorIndexConfig(customMultiVectorConfig)
.build());
// Create the collection with multi-vector configuration
WeaviateClass multiVecClass = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(textProperty))
.vectorConfig(vectorConfig)
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(multiVecClass)
.run();
Multi-vector embeddings use up more memory than single vector embeddings. You can use vector quantization and encoding to compress them and reduce memory usage.
Set vector index type
The vector index type can be set for each collection at creation time. Choose between the `hnsw`, `flat`, and `dynamic` index types.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
vector_index_config=Configure.VectorIndex.hnsw(), # Use the HNSW index
# vector_index_config=Configure.VectorIndex.flat(), # Use the FLAT index
# vector_index_config=Configure.VectorIndex.dynamic(), # Use the DYNAMIC index
properties=[
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
]
)
class_obj = {
'class': 'Article',
'properties': [
{
'name': 'title',
'dataType': ['text'],
},
],
'vectorizer': 'text2vec-openai', # this could be any vectorizer
"vectorIndexType": "hnsw", # or "flat" or "dynamic"
}
client.schema.create_class(class_obj)
import { vectorizer, dataType, configure } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOpenAI({
vectorIndexConfig: configure.vectorIndex.hnsw(), // Use HNSW
// vectorIndexConfig: configure.vectorIndex.flat(), // Use Flat
// vectorIndexConfig: configure.vectorIndex.dynamic(), // Use Dynamic
}),
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'body', dataType: dataType.TEXT },
],
})
const classWithIndexType = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
vectorIndexType: 'flat', // or 'hnsw', or 'dynamic'
vectorIndexConfig: {
bq: {
enabled: true, // Enable BQ compression. Default: False
rescoreLimit: 200, // The minimum number of candidates to fetch before rescoring. Default: -1 (No limit)
cache: true, // Enable use of vector cache. Default: False
},
vectorCacheMaxObjects: 100000, // Cache size if `cache` enabled. Default: 1000000000000
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithIndexType).do();
// Additional configuration not shown
// Define the index type in the WeaviateClass Builder
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorizer("text2vec-openai")
.vectorIndexType("hnsw") // set the vector index of your choice e.g. hnsw, flat...
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "country",
DataType: schema.DataTypeText.PropString(),
},
},
Vectorizer: "text2vec-openai",
VectorIndexType: "hnsw", // Or "flat", "dynamic"
}
Additional information
- Read more about index types & compression in Concepts: Vector index.
Set vector index parameters
Set vector index parameters such as compression and filter strategy through collection configuration. Some parameters can be updated later after collection creation.
The filter strategy parameter was added in v1.27.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType, VectorDistances, VectorFilterStrategy
client.collections.create(
"Article",
# Additional configuration not shown
vector_index_config=Configure.VectorIndex.hnsw(
quantizer=Configure.VectorIndex.Quantizer.bq(),
ef_construction=300,
distance_metric=VectorDistances.COSINE,
filter_strategy=VectorFilterStrategy.SWEEPING # or ACORN (Available from Weaviate v1.27.0)
),
)
class_obj = {
'class': 'Article',
# Additional configuration not shown
"vectorIndexType": "flat",
"vectorIndexConfig": {
"bq": {
"enabled": True, # Enable BQ compression. Default: False
"rescoreLimit": 200, # The minimum number of candidates to fetch before rescoring. Default: -1 (No limit)
"cache": True, # Enable use of vector cache. Default: False
},
"vectorCacheMaxObjects": 100000, # Cache size if `cache` enabled. Default: 1000000000000
"filterStrategy": "sweeping" # or "acorn" (Available from Weaviate v1.27.0)
}
}
client.schema.create_class(class_obj)
import { configure, vectorizer } from 'weaviate-client';
await client.collections.create({
name: 'Article',
// Additional configuration not shown
vectorizers: vectorizer.text2VecCohere({
vectorIndexConfig: configure.vectorIndex.flat({
quantizer: configure.vectorIndex.quantizer.bq({
rescoreLimit: 200,
cache: true
}),
vectorCacheMaxObjects: 100000
})
})
})
const classWithIndexParams = {
class: 'Article',
// Additional configuration not shown
vectorIndexType: 'flat', // or `hnsw`
vectorIndexConfig: {
bq: {
enabled: true, // Enable BQ compression. Default: False
rescoreLimit: 200, // The minimum number of candidates to fetch before rescoring. Default: -1 (No limit)
cache: true, // Enable use of vector cache. Default: False
},
vectorCacheMaxObjects: 100000, // Cache size if `cache` enabled. Default: 1000000000000
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithIndexParams).do();
// Additional configuration not shown
// Define the VectorIndexConfig with compression
VectorIndexConfig createBqIndexConfig = VectorIndexConfig.builder()
.bq(BQConfig.builder()
.enabled(true)
.rescoreLimit(123L)
.cache(true)
.build())
.vectorCacheMaxObjects(100000L)
.build();
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorIndexType("flat") // set the vector index of your choice e.g. hnsw, flat...
.vectorIndexConfig(createBqIndexConfig)
.vectorizer("text2vec-openai")
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "country",
DataType: schema.DataTypeText.PropString(),
},
},
Vectorizer: "text2vec-openai",
VectorIndexType: "hnsw",
VectorIndexConfig: map[string]interface{}{
"bq": map[string]interface{}{
"enabled": true,
},
"efConstruction": 300,
"distance": "cosine",
"filterStrategy": "acorn",
},
}
Additional information
- Read more about index types & compression in Concepts: Vector index.
Property-level settings
Configure individual properties in a collection. Each property can have its own configuration. Here are some common settings:
- Vectorize the property
- Vectorize the property name
- Set a tokenization type
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType, Tokenization
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_cohere(),
properties=[
Property(
name="title",
data_type=DataType.TEXT,
vectorize_property_name=True, # Use "title" as part of the value to vectorize
tokenization=Tokenization.LOWERCASE, # Use "lowercase" tokenization
description="The title of the article." # Optional description
),
Property(
name="body",
data_type=DataType.TEXT,
skip_vectorization=True, # Don't vectorize this property
tokenization=Tokenization.WHITESPACE # Use "whitespace" tokenization
),
]
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-huggingface", # this could be any vectorizer
"properties": [
{
"name": "title",
"dataType": ["text"],
"moduleConfig": {
"text2vec-huggingface": { # this must match the vectorizer used
"vectorizePropertyName": True,
"tokenization": "lowercase"
}
}
},
{
"name": "body",
"dataType": ["text"],
"moduleConfig": {
"text2vec-huggingface": { # this must match the vectorizer used
"skip": True, # Don't vectorize body
"tokenization": "whitespace"
}
}
},
],
}
client.schema.create_class(class_obj)
import { vectorizer, dataType, tokenization } from 'weaviate-client';
const newCollection = await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecHuggingFace(),
properties: [
{
name: 'title',
dataType: dataType.TEXT,
vectorizePropertyName: true,
tokenization: tokenization.LOWERCASE // or 'lowercase'
},
{
name: 'body',
dataType: dataType.TEXT,
skipVectorization: true,
tokenization: tokenization.WHITESPACE // or 'whitespace'
},
],
})
const classWithPropModuleSettings = {
class: 'Article',
vectorizer: 'text2vec-huggingface', // this could be any vectorizer
properties: [
{
name: 'title',
dataType: ['text'],
moduleConfig: {
'text2vec-huggingface': {
// this must match the vectorizer used
vectorizePropertyName: true,
tokenization: 'lowercase', // Use "lowercase" tokenization
},
},
},
{
name: 'body',
dataType: ['text'],
moduleConfig: {
'text2vec-huggingface': {
// this must match the vectorizer used
skip: true, // Don't vectorize this property
tokenization: 'whitespace', // Use "whitespace" tokenization
},
},
},
],
};
// Add the class to the schema
result = await client.schema
.classCreator()
.withClass(classWithPropModuleSettings)
.do();
Property titleProperty = Property.builder()
.name("title")
.description("title of the article")
.dataType(Arrays.asList(DataType.TEXT))
.tokenization(Tokenization.WORD)
.build();
Property bodyProperty = Property.builder()
.name("body")
.description("body of the article")
.dataType(Arrays.asList(DataType.TEXT))
.tokenization(Tokenization.LOWERCASE)
.build();
// Add the defined properties to the collection
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.description("Article collection Description...")
.properties(Arrays.asList(titleProperty, bodyProperty))
.build();
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
vTrue := true
vFalse := false
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
Tokenization: "lowercase",
IndexFilterable: &vTrue,
IndexSearchable: &vFalse,
ModuleConfig: map[string]interface{}{
"text2vec-cohere": map[string]interface{}{
"vectorizePropertyName": true,
},
},
},
{
Name: "body",
DataType: schema.DataTypeText.PropString(),
Tokenization: "whitespace",
IndexFilterable: &vTrue,
IndexSearchable: &vTrue,
ModuleConfig: map[string]interface{}{
"text2vec-cohere": map[string]interface{}{
"vectorizePropertyName": false,
},
},
},
},
Vectorizer: "text2vec-cohere",
}
Specify a distance metric
If you choose to bring your own vectors, you should specify the distance metric.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure, VectorDistances
client.collections.create(
"Article",
vector_index_config=Configure.VectorIndex.hnsw(
distance_metric=VectorDistances.COSINE
),
)
class_obj = {
"class": "Article",
"vectorIndexConfig": {
"distance": "cosine",
},
}
client.schema.create_class(class_obj)
import { configure, vectorizer, vectorDistances } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOllama({
vectorIndexConfig: configure.vectorIndex.hnsw({
distanceMetric: vectorDistances.COSINE // or 'cosine'
})
})
})
const classWithDistance = {
class: 'Article',
vectorIndexConfig: {
distance: 'cosine',
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithDistance).do();
// Additional configuration not shown
VectorIndexConfig vectorIndexConfig = VectorIndexConfig.builder()
.distance(DistanceType.DOT) // Define Distance Type e.g. Dot, Cosine, hamming...
.build();
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorIndexConfig(vectorIndexConfig)
.build();
Result<Boolean> classResult = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
VectorIndexConfig: map[string]interface{}{
"distance": "cosine",
},
}
Additional information
For details on the configuration parameters, see the following:
Set inverted index parameters
Various inverted index parameters are configurable for each collection. Some parameters are set at the collection level, while others are set at the property level.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
# Additional settings not shown
properties=[ # properties configuration is optional
Property(
name="title",
data_type=DataType.TEXT,
index_filterable=True,
index_searchable=True,
),
Property(
name="chunk",
data_type=DataType.TEXT,
index_filterable=True,
index_searchable=True,
),
Property(
name="chunk_number",
data_type=DataType.INT,
index_range_filters=True,
),
],
inverted_index_config=Configure.inverted_index( # Optional
bm25_b=0.7,
bm25_k1=1.25,
index_null_state=True,
index_property_length=True,
index_timestamps=True
)
)
class_obj = {
"class": "Article",
"properties": [
{
"name": "title",
"dataType": ["text"],
"indexFilterable": True,
"indexSearchable": True,
"moduleConfig": {
"text2vec-huggingface": {}
}
},
{
"name": "chunk",
"dataType": ["text"],
"indexFilterable": True,
"indexSearchable": True,
},
{
"name": "chunk_no",
"dataType": ["int"],
"indexRangeFilters": True,
},
],
"invertedIndexConfig": {
"bm25": {
"b": 0.7,
"k1": 1.25
},
"indexTimestamps": True,
"indexNullState": True,
"indexPropertyLength": True
}
}
client.schema.create_class(class_obj)
import { dataType } from 'weaviate-client';
await client.collections.create({
name: 'Article',
properties: [
{
name: 'title',
dataType: dataType.TEXT,
indexFilterable: true,
indexSearchable: true,
},
{
name: 'chunk',
dataType: dataType.TEXT,
indexFilterable: true,
indexSearchable: true,
},
{
name: 'chunk_no',
dataType: dataType.INT,
indexRangeFilters: true,
},
],
invertedIndex: {
bm25: {
b: 0.7,
k1: 1.25
},
indexNullState: true,
indexPropertyLength: true,
indexTimestamps: true
}
})
// Collection definition combining property-level index flags
// (indexFilterable / indexSearchable / indexRangeFilters) with
// collection-level inverted index settings (invertedIndexConfig).
const classWithInvIndexSettings = {
  class: 'Article',
  properties: [
    {
      name: 'title',
      dataType: ['text'],
      indexFilterable: true,
      indexSearchable: true,
    },
    {
      name: 'chunk',
      dataType: ['text'],
      indexFilterable: true,
      indexSearchable: true,
    },
    {
      name: 'chunk_no',
      dataType: ['int'],
      indexRangeFilters: true,
    },
  ],
  invertedIndexConfig: {
    bm25: {
      b: 0.7,
      k1: 1.25
    },
    indexTimestamps: true,
    indexNullState: true,
    indexPropertyLength: true
  }
};
// Add the class to the schema.
// Pass the definition built above (the previous snippet referenced an
// unrelated, undefined variable here).
result = await client.schema
  .classCreator()
  .withClass(classWithInvIndexSettings)
  .do();
// Create properties with specific indexing configurations.
// Text properties get filterable + searchable indexes.
Property titleProperty = Property.builder()
    .name("title")
    .dataType(Arrays.asList(DataType.TEXT))
    .indexFilterable(true)
    .indexSearchable(true)
    .build();
Property chunkProperty = Property.builder()
    .name("chunk")
    .dataType(Arrays.asList(DataType.TEXT))
    .indexFilterable(true)
    .indexSearchable(true)
    .build();
// The numeric chunk counter gets a range-filter index, matching the
// "chunk_no" property used by the other client examples.
Property chunkNoProperty = Property.builder()
    .name("chunk_no")
    .dataType(Arrays.asList(DataType.INT))
    .indexRangeFilters(true)
    .build();
// Configure BM25 settings
BM25Config bm25Config = BM25Config.builder()
    .b(0.7f)
    .k1(1.25f)
    .build();
// Configure inverted index with BM25 and other settings
InvertedIndexConfig invertedIndexConfig = InvertedIndexConfig.builder()
    .bm25(bm25Config)
    .indexNullState(true)
    .indexPropertyLength(true)
    .indexTimestamps(true)
    .build();
// Create the Article collection with properties and inverted index configuration
WeaviateClass articleCollection = WeaviateClass.builder()
    .className(collectionName)
    .properties(Arrays.asList(titleProperty, chunkProperty, chunkNoProperty))
    .invertedIndexConfig(invertedIndexConfig)
    .build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
    .withClass(articleCollection)
    .run();
vTrue := true
vFalse := false
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
Tokenization: "lowercase",
IndexFilterable: &vTrue,
IndexSearchable: &vFalse,
},
{
Name: "chunk",
DataType: schema.DataTypeText.PropString(),
Tokenization: "word",
IndexFilterable: &vTrue,
IndexSearchable: &vTrue,
},
{
Name: "chunk_no",
DataType: schema.DataTypeInt.PropString(),
IndexRangeFilters: &vTrue,
},
},
InvertedIndexConfig: &models.InvertedIndexConfig{
Bm25: &models.BM25Config{
B: 0.7,
K1: 1.25,
},
IndexNullState: true,
IndexPropertyLength: true,
IndexTimestamps: true,
},
}
Specify a reranker model integration
Configure a reranker
model integration for reranking retrieved results.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
reranker_config=Configure.Reranker.cohere()
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-openai", # set your vectorizer module
"moduleConfig": {
"reranker-cohere": {} # set your reranker module
}
}
client.schema.create_class(class_obj)
import { vectorizer, reranker } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOpenAI(),
reranker: reranker.cohere(),
})
const classWithReranker = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
moduleConfig: {
'reranker-cohere': {}, // set your reranker module
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithReranker).do();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Vectorizer: "text2vec-openai",
ModuleConfig: map[string]interface{}{
"reranker-cohere": map[string]interface{}{
"model": "rerank-v3.5",
},
},
}
Update the reranker model integration
v1.25.23
, v1.26.8
and v1.27.1
The reranker and generative configurations are mutable from v1.25.23, v1.26.8 and v1.27.1.
Update the reranker
model integration for reranking retrieved results.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
from weaviate.classes.config import Reconfigure
collection = client.collections.get("Article")
collection.config.update(
reranker_config=Reconfigure.Reranker.cohere() # Update the reranker module
)
class_obj = {
"moduleConfig": {
"reranker-cohere": {} # Update your reranker module
}
}
client.schema.update_config("Article", class_obj)
import { reconfigure } from 'weaviate-client';
const collection = client.collections.get('Article')
await collection.config.update({
reranker: reconfigure.reranker.cohere() // Update the reranker module
})
// Collection definition updates are not available in the v2 API.
// Consider upgrading to the v3 API, or deleting and recreating the collection.
updatedArticleClassConfig := &models.Class{
// Note: The new collection config must be provided in full,
// including the configuration that is not being updated.
// We suggest using the original class config as a starting point.
Class: "Article",
ModuleConfig: map[string]interface{}{
"reranker-cohere": map[string]interface{}{
"model": "rerank-v3.5",
},
},
}
Specify a generative model integration
Specify a generative
model integration for a collection (for RAG).
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
generative_config=Configure.Generative.openai(
model="gpt-4o" # set your generative model (optional parameter)
),
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-openai", # set your vectorizer module
"moduleConfig": {
"generative-openai": {
"model": "gpt-4o" # set your generative model (optional parameter)
}
}
}
client.schema.create_class(class_obj)
import { vectorizer, generative } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOpenAI(),
generative: generative.openAI({
model: "gpt-4o" // set your generative model (optional parameter)
}),
})
const classWithGenerative = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
moduleConfig: {
'generative-openai': {
model: 'gpt-4o' // set your generative model (optional parameter)
},
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithGenerative).do();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Vectorizer: "text2vec-openai",
ModuleConfig: map[string]interface{}{
"generative-openai": map[string]interface{}{
"model": "gpt-4o",
},
},
}
// Configure OpenAI text2vec module settings
Map<String, Object> text2vecOpenAISettings = new HashMap<>();
text2vecOpenAISettings.put("model", "text-embedding-3-small"); // or your preferred embedding model
// Configure OpenAI generative module settings
Map<String, Object> generativeOpenAISettings = new HashMap<>();
generativeOpenAISettings.put("model", "gpt-4");
// Combine module configurations.
// moduleConfig maps each module name directly to its settings, so the
// settings maps are inserted as-is (no extra wrapper keyed by module name).
Map<String, Object> moduleConfig = new HashMap<>();
moduleConfig.put("text2vec-openai", text2vecOpenAISettings);
moduleConfig.put("generative-openai", generativeOpenAISettings);
// Create the Article collection with vectorizer and generative configuration
WeaviateClass articleCollection = WeaviateClass.builder()
    .className(collectionName)
    .vectorizer("text2vec-openai") // Set the vectorizer
    .moduleConfig(moduleConfig) // Set both vectorizer and generative configs
    .build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
    .withClass(articleCollection)
    .run();
Update the generative model integration
v1.25.23
, v1.26.8
and v1.27.1
The reranker and generative configurations are mutable from v1.25.23, v1.26.8 and v1.27.1.
Update a generative
model integration.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
from weaviate.classes.config import Reconfigure
collection = client.collections.get("Article")
collection.config.update(
generative_config=Reconfigure.Generative.cohere() # Update the generative module
)
class_obj = {
"class": "Article",
"moduleConfig": {
"generative-cohere": {} # Update your generative module
}
}
client.schema.update_config("Article", class_obj)
import { reconfigure } from 'weaviate-client';
// Get a handle to the existing collection, then replace its generative
// integration. `reconfigure` is the named import from 'weaviate-client'.
const collection = client.collections.get("Article")
await collection.config.update({
  generative: reconfigure.generative.cohere() // Update the generative module
})
// Collection definition updates are not available in the v2 API.
// Consider upgrading to the v3 API, or deleting and recreating the collection.
updatedArticleClassConfig := &models.Class{
Class: "Article",
ModuleConfig: map[string]interface{}{
"generative-cohere": map[string]interface{}{},
},
}
You can override the generative integration settings at query time without updating it in the collection configuration.
Replication settings
Currently (from v1.25.0 onwards) a replication factor cannot be changed once it is set.
This is due to the schema consensus algorithm change in v1.25. This will be improved in future versions.
Configure replication settings, such as async replication and deletion resolution strategy.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- Java
- Go
- cURL
from weaviate.classes.config import Configure, ReplicationDeletionStrategy
client.collections.create(
"Article",
replication_config=Configure.replication(
factor=3,
async_enabled=True, # Enable asynchronous repair
deletion_strategy=ReplicationDeletionStrategy.TIME_BASED_RESOLUTION, # Added in v1.28; Set the deletion conflict resolution strategy
)
)
class_obj = {
"class": "Article",
"replicationConfig": {
"factor": 3,
"asyncEnabled": True,
"deletionStrategy": "TimeBasedResolution" # Available from Weaviate v1.28.0
},
}
client.schema.create_class(class_obj)
import { configure } from 'weaviate-client';
await client.collections.create({
name: 'Article',
replication: configure.replication({
factor: 3,
asyncEnabled: true,
deletionStrategy: 'TimeBasedResolution' // Available from Weaviate v1.28.0
}),
})
// Configure replication settings
Integer replicationFactor = 3;
Boolean asyncEnabled = true;
// Create replication configuration
ReplicationConfig replicationConfig = ReplicationConfig.builder()
.factor(replicationFactor) // factor=3
.asyncEnabled(asyncEnabled) // async_enabled=True
.deletionStrategy(ReplicationConfig.DeletionStrategy.DELETE_ON_CONFLICT)
.build();
// Create the Article collection with replication configuration
WeaviateClass articleClass = WeaviateClass.builder()
.className(collectionName)
.description("Article collection with replication configuration")
.replicationConfig(replicationConfig) // Set the replication config
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleClass)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
ReplicationConfig: &models.ReplicationConfig{
AsyncEnabled: true,
Factor: 3,
DeletionStrategy: models.ReplicationConfigDeletionStrategyTimeBasedResolution,
},
}
# Create the "Article" collection with replication settings via the REST API.
# The title property uses the "text" data type, as in the other examples.
curl \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{
    "class": "Article",
    "properties": [
      {
        "dataType": [
          "text"
        ],
        "description": "Title of the article",
        "name": "title"
      }
    ],
    "replicationConfig": {
      "factor": 3,
      "asyncEnabled": true,
      "deletionStrategy": "TimeBasedResolution"
    }
  }' \
  http://localhost:8080/v1/schema
Additional information
To use replication factors greater than one, use a multi-node deployment.
For details on the configuration parameters, see the following:
Sharding settings
Configure sharding per collection.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure
client.collections.create(
"Article",
sharding_config=Configure.sharding(
virtual_per_physical=128,
desired_count=1,
desired_virtual_count=128,
)
)
class_obj = {
"class": "Article",
"shardingConfig": {
"virtualPerPhysical": 128,
"desiredCount": 1,
"desiredVirtualCount": 128,
},
}
client.schema.create_class(class_obj)
import { configure } from 'weaviate-client';
await client.collections.create({
name: 'Article',
sharding: configure.sharding({
virtualPerPhysical: 128,
desiredCount: 1,
desiredVirtualCount: 128,
})
})
const classWithSharding = {
class: 'Article',
vectorIndexConfig: {
distance: 'cosine',
},
shardingConfig: {
virtualPerPhysical: 128,
desiredCount: 1,
desiredVirtualCount: 128,
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithSharding).do();
// Configure sharding settings
Integer virtualPerPhysical = 128;
Integer desiredCount = 1;
Integer desiredVirtualCount = 128;
// Create sharding configuration
ShardingConfig shardingConfig = ShardingConfig.builder()
.virtualPerPhysical(virtualPerPhysical) // virtual_per_physical=128
.desiredCount(desiredCount) // desired_count=1
.desiredVirtualCount(desiredVirtualCount) // desired_virtual_count=128
.build();
// Create the Article collection with sharding configuration
WeaviateClass articleClass = WeaviateClass.builder()
.className(collectionName)
.description("Article collection with sharding configuration")
.shardingConfig(shardingConfig) // Set the sharding config
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleClass)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
ShardingConfig: sharding.Config{
VirtualPerPhysical: 128,
DesiredCount: 1,
DesiredVirtualCount: 128,
Key: sharding.DefaultKey,
Strategy: sharding.DefaultStrategy,
Function: sharding.DefaultFunction,
},
}
Additional information
For details on the configuration parameters, see the following:
Multi-tenancy
v1.20
Create a collection with multi-tenancy enabled.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Configure
client.collections.create(
"Article",
multi_tenancy_config=Configure.multi_tenancy(True)
)
class_obj = {
"class": "Article",
"multiTenancyConfig": {"enabled": True}
}
client.schema.create_class(class_obj) # returns null on success
await client.collections.create({
name: 'Article',
multiTenancy: { enabled: true }
// multiTenancy: configure.multiTenancy({ enabled: true }) // alternatively use helper function
})
await client.schema
.classCreator()
.withClass({
class: 'Article',
multiTenancyConfig: { enabled: true },
})
.do();
// Create multi-tenancy configuration
MultiTenancyConfig multiTenancyConfig = MultiTenancyConfig.builder()
.enabled(true)
.build();
// Create the Article collection with multi-tenancy configuration
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.description("Article collection with multi-tenancy enabled")
.multiTenancyConfig(multiTenancyConfig)
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
MultiTenancyConfig: &models.MultiTenancyConfig{
Enabled: true,
},
}
Read a single collection definition
Retrieve a collection definition from the schema.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
articles = client.collections.get("Article")
articles_config = articles.config.get()
print(articles_config)
class_name = "Article"
response = client.schema.get(class_name)
print(json.dumps(response, indent=2))
let articles = client.collections.get('Article')
const collectionConfig = await articles.config.get()
console.log(collectionConfig)
const className = 'Article';
let classDefinition = await client.schema
.classGetter()
.withClassName(className)
.do();
console.log(JSON.stringify(classDefinition, null, 2));
String collectionName = "Article";
Result<WeaviateClass> result = client.schema().classGetter()
.withClassName(collectionName)
.run();
String json = new GsonBuilder().setPrettyPrinting().create().toJson(result.getResult());
System.out.println(json);
className := "Article"
class, err := client.Schema().ClassGetter().
WithClassName(className).
Do(ctx)
b, err := json.MarshalIndent(class, "", " ")
fmt.Println(string(b))
Sample configuration: Text objects
This configuration for text objects defines the following:
- The collection name (`Article`)
- The vectorizer module (`text2vec-cohere`) and model (`embed-multilingual-v2.0`)
- A set of properties (`title`, `body`) with `text` data types.
{
  "class": "Article",
  "vectorizer": "text2vec-cohere",
  "moduleConfig": {
    "text2vec-cohere": {
      "model": "embed-multilingual-v2.0"
    }
  },
  "properties": [
    {
      "name": "title",
      "dataType": ["text"]
    },
    {
      "name": "body",
      "dataType": ["text"]
    }
  ]
}
Sample configuration: Nested objects
v1.22
This configuration for nested objects defines the following:
- The collection name (`Person`)
- The vectorizer module (`text2vec-huggingface`)
- A set of properties (`last_name`, `address`)
  - `last_name` has `text` data type
  - `address` has `object` data type
- The `address` property has two nested properties (`street` and `city`)
{
  "class": "Person",
  "vectorizer": "text2vec-huggingface",
  "properties": [
    {
      "dataType": ["text"],
      "name": "last_name"
    },
    {
      "dataType": ["object"],
      "name": "address",
      "nestedProperties": [
        {"dataType": ["text"], "name": "street"},
        {"dataType": ["text"], "name": "city"}
      ]
    }
  ]
}
Sample configuration: Generative search
This configuration for retrieval augmented generation defines the following:
- The collection name (`Article`)
- The default vectorizer module (`text2vec-openai`)
- The generative module (`generative-openai`)
- A set of properties (`title`, `chunk`, `chunk_no` and `url`)
- The tokenization option for the `url` property
- The vectorization option (`skip` vectorization) for the `url` property
{
  "class": "Article",
  "vectorizer": "text2vec-openai",
  "vectorIndexConfig": {
    "distance": "cosine"
  },
  "moduleConfig": {
    "generative-openai": {}
  },
  "properties": [
    {
      "name": "title",
      "dataType": ["text"]
    },
    {
      "name": "chunk",
      "dataType": ["text"]
    },
    {
      "name": "chunk_no",
      "dataType": ["int"]
    },
    {
      "name": "url",
      "dataType": ["text"],
      "tokenization": "field",
      "moduleConfig": {
        "text2vec-openai": {
          "skip": true
        }
      }
    }
  ]
}
Sample configuration: Images
This configuration for image search defines the following:
- The collection name (`Image`)
- The vectorizer module (`img2vec-neural`)
  - The `image` property configures the collection to store image data.
- The vector index distance metric (`cosine`)
- A set of properties (`image`), with the `image` property set as `blob`.
For image searches, see Image search.
{
  "class": "Image",
  "vectorizer": "img2vec-neural",
  "vectorIndexConfig": {
    "distance": "cosine"
  },
  "moduleConfig": {
    "img2vec-neural": {
      "imageFields": [
        "image"
      ]
    }
  },
  "properties": [
    {
      "name": "image",
      "dataType": ["blob"]
    }
  ]
}
Read all collection definitions
Fetch the database schema to retrieve all of the collection definitions.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
response = client.collections.list_all(simple=False)
print(response)
response = client.schema.get()
print(json.dumps(response, indent=2))
const allCollections = await client.collections.listAll()
console.log(JSON.stringify(allCollections, null, 2));
let allCollections = await client.schema.getter().do();
console.log(JSON.stringify(allCollections, null, 2));
Result<Schema> result = client.schema().getter()
.run();
String json = new GsonBuilder().setPrettyPrinting().create().toJson(result.getResult());
System.out.println(json);
schema, err := client.Schema().Getter().
Do(ctx)
b, err := json.MarshalIndent(schema, "", " ")
fmt.Println(string(b))
Update a collection definition
Currently (from v1.25.0 onwards) a replication factor cannot be changed once it is set.
This is due to the schema consensus algorithm change in v1.25. This will be improved in future versions.
You can update a collection definition to change the mutable collection settings.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Reconfigure, VectorFilterStrategy, ReplicationDeletionStrategy
articles = client.collections.get("Article")
# Update the collection definition
articles.config.update(
description="An updated collection description.",
property_descriptions={
"title": "The updated title description for article",
}, # Available from Weaviate v1.31.0
inverted_index_config=Reconfigure.inverted_index(
bm25_k1=1.5
),
vector_index_config=Reconfigure.VectorIndex.hnsw(
filter_strategy=VectorFilterStrategy.ACORN # Available from Weaviate v1.27.0
),
replication_config=Reconfigure.replication(
deletion_strategy=ReplicationDeletionStrategy.TIME_BASED_RESOLUTION # Available from Weaviate v1.28.0
)
)
articles = client.collections.get("Article")
article_shards = articles.config.update_shards(
status="READY",
shard_names=shard_names # The names (List[str]) of the shard to update (or a shard name)
)
print(article_shards)
class_name = "Article"
# Update the collection definition
collection_def_changes = {
"class": class_name,
"invertedIndexConfig": {
"bm25": {
"k1": 1.5 # Change the k1 parameter from 1.2
}
},
"vectorIndexConfig": {
"filterStrategy": "acorn" # Available from Weaviate v1.27.0
},
"replicationConfig": {
"deletionStrategy": "TimeBasedResolution" # Available from Weaviate v1.28.0
}
}
client.schema.update_config("Article", collection_def_changes)
import { reconfigure } from 'weaviate-client';
let articles = client.collections.get('Article')
await articles.config.update({
invertedIndex: reconfigure.invertedIndex({
bm25k1: 1.5 // Change the k1 parameter from 1.2
}),
vectorizers: reconfigure.vectorizer.update({
vectorIndexConfig: reconfigure.vectorIndex.hnsw({
quantizer: reconfigure.vectorIndex.quantizer.pq(),
ef: 4,
filterStrategy: 'acorn', // Available from Weaviate v1.27.0
}),
})
})
// Collection definition updates are not available in the v2 API.
// Consider upgrading to the v3 API, or deleting and recreating the collection.
// Get existing collection
Result<WeaviateClass> existingResult = client.schema().classGetter()
.withClassName(collectionName)
.run();
assertThat(existingResult).isNotNull()
.returns(false, Result::hasErrors);
WeaviateClass existingClass = existingResult.getResult();
// Create updated configurations
InvertedIndexConfig invertedConfig = InvertedIndexConfig.builder()
.bm25(BM25Config.builder().k1(1.5f).build())
.build();
VectorIndexConfig vectorConfig = VectorIndexConfig.builder()
.filterStrategy(VectorIndexConfig.FilterStrategy.ACORN)
.build();
ReplicationConfig replicationConfig = ReplicationConfig.builder()
.deletionStrategy(ReplicationConfig.DeletionStrategy.NO_AUTOMATED_RESOLUTION)
.build();
// Update collection with new configurations - preserve critical existing configs
WeaviateClass updatedClass = WeaviateClass.builder()
.className(collectionName)
.shardingConfig(existingClass.getShardingConfig()) // Preserve sharding (immutable)
.invertedIndexConfig(invertedConfig) // Update
.vectorIndexConfig(vectorConfig) // Update
.replicationConfig(replicationConfig) // Update
.build();
Result<Boolean> updateResult = client.schema().classUpdater()
.withClass(updatedClass)
.run();
updatedArticleClassConfig := &models.Class{
// Note: The new collection config must be provided in full,
// including the configuration that is not being updated.
// We suggest using the original class config as a starting point.
Class: "Article",
InvertedIndexConfig: &models.InvertedIndexConfig{
Bm25: &models.BM25Config{
K1: 1.5,
},
},
VectorIndexConfig: map[string]interface{}{
"filterStrategy": "acorn",
},
ReplicationConfig: &models.ReplicationConfig{
DeletionStrategy: models.ReplicationConfigDeletionStrategyTimeBasedResolution,
},
}
Delete a collection
You can delete any unwanted collection(s), along with the data that they contain.
When you delete a collection, you delete all associated objects!
Be very careful with deletes on a production database and anywhere else that you have important data.
This code deletes a collection and its objects.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
- Curl
# collection_name can be a string ("Article") or a list of strings (["Article", "Category"])
client.collections.delete(collection_name) # THIS WILL DELETE THE SPECIFIED COLLECTION(S) AND THEIR OBJECTS
# Note: you can also delete all collections in the Weaviate instance with:
# client.collections.delete_all()
# delete class "Article" - THIS WILL DELETE ALL DATA IN THIS CLASS
client.schema.delete_class("Article") # Replace with your class name
// delete collection "Article" - THIS WILL DELETE THE COLLECTION AND ALL ITS DATA
await client.collections.delete('Article')
// you can also delete all collections of a cluster
// await client.collections.deleteAll()
// delete collection "Article" - THIS WILL DELETE THE COLLECTION AND ALL ITS DATA
await client.schema
.classDeleter()
.withClassName('Article')
.do();
className := "YourClassName"
// delete the class
if err := client.Schema().ClassDeleter().WithClassName(className).Do(context.Background()); err != nil {
// Weaviate will return a 400 if the class does not exist, so this is allowed, only return an error if it's not a 400
if status, ok := err.(*fault.WeaviateClientError); ok && status.StatusCode != http.StatusBadRequest {
panic(err)
}
}
Result<Boolean> result = client.schema().classDeleter()
.withClassName(collectionName)
.run();
curl \
-X DELETE \
https://WEAVIATE_INSTANCE_URL/v1/schema/YourClassName # Replace WEAVIATE_INSTANCE_URL with your instance URL
Add a property
Indexing limitations after data import
There are no index limitations when you add collection properties before you import data.
If you add a new property after you import data, there is an impact on indexing.
Property indexes are built at import time. If you add a new property after importing some data, the index entries for pre-existing objects aren't automatically updated to include the new property. This means pre-existing objects aren't added to the new property index. Queries may return unexpected results because the index only includes new objects.
To create an index that includes all of the objects in a collection, do one of the following:
- New collections: Add all of the collection's properties before importing objects.
- Existing collections: Export the existing data from the collection. Re-create it with the new property. Import the data into the updated collection.
We are working on a re-indexing API to allow you to re-index the data after adding a property. This will be available in a future release.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
from weaviate.classes.config import Property, DataType
articles = client.collections.get("Article")
articles.config.add_property(
Property(
name="onHomepage",
data_type=DataType.BOOL
)
)
add_prop = {
"dataType": [
"boolean"
],
"name": "onHomepage"
}
client.schema.property.create("Article", add_prop)
// Get a handle to the collection, then add the new property to its definition.
let articles = client.collections.get('Article')
// Await the call so the property exists (and any error surfaces) before continuing.
await articles.config.addProperty({
  name: 'onHomepage',
  dataType: 'boolean'
})
const className = 'Article';
const prop = {
dataType: ['boolean'],
name: 'onHomepage',
};
const response = await client.schema
.propertyCreator()
.withClassName(className)
.withProperty(prop)
.do();
console.log(JSON.stringify(response, null, 2));
package main
import (
"context"
"github.com/weaviate/weaviate-go-client/v5/weaviate"
"github.com/weaviate/weaviate/entities/models"
)
// main connects to a local Weaviate instance and adds a boolean
// "onHomepage" property to the existing "Article" collection.
func main() {
	cfg := weaviate.Config{
		Host:   "localhost:8080",
		Scheme: "http",
	}
	client, err := weaviate.NewClient(cfg)
	if err != nil {
		panic(err)
	}
	prop := &models.Property{
		DataType: []string{"boolean"},
		Name:     "onHomepage",
	}
	// err is already declared above, so assign with "=" here; ":=" would
	// fail to compile because it introduces no new variable.
	err = client.Schema().PropertyCreator().
		WithClassName("Article").
		WithProperty(prop).
		Do(context.Background())
	if err != nil {
		panic(err)
	}
}
Property property = Property.builder()
.dataType(Arrays.asList(DataType.BOOLEAN))
.name(propertyName)
.build();
Result<Boolean> result = client.schema().propertyCreator()
.withClassName(collectionName)
.withProperty(property)
.run();
Inspect shards (for a collection)
An index itself can be comprised of multiple shards.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
articles = client.collections.get("Article")
article_shards = articles.config.get_shards()
print(article_shards)
article_shards = client.schema.get_class_shards("Article")
print(article_shards)
let articles = client.collections.get('Article')
const shards = await articles.config.getShards()
console.log(JSON.stringify(shards, null, 2));
const response = await client.schema
.shardsGetter()
.withClassName('Article')
.do();
console.log(JSON.stringify(response, null, 2));
package main
import (
"context"
"fmt"
"github.com/weaviate/weaviate-go-client/v5/weaviate"
)
func main() {
cfg := weaviate.Config{
Host: "localhost:8080",
Scheme: "http",
}
client, err := weaviate.NewClient(cfg)
if err != nil {
panic(err)
}
shards, err := client.Schema().
ShardsGetter().
WithClassName("Article").
Do(context.Background())
if err != nil {
panic(err)
}
fmt.Printf("%v", shards)
}
Result<Shard[]> result = client.schema().shardsGetter()
.withClassName(collectionName)
.run();
Shard[] shards = result.getResult();
if (shards == null || shards.length == 0) {
System.out.println("No shards found in this collection.");
return;
}
// Iterate over each shard and print its status
for (Shard shard : shards) {
System.out.println("Shard name: " + shard.getName());
System.out.println("Shard status: " + shard.getStatus()); // Get shard status (whether it's READY or READONLY)
}
Update shard status
You can manually update a shard to change its status. For example, update the shard status from READONLY to READY after you make other changes.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
articles = client.collections.get("Article")
article_shards = articles.config.update_shards(
status="READY",
shard_names=shard_names # The names (List[str]) of the shard to update (or a shard name)
)
print(article_shards)
article_shards = client.schema.update_class_shard(
"Article",
status="READY",
shard_name="shard-1234"
)
print(article_shards)
let articles = client.collections.get('Article')
const shards = await articles.config.updateShards('READY', 'shard-1234')
console.log(JSON.stringify(shards, null, 2));
const shards = await client.schema.shardUpdater()
.withClassName('Article')
.withShardName('shard-1234')
.withStatus('READY')
.do()
console.log(JSON.stringify(shards, null, 2));
shardStatus, err := client.Schema().ShardUpdater().
WithClassName(myCollectionName). // Set your collection name
WithShardName(shardName). // Set the shard name to update
WithStatus("READY").
Do(ctx)
if err != nil {
// handle error
panic(err)
}
fmt.Printf("%v", shardStatus)
Result<ShardStatus> updateToReadyStatus = client.schema().shardUpdater()
.withClassName(collectionName)
.withShardName(shardName)
.withStatus(ShardStatuses.READY)
.run();
if (updateToReadyStatus.hasErrors()) {
System.out.println(updateToReadyStatus.getError());
return;
}
System.out.println(updateToReadyStatus.getResult());
Further resources
References
Background knowledge
Questions and feedback
If you have any questions or feedback, let us know in the user forum.