Manage collections
Every object in Weaviate belongs to exactly one collection. Use the examples on this page to manage your collections.
Newer Weaviate documentation discusses "collections." Older Weaviate documentation refers to "classes" instead. Expect to see both terms throughout the documentation.
Create a collection
To create a collection, specify at least the collection name. If you don't specify any properties, auto-schema
creates them.
Weaviate follows GraphQL naming conventions.
- Start collection names with an upper case letter.
- Start property names with a lower case letter.
If you use an initial upper case letter to define a property name, Weaviate changes it to a lower case letter internally.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
client.collections.create("Article")
class_name = "Article"
class_obj = {"class": class_name}
client.schema.create_class(class_obj) # returns null on success
const newCollection = await client.collections.create({
name: 'Article'
})
// The returned value is the full collection definition, showing all defaults
console.log(JSON.stringify(newCollection, null, 2));
const className = 'Article';
const emptyClassDefinition = {
class: className,
};
// Add the class to the schema
let result = await client.schema
.classCreator()
.withClass(emptyClassDefinition)
.do();
String className = "Article";
WeaviateClass emptyClass = WeaviateClass.builder()
.className(className)
.build();
// Add the class to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(emptyClass)
.run();
className := "Article"
emptyClass := &models.Class{
Class: className,
}
// Add the class to the schema
err := client.Schema().ClassCreator().
WithClass(emptyClass).
Do(ctx)
Create a collection and define properties
Properties are the data fields in your collection. Each property has a name and a data type.
Additional information
Use properties to configure additional parameters such as data type, index characteristics, or tokenization.
For details, see:
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
from weaviate.classes.config import Property, DataType
# Note that you can use `client.collections.create_from_dict()` to create a collection from a v3-client-style JSON object
client.collections.create(
"Article",
properties=[
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
]
)
class_obj = {
"class": "Article",
"properties": [
{
"name": "title",
"dataType": ["text"],
},
{
"name": "body",
"dataType": ["text"],
},
],
}
client.schema.create_class(class_obj) # returns null on success
import { dataType } from 'weaviate-client';
await client.collections.create({
name: 'Article',
properties: [
{
name: 'title',
dataType: dataType.TEXT,
},
{
name: 'body',
dataType: dataType.TEXT,
},
],
})
const classWithProps = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
{
name: 'body',
dataType: ['text'],
},
],
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithProps).do();
// Define class properties
Property titleProperty = Property.builder()
.name("title")
.description("Title Property Description...")
.dataType(Arrays.asList(DataType.TEXT))
.build();
Property bodyProperty = Property.builder()
.name("body")
.description("Body Property Description...")
.dataType(Arrays.asList(DataType.TEXT))
.build();
//Add the defined properties to the class
WeaviateClass articleClass = WeaviateClass.builder()
.className(className)
.description("Article Class Description...")
.properties(Arrays.asList(titleProperty, bodyProperty))
.build();
Result<Boolean> result = client.schema().classCreator()
.withClass(articleClass)
.run();
Disable auto-schema
By default, Weaviate creates missing collections and missing properties. When you configure collections manually, you have more precise control of the collection settings.
To disable auto-schema
set AUTOSCHEMA_ENABLED: 'false'
in your system configuration file.
Specify a vectorizer
Specify a vectorizer
for a collection.
Additional information
Collection level settings override default values and general configuration parameters such as environment variables.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
properties=[ # properties configuration is optional
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
]
)
class_obj = {
"class": "Article",
"properties": [
{
"name": "title",
"dataType": ["text"],
},
],
"vectorizer": "text2vec-openai" # this could be any vectorizer
}
client.schema.create_class(class_obj)
import { vectorizer, dataType } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOpenAI(),
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'body', dataType: dataType.TEXT },
],
})
const classWithVectorizer = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithVectorizer).do();
// Additional configuration not shown
// Define the vectorizer in the WeaviateClass Builder
WeaviateClass articleClass = WeaviateClass.builder()
.className(className)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorizer("text2vec-openai") // Vectorizer of your choice, e.g. text2vec-openai or text2vec-cohere
.build();
// Add the class to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleClass)
.run();
Define multiple named vectors
v1.24
You can define multiple named vectors per collection. This allows each object to be represented by multiple vectors, such as a `text` vector and an `image` vector, or a `title` vector and a `body` vector.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"ArticleNV",
vectorizer_config=[
# Set a named vector
Configure.NamedVectors.text2vec_cohere( # Use the "text2vec-cohere" vectorizer
name="title", source_properties=["title"] # Set the source property(ies)
),
# Set another named vector
Configure.NamedVectors.text2vec_openai( # Use the "text2vec-openai" vectorizer
name="body", source_properties=["body"] # Set the source property(ies)
),
# Set another named vector
Configure.NamedVectors.text2vec_openai( # Use the "text2vec-openai" vectorizer
name="title_country", source_properties=["title", "country"] # Set the source property(ies)
)
],
properties=[ # Define properties
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
Property(name="country", data_type=DataType.TEXT),
],
)
# Unfortunately, named vectors are not supported in the v3 API / Python client.
# Please upgrade to the v4 API / Python client to use named vectors.
import { vectorizer, dataType } from 'weaviate-client';
await client.collections.create({
name: 'ArticleNV',
vectorizers: [
vectorizer.text2VecCohere({
name: 'title',
sourceProperties: ['title']
}),
vectorizer.text2VecOpenAI({
name: 'body',
sourceProperties: ['body'],
}),
vectorizer.text2VecOpenAI({
name: 'title_country',
sourceProperties: ['title','country'],
})
],
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'body', dataType: dataType.TEXT },
{ name: 'country', dataType: dataType.TEXT },
],
})
const classWithNamedVectors = {
class: 'ArticleNV',
vectorConfig: {
// Set a named vector
title: {
vectorizer: {
'text2vec-cohere': {
properties: ['title'], // Set the source property(ies)
},
},
},
// Set another named vector
body: {
vectorizer: {
'text2vec-openai': {
properties: ['body'], // Set the source property(ies)
},
},
},
title_country: {
vectorizer: {
'text2vec-openai': {
properties: ['title','country'], // Set the source property(ies)
},
},
},
},
properties: [
{
name: 'title',
dataType: ['text'],
},
{
name: 'body',
dataType: ['text'],
},
{
name: 'country',
dataType: ['text'],
},
],
};
// Add the class to the schema
result = await client.schema
.classCreator()
.withClass(classWithNamedVectors)
.do();
// Additional configuration not shown
// Define the vectorizer configurations. Each "properties" entry must name a
// property that exists on the collection (here: "title" and "body").
Map<String, Object> text2vecOpenAI = new HashMap<>();
Map<String, Object> text2vecOpenAISettings = new HashMap<>();
text2vecOpenAISettings.put("properties", new String[]{ "title" });
text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings);

Map<String, Object> text2vecCohere = new HashMap<>();
Map<String, Object> text2vecCohereSettings = new HashMap<>();
text2vecCohereSettings.put("properties", new String[]{ "body" });
text2vecCohere.put("text2vec-cohere", text2vecCohereSettings);

// Define the vector configurations, keyed by the name of each named vector
Map<String, WeaviateClass.VectorConfig> vectorConfig = new HashMap<>();
vectorConfig.put("title_vector", WeaviateClass.VectorConfig.builder()
    .vectorIndexType("hnsw")
    .vectorizer(text2vecOpenAI)
    .build());
vectorConfig.put("body_vector", WeaviateClass.VectorConfig.builder()
    .vectorIndexType("hnsw")
    .vectorizer(text2vecCohere)
    .build());

// Define the vectorizers in the WeaviateClass Builder
WeaviateClass articleClass = WeaviateClass.builder()
    .className(className)
    .properties(Arrays.asList(titleProperty, bodyProperty))
    .vectorConfig(vectorConfig)
    .build();

// Add the class to the schema
Result<Boolean> result = client.schema().classCreator()
    .withClass(articleClass)
    .run();
Specify vectorizer settings
To configure how a vectorizer works (i.e. what model to use) with a specific collection, set the vectorizer parameters.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
from weaviate.classes.config import Configure
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_cohere(
model="embed-multilingual-v2.0",
vectorize_collection_name=True
),
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-cohere", # this could be any vectorizer
"moduleConfig": {
"text2vec-cohere": { # this must match the vectorizer used
"vectorizeClassName": True,
"model": "embed-multilingual-v2.0",
}
}
}
client.schema.create_class(class_obj)
import { vectorizer } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecCohere({
model: 'embed-multilingual-v2.0',
vectorizeCollectionName: true,
}),
})
const classWithModuleSettings = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-cohere', // this could be any vectorizer
moduleConfig: {
'text2vec-cohere': {
// this must match the vectorizer used
vectorizeClassName: true,
model: 'embed-multilingual-v2.0',
},
},
};
// Add the class to the schema
result = await client.schema
.classCreator()
.withClass(classWithModuleSettings)
.do();
// Additional configuration not shown
// Define the module settings for the vectorizer
Map<String, Object> text2vecOpenAISettings = new HashMap<>();
text2vecOpenAISettings.put("vectorizePropertyName", false);
text2vecOpenAISettings.put("model", "text-embedding-3-small"); // set the model of your choice e.g. text-embedding-3-small

// Key the settings by module name exactly once; the key must match the vectorizer used
Map<Object, Object> moduleConfig = new HashMap<>();
moduleConfig.put("text2vec-openai", text2vecOpenAISettings);

// Set the module config in the WeaviateClass Builder
WeaviateClass articleClass = WeaviateClass.builder()
    .className(className)
    .properties(Arrays.asList(titleProperty, bodyProperty))
    .vectorizer("text2vec-openai") // must match the module configured above
    .moduleConfig(moduleConfig) // Set the module config
    .build();

// Add the class to the schema
Result<Boolean> result = client.schema().classCreator()
    .withClass(articleClass)
    .run();
Set vector index type
The vector index type can be set for each collection at creation time. Choose between the `hnsw`, `flat` and `dynamic` index types.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
vector_index_config=Configure.VectorIndex.hnsw(), # Use the HNSW index
# vector_index_config=Configure.VectorIndex.flat(), # Use the FLAT index
# vector_index_config=Configure.VectorIndex.dynamic(), # Use the DYNAMIC index
properties=[
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
]
)
class_obj = {
'class': 'Article',
'properties': [
{
'name': 'title',
'dataType': ['text'],
},
],
'vectorizer': 'text2vec-openai', # this could be any vectorizer
"vectorIndexType": "hnsw", # or "flat" or "dynamic"
}
client.schema.create_class(class_obj)
import { vectorizer, dataType, configure } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOpenAI({
vectorIndexConfig: configure.vectorIndex.hnsw(), // Use HNSW
// vectorIndexConfig: configure.vectorIndex.flat(), // Use Flat
// vectorIndexConfig: configure.vectorIndex.dynamic(), // Use Dynamic
}),
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'body', dataType: dataType.TEXT },
],
})
const classWithIndexType = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
vectorIndexType: 'flat', // or 'hnsw', or 'dynamic'
vectorIndexConfig: {
bq: {
enabled: true, // Enable BQ compression. Default: False
rescoreLimit: 200, // The minimum number of candidates to fetch before rescoring. Default: -1 (No limit)
cache: true, // Enable use of vector cache. Default: False
},
vectorCacheMaxObjects: 100000, // Cache size if `cache` enabled. Default: 1000000000000
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithIndexType).do();
// Additional configuration not shown
// Define the index type in the WeaviateClass Builder
WeaviateClass articleClass = WeaviateClass.builder()
.className(className)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorizer("text2vec-openai")
.vectorIndexType("hnsw") //set the vector index of your choice e.g. hnsw, flat...
.build();
// Add the class to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleClass)
.run();
Additional information
- Read more about index types & compression in Concepts: Vector index.
Set vector index parameters
Various vector index parameters are configurable at collection creation time, including compression and filter strategy.
The filter strategy parameter was added in v1.27.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
from weaviate.classes.config import Configure, Property, DataType, VectorDistances, VectorFilterStrategy
client.collections.create(
"Article",
# Additional configuration not shown
vector_index_config=Configure.VectorIndex.hnsw(
quantizer=Configure.VectorIndex.Quantizer.bq(),
ef_construction=300,
distance_metric=VectorDistances.COSINE,
filter_strategy=VectorFilterStrategy.SWEEPING # or ACORN (Available from Weaviate v1.27.0)
),
)
class_obj = {
'class': 'Article',
# Additional configuration not shown
"vectorIndexType": "flat",
"vectorIndexConfig": {
"bq": {
"enabled": True, # Enable BQ compression. Default: False
"rescoreLimit": 200, # The minimum number of candidates to fetch before rescoring. Default: -1 (No limit)
"cache": True, # Enable use of vector cache. Default: False
},
"vectorCacheMaxObjects": 100000, # Cache size if `cache` enabled. Default: 1000000000000
"filterStrategy": "sweeping" # or "acorn" (Available from Weaviate v1.27.0)
}
}
client.schema.create_class(class_obj)
import { configure, vectorizer } from 'weaviate-client';
await client.collections.create({
name: 'Article',
// Additional configuration not shown
vectorizers: vectorizer.text2VecCohere({
vectorIndexConfig: configure.vectorIndex.flat({
quantizer: configure.vectorIndex.quantizer.bq({
rescoreLimit: 200,
cache: true
}),
vectorCacheMaxObjects: 100000
})
})
})
// Collection definition that sets vector index parameters:
// a flat index with binary quantization (BQ) compression.
const classWithIndexParams = {
  class: 'Article',
  // Additional configuration not shown
  vectorIndexType: 'flat', // or `hnsw`
  vectorIndexConfig: {
    bq: {
      enabled: true, // Enable BQ compression. Default: False
      rescoreLimit: 200, // The minimum number of candidates to fetch before rescoring. Default: -1 (No limit)
      cache: true, // Enable use of vector cache. Default: False
    },
    vectorCacheMaxObjects: 100000, // Cache size if `cache` enabled. Default: 1000000000000
  },
};

// Add the class to the schema (pass the definition declared above)
result = await client.schema.classCreator().withClass(classWithIndexParams).do();
// Additional configuration not shown
// Define the VectorIndexConfig with compression
VectorIndexConfig createBqIndexConfig = VectorIndexConfig.builder()
.bq(BQConfig.builder()
.enabled(true)
.rescoreLimit(123L)
.cache(true)
.build())
.vectorCacheMaxObjects(100000L)
.build();
WeaviateClass articleClass = WeaviateClass.builder()
.className(className)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorIndexType("flat") //set the vector index of your choice e.g. hnsw, flat...
.vectorIndexConfig(createBqIndexConfig)
.vectorizer("text2vec-openai")
.build();
// Add the class to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleClass)
.run();
Additional information
- Read more about index types & compression in Concepts: Vector index.
Property-level settings
Configure individual properties in a collection. Each property can have its own configuration. Here are some common settings:
- Vectorize the property
- Vectorize the property name
- Set a tokenization type
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
from weaviate.classes.config import Configure, Property, DataType, Tokenization
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_huggingface(),
properties=[
Property(
name="title",
data_type=DataType.TEXT,
vectorize_property_name=True, # Use "title" as part of the value to vectorize
tokenization=Tokenization.LOWERCASE # Use "lowercase" tokenization
),
Property(
name="body",
data_type=DataType.TEXT,
skip_vectorization=True, # Don't vectorize this property
tokenization=Tokenization.WHITESPACE # Use "whitespace" tokenization
),
]
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-huggingface", # this could be any vectorizer
"properties": [
{
"name": "title",
"dataType": ["text"],
"moduleConfig": {
"text2vec-huggingface": { # this must match the vectorizer used
"vectorizePropertyName": True,
"tokenization": "lowercase"
}
}
},
{
"name": "body",
"dataType": ["text"],
"moduleConfig": {
"text2vec-huggingface": { # this must match the vectorizer used
"skip": True, # Don't vectorize body
"tokenization": "whitespace"
}
}
},
],
}
client.schema.create_class(class_obj)
import { vectorizer, dataType, tokenization } from 'weaviate-client';
const newCollection = await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecHuggingFace(),
properties: [
{
name: 'title',
dataType: dataType.TEXT,
vectorizePropertyName: true,
tokenization: tokenization.LOWERCASE // or 'lowercase'
},
{
name: 'body',
dataType: dataType.TEXT,
skipVectorization: true,
tokenization: tokenization.WHITESPACE // or 'whitespace'
},
],
})
const classWithPropModuleSettings = {
class: 'Article',
vectorizer: 'text2vec-huggingface', // this could be any vectorizer
properties: [
{
name: 'title',
dataType: ['text'],
moduleConfig: {
'text2vec-huggingface': {
// this must match the vectorizer used
vectorizePropertyName: true,
tokenization: 'lowercase', // Use "lowercase" tokenization
},
},
},
{
name: 'body',
dataType: ['text'],
moduleConfig: {
'text2vec-huggingface': {
// this must match the vectorizer used
skip: true, // Don't vectorize this property
tokenization: 'whitespace', // Use "whitespace" tokenization
},
},
},
],
};
// Add the class to the schema
result = await client.schema
.classCreator()
.withClass(classWithPropModuleSettings)
.do();
Property titleProperty = Property.builder()
.name("title")
.description("title of the article")
.dataType(Arrays.asList(DataType.TEXT))
.tokenization(Tokenization.WORD)
.build();
Property bodyProperty = Property.builder()
.name("body")
.description("body of the article")
.dataType(Arrays.asList(DataType.TEXT))
.tokenization(Tokenization.LOWERCASE)
.build();
//Add the defined properties to the class
WeaviateClass articleClass = WeaviateClass.builder()
.className(className)
.description("Article Class Description...")
.properties(Arrays.asList(titleProperty, bodyProperty))
.build();
Result<Boolean> result = client.schema().classCreator()
.withClass(articleClass)
.run();
Specify a distance metric
If you choose to bring your own vectors, you should specify the distance metric.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
from weaviate.classes.config import Configure, VectorDistances
client.collections.create(
"Article",
vector_index_config=Configure.VectorIndex.hnsw(
distance_metric=VectorDistances.COSINE
),
)
class_obj = {
"class": "Article",
"vectorIndexConfig": {
"distance": "cosine",
},
}
client.schema.create_class(class_obj)
import { configure, vectorizer, vectorDistances } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOllama({
vectorIndexConfig: configure.vectorIndex.hnsw({
distanceMetric: vectorDistances.COSINE // or 'cosine'
})
})
})
const classWithDistance = {
class: 'Article',
vectorIndexConfig: {
distance: 'cosine',
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithDistance).do();
// Additional configuration not shown
VectorIndexConfig vectorIndexConfig = VectorIndexConfig.builder()
.distance(DistanceType.DOT) // Define Distance Type e.g. Dot, Cosine, hamming...
.build();
WeaviateClass articleClass = WeaviateClass.builder()
.className(className)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorIndexConfig(vectorIndexConfig)
.build();
Result<Boolean> classResult = client.schema().classCreator()
.withClass(articleClass)
.run();
Additional information
For details on the configuration parameters, see the following:
Set inverted index parameters
Various inverted index parameters are configurable for each collection. Some parameters are set at the collection level, while others are set at the property level.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Configure, Property, DataType

# Create a collection with property-level and collection-level
# inverted index settings.
client.collections.create(
    "Article",
    # Additional settings not shown
    properties=[  # properties configuration is optional
        Property(
            name="title",
            data_type=DataType.TEXT,
            index_filterable=True,
            index_searchable=True,
        ),
        Property(
            name="chunk",  # property names start with a lower case letter
            data_type=DataType.INT,
            index_range_filters=True,
        ),
    ],
    inverted_index_config=Configure.inverted_index(  # Optional
        bm25_b=0.7,
        bm25_k1=1.25,
        index_null_state=True,
        index_property_length=True,
        index_timestamps=True
    )
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-huggingface", # this could be any vectorizer
"properties": [
{
"name": "title",
"dataType": ["text"],
"indexFilterable": True,
"indexSearchable": True,
"moduleConfig": {
"text2vec-huggingface": {}
}
},
{
"name": "chunk",
"dataType": ["int"],
"indexRangeFilters": True,
},
],
"invertedIndexConfig": {
"bm25": {
"b": 0.7,
"k1": 1.25
},
"indexTimestamps": True,
"indexNullState": True,
"indexPropertyLength": True
}
}
client.schema.create_class(class_obj)
import { dataType } from 'weaviate-client';
await client.collections.create({
name: 'Article',
properties: [
{
name: 'title',
dataType: dataType.TEXT,
indexFilterable: true,
indexSearchable: true,
},
{
name: 'chunk',
dataType: dataType.INT,
indexRangeFilters: true,
},
],
invertedIndex: {
bm25: {
b: 0.7,
k1: 1.25
},
indexNullState: true,
indexPropertyLength: true,
indexTimestamps: true
}
})
// Collection definition with property-level index flags and
// collection-level inverted index (BM25, timestamps, null state) settings.
const classWithInvIndexSettings = {
  class: 'Article',
  vectorizer: 'text2vec-huggingface', // this could be any vectorizer
  properties: [
    {
      name: 'title',
      dataType: ['text'],
      indexFilterable: true,
      indexSearchable: true,
      moduleConfig: {
        'text2vec-huggingface': {},
      },
    },
    {
      name: 'chunk',
      dataType: ['int'],
      indexRangeFilters: true,
    },
  ],
  invertedIndexConfig: {
    bm25: {
      b: 0.7,
      k1: 1.25,
    },
    indexTimestamps: true,
    indexNullState: true,
    indexPropertyLength: true,
  },
};

// Add the class to the schema (pass the definition declared above)
result = await client.schema
  .classCreator()
  .withClass(classWithInvIndexSettings)
  .do();
Specify a reranker model integration
Configure a reranker
model integration for reranking retrieved results.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
reranker_config=Configure.Reranker.cohere()
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-openai", # set your vectorizer module
"moduleConfig": {
"reranker-cohere": {} # set your reranker module
}
}
client.schema.create_class(class_obj)
import { vectorizer, reranker } from 'weaviate-client';

// Create a collection with a vectorizer and a reranker model integration
await client.collections.create({
  name: 'Article',
  vectorizers: vectorizer.text2VecOpenAI(),
  reranker: reranker.cohere(),
})
const classWithReranker = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
moduleConfig: {
'reranker-cohere': {}, // set your reranker module
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithReranker).do();
Update the reranker model integration
v1.25.23
, v1.26.8
and v1.27.1
The reranker and generative configurations are mutable from v1.25.23, v1.26.8 and v1.27.1.
Update the reranker
model integration for reranking retrieved results.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Reconfigure
collection = client.collections.get("Article")
collection.config.update(
reranker_config=Reconfigure.Reranker.cohere() # Update the reranker module
)
class_obj = {
"moduleConfig": {
"reranker-cohere": {} # Update your reranker module
}
}
client.schema.update_config("Article", class_obj)
// Coming soon
// Collection definition updates are not available in the v2 API.
// Consider upgrading to the v3 API, or deleting and recreating the collection.
Specify a generative model integration
Specify a generative
model integration for a collection (for RAG).
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
generative_config=Configure.Generative.openai(),
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-openai", # set your vectorizer module
"moduleConfig": {
"generative-openai": {} # set your generative module
}
}
client.schema.create_class(class_obj)
import { vectorizer, generative } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOpenAI(),
generative: generative.openAI(),
})
const classWithGenerative = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
moduleConfig: {
'generative-openai': {}, // set your generative module
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithGenerative).do();
Specify a generative model name
Specify a generative model name
.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vectorizer_config=Configure.Vectorizer.text2vec_openai(),
generative_config=Configure.Generative.openai(
model="gpt-4"
),
)
class_obj = {
"class": "Article",
"vectorizer": "text2vec-openai", # set your vectorizer module
"moduleConfig": {
"generative-openai": {
"model": "gpt-4" # select generative model name
}
}
}
client.schema.create_class(class_obj)
import { vectorizer, generative } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectorizer.text2VecOpenAI(),
generative: generative.openAI({
model: "gpt-4"
}),
})
const classWithGenerativeModel = {
class: 'Article',
properties: [
{
name: 'title',
dataType: ['text'],
},
],
vectorizer: 'text2vec-openai', // this could be any vectorizer
moduleConfig: {
'generative-openai': {
'model': 'gpt-4' // set your generative model
},
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithGenerativeModel).do();
Update the generative model integration
v1.25.23
, v1.26.8
and v1.27.1
The reranker and generative configurations are mutable from v1.25.23, v1.26.8 and v1.27.1.
Update a generative
model integration for a collection (for RAG).
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Reconfigure
collection = client.collections.get("Article")
collection.config.update(
generative_config=Reconfigure.Generative.cohere() # Update the generative module
)
# Only the settings to change are sent; the collection is identified by name
# in the update call, so no "class" key is needed here.
class_obj = {
    "moduleConfig": {
        "generative-cohere": {}  # Update your generative module
    }
}

# Update the existing collection instead of trying to create it again
client.schema.update_config("Article", class_obj)
// Coming soon
// Collection definition updates are not available in the v2 API.
// Consider upgrading to the v3 API, or deleting and recreating the collection.
Replication settings
v1.25
In Weaviate v1.25, a replication factor cannot be changed once it is set.
This is due to the schema consensus algorithm change in v1.25. This will be improved in future versions.
Configure replication settings, such as async replication and deletion resolution strategy.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- cURL
from weaviate.classes.config import Configure, ReplicationDeletionStrategy
client.collections.create(
"Article",
replication_config=Configure.replication(
factor=3,
async_enabled=True, # Enable asynchronous repair
deletion_strategy=ReplicationDeletionStrategy.TIME_BASED_RESOLUTION, # Added in v1.28; Set the deletion conflict resolution strategy
)
)
# Collection definition with replication settings
class_obj = {
    "class": "Article",
    "replicationConfig": {
        "factor": 3,
        "asyncEnabled": True  # Enable asynchronous repair
    },
}

client.schema.create_class(class_obj)
import { configure } from 'weaviate-client';
await client.collections.create({
name: 'Article',
replication: configure.replication({
factor: 3,
asyncEnabled: true,
}),
})
const classWithReplication = {
class: 'Article',
replicationConfig: {
factor: 3,
asyncEnabled: true,
},
};
// Add the class to the schema
result = await client.schema
.classCreator()
.withClass(classWithReplication)
.do();
# Create the collection through the REST schema endpoint.
# The `text` data type is used here; `string` is deprecated.
curl \
-X POST \
-H "Content-Type: application/json" \
-d '{
    "class": "Article",
    "properties": [
        {
            "dataType": [
                "text"
            ],
            "description": "Title of the article",
            "name": "title"
        }
    ],
    "replicationConfig": {
        "factor": 3,
        "asyncEnabled": true
    }
}' \
http://localhost:8080/v1/schema
Additional information
To use replication factors greater than one, use a multi-node deployment.
For details on the configuration parameters, see the following:
Sharding settings
Configure sharding per collection.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Configure
client.collections.create(
"Article",
sharding_config=Configure.sharding(
virtual_per_physical=128,
desired_count=1,
desired_virtual_count=128,
)
)
# Collection definition with sharding settings
class_obj = {
    "class": "Article",
    "shardingConfig": {
        "virtualPerPhysical": 128,
        "desiredCount": 1,
        "actualCount": 1,
        "desiredVirtualCount": 128,
        "actualVirtualCount": 128,
    },
}

client.schema.create_class(class_obj)
import { configure } from 'weaviate-client';
await client.collections.create({
name: 'Article',
sharding: configure.sharding({
virtualPerPhysical: 128,
desiredCount: 1,
desiredVirtualCount: 128,
})
})
const classWithSharding = {
class: 'Article',
vectorIndexConfig: {
distance: 'cosine',
},
shardingConfig: {
virtualPerPhysical: 128,
desiredCount: 1,
actualCount: 1,
desiredVirtualCount: 128,
actualVirtualCount: 128,
},
};
// Add the class to the schema
result = await client.schema.classCreator().withClass(classWithSharding).do();
Additional information
For details on the configuration parameters, see the following:
Multi-tenancy
v1.20
Create a collection with multi-tenancy enabled.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Configure
client.collections.create(
"Article",
multi_tenancy_config=Configure.multi_tenancy(True)
)
class_obj = {
"class": "Article",
"multiTenancyConfig": {"enabled": True}
}
client.schema.create_class(class_obj) # returns null on success
await client.collections.create({
name: 'Article',
multiTenancy: { enabled: true }
// multiTenancy: configure.multiTenancy({ enabled: true }) // alternatively use helper function
})
await client.schema
.classCreator()
.withClass({
class: 'Article',
multiTenancyConfig: { enabled: true },
})
.do();
Read a single collection definition
Retrieve a collection definition from the schema.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
articles = client.collections.get("Article")
articles_config = articles.config.get()
print(articles_config)
class_name = "Article"
response = client.schema.get(class_name)
print(json.dumps(response, indent=2))
let articles = client.collections.get('Article')
const collectionConfig = await articles.config.get()
console.log(collectionConfig)
const className = 'Article';
let classDefinition = await client.schema
.classGetter()
.withClassName(className)
.do();
console.log(JSON.stringify(classDefinition, null, 2));
String className = "Article";
Result<WeaviateClass> result = client.schema().classGetter()
.withClassName(className)
.run();
String json = new GsonBuilder().setPrettyPrinting().create().toJson(result.getResult());
System.out.println(json);
className := "Article"
class, err := client.Schema().ClassGetter().
WithClassName(className).
Do(ctx)
b, err := json.MarshalIndent(class, "", " ")
fmt.Println(string(b))
Sample configuration: Text objects
This configuration for text objects defines the following:
- The collection name (`Article`)
- The vectorizer module (`text2vec-cohere`) and model (`embed-multilingual-v2.0`)
- A set of properties (`title`, `body`) with `text` data types.
{
"class": "Article",
"vectorizer": "text2vec-cohere",
"moduleConfig": {
"text2vec-cohere": {
"model": "embed-multilingual-v2.0"
}
},
"properties": [
{
"name": "title",
"dataType": ["text"]
},
{
"name": "body",
"dataType": ["text"]
}
]
}
Sample configuration: Nested objects
v1.22
This configuration for nested objects defines the following:
- The collection name (`Person`)
- The vectorizer module (`text2vec-huggingface`)
- A set of properties (`last_name`, `address`)
  - `last_name` has `text` data type
  - `address` has `object` data type
- The `address` property has two nested properties (`street` and `city`)
{
"class": "Person",
"vectorizer": "text2vec-huggingface",
"properties": [
{
"dataType": ["text"],
"name": "last_name"
},
{
"dataType": ["object"],
"name": "address",
"nestedProperties": [
{"dataType": ["text"], "name": "street"},
{"dataType": ["text"], "name": "city"}
]
}
]
}
Sample configuration: Generative search
This configuration for retrieval augmented generation defines the following:
- The collection name (`Article`)
- The default vectorizer module (`text2vec-openai`)
- The generative module (`generative-openai`)
- A set of properties (`title`, `chunk`, `chunk_no` and `url`)
- The tokenization option for the `url` property
- The vectorization option (`skip` vectorization) for the `url` property
{
"class": "Article",
"vectorizer": "text2vec-openai",
"vectorIndexConfig": {
"distance": "cosine"
},
"moduleConfig": {
"generative-openai": {}
},
"properties": [
{
"name": "title",
"dataType": ["text"]
},
{
"name": "chunk",
"dataType": ["text"]
},
{
"name": "chunk_no",
"dataType": ["int"]
},
{
"name": "url",
"dataType": ["text"],
"tokenization": "field",
"moduleConfig": {
"text2vec-openai": {
"skip": true
}
}
}
]
}
Sample configuration: Images
This configuration for image search defines the following:
- The collection name (`Image`)
- The vectorizer module (`img2vec-neural`)
  - The `image` property configures the collection to store image data.
- The vector index distance metric (`cosine`)
- A set of properties (`image`), with the `image` property set as `blob`.
For image searches, see Image search.
{
"class": "Image",
"vectorizer": "img2vec-neural",
"vectorIndexConfig": {
"distance": "cosine"
},
"moduleConfig": {
"img2vec-neural": {
"imageFields": [
"image"
]
}
},
"properties": [
{
"name": "image",
"dataType": ["blob"]
}
]
}
Read all collection definitions
Fetch the database schema to retrieve all of the collection definitions.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
response = client.collections.list_all(simple=False)
print(response)
response = client.schema.get()
print(json.dumps(response, indent=2))
const allCollections = await client.collections.listAll()
console.log(JSON.stringify(allCollections, null, 2));
let allCollections = await client.schema.getter().do();
console.log(JSON.stringify(allCollections, null, 2));
Result<Schema> result = client.schema().getter()
.run();
String json = new GsonBuilder().setPrettyPrinting().create().toJson(result.getResult());
System.out.println(json);
schema, err := client.Schema().Getter().
Do(ctx)
b, err := json.MarshalIndent(schema, "", " ")
fmt.Println(string(b))
Update a collection definition
v1.25
In Weaviate v1.25, a replication factor cannot be changed once it is set.
This is due to the schema consensus algorithm change in v1.25. This will be improved in future versions.
You can update a collection definition to change the mutable collection settings.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
from weaviate.classes.config import Reconfigure, VectorFilterStrategy
articles = client.collections.get("Article")
# Update the collection definition
articles.config.update(
inverted_index_config=Reconfigure.inverted_index(
bm25_k1=1.5
),
vector_index_config=Reconfigure.VectorIndex.hnsw(
filter_strategy=VectorFilterStrategy.ACORN # Available from Weaviate v1.27.0
)
)
articles = client.collections.get("Article")
article_shards = articles.config.update_shards(
status="READY",
shard_names=shard_names # The names (List[str]) of the shard to update (or a shard name)
)
print(article_shards)
class_name = "Article"
# Update the collection definition
collection_def_changes = {
"class": class_name,
"invertedIndexConfig": {
"bm25": {
"k1": 1.5 # Change the k1 parameter from 1.2
}
},
"vectorIndexConfig": {
"filterStrategy": "acorn" # Available from Weaviate v1.27.0
}
}
client.schema.update_config("Article", collection_def_changes)
import { reconfigure } from 'weaviate-client';
let articles = client.collections.get('Article')
await articles.config.update({
invertedIndex: reconfigure.invertedIndex({
bm25k1: 1.5 // Change the k1 parameter from 1.2
}),
vectorizers: reconfigure.vectorizer.update({
vectorIndexConfig: reconfigure.vectorIndex.hnsw({
quantizer: reconfigure.vectorIndex.quantizer.pq(),
ef: 4,
filterStrategy: 'acorn', // Available from Weaviate v1.27.0
}),
})
})
// Collection definition updates are not available in the v2 API.
// Consider upgrading to the v3 API, or deleting and recreating the collection.
// This feature is under development
// This feature is under development
Update a parameter
Some parameters cannot be modified after you create your collection.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
from weaviate.classes.config import Reconfigure
# Get the Article collection object
articles = client.collections.get("Article")
# Update the collection configuration
articles.config.update(
# Note, use Reconfigure here (not Configure)
inverted_index_config=Reconfigure.inverted_index(
stopwords_removals=["a", "the"]
)
)
class_obj = {
"invertedIndexConfig": {
"stopwords": {
"preset": "en",
"removals": ["a", "the"]
},
},
}
client.schema.update_config("Article", class_obj)
class_obj = {
"invertedIndexConfig": {
"stopwords": {
"preset": "en",
"removals": ["a", "the"]
},
},
}
client.schema.update_config("Article", class_obj)
import { reconfigure } from 'weaviate-client';

const articles = client.collections.get('Article');

// Use `reconfigure` (not `configure`) when changing an existing collection.
// Await the update so a failure surfaces here instead of as an unhandled rejection.
await articles.config.update({
  invertedIndex: reconfigure.invertedIndex({
    stopwordsRemovals: ['a', 'the'],
  }),
});
Delete a collection
You can delete any unwanted collection(s), along with the data that they contain.
When you delete a collection, you delete all associated objects!
Be very careful with deletes on a production database and anywhere else that you have important data.
This code deletes a collection and its objects.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
- Curl
# collection_name can be a string ("Article") or a list of strings (["Article", "Category"])
client.collections.delete(collection_name) # THIS WILL DELETE THE SPECIFIED COLLECTION(S) AND THEIR OBJECTS
# Note: you can also delete all collections in the Weaviate instance with:
# client.collections.delete_all()
# delete class "Article" - THIS WILL DELETE ALL DATA IN THIS CLASS
client.schema.delete_class("Article") # Replace with your class name
// delete collection "Article" - THIS WILL DELETE THE COLLECTION AND ALL ITS DATA
await client.collections.delete('Article')
// you can also delete all collections of a cluster
// await client.collections.deleteAll()
// delete collection "Article" - THIS WILL DELETE THE COLLECTION AND ALL ITS DATA
await client.schema
.classDeleter()
.withClassName('Article')
.do();
className := "YourClassName"
// delete the class
if err := client.Schema().ClassDeleter().WithClassName(className).Do(context.Background()); err != nil {
// Weaviate will return a 400 if the class does not exist, so this is allowed, only return an error if it's not a 400
if status, ok := err.(*fault.WeaviateClientError); ok && status.StatusCode != http.StatusBadRequest {
panic(err)
}
}
Result<Boolean> result = client.schema().classDeleter()
.withClassName(className)
.run();
curl \
-X DELETE \
https://WEAVIATE_INSTANCE_URL/v1/schema/YourClassName # Replace WEAVIATE_INSTANCE_URL with your instance URL
Add a property
Indexing limitations after data import
There are no index limitations when you add collection properties before you import data.
If you add a new property after you import data, there is an impact on indexing.
Property indexes are built at import time. If you add a new property after importing some data, the indexes of pre-existing objects aren't automatically updated to include the new property. This means pre-existing objects aren't added to the new property index. Queries may return unexpected results because the index only includes new objects.
To create an index that includes all of the objects in a collection, do one of the following:
- New collections: Add all of the collection's properties before importing objects.
- Existing collections: Export the existing data from the collection. Re-create it with the new property. Import the data into the updated collection.
We are working on a re-indexing API to allow you to re-index the data after adding a property. This will be available in a future release.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
from weaviate.classes.config import Property, DataType
articles = client.collections.get("Article")
articles.config.add_property(
Property(
name="onHomepage",
data_type=DataType.BOOL
)
)
add_prop = {
"dataType": [
"boolean"
],
"name": "onHomepage"
}
client.schema.property.create("Article", add_prop)
const articles = client.collections.get('Article');

// Await the call so the property exists (or the error is thrown) before continuing.
await articles.config.addProperty({
  name: 'onHomepage',
  dataType: 'boolean',
});
const className = 'Article';
const prop = {
dataType: ['boolean'],
name: 'onHomepage',
};
const response = await client.schema
.propertyCreator()
.withClassName(className)
.withProperty(prop)
.do();
console.log(JSON.stringify(response, null, 2));
package main
import (
"context"
"github.com/weaviate/weaviate-go-client/v4/weaviate"
"github.com/weaviate/weaviate/entities/models"
)
// main connects to a Weaviate instance at localhost:8080 and adds a boolean
// "onHomepage" property to the "Article" collection. It panics on any error.
func main() {
	cfg := weaviate.Config{
		Host:   "localhost:8080",
		Scheme: "http",
	}
	client, err := weaviate.NewClient(cfg)
	if err != nil {
		panic(err)
	}

	prop := &models.Property{
		DataType: []string{"boolean"},
		Name:     "onHomepage",
	}

	// err is already declared above, so assign with "=". A second ":=" with no
	// new variable on the left-hand side does not compile.
	err = client.Schema().PropertyCreator().
		WithClassName("Article").
		WithProperty(prop).
		Do(context.Background())
	if err != nil {
		panic(err)
	}
}
Property property = Property.builder()
.dataType(Arrays.asList(DataType.BOOLEAN))
.name(propertyName)
.build();
Result<Boolean> result = client.schema().propertyCreator()
.withClassName(className)
.withProperty(property)
.run();
Inspect shards (for a collection)
An index itself can be composed of multiple shards.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
articles = client.collections.get("Article")
article_shards = articles.config.get_shards()
print(article_shards)
article_shards = client.schema.get_class_shards("Article")
print(article_shards)
let articles = client.collections.get('Article')
const shards = await articles.config.getShards()
console.log(JSON.stringify(shards, null, 2));
const response = await client.schema
.shardsGetter()
.withClassName('Article')
.do();
console.log(JSON.stringify(response, null, 2));
package main
import (
"context"
"fmt"
"github.com/weaviate/weaviate-go-client/v4/weaviate"
)
// main connects to a Weaviate instance at localhost:8080, fetches the shards
// of the "Article" collection, and prints them. It panics on any error.
func main() {
	client, err := weaviate.NewClient(weaviate.Config{
		Host:   "localhost:8080",
		Scheme: "http",
	})
	if err != nil {
		panic(err)
	}

	// Build the request first, then execute it with a background context.
	getter := client.Schema().ShardsGetter().WithClassName("Article")
	articleShards, err := getter.Do(context.Background())
	if err != nil {
		panic(err)
	}

	fmt.Printf("%v", articleShards)
}
Result<Shard[]> result = client.schema().shardsGetter()
.withClassName(className)
.run();
Shard[] shards = result.getResult();
if (shards == null || shards.length == 0) {
System.out.println("No shards found in this collection.");
return;
}
// Iterate over each shard and print its status
for (Shard shard : shards) {
System.out.println("Shard name: " + shard.getName());
System.out.println("Shard status: " + shard.getStatus()); // Get shard status (whether it's READY or READONLY)
}
Update shard status
You can manually update a shard to change its status. For example, update the shard status from READONLY to READY after you make other changes.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Go
- Java
articles = client.collections.get("Article")
article_shards = articles.config.update_shards(
status="READY",
shard_names=shard_names # The names (List[str]) of the shard to update (or a shard name)
)
print(article_shards)
article_shards = client.schema.update_class_shard(
"Article",
status="READY",
shard_name="shard-1234"
)
print(article_shards)
let articles = client.collections.get('Article')
const shards = await articles.config.updateShards('READY', 'shard-1234')
console.log(JSON.stringify(shards, null, 2));
const shards = await client.schema.shardUpdater()
.withClassName('Article')
.withShardName('shard-1234')
.withStatus('READY')
.do()
console.log(JSON.stringify(shards, null, 2));
shardStatus, err := client.Schema().ShardUpdater().
WithClassName(myCollectionName). // Set your collection name
WithShardName(shardName). // Set the shard name to update
WithStatus("READY").
Do(ctx)
if err != nil {
// handle error
panic(err)
}
fmt.Printf("%v", shardStatus)
Result<ShardStatus> updateToReadyStatus = client.schema().shardUpdater()
.withClassName(className)
.withShardName(shardName)
.withStatus(ShardStatuses.READY)
.run();
if (updateToReadyStatus.hasErrors()) {
System.out.println(updateToReadyStatus.getError());
return;
}
System.out.println(updateToReadyStatus.getResult());
Further resources
References
Background knowledge
Questions and feedback
If you have any questions or feedback, let us know in the user forum.