Read all objects
Weaviate provides the necessary APIs to iterate through all your data. This is useful when you want to manually copy/migrate your data (and vector embeddings) from one place to another.
This is done with the help of the `after` operator, also called the cursor API.
The newer API clients (currently the Python Client v4 and the JS/TS Client v3) encapsulate this functionality as an `Iterator`.
Read object properties and ids
The following code iterates through all objects, providing the properties and id for each object.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
- Java
- Go
# Look up the collection to read from
collection = client.collections.get("WineReview")

# The iterator yields every object in the collection, one at a time
for item in collection.iterator():
    print(item.uuid, item.properties)
# STEP 1 - Prepare a helper function to iterate through data in batches
def get_batch_with_cursor(collection_name, batch_size, cursor=None):
    """Fetch one batch of objects from a collection using the cursor API.

    Args:
        collection_name: Name of the collection to query.
        batch_size: Maximum number of objects to request in this batch.
        cursor: ID to continue after (the last ID of the previous batch),
            or None to start from the beginning.

    Returns:
        The list stored under result["data"]["Get"][collection_name].
    """
    # First prepare the query to run through data
    query = (
        client.query.get(
            collection_name,  # update with your collection name
            ["title", "description"]  # update with the required properties
        )
        .with_additional(["id"])
        .with_limit(batch_size)
    )

    if cursor is not None:
        # Fetch the next set of results
        result = query.with_after(cursor).do()
    else:
        # Fetch the first set of results
        result = query.do()

    return result["data"]["Get"][collection_name]
# STEP 2 - Iterate through the data
cursor = None
while True:
    # Get the next batch of objects
    next_batch = get_batch_with_cursor("CollectionName", 100, cursor)

    # Break the loop if empty – we are done
    if len(next_batch) == 0:
        break

    # Here is your next batch of objects
    print(next_batch)

    # Move the cursor to the last returned uuid
    cursor = next_batch[-1]["_additional"]["id"]
const myCollection = client.collections.get("WineReview");

// Walk every object in the collection via the async iterator
for await (const item of myCollection.iterator()) {
  const { uuid, properties } = item;
  console.log(uuid, properties);
}
// STEP 1 - Prepare a helper function to iterate through data in batches
/**
 * Fetches one batch of objects from a collection using the cursor API.
 *
 * @param collectionName - Name of the collection to query.
 * @param batchSize - Maximum number of objects to request in this batch.
 * @param cursor - ID to continue after (the last ID of the previous batch).
 *   Pass `null` (or omit) to start from the beginning.
 * @returns The array found at `result.data.Get[collectionName]`.
 */
async function getBatchWithCursor(
  collectionName: string,
  batchSize: number,
  cursor: string | null = null
): Promise<any[]> {
  // First prepare the query to run through data
  const baseQuery = client.graphql.get()
    .withClassName(collectionName)
    .withFields('title description _additional { id }')
    .withLimit(batchSize);

  // With a cursor, fetch the next set of results; otherwise fetch the first set
  const query = cursor ? baseQuery.withAfter(cursor) : baseQuery;

  const result = await query.do();
  return result.data.Get[collectionName];
}
// STEP 2 - Iterate through the data
let cursor = null;

// Keep requesting batches until an empty one comes back
while (true) {
  // Request the next batch of objects
  const nextBatch = await getBatchWithCursor('CollectionName', 100, cursor);

  // An empty batch means every object has been read
  if (nextBatch.length === 0) {
    break;
  }

  // Here is your next batch of objects
  console.log(JSON.stringify(nextBatch));

  // Move the cursor to the last returned uuid
  const lastObject = nextBatch[nextBatch.length - 1];
  cursor = lastObject['_additional']['id'];
}
import io.weaviate.client.Config;
import io.weaviate.client.WeaviateAuthClient;
import io.weaviate.client.WeaviateClient;
String scheme = "https";
String host = "WEAVIATE_INSTANCE_URL";  // Replace with your Weaviate URL
String apiKey = "YOUR-WEAVIATE-API-KEY";  // If auth enabled. Replace with your Weaviate instance API key.

// Declared outside the try block so the client stays in scope for the code that follows
WeaviateClient sourceClient;
try {
  sourceClient = WeaviateAuthClient.apiKey(new Config(scheme, host), apiKey);
} catch (AuthException e) {
  // handle error in case of authorization problems
  throw new RuntimeException(e);
}

// Settings used by the batch-reading helpers
int batchSize = 20;
String className = "WineReview";
String[] classProperties = new String[]{"title"};
/**
 * Runs a GraphQL Get query for one batch of objects.
 *
 * @param client     client used to build and run the query
 * @param className  name of the collection to query
 * @param properties property names to request for each object
 * @param batchSize  limit applied to the query
 * @param cursor     ID to continue after, or {@code null} to start from the beginning
 * @return the result of running the query
 */
private Result<GraphQLResponse> getBatchWithCursor(WeaviateClient client,
    String className, String[] properties, int batchSize, String cursor) {
  // One Field per requested property, followed by the _additional block
  Field[] fields = new Field[properties.length + 1];
  for (int i = 0; i < properties.length; i++) {
    fields[i] = Field.builder().name(properties[i]).build();
  }
  // Optionally retrieve the vector embedding by adding `vector` to the _additional fields
  fields[properties.length] = Field.builder().name("_additional { id vector }").build();

  Get query = client.graphQL().get()
      .withClassName(className)
      .withFields(fields)
      .withLimit(batchSize);

  // Only apply the cursor when one was supplied
  return (cursor == null ? query : query.withAfter(cursor)).run();
}
/**
 * Flattens a GraphQL Get response into one map per object.
 *
 * Each map holds the requested properties plus the "id" and "vector"
 * entries read from the object's "_additional" section.
 *
 * @param result          response whose data contains a "Get" section
 * @param className       key of the object list inside the "Get" section
 * @param classProperties property names to copy from each object
 * @return one map per object, in response order
 */
private List<Map<String, Object>> getProperties(GraphQLResponse result, String className, String[] classProperties) {
  Map<?, ?> data = (Map<?, ?>) result.getData();
  Map<?, ?> getSection = (Map<?, ?>) data.get("Get");
  List<?> objects = (List<?>) getSection.get(className);

  List<Map<String, Object>> rows = new ArrayList<>();
  for (Object raw : objects) {
    Map<?, ?> object = (Map<?, ?>) raw;
    Map<String, Object> row = new HashMap<>();

    // Copy the requested properties
    for (String prop : classProperties) {
      row.put(prop, object.get(prop));
    }

    // Copy the id and vector from the _additional section
    Map<?, ?> additional = (Map<?, ?>) object.get("_additional");
    row.put("id", additional.get("id"));
    row.put("vector", additional.get("vector"));

    rows.add(row);
  }
  return rows;
}
/**
 * Counts the objects returned for a collection in a GraphQL Get response.
 *
 * @param result    response whose data contains a "Get" section
 * @param className key of the object list inside the "Get" section
 * @return size of that object list
 */
private int getObjectsCount(GraphQLResponse result, String className) {
  Map<?, ?> data = (Map<?, ?>) result.getData();
  Map<?, ?> getSection = (Map<?, ?>) data.get("Get");
  return ((List<?>) getSection.get(className)).size();
}
"github.com/weaviate/weaviate-go-client/v4/weaviate"
"github.com/weaviate/weaviate-go-client/v4/weaviate/auth"
"github.com/weaviate/weaviate-go-client/v4/weaviate/graphql"
"github.com/weaviate/weaviate/entities/models"
sourceClient, err := weaviate.NewClient(weaviate.Config{
Scheme: "https",
Host: "WEAVIATE_INSTANCE_URL", // Replace WEAVIATE_INSTANCE_URL with your instance URL
AuthConfig: auth.ApiKey{
Value: "YOUR-WEAVIATE-API-KEY", // If auth enabled. Replace with your Weaviate instance API key.
},
})
if err != nil {
// handle error
panic(err)
}
batchSize := 20
className := "WineReview"
classProperties := []string{"title"}
// getBatchWithCursor runs a GraphQL Get query for one batch of objects.
// An empty cursor starts from the beginning; otherwise the query continues
// after the given ID.
getBatchWithCursor := func(client weaviate.Client,
	className string, classProperties []string, batchSize int, cursor string) (*models.GraphQLResponse, error) {
	// One field per requested property, plus the _additional block
	fields := make([]graphql.Field, 0, len(classProperties)+1)
	for i := range classProperties {
		fields = append(fields, graphql.Field{Name: classProperties[i]})
	}
	// Optionally retrieve the vector embedding by adding `vector` to the _additional fields
	fields = append(fields, graphql.Field{Name: "_additional { id vector }"})

	query := client.GraphQL().Get().
		WithClassName(className).
		WithFields(fields...).
		WithLimit(batchSize)

	if cursor == "" {
		return query.Do(context.Background())
	}
	return query.WithAfter(cursor).Do(context.Background())
}
Read all objects including vectors
Read through all data including the vectors. (Also applicable where named vectors are used.)
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
# Look up the collection to read from
collection = client.collections.get("WineReview")

for item in collection.iterator(
    include_vector=True  # If using named vectors, you can specify ones to include e.g. ['title', 'body'], or True to include all
):
    print(item.properties)
    print(item.vector)
# STEP 1 - Prepare a helper function to iterate through data in batches
def get_batch_with_cursor(collection_name, batch_size, cursor=None):
    """Fetch one batch of objects, including their vectors, using the cursor API.

    Args:
        collection_name: Name of the collection to query.
        batch_size: Maximum number of objects to request in this batch.
        cursor: ID to continue after (the last ID of the previous batch),
            or None to start from the beginning.

    Returns:
        The list stored under result["data"]["Get"][collection_name].
    """
    # First prepare the query to run through data
    query = (
        client.query.get(
            collection_name,  # update with your collection name
            ["title", "description"]  # update with the required properties
        )
        .with_additional(["id vector"])
        .with_limit(batch_size)
    )

    if cursor is not None:
        # Fetch the next set of results
        result = query.with_after(cursor).do()
    else:
        # Fetch the first set of results
        result = query.do()

    return result["data"]["Get"][collection_name]
# STEP 2 - Iterate through the data
cursor = None
while True:
    # Get the next batch of objects
    next_batch = get_batch_with_cursor("CollectionName", 100, cursor)

    # Break the loop if empty – we are done
    if len(next_batch) == 0:
        break

    # Here is your next batch of objects
    print(next_batch)

    # Move the cursor to the last returned uuid
    cursor = next_batch[-1]["_additional"]["id"]
const myCollection = client.collections.get("WineReview");

// Ask the iterator to return vectors along with each object
const iteratorOptions = { includeVector: true };

for await (const item of myCollection.iterator(iteratorOptions)) {
  console.log(item.uuid, item.properties);
  console.log(item.vectors);
}
// STEP 1 - Prepare a helper function to iterate through data in batches
/**
 * Fetches one batch of objects, including their vectors, using the cursor API.
 *
 * @param collectionName - Name of the collection to query.
 * @param batchSize - Maximum number of objects to request in this batch.
 * @param cursor - ID to continue after (the last ID of the previous batch).
 *   Pass `null` (or omit) to start from the beginning.
 * @returns The array found at `result.data.Get[collectionName]`.
 */
async function getBatchWithCursor(
  collectionName: string,
  batchSize: number,
  cursor: string | null = null
): Promise<any[]> {
  // First prepare the query to run through data
  const baseQuery = client.graphql.get()
    .withClassName(collectionName)
    .withFields('title description _additional { id vector }')
    .withLimit(batchSize);

  // With a cursor, fetch the next set of results; otherwise fetch the first set
  const query = cursor ? baseQuery.withAfter(cursor) : baseQuery;

  const result = await query.do();
  return result.data.Get[collectionName];
}
// STEP 2 - Iterate through the data
let cursor = null;

// Keep requesting batches until an empty one comes back
while (true) {
  // Request the next batch of objects
  const nextBatch = await getBatchWithCursor('CollectionName', 100, cursor);

  // An empty batch means every object has been read
  if (nextBatch.length === 0) {
    break;
  }

  // Here is your next batch of objects
  console.log(JSON.stringify(nextBatch));

  // Move the cursor to the last returned uuid
  const lastObject = nextBatch[nextBatch.length - 1];
  cursor = lastObject['_additional']['id'];
}
Read all objects - Multi-tenant collections
Iterate through all tenants and read data for each.
For collections where multi-tenancy is enabled, you need to specify the tenant name when reading or creating objects. See "Manage data: multi-tenancy operations" for details.
- Python Client v4
- Python Client v3
- JS/TS Client v3
- JS/TS Client v2
multi_collection = client.collections.get("WineReviewMT")

# Get a list of tenants
tenants = multi_collection.tenants.get()

# Iterate through tenants
for tenant_name in tenants.keys():
    # Iterate through objects within each tenant
    for item in multi_collection.with_tenant(tenant_name).iterator():
        print(f"{tenant_name}: {item.properties}")
# STEP 1 - Prepare a helper function to iterate through data in batches
def get_batch_with_cursor(collection_name, tenant_name, batch_size, cursor=None):
    """Fetch one batch of objects for a single tenant using the cursor API.

    Args:
        collection_name: Name of the multi-tenant collection to query.
        tenant_name: Tenant whose objects should be read.
        batch_size: Maximum number of objects to request in this batch.
        cursor: ID to continue after (the last ID of the previous batch),
            or None to start from the beginning.

    Returns:
        The list stored under result["data"]["Get"][collection_name].
    """
    # First prepare the query to run through data
    query = (
        client.query.get(
            collection_name,  # update with your collection name
            ["title", "description"]  # update with the required properties
        )
        .with_tenant(tenant_name)  # tenant name goes here
        .with_additional(["id"])
        .with_limit(batch_size)
    )

    if cursor is not None:
        # Fetch the next set of results
        result = query.with_after(cursor).do()
    else:
        # Fetch the first set of results
        result = query.do()

    # Index by the requested collection rather than a hard-coded name,
    # so the helper works for any collection passed in
    return result["data"]["Get"][collection_name]
# Get Tenants
tenants = client.schema.get_class_tenants(
    class_name="MultiTenancyCollection"  # The class from which the tenants will be retrieved
)

# STEP 2 - Iterate through Tenants
for tenant in tenants:
    # Reset the cursor to the beginning
    cursor = None
    while True:
        # Get the next batch of objects
        next_batch = get_batch_with_cursor("MultiTenancyCollection", tenant.name, 100, cursor)

        # Break the loop if empty – we are done
        if len(next_batch) == 0:
            break

        # Here is your next batch of objects
        print(next_batch)

        # Move the cursor to the last returned uuid
        cursor = next_batch[-1]["_additional"]["id"]
const multiCollection = client.collections.get("WineReviewMT");

// The tenants come back keyed by tenant name
const tenants = await multiCollection.tenants.get();

for (const tenantName in tenants) {
  // Read every object that belongs to this tenant
  const tenantCollection = multiCollection.withTenant(tenantName);
  for await (const item of tenantCollection.iterator()) {
    console.log(`${tenantName}:`, item.properties);
  }
}
// STEP 1 - Prepare a helper function to iterate through data in batches
/**
 * Fetches one batch of objects for a single tenant using the cursor API.
 *
 * @param collectionName - Name of the multi-tenant collection to query.
 * @param tenantName - Tenant whose objects should be read.
 * @param batchSize - Maximum number of objects to request in this batch.
 * @param cursor - ID to continue after (the last ID of the previous batch).
 *   Pass `null` (or omit) to start from the beginning.
 * @returns The array found at `result.data.Get[collectionName]`.
 */
async function getBatchWithCursor(
  collectionName: string,
  tenantName: string,
  batchSize: number,
  cursor: string | null = null,
): Promise<any[]> {
  // First prepare the query to run through data
  const baseQuery = client.graphql.get()
    .withClassName(collectionName)
    .withTenant(tenantName)
    .withFields('title description _additional { id }')
    .withLimit(batchSize);

  // With a cursor, fetch the next set of results; otherwise fetch the first set
  const query = cursor ? baseQuery.withAfter(cursor) : baseQuery;

  const result = await query.do();
  return result.data.Get[collectionName];
}
// Get Tenants
const tenants = await client.schema
  .tenantsGetter('MultiTenancyClass')
  .do();

// STEP 2 - Iterate through Tenants
// The tenant list is already resolved, so a plain for...of is enough
for (const tenant of tenants) {
  // For each tenant, reset the cursor to the beginning
  let cursor = null;

  while (true) {
    // Request the next batch of objects
    const nextBatch = await getBatchWithCursor('MultiTenancyClass', tenant.name, 100, cursor);

    // Break the loop if empty – we are done
    if (nextBatch.length === 0) {
      break;
    }

    // Here is your next batch of objects
    console.log(JSON.stringify(nextBatch));

    // Move the cursor to the last returned uuid
    cursor = nextBatch[nextBatch.length - 1]['_additional']['id'];
  }
}
Related pages
- Connect to Weaviate
- How-to: Read objects
- References: GraphQL - Additional Operators
- Manage data: multi-tenancy operations
Questions and feedback
If you have any questions or feedback, let us know in the user forum.