Read all objects
Overview
Sometimes, you may wish to list every object in a class, such as for manual backup when the backup
feature is not suitable. You may also wish to then restore these objects as well, to a different Weaviate instance for example.
The best way to do this is with the after
parameter, also called the cursor API.
The after
parameter is based on the order of IDs. No other ordering of data, such as sorting or searching, is possible.
Retrieve and restore objects
List every object
You can list (i.e. retrieve) every object as shown in the below example, looping through the data with the after
parameter.
The below example connects to a "source" instance at https://some-endpoint.weaviate.network.
It also defines a function that fetches a group of objects (and their title
property) from the WineReview
class, using the ID of the last object retrieved as the cursor:
- Python
- TypeScript
import weaviate
# Client for the SOURCE instance that the objects will be read from
source_client = weaviate.Client(
    url="https://some-endpoint.weaviate.network",  # Replace with your endpoint
    auth_client_secret=weaviate.AuthApiKey(api_key="YOUR-WEAVIATE-API-KEY"),  # If auth enabled. Replace w/ your Weaviate instance API key
)
batch_size = 20  # Number of objects requested per query (used as the query limit)
class_name = "WineReview"  # Class whose objects are listed
class_properties = ["title"]  # Properties retrieved for each object
cursor = None  # ID of the last object retrieved so far; None requests the first group
def get_batch_with_cursor(client, class_name, class_properties, batch_size, cursor=None):
    """Fetch one group of objects from ``class_name`` using the cursor API.

    Args:
        client: Weaviate client connected to the instance to read from.
        class_name: Name of the class whose objects are listed.
        class_properties: List of property names to retrieve for each object.
        batch_size: Maximum number of objects to return in this group.
        cursor: ID of the last object already retrieved, or ``None`` to
            start from the beginning.

    Returns:
        The raw query response. The objects are found under
        ``response["data"]["Get"][class_name]``; each one carries its ID and
        vector under ``"_additional"``.
    """
    query = (
        client.query.get(class_name, class_properties)
        # `id` is needed as the cursor for the next call; `vector` is requested
        # so that the embeddings can be restored along with the properties
        # (this matches the fields requested by the TypeScript example).
        .with_additional(["id", "vector"])
        .with_limit(batch_size)
    )
    # Only add the `after` clause once there is an ID to continue from
    if cursor is not None:
        query = query.with_after(cursor)
    return query.do()
import weaviate, { WeaviateClient, ApiKey } from 'weaviate-ts-client';
// Client for the SOURCE instance that the objects will be read from
const sourceClient = weaviate.client({
  scheme: 'https',
  host: 'some-endpoint.weaviate.network',  // Replace with your Weaviate URL
  apiKey: new ApiKey('YOUR-WEAVIATE-API-KEY'),  // If auth enabled. Replace w/ your Weaviate instance API key.
});
const batchSize = 20;  // Number of objects requested per query (used as the query limit)
const className = 'WineReview';  // Class whose objects are listed
const classProperties = ['title'];  // Properties retrieved for each object
/**
 * Fetch one group of objects from `className` using the cursor API.
 *
 * @param client          Weaviate client connected to the instance to read from.
 * @param className       Name of the class whose objects are listed.
 * @param classProperties Property names to retrieve for each object.
 * @param batchSize       Maximum number of objects to return in this group.
 * @param cursor          ID of the last object already retrieved; omit to start from the beginning.
 * @returns The raw query response; the objects are under `data.Get[className]`,
 *          each with its `id` and `vector` under `_additional`.
 */
async function getBatchWithCursor(
  client: WeaviateClient,
  className: string, classProperties: string[],
  batchSize: number, cursor?: string
): Promise<{data: any}> {
  // Requested properties plus the ID (used as the cursor) and the vector
  const fields = classProperties.join(' ') + ' _additional { id vector }';

  const baseQuery = client.graphql.get()
    .withClassName(className)
    .withFields(fields)
    .withLimit(batchSize);

  // Only add the `after` clause once there is an ID to continue from
  const pagedQuery = cursor ? baseQuery.withAfter(cursor) : baseQuery;
  return await pagedQuery.do();
}
Fetch the schema
You can fetch the existing class definition like this:
- Python
- TypeScript
# Fetch the existing definition of the class from the source instance
class_schema = source_client.schema.get(class_name)
// Fetch the existing definition of the class from the source instance
const classDef = await sourceClient.schema.classGetter().withClassName(className).do();
Restore to a target instance
And then restore to a target instance, by:
- Creating the same class in the target instance using the fetched class definition, and
- Then streaming the objects from the source instance to the target instance using batch imports.
- Python
- TypeScript
# Client for the TARGET instance that the objects will be written to
target_client = weaviate.Client(
    url="https://anon-endpoint.weaviate.network",  # Replace with your endpoint
)

# Create the same class in the target instance, using the fetched definition
target_client.schema.create_class(class_schema)

# Running total of objects streamed so far. It must be initialised before the
# loop; incrementing an undefined name raises NameError on the first group.
aggregate_count = 0

with target_client.batch(
    batch_size=50,
) as batch:
    # Batch import all objects to the target instance
    while True:
        # From the SOURCE instance, get the next group of objects
        results = get_batch_with_cursor(source_client, class_name, class_properties, batch_size, cursor)

        # Surface query errors explicitly instead of failing on a missing key below
        if "errors" in results:
            raise RuntimeError(f"Query against the source instance failed: {results['errors']}")

        objects_list = results["data"]["Get"][class_name]

        # If empty, we're finished
        if not objects_list:
            break

        # Otherwise, add the objects to the batch to be added to the target instance
        aggregate_count += len(objects_list)
        for retrieved_object in objects_list:
            new_object = {prop: retrieved_object[prop] for prop in class_properties}
            batch.add_data_object(
                new_object,
                class_name=class_name,
                # Carry the embedding over when the query returned one;
                # `.get` yields None (the default) when it did not.
                vector=retrieved_object["_additional"].get("vector"),
            )
        print(f"Imported {aggregate_count} objects...")

        # Update the cursor to the ID of the last object retrieved
        cursor = objects_list[-1]["_additional"]["id"]
// Client for the TARGET instance that the objects will be written to
const targetClient = weaviate.client({
  scheme: 'https',
  host: 'anon-endpoint.weaviate.network',  // Replace with your endpoint
});

// Create the same class in the target instance, using the fetched definition
await targetClient.schema.classCreator().withClass(classDef).do();

let targetBatcher = targetClient.batch.objectsBatcher();
let results, cursor;
// Running total of objects queued so far; it must be declared before it is
// incremented below, otherwise the first `++aggregateCount` throws a ReferenceError.
let aggregateCount = 0;

// Batch import all objects to the target instance
while (true) {
  // From the SOURCE instance, get the next group of objects
  results = await getBatchWithCursor(sourceClient, className, classProperties, batchSize, cursor);
  const retrievedObjects = results.data.Get[className];

  // If empty, we're finished
  if (retrievedObjects.length === 0)
    break;

  // Otherwise, add the objects to the batch to be added to the target instance
  for (const retrievedObject of retrievedObjects) {
    const newObject: Record<string, any> = {};
    for (const prop of classProperties)
      newObject[prop] = retrievedObject[prop];

    targetBatcher = targetBatcher.withObject({
      class: className,
      properties: newObject,
      vector: retrievedObject['_additional']['vector'],
    });

    // When the batch counter reaches batchSize, push the objects to Weaviate
    if (++aggregateCount % batchSize === 0) {
      console.log(`Imported ${aggregateCount} objects...`);

      // Flush the batch queue and restart it
      const response = await targetBatcher.do();
      targetBatcher = targetClient.batch.objectsBatcher();

      // Handle errors
      for (const r of response)
        if (r.result?.errors)
          throw r.result.errors;
    }
  }

  // Update the cursor to the ID of the last object retrieved
  cursor = retrievedObjects.at(-1)['_additional']['id'];
}

// Flush any remaining objects, checking them for errors like every other flush
if (targetBatcher.payload().objects.length > 0) {
  const response = await targetBatcher.do();
  for (const r of response)
    if (r.result?.errors)
      throw r.result.errors;
}
Putting it together
Putting the pieces together, the below example will retrieve all objects and the schema from the WineReview
class from https://some-endpoint.weaviate.network
and populate https://anon-endpoint.weaviate.network
with the same:
- Python
- TypeScript
# Retrieve data
import weaviate
# Client for the SOURCE instance that the objects will be read from
source_client = weaviate.Client(
    url="https://some-endpoint.weaviate.network",  # Replace with your endpoint
    auth_client_secret=weaviate.AuthApiKey(api_key="YOUR-WEAVIATE-API-KEY"),  # If auth enabled. Replace w/ your Weaviate instance API key
)
batch_size = 20  # Number of objects requested per query (used as the query limit)
class_name = "WineReview"  # Class whose objects are listed
class_properties = ["title"]  # Properties retrieved for each object
cursor = None  # ID of the last object retrieved so far; None requests the first group
def get_batch_with_cursor(client, class_name, class_properties, batch_size, cursor=None):
    """Fetch one group of objects from ``class_name`` using the cursor API.

    Args:
        client: Weaviate client connected to the instance to read from.
        class_name: Name of the class whose objects are listed.
        class_properties: List of property names to retrieve for each object.
        batch_size: Maximum number of objects to return in this group.
        cursor: ID of the last object already retrieved, or ``None`` to
            start from the beginning.

    Returns:
        The raw query response. The objects are found under
        ``response["data"]["Get"][class_name]``; each one carries its ID and
        vector under ``"_additional"``.
    """
    query = (
        client.query.get(class_name, class_properties)
        # `id` is needed as the cursor for the next call; `vector` is requested
        # so that the embeddings can be restored along with the properties
        # (this matches the fields requested by the TypeScript example).
        .with_additional(["id", "vector"])
        .with_limit(batch_size)
    )
    # Only add the `after` clause once there is an ID to continue from
    if cursor is not None:
        query = query.with_after(cursor)
    return query.do()
# Use this function to retrieve data
# Fetch the schema
class_schema = source_client.schema.get(class_name)
# Finished fetching the schema

# Restore to a new (target) instance
target_client = weaviate.Client(
    url="https://anon-endpoint.weaviate.network",  # Replace with your endpoint
)

# Create the same class in the target instance, using the fetched definition
target_client.schema.create_class(class_schema)

# Running total of objects streamed so far. It must be initialised before the
# loop; incrementing an undefined name raises NameError on the first group.
aggregate_count = 0

with target_client.batch(
    batch_size=50,
) as batch:
    # Batch import all objects to the target instance
    while True:
        # From the SOURCE instance, get the next group of objects
        results = get_batch_with_cursor(source_client, class_name, class_properties, batch_size, cursor)

        # Surface query errors explicitly instead of failing on a missing key below
        if "errors" in results:
            raise RuntimeError(f"Query against the source instance failed: {results['errors']}")

        objects_list = results["data"]["Get"][class_name]

        # If empty, we're finished
        if not objects_list:
            break

        # Otherwise, add the objects to the batch to be added to the target instance
        aggregate_count += len(objects_list)
        for retrieved_object in objects_list:
            new_object = {prop: retrieved_object[prop] for prop in class_properties}
            batch.add_data_object(
                new_object,
                class_name=class_name,
                # Carry the embedding over when the query returned one;
                # `.get` yields None (the default) when it did not.
                vector=retrieved_object["_additional"].get("vector"),
            )
        print(f"Imported {aggregate_count} objects...")

        # Update the cursor to the ID of the last object retrieved
        cursor = objects_list[-1]["_additional"]["id"]
import weaviate, { WeaviateClient, ApiKey } from 'weaviate-ts-client';
// Client for the SOURCE instance that the objects will be read from
const sourceClient = weaviate.client({
  scheme: 'https',
  host: 'some-endpoint.weaviate.network',  // Replace with your Weaviate URL
  apiKey: new ApiKey('YOUR-WEAVIATE-API-KEY'),  // If auth enabled. Replace w/ your Weaviate instance API key.
});
const batchSize = 20;  // Number of objects requested per query (used as the query limit)
const className = 'WineReview';  // Class whose objects are listed
const classProperties = ['title'];  // Properties retrieved for each object
/**
 * Fetch one group of objects from `className` using the cursor API.
 *
 * @param client          Weaviate client connected to the instance to read from.
 * @param className       Name of the class whose objects are listed.
 * @param classProperties Property names to retrieve for each object.
 * @param batchSize       Maximum number of objects to return in this group.
 * @param cursor          ID of the last object already retrieved; omit to start from the beginning.
 * @returns The raw query response; the objects are under `data.Get[className]`,
 *          each with its `id` and `vector` under `_additional`.
 */
async function getBatchWithCursor(
  client: WeaviateClient,
  className: string, classProperties: string[],
  batchSize: number, cursor?: string
): Promise<{data: any}> {
  // Requested properties plus the ID (used as the cursor) and the vector
  const fields = classProperties.join(' ') + ' _additional { id vector }';

  const baseQuery = client.graphql.get()
    .withClassName(className)
    .withFields(fields)
    .withLimit(batchSize);

  // Only add the `after` clause once there is an ID to continue from
  const pagedQuery = cursor ? baseQuery.withAfter(cursor) : baseQuery;
  return await pagedQuery.do();
}
// Use this function to retrieve data
// Fetch the schema
const classDef = await sourceClient.schema.classGetter().withClassName(className).do();
// Finished fetching the schema

// Restore to a new (target) instance
const targetClient = weaviate.client({
  scheme: 'https',
  host: 'anon-endpoint.weaviate.network',  // Replace with your endpoint
});

// Create the same class in the target instance, using the fetched definition
await targetClient.schema.classCreator().withClass(classDef).do();

let targetBatcher = targetClient.batch.objectsBatcher();
let results, cursor;
// Running total of objects queued so far; it must be declared before it is
// incremented below, otherwise the first `++aggregateCount` throws a ReferenceError.
let aggregateCount = 0;

// Batch import all objects to the target instance
while (true) {
  // From the SOURCE instance, get the next group of objects
  results = await getBatchWithCursor(sourceClient, className, classProperties, batchSize, cursor);
  const retrievedObjects = results.data.Get[className];

  // If empty, we're finished
  if (retrievedObjects.length === 0)
    break;

  // Otherwise, add the objects to the batch to be added to the target instance
  for (const retrievedObject of retrievedObjects) {
    const newObject: Record<string, any> = {};
    for (const prop of classProperties)
      newObject[prop] = retrievedObject[prop];

    targetBatcher = targetBatcher.withObject({
      class: className,
      properties: newObject,
      vector: retrievedObject['_additional']['vector'],
    });

    // When the batch counter reaches batchSize, push the objects to Weaviate
    if (++aggregateCount % batchSize === 0) {
      console.log(`Imported ${aggregateCount} objects...`);

      // Flush the batch queue and restart it
      const response = await targetBatcher.do();
      targetBatcher = targetClient.batch.objectsBatcher();

      // Handle errors
      for (const r of response)
        if (r.result?.errors)
          throw r.result.errors;
    }
  }

  // Update the cursor to the ID of the last object retrieved
  cursor = retrievedObjects.at(-1)['_additional']['id'];
}

// Flush any remaining objects, checking them for errors like every other flush
if (targetBatcher.payload().objects.length > 0) {
  const response = await targetBatcher.do();
  for (const r of response)
    if (r.result?.errors)
      throw r.result.errors;
}
More Resources
If you can't find the answer to your question here, please look at the:
- Frequently Asked Questions. Or,
- Knowledge base of old issues. Or,
- For questions: Stackoverflow. Or,
- For more involved discussion: Weaviate Community Forum. Or,
- We also have a Slack channel.