Skip to main content

Migrate data

Overview

Follow these examples to migrate data manually when using a backup is not possible. They cover all permutations between:

  • a single-tenancy collection (Collection), and
  • a tenant in a multi-tenancy collection (Tenant).
Additional information

The examples use two different Weaviate instances running on the same host, exposed through different ports. The same process can be used for two instances running on different hosts as well.

Cross-references in Weaviate are properties. As such, you can retrieve a cross-reference in one of two ways:

  1. Retrieve the object that it belongs to.
  2. Retrieve the cross-reference as a part of a query.
What about cross-references?

These scripts should migrate cross-references as well.


Cross-references are properties. As such, these cursor-based exports will include them. During restoration, restore the cross-referenced (i.e. "to") object first, then the object that contains the cross-reference (i.e. "from" object).

Collection → Collection

Step 1: Create the target collection(s)

Create a collection (e.g. WineReview) at the target instance, matching the collection (e.g. WineReview) at the source instance.

import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient

import os  # required by the os.getenv() calls below

# Source instance: no port arguments are passed, so the client's defaults apply.
client_src = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")}
)

# Target instance: a second local Weaviate exposed on different ports.
client_tgt = weaviate.connect_to_local(
    port=8090,
    grpc_port=50061,
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
)

def create_collection(client_in: WeaviateClient, collection_name: str, enable_mt=False):
    """Create ``collection_name`` on ``client_in`` and return the new collection.

    Args:
        client_in: Client connected to the instance that will hold the collection.
        collection_name: Name of the collection to create.
        enable_mt: Forwarded as ``enabled`` to the multi-tenancy configuration.

    Returns:
        The value returned by ``client_in.collections.create``.
    """
    mt_config = wvc.config.Configure.multi_tenancy(enabled=enable_mt)
    # Additional settings not shown
    return client_in.collections.create(
        name=collection_name,
        multi_tenancy_config=mt_config,
    )


# Single-tenancy target collection: multi-tenancy stays disabled.
reviews_tgt = create_collection(
    client_in=client_tgt,
    collection_name="WineReview",
    enable_mt=False,
)

Step 2: Migrate the data

Migrate:

  • The source collection data in the client_src instance
  • to target collection in the client_tgt instance
def migrate_data(collection_src: Collection, collection_tgt: Collection):
    """Copy every object from ``collection_src`` into ``collection_tgt``.

    Objects are read with the source collection's iterator (vectors included)
    and written to the target in fixed-size batches of 100. Each object keeps
    its UUID and properties; the vector stored under the ``"default"`` key is
    copied along with it.

    Args:
        collection_src: Collection (or tenant-scoped collection) to read from.
        collection_tgt: Collection (or tenant-scoped collection) to write to.

    Returns:
        ``True`` if the batch reported no failed objects, ``False`` otherwise.
    """
    # Imported here so the snippet runs as shown; tqdm only draws the progress bar.
    from tqdm import tqdm

    with collection_tgt.batch.fixed_size(batch_size=100) as batch:
        for q in tqdm(collection_src.iterator(include_vector=True)):
            batch.add_object(
                properties=q.properties,
                vector=q.vector["default"],
                uuid=q.uuid,
            )

    # Per-object import failures are collected on the batch wrapper, so they
    # must be inspected explicitly once the batch context has flushed.
    failed_objects = collection_tgt.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
        return False

    return True


try:
    reviews_src = client_src.collections.get("WineReview")
    reviews_tgt = client_tgt.collections.get("WineReview")

    migrate_data(reviews_src, reviews_tgt)
finally:
    # Release both connections even if the lookup or the migration raises.
    client_src.close()
    client_tgt.close()

Collection → Tenant

Step 1: Create the target collection(s)

Create a collection (e.g. WineReviewMT) at the target instance, matching the collection (e.g. WineReview) at the source instance, and enable multi-tenancy for the target collection.

import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient

import os  # required by the os.getenv() calls below

# Source instance: no port arguments are passed, so the client's defaults apply.
client_src = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")}
)

# Target instance: a second local Weaviate exposed on different ports.
client_tgt = weaviate.connect_to_local(
    port=8090,
    grpc_port=50061,
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
)

def create_collection(client_in: WeaviateClient, collection_name: str, enable_mt=False):
    """Create ``collection_name`` on ``client_in`` and return the new collection.

    Args:
        client_in: Client connected to the instance that will hold the collection.
        collection_name: Name of the collection to create.
        enable_mt: Forwarded as ``enabled`` to the multi-tenancy configuration.

    Returns:
        The value returned by ``client_in.collections.create``.
    """
    mt_config = wvc.config.Configure.multi_tenancy(enabled=enable_mt)
    # Additional settings not shown
    return client_in.collections.create(
        name=collection_name,
        multi_tenancy_config=mt_config,
    )


# Multi-tenancy target collection: tenants are added to it in the next step.
reviews_mt_tgt = create_collection(
    client_in=client_tgt,
    collection_name="WineReviewMT",
    enable_mt=True,
)

Step 2: Create the tenant(s)

Add tenants at the target instance before adding data objects.

# Tenant definitions for the target collection, in a fixed order
# (index 0 is "tenantA").
tenants_tgt = [
    wvc.tenants.Tenant(name=tenant_name)
    for tenant_name in ("tenantA", "tenantB")
]

# Register the tenants on the target collection before any objects are added.
reviews_mt_tgt = client_tgt.collections.get("WineReviewMT")
reviews_mt_tgt.tenants.create(tenants_tgt)

Step 3: Migrate the data

Migrate:

  • The source collection data in the client_src instance
  • to a target tenant of the target collection in the client_tgt instance
def migrate_data(collection_src: Collection, collection_tgt: Collection):
    """Copy every object from ``collection_src`` into ``collection_tgt``.

    Objects are read with the source collection's iterator (vectors included)
    and written to the target in fixed-size batches of 100. Each object keeps
    its UUID and properties; the vector stored under the ``"default"`` key is
    copied along with it.

    Args:
        collection_src: Collection (or tenant-scoped collection) to read from.
        collection_tgt: Collection (or tenant-scoped collection) to write to.

    Returns:
        ``True`` if the batch reported no failed objects, ``False`` otherwise.
    """
    # Imported here so the snippet runs as shown; tqdm only draws the progress bar.
    from tqdm import tqdm

    with collection_tgt.batch.fixed_size(batch_size=100) as batch:
        for q in tqdm(collection_src.iterator(include_vector=True)):
            batch.add_object(
                properties=q.properties,
                vector=q.vector["default"],
                uuid=q.uuid,
            )

    # Per-object import failures are collected on the batch wrapper, so they
    # must be inspected explicitly once the batch context has flushed.
    failed_objects = collection_tgt.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
        return False

    return True


try:
    reviews_src = client_src.collections.get("WineReview")
    reviews_mt_tgt = client_tgt.collections.get("WineReviewMT")
    # Scope the target collection to the first tenant ("tenantA").
    reviews_tgt_tenant_a = reviews_mt_tgt.with_tenant(tenants_tgt[0].name)

    migrate_data(reviews_src, reviews_tgt_tenant_a)
finally:
    # Release both connections even if the lookup or the migration raises.
    client_src.close()
    client_tgt.close()

Tenant → Collection

Step 1: Create the target collection(s)

Create a single-tenancy collection (e.g. WineReview) at the target instance, matching the multi-tenancy collection (e.g. WineReviewMT) at the source instance. Do not enable multi-tenancy for the target collection.

import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient

import os  # required by the os.getenv() calls below

# Source instance: no port arguments are passed, so the client's defaults apply.
client_src = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")}
)

# Target instance: a second local Weaviate exposed on different ports.
client_tgt = weaviate.connect_to_local(
    port=8090,
    grpc_port=50061,
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
)

def create_collection(client_in: WeaviateClient, collection_name: str, enable_mt=False):
    """Create ``collection_name`` on ``client_in`` and return the new collection.

    Args:
        client_in: Client connected to the instance that will hold the collection.
        collection_name: Name of the collection to create.
        enable_mt: Forwarded as ``enabled`` to the multi-tenancy configuration.

    Returns:
        The value returned by ``client_in.collections.create``.
    """
    mt_config = wvc.config.Configure.multi_tenancy(enabled=enable_mt)
    # Additional settings not shown
    return client_in.collections.create(
        name=collection_name,
        multi_tenancy_config=mt_config,
    )


# Single-tenancy target collection: multi-tenancy stays disabled.
reviews_tgt = create_collection(
    client_in=client_tgt,
    collection_name="WineReview",
    enable_mt=False,
)

Step 2: Migrate the data

Migrate:

  • The source tenant data from source collection in the client_src instance
  • to target collection in the client_tgt instance
def migrate_data(collection_src: Collection, collection_tgt: Collection):
    """Copy every object from ``collection_src`` into ``collection_tgt``.

    Objects are read with the source collection's iterator (vectors included)
    and written to the target in fixed-size batches of 100. Each object keeps
    its UUID and properties; the vector stored under the ``"default"`` key is
    copied along with it.

    Args:
        collection_src: Collection (or tenant-scoped collection) to read from.
        collection_tgt: Collection (or tenant-scoped collection) to write to.

    Returns:
        ``True`` if the batch reported no failed objects, ``False`` otherwise.
    """
    # Imported here so the snippet runs as shown; tqdm only draws the progress bar.
    from tqdm import tqdm

    with collection_tgt.batch.fixed_size(batch_size=100) as batch:
        for q in tqdm(collection_src.iterator(include_vector=True)):
            batch.add_object(
                properties=q.properties,
                vector=q.vector["default"],
                uuid=q.uuid,
            )

    # Per-object import failures are collected on the batch wrapper, so they
    # must be inspected explicitly once the batch context has flushed.
    failed_objects = collection_tgt.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
        return False

    return True


try:
    reviews_src = client_src.collections.get("WineReviewMT")
    # Scope the source collection to the tenant whose data is migrated.
    reviews_src_tenant_a = reviews_src.with_tenant("tenantA")
    reviews_tgt = client_tgt.collections.get("WineReview")

    migrate_data(reviews_src_tenant_a, reviews_tgt)
finally:
    # Release both connections even if the lookup or the migration raises.
    client_src.close()
    client_tgt.close()

Tenant → Tenant

Step 1: Create the target collection(s)

Create a collection (e.g. WineReviewMT) at the target instance, matching the collection (e.g. WineReviewMT) at the source instance, including enabling multi-tenancy.

import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient

import os  # required by the os.getenv() calls below

# Source instance: no port arguments are passed, so the client's defaults apply.
client_src = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")}
)

# Target instance: a second local Weaviate exposed on different ports.
client_tgt = weaviate.connect_to_local(
    port=8090,
    grpc_port=50061,
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
)

def create_collection(client_in: WeaviateClient, collection_name: str, enable_mt=False):
    """Create ``collection_name`` on ``client_in`` and return the new collection.

    Args:
        client_in: Client connected to the instance that will hold the collection.
        collection_name: Name of the collection to create.
        enable_mt: Forwarded as ``enabled`` to the multi-tenancy configuration.

    Returns:
        The value returned by ``client_in.collections.create``.
    """
    mt_config = wvc.config.Configure.multi_tenancy(enabled=enable_mt)
    # Additional settings not shown
    return client_in.collections.create(
        name=collection_name,
        multi_tenancy_config=mt_config,
    )


# Multi-tenancy target collection: tenants are added to it in the next step.
reviews_mt_tgt = create_collection(
    client_in=client_tgt,
    collection_name="WineReviewMT",
    enable_mt=True,
)

Step 2: Create the tenant(s)

Add tenants at the target instance before adding data objects.

# Tenant definitions for the target collection, in a fixed order
# (index 0 is "tenantA").
tenants_tgt = [
    wvc.tenants.Tenant(name=tenant_name)
    for tenant_name in ("tenantA", "tenantB")
]

# Register the tenants on the target collection before any objects are added.
reviews_mt_tgt = client_tgt.collections.get("WineReviewMT")
reviews_mt_tgt.tenants.create(tenants_tgt)

Step 3: Migrate the data

Migrate:

  • The source tenant data from source collection in the client_src instance
  • to a target tenant of the target collection in the client_tgt instance
def migrate_data(collection_src: Collection, collection_tgt: Collection):
    """Copy every object from ``collection_src`` into ``collection_tgt``.

    Objects are read with the source collection's iterator (vectors included)
    and written to the target in fixed-size batches of 100. Each object keeps
    its UUID and properties; the vector stored under the ``"default"`` key is
    copied along with it.

    Args:
        collection_src: Collection (or tenant-scoped collection) to read from.
        collection_tgt: Collection (or tenant-scoped collection) to write to.

    Returns:
        ``True`` if the batch reported no failed objects, ``False`` otherwise.
    """
    # Imported here so the snippet runs as shown; tqdm only draws the progress bar.
    from tqdm import tqdm

    with collection_tgt.batch.fixed_size(batch_size=100) as batch:
        for q in tqdm(collection_src.iterator(include_vector=True)):
            batch.add_object(
                properties=q.properties,
                vector=q.vector["default"],
                uuid=q.uuid,
            )

    # Per-object import failures are collected on the batch wrapper, so they
    # must be inspected explicitly once the batch context has flushed.
    failed_objects = collection_tgt.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
        return False

    return True


try:
    reviews_mt_src = client_src.collections.get("WineReviewMT")
    # Scope the source collection to the tenant whose data is migrated.
    reviews_src_tenant_a = reviews_mt_src.with_tenant("tenantA")
    reviews_mt_tgt = client_tgt.collections.get("WineReviewMT")
    # Scope the target collection to the first tenant ("tenantA").
    reviews_tgt_tenant_a = reviews_mt_tgt.with_tenant(tenants_tgt[0].name)

    migrate_data(reviews_src_tenant_a, reviews_tgt_tenant_a)
finally:
    # Release both connections even if the lookup or the migration raises.
    client_src.close()
    client_tgt.close()