/v1/batch

Weaviate on Stackoverflow badge Weaviate issues on Github badge Weaviate total Docker pulls badge

💡 You are looking at older or release candidate documentation. The current Weaviate version is v1.15.2

With batch you can upload a lot of data objects in bulk. This saves time compared to sending many single requests.


Batch data objects

For sending data objects to Weaviate in bulk.

Method and URL

POST /v1/batch/objects

Parameters

The body requires the following field:

name | type | required | description
objects | list of data objects | yes | a list of data objects, which correspond to the data object body

Example request

  import weaviate

client = weaviate.Client("http://localhost:8080")

first_object_props = {
    "name": "Jane Doe",
    "writesFor": [{
        "beacon": "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"
    }]
}

second_object_props = {
    "name": "John Doe",
    "writesFor": [{
        "beacon": "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"
    }]
}

# Python client specific configurations can be set with `client.batch.configure`
# the settings can be applied to both `objects` AND `references`.
# You have to only set them once.
client.batch.configure(
  # `batch_size` takes an `int` value to enable auto-batching
  # (`None` is used for manual batching)
  batch_size=100, 
  # dynamically update the `batch_size` based on import speed
  dynamic=False,
  # `timeout_retries` takes an `int` value to retry on time outs
  timeout_retries=3,
  # checks for batch-item creation errors
  # this is the default in weaviate-client >= 3.6.0
  callback=weaviate.util.check_batch_result,
)

with client.batch as batch:
  # Add object without a custom vector.
  # When using vectorization modules this can be used
  # or when you don't want to set a vector
  batch.add_data_object(first_object_props, 'Author', "36ddd591-2dee-4e7e-a3cc-eb86d30a4303")
  # Add object with a custom vector
  batch.add_data_object(second_object_props, 'Author', "36ddd591-2dee-4e7e-a3cc-eb86d30a4304", vector=[0.1, 0.2, 0.3])
  const weaviate = require("weaviate-client");

const client = weaviate.client({
  scheme: 'http',
  host: 'localhost:8080',
});

// The two `Author` objects to import. `writesFor` is a cross-reference,
// given as a list of beacons. Declared with `const` because the binding is
// never reassigned.
const toImport = [
  {
    class: "Author",
    id: '36ddd591-2dee-4e7e-a3cc-eb86d30a4303',
    properties: {
      name: "Jane Doe",
      writesFor: [{
        beacon: "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80",
      }],
    },
  },
  {
    class: "Author",
    id: '36ddd591-2dee-4e7e-a3cc-eb86d30a4304',
    properties: {
      name: "John Doe",
      writesFor: [{
        beacon: "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80",
      }],
    },
  },
];

// Add both objects to one batch, send it, and log either the response or
// the error.
client.batch
  .objectsBatcher()
  .withObject(toImport[0])
  .withObject(toImport[1])
  .do()
  .then((res) => {
    console.log(res);
  })
  .catch((err) => {
    console.error(err);
  });

/* The following is also possible:
const weaviate = require("weaviate-client");

const client = weaviate.client({
  scheme: 'http',
  host: 'localhost:8080',
});

const toImport = [
  client.data
    .creator()
    .withClassName('Author')
    .withId('36ddd591-2dee-4e7e-a3cc-eb86d30a4303')
    .withProperties({
      name: "Jane Doe",
      writesFor: [{
        beacon: "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"
      }]
    })
    .payload(), // note the .payload(), not .do()!
  client.data
    .creator()
    .withClassName('Author')
    .withId('36ddd591-2dee-4e7e-a3cc-eb86d30a4304')
    .withProperties({
      name: "John Doe",
      writesFor: [{
        beacon: "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"
      }]
    })
    .payload(), // note the .payload(), not .do()!
  ];

client.batch
      .objectsBatcher()
      .withObject(toImport[0])
      .withObject(toImport[1])
      .do()
      .then(res => {
        console.log(res)
      })
      .catch(err => {
          console.error(err)
      });
*/
  package main

import (
  "context"
  "fmt"

  "github.com/semi-technologies/weaviate-go-client/v4/weaviate"
  "github.com/semi-technologies/weaviate/entities/models"
)

// main sends two Author objects to Weaviate in a single batch request and
// prints the result; it panics if the request returns an error.
func main() {
  cfg := weaviate.Config{
    Host:   "localhost:8080",
    Scheme: "http",
  }
  client := weaviate.New(cfg)

  objects := []*models.Object{
    {
      Class: "Author",
      ID:    "36ddd591-2dee-4e7e-a3cc-eb86d30a4303",
      Properties: map[string]interface{}{
        "name": "Jane Doe",
        // A cross-reference is a list of beacons, matching the Python,
        // JavaScript and curl examples for this endpoint.
        "writesFor": []map[string]string{
          {"beacon": "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"},
        },
      },
    },
    {
      Class: "Author",
      ID:    "36ddd591-2dee-4e7e-a3cc-eb86d30a4304",
      Properties: map[string]interface{}{
        "name": "John Doe",
        "writesFor": []map[string]string{
          {"beacon": "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"},
        },
      },
    },
  }

  result, err := client.Batch().ObjectsBatcher().
    WithObject(objects[0]).
    WithObject(objects[1]).
    Do(context.Background())

  if err != nil {
    panic(err)
  }
  fmt.Printf("%v", result)
}
  package technology.semi.weaviate;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import technology.semi.weaviate.client.Config;
import technology.semi.weaviate.client.WeaviateClient;
import technology.semi.weaviate.client.base.Result;
import technology.semi.weaviate.client.v1.batch.model.ObjectGetResponse;
import technology.semi.weaviate.client.v1.data.model.WeaviateObject;

/**
 * Sends two Author objects to Weaviate in a single batch request and prints
 * either the error or the result.
 */
public class App {
  /** Beacon of the object both authors reference through `writesFor`. */
  private static final String WRITES_FOR_BEACON =
    "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80";

  public static void main(String[] args) {
    Config config = new Config("http", "localhost:8080");
    WeaviateClient client = new WeaviateClient(config);

    // Typed list filled with plain add() calls instead of a raw ArrayList
    // with double-brace initialization.
    List<WeaviateObject> objects = new ArrayList<>();
    objects.add(author("36ddd591-2dee-4e7e-a3cc-eb86d30a4303", "Jane Doe"));
    objects.add(author("36ddd591-2dee-4e7e-a3cc-eb86d30a4304", "John Doe"));

    Result<ObjectGetResponse[]> result = client.batch().objectsBatcher()
      .withObject(objects.get(0))
      .withObject(objects.get(1))
      .run();

    if (result.hasErrors()) {
      System.out.println(result.getError());
      return;
    }
    System.out.println(result.getResult());
  }

  /**
   * Builds an Author object with the given id and name.
   *
   * @param id   UUID of the object
   * @param name value of the `name` property
   * @return the object, ready to be added to a batch
   */
  private static WeaviateObject author(String id, String name) {
    Map<String, Object> properties = new HashMap<>();
    properties.put("name", name);
    // A cross-reference is a list of beacons, matching the Python,
    // JavaScript and curl examples for this endpoint.
    properties.put(
      "writesFor",
      Collections.singletonList(Collections.singletonMap("beacon", WRITES_FOR_BEACON))
    );

    return WeaviateObject.builder()
      .className("Author")
      .id(id)
      .properties(properties)
      .build();
  }
}
  # Create two Author objects with a single batch request.
$ curl \
--request POST \
--header "Content-Type: application/json" \
--data '{
  "objects": [{
    "class": "Author",
    "id": "36ddd591-2dee-4e7e-a3cc-eb86d30a4303",
    "properties": {
        "name": "Jane Doe",
        "writesFor": [{
          "beacon": "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"
        }]
    }
  }, {
    "class": "Author",
    "id": "36ddd591-2dee-4e7e-a3cc-eb86d30a4304",
    "properties": {
        "name": "John Doe",
        "writesFor": [{
          "beacon": "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"
        }]
    }
  }]
}' \
http://localhost:8080/v1/batch/objects

For detailed information and instructions on batching in Python, click here.

Tips for batching objects with the Python Client

  • There is no limit to how many objects/references one could add to a batch before committing/creating it. However, a batch that is too large can lead to a TimeOut error, which means that Weaviate could not process and create all the objects from the batch in the specified time (the timeout configuration can be set like this or this). Note that setting a timeout configuration higher than 60s would require some changes to the docker-compose.yml/helm chart file.
  • The batch class in the Python Client can be used in three ways:
    • Case 1: Everything should be done by the user, i.e. the user should add the objects/object-references and create them whenever the user wants. To create one of the data types, use these methods of this class: create_objects, create_references and flush. This case has the Batch instance’s batch_size set to None (see docs for the configure or __call__ method). Can be used in a context manager, see below.
    • Case 2: Batch auto-creates when full. This can be achieved by setting the Batch instance’s batch_size to a positive integer (see docs for the configure or __call__ method). The batch_size in this case corresponds to the sum of added objects and references. This case does not require the user to create the batch/s, but it can be done. Also, to create non-full batches (last batches) that do not meet the requirement to be auto-created, use the flush method. Can be used in a context manager, see below.
    • Case 3: Similar to Case 2 but uses dynamic batching, i.e. auto-creates either objects or references when one of them reaches the recommended_num_objects or recommended_num_references respectively. See docs for the configure or __call__ method for how to enable it.
    • Context-manager support: Can be used with the with statement. When it exits the context manager, it calls the flush method for you. Can be combined with the configure or __call__ method in order to set it to the desired Case.

Batch references

For batching cross-references between data objects in bulk.

Method and URL

POST /v1/batch/references

Parameters

The body of the request is a list of objects, each containing:

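name | type | required | description
from | beacon | yes | The beacon, in the form of weaviate://{host}/{Classname}/{id}/{cref_property_name}
to | beacon | yes | The beacon, in the form of weaviate://{host}/{id} (from v1.14 onwards the class name may be included: weaviate://{host}/{Classname}/{id})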

Example request

  import weaviate

client = weaviate.Client("http://localhost:8080")

# Python client specific configurations can be set with `client.batch.configure`
# the settings can be applied to both `objects` AND `references`.
# You have to only set them once.
client.batch.configure(
  # `batch_size` takes an `int` value to enable auto-batching
  # (`None` is used for manual batching)
  batch_size=100, 
  # dynamically update the `batch_size` based on import speed
  dynamic=False,
  # `timeout_retries` takes an `int` value to retry on time outs
  timeout_retries=3,
  # checks for batch-item creation errors
  # this is the default in weaviate-client >= 3.6.0
  callback=weaviate.util.check_batch_result,
)

with client.batch as batch:
    # Format for batching is as follows:
    # client.batch.add_reference(
    #   from_object_uuid=<from_object_uuid>,
    #   from_object_class_name=<from_object_class_name>,
    #   from_property_name=<from_property_name>
    #   to_object_uuid=<to_object_uuid>,
    #   to_object_class_name=<to_object_class_name>,
    # )
    batch.add_reference(
      from_object_uuid="36ddd591-2dee-4e7e-a3cc-eb86d30a4303",
      from_object_class_name="Author",
      from_property_name="wroteArticles",
      to_object_uuid="6bb06a43-e7f0-393e-9ecf-3c0f4e129064",
      to_object_class_name="Article",
    )
    batch.add_reference(
      from_object_uuid="36ddd591-2dee-4e7e-a3cc-eb86d30a4303",
      from_object_class_name="Author",
      from_property_name="wroteArticles",
      to_object_uuid="b72912b9-e5d7-304e-a654-66dc63c55b32",
      to_object_class_name="Article",
    )
  const weaviate = require("weaviate-client");

const client = weaviate.client({
  scheme: 'http',
  host: 'localhost:8080',
});

// Cross-references to create, each one going from an Author's
// `wroteArticles` property to an Article.
// Prior to v1.14 omit the class name as part of the "to" beacon and specify
// it as weaviate://localhost/<id>
const references = [
  {
    from: 'weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4303/wroteArticles',
    to: 'weaviate://localhost/Article/6bb06a43-e7f0-393e-9ecf-3c0f4e129064',
  },
  {
    from: 'weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4303/wroteArticles',
    to: 'weaviate://localhost/Article/b72912b9-e5d7-304e-a654-66dc63c55b32',
  },
  {
    from: 'weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4304/wroteArticles',
    to: 'weaviate://localhost/Article/b72912b9-e5d7-304e-a654-66dc63c55b32',
  },
];

// Queue every reference on one batcher, in order, then send the batch.
let batcher = client.batch.referencesBatcher();
for (const reference of references) {
  batcher = batcher.withReference(reference);
}

batcher
  .do()
  .then((res) => {
    console.log(res);
  })
  .catch((err) => {
    console.error(err);
  });

/* This is also possible with a builder pattern:
const weaviate = require("weaviate-client");

const client = weaviate.client({
  scheme: 'http',
  host: 'localhost:8080',
});

client.batch
      .referencesBatcher()
      .withReference(
        client.batch
          .referencePayloadBuilder()
          .withFromClassName("Author")
          .withFromRefProp('wroteArticles')
          .withFromId("36ddd591-2dee-4e7e-a3cc-eb86d30a4303")
          .withToClassName("Article") // prior to v1.14 omit .withToClassName()
          .withToId("6bb06a43-e7f0-393e-9ecf-3c0f4e129064")
          .payload()
      )
      .withReference(
        client.batch
          .referencePayloadBuilder()
          .withFromClassName("Author")
          .withFromRefProp('wroteArticles')
          .withFromId("36ddd591-2dee-4e7e-a3cc-eb86d30a4303")
          .withToClassName("Article") // prior to v1.14 omit .withToClassName()
          .withToId("b72912b9-e5d7-304e-a654-66dc63c55b32")
          .payload()
      )
      .withReference(
        client.batch
          .referencePayloadBuilder()
          .withFromClassName("Author")
          .withFromRefProp('wroteArticles')
          .withFromId("36ddd591-2dee-4e7e-a3cc-eb86d30a4304")
          .withToClassName("Article") // prior to v1.14 omit .withToClassName()
          .withToId("b72912b9-e5d7-304e-a654-66dc63c55b32")
          .payload()
      )
      .do()
      .then(res => {
        console.log(res)
      })
      .catch(err => {
          console.error(err)
      });
*/
  package main

import (
  "context"
  "fmt"

  "github.com/semi-technologies/weaviate-go-client/v4/weaviate"
  "github.com/semi-technologies/weaviate/entities/models"
)

// main sends three cross-references to Weaviate in a single batch request
// and prints the result; it panics if the request returns an error.
func main() {
  client := weaviate.New(weaviate.Config{
    Host:   "localhost:8080",
    Scheme: "http",
  })

  // Each reference goes from an Author's `wroteArticles` property to an Article.
  // Prior to v1.14 omit the class name as part of the "to" beacon and specify
  // it as weaviate://localhost/<id>
  references := []*models.BatchReference{
    {
      From: "weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4303/wroteArticles",
      To:   "weaviate://localhost/Article/6bb06a43-e7f0-393e-9ecf-3c0f4e129064",
    },
    {
      From: "weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4303/wroteArticles",
      To:   "weaviate://localhost/Article/b72912b9-e5d7-304e-a654-66dc63c55b32",
    },
    {
      From: "weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4304/wroteArticles",
      To:   "weaviate://localhost/Article/b72912b9-e5d7-304e-a654-66dc63c55b32",
    },
  }

  // Queue the references in order on a single batcher.
  batcher := client.Batch().ReferencesBatcher()
  for _, reference := range references {
    batcher = batcher.WithReference(reference)
  }

  result, err := batcher.Do(context.Background())
  if err != nil {
    panic(err)
  }
  fmt.Printf("%v", result)
}
  package technology.semi.weaviate;

import java.util.ArrayList;
import java.util.List;
import technology.semi.weaviate.client.Config;
import technology.semi.weaviate.client.WeaviateClient;
import technology.semi.weaviate.client.base.Result;
import technology.semi.weaviate.client.v1.batch.model.BatchReference;
import technology.semi.weaviate.client.v1.batch.model.BatchReferenceResponse;

/**
 * Sends three cross-references to Weaviate in a single batch request and
 * prints either the error or the result.
 */
public class App {
  public static void main(String[] args) {
    Config config = new Config("http", "localhost:8080");
    WeaviateClient client = new WeaviateClient(config);

    // Typed list filled with plain add() calls instead of a raw ArrayList
    // with double-brace initialization.
    // Prior to v1.14 omit the class name as part of the "to" beacon and
    // specify it as weaviate://localhost/<id>
    List<BatchReference> refs = new ArrayList<>();
    refs.add(
      BatchReference.builder()
        .from("weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4303/wroteArticles")
        .to("weaviate://localhost/Article/6bb06a43-e7f0-393e-9ecf-3c0f4e129064")
        .build()
    );
    refs.add(
      BatchReference.builder()
        .from("weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4303/wroteArticles")
        .to("weaviate://localhost/Article/b72912b9-e5d7-304e-a654-66dc63c55b32")
        .build()
    );
    refs.add(
      BatchReference.builder()
        .from("weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4304/wroteArticles")
        .to("weaviate://localhost/Article/b72912b9-e5d7-304e-a654-66dc63c55b32")
        .build()
    );

    Result<BatchReferenceResponse[]> result = client.batch().referencesBatcher()
      .withReference(refs.get(0))
      .withReference(refs.get(1))
      .withReference(refs.get(2))
      .run();

    if (result.hasErrors()) {
      System.out.println(result.getError());
      return;
    }
    System.out.println(result.getResult());
  }
}
  # Create three cross-references with a single batch request.
# For Weaviate versions older than v1.14, omit the class name in the "to"
# beacon and specify it as weaviate://localhost/<id>. The long-form beacon
# used in "from" is unchanged.
$ curl \
--request POST \
--header "Content-Type: application/json" \
--data '[{
    "from": "weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4303/wroteArticles",
    "to": "weaviate://localhost/Article/6bb06a43-e7f0-393e-9ecf-3c0f4e129064"
  }, {
    "from": "weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4303/wroteArticles",
    "to": "weaviate://localhost/Article/b72912b9-e5d7-304e-a654-66dc63c55b32"
  }, {
    "from": "weaviate://localhost/Author/36ddd591-2dee-4e7e-a3cc-eb86d30a4304/wroteArticles",
    "to": "weaviate://localhost/Article/b72912b9-e5d7-304e-a654-66dc63c55b32"
  }]' \
http://localhost:8080/v1/batch/references

For detailed information and instructions on batching in Python, click here.

Error handling

When sending a batch request to your Weaviate instance, it could be the case that an error occurs. This can be caused by several reasons, for example that the connection to Weaviate is lost or that there is a mistake in a single data object that you are trying to add.

You can check whether an error has occurred, and of what kind.

A batch request will always return an HTTP 200 status code when the batch request was successful. That means that the batch was successfully sent to Weaviate, and there were no issues with the connection or processing of the batch and no malformed request (4xx status code). However, with a 200 status code, there might still be individual failures of the data objects which are not reflected in the status code. Thus, a 200 status code does not guarantee that each batch item is added/created. An example of an error on an individual data object that might go unnoticed when sending a batch request without checking the individual results is: Adding an object to the batch that is in conflict with the schema (for example a non-existing class name).

The following Python code can be used to handle errors on individual data objects in the batch.

import weaviate

client = weaviate.Client("http://localhost:8080")

author_props = {
    "name": "Jane Doe",
    "writesFor": [
        {"beacon": "weaviate://localhost/f81bfe5e-16ba-4615-a516-46c2ae2e5a80"},
    ],
}

client.batch.add_data_object(author_props, "Author", "36ddd591-2dee-4e7e-a3cc-eb86d30a4303")
# `client.batch.flush()` does not return anything, but
# `client.batch.create_objects()` and `client.batch.create_references()` do.
batch_results = client.batch.create_objects()

# Print the message of every error reported for an individual batch item.
for item in (batch_results if batch_results is not None else []):
    if 'result' not in item:
        continue
    outcome = item['result']
    if 'errors' not in outcome or 'error' not in outcome['errors']:
        continue
    for error in outcome['errors']['error']:
        print(error['message'])

This can also be applied to adding references in batch. Note that sending batches, especially references, skips some validation on object and reference level. Adding this validation on single data objects like above makes it less likely for errors to go undiscovered.

More Resources

If you can’t find the answer to your question here, please look at the:

  1. Frequently Asked Questions. Or,
  2. Knowledge base of old issues. Or,
  3. For questions: Stackoverflow. Or,
  4. For issues: Github. Or,
  5. Ask your question in the Slack channel: Slack.