Overview
This section will help you integrate with Butler using the synchronous API Endpoints. We will cover how to integrate Butler using:
- Python
- NodeJS
- C#
- Raw REST APIs
When to use Sync API Integration
Sync API Integration utilizes a REST API provided by Butler which is capable of uploading and extracting results in one go. This typically makes integration significantly easier. Use this endpoint when the following conditions apply:
- Your documents are less than 5 pages
- Your workflow doesn't need to work on a batch of documents at a time
Pre-requirements
Ensure you have completed the steps in "Using the REST API".
You'll need the following to proceed:
- API Key
- Queue ID
Prepare environment
First, make sure to install the Butler SDK if possible:
pip install butler-sdk
npm install butler-sdk
Upload and extract a document using the API
Once the SDK is installed, create a new script that imports the SDK, then uploads a file and extracts information from it:
from butler import Client
# Credentials for the request.
# API Key: see https://docs.butlerlabs.ai/reference/uploading-documents-to-the-rest-api#get-your-api-key
# Queue ID: see https://docs.butlerlabs.ai/reference/uploading-documents-to-the-rest-api#go-to-the-model-details-page
api_key = '<api-key>'
queue_id = '<queue_id>'

# Build the client first, then run the extraction on the local file.
# The response is a strongly typed object.
client = Client(api_key)
response = client.extract_file(queue_id, 'example.pdf')

# Turn the typed response into a plain dictionary so it prints readably.
result = response.to_dict()
print(result)
import { Butler } from 'butler-sdk';
# Get API Key from https://docs.butlerlabs.ai/reference/uploading-documents-to-the-rest-api#get-your-api-key
const apiKey = '<api-key>'
# Get Queue ID from https://docs.butlerlabs.ai/reference/uploading-documents-to-the-rest-api#go-to-the-model-details-page
const queueId = '<queue_id>'
const file = fs.createReadStream('test.pdf');
# Create client
const client = new Butler(apiKey)
client.extractFile(queueId, file).then((x) => {
console.log(x);
});
using System;
using RestSharp;
namespace UploadApp
{
    // The result classes below keep camelCase property names so that they
    // match the JSON keys of the extraction results response.

    /// <summary>One extracted form field of a document.</summary>
    class ExtractionResultFormField
    {
        public string fieldName { get; set; }
        public string value { get; set; }
        public string confidenceScore { get; set; }
    }

    /// <summary>One cell of an extracted table row.</summary>
    class ExtractionResultTableCell
    {
        public string columnName { get; set; }
        public string value { get; set; }
        // Present on every cell in the response JSON; previously dropped on deserialization.
        public string confidenceScore { get; set; }
    }

    /// <summary>One row of an extracted table.</summary>
    class ExtractionResultTableRow
    {
        public System.Collections.Generic.List<ExtractionResultTableCell> cells { get; set; }
    }

    /// <summary>One extracted table of a document.</summary>
    class ExtractionResultTable
    {
        public string tableName { get; set; }
        public string confidenceScore { get; set; }
        public System.Collections.Generic.List<ExtractionResultTableRow> rows { get; set; }
    }

    /// <summary>Extraction result for a single uploaded document.</summary>
    class ExtractionResultItem
    {
        public string documentId { get; set; }
        public string documentStatus { get; set; }
        public string fileName { get; set; }
        public string mimeType { get; set; }
        public string documentType { get; set; }
        public string confidenceScore { get; set; }
        public System.Collections.Generic.List<ExtractionResultFormField> formFields { get; set; }
        public System.Collections.Generic.List<ExtractionResultTable> tables { get; set; }
    }

    /// <summary>Body of the extraction results response.</summary>
    class ExtractionResultsResponse
    {
        public System.Collections.Generic.List<ExtractionResultItem> items { get; set; }
        public bool hasNext { get; set; }
    }

    class Program
    {
        // A document is ready to consume once its status reads "Completed".
        private const string CompletedStatus = "Completed";

        // Recommended polling cadence: every 5 seconds, for up to 30 seconds.
        private const int PollIntervalMs = 5 * 1000;
        private const int PollTimeoutMs = 30 * 1000;

        /// <summary>
        /// Uploads a batch of files to a queue, polls until every document is
        /// completed (or the polling timeout elapses) and prints the results.
        /// </summary>
        static void Main(string[] args)
        {
            // Specify variables for use in script below
            var api_key = "MY_API_KEY";
            var queue_id = "QUEUE_ID";
            var api_base_url = $"https://app.butlerlabs.ai/api/queues/{queue_id}";

            // Define the files paths you want to upload together as one batch
            var filePaths = new string[] {
                "/path/to/file/1",
                "/path/to/file/2"
            };

            try
            {
                var upload_id = UploadFiles(api_base_url, api_key, filePaths);
                var extraction_res = WaitForResults(api_base_url, api_key, upload_id);

                if (!IsFinished(extraction_res))
                {
                    Console.Error.WriteLine(
                        $"Not all documents were completed after {PollTimeoutMs / 1000} seconds; printing the latest state.");
                }

                PrintResults(extraction_res);
            }
            catch (InvalidOperationException ex)
            {
                // Raised by the helpers below when a request fails or returns an unexpected body.
                Console.Error.WriteLine(ex.Message);
                Environment.ExitCode = 1;
            }
        }

        /// <summary>Uploads all files as one batch and returns the upload ID used to fetch results.</summary>
        /// <exception cref="InvalidOperationException">The request failed or the response has no uploadId.</exception>
        private static string UploadFiles(string apiBaseUrl, string apiKey, string[] filePaths)
        {
            var client = new RestClient($"{apiBaseUrl}/uploads");
            var request = new RestRequest(Method.POST);

            // Authorization header
            request.AddHeader("Authorization", $"Bearer {apiKey}");
            request.AddHeader("Accept", "application/json");
            // Use multipart form data as content type to signify file upload
            request.AddHeader("Content-Type", "multipart/form-data");

            // Add each file to the request
            foreach (string filePath in filePaths)
            {
                var fileBytes = System.IO.File.ReadAllBytes(filePath);
                request.AddFile("files", fileBytes, System.IO.Path.GetFileName(filePath), GetMimeType(filePath));
            }

            IRestResponse response = client.Execute(request);
            EnsureSuccess(response, "Upload");

            var deserial = new RestSharp.Serialization.Json.JsonDeserializer();
            var json_obj = deserial.Deserialize<System.Collections.Generic.Dictionary<string, string>>(response);

            string uploadId = null;
            if (json_obj == null || !json_obj.TryGetValue("uploadId", out uploadId) || string.IsNullOrEmpty(uploadId))
            {
                throw new InvalidOperationException($"Upload response did not contain an uploadId: {response.Content}");
            }

            return uploadId;
        }

        /// <summary>
        /// Picks the mime type from the file extension. Only pdfs, png, and jpg
        /// are supported; anything that is not jpg/jpeg/png is sent as pdf.
        /// </summary>
        private static string GetMimeType(string filePath)
        {
            var extension = System.IO.Path.GetExtension(filePath);

            if (extension.Equals(".jpg", StringComparison.OrdinalIgnoreCase) ||
                extension.Equals(".jpeg", StringComparison.OrdinalIgnoreCase))
            {
                return "image/jpeg";
            }

            if (extension.Equals(".png", StringComparison.OrdinalIgnoreCase))
            {
                return "image/png";
            }

            return "application/pdf";
        }

        /// <summary>
        /// Fetches the extraction results for the upload every <see cref="PollIntervalMs"/>
        /// until all documents are completed or <see cref="PollTimeoutMs"/> has been waited.
        /// Returns the last response fetched, which may still be unfinished on timeout.
        /// </summary>
        private static ExtractionResultsResponse WaitForResults(string apiBaseUrl, string apiKey, string uploadId)
        {
            var waitedMs = 0;

            while (true)
            {
                var results = FetchResults(apiBaseUrl, apiKey, uploadId);

                if (IsFinished(results) || waitedMs >= PollTimeoutMs)
                {
                    return results;
                }

                System.Threading.Thread.Sleep(PollIntervalMs);
                waitedMs += PollIntervalMs;
            }
        }

        /// <summary>Performs a single GET of the extraction results for the specified upload.</summary>
        /// <exception cref="InvalidOperationException">The request failed.</exception>
        private static ExtractionResultsResponse FetchResults(string apiBaseUrl, string apiKey, string uploadId)
        {
            var client = new RestClient($"{apiBaseUrl}/extraction_results");
            var request = new RestRequest(Method.GET);

            // Authorization header
            request.AddHeader("Authorization", $"Bearer {apiKey}");
            request.AddHeader("Accept", "application/json");
            // Get extraction results for the specified upload
            request.AddParameter("uploadId", uploadId);

            IRestResponse response = client.Execute(request);
            EnsureSuccess(response, "Fetching extraction results");

            var deserial = new RestSharp.Serialization.Json.JsonDeserializer();
            return deserial.Deserialize<ExtractionResultsResponse>(response);
        }

        /// <summary>True when the response holds at least one item and every item reads as "Completed".</summary>
        private static bool IsFinished(ExtractionResultsResponse results)
        {
            if (results == null || results.items == null || results.items.Count == 0)
            {
                return false;
            }

            foreach (var item in results.items)
            {
                if (!string.Equals(item.documentStatus, CompletedStatus, StringComparison.Ordinal))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>Throws with the status code and error details when the request did not succeed.</summary>
        private static void EnsureSuccess(IRestResponse response, string action)
        {
            if (!response.IsSuccessful)
            {
                var details = response.ErrorMessage ?? response.Content;
                throw new InvalidOperationException($"{action} failed ({(int)response.StatusCode}): {details}");
            }
        }

        /// <summary>
        /// Writes every document with its form fields and table cells to the console.
        /// (Writing the response object itself would only print its type name.)
        /// </summary>
        private static void PrintResults(ExtractionResultsResponse results)
        {
            if (results == null || results.items == null)
            {
                Console.WriteLine("No extraction results returned.");
                return;
            }

            foreach (var item in results.items)
            {
                Console.WriteLine($"{item.fileName} [{item.documentType}] status: {item.documentStatus}, confidence: {item.confidenceScore}");

                if (item.formFields != null)
                {
                    foreach (var field in item.formFields)
                    {
                        Console.WriteLine($"  {field.fieldName}: {field.value} ({field.confidenceScore})");
                    }
                }

                if (item.tables == null)
                {
                    continue;
                }

                foreach (var table in item.tables)
                {
                    Console.WriteLine($"  Table {table.tableName} ({table.confidenceScore})");

                    if (table.rows == null)
                    {
                        continue;
                    }

                    foreach (var row in table.rows)
                    {
                        if (row.cells == null)
                        {
                            continue;
                        }

                        foreach (var cell in row.cells)
                        {
                            Console.WriteLine($"    {cell.columnName}: {cell.value} ({cell.confidenceScore})");
                        }

                        // Blank line between rows keeps multi-row tables readable
                        Console.WriteLine();
                    }
                }
            }
        }
    }
}
If done correctly, you should see a dictionary containing extracted information that looks something like this:
{
"documentId": "cd7dcfb2-e354-4f90-a1ab-ff972df76dd6",
"documentStatus": "Completed",
"fileName": "my_file.pdf",
"mimeType": "application/pdf",
"documentType": "Invoice",
"confidenceScore": "Low",
"formFields": [
{
"fieldName": "Invoice Number",
"value": "3557968098",
"confidenceScore": "High"
},
{
"fieldName": "Invoice Date",
"value": "Feb 28, 2019",
"confidenceScore": "High"
}
],
"tables": [
{
"tableName": "Line Items",
"confidenceScore": "Low",
"rows": [
{
"cells": [
{
"columnName": "Description",
"confidenceScore": "Low",
"value": "Usage"
},
{
"columnName": "Date",
"confidenceScore": "Low",
"value": "Feb 1 - Feb 28"
},
{
"columnName": "Quantity",
"confidenceScore": "Low",
"value": "4"
},
{
"columnName": "Amount",
"confidenceScore": "High",
"value": "20.00"
}
]
}
]
}
]
}
You'll notice that the response includes some metadata about the file, as well as the extracted form fields in the `formFields` property and the extracted tables in the `tables` property. You can then use these extracted values in any downstream workflows you'd like.
If you reached this point, congrats! You just processed your first document with Butler's REST APIs!