Overview
This guide will help you extract data from W2s using Butler's OCR APIs in Node.js. In 15 minutes you'll be ready to add Node.js W2 OCR into your product or workflow!
Before getting started, you'll want to make sure to do the following:
- Sign up for a free Butler account at https://app.butlerlabs.ai
- Write down your Butler API key from the Settings menu. Follow the Getting Started guide for more details about how to do that.
Get your API ID
Sign in to the Butler product, go to the Library and search for the W2 model:
Click on the W2s card, then press the Try Now button to create a new W2 model:
Once on the model details page, go to the APIs tab:
Copy the API ID (also known as the Queue ID) and write it down. We'll use it in our code below.
Sample Node.js W2 OCR Code
You can copy and paste the following Node.js sample code to process documents with OCR using the API.
// Import necessary libraries
const axios = require('axios');
const fs = require('fs');
const FormData = require('form-data');
// Connection settings shared by the functions in this script
const apiBaseUrl = 'https://app.butlerlabs.ai/api';
// Paste the API Key you wrote down above; it is sent as a Bearer token
const apiKey = 'MY_API_KEY';
const authHeaders = { Authorization: `Bearer ${apiKey}` };
// The Queue API Id you grabbed earlier
const queueId = 'MY_QUEUE_ID';
// Local path(s) of the file(s) you would like to process
const localFilePaths = ['/path/to/file'];
// Upload endpoint for the queue configured above
const uploadUrl = `${apiBaseUrl}/queues/${queueId}/uploads`;
// Uploads the files at the given local paths to the upload API and returns
// the upload id needed for fetching results.
// It is used in our main execution function below.
//
// filePaths: array of local file paths; each file is streamed into the
//            multipart form under the 'files' field
// Returns:   the uploadId taken from the upload API response
// Throws:    any error raised by the HTTP request is propagated unchanged, so
//            the caller sees the real failure instead of a TypeError caused by
//            reading `.data` from an undefined response
const uploadFiles = async (filePaths) => {
  // Prepare files for upload
  const formData = new FormData();
  filePaths.forEach((filePath) => {
    formData.append('files', fs.createReadStream(filePath));
  });

  // Upload files to the upload API
  console.log('Uploading files to Butler for processing');
  const uploadResponse = await axios.post(
    uploadUrl,
    formData,
    {
      headers: {
        ...authHeaders,
        // Multipart content-type (including the boundary) comes from the form itself
        ...formData.getHeaders(),
      },
    }
  );

  // Return the Upload ID
  return uploadResponse.data.uploadId;
};
// Polls the extraction results API for the given upload id and returns the
// response data once the first document reports a 'Completed' status.
//
// uploadId:       id returned by the upload API, sent as a query parameter
// pollIntervalMs: optional wait between polls in milliseconds (default 5000)
// Returns:        the `data` payload of the results API response
// Throws:         any error raised by the HTTP request is propagated
//
// NOTE: only the first document's status is inspected, and polling continues
// for as long as that status is anything other than 'Completed'.
const getExtractionResults = async (uploadId, pollIntervalMs = 5 * 1000) => {
  // URL to fetch the result
  const extractionResultsUrl = apiBaseUrl + '/queues/' + queueId + '/extraction_results';
  const params = { uploadId };

  // Simple helper function for use while polling on results
  const sleep = (waitTimeInMs) => new Promise((resolve) => setTimeout(resolve, waitTimeInMs));

  // Poll until results are ready; the only exits are the return below or a thrown error
  for (;;) {
    console.log('Fetching extraction results');
    const resultApiResponse = await axios.get(
      extractionResultsUrl,
      { headers: { ...authHeaders }, params }
    );

    // If the response has no items yet, keep polling instead of crashing on
    // an undefined first document
    const items = resultApiResponse.data.items || [];
    const firstDocument = items[0];

    if (firstDocument && firstDocument.documentStatus === 'Completed') {
      console.log('Extraction results ready');
      return resultApiResponse.data;
    }

    // Extraction has not yet completed, so wait before asking again
    console.log('Extraction still in progress. Sleeping for ' + (pollIntervalMs / 1000) + ' seconds...');
    await sleep(pollIntervalMs);
  }
};
// Runs the entire script: uploads the files, waits for the extraction
// results, then prints every extracted field and table cell per document.
const main = async () => {
  // Upload Files
  const uploadId = await uploadFiles(localFilePaths);

  // Get the extraction results
  const extractionResults = await getExtractionResults(uploadId);

  // Print out the extraction results for each document
  extractionResults.items.forEach((documentResult) => {
    const fileName = documentResult.fileName;
    console.log('Extraction results from ' + fileName);

    // Print out each field name and extracted value
    console.log('Fields');
    documentResult.formFields.forEach((field) => {
      const fieldName = field.fieldName;
      const extractedValue = field.value;
      console.log(fieldName + ' : ' + extractedValue);
    });

    // Print out the results of each table
    console.log('\n\nTables');
    documentResult.tables.forEach((table) => {
      console.log('Table name: ' + table.tableName);
      table.rows.forEach((row, idx) => {
        let rowResults = 'Row ' + idx + ': \n';
        row.cells.forEach((cell) => {
          // Add each cell's name and extracted value to the row results
          rowResults += cell.columnName + ': ' + cell.value + ' \n';
        });
        console.log(rowResults);
      });
    });
  });
};

// main() returns a promise: handle a rejection explicitly so a failed upload
// or fetch is reported and the process ends with a failing exit code
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Make sure to do the following before running the code:
- Replace the queueId variable with your API ID
- Replace the apiKey variable with your API Key
- Replace the localFilePaths variable with your local file location
In-Product Sample Code
You can also copy the sample code directly from the product. This code will have your API ID and API Key already pre-populated for you!
Extracted W2 Fields
Here is an example of what a W2 JSON response looks like:
{
"documentId": "f3305614-9352-4a65-a49c-6e11dd534b22",
"documentStatus": "Completed",
"fileName": "W2-sample-1.png",
"mimeType": "image/png",
"documentType": "W2s",
"confidenceScore": "High",
"formFields": [
{
"fieldName": "Form Year",
"value": "2014",
"confidenceScore": "High"
},
{
"fieldName": "SSN",
"value": "123-45-6789",
"confidenceScore": "High"
},
{
"fieldName": "EIN",
"value": "11-2233445",
"confidenceScore": "Low"
},
{
"fieldName": "Control Number",
"value": "A1B2",
"confidenceScore": "High"
},
{
"fieldName": "Employee Name",
"value": "Jane A DOE",
"confidenceScore": "Low"
},
{
"fieldName": "Employee Address",
"value": "123 Elm Street\nAnywhere Else, PA 23456",
"confidenceScore": "Low"
},
{
"fieldName": "Wages Tips and Other Compensation",
"value": "48,500.00",
"confidenceScore": "High"
},
{
"fieldName": "Federal Income Tax Withheld",
"value": "6,835.00",
"confidenceScore": "High"
},
{
"fieldName": "Social Security Wages",
"value": "50,000.00",
"confidenceScore": "High"
},
{
"fieldName": "Social Security Tax Withheld",
"value": "3,100.00",
"confidenceScore": "High"
},
{
"fieldName": "Medicare Wages And Tips",
"value": "50,000.00",
"confidenceScore": "High"
},
{
"fieldName": "Medicare Tax Withheld",
"value": "725.00",
"confidenceScore": "High"
},
{
"fieldName": "State Line 1",
"value": "PA",
"confidenceScore": "Low"
},
{
"fieldName": "State Wages Tips Etc Line 1",
"value": "50,000",
"confidenceScore": "High"
},
{
"fieldName": "State Income Tax Line 1",
"value": "1,535",
"confidenceScore": "Low"
},
{
"fieldName": "Local Wages Tips Etc Line 1",
"value": "1,535",
"confidenceScore": "Low"
},
{
"fieldName": "Local Income Tax Line 1",
"value": "750",
"confidenceScore": "Low"
},
{
"fieldName": "Locality Name Line 1",
"value": "MU",
"confidenceScore": "High"
}
],
"tables": []
}
W2 API Response Details
For full details about the W2 Model and its API response, see the W2 page.