Overview

This guide will help you extract data from Paystubs using Butler's OCR APIs in Node.js. In 15 minutes you'll be ready to add Node.js Paystub OCR into your product or workflow!

Before getting started, you'll want to make sure to do the following:

  1. Sign up for a free Butler account at https://app.butlerlabs.ai
  2. Write down your Butler API key from the Settings menu. Follow the Getting Started guide for more details about how to do that.

Get your API ID

Sign in to the Butler product, go to the Library and search for the Paystub model:

[Screenshot: searching for the Paystub model in the Butler Library]

Click on the Paystubs card, then press the Try Now button to create a new Paystub model:

[Screenshot: the Paystubs card with the Try Now button]

Once on the model details page, go to the APIs tab:

[Screenshot: the APIs tab on the model details page]

Copy the API ID (also known as the Queue ID) and write it down. We'll use it in our code below.

Sample Node.js Paystub OCR Code

You can copy and paste the following Node.js sample code to process documents with OCR using the API.

// Import necessary libraries
const axios = require('axios');
const fs = require('fs');
const FormData = require('form-data');

// Root URL shared by every Butler endpoint this script calls
const apiBaseUrl = 'https://app.butlerlabs.ai/api';

// Paste the API Key you wrote down above; it is sent as a Bearer token
// in the Authorization header of each request
const apiKey = 'MY_API_KEY';
const authHeaders = { Authorization: `Bearer ${apiKey}` };

// Paste the Queue API Id you grabbed earlier
const queueId = 'MY_QUEUE_ID';

// List the path of each local file you would like to process
const localFilePaths = ['/path/to/file'];

// Upload endpoint for the queue configured above
const uploadUrl = `${apiBaseUrl}/queues/${queueId}/uploads`;

/**
 * Uploads the given local files to the Butler upload API in a single
 * multipart request. It is used in our main execution function below.
 *
 * @param {string[]} filePaths - Local paths of the files to upload.
 * @returns {Promise<string>} The upload id needed for fetching results.
 * @throws Rejects with the error raised by axios when the upload request fails.
 */
const uploadFiles = async (filePaths) => {
  // Attach every file to the form under the same 'files' field
  const formData = new FormData();
  filePaths.forEach((filePath) => {
    formData.append('files', fs.createReadStream(filePath));
  });

  // Upload files to the upload API.
  // A failed request is deliberately NOT caught here: swallowing it would
  // leave the response undefined and replace the real failure with an
  // unrelated TypeError when the upload id is read below.
  console.log('Uploading files to Butler for processing');
  const uploadResponse = await axios.post(uploadUrl, formData, {
    headers: {
      ...authHeaders,
      // Headers generated by the form itself for this request body
      ...formData.getHeaders(),
    },
  });

  // Return the Upload ID
  return uploadResponse.data.uploadId;
};

/**
 * Polls every 5 seconds for the extraction results using the upload id
 * provided and returns the results once ready.
 *
 * @param {string} uploadId - Upload id returned by the upload API.
 * @param {number} [maxAttempts=Infinity] - Maximum number of polls before
 *   giving up. The default keeps polling until the results are ready.
 * @returns {Promise<object>} The response body of the extraction results API.
 * @throws {Error} When the results are still not ready after maxAttempts polls.
 */
const getExtractionResults = async (uploadId, maxAttempts = Infinity) => {
  // URL to fetch the result
  const extractionResultsUrl = `${apiBaseUrl}/queues/${queueId}/extraction_results`;
  const params = { uploadId };

  // Simple helper function for use while polling on results
  const sleep = (waitTimeInMs) => new Promise((resolve) => setTimeout(resolve, waitTimeInMs));

  // Make sure to poll every few seconds for results.
  // For smaller documents this will typically take only a few seconds
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    console.log('Fetching extraction results');
    const resultApiResponse = await axios.get(
      extractionResultsUrl,
      { headers: { ...authHeaders }, params }
    );

    // A response without any document yet is treated as "not ready"
    // instead of crashing on items[0]
    const items = resultApiResponse.data.items || [];
    const firstDocument = items[0];
    const isCompleted = firstDocument !== undefined
      && firstDocument.documentStatus === 'Completed';

    if (isCompleted) {
      console.log('Extraction results ready');
      return resultApiResponse.data;
    }

    // NOTE(review): only the first document's status is checked, and any
    // status other than 'Completed' keeps polling - confirm whether the API
    // reports a terminal failure status that should stop the loop early.
    if (attempt < maxAttempts) {
      console.log('Extraction still in progress. Sleeping for 5 seconds...');
      await sleep(5 * 1000);
    }
  }

  throw new Error(`Extraction results were not ready after ${maxAttempts} attempts`);
};

// Entry point of the script: uploads the files, waits for the extraction
// to finish, then prints every document's fields and tables
const main = async () => {
  // Send the files, then wait for their extraction results
  const uploadId = await uploadFiles(localFilePaths);
  const { items } = await getExtractionResults(uploadId);

  // Report on each processed document in turn
  for (const { fileName, formFields, tables } of items) {
    console.log(`Extraction results from ${fileName}`);

    // One line per extracted field: "<name> : <value>"
    console.log('Fields');
    for (const { fieldName, value } of formFields) {
      console.log(`${fieldName} : ${value}`);
    }

    // One entry per table row, listing each cell's column name and value
    console.log('\n\nTables');
    for (const { tableName, rows } of tables) {
      console.log(`Table name: ${tableName}`);
      rows.forEach(({ cells }, rowIndex) => {
        const cellLines = cells.map(
          ({ columnName, value }) => `${columnName}: ${value} \n`
        );
        console.log(`Row ${rowIndex}: \n${cellLines.join('')}`);
      });
    }
  }
};

// Run the script. The returned promise is handled explicitly so a failed
// upload or fetch is reported and the process exits with a non-zero status.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

Make sure to do the following before running the code:

  1. Replace the queueId variable with your API ID
  2. Replace the apiKey variable with your API Key
  3. Replace the localFilePaths variable with your local file location

📘

In-Product Sample Code

You can also copy the sample code directly from the product. This code will have your API ID and API Key already pre-populated for you!

Extracted Paystub Fields

Here is an example of what a Paystub JSON response looks like:

{
  "documentId": "63ac0a7e-5cd6-4cbf-8d4f-05be429fb33f",
  "documentStatus": "Completed",
  "fileName": "paystub-sample-2017.jpeg",
  "mimeType": "image/jpeg",
  "documentType": "Paystubs",
  "confidenceScore": "High",
  "formFields": [
    {
      "fieldName": "Employee Address",
      "value": "123 Franklin St\nCHAPEL HILL, NC 27517",
      "confidenceScore": "Low"
    },
    {
      "fieldName": "Employer Address",
      "value": "103 South Building, Campus Box 9100\nChapel Hill, NC 27599-9100",
      "confidenceScore": "Low"
    },
    {
      "fieldName": "Employer Name",
      "value": "The University of North Carolina at Chapel Hill",
      "confidenceScore": "Low"
    },
    {
      "fieldName": "Start Date",
      "value": "07/10/2017",
      "confidenceScore": "High"
    },
    {
      "fieldName": "End Date",
      "value": "07/23/2017",
      "confidenceScore": "High"
    },
    {
      "fieldName": "Gross Earnings",
      "value": "1,627.74",
      "confidenceScore": "High"
    },
    {
      "fieldName": "Gross Earnings YTD",
      "value": "28,707.21",
      "confidenceScore": "High"
    },
    {
      "fieldName": "Net Pay",
      "value": "1,040.23",
      "confidenceScore": "High"
    },
    {
      "fieldName": "Net Pay YTD",
      "value": "18,396.25",
      "confidenceScore": "High"
    },
    {
      "fieldName": "Pay Date",
      "value": "08/04/2017",
      "confidenceScore": "High"
    },
    {
      "fieldName": "Federal Allowance",
      "value": "0",
      "confidenceScore": "High"
    },
    {
      "fieldName": "Federal Marital Status",
      "value": "Single",
      "confidenceScore": "High"
    },
    {
      "fieldName": "State Allowance",
      "value": "0",
      "confidenceScore": "High"
    },
    {
      "fieldName": "State Marital Status",
      "value": "Single",
      "confidenceScore": "High"
    }
  ],
  "tables": [
    {
      "tableName": "Deductions",
      "confidenceScore": "Low",
      "rows": [
        {
          "cells": [
            {
              "columnName": "Deduction Type",
              "value": "TSERS - Retirement",
              "confidenceScore": "High"
            },
            {
              "columnName": "Deduction This Period",
              "value": "25.00",
              "confidenceScore": "High"
            },
            {
              "columnName": "Deduction YTD",
              "value": "425.00",
              "confidenceScore": "High"
            }
          ]
        },
        {
          "cells": [
            {
              "columnName": "Deduction Type",
              "value": "Critical Illness",
              "confidenceScore": "High"
            },
            {
              "columnName": "Deduction This Period",
              "value": "32.10",
              "confidenceScore": "High"
            },
            {
              "columnName": "Deduction YTD",
              "value": "32.00",
              "confidenceScore": "High"
            }
          ]
        }
      ]
    },
    {
      "tableName": "Direct Deposits",
      "confidenceScore": "Low",
      "rows": [
        {
          "cells": [
            {
              "columnName": "Amount",
              "value": "1,040.23",
              "confidenceScore": "High"
            },
            {
              "columnName": "Employee Account Number",
              "value": "XXXXX000000",
              "confidenceScore": "High"
            }
          ]
        }
      ]
    },
    {
      "tableName": "Earnings",
      "confidenceScore": "Low",
      "rows": [
        {
          "cells": [
            {
              "columnName": "Earning Type",
              "value": "Regular",
              "confidenceScore": "High"
            },
            {
              "columnName": "Earning Rate",
              "value": "20.346846",
              "confidenceScore": "High"
            },
            {
              "columnName": "Earning Hours",
              "value": "74.50",
              "confidenceScore": "High"
            },
            {
              "columnName": "Earning This Period",
              "value": "1,515.84",
              "confidenceScore": "High"
            },
            {
              "columnName": "Earning YTD",
              "value": "17,446.65",
              "confidenceScore": "High"
            }
          ]
        },
        {
          "cells": [
            {
              "columnName": "Earning Type",
              "value": "Sick",
              "confidenceScore": "High"
            },
            {
              "columnName": "Earning Rate",
              "value": "20.346846",
              "confidenceScore": "High"
            },
            {
              "columnName": "Earning Hours",
              "value": "3.50",
              "confidenceScore": "High"
            },
            {
              "columnName": "Earning This Period",
              "value": "71.21",
              "confidenceScore": "High"
            },
            {
              "columnName": "Earning YTD",
              "value": "395.51",
              "confidenceScore": "High"
            }
          ]
        }
      ]
    },
    {
      "tableName": "Taxes",
      "confidenceScore": "Low",
      "rows": [
        {
          "cells": [
            {
              "columnName": "Tax Type",
              "value": "Fed Withholdng",
              "confidenceScore": "High"
            },
            {
              "columnName": "Tax This Period",
              "value": "182.98",
              "confidenceScore": "High"
            },
            {
              "columnName": "Tax YTD",
              "value": "3,319.78",
              "confidenceScore": "Low"
            }
          ]
        },
        {
          "cells": [
            {
              "columnName": "Tax Type",
              "value": "Fed MED/EE",
              "confidenceScore": "High"
            },
            {
              "columnName": "Tax This Period",
              "value": "22.12",
              "confidenceScore": "High"
            },
            {
              "columnName": "Tax YTD",
              "value": "394.81",
              "confidenceScore": "Low"
            }
          ]
        }
      ]
    }
  ]
}

📘

Full Paystub API Response

For more details on each of the fields returned from the Paystub API, see the Paystub page.