Skip to content
This repository was archived by the owner on Aug 28, 2023. It is now read-only.
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 216 additions & 0 deletions general/human_review.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Manually Trigger the Human Review Process for Any Document\n",
"This notebook shows how to manually trigger the human review process for any document (when the validation filter is set to no-filter in the human review configuration)."
],
"metadata": {
"id": "L0J138mj7p1s"
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"# Install the necessary Python libraries, then restart your kernel.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -r ../requirements.txt"
],
"outputs": [],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_v0XtSwn7fmN",
"outputId": "373b9379-f6ac-451a-e428-ad9219fb31d0"
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"import json\n",
"import mimetypes\n",
"import os\n",
"\n",
"from google.cloud import documentai_v1beta3 as documentai\n",
"import pandas as pd\n",
"from PIL import Image, ImageDraw"
],
"outputs": [],
"metadata": {
"id": "Y8eO6Kcp7v2x"
}
},
{
"cell_type": "markdown",
"source": [
"## Set your Processor Variables "
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"# Project ID used in the processor's resource name.\n",
"PROJECT_ID = \"YOUR_PROJECT_ID_HERE\"\n",
"\n",
"# Processor location; the format is 'us' or 'eu'.\n",
"LOCATION = \"us\"\n",
"\n",
"# ID of the processor; create the processor in the Cloud Console first.\n",
"PROCESSOR_ID = \"PROCESSOR_ID\"\n",
"\n",
"# Path of the target document; update it to point at your own file.\n",
"DOCUMENT_PATH = \"../resources/general/form.tiff\""
],
"outputs": [],
"metadata": {
"id": "k3c1mTa6IOk3"
}
},
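{
"cell_type": "markdown",
"source": [
"The following code sends the document to the processor's human review configuration. The call returns a long-running operation, which is printed so that you can check on the review later."
],
"metadata": {}
},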
{
"cell_type": "code",
"execution_count": null,
"source": [
"def review_document_sample():\n",
"    \"\"\"Send the document at DOCUMENT_PATH to human review.\n",
"\n",
"    Reads PROJECT_ID, LOCATION, PROCESSOR_ID and DOCUMENT_PATH from the\n",
"    notebook's configuration cell.\n",
"\n",
"    Returns:\n",
"        The object returned by ``client.review_document``; its ``operation``\n",
"        attribute is printed so the operation name can be copied later.\n",
"    \"\"\"\n",
"    # Instantiate a client that talks to the regional endpoint for LOCATION.\n",
"    client_options = {\"api_endpoint\": f\"{LOCATION}-documentai.googleapis.com\"}\n",
"    client = documentai.DocumentProcessorServiceClient(client_options=client_options)\n",
"\n",
"    # The full resource name of the human review config, e.g.:\n",
"    # projects/project-id/locations/location/processors/processor-id/humanReviewConfig\n",
"    # You must create new processors in the Cloud Console first\n",
"    name = f\"projects/{PROJECT_ID}/locations/{LOCATION}/processors/{PROCESSOR_ID}/humanReviewConfig\"\n",
"\n",
"    # Read the file into memory\n",
"    with open(DOCUMENT_PATH, \"rb\") as image:\n",
"        image_content = image.read()\n",
"\n",
"    # Derive the MIME type from the file extension so that pointing\n",
"    # DOCUMENT_PATH at e.g. a PDF does not send a mislabeled payload;\n",
"    # fall back to TIFF when the extension is not recognized.\n",
"    mime_type = mimetypes.guess_type(DOCUMENT_PATH)[0] or \"image/tiff\"\n",
"    document = {\"content\": image_content, \"mime_type\": mime_type}\n",
"\n",
"    # Configure the review request\n",
"    request = {\"human_review_config\": name, \"document\": document}\n",
"\n",
"    # Trigger human review of the document\n",
"    result = client.review_document(request=request)\n",
"\n",
"    print(f\"Human Review Triggered for the document.\\n{result.operation}\\n\")\n",
"\n",
"    return result"
],
"outputs": [],
"metadata": {
"id": "hO3yJpDoJ3Zf"
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"result = review_document_sample()"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"# Human in the loop (HITL) Operation"
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"**Only complete this section if a HITL Operation is triggered.** <br>"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"# Name of the long-running operation (LRO) printed when the review was triggered,\n",
"# ex. projects/660199673046/locations/us/operations/174674963333130330\n",
"lro = \"LONG_RUNNING_OPERATION\""
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"# Use the same regional endpoint as the review request so the lookup is sent\n",
"# to the location in which the operation was created.\n",
"client_options = {\"api_endpoint\": f\"{LOCATION}-documentai.googleapis.com\"}\n",
"client = documentai.DocumentProcessorServiceClient(client_options=client_options)\n",
"operation = client._transport.operations_client.get_operation(lro)\n",
"if operation.done:\n",
"    # operation.response carries a serialized message; decode it instead of\n",
"    # slicing the repr of its raw bytes, which only works for some path lengths.\n",
"    review_response = documentai.ReviewDocumentResponse.deserialize(operation.response.value)\n",
"    print(\"HITL location: {} \".format(review_response.gcs_destination))\n",
"else:\n",
"    print('Waiting on human review.')"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"# Replace HITL_LOCATION with the location printed above,\n",
"# ex. gs://gcs_bucket/receipt-output/174674963333130330/data-00001-of-00001.json\n",
"!gsutil cp \"HITL_LOCATION\" response.json"
],
"outputs": [],
"metadata": {}
}, 
{
"cell_type": "code",
"execution_count": null,
"source": [
"# Load the reviewed document that was downloaded to response.json.\n",
"with open(\"response.json\", \"r\", encoding=\"utf-8\") as file:\n",
"    data = json.load(file)\n",
"\n",
"# Several entities can share one type, so keep every mention per type\n",
"# instead of letting later entities overwrite earlier ones.\n",
"entities = {}\n",
"for entity in data.get(\"entities\", []):\n",
"    # Entities without 'mentionText' are reported with an empty value.\n",
"    entities.setdefault(entity[\"type\"], []).append(entity.get(\"mentionText\", \"\"))\n",
"\n",
"for entity_type, mentions in entities.items():\n",
"    for mention in mentions:\n",
"        print(\"{} : {}\\n \".format(entity_type, mention))"
],
"outputs": [],
"metadata": {}
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "LendingAI Bouding Boxes v3.ipynb",
"provenance": [],
"toc_visible": true
},
"environment": {
"name": "common-cpu.m65",
"type": "gcloud",
"uri": "gcr.io/deeplearning-platform-release/base-cpu:m65"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}