diff --git a/examples/data_profiler_demo.ipynb b/examples/data_profiler_demo.ipynb new file mode 100644 index 000000000..0b95b6781 --- /dev/null +++ b/examples/data_profiler_demo.ipynb @@ -0,0 +1,765 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fc2826d9", + "metadata": {}, + "source": [ + "# Data Profiler - What's in your data?" + ] + }, + { + "cell_type": "markdown", + "id": "b997522b", + "metadata": {}, + "source": [ + "The library is designed to easily detect sensitive data and gather statistics on your datasets with just a few lines of code.\n", + "\n", + "This demo covers the following:\n", + "\n", + " - Basic usage of the Data Profiler\n", + " - The data reader class\n", + " - Updating and merging profiles\n", + " - Profile differences\n", + " - Graphing a profile\n", + " - Saving profiles\n", + " - Data labeling\n", + "\n", + "First, let's import the libraries needed for this example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef404c84", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import json\n", + "\n", + "import pandas as pd\n", + "import tensorflow as tf\n", + "\n", + "try:\n", + " sys.path.insert(0, '..')\n", + " import dataprofiler as dp\n", + "except ImportError:\n", + " import dataprofiler as dp\n", + " \n", + "data_folder = \"../dataprofiler/tests/data\"\n", + "\n", + "# remove extra tf logging\n", + "tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)" + ] + }, + { + "cell_type": "markdown", + "id": "f51971e3", + "metadata": {}, + "source": [ + "## Basic Usage of the Data Profiler" + ] + }, + { + "cell_type": "markdown", + "id": "639e66d3", + "metadata": {}, + "source": [ + "This section shows the basic usage of the Data Profiler: a CSV dataset is read with the data reader, and the resulting Data object is passed to the Profiler to detect sensitive data and gather statistics."
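, + "\n", + "\n", + "The report's \"output_format\" option controls how the results are returned; besides \"compact\" (used below), DataProfiler also supports formats such as \"pretty\", \"serializable\", and \"flat\". A quick sketch, using the profile created in the next cell:\n", + "\n", + "```python\n", + "# sketch: request a flattened report instead of the compact one\n", + "report_flat = profile.report(report_options={\"output_format\": \"flat\"})\n", + "```"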
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "375eafc3-fa33-49ec-af7c-7d06644debb0", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# read, profile, and get the report in 3 lines\n", + "\n", + "# get the data\n", + "data = dp.Data(os.path.join(data_folder, \"csv/diamonds.csv\"))\n", + "\n", + "# profile the data\n", + "profile = dp.Profiler(data)\n", + "\n", + "# generate the report\n", + "report = profile.report(report_options={\"output_format\": \"compact\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2cd8345-2ebd-4455-8f95-d430808031d2", + "metadata": {}, + "outputs": [], + "source": [ + "data.head() # data.data provides access to a pandas.DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cc43b3b-3fcd-498c-89cd-ac646feb144a", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# print the report\n", + "print('\\nREPORT:\\n' + '='*80)\n", + "print(json.dumps(report, indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "74027cfd", + "metadata": {}, + "source": [ + "## Data reader class -- Automatic Detection" + ] + }, + { + "cell_type": "markdown", + "id": "41364888", + "metadata": { + "tags": [] + }, + "source": [ + "Within the Data Profiler, there are 5 data reader classes:\n", + "\n", + " * CSVData (delimited data: CSV, TSV, etc.)\n", + " * JSONData\n", + " * ParquetData\n", + " * AVROData\n", + " * TextData" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "823829f4", + "metadata": {}, + "outputs": [], + "source": [ + "# use data reader to read input data with different file types\n", + "data_folder = \"../dataprofiler/tests/data\"\n", + "csv_files = [\n", + " \"csv/aws_honeypot_marx_geo.csv\",\n", + " \"csv/all-strings-skip-header-author.csv\", # csv files with the author/description on the first line\n", + " \"csv/sparse-first-and-last-column-empty-first-row.txt\", # csv file with the .txt extension\n", + "]\n", + "json_files = [\n", + " \"json/complex_nested.json\",\n", + " \"json/honeypot_intentially_mislabeled_file.csv\", # json file with the .csv extension\n", + "]\n", + "parquet_files = [\n", + " \"parquet/nation.dict.parquet\",\n", + " \"parquet/nation.plain.intentionally_mislabled_file.csv\", # parquet file with the .csv extension\n", + "]\n", + "avro_files = [\n", + " \"avro/userdata1.avro\",\n", + " \"avro/userdata1_intentionally_mislabled_file.json\", # avro file with the .json extension\n", + "]\n", + "text_files = [\n", + " \"txt/discussion_reddit.txt\",\n", + "]\n", + "\n", + "all_files = csv_files + json_files + parquet_files + avro_files + text_files\n", + "\n", + "print('filepath' + ' ' * 58 + 'data type')\n", + "print('='*80)\n", + "for file in all_files:\n", + " filepath = os.path.join(data_folder, file)\n", + " ############################\n", + " ##### READING THE DATA #####\n", + " data = dp.Data(filepath)\n", + " ############################\n", + " print(\"{:<65} {:<15}\".format(file, data.data_type))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47b0290a-9434-47cd-9fd5-2c4e9e91240a", + "metadata": {}, + "outputs": [], + "source": [ + "# importing from a url\n", + "data = dp.Data('https://raw.githubusercontent.com/capitalone/DataProfiler/main/dataprofiler/tests/data/csv/diamonds.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "a6cede34-2734-466f-a1f6-afd2cc04a67c", + "metadata": {}, + "source": [ + "## Data 
Profiling\n", + "As we saw above, profiling is as simple as:\n", + "\n", + "```python\n", + "import dataprofiler as dp\n", + "\n", + "data = dp.Data('my_data.csv')\n", + "profiler = dp.Profiler(data)\n", + "report = profiler.report(report_options={\"output_format\": \"compact\"})\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "2c44a0c3-a5fa-4aba-891b-9cb4e7c2b204", + "metadata": {}, + "source": [ + "### Update profiles - the case for batching / streaming data" + ] + }, + { + "cell_type": "markdown", + "id": "965f8c85", + "metadata": {}, + "source": [ + "The Profiler allows users to send data to a profile in batches, updating the existing profile with each new chunk." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "deae4b9d-281f-4835-869c-df86309ddcb8", + "metadata": {}, + "outputs": [], + "source": [ + "# divide dataset in half\n", + "data = dp.Data(os.path.join(data_folder, \"csv/diamonds.csv\"))\n", + "df = data.data\n", + "df1 = df.iloc[:int(len(df)/2)]\n", + "df2 = df.iloc[int(len(df)/2):]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03b4705e-a95f-4551-aef2-af22bd62fd5f", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Create a profile from the first half, then update it with the second half\n", + "profile = dp.Profiler(df1)\n", + "\n", + "############################\n", + "####### BATCH UPDATE #######\n", + "profile.update_profile(df2)\n", + "############################\n", + "report_batch = profile.report(report_options={\"output_format\": \"compact\"})\n", + "\n", + "# print('\\nREPORT:\\n' + '='*80)\n", + "print(json.dumps(report_batch, indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "c547f051", + "metadata": {}, + "source": [ + "### Merge profiles -- the case for parallelization" + ] + }, + { + "cell_type": "markdown", + "id": "a5292962", + "metadata": {}, + "source": [ + "Two profiles can be added together to create a combined profile." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a565b8d1", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# create two profiles and merge\n", + "profile1 = dp.Profiler(df1)\n", + "profile2 = dp.Profiler(df2)\n", + "profile_merge = profile1 + profile2\n", + "\n", + "# check results of the merged profile\n", + "report_merge = profile_merge.report(report_options={\"output_format\": \"compact\"})\n", + "\n", + "# # print the report\n", + "# print('\\nREPORT:\\n' + '='*80)\n", + "# print(json.dumps(report_merge, indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "f008bb48-32c7-4038-9096-978a9f74e545", + "metadata": {}, + "source": [ + "# Differences in Data\n", + "Profile differences can be applied to both structured and unstructured datasets. 
\n", + "\n", + "Such reports can highlight the differences between training and validation data, as in this pseudo example:\n", + "```python\n", + "profiler_training = dp.Profiler(training_data)\n", + "profiler_testing = dp.Profiler(testing_data)\n", + "\n", + "validation_report = profiler_training.diff(profiler_testing)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0a6ff30-f015-45f0-a5a4-414c837c9029", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "\n", + "# structured differences example\n", + "data_split_differences = profile1.diff(profile2)\n", + "pprint(data_split_differences)" + ] + }, + { + "cell_type": "markdown", + "id": "48baef5f-c57d-48cd-bf18-b34f566d35df", + "metadata": {}, + "source": [ + "## Graphing a Profile\n", + "\n", + "We've also added the ability to generate visual reports from a profile.\n", + "\n", + "The following plots are currently available and work directly with your profiles:\n", + "\n", + " * missing values matrix\n", + " * histogram (numeric columns only)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28b23fdb-026e-4394-ac37-bbd1e052c68c", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "# get the data\n", + "data = dp.Data(os.path.join(data_folder, \"csv/aws_honeypot_marx_geo.csv\"))\n", + "\n", + "# profile the data\n", + "profile = dp.Profiler(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6fa42a-79eb-47a1-a058-f60b4de4e83d", + "metadata": {}, + "outputs": [], + "source": [ + "# generate a missing values matrix\n", + "fig = plt.figure(figsize=(8, 6), dpi=100)\n", + "fig = dp.graphs.plot_missing_values_matrix(profile, ax=fig.gca(), title=\"Missing Values Matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3402553-d2b8-4f73-85f3-45e739fb4144", + "metadata": {}, + "outputs": [], + "source": [ + "# generate histogram of all int/float columns\n", + "fig = dp.graphs.plot_histograms(profile)\n", + "fig.set_size_inches(8, 6)\n", + "fig.set_dpi(100)" + ] + }, + { + "cell_type": "markdown", + "id": "2335024f-a233-4e2b-aafd-dbf956562524", + "metadata": {}, + "source": [ + "## Saving and Loading a Profile" + ] + }, + { + "cell_type": "markdown", + "id": "de3e0e30-33e3-4b18-8240-7e5c6029eb97", + "metadata": {}, + "source": [ + "Not only can the Profiler create and update profiles, it's also possible to save, load, and then manipulate profiles."
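, + "\n", + "\n", + "As a minimal sketch of the load-then-manipulate workflow (\"new_data.csv\" is a hypothetical new batch, not one of the bundled test files):\n", + "\n", + "```python\n", + "# reload a previously saved profile and merge it with a profile of new data\n", + "loaded_profile = dp.Profiler.load(\"my_profile.pkl\")\n", + "new_profile = dp.Profiler(dp.Data(\"new_data.csv\"))  # hypothetical new batch\n", + "combined_profile = loaded_profile + new_profile\n", + "```"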
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffdb2126-cc11-49f1-aa5e-a533efc59f25", + "metadata": {}, + "outputs": [], + "source": [ + "# Load data\n", + "data = dp.Data(os.path.join(data_folder, \"csv/diamonds.csv\"))\n", + "\n", + "# Generate a profile\n", + "profile = dp.Profiler(data)\n", + "\n", + "# Save a profile to disk for later (saves as pickle file)\n", + "profile.save(filepath=\"my_profile.pkl\")\n", + "\n", + "# Load a profile from disk\n", + "loaded_profile = dp.Profiler.load(\"my_profile.pkl\")\n", + "\n", + "# Report the compact version of the profile\n", + "# report = profile.report(report_options={\"output_format\":\"compact\"})\n", + "# print(json.dumps(report, indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "4787d4ff-8bd7-4c91-b197-dc875fb1d2d9", + "metadata": {}, + "source": [ + "# Unstructured Profiling\n", + "\n", + "Similar to structured datasets, text data can also be profiled with the unstructured profiler. \n", + "It currently provides an easy overview of information in the text such as:\n", + " * memory size\n", + " * char stats\n", + " * word stats\n", + " * data labeling entity stats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f9dca8e-72fe-4f0f-ae8e-33e8974e4107", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "profiler_string = dp.Profiler(\"This is my random text: 332-23-2123\")\n", + "print(json.dumps(profiler_string.report(), indent=4))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc759400-13ff-4451-bbe5-645368d7aa75", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "email_data = [\"Message-ID: <11111111.1111111111111.JavaMail.evans@thyme>\\n\" + \\\n", + " \"Date: Fri, 10 Aug 2005 11:31:37 -0700 (PDT)\\n\" + \\\n", + " \"From: w..smith@company.com\\n\" + \\\n", + " \"To: john.smith@company.com\\n\" + \\\n", + " \"Subject: RE: ABC\\n\" + \\\n", + " \"Mime-Version: 1.0\\n\" + \\\n", + " \"Content-Type: text/plain; charset=us-ascii\\n\" + \\\n", + " \"Content-Transfer-Encoding: 7bit\\n\" + \\\n", + " \"X-From: Smith, Mary W. \\n\" + \\\n", + " \"X-To: Smith, John \\n\" + \\\n", + " \"X-cc: \\n\" + \\\n", + " \"X-bcc: \\n\" + \\\n", + " \"X-Folder: \\SSMITH (Non-Privileged)\\Sent Items\\n\" + \\\n", + " \"X-Origin: Smith-S\\n\" + \\\n", + " \"X-FileName: SSMITH (Non-Privileged).pst\\n\\n\" + \\\n", + " \"All I ever saw was the e-mail from the office.\\n\\n\" + \\\n", + " \"Mary\\n\\n\" + \\\n", + " \"-----Original Message-----\\n\" + \\\n", + " \"From: Smith, John \\n\" + \\\n", + " \"Sent: Friday, August 10, 2005 13:07 PM\\n\" + \\\n", + " \"To: Smith, Mary W.\\n\" + \\\n", + " \"Subject: ABC\\n\\n\" + \\\n", + " \"Have you heard any more regarding the ABC sale? 
I guess that means that \" + \\\n", + " \"it's no big deal here, but you think they would have send something.\\n\\n\\n\" + \\\n", + " \"John Smith\\n\" + \\\n", + " \"123-456-7890\\n\"]\n", + "\n", + "profiler_email = dp.Profiler(email_data, profiler_type='unstructured')\n", + "print(json.dumps(profiler_email.report(), indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "6ffa6d25-4d77-412a-b020-e34363d49786", + "metadata": {}, + "source": [ + "## Merging Unstructured Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2a2180e-c4d0-4f06-bc0f-a9b6bcbb0d47", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "merged_profile = profiler_string + profiler_email\n", + "print(json.dumps(merged_profile.report(), indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "f123f695-6bb5-467f-b90b-ea27fb9c6595", + "metadata": {}, + "source": [ + "## Differences in Unstructured Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0815362-5308-43fc-81de-4afea20118c2", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# unstructured differences example\n", + "validation_report = profiler_email.diff(profiler_string)\n", + "print(json.dumps(validation_report, indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "98435618-b157-40b3-92e7-3c8f42ea910f", + "metadata": {}, + "source": [ + "## Data Labeling" + ] + }, + { + "cell_type": "markdown", + "id": "b4cc0029-781d-4717-ad94-a26df0e9857f", + "metadata": {}, + "source": [ + "The Labeler is a pipeline designed to make building, training, and predicting with ML models quick and easy. There are 3 major components to the Labeler: the preprocessor, the model, and the postprocessor."
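, + "\n", + "\n", + "As a minimal sketch (assuming the default pretrained unstructured labeler and a made-up sentence), predictions can be obtained directly from a DataLabeler; the cells below build on this with helper functions and output formatting:\n", + "\n", + "```python\n", + "# sketch: run the default unstructured labeler on a made-up sentence\n", + "labeler = dp.DataLabeler(labeler_type='unstructured')\n", + "predictions = labeler.predict([\"John's phone number is 123-456-7890\"])\n", + "print(predictions['pred'])  # raw predictions; see below for readable NER output\n", + "```"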
+ ] + }, + { + "cell_type": "markdown", + "id": "f013f36f-2b83-473d-88f9-090cde664b8a", + "metadata": {}, + "source": [ + "![Data Labeler flowchart](DL-Flowchart.png \"Data Labeler pipeline\")" + ] + }, + { + "cell_type": "markdown", + "id": "d2689e4f-02c3-480d-a420-09bba1dccbe9", + "metadata": {}, + "source": [ + "Default labels:\n", + "* UNKNOWN\n", + "* ADDRESS\n", + "* BAN (bank account number, 10-18 digits)\n", + "* CREDIT_CARD\n", + "* EMAIL_ADDRESS\n", + "* UUID\n", + "* HASH_OR_KEY (md5, sha1, sha256, random hash, etc.)\n", + "* IPV4\n", + "* IPV6\n", + "* MAC_ADDRESS\n", + "* PERSON\n", + "* PHONE_NUMBER\n", + "* SSN\n", + "* URL\n", + "* US_STATE\n", + "* DRIVERS_LICENSE\n", + "* DATE\n", + "* TIME\n", + "* DATETIME\n", + "* INTEGER\n", + "* FLOAT\n", + "* QUANTITY\n", + "* ORDINAL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a838d98-9c39-4539-8cff-ca862e6512bb", + "metadata": {}, + "outputs": [], + "source": [ + "# helper functions for printing results\n", + "\n", + "def get_structured_results(results):\n", + " \"\"\"Helper function to get data labels for each column.\"\"\"\n", + " columns = []\n", + " predictions = []\n", + " samples = []\n", + " for col in results['data_stats']:\n", + " columns.append(col['column_name'])\n", + " predictions.append(col['data_label'])\n", + " samples.append(col['samples'])\n", + "\n", + " df_results = pd.DataFrame({'Column': columns, 'Prediction': predictions, 'Sample': samples})\n", + " return df_results\n", + "\n", + "def get_unstructured_results(data, results):\n", + " \"\"\"Helper function to get data labels for each labeled piece of text.\"\"\"\n", + " labeled_data = []\n", + " for pred in results['pred'][0]:\n", + " labeled_data.append([data[0][pred[0]:pred[1]], pred[2]])\n", + " label_df = pd.DataFrame(labeled_data, columns=['Text', 'Labels'])\n", + " return label_df\n", + " \n", + "\n", + "pd.set_option('display.width', 100)" + ] + }, + { + "cell_type": "markdown", + "id": "fb094133-0937-4218-b922-a515787d70a7", + "metadata": {}, + "source": [ + "### Structured Labeling\n", + "\n", + "Each column within your profile is given a suggested data label." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62fba6e3-3b2c-439b-bb56-59e977986286", + "metadata": {}, + "outputs": [], + "source": [ + "# profile data and get labels for each column\n", + "data = dp.Data(os.path.join(data_folder, \"csv/SchoolDataSmall.csv\"))\n", + "profiler = dp.Profiler(data)\n", + "report = profiler.report()\n", + "\n", + "\n", + "print('\\nLabel Predictions:\\n' + '=' * 85)\n", + "print(get_structured_results(report))" + ] + }, + { + "cell_type": "markdown", + "id": "2bb397e1-aa6d-48a5-8827-f802f04a0afc", + "metadata": {}, + "source": [ + "### Unstructured Labeling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f81ae0be-556e-4804-a49e-b358a3b341c9", + "metadata": {}, + "outputs": [], + "source": [ + "# load data\n", + "email_data = [\"Message-ID: <11111111.1111111111111.JavaMail.evans@thyme>\\n\" + \\\n", + " \"Date: Fri, 10 Aug 2005 11:31:37 -0700 (PDT)\\n\" + \\\n", + " \"From: w..smith@company.com\\n\" + \\\n", + " \"To: john.smith@company.com\\n\" + \\\n", + " \"Subject: RE: ABC\\n\" + \\\n", + " \"Mime-Version: 1.0\\n\" + \\\n", + " \"Content-Type: text/plain; charset=us-ascii\\n\" + \\\n", + " \"Content-Transfer-Encoding: 7bit\\n\" + \\\n", + " \"X-From: Smith, Mary W. 
\n" + \\\n", + " \"X-To: Smith, John \\n\" + \\\n", + " \"X-cc: \\n\" + \\\n", + " \"X-bcc: \\n\" + \\\n", + " \"X-Folder: \\SSMITH (Non-Privileged)\\Sent Items\\n\" + \\\n", + " \"X-Origin: Smith-S\\n\" + \\\n", + " \"X-FileName: SSMITH (Non-Privileged).pst\\n\\n\" + \\\n", + " \"All I ever saw was the e-mail from the office.\\n\\n\" + \\\n", + " \"Mary\\n\\n\" + \\\n", + " \"-----Original Message-----\\n\" + \\\n", + " \"From: Smith, John \\n\" + \\\n", + " \"Sent: Friday, August 10, 2005 13:07 PM\\n\" + \\\n", + " \"To: Smith, Mary W.\\n\" + \\\n", + " \"Subject: ABC\\n\\n\" + \\\n", + " \"Have you heard any more regarding the ABC sale? I guess that means that \" + \\\n", + " \"it's no big deal here, but you think they would have send something.\\n\\n\\n\" + \\\n", + " \"John Smith\\n\" + \\\n", + " \"123-456-7890\\n\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3713fbc5-872d-485c-bcb9-28ec552f531f", + "metadata": {}, + "outputs": [], + "source": [ + "labeler = dp.DataLabeler(labeler_type='unstructured')\n", + "\n", + "# set the postprocessor to return word-level NER output:\n", + "# (start position, end position, label) tuples\n", + "labeler.set_params(\n", + " { 'postprocessor': { 'output_format': 'ner', 'use_word_level_argmax': True } } \n", + ")\n", + "\n", + "# make predictions and get an NER label for each detected entity\n", + "predictions = labeler.predict(email_data)\n", + "\n", + "# display results\n", + "print('=========================Prediction========================')\n", + "print(get_unstructured_results(email_data, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6643e98b-b056-4b9a-b415-0df3bd49aa44", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}