From 5a3924007fd613853d7dceabec66fe2d2a3d9b8e Mon Sep 17 00:00:00 2001 From: sasha-tsepilova Date: Sun, 8 Oct 2023 16:00:11 +0300 Subject: [PATCH] removed synthesizers walkthrough --- SynRD/synthesizers/sample_walkthrough.ipynb | 1148 ------------------- 1 file changed, 1148 deletions(-) delete mode 100644 SynRD/synthesizers/sample_walkthrough.ipynb diff --git a/SynRD/synthesizers/sample_walkthrough.ipynb b/SynRD/synthesizers/sample_walkthrough.ipynb deleted file mode 100644 index e606008..0000000 --- a/SynRD/synthesizers/sample_walkthrough.ipynb +++ /dev/null @@ -1,1148 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Clone this!\n", - "https://github.com/terranceliu/dp-query-release\n", - "and move /src to the /synthesizers folder" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/sasha-tsepilova/miniconda3/envs/synrd/lib/python3.7/site-packages/mbi/__init__.py:15: UserWarning: MixtureInference disabled, please install jax and jaxlib\n", - " warnings.warn('MixtureInference disabled, please install jax and jaxlib')\n", - "/home/sasha-tsepilova/miniconda3/envs/synrd/lib/python3.7/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "from SynRD.papers import Iverson22Football, Pierce2019Who\n", - "from SynRD.benchmark import Benchmark\n", - "from SynRD.synthesizers import MSTSynthesizer\n", - "from SynRD.utils import save_synthesizer, load_synthesizer, do_binning, unbin_df" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "benchmark = Benchmark()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "papers = [Iverson22Football]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 155k/155k [00:00<00:00, 596kiB/s] \n" - ] - } - ], - "source": [ - "papers = benchmark.initialize_papers(papers)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "transforms = {}\n", - "for paper in papers:\n", - " df, transform = do_binning(paper.real_dataframe)\n", - " transforms[paper.__class__.__name__.lower()] = transform" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
BIO_SEXS44A21H1GI9H5OD11S1IYEAR5IMONTH5H1GI1YH1GI1MH5ID6G...S44A25S44A26S44A27S44A28S44A29H1HS3H1SU1H5ID6IH5ID13H5SS0B
010.018.013.002460.0...0.00.00.00.01.0000.00.01.0
110.023.014.014390.0...0.00.00.00.00.0000.00.01.0
210.026.015.017210.0...0.00.00.01.00.0000.00.01.0
311.024.013.009420.0...0.00.00.00.00.0000.00.01.0
410.012.014.0164100.0...0.00.00.00.00.0000.01.01.0
..................................................................
17571NaN16.0NaN14531.0...NaNNaNNaNNaNNaN111.00.04.0
175810.026.013.007431.0...0.00.00.00.00.0000.01.02.0
175910.0110.013.008420.0...0.00.00.00.00.0000.00.01.0
176011.012.014.011330.0...0.00.00.00.00.0000.00.01.0
176110.0210.013.008460.0...0.00.00.00.00.0000.00.02.0
\n", - "

1762 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " BIO_SEX S44A21 H1GI9 H5OD11 S1 IYEAR5 IMONTH5 H1GI1Y H1GI1M \\\n", - "0 1 0.0 1 8.0 13.0 0 2 4 6 \n", - "1 1 0.0 2 3.0 14.0 1 4 3 9 \n", - "2 1 0.0 2 6.0 15.0 1 7 2 1 \n", - "3 1 1.0 2 4.0 13.0 0 9 4 2 \n", - "4 1 0.0 1 2.0 14.0 1 6 4 10 \n", - "... ... ... ... ... ... ... ... ... ... \n", - "1757 1 NaN 1 6.0 NaN 1 4 5 3 \n", - "1758 1 0.0 2 6.0 13.0 0 7 4 3 \n", - "1759 1 0.0 1 10.0 13.0 0 8 4 2 \n", - "1760 1 1.0 1 2.0 14.0 1 1 3 3 \n", - "1761 1 0.0 2 10.0 13.0 0 8 4 6 \n", - "\n", - " H5ID6G ... S44A25 S44A26 S44A27 S44A28 S44A29 H1HS3 H1SU1 \\\n", - "0 0.0 ... 0.0 0.0 0.0 0.0 1.0 0 0 \n", - "1 0.0 ... 0.0 0.0 0.0 0.0 0.0 0 0 \n", - "2 0.0 ... 0.0 0.0 0.0 1.0 0.0 0 0 \n", - "3 0.0 ... 0.0 0.0 0.0 0.0 0.0 0 0 \n", - "4 0.0 ... 0.0 0.0 0.0 0.0 0.0 0 0 \n", - "... ... ... ... ... ... ... ... ... ... \n", - "1757 1.0 ... NaN NaN NaN NaN NaN 1 1 \n", - "1758 1.0 ... 0.0 0.0 0.0 0.0 0.0 0 0 \n", - "1759 0.0 ... 0.0 0.0 0.0 0.0 0.0 0 0 \n", - "1760 0.0 ... 0.0 0.0 0.0 0.0 0.0 0 0 \n", - "1761 0.0 ... 0.0 0.0 0.0 0.0 0.0 0 0 \n", - "\n", - " H5ID6I H5ID13 H5SS0B \n", - "0 0.0 0.0 1.0 \n", - "1 0.0 0.0 1.0 \n", - "2 0.0 0.0 1.0 \n", - "3 0.0 0.0 1.0 \n", - "4 0.0 1.0 1.0 \n", - "... ... ... ... \n", - "1757 1.0 0.0 4.0 \n", - "1758 0.0 1.0 2.0 \n", - "1759 0.0 0.0 1.0 \n", - "1760 0.0 0.0 1.0 \n", - "1761 0.0 0.0 2.0 \n", - "\n", - "[1762 rows x 27 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "mst = MSTSynthesizer(epsilon=1.0)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "mst.fit(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "B = 5\n", - "synth_df = mst.sample(B*len(paper.real_dataframe))" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
BIO_SEXS44A21H1GI9H5OD11S1IYEAR5IMONTH5H1GI1YH1GI1MH5ID6G...S44A25S44A26S44A27S44A28S44A29H1HS3H1SU1H5ID6IH5ID13H5SS0B
count1762.01321.0000001762.0000001760.0000001319.0000001762.0000001762.0000001762.0000001762.0000001762.000000...1321.0000001321.0000001321.0000001321.0000001321.0000001762.0000001762.0000001759.0000001753.0000001749.000000
mean1.00.2793341.5039737.65625014.9461710.2332586.9716231.9971626.5238370.174234...0.0560180.1264190.0257380.0809990.1188490.1066970.1645860.1563390.1203651.348199
std0.00.4488421.0415753.4758181.7488730.4230252.7957721.6532083.4027000.379418...0.2300440.3324470.1584130.2729370.3237340.3797400.6849850.3632800.3254810.644883
min1.00.0000001.0000002.00000011.0000000.0000001.0000000.0000001.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
25%1.00.0000001.0000004.00000014.0000000.0000005.0000000.0000004.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
50%1.00.0000001.0000008.00000015.0000000.0000008.0000002.0000007.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
75%1.01.0000002.00000010.00000016.0000000.0000009.0000003.0000009.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000002.000000
max1.01.0000008.00000016.00000019.0000001.00000012.0000005.00000012.0000001.000000...1.0000001.0000001.0000001.0000001.0000008.0000008.0000001.0000001.0000004.000000
\n", - "

8 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " BIO_SEX S44A21 H1GI9 H5OD11 S1 \\\n", - "count 1762.0 1321.000000 1762.000000 1760.000000 1319.000000 \n", - "mean 1.0 0.279334 1.503973 7.656250 14.946171 \n", - "std 0.0 0.448842 1.041575 3.475818 1.748873 \n", - "min 1.0 0.000000 1.000000 2.000000 11.000000 \n", - "25% 1.0 0.000000 1.000000 4.000000 14.000000 \n", - "50% 1.0 0.000000 1.000000 8.000000 15.000000 \n", - "75% 1.0 1.000000 2.000000 10.000000 16.000000 \n", - "max 1.0 1.000000 8.000000 16.000000 19.000000 \n", - "\n", - " IYEAR5 IMONTH5 H1GI1Y H1GI1M H5ID6G ... \\\n", - "count 1762.000000 1762.000000 1762.000000 1762.000000 1762.000000 ... \n", - "mean 0.233258 6.971623 1.997162 6.523837 0.174234 ... \n", - "std 0.423025 2.795772 1.653208 3.402700 0.379418 ... \n", - "min 0.000000 1.000000 0.000000 1.000000 0.000000 ... \n", - "25% 0.000000 5.000000 0.000000 4.000000 0.000000 ... \n", - "50% 0.000000 8.000000 2.000000 7.000000 0.000000 ... \n", - "75% 0.000000 9.000000 3.000000 9.000000 0.000000 ... \n", - "max 1.000000 12.000000 5.000000 12.000000 1.000000 ... \n", - "\n", - " S44A25 S44A26 S44A27 S44A28 S44A29 \\\n", - "count 1321.000000 1321.000000 1321.000000 1321.000000 1321.000000 \n", - "mean 0.056018 0.126419 0.025738 0.080999 0.118849 \n", - "std 0.230044 0.332447 0.158413 0.272937 0.323734 \n", - "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "50% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "75% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "max 1.000000 1.000000 1.000000 1.000000 1.000000 \n", - "\n", - " H1HS3 H1SU1 H5ID6I H5ID13 H5SS0B \n", - "count 1762.000000 1762.000000 1759.000000 1753.000000 1749.000000 \n", - "mean 0.106697 0.164586 0.156339 0.120365 1.348199 \n", - "std 0.379740 0.684985 0.363280 0.325481 0.644883 \n", - "min 0.000000 0.000000 0.000000 0.000000 1.000000 \n", - "25% 0.000000 0.000000 0.000000 0.000000 1.000000 \n", - "50% 0.000000 0.000000 0.000000 0.000000 1.000000 \n", - "75% 0.000000 0.000000 0.000000 0.000000 2.000000 \n", - "max 8.000000 8.000000 1.000000 1.000000 4.000000 \n", - "\n", - "[8 rows x 27 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
BIO_SEXS44A21H1GI9H5OD11S1IYEAR5IMONTH5H1GI1YH1GI1MH5ID6G...S44A25S44A26S44A27S44A28S44A29H1HS3H1SU1H5ID6IH5ID13H5SS0B
count8712.06440.0000008613.0000008560.0000006421.0000008810.0000008393.0000008329.0000008226.0000008667.000000...6551.0000006742.0000006656.0000006702.0000006518.0000008563.0000008765.0000008799.0000008390.0000008561.000000
mean1.00.2729811.5704177.96764015.1653950.2371177.1379722.0619526.3316310.183339...0.0682340.1125780.0465750.1274250.1227370.4116550.1637190.1905900.0510131.365611
std0.00.4455261.3107753.7376281.8838690.4253392.6956391.6280603.3687520.386967...0.2521660.3161000.2107420.3334730.3281601.6095050.7249590.3927890.2200380.687134
min1.00.0000001.0000002.00000011.0000000.0000001.0000000.0000001.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
25%1.00.0000001.0000004.00000014.0000000.0000006.0000001.0000003.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
50%1.00.0000001.0000009.00000015.0000000.0000008.0000002.0000006.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000001.000000
75%1.01.0000002.00000010.00000017.0000000.0000009.0000003.0000009.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000002.000000
max1.01.0000008.00000016.00000019.0000001.00000012.0000005.00000012.0000001.000000...1.0000001.0000001.0000001.0000001.0000008.0000008.0000001.0000001.0000004.000000
\n", - "

8 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " BIO_SEX S44A21 H1GI9 H5OD11 S1 \\\n", - "count 8712.0 6440.000000 8613.000000 8560.000000 6421.000000 \n", - "mean 1.0 0.272981 1.570417 7.967640 15.165395 \n", - "std 0.0 0.445526 1.310775 3.737628 1.883869 \n", - "min 1.0 0.000000 1.000000 2.000000 11.000000 \n", - "25% 1.0 0.000000 1.000000 4.000000 14.000000 \n", - "50% 1.0 0.000000 1.000000 9.000000 15.000000 \n", - "75% 1.0 1.000000 2.000000 10.000000 17.000000 \n", - "max 1.0 1.000000 8.000000 16.000000 19.000000 \n", - "\n", - " IYEAR5 IMONTH5 H1GI1Y H1GI1M H5ID6G ... \\\n", - "count 8810.000000 8393.000000 8329.000000 8226.000000 8667.000000 ... \n", - "mean 0.237117 7.137972 2.061952 6.331631 0.183339 ... \n", - "std 0.425339 2.695639 1.628060 3.368752 0.386967 ... \n", - "min 0.000000 1.000000 0.000000 1.000000 0.000000 ... \n", - "25% 0.000000 6.000000 1.000000 3.000000 0.000000 ... \n", - "50% 0.000000 8.000000 2.000000 6.000000 0.000000 ... \n", - "75% 0.000000 9.000000 3.000000 9.000000 0.000000 ... \n", - "max 1.000000 12.000000 5.000000 12.000000 1.000000 ... \n", - "\n", - " S44A25 S44A26 S44A27 S44A28 S44A29 \\\n", - "count 6551.000000 6742.000000 6656.000000 6702.000000 6518.000000 \n", - "mean 0.068234 0.112578 0.046575 0.127425 0.122737 \n", - "std 0.252166 0.316100 0.210742 0.333473 0.328160 \n", - "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "50% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "75% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "max 1.000000 1.000000 1.000000 1.000000 1.000000 \n", - "\n", - " H1HS3 H1SU1 H5ID6I H5ID13 H5SS0B \n", - "count 8563.000000 8765.000000 8799.000000 8390.000000 8561.000000 \n", - "mean 0.411655 0.163719 0.190590 0.051013 1.365611 \n", - "std 1.609505 0.724959 0.392789 0.220038 0.687134 \n", - "min 0.000000 0.000000 0.000000 0.000000 1.000000 \n", - "25% 0.000000 0.000000 0.000000 0.000000 1.000000 \n", - "50% 0.000000 0.000000 0.000000 0.000000 1.000000 \n", - "75% 0.000000 0.000000 0.000000 0.000000 2.000000 \n", - "max 8.000000 8.000000 1.000000 1.000000 4.000000 \n", - "\n", - "[8 rows x 27 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "synth_df.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "paper.set_synthetic_dataframe(synth_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[array([1. , 1. , 1. , 1. , 1. , 0.8, 1. , 1. , 0.6, 0.2, 0.6, 1. , 1. ,\n", - " 1. , 0.6, 0.2, 0.6]),\n", - " array([0. , 0. , 0. , 0. , 0. ,\n", - " 0.4 , 0. , 0. , 0.48989795, 0.4 ,\n", - " 0.48989795, 0. , 0. , 0. , 0.48989795,\n", - " 0.4 , 0.48989795]),\n", - " array([[1. , 1. , 1. , 1. , 1. , 0.1, 1. , 1. , 0. , 0. , 0. , 1. , 1. ,\n", - " 1. , 0. , 0. , 0. ],\n", - " [1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 0.9, 1. , 1. , 1. ,\n", - " 1. , 1. , 0.9, 1. ]])]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "benchmark.eval_soft_findings_each_finding(paper, 5)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.16" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "f46ed56ea36e21b73dee67d378d2fbcf3cb06eef8370e7f45e3711fdc2359e2b" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}