diff --git a/Dockerfile b/Dockerfile
new file mode 100644
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+# Builds an image with a cleaned copy of the survey CSV baked in under /app.
+# The base tag is pinned (instead of :latest) so rebuilds are reproducible.
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Pinned version and no pip cache: reproducible install, smaller layer.
+RUN pip install --no-cache-dir pandas==2.2.3
+
+# The script is stored in the image under the name pipeline_c.py.
+COPY pipeline.py pipeline_c.py
+# Runs at build time: reads the source CSV from the URL and writes target.csv under /app.
+RUN python pipeline_c.py https://www.stats.govt.nz/assets/Uploads/Annual-enterprise-survey/Annual-enterprise-survey-2021-financial-year-provisional/Download-data/annual-enterprise-survey-2021-financial-year-provisional-csv.csv target.csv
+
+ENTRYPOINT [ "bash" ]
diff --git a/Untitled-1.ipynb b/Untitled-1.ipynb
deleted file mode 100644
index 746d752..0000000
--- a/Untitled-1.ipynb
+++ /dev/null
@@ -1,43 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Hello world\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(\"Hello world\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.4"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/pipeline.py b/pipeline.py
new file mode 100644
--- /dev/null
+++ b/pipeline.py
@@ -0,0 +1,36 @@
+"""Minimal extract-transform-load script: read a CSV, drop one column, write a CSV."""
+import pandas as pd
+import argparse
+
+
+def main(source, target):
+    """Copy the CSV at ``source`` to ``target`` without the industry-code column.
+
+    Args:
+        source: Path or URL handed to ``pandas.read_csv``.
+        target: Output path handed to ``DataFrame.to_csv``.
+
+    Raises:
+        KeyError: If the input has no ``Industry_code_ANZSIC06`` column.
+    """
+    print("Starting")
+
+    # Extract data
+    data = pd.read_csv(source)
+
+    # Transform data
+    data = data.drop(columns=["Industry_code_ANZSIC06"])
+
+    # Load data (index=False keeps the row index out of the output file)
+    data.to_csv(target, index=False)
+
+    print("Complete")
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('source', help='source.csv')
+    parser.add_argument('target', help='target.csv')
+    args = parser.parse_args()
+
+    main(args.source, args.target)