diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..fc2895f
Binary files /dev/null and b/.DS_Store differ
diff --git a/Dockerfile b/Dockerfile
index e71ecc4..3b43b8b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,6 +16,7 @@ ADD scripts/enable_api.sh /opt/scripts/enable_api.sh
 ADD scripts/estimate_billing.py /opt/scripts/estimate_billing.py
 ADD scripts/persist_artifacts.py /opt/scripts/persist_artifacts.py
 ADD scripts/costs_json_to_csv.py /opt/scripts/costs_json_to_csv.py
+ADD scripts/cost_script.py /opt/scripts/cost_script.py
 
 # GMS setup/run
 ADD gms/resources.sh /opt/gms/resources.sh
diff --git a/scripts/README.md b/scripts/README.md
index 88b793f..746746d 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -110,6 +110,14 @@ This functionality is also wrapped into estimate\_billing.py under the
 I'd still run these separately just to have both, but if you're only
 after the CSV this may be more convenient.
 
+# cost\_script.py
+
+Takes the TSV output of costs\_json\_to\_csv.py and collapses tasks that have been split into shards, summing the shard costs into one row per task.
+It writes the result, sorted by descending total cost, to costs\_report\_final.csv in the current working directory.
+
+Use as follows:
+
+    python3 /opt/scripts/cost_script.py costs.tsv
 
 # Troubleshooting scripts
 
diff --git a/scripts/cost_script.py b/scripts/cost_script.py
new file mode 100644
index 0000000..ea7087b
--- /dev/null
+++ b/scripts/cost_script.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+
+"""
+Converts a costs TSV file to a summary CSV file (costs_report_final.csv)
+
+Usage: cost_script.py [costs tsv file]
+"""
+#Import modules
+import sys
+import pandas as pd
+import regex as re
+
+if len(sys.argv) < 2:
+    sys.exit(__doc__)
+file = sys.argv[1]
+
+# Matches the per-shard suffix (e.g. "_shard-12") in a call name; removing it
+# gives every shard of a task the same name so their costs can be summed.
+SHARD_SUFFIX = re.compile(r'_shard-\d+')
+COST_COLUMNS = ["totalCost", "cpuCost", "memoryCost", "diskCost"]
+
+# Read the TSV into a list of rows (each row is a list of column values). The
+# line terminator is stripped so the last column name/value carries no stray
+# newline, and blank lines are skipped so they cannot become bogus rows.
+table = []
+with open(file) as f:
+    for line in f:
+        line = line.rstrip('\r\n')
+        if line:
+            table.append(line.split('\t'))
+if not table:
+    sys.exit(f"{file}: no header row found")
+
+# Collapse sharded tasks: strip the shard suffix from the first column of each
+# data row (retried and non-retried shards are handled identically).
+for row in table[1:]:
+    row[0] = SHARD_SUFFIX.sub('', row[0])
+
+# Build a dataframe with the first row as the header, drop that header row
+# from the data, and keep only the call name plus the cost columns.
+table_df = pd.DataFrame(table, columns=table[0]).drop([0])
+table_df = table_df[["callName"] + COST_COLUMNS]
+
+# The values were read as strings; convert the costs to floats for summing.
+table_df = table_df.astype({column: 'float' for column in COST_COLUMNS})
+
+# Sum the costs of all rows sharing a callName, most expensive first.
+table_df_sum = table_df.groupby("callName").sum()
+table_df_sum = table_df_sum.sort_values(by=['totalCost'], ascending=False)
+table_df_sum.to_csv('costs_report_final.csv', index=True)
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 76edcda..69eaf48 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -1,3 +1,6 @@
+numpy
+pandas
+regex
 cwl_utils
 miniwdl == 1.2.1