nextstrain · joverlee521 · Jul 8, 2025 · Jul 9, 2025 · jameshadfield · Jul 9, 2025
diff --git a/scripts/assign-colors b/scripts/assign-colors
@@ -14,7 +14,7 @@ if __name__ == '__main__':
 
     parser.add_argument('--ordering', type=str, required=True,
         help="""Input TSV file defining the color ordering where the first
-        column is the field and the second column is the trait in that field.
+        column is the category and the second column is the trait in that category.
         Blank lines are ignored. Lines starting with '#' will be ignored as comments.""")
     parser.add_argument('--color-schemes', type=str, required=True,
         help="Input color schemes where each line is a different color scheme separated by tabs.")
@@ -23,6 +23,16 @@ if __name__ == '__main__':
         metadata. If the metadata includes a 'focal' column that only contains
         boolean values, then restrict colors to traits for rows where 'focal'
         is set to True.""")
+    parser.add_argument('--ignore-categories', type=str, default=[], nargs='*',
+        help="""Do not create colors for these categories even if they are
+        included in the metadata and ordering TSV. This is useful for ignoring
+        categories in specific builds even if they share the same default
+        ordering TSV.""")
+    parser.add_argument('--force-include-categories', type=str, default=[],nargs='*',
+        help="""Force include all color orderings for these categories even if
+        there are traits not included in the metadata TSV. This is useful for
+        creating colorings for traits not (yet) present in metadata to solve
+        bootstrapping issue.""")
     parser.add_argument('--output', type=str, required=True,
         help="Output colors TSV file to be passed to augur export.")
     args = parser.parse_args()
@@ -42,6 +52,8 @@ if __name__ == '__main__':
             else:
                 name = array[0]
                 trait = array[1]
+                if name in args.ignore_categories:
+                    continue
                 if name not in assignment:
                     assignment[name] = [trait]
                 else:
@@ -53,7 +65,7 @@ if __name__ == '__main__':
     if args.metadata:
         metadata = pd.read_csv(args.metadata, delimiter='\t')
         for name, trait in assignment.items():
-            if name in metadata:
+            if name in metadata and name not in args.force_include_categories:
                 if 'focal' in metadata and metadata['focal'].dtype == 'bool':
                     focal_list = metadata.loc[metadata['focal'], name].unique()
                     subset_focal = [x for x in assignment[name] if x in focal_list]