3939import posixpath
4040import tempfile
4141import time
42- from typing import Any , Dict , Iterable , Optional , Tuple , Union
42+ from typing import Any , Dict , Iterable , List , Optional , Tuple , Union
4343
4444from absl import logging
4545from etils import epath
@@ -106,23 +106,27 @@ class DatasetIdentity:
106106 module_name : str
107107 config_name : Optional [str ] = None
108108 config_description : Optional [str ] = None
109+ config_tags : Optional [List [str ]] = None
109110 release_notes : Optional [Dict [str , str ]] = None
110111
@classmethod
def from_builder(cls, builder) -> "DatasetIdentity":
  """Builds a `DatasetIdentity` from the attributes of `builder`.

  Args:
    builder: Object exposing `name`, `version`, `data_dir`, `release_notes`
      and `builder_config`. When `builder_config` is truthy, its `name`,
      `description` and `tags` attributes are read as well.

  Returns:
    A new instance of `cls` populated from `builder`.
  """
  builder_config = builder.builder_config
  if builder_config:
    config_fields = {
        "config_name": builder_config.name,
        "config_description": builder_config.description,
        "config_tags": builder_config.tags,
    }
  else:
    # No builder config: every config-derived field is left as `None`.
    config_fields = {
        "config_name": None,
        "config_description": None,
        "config_tags": None,
    }
  return cls(
      name=builder.name,
      version=utils.Version(builder.version),
      data_dir=builder.data_dir,
      module_name=str(builder.__module__),
      release_notes=builder.release_notes,
      **config_fields,
  )
128132
@@ -139,6 +143,7 @@ def from_proto(
139143 module_name = info_proto .module_name ,
140144 config_name = info_proto .config_name ,
141145 config_description = info_proto .config_description ,
146+ config_tags = info_proto .config_tags or [],
142147 release_notes = {k : v for k , v in info_proto .release_notes .items ()},
143148 )
144149
@@ -228,6 +233,7 @@ def __init__(
228233 disable_shuffling = disable_shuffling ,
229234 config_name = self ._identity .config_name ,
230235 config_description = self ._identity .config_description ,
236+ config_tags = self ._identity .config_tags ,
231237 citation = utils .dedent (citation ),
232238 module_name = self ._identity .module_name ,
233239 redistribution_info = dataset_info_pb2 .RedistributionInfo (
@@ -320,6 +326,10 @@ def config_name(self) -> str:
320326 def config_description (self ) -> str :
321327 return self ._identity .config_description
322328
@property
def config_tags(self) -> List[str]:
  """Tags of the builder config, as stored on the dataset identity.

  Returns:
    The identity's `config_tags` list, or an empty list when the identity
    holds no tags.
  """
  # The identity's `config_tags` is Optional and may be `None`. Normalize it
  # to an empty list so the declared `List[str]` return type holds and
  # callers can iterate or `join` the result without a `None` check.
  return self._identity.config_tags or []
332+
323333 @property
324334 def full_name (self ):
325335 """Full canonical name: (<dataset_name>/<config_name>/<version>)."""
@@ -650,6 +660,8 @@ def read_from_directory(self, dataset_info_dir: epath.PathLike) -> None:
650660 # Otherwise, we restore the dataset_info.json value
651661 if field .type == field .TYPE_MESSAGE :
652662 field_value .MergeFrom (field_value_restored )
663+ elif field .label == field .LABEL_REPEATED :
664+ field_value .extend (field_value_restored )
653665 else :
654666 setattr (self ._info_proto , field_name , field_value_restored )
655667
@@ -754,6 +766,11 @@ def __repr__(self):
754766 else :
755767 config_description = SKIP
756768
769+ if self ._info_proto .config_tags :
770+ config_tags = ", " .join (self .config_tags )
771+ else :
772+ config_tags = SKIP
773+
757774 file_format_str = (
758775 self .file_format .value
759776 if self .file_format
@@ -765,6 +782,7 @@ def __repr__(self):
765782 ("full_name" , repr (self .full_name )),
766783 ("description" , _indent (f'"""\n { self .description } \n """' )),
767784 ("config_description" , config_description ),
785+ ("config_tags" , config_tags ),
768786 ("homepage" , repr (self .homepage )),
769787 ("data_path" , repr (self .data_dir )),
770788 ("file_format" , file_format_str ),
0 commit comments