Skip to content

Commit

Permalink
Merge pull request #56 from mitdbg/new-features
Browse files Browse the repository at this point in the history
New features
  • Loading branch information
chjuncn authored Jan 8, 2025
2 parents ab32c70 + e8fbdad commit 54b109a
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 2 deletions.
3 changes: 2 additions & 1 deletion demos/askem-var.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ class Variable(Schema):
if __name__ == "__main__":
run_pz = True
dataset = "askem"
file_path = "testdata/askem-tiny"

if run_pz:
# reference, plan, stats = run_workload()
excerpts = Dataset(dataset, schema=TextFile)
excerpts = Dataset(file_path, schema=TextFile)
output = excerpts.convert(
Variable, desc="A variable used or introduced in the paper snippet", cardinality=pz.Cardinality.ONE_TO_MANY
)
Expand Down
13 changes: 13 additions & 0 deletions src/palimpzest/datamanager/datamanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,19 @@ def register_user_source(self, src: UserSource, dataset_id: str):
# user sources are always ephemeral
self._registry[dataset_id] = ("user", src)

def get_or_register_source(self, dataset_id_or_path: str):
    """Return the dataset for an id, registering a local path on first use.

    If ``dataset_id_or_path`` is already a key in the registry, the
    registered dataset is returned as-is. Otherwise the value is treated as
    a filesystem path: a file is registered with ``register_local_file`` and
    a directory with ``register_local_directory``. In both cases the same
    string is passed for both arguments, so the path doubles as the dataset
    id. The freshly registered dataset is then returned.

    Args:
        dataset_id_or_path: A registered dataset id, or a path to a local
            file or directory.

    Returns:
        Whatever ``get_registered_dataset`` returns for the id.

    Raises:
        FileNotFoundError: If the value is not a registered id and does not
            point to an existing file or directory.
    """
    if dataset_id_or_path not in self._registry:
        if os.path.isfile(dataset_id_or_path):
            self.register_local_file(dataset_id_or_path, dataset_id_or_path)
        elif os.path.isdir(dataset_id_or_path):
            self.register_local_directory(dataset_id_or_path, dataset_id_or_path)
        else:
            # FileNotFoundError is still an Exception, so callers that catch
            # the broad type keep working while new callers can be specific.
            raise FileNotFoundError(
                f"Path {dataset_id_or_path} is invalid. Does not point to a file or directory."
            )
    return self.get_registered_dataset(dataset_id_or_path)

def get_registered_dataset(self, dataset_id):
"""Return a dataset from the registry."""
if dataset_id not in self._registry:
Expand Down
8 changes: 7 additions & 1 deletion src/palimpzest/sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,13 @@ class Dataset(Set):

def __init__(self, source: str | DataSource, *args, **kwargs):
# convert source (str) -> source (DataSource) if need be
source = DataDirectory().get_registered_dataset(source) if isinstance(source, str) else source
if isinstance(source, str):
try:
source = DataDirectory().get_or_register_source(source)
except Exception as e:
raise Exception(f"Invalid source path: {source}") from e
elif not isinstance(source, (DataSource, Set)):
raise Exception(f"Invalid source type: {type(source)}")

# initialize class
super().__init__(source, *args, **kwargs)
Expand Down

0 comments on commit 54b109a

Please sign in to comment.