Skip to content

Commit

Permalink
add more logic to handle uploading prelabeled data
Browse files (browse the repository at this point in the history)
  • Loading branch information
AstridKery committed Oct 1, 2024
1 parent fab4c4f commit ecb104e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 7 deletions.
22 changes: 20 additions & 2 deletions backend/django/core/utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,16 @@ def create_labels_from_csv(df, project):
stream = StringIO()

labels = {label.name: label.pk for label in project.labels.all()}

df["Label"] = df["Label"].apply(
lambda s: s.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r")
)

existing_labels = set(labels.keys())
df_labels = set(df["Label"].tolist())

quote_labels = df_labels - existing_labels
df["Label"] = df["Label"].apply(lambda s: f'"{s}"' if s in quote_labels else s)
df["data_id"] = df["hash"].apply(
lambda x: Data.objects.get(hash=x, project=project).pk
)
Expand Down Expand Up @@ -841,10 +851,18 @@ def create_label_metadata(project, label_data):
df_label_ids = set(label_data["Label"].tolist())

need_quotes = df_label_ids - existing_label_ids
label_data["Label"] = label_data["Label"].apply(lambda s: f'"{s}"'.replace('\\n', '\n').replace('\\t', '\t').replace('\\r', '\r') if s in need_quotes else s)
label_data["Label"] = label_data["Label"].apply(
lambda s: (
f'"{s}"'.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r")
if s in need_quotes
else s
)
)
df_label_ids = set(label_data["Label"].tolist())
if len(df_label_ids - existing_label_ids) > 0:
raise ValidationError("ERROR loading in label metadata. Something is going wrong with the label file.")
raise ValidationError(
"ERROR loading in label metadata. Something is going wrong with the label file."
)

label_data = label_data.merge(label_objects, on="Label", how="inner")

Expand Down
13 changes: 10 additions & 3 deletions backend/django/core/utils/utils_form.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,16 @@ def clean_data_helper(
and len(set(labels_in_data) - set(supplied_labels)) > 0
):
just_in_data = set(labels_in_data) - set(supplied_labels)
raise ValidationError(
f"There are extra labels in the file which were not created in step 2: {just_in_data}"
)
# Labels that did not match may contain escaped whitespace: retry them wrapped
# in double quotes, with literal \n, \t and \r converted to the real characters.
labels_in_data_fixed = [
f'"{s}"'.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r")
for s in just_in_data
] + list(set(labels_in_data) - just_in_data)

if len(set(labels_in_data_fixed) - set(supplied_labels)) > 0:
raise ValidationError(
f"There are extra labels in the file which were not in step 2 of project creation: {just_in_data}"
)

if "ID" in data.columns:
# there should be no null values
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/actions/skew.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ export const skewLabel = (dataID, labelID, projectID) => {
dispatch(setMessage(response.error));
} else {
dispatch(getUnlabeled(projectID));
dispatch(getLabelCounts(projectID));
//dispatch(getLabelCounts(projectID));
}
});
};
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/components/Skew/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class Skew extends React.Component {
componentDidMount() {
this.props.setFilterStr("");
this.props.getUnlabeled();
this.props.getLabelCounts();
//this.props.getLabelCounts();
}

getText(row) {
Expand Down

0 comments on commit ecb104e

Please sign in to comment.