diff --git a/Orange/widgets/data/owfile.py b/Orange/widgets/data/owfile.py index e23a249b3e..f6b18c8b08 100644 --- a/Orange/widgets/data/owfile.py +++ b/Orange/widgets/data/owfile.py @@ -9,7 +9,6 @@ QStyle, QComboBox, QMessageBox, QGridLayout, QLabel, \ QLineEdit, QSizePolicy as Policy, QCompleter from AnyQt.QtCore import Qt, QTimer, QSize, QUrl -from AnyQt.QtGui import QBrush from orangewidget.utils.filedialogs import format_filter from orangewidget.workflow.drophandler import SingleUrlDropHandler @@ -38,7 +37,7 @@ # module's namespace so that old saved settings still work from Orange.widgets.utils.filedialogs import RecentPath -DEFAULT_READER_TEXT = "Automatically detect type" +DEFAULT_READER_TEXT = "Determine type from the file extension" log = logging.getLogger(__name__) @@ -147,8 +146,11 @@ class Warning(widget.OWWidget.Warning): class Error(widget.OWWidget.Error): file_not_found = Msg("File not found.") missing_reader = Msg("Missing reader.") + select_file_type = Msg("Select file type.") sheet_error = Msg("Error listing available sheets.") unknown = Msg("Read error:\n{}") + unknown_select = Msg( + "Read error, possibly due to incorrect choice of file type:\n{}") UserAdviceMessages = [ widget.Message( @@ -264,7 +266,7 @@ def package(w): self.reader_combo = QComboBox(self) self.reader_combo.setSizePolicy(Policy.Expanding, Policy.Fixed) self.reader_combo.setMinimumSize(QSize(100, 1)) - self.reader_combo.activated[int].connect(self.select_reader) + self.reader_combo.activated[int].connect(self.on_reader_change) box.layout().addWidget(self.reader_combo) layout.addWidget(box, 0, 1) @@ -324,25 +326,27 @@ def select_file(self, n): self.set_file_list() def select_sheet(self): + # pylint: disable=unsubscriptable-object self.recent_paths[0].sheet = self.sheet_combo.currentText() self.load_data() + def on_reader_change(self, n): + self.select_reader(n) + self.load_data() + def select_reader(self, n): if self.source != self.LOCAL_FILE: return # ignore for URL's if self.recent_paths: - path = self.recent_paths[0] + path = self.recent_paths[0] # pylint: disable=unsubscriptable-object if n == 0: # default path.file_format = None - self.load_data() elif n <= len(self.available_readers): reader = self.available_readers[n - 1] path.file_format = reader.qualified_name() - self.load_data() else: # the rest include just qualified names path.file_format = self.reader_combo.itemText(n) - self.load_data() def _url_set(self): index = self.url_combo.currentIndex() @@ -373,11 +377,14 @@ def browse_file(self, in_demos=False): else: start_file = self.last_path() or os.path.expanduser("~/") - filename, reader, _ = open_filename_dialog(start_file, None, self.available_readers) + filename, reader, _ = open_filename_dialog( + start_file, None, self.available_readers, + add_all="*") if not filename: return self.add_path(filename) if reader is not None: + # pylint: disable=unsubscriptable-object self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE @@ -415,20 +422,20 @@ def _try_load(self): if not url: return self.Information.no_file_selected - def mark_problematic_reader(): - self.reader_combo.setItemData(self.reader_combo.currentIndex(), - QBrush(Qt.red), Qt.ForegroundRole) - try: self.reader = self._get_reader() # also sets current reader index assert self.reader is not None except MissingReaderException: - mark_problematic_reader() - return self.Error.missing_reader + if self.reader_combo.currentIndex() > 0: + return self.Error.missing_reader + else: + return self.Error.select_file_type except Exception as ex: - mark_problematic_reader() log.exception(ex) - return lambda x=ex: self.Error.unknown(str(x)) + if self.reader_combo.currentIndex() > 0: + return lambda x=ex: self.Error.unknown(str(x)) + else: + return lambda x=ex: self.Error.unknown_select(str(x)) try: self._update_sheet_combo() @@ -439,7 +446,6 @@ def mark_problematic_reader(): try: data = self.reader.read() except Exception as ex: - mark_problematic_reader() log.exception(ex) return lambda x=ex: self.Error.unknown(str(x)) if warnings: @@ -455,9 +461,26 @@ def mark_problematic_reader(): return None def _get_reader(self) -> FileFormat: + """ + Get the reader for the current file. + + For local files, this also observes the stored settings and the reader + combo, as follows: + + 1. If the file format is known (from stored settings), use it and set + the reader combo to the corresponding index (as in settings) + 2. Otherwise, detect it from the extension and set the combo to + Auto detect, overriding any previous user-set choice + 3. Otherwise, use the current combo state. + + Returns: + FileFormat: reader instance + """ if self.source == self.LOCAL_FILE: path = self.last_path() self.reader_combo.setEnabled(True) + + # pylint: disable=unsubscriptable-object if self.recent_paths and self.recent_paths[0].file_format: qname = self.recent_paths[0].file_format qname_index = {r.qualified_name(): i for i, r in enumerate(self.available_readers)} @@ -473,9 +496,21 @@ def _get_reader(self) -> FileFormat: except Exception as ex: raise MissingReaderException(f'Can not find reader "{qname}"') from ex reader = reader_class(path) + else: - self.reader_combo.setCurrentIndex(0) - reader = FileFormat.get_reader(path) + old_idx = self.reader_combo.currentIndex() + try: + self.reader_combo.setCurrentIndex(0) + reader = FileFormat.get_reader(path) + except MissingReaderException: + if old_idx == 0: + raise + # Set the path for the current file format, + # and repeat the call to return the corresponding reader + self.select_reader(old_idx) + return self._get_reader() + + # pylint: disable=unsubscriptable-object if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) return reader @@ -504,12 +539,21 @@ def _select_active_sheet(self): self.sheet_combo.setCurrentIndex(0) def _initialize_reader_combo(self): - self.reader_combo.clear() - filters = [format_filter(f) for f in self.available_readers] - self.reader_combo.addItems([DEFAULT_READER_TEXT] + filters) - self.reader_combo.setCurrentIndex(0) - self.reader_combo.setDisabled(True) - # additional readers may be added in self._get_reader() + # Reset to initial state without losing the current index or + # emitting any signals. + combo = self.reader_combo + if not combo.count(): + filters = [format_filter(f) for f in self.available_readers] + combo.addItems([DEFAULT_READER_TEXT] + filters) + combo.setCurrentIndex(0) + else: + # additional readers may be added in self._get_reader() + n = len(self.available_readers) + 1 + if combo.currentIndex() >= n: + combo.setCurrentIndex(0) + while combo.count() > n: + combo.removeItem(combo.count() - 1) + combo.setDisabled(True) @staticmethod def _describe(table): @@ -556,10 +600,12 @@ def _describe(table): return text def storeSpecificSettings(self): + # pylint: disable=unsubscriptable-object self.current_context.modified_variables = self.variables[:] def retrieveSpecificSettings(self): if hasattr(self.current_context, "modified_variables"): + # pylint: disable=unsubscriptable-object self.variables[:] = self.current_context.modified_variables def reset_domain_edit(self): diff --git a/Orange/widgets/data/tests/actually-a-tab-file.xlsx b/Orange/widgets/data/tests/actually-a-tab-file.xlsx new file mode 100644 index 0000000000..87dffb64de --- /dev/null +++ b/Orange/widgets/data/tests/actually-a-tab-file.xlsx @@ -0,0 +1,27 @@ +age prescription astigmatic tear_rate lenses +discrete discrete discrete discrete discrete + class +young myope no reduced none +young myope no normal soft +young myope yes reduced none +young myope yes normal hard +young hypermetrope no reduced none +young hypermetrope no normal soft +young hypermetrope yes reduced none +young hypermetrope yes normal hard +pre-presbyopic myope no reduced none +pre-presbyopic myope no normal soft +pre-presbyopic myope yes reduced none +pre-presbyopic myope yes normal hard +pre-presbyopic hypermetrope no reduced none +pre-presbyopic hypermetrope no normal soft +pre-presbyopic hypermetrope yes reduced none +pre-presbyopic hypermetrope yes normal none +presbyopic myope no reduced none +presbyopic myope no normal none +presbyopic myope yes reduced none +presbyopic myope yes normal hard +presbyopic hypermetrope no reduced none +presbyopic hypermetrope no normal soft +presbyopic hypermetrope yes reduced none +presbyopic hypermetrope yes normal none diff --git a/Orange/widgets/data/tests/an_excel_file-too.foo b/Orange/widgets/data/tests/an_excel_file-too.foo new file mode 100644 index 0000000000..bc3fd8089b Binary files /dev/null and b/Orange/widgets/data/tests/an_excel_file-too.foo differ diff --git a/Orange/widgets/data/tests/an_excel_file.foo b/Orange/widgets/data/tests/an_excel_file.foo new file mode 100644 index 0000000000..bc3fd8089b Binary files /dev/null and b/Orange/widgets/data/tests/an_excel_file.foo differ diff --git a/Orange/widgets/data/tests/an_excel_file.xlsx b/Orange/widgets/data/tests/an_excel_file.xlsx new file mode 100644 index 0000000000..bc3fd8089b Binary files /dev/null and b/Orange/widgets/data/tests/an_excel_file.xlsx differ diff --git a/Orange/widgets/data/tests/test_owfile.py b/Orange/widgets/data/tests/test_owfile.py index 27ef58e747..c414e1f988 100644 --- a/Orange/widgets/data/tests/test_owfile.py +++ b/Orange/widgets/data/tests/test_owfile.py @@ -1,5 +1,5 @@ # Test methods with long descriptive names can omit docstrings -# pylint: disable=missing-docstring,protected-access +# pylint: disable=missing-docstring,protected-access,too-many-public-methods from os import path, remove, getcwd from os.path import dirname import unittest @@ -41,7 +41,9 @@ class FailedSheetsFormat(FileFormat): def read(self): pass + @property def sheets(self): + # pylint: disable=broad-exception-raised raise Exception("Not working") @@ -137,6 +139,7 @@ def _drop_event(self, url): def test_check_file_size(self): self.assertFalse(self.widget.Warning.file_too_big.is_shown()) self.widget.SIZE_LIMIT = 4000 + # We're avoiding __new__, pylint: disable=unnecessary-dunder-call self.widget.__init__() self.assertTrue(self.widget.Warning.file_too_big.is_shown()) @@ -337,7 +340,9 @@ def test_check_datetime_disabled(self): with named_file(dat, suffix=".tab") as filename: self.open_dataset(filename) domain_editor = self.widget.domain_editor - idx = lambda x: self.widget.domain_editor.model().createIndex(x, 1) + + def idx(x): + return self.widget.domain_editor.model().createIndex(x, 1) qcombobox = QComboBox() combo = ComboDelegate(domain_editor, @@ -361,13 +366,13 @@ def test_reader_custom_tab(self): outdata = self.get_output(self.widget.Outputs.data) self.assertEqual(len(outdata), 150) # loaded iris - def test_no_reader_extension(self): + def test_unknown_extension(self): with named_file("", suffix=".xyz_unknown") as fn: no_reader = RecentPath(fn, None, None) self.widget = self.create_widget(OWFile, stored_settings={"recent_paths": [no_reader]}) self.widget.load_data() - self.assertTrue(self.widget.Error.missing_reader.is_shown()) + self.assertTrue(self.widget.Error.select_file_type.is_shown()) def test_fail_sheets(self): with named_file("", suffix=".failed_sheet") as fn: @@ -418,6 +423,22 @@ def test_no_specified_reader(self): self.assertTrue(self.widget.Error.missing_reader.is_shown()) self.assertEqual(self.widget.reader_combo.currentText(), "not.a.file.reader.class") + + def _select_reader(self, name): + reader_combo = self.widget.reader_combo + len_with_qname = len(reader_combo) + for i in range(len_with_qname): + text = reader_combo.itemText(i) + if text.startswith(name): + break + else: + assert f"No reader starts with {name!r}" + reader_combo.setCurrentIndex(i) + reader_combo.activated.emit(i) + + def _select_tab_reader(self): + self._select_reader("Tab-separated") + def test_select_reader(self): filename = FileFormat.locate("iris.tab", dataset_dirs) @@ -436,12 +457,7 @@ def test_select_reader(self): self.assertEqual(self.widget.reader_combo.currentText(), "not.a.file.reader.class") self.assertEqual(self.widget.reader, None) - # select the tab reader - for i in range(len_with_qname): - text = self.widget.reader_combo.itemText(i) - if text.startswith("Tab-separated"): - break - self.widget.reader_combo.activated.emit(i) + self._select_tab_reader() self.assertEqual(len(self.widget.reader_combo), len_with_qname - 1) self.assertTrue(self.widget.reader_combo.currentText().startswith("Tab-separated")) self.assertIsInstance(self.widget.reader, TabReader) @@ -452,6 +468,105 @@ def test_select_reader(self): self.assertEqual(self.widget.reader_combo.currentText(), DEFAULT_READER_TEXT) self.assertIsInstance(self.widget.reader, TabReader) + def test_auto_detect_and_override(self): + tab_as_xlsx = FileFormat.locate("actually-a-tab-file.xlsx", dataset_dirs) + iris = FileFormat.locate("iris", dataset_dirs) + + reader_combo = self.widget.reader_combo + + reader_combo.setCurrentIndex(0) + reader_combo.activated.emit(0) + assert (self.widget.reader_combo.currentText() + == "Determine type from the file extension") + + def open_file(_a, _b, _c, filters, _e): + return filename, filters.split(";;")[0] + + with patch("AnyQt.QtWidgets.QFileDialog.getOpenFileName", + open_file): + + # Loading a tab file with extension xlsx fails with auto-detect + filename = tab_as_xlsx + self.widget.browse_file() + + self.assertEqual(self.widget.reader_combo.currentText(), + "Determine type from the file extension") + self.assertTrue(self.widget.Error.unknown_select.is_shown()) + self.assertIsNone(self.get_output(self.widget.Outputs.data)) + + # Select the tab reader: it should work + self._select_tab_reader() + assert "Tab-separated" in self.widget.reader_combo.currentText() + + self.assertFalse(self.widget.Error.unknown_select.is_shown()) + self.assertIsInstance(self.widget.reader, TabReader) + self.assertIsNotNone(self.get_output(self.widget.Outputs.data)) + + # Switching to iris resets the combo to auto-detect + filename = iris + self.widget.browse_file() + self.assertEqual(self.widget.reader_combo.currentText(), + "Determine type from the file extension") + self.assertIsNotNone(self.get_output(self.widget.Outputs.data)) + + # Taking the tab-as-xlsx file from recent paths should restore + # the file type for that file + self.widget.file_combo.setCurrentIndex(1) + self.widget.file_combo.activated.emit(1) + self.assertIn("Tab-separated", self.widget.reader_combo.currentText()) + self.assertIsNotNone(self.get_output(self.widget.Outputs.data)) + + # Reloading should work + self.widget.load_data() + self.assertIn("Tab-separated", self.widget.reader_combo.currentText()) + self.assertIsNotNone(self.get_output(self.widget.Outputs.data)) + + # Loading this file - not from history - should fail + filename = tab_as_xlsx + self.widget.browse_file() + self.assertTrue(self.widget.Error.unknown_select.is_shown()) + self.assertIsNone(self.get_output(self.widget.Outputs.data)) + + # Set the correct type again (preparation for the next text block) + self._select_tab_reader() + assert not self.widget.Error.unknown_select.is_shown() + assert isinstance(self.widget.reader, TabReader) + assert self.get_output(self.widget.Outputs.data) is not None + + # Now load a real Excel file: this is a known excention so the combo + # should return to auto-detect + filename = FileFormat.locate("an_excel_file.xlsx", dataset_dirs) + self.widget.browse_file() + self.assertEqual(self.widget.reader_combo.currentText(), + "Determine type from the file extension") + self.assertFalse(self.widget.Error.unknown_select.is_shown()) + self.assertIsNotNone(self.get_output(self.widget.Outputs.data)) + + # Load iris to prepare for the next test block + filename = iris + self.widget.browse_file() + assert (self.widget.reader_combo.currentText() + == "Determine type from the file extension") + assert self.get_output(self.widget.Outputs.data) is not None + + # Files with unknown extensions require manual selection + filename = FileFormat.locate("an_excel_file.foo", dataset_dirs) + self.widget.browse_file() + self.assertTrue(self.widget.Error.select_file_type.is_shown()) + self.assertIsNone(self.get_output(self.widget.Outputs.data)) + + self._select_reader("Excel") + self.assertFalse(self.widget.Error.unknown_select.is_shown()) + self.assertFalse(self.widget.Error.select_file_type.is_shown()) + self.assertIsNotNone(self.get_output(self.widget.Outputs.data)) + + # Consecutive loading of files with the same extension keeps selection + filename = FileFormat.locate("an_excel_file-too.foo", dataset_dirs) + self.widget.browse_file() + self.assertFalse(self.widget.Error.unknown_select.is_shown()) + self.assertFalse(self.widget.Error.select_file_type.is_shown()) + self.assertIsNotNone(self.get_output(self.widget.Outputs.data)) + def test_select_reader_errors(self): filename = FileFormat.locate("iris.tab", dataset_dirs) diff --git a/i18n/si/msgs.jaml b/i18n/si/msgs.jaml index c082112ff3..ef64b875a1 100644 --- a/i18n/si/msgs.jaml +++ b/i18n/si/msgs.jaml @@ -6161,7 +6161,7 @@ widgets/data/owfeaturestatistics.py: __main__: false iris: false widgets/data/owfile.py: - Automatically detect type: Samodejno zaznaj vrsto datoteke + Determine type from the file extension: Določi vrsto iz končnice datoteke def `add_origin`: type: false origin: false @@ -6196,8 +6196,10 @@ widgets/data/owfile.py: class `Error`: File not found.: Datoteka ni najdena. Missing reader.: Bralnik za ta tip ne obstaja. + Select file type.: Izberite vrsto datoteke. Error listing available sheets.: Napaka ob ustvarjanju seznama listov. Read error:\n{}: Napaka ob branju:\n{} + Read error, possibly due to incorrect choice of file type:\n{}: Napaka ob branju, morda zaradi napačne izbire vrste datoteke:\n{} 'Use CSV File Import widget for advanced options ': 'Uporabi Bralnik CSV za napredne možnosti ' for comma-separated files: za datoteke ločene z vejico use-csv-file-import: falxe @@ -6232,10 +6234,11 @@ widgets/data/owfile.py: File: Datoteka Cannot find the directory with documentation datasets: Ne najdem mape s podatki iz dokumentacije ~/: false + *: false def `load_data`: No data.: Ni podatkov. def `_get_reader`: - Can not find reader "{qname}": Bralnik "{qname}" ne obstaja. + Can not find reader "{qname}": Ne najdem bralnika "{qname}" def `_describe`: attributes: false Name: Ime