From b4ffae357c9f2605af62c17ea736d6f57a8d2a1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Thu, 8 Apr 2021 17:30:05 +0200
Subject: [PATCH 1/2] Corpus: infer text features when include is boolean

---
 orangecontrib/text/corpus.py            |  6 ++++-
 orangecontrib/text/tests/test_corpus.py | 29 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py
index 4619d931e..1cdaa34aa 100644
--- a/orangecontrib/text/corpus.py
+++ b/orangecontrib/text/corpus.py
@@ -235,7 +235,11 @@ def _infer_text_features(self):
             if attr.is_string:
                 if first is None:
                     first = attr
-                if attr.attributes.get('include', 'False') == 'True':
+                incl = attr.attributes.get('include', False)
+                # Variable attributes can be booleans from Orange 3.29 on;
+                # older versions keep them as strings. Match both explicitly:
+                # a plain truthiness test would accept the string "False".
+                if incl in (True, "True"):
                     include_feats.append(attr)
         if len(include_feats) == 0 and first:
             include_feats.append(first)
diff --git a/orangecontrib/text/tests/test_corpus.py b/orangecontrib/text/tests/test_corpus.py
index 3419d4f47..38b7ca559 100644
--- a/orangecontrib/text/tests/test_corpus.py
+++ b/orangecontrib/text/tests/test_corpus.py
@@ -248,6 +248,35 @@ def test_infer_text_features(self):
         self.assertEqual(len(tf), 1)
         self.assertEqual(tf[0].name, 'Text')
 
+    def test_infer_text_features_str_include(self):
+        """
+        In Orange 3.29 the include attribute is read as a boolean. Corpus must
+        still support older versions of Orange where it is a string.
+        Test behaviour with a string attribute.
+        """
+        c = Corpus.from_file('andersen')
+        c.domain["Content"].attributes["include"] = "False"
+        c._infer_text_features()
+        self.assertListEqual(c.text_features, [c.domain["Title"]])
+
+        c.domain["Content"].attributes["include"] = "True"
+        c._infer_text_features()
+        self.assertListEqual(c.text_features, [c.domain["Content"]])
+
+    def test_infer_text_features_bool_include(self):
+        """
+        In Orange 3.29 the include attribute is read as a boolean.
+        Test behaviour with a boolean attribute.
+        """
+        c = Corpus.from_file('andersen')
+        c.domain["Content"].attributes["include"] = False
+        c._infer_text_features()
+        self.assertListEqual(c.text_features, [c.domain["Title"]])
+
+        c.domain["Content"].attributes["include"] = True
+        c._infer_text_features()
+        self.assertListEqual(c.text_features, [c.domain["Content"]])
+
     def test_documents(self):
         c = Corpus.from_file('book-excerpts')
         docs = c.documents

From 75b43d2074206b40b59563d4d2700e4bf42c883f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Thu, 8 Apr 2021 17:30:29 +0200
Subject: [PATCH 2/2] Corpus widget: run with WidgetPreview

---
 orangecontrib/text/widgets/owcorpus.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/orangecontrib/text/widgets/owcorpus.py b/orangecontrib/text/widgets/owcorpus.py
index 2d4aa6a4f..3c1f5fce6 100644
--- a/orangecontrib/text/widgets/owcorpus.py
+++ b/orangecontrib/text/widgets/owcorpus.py
@@ -307,9 +307,5 @@ def describe(features):
 
 
 if __name__ == '__main__':
-    from AnyQt.QtWidgets import QApplication
-    app = QApplication([])
-    widget = OWCorpus()
-    widget.show()
-    app.exec()
-    widget.saveSettings()
+    from orangewidget.utils.widgetpreview import WidgetPreview
+    WidgetPreview(OWCorpus).run()