From b4ffae357c9f2605af62c17ea736d6f57a8d2a1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Thu, 8 Apr 2021 17:30:05 +0200
Subject: [PATCH 1/2] Corpus: infer text features when include is boolean
---
orangecontrib/text/corpus.py | 6 ++++-
orangecontrib/text/tests/test_corpus.py | 29 +++++++++++++++++++++++++
2 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py
index 4619d931e..1cdaa34aa 100644
--- a/orangecontrib/text/corpus.py
+++ b/orangecontrib/text/corpus.py
@@ -235,7 +235,11 @@ def _infer_text_features(self):
if attr.is_string:
if first is None:
first = attr
- if attr.attributes.get('include', 'False') == 'True':
+ incl = attr.attributes.get('include', False)
+ # Variable attributes can be booleans from Orange 3.29 on;
+ # older versions store them as strings. Keep == True:
+ # a bare truth test would also accept the string "False".
+ if incl == "True" or incl == True:
include_feats.append(attr)
if len(include_feats) == 0 and first:
include_feats.append(first)
diff --git a/orangecontrib/text/tests/test_corpus.py b/orangecontrib/text/tests/test_corpus.py
index 3419d4f47..38b7ca559 100644
--- a/orangecontrib/text/tests/test_corpus.py
+++ b/orangecontrib/text/tests/test_corpus.py
@@ -248,6 +248,35 @@ def test_infer_text_features(self):
self.assertEqual(len(tf), 1)
self.assertEqual(tf[0].name, 'Text')
+ def test_infer_text_features_str_include(self):
+ """
+ In Orange 3.29 the include attribute is read as a boolean. Corpus must
+ still support older versions of Orange, where the attribute is a string.
+ Test the behaviour with a string attribute.
+ """
+ c = Corpus.from_file('andersen')
+ c.domain["Content"].attributes["include"] = "False"
+ c._infer_text_features()
+ self.assertListEqual(c.text_features, [c.domain["Title"]])
+
+ c.domain["Content"].attributes["include"] = "True"
+ c._infer_text_features()
+ self.assertListEqual(c.text_features, [c.domain["Content"]])
+
+ def test_infer_text_features_bool_include(self):
+ """
+ In Orange 3.29 the include attribute is read as a boolean.
+ Test the behaviour with a boolean attribute.
+ """
+ c = Corpus.from_file('andersen')
+ c.domain["Content"].attributes["include"] = False
+ c._infer_text_features()
+ self.assertListEqual(c.text_features, [c.domain["Title"]])
+
+ c.domain["Content"].attributes["include"] = True
+ c._infer_text_features()
+ self.assertListEqual(c.text_features, [c.domain["Content"]])
+
def test_documents(self):
c = Corpus.from_file('book-excerpts')
docs = c.documents
From 75b43d2074206b40b59563d4d2700e4bf42c883f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Thu, 8 Apr 2021 17:30:29 +0200
Subject: [PATCH 2/2] Corpus widget: run with WidgetPreview
---
orangecontrib/text/widgets/owcorpus.py | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/orangecontrib/text/widgets/owcorpus.py b/orangecontrib/text/widgets/owcorpus.py
index 2d4aa6a4f..3c1f5fce6 100644
--- a/orangecontrib/text/widgets/owcorpus.py
+++ b/orangecontrib/text/widgets/owcorpus.py
@@ -307,9 +307,5 @@ def describe(features):
if __name__ == '__main__':
- from AnyQt.QtWidgets import QApplication
- app = QApplication([])
- widget = OWCorpus()
- widget.show()
- app.exec()
- widget.saveSettings()
+ from orangewidget.utils.widgetpreview import WidgetPreview
+ WidgetPreview(OWCorpus).run()