From de1fd987ac15af2a3a61338836b17b83cd530144 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?= <p.godec9@gmail.com>
Date: Mon, 30 Sep 2019 17:00:12 +0200
Subject: [PATCH] Fix stopwords filtering

---
 orangecontrib/text/preprocess/filter.py     |  5 +++--
 orangecontrib/text/tests/test_preprocess.py | 20 +++++++++++++++++---
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/orangecontrib/text/preprocess/filter.py b/orangecontrib/text/preprocess/filter.py
index f79ab09fa..e9e84a1f6 100644
--- a/orangecontrib/text/preprocess/filter.py
+++ b/orangecontrib/text/preprocess/filter.py
@@ -78,7 +78,7 @@ def supported_languages():
         except LookupError:  # when no NLTK data is available
             pass
 
-        return [file.capitalize() for file in stopwords_listdir]
+        return sorted(file.capitalize() for file in stopwords_listdir)
 
     @wait_nltk_data
     def __init__(self, language='English', word_list=None):
@@ -96,7 +96,8 @@ def language(self, value):
         if not self._language:
             self.stopwords = []
         else:
-            self.stopwords = set(stopwords.words(self.language.lower()))
+            self.stopwords = set(
+                x.strip() for x in stopwords.words(self.language.lower()))
 
     def __str__(self):
         config = ''
diff --git a/orangecontrib/text/tests/test_preprocess.py b/orangecontrib/text/tests/test_preprocess.py
index e2cc7f943..0dc51a3ea 100644
--- a/orangecontrib/text/tests/test_preprocess.py
+++ b/orangecontrib/text/tests/test_preprocess.py
@@ -276,10 +276,24 @@ def check(self, token):
         self.assertEqual(df([['a', '1']]), [['a']])
 
     def test_stopwords(self):
-        filter = preprocess.StopwordsFilter('english')
+        f = preprocess.StopwordsFilter('english')
 
-        self.assertFalse(filter.check('a'))
-        self.assertTrue(filter.check('filter'))
+        self.assertFalse(f.check('a'))
+        self.assertTrue(f.check('filter'))
+
+        self.assertListEqual(
+            ["snake", "house"],
+            f(["a", "snake", "is", "in", "a", "house"]))
+
+    def test_stopwords_slovene(self):
+        f = preprocess.StopwordsFilter('slovene')
+
+        self.assertFalse(f.check('in'))
+        self.assertTrue(f.check('abeceda'))
+
+        self.assertListEqual(
+            ["kača", "hiši"],
+            f(["kača", "je", "v", "hiši", "in"]))
 
     def test_lexicon(self):
         filter = preprocess.LexiconFilter(['filter'])