Skip to content

Commit

Permalink
Test POS tag filter
Browse files (browse the repository at this point in the history)
  • Loading branch information
ajdapretnar committed Jul 22, 2021
1 parent 039e0bd commit ec7e743
Showing 1 changed file with 19 additions and 2 deletions.
21 changes: 19 additions & 2 deletions orangecontrib/text/tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,20 @@ def test_retain_ids(self):
corpus = pp(corpus)
self.assertTrue((corpus.ids == self.corpus.ids).all())

def test_filter_pos_tags(self):
    """Check that POS tags stay aligned with tokens after token filtering.

    Runs lowercase -> tokenize -> POS-tag -> stopword-filter over the
    fixture corpus and verifies that the tags left on the first document
    match, one for one, the tokens that were kept.
    """
    steps = (
        preprocess.LowercaseTransformer(),
        preprocess.WordPunctTokenizer(),
        tag.AveragedPerceptronTagger(),
        preprocess.StopwordsFilter(),
    )

    corpus = self.corpus
    # Put a known sentence at metas[0, 0] so the expected tokens and tags
    # below are deterministic (presumably this cell holds the first
    # document's text -- confirm against the fixture).
    corpus.metas[0, 0] = "This is the most beautiful day in the world"

    # Each step takes a corpus and returns the processed one.
    for step in steps:
        corpus = step(corpus)

    # One tag sequence per document ...
    self.assertEqual(len(corpus.tokens), len(corpus.pos_tags))
    # ... and one tag per remaining token in the first document.
    self.assertEqual(len(corpus.tokens[0]), len(corpus.pos_tags[0]))

    # Tagging runs before the stopword filter, so the filter must drop
    # the tags of the removed words together with the words themselves.
    self.assertEqual(corpus.tokens[0], ["beautiful", "day", "world"])
    self.assertEqual(corpus.pos_tags[0], ["JJ", "NN", "NN"])


class TransformationTests(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -331,8 +345,11 @@ def _check(self, token):
return not token.isdigit()

df = DigitsFilter()
self.assertEqual(df._preprocess([]), [])
self.assertEqual(df._preprocess(['a', '1']), ['a'])
filtered = list(itertools.compress([], df._preprocess([])))
self.assertEqual(filtered, [])
filtered = list(itertools.compress(['a', '1'],
df._preprocess(['a', '1'])))
self.assertEqual(filtered, ['a'])

def test_stopwords(self):
f = preprocess.StopwordsFilter('english')
Expand Down

0 comments on commit ec7e743

Please sign in to comment.