Skip to content

Commit 0f29b13

Browse files
committed
[FIX] VectorizationComputeValue - fix unpickling old pickles
1 parent 2bb750f commit 0f29b13

File tree

3 files changed

+49
-1
lines changed

3 files changed

+49
-1
lines changed
Binary file not shown.

orangecontrib/text/tests/test_vectorization_base.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1+
import os
12
import unittest
23

34
import numpy as np
45
import scipy.sparse as sp
56

67
from orangecontrib.text.corpus import Corpus
7-
from orangecontrib.text.vectorization.base import BaseVectorizer
8+
from orangecontrib.text.vectorization.base import (
9+
BaseVectorizer,
10+
VectorizationComputeValue,
11+
)
812

913

1014
class BaseVectorizationTest(unittest.TestCase):
@@ -32,3 +36,33 @@ def test_variable_attributes(self):
3236

3337
for a in c2.domain.attributes:
3438
self.assertIn('foo', a.attributes)
39+
40+
41+
class TestVectorizationComputeValue(unittest.TestCase):
    """Regression tests for unpickling VectorizationComputeValue."""

    def test_unpickling_old_pickle(self):
        """
        Check that a corpus pickled before orange3-text 1.12.0 still loads.

        Before orange3-text version 1.12.0 ``variable`` was wrongly set to
        the current variable (the variable that has this compute value
        attached) instead of the original variable, which caused failures
        after later changes in core Orange. Since ``variable`` of a
        VectorizationComputeValue is never used in practice, it is no longer
        set (it is always None for VectorizationComputeValue).
        It is, however, still present in pickles created before 1.12.0. This
        test verifies that an old pickle whose variables carry a
        VectorizationComputeValue is unpickled correctly.
        """
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(tests_dir, "data", "old-bow-pickle.pkl")

        data = Corpus.from_file(path)
        self.assertEqual(len(data), 3)

        # Both sampled features must keep their compute value while the
        # stale ``variable`` reference from the old pickle is dropped.
        for feature in ("!", "aboard"):
            with self.subTest(feature=feature):
                compute_value = data.domain[feature].compute_value
                self.assertIsInstance(compute_value, VectorizationComputeValue)
                self.assertIsNone(compute_value.variable)
65+
66+
67+
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

orangecontrib/text/vectorization/base.py

+14
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,17 @@ def __init__(self, compute_shared, name):
7676
def compute(self, _, shared_data):
    """
    Return the column of the shared matrix that belongs to this feature.

    The second positional argument is ignored. ``shared_data`` must provide
    ``feature_name_to_index`` (looked up with ``self.name``) and ``X``
    (indexed as ``X[:, index]``).
    """
    column = shared_data.feature_name_to_index[self.name]
    return shared_data.X[:, column]
79+
80+
def __setstate__(self, state):
    """
    Restore the instance from pickled ``state``, always resetting ``variable``.

    Before orange3-text version 1.12.0 ``variable`` was wrongly set to the
    current variable (the variable that has this compute value attached)
    instead of the original variable, which caused failures after later
    changes in core Orange. Since ``variable`` of a
    VectorizationComputeValue is never used in practice, it is no longer
    set (it is always None for VectorizationComputeValue).
    Pickles created before 1.12.0 still contain it, so it is discarded here
    when such pickles are loaded.

    Parameters
    ----------
    state : dict
        Attribute dictionary stored in the pickle. It is left unmodified.
    """
    # The keyword argument is applied after ``state``, so a stale
    # ``variable`` entry from an old pickle is overridden without mutating
    # the dictionary passed in by the caller.
    self.__dict__.update(state, variable=None)

0 commit comments

Comments
 (0)