Fixing up possible mistakes and improving readability.

gugarosa · Mar 30, 2020 · c745cd8 · c745cd8
1 parent f578294
commit c745cd8
Show file tree

Hide file tree

Showing 26 changed files with 118 additions and 120 deletions.
diff --git a/examples/core/create_node.py b/examples/core/create_node.py
@@ -1,15 +1,14 @@
 import numpy as np
-
 from opfython.core.node import Node
 
 # Defining an index
 idx = 0
 
 # Defining a label
-label = 0
+label = 1
 
 # Defining an array of features
 features = np.asarray([2, 2.5, 1.5, 4])
 
 # Creating a Node
-n = Node(idx, label, features)
+n = Node(idx, label, features)
diff --git a/examples/math/calculate_distances.py b/examples/math/calculate_distances.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 import opfython.math.distance as d
 
 # Defining an array
@@ -10,3 +9,5 @@
 
 # Calculating their distance
 dist = d.euclidean_distance(x, y)
+
+print(dist)
diff --git a/examples/math/general_purpose.py b/examples/math/general_purpose.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 import opfython.math.general as g
 
 # Defining a general purpose array

diff --git a/examples/models/create_supervised_opf.py b/examples/models/create_supervised_opf.py
@@ -1,5 +1,4 @@
 from opfython.models.supervised import SupervisedOPF
 
 # Creates a SupervisedOPF instance
-opf = SupervisedOPF(distance='log_squared_euclidean',
-                    pre_computed_distance=None)
+opf = SupervisedOPF(distance='log_squared_euclidean', pre_computed_distance=None)
diff --git a/examples/models/create_unsupervised_opf.py b/examples/models/create_unsupervised_opf.py
@@ -1,5 +1,4 @@
 from opfython.models.unsupervised import UnsupervisedOPF
 
 # Creates an UnsupervisedOPF instance
-opf = UnsupervisedOPF(
-    min_k=1, max_k=10, distance='log_squared_euclidean', pre_computed_distance=None)
+opf = UnsupervisedOPF(min_k=1, max_k=10, distance='log_squared_euclidean', pre_computed_distance=None)
diff --git a/examples/stream/load_file.py b/examples/stream/load_file.py
@@ -1,10 +1,10 @@
 import opfython.stream.loader as l
 
 # Loading a .csv file
-csv = l.load_csv('data/sample.csv')
+csv = l.load_csv('data/boat.csv')
 
 # Loading a .txt file
-txt = l.load_txt('data/sample.txt')
+txt = l.load_txt('data/boat.txt')
 
 # Loading a .json file
-json = l.load_json('data/sample.json')
+json = l.load_json('data/boat.json')
diff --git a/examples/stream/parse_loaded_file.py b/examples/stream/parse_loaded_file.py
@@ -2,7 +2,7 @@
 import opfython.stream.parser as p
 
 # Loading a .txt file to a numpy array
-txt = l.load_txt('data/sample.txt')
+txt = l.load_txt('data/boat.txt')
 
 # Parsing a pre-loaded numpy array
 X, Y = p.parse_loader(txt)
diff --git a/opfython/core/heap.py b/opfython/core/heap.py
@@ -179,32 +179,41 @@ def is_empty(self):
         return False
 
     def dad(self, i):
-        """Gathers the position of the dad's node.
+        """Gathers the position of the node's dad.
+
+        Args:
+            i (int): Node's position.
 
         Returns:
-            The position of dad's node.
+            The position of node's dad.
 
         """
 
         # Returns the dad's position
         return int(((i - 1) / 2))
 
     def left_son(self, i):
-        """Gathers the position of the left son's node.
+        """Gathers the position of the node's left son.
+
+        Args:
+            i (int): Node's position.
 
         Returns:
-            The position of left son's node.
+            The position of node's left son.
 
         """
 
         # Returns the left son's position
         return int((2 * i + 1))
 
     def right_son(self, i):
-        """Gathers the position of the right son's node.
+        """Gathers the position of the node's right son.
+
+        Args:
+            i (int): Node's position.
 
         Returns:
-            The position of right son's node.
+            The position of node's right son.
 
         """
 

diff --git a/opfython/core/node.py b/opfython/core/node.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 import opfython.utils.constants as c
 import opfython.utils.exception as e
 import opfython.utils.logging as l
@@ -157,7 +156,8 @@ def cost(self):
 
     @cost.setter
     def cost(self, cost):
-        if not (isinstance(cost, float) or isinstance(cost, int) or isinstance(cost, np.int32) or isinstance(cost, np.int64)):
+        if not (isinstance(cost, float) or isinstance(cost, int)
+                or isinstance(cost, np.int32) or isinstance(cost, np.int64)):
             raise e.TypeError('`cost` should be a float or integer')
 
         self._cost = cost
@@ -172,7 +172,8 @@ def density(self):
 
     @density.setter
     def density(self, density):
-        if not (isinstance(density, float) or isinstance(density, int) or isinstance(density, np.int32) or isinstance(density, np.int64)):
+        if not (isinstance(density, float) or isinstance(density, int)
+                or isinstance(density, np.int32) or isinstance(density, np.int64)):
             raise e.TypeError('`density` should be a float or integer')
 
         self._density = density
@@ -187,7 +188,8 @@ def radius(self):
 
     @radius.setter
     def radius(self, radius):
-        if not (isinstance(radius, float) or isinstance(radius, int) or isinstance(radius, np.int32) or isinstance(radius, np.int64)):
+        if not (isinstance(radius, float) or isinstance(radius, int)
+                or isinstance(radius, np.int32) or isinstance(radius, np.int64)):
             raise e.TypeError('`radius` should be a float or integer')
 
         self._radius = radius

diff --git a/opfython/core/opf.py b/opfython/core/opf.py
@@ -1,7 +1,6 @@
 import pickle
 
 import numpy as np
-
 import opfython.math.distance as d
 import opfython.stream.loader as loader
 import opfython.utils.constants as c
@@ -17,7 +16,8 @@ class OPF:
     """A basic class to define all common OPF-related methods.
 
     References:
-        J. P. Papa, A. X. Falcão and C. T. N. Suzuki. LibOPF: A library for the design of optimum-path forest classifiers (2015).
+        J. P. Papa, A. X. Falcão and C. T. N. Suzuki.
+        LibOPF: A library for the design of optimum-path forest classifiers (2015).
 
     """
 
@@ -47,7 +47,7 @@ def __init__(self, distance='log_squared_euclidean', pre_computed_distance=None)
             self.pre_computed_distance = True
 
             # Apply the distances matrix
-            self.pre_distances = self._read_distances(pre_computed_distance)
+            self._read_distances(pre_computed_distance)
 
         # If OPF should not use a pre-computed distance
         else:
@@ -57,8 +57,7 @@ def __init__(self, distance='log_squared_euclidean', pre_computed_distance=None)
             # Marks the pre-distances property as None
             self.pre_distances = None
 
-        logger.debug(
-            f'Distance: {self.distance} | Pre-computed distance: {self.pre_computed_distance}.')
+        logger.debug(f'Distance: {self.distance} | Pre-computed distance: {self.pre_computed_distance}.')
         logger.info('Class created.')
 
     @property
@@ -87,8 +86,12 @@ def distance(self):
 
     @distance.setter
     def distance(self, distance):
-        if distance not in ['bray_curtis', 'canberra', 'chi_squared', 'euclidean', 'gaussian', 'log_euclidean', 'log_squared_euclidean', 'manhattan', 'squared_chi_squared', 'squared_cord', 'squared_euclidean']:
-            raise e.TypeError('`distance` should be `bray_curtis`, `canberra`, `chi_squared`, `euclidean`, `gaussian`, `log_euclidean`, `log_squared_euclidean`, `manhattan`, `squared_chi_squared`, `squared_cord` or `squared_euclidean`')
+        if distance not in ['bray_curtis', 'canberra', 'chi_squared', 'euclidean',
+                            'gaussian', 'log_euclidean', 'log_squared_euclidean', 
+                            'manhattan', 'squared_chi_squared', 'squared_cord', 'squared_euclidean']:
+            raise e.TypeError('`distance` should be `bray_curtis`, `canberra`, `chi_squared`, '
+                              '`euclidean`, `gaussian`, `log_euclidean`, `log_squared_euclidean`, '
+                              '`manhattan`, `squared_chi_squared`, `squared_cord` or `squared_euclidean`')
 
         self._distance = distance
 
@@ -138,31 +141,28 @@ def pre_distances(self, pre_distances):
 
         self._pre_distances = pre_distances
 
-    def _read_distances(self, file_path):
+    def _read_distances(self, file_name):
         """Reads the distance between nodes from a pre-defined file.
 
         Args:
-            file_path (str): File to be loaded.
-
-        Returns:
-            A matrix with pre-computed distances.
+            file_name (str): File to be loaded.
 
         """
 
         logger.debug('Running private method: read_distances().')
 
         # Getting file extension
-        extension = file_path.split('.')[-1]
+        extension = file_name.split('.')[-1]
 
         # Check if extension is .csv
         if extension == 'csv':
             # If yes, call the method that actually loads csv
-            distances = loader.load_csv(file_path)
+            distances = loader.load_csv(file_name)
 
         # Check if extension is .txt
         elif extension == 'txt':
             # If yes, call the method that actually loads txt
-            distances = loader.load_txt(file_path)
+            distances = loader.load_txt(file_name)
 
         # If extension is not recognized
         else:
@@ -176,7 +176,8 @@ def _read_distances(self, file_path):
             raise e.ValueError(
                 'Pre-computed distances could not been properly loaded')
 
-        return distances
+        # Apply the distances matrix to the property
+        self.pre_distances = distances
 
     def load(self, file_name):
         """Loads the object from a pickle encoding.

diff --git a/opfython/core/subgraph.py b/opfython/core/subgraph.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 import opfython.stream.loader as loader
 import opfython.stream.parser as p
 import opfython.utils.constants as c

diff --git a/opfython/math/distance.py b/opfython/math/distance.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 import opfython.utils.constants as c
 
 

diff --git a/opfython/math/general.py b/opfython/math/general.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 import opfython.math.distance as d
 import opfython.utils.logging as l
 
@@ -33,7 +32,7 @@ def confusion_matrix(labels, preds):
     # For every label and prediction
     for label, pred in zip(labels, preds):
         # Increments the corresponding cell from the confusion matrix
-        c_matrix[label-1][pred-1] += 1
+        c_matrix[label - 1][pred - 1] += 1
 
     return c_matrix
 
@@ -93,10 +92,10 @@ def opf_accuracy(labels, preds):
         # If label is different from prediction
         if label != pred:
             # Increments the corresponding cell from the error matrix
-            errors[pred-1][0] += 1
+            errors[pred - 1][0] += 1
 
             # Increments the corresponding cell from the error matrix
-            errors[label-1][1] += 1
+            errors[label - 1][1] += 1
 
     # Calculating the float value of the true label errors
     errors[:, 1] /= counts
@@ -145,7 +144,7 @@ def opf_accuracy_per_label(labels, preds):
         # If label is different from prediction
         if label != pred:
             # Increments the corresponding cell from the error array
-            errors[label-1] += 1
+            errors[label - 1] += 1
 
     # Calculating the float value of the true label errors
     errors /= counts

diff --git a/opfython/models/knn_supervised.py b/opfython/models/knn_supervised.py
@@ -1,7 +1,6 @@
 import time
 
 import numpy as np
-
 import opfython.math.general as g
 import opfython.utils.constants as c
 import opfython.utils.exception as e
@@ -34,8 +33,7 @@ def __init__(self, max_k=1, distance='log_squared_euclidean', pre_computed_dista
         logger.info('Overriding class: OPF -> KNNSupervisedOPF.')
 
         # Override its parent class with the receiving arguments
-        super(KNNSupervisedOPF, self).__init__(
-            distance=distance, pre_computed_distance=pre_computed_distance)
+        super(KNNSupervisedOPF, self).__init__(distance, pre_computed_distance)
 
         # Defining the maximum `k` value for cutting the subgraph
         self.max_k = max_k
@@ -171,9 +169,6 @@ def _learn(self, X_train, Y_train, X_val, Y_val):
             X_val (np.array): Array of validation features.
             Y_val (np.array): Array of validation labels.
 
-        Returns:
-            The best `k` value found over the validation set.
-
         """
 
         logger.info('Learning best `k` value ...')
@@ -227,7 +222,8 @@ def _learn(self, X_train, Y_train, X_val, Y_val):
             # Destroy the arcs
             self.subgraph.destroy_arcs()
 
-        return best_k
+        # Applying the best k to the subgraph's property
+        self.subgraph.best_k = best_k
 
     def fit(self, X_train, Y_train, X_val, Y_val):
         """Fits data in the classifier.
@@ -246,7 +242,7 @@ def fit(self, X_train, Y_train, X_val, Y_val):
         start = time.time()
 
         # Performing the learning process in order to find the best `k` value
-        self.subgraph.best_k = self._learn(X_train, Y_train, X_val, Y_val)
+        self._learn(X_train, Y_train, X_val, Y_val)
 
         # Creating arcs with the best `k` value
         self.subgraph.create_arcs(
@@ -271,8 +267,7 @@ def fit(self, X_train, Y_train, X_val, Y_val):
         # Calculating training task time
         train_time = end - start
 
-        logger.info(
-            f'Classifier has been fitted with k = {self.subgraph.best_k}.')
+        logger.info(f'Classifier has been fitted with k = {self.subgraph.best_k}.')
         logger.info(f'Training time: {train_time} seconds.')
 
     def predict(self, X_test, verbose=False):
@@ -330,20 +325,18 @@ def predict(self, X_test, verbose=False):
                     neighbours_idx[best_k] = j
 
                     # Gathers current `k`
-                    current_k = best_k
+                    cur_k = best_k
 
                     # While current `k` is bigger than 0 and the `k` distance is smaller than `k-1` distance
-                    while current_k > 0 and distances[current_k] < distances[current_k - 1]:
+                    while cur_k > 0 and distances[cur_k] < distances[cur_k - 1]:
                         # Swaps the distance from `k` and `k-1`
-                        distances[current_k], distances[current_k -
-                                                        1] = distances[current_k - 1], distances[current_k]
+                        distances[cur_k], distances[cur_k - 1] = distances[cur_k - 1], distances[cur_k]
 
                         # Swaps the neighbours indexex from `k` and `k-1`
-                        neighbours_idx[current_k], neighbours_idx[current_k -
-                                                                  1] = neighbours_idx[current_k - 1], neighbours_idx[current_k]
+                        neighbours_idx[cur_k], neighbours_idx[cur_k - 1] = neighbours_idx[cur_k - 1], neighbours_idx[cur_k]
 
                         # Decrements `k`
-                        current_k -= 1
+                        cur_k -= 1
 
             # Defining the density as 0
             density = 0.0
Original file line number	Diff line number	Diff line change
		@@ -1,5 +1,4 @@
		import numpy as np

		import opfython.utils.constants as c


Expand Down