Skip to content

Commit

Permalink
Fixing up possible mistakes and improving readability.
Browse files Browse the repository at this point in the history
  • Loading branch information
gugarosa committed Mar 30, 2020
1 parent f578294 commit c745cd8
Show file tree
Hide file tree
Showing 26 changed files with 118 additions and 120 deletions.
5 changes: 2 additions & 3 deletions examples/core/create_node.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import numpy as np

from opfython.core.node import Node

# Defining an index
idx = 0

# Defining a label
label = 0
label = 1

# Defining an array of features
features = np.asarray([2, 2.5, 1.5, 4])

# Creating a Node
n = Node(idx, label, features)
n = Node(idx, label, features)
3 changes: 2 additions & 1 deletion examples/math/calculate_distances.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np

import opfython.math.distance as d

# Defining an array
Expand All @@ -10,3 +9,5 @@

# Calculating their distance
dist = d.euclidean_distance(x, y)

print(dist)
1 change: 0 additions & 1 deletion examples/math/general_purpose.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np

import opfython.math.general as g

# Defining a general purpose array
Expand Down
3 changes: 1 addition & 2 deletions examples/models/create_supervised_opf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from opfython.models.supervised import SupervisedOPF

# Creates a SupervisedOPF instance
opf = SupervisedOPF(distance='log_squared_euclidean',
pre_computed_distance=None)
opf = SupervisedOPF(distance='log_squared_euclidean', pre_computed_distance=None)
3 changes: 1 addition & 2 deletions examples/models/create_unsupervised_opf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from opfython.models.unsupervised import UnsupervisedOPF

# Creates an UnsupervisedOPF instance
opf = UnsupervisedOPF(
min_k=1, max_k=10, distance='log_squared_euclidean', pre_computed_distance=None)
opf = UnsupervisedOPF(min_k=1, max_k=10, distance='log_squared_euclidean', pre_computed_distance=None)
6 changes: 3 additions & 3 deletions examples/stream/load_file.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import opfython.stream.loader as l

# Loading a .csv file
csv = l.load_csv('data/sample.csv')
csv = l.load_csv('data/boat.csv')

# Loading a .txt file
txt = l.load_txt('data/sample.txt')
txt = l.load_txt('data/boat.txt')

# Loading a .json file
json = l.load_json('data/sample.json')
json = l.load_json('data/boat.json')
2 changes: 1 addition & 1 deletion examples/stream/parse_loaded_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import opfython.stream.parser as p

# Loading a .txt file to a numpy array
txt = l.load_txt('data/sample.txt')
txt = l.load_txt('data/boat.txt')

# Parsing a pre-loaded numpy array
X, Y = p.parse_loader(txt)
21 changes: 15 additions & 6 deletions opfython/core/heap.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,32 +179,41 @@ def is_empty(self):
return False

def dad(self, i):
"""Gathers the position of the dad's node.
"""Gathers the position of the node's dad.
Args:
i (int): Node's position.
Returns:
The position of dad's node.
The position of node's dad.
"""

# Returns the dad's position
return int(((i - 1) / 2))

def left_son(self, i):
"""Gathers the position of the left son's node.
"""Gathers the position of the node's left son.
Args:
i (int): Node's position.
Returns:
The position of left son's node.
The position of node's left son.
"""

# Returns the left son's position
return int((2 * i + 1))

def right_son(self, i):
"""Gathers the position of the right son's node.
"""Gathers the position of the node's right son.
Args:
i (int): Node's position.
Returns:
The position of right son's node.
The position of node's right son.
"""

Expand Down
10 changes: 6 additions & 4 deletions opfython/core/node.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np

import opfython.utils.constants as c
import opfython.utils.exception as e
import opfython.utils.logging as l
Expand Down Expand Up @@ -157,7 +156,8 @@ def cost(self):

@cost.setter
def cost(self, cost):
if not (isinstance(cost, float) or isinstance(cost, int) or isinstance(cost, np.int32) or isinstance(cost, np.int64)):
if not (isinstance(cost, float) or isinstance(cost, int)
or isinstance(cost, np.int32) or isinstance(cost, np.int64)):
raise e.TypeError('`cost` should be a float or integer')

self._cost = cost
Expand All @@ -172,7 +172,8 @@ def density(self):

@density.setter
def density(self, density):
if not (isinstance(density, float) or isinstance(density, int) or isinstance(density, np.int32) or isinstance(density, np.int64)):
if not (isinstance(density, float) or isinstance(density, int)
or isinstance(density, np.int32) or isinstance(density, np.int64)):
raise e.TypeError('`density` should be a float or integer')

self._density = density
Expand All @@ -187,7 +188,8 @@ def radius(self):

@radius.setter
def radius(self, radius):
if not (isinstance(radius, float) or isinstance(radius, int) or isinstance(radius, np.int32) or isinstance(radius, np.int64)):
if not (isinstance(radius, float) or isinstance(radius, int)
or isinstance(radius, np.int32) or isinstance(radius, np.int64)):
raise e.TypeError('`radius` should be a float or integer')

self._radius = radius
Expand Down
33 changes: 17 additions & 16 deletions opfython/core/opf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pickle

import numpy as np

import opfython.math.distance as d
import opfython.stream.loader as loader
import opfython.utils.constants as c
Expand All @@ -17,7 +16,8 @@ class OPF:
"""A basic class to define all common OPF-related methods.
References:
J. P. Papa, A. X. Falcão and C. T. N. Suzuki. LibOPF: A library for the design of optimum-path forest classifiers (2015).
J. P. Papa, A. X. Falcão and C. T. N. Suzuki.
LibOPF: A library for the design of optimum-path forest classifiers (2015).
"""

Expand Down Expand Up @@ -47,7 +47,7 @@ def __init__(self, distance='log_squared_euclidean', pre_computed_distance=None)
self.pre_computed_distance = True

# Apply the distances matrix
self.pre_distances = self._read_distances(pre_computed_distance)
self._read_distances(pre_computed_distance)

# If OPF should not use a pre-computed distance
else:
Expand All @@ -57,8 +57,7 @@ def __init__(self, distance='log_squared_euclidean', pre_computed_distance=None)
# Marks the pre-distances property as None
self.pre_distances = None

logger.debug(
f'Distance: {self.distance} | Pre-computed distance: {self.pre_computed_distance}.')
logger.debug(f'Distance: {self.distance} | Pre-computed distance: {self.pre_computed_distance}.')
logger.info('Class created.')

@property
Expand Down Expand Up @@ -87,8 +86,12 @@ def distance(self):

@distance.setter
def distance(self, distance):
if distance not in ['bray_curtis', 'canberra', 'chi_squared', 'euclidean', 'gaussian', 'log_euclidean', 'log_squared_euclidean', 'manhattan', 'squared_chi_squared', 'squared_cord', 'squared_euclidean']:
raise e.TypeError('`distance` should be `bray_curtis`, `canberra`, `chi_squared`, `euclidean`, `gaussian`, `log_euclidean`, `log_squared_euclidean`, `manhattan`, `squared_chi_squared`, `squared_cord` or `squared_euclidean`')
if distance not in ['bray_curtis', 'canberra', 'chi_squared', 'euclidean',
'gaussian', 'log_euclidean', 'log_squared_euclidean',
'manhattan', 'squared_chi_squared', 'squared_cord', 'squared_euclidean']:
raise e.TypeError('`distance` should be `bray_curtis`, `canberra`, `chi_squared`, '
'`euclidean`, `gaussian`, `log_euclidean`, `log_squared_euclidean`, '
'`manhattan`, `squared_chi_squared`, `squared_cord` or `squared_euclidean`')

self._distance = distance

Expand Down Expand Up @@ -138,31 +141,28 @@ def pre_distances(self, pre_distances):

self._pre_distances = pre_distances

def _read_distances(self, file_path):
def _read_distances(self, file_name):
"""Reads the distance between nodes from a pre-defined file.
Args:
file_path (str): File to be loaded.
Returns:
A matrix with pre-computed distances.
file_name (str): File to be loaded.
"""

logger.debug('Running private method: read_distances().')

# Getting file extension
extension = file_path.split('.')[-1]
extension = file_name.split('.')[-1]

# Check if extension is .csv
if extension == 'csv':
# If yes, call the method that actually loads csv
distances = loader.load_csv(file_path)
distances = loader.load_csv(file_name)

# Check if extension is .txt
elif extension == 'txt':
# If yes, call the method that actually loads txt
distances = loader.load_txt(file_path)
distances = loader.load_txt(file_name)

# If extension is not recognized
else:
Expand All @@ -176,7 +176,8 @@ def _read_distances(self, file_path):
raise e.ValueError(
'Pre-computed distances could not been properly loaded')

return distances
# Apply the distances matrix to the property
self.pre_distances = distances

def load(self, file_name):
"""Loads the object from a pickle encoding.
Expand Down
1 change: 0 additions & 1 deletion opfython/core/subgraph.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np

import opfython.stream.loader as loader
import opfython.stream.parser as p
import opfython.utils.constants as c
Expand Down
1 change: 0 additions & 1 deletion opfython/math/distance.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np

import opfython.utils.constants as c


Expand Down
9 changes: 4 additions & 5 deletions opfython/math/general.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np

import opfython.math.distance as d
import opfython.utils.logging as l

Expand Down Expand Up @@ -33,7 +32,7 @@ def confusion_matrix(labels, preds):
# For every label and prediction
for label, pred in zip(labels, preds):
# Increments the corresponding cell from the confusion matrix
c_matrix[label-1][pred-1] += 1
c_matrix[label - 1][pred - 1] += 1

return c_matrix

Expand Down Expand Up @@ -93,10 +92,10 @@ def opf_accuracy(labels, preds):
# If label is different from prediction
if label != pred:
# Increments the corresponding cell from the error matrix
errors[pred-1][0] += 1
errors[pred - 1][0] += 1

# Increments the corresponding cell from the error matrix
errors[label-1][1] += 1
errors[label - 1][1] += 1

# Calculating the float value of the true label errors
errors[:, 1] /= counts
Expand Down Expand Up @@ -145,7 +144,7 @@ def opf_accuracy_per_label(labels, preds):
# If label is different from prediction
if label != pred:
# Increments the corresponding cell from the error array
errors[label-1] += 1
errors[label - 1] += 1

# Calculating the float value of the true label errors
errors /= counts
Expand Down
27 changes: 10 additions & 17 deletions opfython/models/knn_supervised.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import time

import numpy as np

import opfython.math.general as g
import opfython.utils.constants as c
import opfython.utils.exception as e
Expand Down Expand Up @@ -34,8 +33,7 @@ def __init__(self, max_k=1, distance='log_squared_euclidean', pre_computed_dista
logger.info('Overriding class: OPF -> KNNSupervisedOPF.')

# Overrides its parent class with the received arguments
super(KNNSupervisedOPF, self).__init__(
distance=distance, pre_computed_distance=pre_computed_distance)
super(KNNSupervisedOPF, self).__init__(distance, pre_computed_distance)

# Defining the maximum `k` value for cutting the subgraph
self.max_k = max_k
Expand Down Expand Up @@ -171,9 +169,6 @@ def _learn(self, X_train, Y_train, X_val, Y_val):
X_val (np.array): Array of validation features.
Y_val (np.array): Array of validation labels.
Returns:
The best `k` value found over the validation set.
"""

logger.info('Learning best `k` value ...')
Expand Down Expand Up @@ -227,7 +222,8 @@ def _learn(self, X_train, Y_train, X_val, Y_val):
# Destroy the arcs
self.subgraph.destroy_arcs()

return best_k
# Applying the best k to the subgraph's property
self.subgraph.best_k = best_k

def fit(self, X_train, Y_train, X_val, Y_val):
"""Fits data in the classifier.
Expand All @@ -246,7 +242,7 @@ def fit(self, X_train, Y_train, X_val, Y_val):
start = time.time()

# Performing the learning process in order to find the best `k` value
self.subgraph.best_k = self._learn(X_train, Y_train, X_val, Y_val)
self._learn(X_train, Y_train, X_val, Y_val)

# Creating arcs with the best `k` value
self.subgraph.create_arcs(
Expand All @@ -271,8 +267,7 @@ def fit(self, X_train, Y_train, X_val, Y_val):
# Calculating training task time
train_time = end - start

logger.info(
f'Classifier has been fitted with k = {self.subgraph.best_k}.')
logger.info(f'Classifier has been fitted with k = {self.subgraph.best_k}.')
logger.info(f'Training time: {train_time} seconds.')

def predict(self, X_test, verbose=False):
Expand Down Expand Up @@ -330,20 +325,18 @@ def predict(self, X_test, verbose=False):
neighbours_idx[best_k] = j

# Gathers current `k`
current_k = best_k
cur_k = best_k

# While current `k` is bigger than 0 and the `k` distance is smaller than `k-1` distance
while current_k > 0 and distances[current_k] < distances[current_k - 1]:
while cur_k > 0 and distances[cur_k] < distances[cur_k - 1]:
# Swaps the distance from `k` and `k-1`
distances[current_k], distances[current_k -
1] = distances[current_k - 1], distances[current_k]
distances[cur_k], distances[cur_k - 1] = distances[cur_k - 1], distances[cur_k]

# Swaps the neighbours indexes from `k` and `k-1`
neighbours_idx[current_k], neighbours_idx[current_k -
1] = neighbours_idx[current_k - 1], neighbours_idx[current_k]
neighbours_idx[cur_k], neighbours_idx[cur_k - 1] = neighbours_idx[cur_k - 1], neighbours_idx[cur_k]

# Decrements `k`
current_k -= 1
cur_k -= 1

# Defining the density as 0
density = 0.0
Expand Down
Loading

0 comments on commit c745cd8

Please sign in to comment.