1
+ __author__ = 'mike_bowles'
2
+ import pandas as pd
3
+ from pandas import DataFrame
4
+ import matplotlib .pyplot as plot
5
+ from math import exp
6
# Location of the UCI abalone data set; the file is read without a header row.
target_url = ("http://archive.ics.uci.edu/ml/machine-"
              "learning-databases/abalone/abalone.data")

# Read the abalone data and label the columns in a single step.
# The old ``prefix="V"`` keyword is intentionally not used: it was removed
# from ``read_csv`` in pandas 2.0 (raising TypeError there), and the "V0..V8"
# labels it produced were overwritten immediately anyway.
abalone = pd.read_csv(
    target_url,
    header=None,
    names=['Sex', 'Length', 'Diameter', 'Height',
           'Whole Wt', 'Shucked Wt',
           'Viscera Wt', 'Shell Wt', 'Rings'],
)

# Summary statistics, kept under this name because later code reads the
# mean and standard deviation of 'Rings' from it for a second scaling.
summary = abalone.describe()

# Smallest and largest ring counts, used to min-max scale the target.
# Label-based lookup replaces the positional iloc[3, 7] / iloc[7, 7]
# so the intent does not depend on row/column order in the summary.
minRings = summary.loc['min', 'Rings']
maxRings = summary.loc['max', 'Rings']

# Number of samples (rows) to iterate over when plotting.
nrows = len(abalone.index)
18
+
19
# Parallel-coordinates style plot: every sample is drawn as one line across
# its attribute columns (positions 1 through 7), coloured by its ring count
# (column 8) after min-max scaling.
ringSpan = maxRings - minRings

for rowIdx in range(nrows):
    attributes = abalone.iloc[rowIdx, 1:8]
    rings = abalone.iloc[rowIdx, 8]

    # Position of this sample's ring count between the minimum and maximum.
    shade = (rings - minRings) / ringSpan
    attributes.plot(color=plot.cm.RdYlBu(shade), alpha=0.5)

plot.xlabel("Attribute Index")
plot.ylabel("Attribute Values")
plot.show()
28
+
29
# Same plot again with a different colour scaling: standardize the ring
# count (subtract the mean, divide by the standard deviation), then squash
# the resulting score into (0, 1) with the logistic function
# 1 / (1 + e**-z) before handing it to the colour map.

# Rows 1 and 2 of the describe() table hold the mean and standard deviation;
# column 7 is the last summarized column (the ring count).
meanRings = summary.iloc[1, 7]
sdRings = summary.iloc[2, 7]

for rowIdx in range(nrows):
    # One line per sample across its attribute columns (positions 1..7).
    attributes = abalone.iloc[rowIdx, 1:8]

    zScore = (abalone.iloc[rowIdx, 8] - meanRings) / sdRings
    shade = 1.0 / (1.0 + exp(-zScore))
    attributes.plot(color=plot.cm.RdYlBu(shade), alpha=0.5)

plot.xlabel("Attribute Index")
plot.ylabel("Attribute Values")
plot.show()
0 commit comments