-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnaivebayes2.R
117 lines (93 loc) · 2.36 KB
/
naivebayes2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
library(RTextTools)
library(e1071)
# Switch to the dataset directory so the relative input paths below resolve.
setwd("/home/akat/Documents/sentiment-dataset/final-dataset")

# Input files for the positive, negative and neutral record sets.
posFile <- "./posout.txt"
negFile <- "./negout.txt"
neuFile <- "./neutral.txt"
# Read labelled text records from a file.
#
# The file is a sequence of records, each terminated by a line that is exactly
# "<--->". Lines after the last terminator are ignored. Within a record,
# punctuation is replaced by spaces and the lines are joined with single
# spaces (each record text therefore starts with one leading space, which
# matches how the records were originally accumulated).
#
# Args:
#   path:        Path of the file to read.
#   label:       Class label stored in the second column of every row.
#   max_records: Maximum number of records to return.
#
# Returns:
#   A two-column character matrix (text, label) with one row per record, or
#   NULL when the file contains no terminated record.
read_labelled_records <- function(path, label, max_records) {
  conn <- file(path, open = "r")
  # Close the connection even if readLines() fails.
  on.exit(close(conn), add = TRUE)
  lines <- readLines(conn)

  # Positions of the record terminators, capped at the requested count.
  sep_idx <- head(which(lines == "<--->"), max_records)
  if (length(sep_idx) == 0) {
    return(NULL)
  }

  # Record k spans the lines between the previous terminator and its own.
  starts <- c(1L, head(sep_idx, -1L) + 1L)
  ends <- sep_idx - 1L

  texts <- vapply(seq_along(sep_idx), function(k) {
    # seq_len() yields an empty index for a record with no body lines.
    body <- lines[seq_len(ends[k] - starts[k] + 1L) + starts[k] - 1L]
    body <- gsub(pattern = "[[:punct:]]", replacement = " ", body)
    # The leading "" reproduces the leading space of the accumulated text.
    paste(c("", body), collapse = " ")
  }, character(1))

  matrix(c(texts, rep(label, length(texts))), ncol = 2)
}

positive <- read_labelled_records(posFile, "positive", max_records = 2820)
print("positive bound")

negative <- read_labelled_records(negFile, "negative", max_records = 2820)
print("negative bound")

neutral <- read_labelled_records(neuFile, "neutral", max_records = 141)
print("neutral bound")
# Five hand-written probe words with their expected sentiment; these form the
# held-out test rows.
test <- rbind(
  c("good", "positive"),
  c("nice", "positive"),
  c("bad", "negative"),
  c("poor", "negative"),
  c("great", "positive")
)

print("binding dataset")
# Training rows come first (positive, then negative); the test rows are last.
# NOTE(review): `neutral` is loaded earlier in the script but is not part of
# this dataset -- confirm that is intended.
dataset <- rbind(positive, negative, test)

# Derive the train/test split from the rows actually loaded instead of
# assuming exactly 2820 + 2820 training records were present in the files.
n_train <- NROW(positive) + NROW(negative)
stopifnot(n_train > 0, nrow(dataset) == n_train + nrow(test))
train_idx <- seq_len(n_train)
test_idx <- n_train + seq_len(nrow(test))

print("creating matrix")
# Unigram document-term matrix over the text column.
doc_term_matrix <- create_matrix(
  dataset[, 1],
  language = "english",
  ngramLength = 1,
  removeStopwords = TRUE,
  removeNumbers = TRUE,
  removePunctuation = TRUE,
  stemWords = FALSE
)
mat <- as.matrix(doc_term_matrix)

print("training model")
classifier <- naiveBayes(
  mat[train_idx, , drop = FALSE],
  as.factor(dataset[train_idx, 2])
)

print("predicting")
predicted <- predict(classifier, mat[test_idx, , drop = FALSE])
print(predicted)

# Explicit print() so the evaluation is shown under source() as well as
# Rscript.
print(table(dataset[test_idx, 2], predicted))
print(recall_accuracy(dataset[test_idx, 2], predicted))