-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmemiMongo.R
112 lines (96 loc) · 3.06 KB
/
memiMongo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
library(rmongodb)
library(plyr)
# Connection settings for the MongoDB server. All four values are empty
# placeholders here and have to be filled in before the script is sourced.
host<- ""
# Database name; the functions in this file also use it as the prefix of
# every "<db>.<collection>" namespace they build.
db <- ""
username <- ""
password <- ""
# The connection is created once, when this file is sourced, and stored in the
# file-level variable `mongo` that the functions in this file read.
mongo <- mongo.create(host=host , db=db , username=username, password=password)
# Fetch the distinct values of the "text" field in the "docs" collection.
#
# Reads the file-level `mongo` connection and `db` name.
#
# Returns the distinct values converted with as.list(). When the connection
# is down, prints "not connected" and returns that string invisibly.
getDocumentList <- function() {
  if (!mongo.is.connected(mongo)) {
    return(print("not connected"))
  }

  docsNamespace <- paste0(db, ".", "docs")
  as.list(mongo.distinct(mongo, docsNamespace, "text"))
}
# Return every distinct "stopword" value stored in the "stopwords" collection.
#
# Reads the file-level `mongo` connection and `db` name.
#
# Returns the distinct values converted with as.list(). When the connection
# is down, prints "not connected" and returns that string invisibly.
getAllStopwords <- function() {
  if (!mongo.is.connected(mongo)) {
    return(print("not connected"))
  }

  stopwordNamespace <- paste0(db, ".", "stopwords")
  stopwords <- mongo.distinct(mongo, stopwordNamespace, "stopword")
  as.list(stopwords)
}
# Store `word` in the "stopwords" collection unless it is already there.
#
# word: the stopword to store.
#
# The lookup record is built with mongo.bson.from.list() instead of pasting
# `word` into a JSON string, so a word containing double quotes or
# backslashes is matched literally rather than producing a malformed or
# altered query.
#
# Returns, invisibly: the result of mongo.insert() when the word was added,
# NULL when a matching record already exists, or the string "not connected"
# (after printing it) when the connection is down.
setAdditional <- function(word) {
  if (!mongo.is.connected(mongo)) {
    return(print("not connected"))
  }

  namespace <- paste(db, "stopwords", sep = ".")

  # The same single-field record serves as the lookup criteria and as the
  # document to insert.
  entry <- mongo.bson.from.list(list(stopword = word))

  existing <- mongo.find.one(mongo, namespace, entry)
  if (length(existing) > 0) {
    return(invisible(NULL))
  }

  invisible(mongo.insert(mongo, namespace, entry))
}
# Insert one post record into the "post" collection.
#
# author, text, date: stored under the keys "author", "text" and "date".
#
# Reads the file-level `mongo` connection and `db` name.
#
# Returns the result of mongo.insert() invisibly. When the connection is
# down, prints "not connected" and returns that string invisibly.
setPost <- function(author, text, date) {
  if (!mongo.is.connected(mongo)) {
    return(print("not connected"))
  }

  postNamespace <- paste0(db, ".", "post")
  record <- list(author = author, text = text, date = date)
  invisible(mongo.insert(mongo, postNamespace, mongo.bson.from.list(record)))
}
# Load threads from the "thread" collection.
#
# The query selects documents in which "posts.date" exists; the projection
# keeps "posts.text", "posts.date" and "topic" and drops "_id".
#
# Reads the file-level `mongo` connection and `db` name.
#
# Returns whatever mongo.find.all() yields for that query. When the
# connection is down, prints "not connected" and returns that string
# invisibly.
getPosts <- function() {
  if (!mongo.is.connected(mongo)) {
    return(print("not connected"))
  }

  threadNamespace <- paste0(db, ".", "thread")
  wantedFields <- list("posts.text" = 1, "posts.date" = 1, "_id" = 0, "topic" = 1)
  hasDatedPost <- '{"posts.date":{"$exists":1}}'

  mongo.find.all(mongo, threadNamespace, query = hasDatedPost, fields = wantedFields)
}
# Flatten the threads returned by getPosts() into one row per dated post.
#
# Returns a data frame with the columns
#   date  - the post's date as "YYYY-MM-DD" text,
#   topic - the topic of the thread the post belongs to,
#   size  - a rough word count of the post's text.
# Posts whose date is missing or cannot be parsed with "%Y-%m-%d" are
# skipped. When no post qualifies, an empty data frame with the same three
# columns is returned.
#
# Stops with an error when getPosts() returns something other than a list
# (it returns the string "not connected" when the connection is down).
getPostsAsCSV <- function() {
  threads <- getPosts()
  if (length(threads) > 0 && !is.list(threads)) {
    stop("getPosts() did not return a list of threads", call. = FALSE)
  }

  # Date of one post as "YYYY-MM-DD"; NA when the field is absent or does not
  # parse. The length check keeps posts without a date from breaking the
  # filter below.
  parseDay <- function(value) {
    if (length(value) != 1) {
      return(NA_character_)
    }
    as.character(strptime(value, format = "%Y-%m-%d"))
  }

  # Rough word count: number of non-word runs in the text plus one.
  # NOTE(review): text without any non-word character still counts as 2,
  # because gregexpr() reports "no match" as a single -1. Kept unchanged so
  # existing size values stay comparable.
  estimateSize <- function(text) {
    if (length(text) != 1) {
      return(NA_real_)
    }
    length(gregexpr("\\W+", text)[[1]]) + 1
  }

  # One list of equally long vectors per thread; combined once at the end
  # instead of growing three vectors post by post.
  perThread <- lapply(threads, function(thread) {
    days <- vapply(
      thread$posts,
      function(post) parseDay(post$date),
      character(1),
      USE.NAMES = FALSE
    )
    usable <- !is.na(days)
    sizes <- vapply(
      thread$posts[usable],
      function(post) estimateSize(post$text),
      numeric(1),
      USE.NAMES = FALSE
    )
    topic <- if (length(thread$topic) == 1) thread$topic else NA_character_
    list(date = days[usable], topic = rep(topic, sum(usable)), size = sizes)
  })

  dates <- unlist(lapply(perThread, function(part) part$date), use.names = FALSE)
  topics <- unlist(lapply(perThread, function(part) part$topic), use.names = FALSE)
  sizes <- unlist(lapply(perThread, function(part) part$size), use.names = FALSE)

  # data.frame() on empty input would yield zero columns, so the empty result
  # is built explicitly.
  if (length(dates) == 0) {
    return(data.frame(
      date = character(0),
      topic = character(0),
      size = numeric(0)
    ))
  }

  df <- data.frame(dates, topics, sizes)
  names(df) <- c("date", "topic", "size")
  df
}
# Insert a thread (topic plus its posts) into the "thread" collection unless
# a thread with the same topic already exists.
#
# topic: the thread's topic, stored under the key "topic".
# post:  the thread's posts, stored under the key "posts".
#
# The existence check is built with mongo.bson.from.list() instead of pasting
# `topic` into a JSON string, so a topic containing double quotes or
# backslashes is matched literally rather than producing a malformed or
# altered query.
#
# Returns, invisibly: the result of mongo.insert() when the thread was added,
# NULL when a thread with that topic already exists, or the string
# "not connected" (after printing it) when the connection is down.
setThread <- function(topic, post) {
  if (!mongo.is.connected(mongo)) {
    return(print("not connected"))
  }

  namespace <- paste(db, "thread", sep = ".")

  topicQuery <- mongo.bson.from.list(list(topic = topic))
  existing <- mongo.find.one(mongo, namespace, topicQuery)
  if (length(existing) > 0) {
    return(invisible(NULL))
  }

  thread <- mongo.bson.from.list(list(topic = topic, posts = post))
  invisible(mongo.insert(mongo, namespace, thread))
}