-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathbaidu.go
143 lines (120 loc) · 3.16 KB
/
baidu.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
package serp
import (
"encoding/json"
"fmt"
"github.com/mslmio/oxylabs-sdk-go/oxylabs"
)
// BaiduSearchAcceptedDomainParameters lists the domain values accepted for a
// baidu search request. BaiduSearchOpts.checkParameterValidity rejects any
// Domain that is not in this list.
var BaiduSearchAcceptedDomainParameters = []oxylabs.Domain{
oxylabs.DOMAIN_COM,
oxylabs.DOMAIN_CN,
}
// checkParameterValidity reports the first invalid baidu search option.
//
// The domain is checked against BaiduSearchAcceptedDomainParameters first,
// then the user agent is checked with oxylabs.IsUserAgentValid. It returns
// nil when both checks pass.
func (opt *BaiduSearchOpts) checkParameterValidity() error {
	// Cases are evaluated top to bottom, so the domain error takes
	// precedence over the user agent error.
	switch {
	case !oxylabs.InList(opt.Domain, BaiduSearchAcceptedDomainParameters):
		return fmt.Errorf("invalid domain parameter: %s", opt.Domain)
	case !oxylabs.IsUserAgentValid(opt.UserAgent):
		return fmt.Errorf("invalid user agent parameter: %v", opt.UserAgent)
	default:
		return nil
	}
}
// checkParameterValidity reports whether the baidu url options are usable.
//
// The only option validated is the user agent, via oxylabs.IsUserAgentValid.
// It returns nil when the user agent is valid and an error otherwise.
func (opt *BaiduUrlOpts) checkParameterValidity() error {
	if oxylabs.IsUserAgentValid(opt.UserAgent) {
		return nil
	}

	return fmt.Errorf("invalid user agent parameter: %v", opt.UserAgent)
}
// BaiduSearchOpts holds the optional parameters accepted by
// ScrapeBaiduSearch. Each field is forwarded in the request payload under the
// key named in its comment.
type BaiduSearchOpts struct {
// Domain is sent as "domain". After SetDefaultDomain has been applied it
// must be one of BaiduSearchAcceptedDomainParameters.
Domain oxylabs.Domain
// StartPage is sent as "start_page" after SetDefaultStartPage has been
// applied.
StartPage int
// Pages is sent as "pages".
Pages int
// Limit is sent as "limit" after SetDefaultLimit has been applied.
Limit int
// UserAgent is sent as "user_agent_type". After SetDefaultUserAgent has
// been applied it must pass oxylabs.IsUserAgentValid.
UserAgent oxylabs.UserAgent
// CallbackUrl is sent as "callback_url".
CallbackUrl string
}
// ScrapeBaiduSearch scrapes baidu with baidu_search as source.
//
// query is sent as the "query" field of the request payload. opts is
// optional: when several values are passed only the last one is used, and a
// nil last value is treated the same as passing no options.
//
// The supplied options are copied before defaults are applied, so the
// caller's struct is never modified by this call.
//
// It returns the response produced by c.Req. An error is returned when the
// options fail checkParameterValidity, when the payload cannot be marshalled
// (the underlying error is wrapped and reachable through errors.Is/As), or
// when c.Req fails.
func (c *SerpClient) ScrapeBaiduSearch(
	query string,
	opts ...*BaiduSearchOpts,
) (*Response, error) {
	// Work on a private copy so the defaulting below does not write through
	// the caller's pointer.
	var opt BaiduSearchOpts
	if n := len(opts); n > 0 && opts[n-1] != nil {
		opt = *opts[n-1]
	}

	// Set defaults.
	SetDefaultDomain(&opt.Domain)
	SetDefaultStartPage(&opt.StartPage)
	SetDefaultLimit(&opt.Limit)
	SetDefaultUserAgent(&opt.UserAgent)
	// Pages is the only numeric option without a default; left at its zero
	// value it would be sent as "pages": 0. Fall back to a single page.
	// NOTE(review): 1 is assumed to match the API's default page count —
	// confirm against the Oxylabs baidu_search documentation.
	if opt.Pages == 0 {
		opt.Pages = 1
	}

	// Check validity of parameters.
	if err := opt.checkParameterValidity(); err != nil {
		return nil, err
	}

	// Prepare payload.
	payload := map[string]interface{}{
		"source":          "baidu_search",
		"domain":          opt.Domain,
		"query":           query,
		"start_page":      opt.StartPage,
		"pages":           opt.Pages,
		"limit":           opt.Limit,
		"user_agent_type": opt.UserAgent,
		"callback_url":    opt.CallbackUrl,
	}
	jsonPayload, err := json.Marshal(payload)
	if err != nil {
		// %w keeps the marshalling error in the chain for callers.
		return nil, fmt.Errorf("error marshalling payload: %w", err)
	}

	res, err := c.Req(jsonPayload, false, "POST")
	if err != nil {
		return nil, err
	}

	return res, nil
}
// BaiduUrlOpts holds the optional parameters accepted by ScrapeBaiduUrl.
type BaiduUrlOpts struct {
// UserAgent is sent as "user_agent_type". After SetDefaultUserAgent has
// been applied it must pass oxylabs.IsUserAgentValid.
UserAgent oxylabs.UserAgent
// CallbackUrl is sent as "callback_url".
CallbackUrl string
}
// ScrapeBaiduUrl scrapes baidu with baidu as source.
//
// url is first checked with oxylabs.ValidateURL(url, "baidu") and then sent
// as the "url" field of the request payload. opts is optional: when several
// values are passed only the last one is used, and a nil last value is
// treated the same as passing no options.
//
// The supplied options are copied before defaults are applied, so the
// caller's struct is never modified by this call.
//
// It returns the response produced by c.Req. An error is returned when the
// url is rejected, when the options fail checkParameterValidity, when the
// payload cannot be marshalled (the underlying error is wrapped and reachable
// through errors.Is/As), or when c.Req fails.
func (c *SerpClient) ScrapeBaiduUrl(
	url string,
	opts ...*BaiduUrlOpts,
) (*Response, error) {
	// Check validity of url.
	if err := oxylabs.ValidateURL(url, "baidu"); err != nil {
		return nil, err
	}

	// Work on a private copy so the defaulting below does not write through
	// the caller's pointer.
	var opt BaiduUrlOpts
	if n := len(opts); n > 0 && opts[n-1] != nil {
		opt = *opts[n-1]
	}

	// Set defaults.
	SetDefaultUserAgent(&opt.UserAgent)

	// Check validity of parameters.
	if err := opt.checkParameterValidity(); err != nil {
		return nil, err
	}

	// Prepare payload.
	payload := map[string]interface{}{
		"source":          "baidu",
		"url":             url,
		"user_agent_type": opt.UserAgent,
		"callback_url":    opt.CallbackUrl,
	}
	jsonPayload, err := json.Marshal(payload)
	if err != nil {
		// %w keeps the marshalling error in the chain for callers.
		return nil, fmt.Errorf("error marshalling payload: %w", err)
	}

	res, err := c.Req(jsonPayload, false, "POST")
	if err != nil {
		return nil, err
	}

	return res, nil
}