-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhindu_scraper.py
49 lines (36 loc) · 1.23 KB
/
hindu_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import requests as req
from bs4 import BeautifulSoup
# Text of the page element at which extraction stops; everything from this
# string onwards is discarded.
# NOTE(review): presumably the validation message of a sign-up form that
# follows the article body -- confirm against a live page.
_STOP_MARKER = "Please enter a valid email address."

# Output file used when ``req_type == 1``.
_DEFAULT_ARTICLE_PATH = (
    "/Users/mohit/Desktop/project/ranking_pro/ranking_code/og_file/og_article"
)


def _collect_article_text(paragraphs) -> list:
    """Return the text fragments of *paragraphs* that precede the stop marker.

    Each direct child of each paragraph contributes its ``.string`` value.
    Children whose ``.string`` is ``None`` are skipped. Collection ends at the
    first child whose string equals ``_STOP_MARKER``; that child and
    everything after it (including later paragraphs) is dropped.
    """
    fragments = []
    for paragraph in paragraphs:
        for child in paragraph.contents:
            text = child.string
            if text == _STOP_MARKER:
                # Returning here leaves both loops at once, which replaces
                # the flag-and-double-break bookkeeping.
                return fragments
            if text is not None:
                fragments.append(text)
    return fragments


def hindu_scraper(url: str, path: str, req_type: int, timeout: float = 30) -> None:
    """Download *url*, extract its paragraph text and append it to a file.

    The page is parsed with ``html.parser``; the strings of the direct
    children of every ``<p>`` element are collected in document order until
    the stop marker string is met, then appended (with no separator between
    fragments) to the output file.

    Args:
        url: Address of the page to fetch.
        path: Output file used when ``req_type`` is not ``1``.
        req_type: ``1`` appends to the built-in default file
            (``_DEFAULT_ARTICLE_PATH``); any other value appends to *path*.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
        OSError: If the output file cannot be opened or written.
    """
    response = req.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of appending the text of an error
    # page to the article file.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    article = _collect_article_text(soup.find_all('p'))

    name = _DEFAULT_ARTICLE_PATH if req_type == 1 else path

    # The context manager closes the file even if a write fails; the explicit
    # encoding keeps non-ASCII article text from depending on the platform
    # default.
    with open(name, "a", encoding="utf-8") as file:
        file.writelines(article)