
Commit 018e5fd

Init

0 parents, commit 018e5fd

17 files changed, +1794 -0 lines changed

.gitignore (+16)

@@ -0,0 +1,16 @@
+# python generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# venv
+.venv
+
+# pytest
+.pytest_cache/
+
+# docs
+docs/

.python-version (+1)

@@ -0,0 +1 @@
+3.10.14

README.md (+6)

@@ -0,0 +1,6 @@
+# Black Fish
+
+An article crawler that syncs posts from multiple configured sources.
+
+
+Covers 嘶吼, 火线, and the qianxin attack-and-defense community.

pyproject.toml (+36)

@@ -0,0 +1,36 @@
+[project]
+name = "black-fish"
+version = "0.1.0"
+description = "Add your description here"
+dependencies = [
+    "drissionpage>=4.0",
+    "asyncio>=3.4.3",
+    "aiohttp>=3.9.5",
+    "loguru>=0.7.2",
+    "parsel>=1.9.1",
+    "markdownify>=0.13.1",
+    "aiofiles>=24.1.0",
+]
+readme = "README.md"
+requires-python = ">= 3.8"
+
+
+[project.scripts]
+dev = 'black_fish:main'
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.rye]
+managed = true
+dev-dependencies = [
+    "pytest>=8.3.1",
+]
+
+[tool.hatch.metadata]
+allow-direct-references = true
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/black_fish"]
+
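The dev entry under [project.scripts] maps a console command to the main function exported by the black_fish package (defined in src/black_fish/__init__.py below). Once the project is installed into the environment (for example via rye sync), invoking that script is roughly equivalent to the following sketch:

# Rough equivalent of the generated `dev` console script:
# import the black_fish package and call its main() entry point.
from black_fish import main

if __name__ == "__main__":
    main()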

requirements-dev.lock (+103)

@@ -0,0 +1,103 @@
+# generated by rye
+# use `rye lock` or `rye sync` to update this lockfile
+#
+# last locked with the following flags:
+#   pre: false
+#   features: []
+#   all-features: false
+#   with-sources: false
+#   generate-hashes: false
+#   universal: false
+
+-e file:.
+aiofiles==24.1.0
+    # via black-fish
+aiohttp==3.9.5
+    # via black-fish
+aiosignal==1.3.1
+    # via aiohttp
+async-timeout==4.0.3
+    # via aiohttp
+asyncio==3.4.3
+    # via black-fish
+attrs==23.2.0
+    # via aiohttp
+beautifulsoup4==4.12.3
+    # via markdownify
+certifi==2024.7.4
+    # via requests
+charset-normalizer==3.3.2
+    # via requests
+click==8.1.7
+    # via drissionpage
+cssselect==1.2.0
+    # via drissionpage
+    # via parsel
+datarecorder==3.5.1
+    # via downloadkit
+downloadkit==2.0.2
+    # via drissionpage
+drissionpage==4.0.5.6
+    # via black-fish
+et-xmlfile==1.1.0
+    # via openpyxl
+exceptiongroup==1.2.2
+    # via pytest
+filelock==3.15.4
+    # via tldextract
+frozenlist==1.4.1
+    # via aiohttp
+    # via aiosignal
+idna==3.7
+    # via requests
+    # via tldextract
+    # via yarl
+iniconfig==2.0.0
+    # via pytest
+jmespath==1.0.1
+    # via parsel
+loguru==0.7.2
+    # via black-fish
+lxml==5.2.2
+    # via drissionpage
+    # via parsel
+markdownify==0.13.1
+    # via black-fish
+multidict==6.0.5
+    # via aiohttp
+    # via yarl
+openpyxl==3.1.5
+    # via datarecorder
+packaging==24.1
+    # via parsel
+    # via pytest
+parsel==1.9.1
+    # via black-fish
+pluggy==1.5.0
+    # via pytest
+psutil==6.0.0
+    # via drissionpage
+pytest==8.3.1
+requests==2.32.3
+    # via downloadkit
+    # via drissionpage
+    # via requests-file
+    # via tldextract
+requests-file==2.1.0
+    # via tldextract
+six==1.16.0
+    # via markdownify
+soupsieve==2.5
+    # via beautifulsoup4
+tldextract==5.1.2
+    # via drissionpage
+tomli==2.0.1
+    # via pytest
+urllib3==2.2.2
+    # via requests
+w3lib==2.2.1
+    # via parsel
+websocket-client==1.8.0
+    # via drissionpage
+yarl==1.9.4
+    # via aiohttp

requirements.lock (+93)

@@ -0,0 +1,93 @@
+# generated by rye
+# use `rye lock` or `rye sync` to update this lockfile
+#
+# last locked with the following flags:
+#   pre: false
+#   features: []
+#   all-features: false
+#   with-sources: false
+#   generate-hashes: false
+#   universal: false
+
+-e file:.
+aiofiles==24.1.0
+    # via black-fish
+aiohttp==3.9.5
+    # via black-fish
+aiosignal==1.3.1
+    # via aiohttp
+async-timeout==4.0.3
+    # via aiohttp
+asyncio==3.4.3
+    # via black-fish
+attrs==23.2.0
+    # via aiohttp
+beautifulsoup4==4.12.3
+    # via markdownify
+certifi==2024.7.4
+    # via requests
+charset-normalizer==3.3.2
+    # via requests
+click==8.1.7
+    # via drissionpage
+cssselect==1.2.0
+    # via drissionpage
+    # via parsel
+datarecorder==3.5.1
+    # via downloadkit
+downloadkit==2.0.2
+    # via drissionpage
+drissionpage==4.0.5.6
+    # via black-fish
+et-xmlfile==1.1.0
+    # via openpyxl
+filelock==3.15.4
+    # via tldextract
+frozenlist==1.4.1
+    # via aiohttp
+    # via aiosignal
+idna==3.7
+    # via requests
+    # via tldextract
+    # via yarl
+jmespath==1.0.1
+    # via parsel
+loguru==0.7.2
+    # via black-fish
+lxml==5.2.2
+    # via drissionpage
+    # via parsel
+markdownify==0.13.1
+    # via black-fish
+multidict==6.0.5
+    # via aiohttp
+    # via yarl
+openpyxl==3.1.5
+    # via datarecorder
+packaging==24.1
+    # via parsel
+parsel==1.9.1
+    # via black-fish
+psutil==6.0.0
+    # via drissionpage
+requests==2.32.3
+    # via downloadkit
+    # via drissionpage
+    # via requests-file
+    # via tldextract
+requests-file==2.1.0
+    # via tldextract
+six==1.16.0
+    # via markdownify
+soupsieve==2.5
+    # via beautifulsoup4
+tldextract==5.1.2
+    # via drissionpage
+urllib3==2.2.2
+    # via requests
+w3lib==2.2.1
+    # via parsel
+websocket-client==1.8.0
+    # via drissionpage
+yarl==1.9.4
+    # via aiohttp

src/black_fish/__init__.py (+40)

@@ -0,0 +1,40 @@
+import asyncio
+import sys
+
+from loguru import logger
+
+from black_fish.executor import Executor
+from .config import Config
+from .spiders.xz import XZSpider
+from .spiders.tttang import TTTangSpider
+from .base_spider import BaseArticleSpider
+
+import time
+
+async def runtime():
+    config = Config()
+    executor = Executor(config)
+    xz_spider = XZSpider()
+    # tttang_spider = TTTangSpider()
+    executor.spiders = [xz_spider]
+
+    start = time.time()
+    await executor.run()
+
+    # await tttang_spider.fetch_remote_preview_articles()
+
+    # await xz_spider.prepare_for_run()
+    # await xz_spider.close()
+
+    # f = open("./xz-sample.html", "r")
+    # xz_sample_content = f.read()
+    # xz_spider.parse_to_md(xz_sample_content)
+
+    end = time.time()
+    print(end - start)
+
+
+def main():
+    logger.remove()
+    logger.add(sys.stdout, colorize=True, format="<green>{time}</green> <level>{message}</level>")
+    asyncio.run(runtime())
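runtime() above wires the pieces together: it builds a Config, hands it to an Executor, registers spider instances on executor.spiders, and awaits executor.run(). The Executor, Config, and spider classes live in executor.py, config.py, base_spider.py, and spiders/ within this same commit but are not shown here, so the sketch below only illustrates the contract that runtime() and its commented-out experiments appear to rely on; the method bodies, the _run_one helper, and the gather-based concurrency are assumptions, not the actual implementation.

import asyncio
from typing import List

class BaseArticleSpider:
    # Interface implied by the calls (and commented-out calls) in runtime():
    async def prepare_for_run(self) -> None: ...          # set up the HTTP session / browser page
    async def fetch_remote_preview_articles(self): ...    # collect article previews from the source
    def parse_to_md(self, html: str) -> str: ...          # convert one article's HTML into Markdown
    async def close(self) -> None: ...                    # release network / browser resources

class Executor:
    def __init__(self, config) -> None:
        self.config = config
        self.spiders: List[BaseArticleSpider] = []

    async def run(self) -> None:
        # Assumed behaviour: drive every registered spider concurrently.
        await asyncio.gather(*(self._run_one(s) for s in self.spiders))

    async def _run_one(self, spider: BaseArticleSpider) -> None:
        # Hypothetical helper, not from the commit: prepare, crawl, then clean up one spider.
        await spider.prepare_for_run()
        try:
            await spider.fetch_remote_preview_articles()
        finally:
            await spider.close()

Keeping the spiders as a plain list on the executor isolates the per-source logic in the spiders/ package, which is presumably why the tttang spider can be enabled later by uncommenting a single line in runtime().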
