Skip to content

Commit 27ea468

Browse files
committed
commit changes to modernize project with upstream changes
1 parent 3f3b595 commit 27ea468

File tree

131 files changed

+4327
-104
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

131 files changed

+4327
-104
lines changed

pyproject.toml

+29-10
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,15 @@ requires-python = ">= 3.9"
2222
dynamic = ["version"]
2323

2424
[build-system]
25-
requires = ["setuptools", "Cython", "wheel"]
25+
requires = ["setuptools", "wheel", "cython", "pkgconfig", "setuptools_scm"]
2626
build-backend = "setuptools.build_meta"
2727

2828
[project.scripts]
2929
cchardetect = "cchardet.cli.cchardetect:main"
3030

31-
[tool.setuptools.dynamic]
32-
version = { attr = "cchardet.__version__" }
31+
[tool.setuptools_scm]
32+
# enables setuptools_scm to provide the dynamic version
33+
3334

3435
[tool.rye]
3536
dev-dependencies = [
@@ -49,13 +50,31 @@ format.quote-style = "double"
4950
format.indent-style = "space"
5051

5152
[tool.cibuildwheel]
52-
skip = "pp* cp36-* cp37-* cp38-*"
53-
archs = "auto"
53+
build-frontend = "build"
54+
skip = ["*-win32"]
55+
archs = ["auto"]
56+
test-requires = ['pytest']
57+
test-command = [
58+
'cd {project}',
59+
'python -m pytest {project}'
60+
]
61+
62+
environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/local/lib64/"}
63+
before-build = [
64+
"git submodule sync --recursive",
65+
"git submodule update --init --force --recursive --depth=1",
66+
]
67+
68+
[tool.cibuildwheel.macos]
69+
environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/local/lib/"}
5470
before-build = [
55-
"pip install cython==3.0.10",
56-
"cython {project}/src/cchardet/_cchardet.pyx",
71+
"git submodule sync --recursive",
72+
"git submodule update --init --force --recursive --depth=1",
5773
]
5874

59-
# NOTICE: ローカルでは想定通りにテストは通るが、 cibuildwheel では 特定のファイルの文字コード検知がうまくいかないので一時的に無効化する
60-
# test-requires = "pytest"
61-
# test-command = "pytest -vs {project}/tests"
75+
[tool.cibuildwheel.windows]
76+
before-build = [
77+
"git submodule sync --recursive",
78+
"git submodule update --init --force --recursive --depth=1",
79+
"make pip"
80+
]

setup.py

+88-77
Original file line numberDiff line numberDiff line change
@@ -1,93 +1,104 @@
11
#!/usr/bin/env python
22
# coding: utf-8
33

4-
import glob
54
import os
5+
import codecs
6+
import re
7+
from setuptools.command.build_ext import build_ext
68

7-
from setuptools import Extension, setup
9+
try:
10+
import sysconfig
11+
except ImportError:
12+
from distutils import sysconfig
813

9-
cchardet_dir = "src/cchardet/"
10-
uchardet_dir = "src/ext/uchardet/src"
11-
cchardet_sources = glob.glob(cchardet_dir + "*.cpp")
12-
sources = cchardet_sources
14+
try:
15+
from setuptools import setup, Extension
16+
except ImportError:
17+
from distutils.core import setup, Extension
1318

19+
from Cython.Build import cythonize
20+
21+
22+
join = os.path.join
23+
24+
cchardet_dir = join("src", "cchardet") + os.path.sep
25+
uchardet_dir = join("src", "ext", "uchardet", "src")
26+
uchardet_lang_models_dir = join(uchardet_dir, "LangModels")
27+
28+
cchardet_sources = [join("src", "cchardet", "_cchardet.pyx")]
1429
uchardet_sources = [
15-
os.path.join(uchardet_dir, "LangModels/LangArabicModel.cpp"),
16-
os.path.join(uchardet_dir, "LangModels/LangBelarusianModel.cpp"),
17-
os.path.join(uchardet_dir, "LangModels/LangBulgarianModel.cpp"),
18-
os.path.join(uchardet_dir, "LangModels/LangCatalanModel.cpp"),
19-
os.path.join(uchardet_dir, "LangModels/LangCroatianModel.cpp"),
20-
os.path.join(uchardet_dir, "LangModels/LangCzechModel.cpp"),
21-
os.path.join(uchardet_dir, "LangModels/LangDanishModel.cpp"),
22-
os.path.join(uchardet_dir, "LangModels/LangEnglishModel.cpp"),
23-
os.path.join(uchardet_dir, "LangModels/LangEsperantoModel.cpp"),
24-
os.path.join(uchardet_dir, "LangModels/LangEstonianModel.cpp"),
25-
os.path.join(uchardet_dir, "LangModels/LangFinnishModel.cpp"),
26-
os.path.join(uchardet_dir, "LangModels/LangFrenchModel.cpp"),
27-
os.path.join(uchardet_dir, "LangModels/LangGeorgianModel.cpp"),
28-
os.path.join(uchardet_dir, "LangModels/LangGermanModel.cpp"),
29-
os.path.join(uchardet_dir, "LangModels/LangGreekModel.cpp"),
30-
os.path.join(uchardet_dir, "LangModels/LangHebrewModel.cpp"),
31-
os.path.join(uchardet_dir, "LangModels/LangHindiModel.cpp"),
32-
os.path.join(uchardet_dir, "LangModels/LangHungarianModel.cpp"),
33-
os.path.join(uchardet_dir, "LangModels/LangIrishModel.cpp"),
34-
os.path.join(uchardet_dir, "LangModels/LangItalianModel.cpp"),
35-
os.path.join(uchardet_dir, "LangModels/LangLatvianModel.cpp"),
36-
os.path.join(uchardet_dir, "LangModels/LangLithuanianModel.cpp"),
37-
os.path.join(uchardet_dir, "LangModels/LangMacedonianModel.cpp"),
38-
os.path.join(uchardet_dir, "LangModels/LangMalteseModel.cpp"),
39-
os.path.join(uchardet_dir, "LangModels/LangNorwegianModel.cpp"),
40-
os.path.join(uchardet_dir, "LangModels/LangPolishModel.cpp"),
41-
os.path.join(uchardet_dir, "LangModels/LangPortugueseModel.cpp"),
42-
os.path.join(uchardet_dir, "LangModels/LangRomanianModel.cpp"),
43-
os.path.join(uchardet_dir, "LangModels/LangRussianModel.cpp"),
44-
os.path.join(uchardet_dir, "LangModels/LangSerbianModel.cpp"),
45-
os.path.join(uchardet_dir, "LangModels/LangSlovakModel.cpp"),
46-
os.path.join(uchardet_dir, "LangModels/LangSloveneModel.cpp"),
47-
os.path.join(uchardet_dir, "LangModels/LangSpanishModel.cpp"),
48-
os.path.join(uchardet_dir, "LangModels/LangSwedishModel.cpp"),
49-
os.path.join(uchardet_dir, "LangModels/LangThaiModel.cpp"),
50-
os.path.join(uchardet_dir, "LangModels/LangTurkishModel.cpp"),
51-
os.path.join(uchardet_dir, "LangModels/LangUkrainianModel.cpp"),
52-
os.path.join(uchardet_dir, "LangModels/LangVietnameseModel.cpp"),
53-
os.path.join(uchardet_dir, "CharDistribution.cpp"),
54-
os.path.join(uchardet_dir, "JpCntx.cpp"),
55-
os.path.join(uchardet_dir, "nsBig5Prober.cpp"),
56-
os.path.join(uchardet_dir, "nsCharSetProber.cpp"),
57-
os.path.join(uchardet_dir, "nsCJKDetector.cpp"),
58-
os.path.join(uchardet_dir, "nsEscCharsetProber.cpp"),
59-
os.path.join(uchardet_dir, "nsEscSM.cpp"),
60-
os.path.join(uchardet_dir, "nsEUCJPProber.cpp"),
61-
os.path.join(uchardet_dir, "nsEUCKRProber.cpp"),
62-
os.path.join(uchardet_dir, "nsEUCTWProber.cpp"),
63-
os.path.join(uchardet_dir, "nsGB2312Prober.cpp"),
64-
os.path.join(uchardet_dir, "nsHebrewProber.cpp"),
65-
os.path.join(uchardet_dir, "nsJohabProber.cpp"),
66-
os.path.join(uchardet_dir, "nsLanguageDetector.cpp"),
67-
os.path.join(uchardet_dir, "nsLatin1Prober.cpp"),
68-
os.path.join(uchardet_dir, "nsMBCSGroupProber.cpp"),
69-
os.path.join(uchardet_dir, "nsMBCSSM.cpp"),
70-
os.path.join(uchardet_dir, "nsSBCharSetProber.cpp"),
71-
os.path.join(uchardet_dir, "nsSBCSGroupProber.cpp"),
72-
os.path.join(uchardet_dir, "nsSJISProber.cpp"),
73-
os.path.join(uchardet_dir, "nsUniversalDetector.cpp"),
74-
os.path.join(uchardet_dir, "nsUTF8Prober.cpp"),
75-
os.path.join(uchardet_dir, "uchardet.cpp"),
30+
join(uchardet_dir, file)
31+
for file in os.listdir(uchardet_dir)
32+
if file.endswith(".cpp")
7633
]
77-
sources += uchardet_sources
34+
uchardet_lang_source = [
35+
join(uchardet_lang_models_dir, file)
36+
for file in os.listdir(uchardet_lang_models_dir)
37+
if file.endswith(".cpp")
38+
]
39+
sources = cchardet_sources + uchardet_sources + uchardet_lang_source
40+
41+
ext_args = {
42+
"include_dirs": uchardet_dir.split(os.pathsep),
43+
"library_dirs": uchardet_dir.split(os.pathsep),
44+
}
45+
46+
47+
# Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++.
48+
cfg_vars = sysconfig.get_config_vars()
49+
for key, value in cfg_vars.items():
50+
if type(value) == str:
51+
cfg_vars[key] = value.replace("-Wstrict-prototypes", "")
52+
# O3を指定したところで速度が向上するかは疑問である
53+
# cfg_vars[key] = value.replace("-O2", "-O3")
54+
55+
56+
cchardet_module = Extension("cchardet._cchardet", sources, language="c++", extra_compile_args=['-std=c++11'], **ext_args,)
57+
58+
59+
def read(f):
60+
return open(os.path.join(os.path.dirname(__file__), f)).read().strip()
61+
7862

7963
setup(
64+
name="faust-cchardet",
65+
author="PyYoshi",
66+
author_email="myoshi321go@gmail.com",
67+
url=r"https://github.com/faust-streaming/cChardet",
68+
description="cChardet is high speed universal character encoding detector.",
69+
long_description="\n\n".join((read("README.md"), read("CHANGES.md"))),
70+
license="Mozilla Public License",
71+
classifiers=[
72+
"Development Status :: 6 - Mature",
73+
"License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)",
74+
"License :: OSI Approved :: GNU General Public License (GPL)",
75+
"License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
76+
"Programming Language :: Cython",
77+
"Programming Language :: Python",
78+
"Programming Language :: Python :: Implementation :: CPython",
79+
"Programming Language :: Python :: Implementation :: PyPy",
80+
"Topic :: Software Development :: Libraries",
81+
"Programming Language :: Python :: 3",
82+
"Programming Language :: Python :: 3.6",
83+
"Programming Language :: Python :: 3.7",
84+
"Programming Language :: Python :: 3.8",
85+
"Programming Language :: Python :: 3.9",
86+
"Programming Language :: Python :: 3.10",
87+
"Programming Language :: Python :: 3.11",
88+
"Programming Language :: Python :: 3.12",
89+
],
90+
keywords=["cython", "chardet", "charsetdetect"],
91+
cmdclass={"build_ext": build_ext},
8092
package_dir={"": "src"},
8193
packages=[
8294
"cchardet",
8395
],
84-
ext_modules=[
85-
Extension(
86-
"cchardet._cchardet",
87-
sources=sources,
88-
include_dirs=[uchardet_dir],
89-
language="c++",
90-
extra_compile_args=['-std=c++11'],
91-
)
92-
],
96+
scripts=["src/cchardet/cli/cchardetect.py"],
97+
ext_modules=cythonize(
98+
[
99+
cchardet_module,
100+
],
101+
cplus=True,
102+
compiler_directives={"language_level": "3"}, # Python 3
103+
),
93104
)

src/cchardet/__init__.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
from . import _cchardet
2-
3-
version = (2, 2, 0, "alpha", 3)
4-
__version__ = "2.2.0a3"
1+
from cchardet import _cchardet
52

63

74
def detect(msg):

src/tests/cchardet_test.py

+13-13
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,23 @@
77
import sys
88

99
SKIP_LIST = [
10-
os.path.join("src", "tests", "testdata", "ja", "utf-16le.txt"),
11-
os.path.join("src", "tests", "testdata", "ja", "utf-16be.txt"),
12-
os.path.join("src", "tests", "testdata", "es", "iso-8859-15.txt"),
13-
os.path.join("src", "tests", "testdata", "da", "iso-8859-1.txt"),
14-
os.path.join("src", "tests", "testdata", "he", "iso-8859-8.txt"),
10+
os.path.join("tests", "testdata", "ja", "utf-16le.txt"),
11+
os.path.join("tests", "testdata", "ja", "utf-16be.txt"),
12+
os.path.join("tests", "testdata", "es", "iso-8859-15.txt"),
13+
os.path.join("tests", "testdata", "da", "iso-8859-1.txt"),
14+
os.path.join("tests", "testdata", "he", "iso-8859-8.txt"),
1515
]
1616

1717
if sys.maxsize <= 2**32:
1818
# Fails on i686 only, original cchardet test fails too
19-
SKIP_LIST.append(os.path.join("src", "tests", "testdata", "th", "tis-620.txt"))
20-
SKIP_LIST.append(os.path.join("src", "tests", "testdata", "fi", "iso-8859-1.txt"))
21-
SKIP_LIST.append(os.path.join("src", "tests", "testdata", "ga", "iso-8859-1.txt"))
19+
SKIP_LIST.append(os.path.join("tests", "testdata", "th", "tis-620.txt"))
20+
SKIP_LIST.append(os.path.join("tests", "testdata", "fi", "iso-8859-1.txt"))
21+
SKIP_LIST.append(os.path.join("tests", "testdata", "ga", "iso-8859-1.txt"))
2222

2323
# Python can't decode encoding
2424
SKIP_LIST_02 = [
25-
os.path.join("src", "tests", "testdata", "vi", "viscii.txt"),
26-
os.path.join("src", "tests", "testdata", "zh", "euc-tw.txt"),
25+
os.path.join("tests", "testdata", "vi", "viscii.txt"),
26+
os.path.join("tests", "testdata", "zh", "euc-tw.txt"),
2727
]
2828

2929
SKIP_LIST_02.extend(SKIP_LIST)
@@ -35,7 +35,7 @@ def test_ascii():
3535

3636

3737
@pytest.mark.parametrize(
38-
"testfile", glob.glob(os.path.join("src", "tests", "testdata", "*", "*.txt"))
38+
"testfile", glob.glob(os.path.join("tests", "testdata", "*", "*.txt"))
3939
)
4040
def test_detect(testfile):
4141
if testfile.replace("\\", "/") in SKIP_LIST:
@@ -57,7 +57,6 @@ def test_detector():
5757
detector = cchardet.UniversalDetector()
5858
with open(
5959
os.path.join(
60-
"src",
6160
"tests",
6261
"samples",
6362
"wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt",
@@ -89,7 +88,7 @@ def test_github_issue_20():
8988

9089

9190
def test_decode():
92-
testfiles = glob.glob(os.path.join("src", "tests", "testdata", "*", "*.txt"))
91+
testfiles = glob.glob(os.path.join("tests", "testdata", "*", "*.txt"))
9392
for testfile in testfiles:
9493
if testfile.replace("\\", "/") in SKIP_LIST_02:
9594
continue
@@ -109,6 +108,7 @@ def test_decode():
109108
raise e
110109

111110

111+
@pytest.mark.skipif()
112112
def test_utf8_with_bom():
113113
sample = b"\xEF\xBB\xBF"
114114
detected_encoding = cchardet.detect(sample)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Bob�s Burgers

src/tests/samples/iso8859-2.csv

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
id,name
2+
1,english
3+
2,�

0 commit comments

Comments
 (0)