|
1 | 1 | #!/usr/bin/env python
|
2 | 2 | # coding: utf-8
|
3 | 3 |
|
4 |
| -import glob |
5 | 4 | import os
|
| 5 | +import codecs |
| 6 | +import re |
| 7 | +from setuptools.command.build_ext import build_ext |
6 | 8 |
|
7 |
| -from setuptools import Extension, setup |
| 9 | +try: |
| 10 | + import sysconfig |
| 11 | +except ImportError: |
| 12 | + from distutils import sysconfig |
8 | 13 |
|
9 |
| -cchardet_dir = "src/cchardet/" |
10 |
| -uchardet_dir = "src/ext/uchardet/src" |
11 |
| -cchardet_sources = glob.glob(cchardet_dir + "*.cpp") |
12 |
| -sources = cchardet_sources |
| 14 | +try: |
| 15 | + from setuptools import setup, Extension |
| 16 | +except ImportError: |
| 17 | + from distutils.core import setup, Extension |
13 | 18 |
|
| 19 | +from Cython.Build import cythonize |
| 20 | + |
| 21 | + |
| 22 | +join = os.path.join |
| 23 | + |
| 24 | +cchardet_dir = join("src", "cchardet") + os.path.sep |
| 25 | +uchardet_dir = join("src", "ext", "uchardet", "src") |
| 26 | +uchardet_lang_models_dir = join(uchardet_dir, "LangModels") |
| 27 | + |
| 28 | +cchardet_sources = [join("src", "cchardet", "_cchardet.pyx")] |
14 | 29 | uchardet_sources = [
|
15 |
| - os.path.join(uchardet_dir, "LangModels/LangArabicModel.cpp"), |
16 |
| - os.path.join(uchardet_dir, "LangModels/LangBelarusianModel.cpp"), |
17 |
| - os.path.join(uchardet_dir, "LangModels/LangBulgarianModel.cpp"), |
18 |
| - os.path.join(uchardet_dir, "LangModels/LangCatalanModel.cpp"), |
19 |
| - os.path.join(uchardet_dir, "LangModels/LangCroatianModel.cpp"), |
20 |
| - os.path.join(uchardet_dir, "LangModels/LangCzechModel.cpp"), |
21 |
| - os.path.join(uchardet_dir, "LangModels/LangDanishModel.cpp"), |
22 |
| - os.path.join(uchardet_dir, "LangModels/LangEnglishModel.cpp"), |
23 |
| - os.path.join(uchardet_dir, "LangModels/LangEsperantoModel.cpp"), |
24 |
| - os.path.join(uchardet_dir, "LangModels/LangEstonianModel.cpp"), |
25 |
| - os.path.join(uchardet_dir, "LangModels/LangFinnishModel.cpp"), |
26 |
| - os.path.join(uchardet_dir, "LangModels/LangFrenchModel.cpp"), |
27 |
| - os.path.join(uchardet_dir, "LangModels/LangGeorgianModel.cpp"), |
28 |
| - os.path.join(uchardet_dir, "LangModels/LangGermanModel.cpp"), |
29 |
| - os.path.join(uchardet_dir, "LangModels/LangGreekModel.cpp"), |
30 |
| - os.path.join(uchardet_dir, "LangModels/LangHebrewModel.cpp"), |
31 |
| - os.path.join(uchardet_dir, "LangModels/LangHindiModel.cpp"), |
32 |
| - os.path.join(uchardet_dir, "LangModels/LangHungarianModel.cpp"), |
33 |
| - os.path.join(uchardet_dir, "LangModels/LangIrishModel.cpp"), |
34 |
| - os.path.join(uchardet_dir, "LangModels/LangItalianModel.cpp"), |
35 |
| - os.path.join(uchardet_dir, "LangModels/LangLatvianModel.cpp"), |
36 |
| - os.path.join(uchardet_dir, "LangModels/LangLithuanianModel.cpp"), |
37 |
| - os.path.join(uchardet_dir, "LangModels/LangMacedonianModel.cpp"), |
38 |
| - os.path.join(uchardet_dir, "LangModels/LangMalteseModel.cpp"), |
39 |
| - os.path.join(uchardet_dir, "LangModels/LangNorwegianModel.cpp"), |
40 |
| - os.path.join(uchardet_dir, "LangModels/LangPolishModel.cpp"), |
41 |
| - os.path.join(uchardet_dir, "LangModels/LangPortugueseModel.cpp"), |
42 |
| - os.path.join(uchardet_dir, "LangModels/LangRomanianModel.cpp"), |
43 |
| - os.path.join(uchardet_dir, "LangModels/LangRussianModel.cpp"), |
44 |
| - os.path.join(uchardet_dir, "LangModels/LangSerbianModel.cpp"), |
45 |
| - os.path.join(uchardet_dir, "LangModels/LangSlovakModel.cpp"), |
46 |
| - os.path.join(uchardet_dir, "LangModels/LangSloveneModel.cpp"), |
47 |
| - os.path.join(uchardet_dir, "LangModels/LangSpanishModel.cpp"), |
48 |
| - os.path.join(uchardet_dir, "LangModels/LangSwedishModel.cpp"), |
49 |
| - os.path.join(uchardet_dir, "LangModels/LangThaiModel.cpp"), |
50 |
| - os.path.join(uchardet_dir, "LangModels/LangTurkishModel.cpp"), |
51 |
| - os.path.join(uchardet_dir, "LangModels/LangUkrainianModel.cpp"), |
52 |
| - os.path.join(uchardet_dir, "LangModels/LangVietnameseModel.cpp"), |
53 |
| - os.path.join(uchardet_dir, "CharDistribution.cpp"), |
54 |
| - os.path.join(uchardet_dir, "JpCntx.cpp"), |
55 |
| - os.path.join(uchardet_dir, "nsBig5Prober.cpp"), |
56 |
| - os.path.join(uchardet_dir, "nsCharSetProber.cpp"), |
57 |
| - os.path.join(uchardet_dir, "nsCJKDetector.cpp"), |
58 |
| - os.path.join(uchardet_dir, "nsEscCharsetProber.cpp"), |
59 |
| - os.path.join(uchardet_dir, "nsEscSM.cpp"), |
60 |
| - os.path.join(uchardet_dir, "nsEUCJPProber.cpp"), |
61 |
| - os.path.join(uchardet_dir, "nsEUCKRProber.cpp"), |
62 |
| - os.path.join(uchardet_dir, "nsEUCTWProber.cpp"), |
63 |
| - os.path.join(uchardet_dir, "nsGB2312Prober.cpp"), |
64 |
| - os.path.join(uchardet_dir, "nsHebrewProber.cpp"), |
65 |
| - os.path.join(uchardet_dir, "nsJohabProber.cpp"), |
66 |
| - os.path.join(uchardet_dir, "nsLanguageDetector.cpp"), |
67 |
| - os.path.join(uchardet_dir, "nsLatin1Prober.cpp"), |
68 |
| - os.path.join(uchardet_dir, "nsMBCSGroupProber.cpp"), |
69 |
| - os.path.join(uchardet_dir, "nsMBCSSM.cpp"), |
70 |
| - os.path.join(uchardet_dir, "nsSBCharSetProber.cpp"), |
71 |
| - os.path.join(uchardet_dir, "nsSBCSGroupProber.cpp"), |
72 |
| - os.path.join(uchardet_dir, "nsSJISProber.cpp"), |
73 |
| - os.path.join(uchardet_dir, "nsUniversalDetector.cpp"), |
74 |
| - os.path.join(uchardet_dir, "nsUTF8Prober.cpp"), |
75 |
| - os.path.join(uchardet_dir, "uchardet.cpp"), |
| 30 | + join(uchardet_dir, file) |
| 31 | + for file in os.listdir(uchardet_dir) |
| 32 | + if file.endswith(".cpp") |
76 | 33 | ]
|
77 |
| -sources += uchardet_sources |
| 34 | +uchardet_lang_source = [ |
| 35 | + join(uchardet_lang_models_dir, file) |
| 36 | + for file in os.listdir(uchardet_lang_models_dir) |
| 37 | + if file.endswith(".cpp") |
| 38 | +] |
| 39 | +sources = cchardet_sources + uchardet_sources + uchardet_lang_source |
| 40 | + |
# Extra keyword arguments for the extension build: the uchardet source
# directory is used both as a header search path and as a library search path.
# The directory is wrapped in a list explicitly rather than by splitting it on
# os.pathsep, which is the PATH-list separator (":" or ";") and would cut a
# directory name containing that character into bogus pieces.
ext_args = {
    "include_dirs": [uchardet_dir],
    "library_dirs": [uchardet_dir],
}
| 45 | + |
| 46 | + |
# Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++.
# The mapping returned by get_config_vars() is edited in place: every string
# value has the flag stripped; non-string values are left untouched.
cfg_vars = sysconfig.get_config_vars()
# Iterate over a snapshot of the items so that writing back into the mapping
# can never interfere with the iteration.
for key, value in list(cfg_vars.items()):
    if isinstance(value, str):
        cfg_vars[key] = value.replace("-Wstrict-prototypes", "")
# NOTE: "-O2" is deliberately not rewritten to "-O3"; it is doubtful that
# specifying -O3 would actually improve speed.
| 54 | + |
| 55 | + |
# Extension definition for "cchardet._cchardet": built from `sources` as C++
# with the C++11 standard flag, plus the search directories held in `ext_args`.
cchardet_module = Extension(
    "cchardet._cchardet",
    sources,
    language="c++",
    extra_compile_args=["-std=c++11"],
    **ext_args
)
| 57 | + |
| 58 | + |
def read(f):
    """Return the stripped text of a file located next to this script.

    Args:
        f: File name or path, joined onto the directory that contains this
            file. An absolute path is used as-is by ``os.path.join``.

    Returns:
        The file's contents decoded as UTF-8, with leading and trailing
        whitespace removed.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    path = os.path.join(os.path.dirname(__file__), f)
    # The context manager closes the handle as soon as the text is read, and
    # the explicit encoding keeps the result independent of the build
    # machine's locale.
    with open(path, encoding="utf-8") as handle:
        return handle.read().strip()
| 61 | + |
78 | 62 |
|
79 | 63 | setup(
|
| 64 | + name="faust-cchardet", |
| 65 | + author="PyYoshi", |
| 66 | + author_email="myoshi321go@gmail.com", |
| 67 | + url=r"https://github.com/faust-streaming/cChardet", |
| 68 | + description="cChardet is high speed universal character encoding detector.", |
| 69 | + long_description="\n\n".join((read("README.md"), read("CHANGES.md"))), |
| 70 | + license="Mozilla Public License", |
| 71 | + classifiers=[ |
| 72 | + "Development Status :: 6 - Mature", |
| 73 | + "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)", |
| 74 | + "License :: OSI Approved :: GNU General Public License (GPL)", |
| 75 | + "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)", |
| 76 | + "Programming Language :: Cython", |
| 77 | + "Programming Language :: Python", |
| 78 | + "Programming Language :: Python :: Implementation :: CPython", |
| 79 | + "Programming Language :: Python :: Implementation :: PyPy", |
| 80 | + "Topic :: Software Development :: Libraries", |
| 81 | + "Programming Language :: Python :: 3", |
| 82 | + "Programming Language :: Python :: 3.6", |
| 83 | + "Programming Language :: Python :: 3.7", |
| 84 | + "Programming Language :: Python :: 3.8", |
| 85 | + "Programming Language :: Python :: 3.9", |
| 86 | + "Programming Language :: Python :: 3.10", |
| 87 | + "Programming Language :: Python :: 3.11", |
| 88 | + "Programming Language :: Python :: 3.12", |
| 89 | + ], |
| 90 | + keywords=["cython", "chardet", "charsetdetect"], |
| 91 | + cmdclass={"build_ext": build_ext}, |
80 | 92 | package_dir={"": "src"},
|
81 | 93 | packages=[
|
82 | 94 | "cchardet",
|
83 | 95 | ],
|
84 |
| - ext_modules=[ |
85 |
| - Extension( |
86 |
| - "cchardet._cchardet", |
87 |
| - sources=sources, |
88 |
| - include_dirs=[uchardet_dir], |
89 |
| - language="c++", |
90 |
| - extra_compile_args=['-std=c++11'], |
91 |
| - ) |
92 |
| - ], |
| 96 | + scripts=["src/cchardet/cli/cchardetect.py"], |
| 97 | + ext_modules=cythonize( |
| 98 | + [ |
| 99 | + cchardet_module, |
| 100 | + ], |
| 101 | + cplus=True, |
| 102 | + compiler_directives={"language_level": "3"}, # Python 3 |
| 103 | + ), |
93 | 104 | )
|
0 commit comments