diff --git a/requirements.txt b/requirements.txt
index d3aadaf..ae07714 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 nltk
-nltk-punkt
 razdel==0.5.0
 sentencepiece
 torch>=2.0.1
-transformers>=4.38.1
\ No newline at end of file
+transformers>=4.38.1
+accelerate
diff --git a/setup.py b/setup.py
index 8db382f..bd4136b 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
 from setuptools import setup, find_packages
+from setuptools.command.install import install
 
 import smart_chunker
 
@@ -24,6 +25,31 @@
 The smart chunker supports Russian and English.
 '''
 
+class PostInstallCommand(install):
+    """Run the standard install, then try to fetch NLTK's 'punkt' data.
+
+    NOTE(review): installs from a pre-built wheel do not execute custom
+    ``install`` commands, so this hook may never run - confirm that the
+    package also handles missing punkt data at runtime.
+    """
+
+    def run(self):
+        super().run()
+        # Best-effort step: a missing tokenizer must never fail the install.
+        try:
+            import nltk
+
+            try:
+                nltk.data.find('tokenizers/punkt')
+            except LookupError:
+                # nltk.download() signals failure by returning False rather
+                # than raising, so turn that into an error for the warning.
+                if not nltk.download('punkt'):
+                    raise RuntimeError("nltk.download('punkt') returned False")
+        except Exception as e:
+            print(f"Warning: could not ensure nltk punkt data is available: {e}")
+
+
 setup(
     name='smart-chunker',
     version=smart_chunker.__version__,
@@ -50,6 +76,7 @@
         'Programming Language :: Python :: 3.12',
     ],
     keywords=['smart-chunker', 'rag', 'chunker', 'cross-encoder', 'encoder', 'reranker'],
-    install_requires=['nltk', 'nltk-punkt', 'razdel==0.5.0', 'sentencepiece', 'torch>=2.0.1', 'transformers>=4.38.1'],
+    install_requires=['nltk', 'razdel==0.5.0', 'sentencepiece', 'torch>=2.0.1', 'transformers>=4.38.1', 'accelerate'],
+    cmdclass={'install': PostInstallCommand},
     test_suite='tests'
 )