Skip to content

Commit ffff193

Browse files
author
Christian Newman
committed
Try loading local fast-text model
1 parent 9d3bfab commit ffff193

File tree

2 files changed

+20
-6
lines changed

2 files changed

+20
-6
lines changed

Dockerfile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,13 @@ RUN apt-get update && \
1010
COPY . .
1111
RUN pip install -e .
1212

13+
# Download FastText model during build
14+
RUN python3 -c "import gensim.downloader as api; api.load('fasttext-wiki-news-subwords-300')"
15+
1316
# nltk downloads
1417
RUN python3 -c "import nltk; nltk.download('averaged_perceptron_tagger');nltk.download('universal_tagset')"
1518

16-
# Pythong scripts and data
19+
# Python scripts and data
1720
COPY src/classifier_multiclass.py \
1821
src/download_code2vec_vectors.py \
1922
src/feature_generator.py \
@@ -68,4 +71,4 @@ CMD date; \
6871
echo "Running..."; \
6972
/main -r --words words/abbreviationList.csv
7073

71-
ENV TZ=US/Michigan
74+
ENV TZ=US/Michigan

src/create_models.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def createModel(pklFile="", rootDir=""):
4343
"""
4444
# Configure logging
4545
logging.basicConfig(level=logging.INFO,
46-
format='%(asctime)s - %(levelname)s - %(message)s')
46+
format='%(asctime)s - %(levelname)s - %(message)s')
4747
logger = logging.getLogger(__name__)
4848

4949
modelGensimEnglish = None
@@ -52,9 +52,18 @@ def createModel(pklFile="", rootDir=""):
5252

5353
# Attempt to load FastText model
5454
try:
55-
logger.info("Loading FastText model...")
56-
modelGensimEnglish = api.load('fasttext-wiki-news-subwords-300')
57-
logger.info("FastText model loaded successfully")
55+
logger.info("Attempting to load local FastText model...")
56+
# The model should be in the gensim-data directory after download
57+
model_path = os.path.expanduser('~/gensim-data/fasttext-wiki-news-subwords-300/fasttext-wiki-news-subwords-300.model')
58+
59+
if os.path.exists(model_path):
60+
import gensim
61+
modelGensimEnglish = gensim.models.fasttext.load_facebook_model(model_path)
62+
logger.info("Local FastText model loaded successfully")
63+
else:
64+
logger.info("Local model not found, attempting to download...")
65+
modelGensimEnglish = api.load('fasttext-wiki-news-subwords-300')
66+
logger.info("FastText model downloaded and loaded successfully")
5867
except Exception as e:
5968
logger.warning(f"FastText model could not be loaded: {e}")
6069

@@ -65,6 +74,8 @@ def createModel(pklFile="", rootDir=""):
6574
# Paths for method vectors
6675
method_txt_path = os.path.join(rootDir, 'code2vec', 'target_vecs.txt')
6776
method_native_path = os.path.join(rootDir, 'code2vec', 'target_vecs.kv')
77+
78+
return modelGensimTokens, modelGensimMethods, modelGensimEnglish
6879

6980
# Helper function to load models safely
7081
def load_model(txt_path, native_path, model_name):

0 commit comments

Comments
 (0)