Skip to content

Commit ce3b6c7

Browse files
author
Christian Newman
committed
re-structure directory to work for both Docker and non-docker use cases
1 parent 905a644 commit ce3b6c7

File tree

4 files changed

+15
-12
lines changed

4 files changed

+15
-12
lines changed

Dockerfile

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,18 @@ RUN apt-get update && \
77
pip install -r requirements.txt && \
88
rm -rf /var/lib/apt/lists/*
99

10+
COPY . .
11+
RUN pip install -e .
12+
1013
# nltk downloads
1114
RUN python3 -c "import nltk; nltk.download('averaged_perceptron_tagger');nltk.download('universal_tagset')"
1215

1316
# Python scripts and data
14-
COPY classifier_multiclass.py \
15-
download_code2vec_vectors.py \
16-
feature_generator.py \
17-
tag_identifier.py \
18-
create_models.py \
17+
COPY src/classifier_multiclass.py \
18+
src/download_code2vec_vectors.py \
19+
src/feature_generator.py \
20+
src/tag_identifier.py \
21+
src/create_models.py \
1922
version.py \
2023
serve.json \
2124
main \

main

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
#!/usr/bin/env python
22

3-
import os, sqlite3, random, nltk, argparse
3+
import os, sqlite3, random, argparse
44
from datetime import datetime
5-
import src.classifier_multiclass
5+
from src.classifier_multiclass import perform_classification, TrainingAlgorithm
66
import pandas as pd
77
import numpy as np
88
from src.tag_identifier import start_server
99
from src.download_code2vec_vectors import *
10-
import json
10+
from src.feature_generator import custom_to_numeric, universal_to_custom, createFeatures
1111
from src.create_models import createModel, stable_features, mutable_feature_list, columns_to_drop
12-
from src.version import __version__
12+
from version import __version__
1313

1414
# Get the directory of the current script
1515
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -123,14 +123,14 @@ def train(config):
123123
results_text_file.write(f"SQL: {sql_statement}\n")
124124
results_text_file.write(f"Features: {df_features}\n")
125125

126-
algorithms = [classifier_multiclass.TrainingAlgorithm.XGBOOST]
126+
algorithms = [TrainingAlgorithm.XGBOOST]
127127
#pd.set_option('display.max_rows', None) # Show all rows
128128
pd.set_option('display.max_columns', None) # Show all columns
129129
pd.set_option('display.width', None) # Prevent line wrapping
130130
pd.set_option('display.max_colwidth', None) # Show full content of each cell
131131

132132
print(df_features)
133-
classifier_multiclass.perform_classification(df_features, df_class, results_text_file,
133+
perform_classification(df_features, df_class, results_text_file,
134134
output_dir, algorithms, trainingSeed,
135135
classifierSeed, columns_to_drop)
136136

src/setup.py renamed to setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
'scanl_tagger=scanl_tagger.main:main',
1818
],
1919
},
20-
python_requires='>=3.10',
20+
python_requires='>=3.12',
2121
author="Christian Newman",
2222
description="A machine learning based tagger for source code analysis",
2323
)
File renamed without changes.

0 commit comments

Comments
 (0)