-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
68 lines (56 loc) · 1.64 KB
/
app.py
File metadata and controls
68 lines (56 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# import packages
import numpy as np
import os
from random import shuffle
import re
import zipfile
import lxml.etree
import urllib
from collections import Counter
import nltk
import enchant
import io
import itertools
import collections
from gensim.models import FastText
import warnings
warnings.filterwarnings("ignore")
from flask import Flask,render_template,url_for,request
from flask_restful import reqparse, abort, Api, Resource
# Flask application and the Flask-RESTful API object that registers resources on it.
app = Flask(__name__)
api = Api(app)
# Argument parsing: requests carry a single 'query' argument. It is not
# declared as required here.
parser = reqparse.RequestParser()
parser.add_argument('query')
# Load the FastText model once, at import time. 'model.bin' is a relative
# path -- presumably resolved against the working directory; confirm against
# how the service is launched.
model_fasttext = FastText.load('model.bin')
# en_US dictionary; include_spell_mistake uses it to check whether a candidate
# is a known word.
enchant_us = enchant.Dict('en_US')
# Similarity score a candidate must exceed to pass the similarity check in
# include_spell_mistake.
fasttext_min_similarity = 0.6
# function to identify possible misspellings
def include_spell_mistake(word, similar_word, score):
    """Decide whether ``similar_word`` looks like a misspelling of ``word``.

    Five independent checks each cast one vote; the candidate is accepted
    when more than three of them pass:

    1. ``score`` exceeds ``fasttext_min_similarity``.
    2. ``similar_word`` is longer than three characters.
    3. ``similar_word`` is rejected by the ``enchant_us`` dictionary.
    4. Both words start with the same character.
    5. Their edit distance is at most 1 when ``word`` has up to four
       characters, otherwise at most 2.

    Args:
        word: The word the candidates were looked up for.
        similar_word: A candidate word to evaluate.
        score: Similarity score reported for ``similar_word``.

    Returns:
        ``True`` if at least four checks pass, ``False`` otherwise. Empty or
        missing words are never reported as misspellings.
    """
    # Guard first: taking the first character (and the length / dictionary
    # checks below) cannot work on empty or missing input.
    if not word or not similar_word:
        return False

    max_edit_distance = 1 if len(word) <= 4 else 2
    checks = (
        score > fasttext_min_similarity,
        len(similar_word) > 3,
        not enchant_us.check(similar_word),
        word[0] == similar_word[0],
        nltk.edit_distance(word, similar_word) <= max_edit_distance,
    )
    # bool() keeps the vote count a plain int even if a check yields a
    # non-builtin truth value (e.g. when score is not a plain float).
    votes = sum(bool(check) for check in checks)
    return votes > 3
class spellCheck(Resource):
    """REST resource that suggests likely misspellings of a query word."""

    def get(self):
        """Handle ``GET``: return up to three misspelling candidates.

        Reads the ``query`` argument, asks the FastText model for the 50
        most similar words, and keeps the first three that
        ``include_spell_mistake`` accepts.

        Returns:
            A dict of the form ``{'prediction': [...]}`` holding at most
            three words.

        Aborts with HTTP 400 when ``query`` is missing or empty.
        """
        args = parser.parse_args()
        word = args['query']
        if not word:
            # Reject it here rather than handing None or '' to the model lookup.
            abort(400, message="The 'query' argument is required.")

        neighbours = model_fasttext.wv.most_similar(word, topn=50)
        accepted = (
            candidate
            for candidate, similarity in neighbours
            if include_spell_mistake(word, candidate, similarity)
        )
        # Only three suggestions are returned, so stop filtering after three.
        return {'prediction': list(itertools.islice(accepted, 3))}
# add endpoint: expose the spell-check resource at /spellCorrect
api.add_resource(spellCheck, '/spellCorrect')

if __name__ == '__main__':
    # Flask's debug mode turns on the interactive debugger, which must not be
    # exposed on a reachable server. It is therefore opt-in: set FLASK_DEBUG
    # to 1/true/yes/on to enable it for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(debug=debug_enabled)