Skip to content

Commit cd31190

Browse files
adrienball authored and ClemDoum committed
Intents and slots alternatives (#148)
1 parent 2bc570d commit cd31190

File tree

89 files changed

+229697
-204
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+229697
-204
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ members = [
1919

2020
[dependencies]
2121
crfsuite = { git = "https://github.com/snipsco/crfsuite-rs", tag = "0.3.1" }
22-
snips-nlu-ontology = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.65.0" }
23-
snips-nlu-parsers = { git = "https://github.com/snipsco/snips-nlu-parsers", tag = "0.3.0" }
24-
snips-nlu-utils = { git = "https://github.com/snipsco/snips-nlu-utils", tag = "0.9.0" }
22+
snips-nlu-ontology = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.67.1" }
23+
snips-nlu-utils = { git = "https://github.com/snipsco/snips-nlu-utils", tag = "0.9.1" }
24+
snips-nlu-parsers = { git = "https://github.com/snipsco/snips-nlu-parsers", tag = "0.4.1" }
2525
failure = "0.1"
2626
base64 = "0.10"
2727
itertools = { version = "0.8", default-features = false }
Lines changed: 170 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,170 @@
1+
{
2+
"unit_name": "nlu_engine",
3+
"intent_parsers_configs": [
4+
{
5+
"unit_name": "lookup_intent_parser",
6+
"ignore_stop_words": true
7+
},
8+
{
9+
"unit_name": "deterministic_intent_parser",
10+
"max_pattern_length": 1000,
11+
"max_queries": 100,
12+
"ignore_stop_words": true
13+
},
14+
{
15+
"unit_name": "probabilistic_intent_parser",
16+
"intent_classifier_config": {
17+
"data_augmentation_config": {
18+
"noise_factor": 5,
19+
"unknown_word_prob": 0,
20+
"max_unknown_words": 0,
21+
"add_builtin_entities_examples": false,
22+
"unknown_words_replacement_string": null,
23+
"min_utterances": 20
24+
},
25+
"unit_name": "log_reg_intent_classifier",
26+
"featurizer_config": {
27+
"cooccurrence_vectorizer_config": {
28+
"window_size": 5,
29+
"unit_name": "cooccurrence_vectorizer",
30+
"filter_stop_words": true,
31+
"unknown_words_replacement_string": null,
32+
"keep_order": true
33+
},
34+
"unit_name": "featurizer",
35+
"added_cooccurrence_feature_ratio": 0.25,
36+
"tfidf_vectorizer_config": {
37+
"unit_name": "tfidf_vectorizer",
38+
"use_stemming": false,
39+
"word_clusters_name": null
40+
},
41+
"pvalue_threshold": 0.4
42+
},
43+
"random_seed": null
44+
},
45+
"slot_filler_config": {
46+
"unit_name": "crf_slot_filler",
47+
"random_seed": null,
48+
"tagging_scheme": 1,
49+
"data_augmentation_config": {
50+
"capitalization_ratio": 0.2,
51+
"min_utterances": 200,
52+
"add_builtin_entities_examples": true
53+
},
54+
"crf_args": {
55+
"c2": 0.1,
56+
"c1": 0.1,
57+
"algorithm": "lbfgs"
58+
},
59+
"feature_factory_configs": [
60+
{
61+
"args": {
62+
"common_words_gazetteer_name": "top_10000_words_stemmed",
63+
"use_stemming": true,
64+
"n": 1
65+
},
66+
"factory_name": "ngram",
67+
"offsets": [
68+
-2,
69+
-1,
70+
0,
71+
1,
72+
2
73+
]
74+
},
75+
{
76+
"args": {
77+
"common_words_gazetteer_name": "top_10000_words_stemmed",
78+
"use_stemming": true,
79+
"n": 2
80+
},
81+
"factory_name": "ngram",
82+
"offsets": [
83+
-2,
84+
1
85+
]
86+
},
87+
{
88+
"args": {},
89+
"factory_name": "is_digit",
90+
"offsets": [
91+
-1,
92+
0,
93+
1
94+
]
95+
},
96+
{
97+
"args": {},
98+
"factory_name": "is_first",
99+
"offsets": [
100+
-2,
101+
-1,
102+
0
103+
]
104+
},
105+
{
106+
"args": {},
107+
"factory_name": "is_last",
108+
"offsets": [
109+
0,
110+
1,
111+
2
112+
]
113+
},
114+
{
115+
"args": {
116+
"n": 1
117+
},
118+
"factory_name": "shape_ngram",
119+
"offsets": [
120+
0
121+
]
122+
},
123+
{
124+
"args": {
125+
"n": 2
126+
},
127+
"factory_name": "shape_ngram",
128+
"offsets": [
129+
-1,
130+
0
131+
]
132+
},
133+
{
134+
"args": {
135+
"n": 3
136+
},
137+
"factory_name": "shape_ngram",
138+
"offsets": [
139+
-1
140+
]
141+
},
142+
{
143+
"args": {
144+
"tagging_scheme_code": 2,
145+
"use_stemming": true
146+
},
147+
"factory_name": "entity_match",
148+
"drop_out": 0.5,
149+
"offsets": [
150+
-2,
151+
-1,
152+
0
153+
]
154+
},
155+
{
156+
"args": {
157+
"tagging_scheme_code": 1
158+
},
159+
"factory_name": "builtin_entity_match",
160+
"offsets": [
161+
-2,
162+
-1,
163+
0
164+
]
165+
}
166+
]
167+
}
168+
}
169+
]
170+
}
Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,69 @@
1+
{
2+
"entities": {
3+
"game": {
4+
"automatically_extensible": true,
5+
"data": [
6+
{
7+
"synonyms": [
8+
"space invader"
9+
],
10+
"value": "Space Invader Limited Edition"
11+
},
12+
{
13+
"synonyms": [
14+
"invader attack"
15+
],
16+
"value": "Invader Attack 3"
17+
},
18+
{
19+
"synonyms": [
20+
"invader war"
21+
],
22+
"value": "Invader War Demo"
23+
},
24+
{
25+
"synonyms": [
26+
"star invader"
27+
],
28+
"value": "Star Invader II"
29+
}
30+
],
31+
"matching_strictness": 0.5,
32+
"use_synonyms": true
33+
}
34+
},
35+
"intents": {
36+
"PlayGame": {
37+
"utterances": [
38+
{
39+
"data": [
40+
{
41+
"text": "I want to play to "
42+
},
43+
{
44+
"entity": "game",
45+
"slot_name": "game",
46+
"text": "space invader"
47+
}
48+
]
49+
},
50+
{
51+
"data": [
52+
{
53+
"text": "please launch the "
54+
},
55+
{
56+
"entity": "game",
57+
"slot_name": "game",
58+
"text": "invader attack"
59+
},
60+
{
61+
"text": " game"
62+
}
63+
]
64+
}
65+
]
66+
}
67+
},
68+
"language": "en"
69+
}
File renamed without changes.

0 commit comments

Comments
 (0)