Skip to content

Commit 9fd3be0

Browse files
Encoder improvement using staticCheckIfImpactPhenotype()
1 parent eb2a78c commit 9fd3be0

File tree

8 files changed

+113
-61
lines changed

8 files changed

+113
-61
lines changed

core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIModelsCheck.kt

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,15 @@ class AIModelsCheck : IntegrationTestRestBase() {
5050
// initClass(MultiTypeController())
5151

5252

53-
// initClass(AllOrNoneController())
53+
initClass(AllOrNoneController())
5454
// initClass(ArithmeticController())
5555
// initClass(BasicController())
5656
// initClass(ImplyController())
5757
// initClass(MixedController())
5858
// initClass(OnlyOneController())
5959
// initClass(OrController())
6060
// initClass(RequiredController())
61-
initClass(ZeroOrOneController())
61+
// initClass(ZeroOrOneController())
6262
}
6363

6464
@JvmStatic
@@ -75,7 +75,7 @@ class AIModelsCheck : IntegrationTestRestBase() {
7575
}
7676
}
7777

78-
val modelName = "KNN" // Choose "GAUSSIAN", "KNN", "GLM", "KDE", "NN", etc.
78+
val modelName = "KNN" // Choose "GAUSSIAN", "GLM", "KDE", "KNN", "NN", etc.
7979
val runIterations = 5000
8080
val encoderType4Test = EMConfig.EncoderType.RAW
8181

@@ -258,9 +258,13 @@ class AIModelsCheck : IntegrationTestRestBase() {
258258
val encoderTemp = InputEncoderUtilWrapper(action, encoderType = config.aiEncoderType)
259259

260260
//print gene types
261-
println("Expanded genes are: " +
262-
encoderTemp.endPointToGeneList()
263-
.joinToString(", ") { it.getLeafGene()::class.simpleName ?: "Unknown" })
261+
println(
262+
"Expanded genes are: " +
263+
encoderTemp.endPointToGeneList()
264+
.joinToString(", ") { ng ->
265+
"${ng.gene.name}:${ng.gene::class.simpleName ?: "Unknown"}"
266+
}
267+
)
264268

265269
val hasUnsupportedGene = !encoderTemp.areAllGenesSupported()
266270
if (hasUnsupportedGene) {

core-it/src/test/kotlin/org/evomaster/core/problem/rest/aiclassification/AIMoldelsCheckWFD.kt

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ import org.evomaster.core.problem.rest.data.Endpoint
2525
import org.evomaster.core.problem.rest.schema.OpenApiAccess
2626
import org.evomaster.core.problem.rest.service.sampler.AbstractRestSampler
2727
import org.evomaster.core.search.action.Action
28+
import org.evomaster.core.search.gene.ObjectGene
2829
import org.evomaster.core.search.service.Randomness
30+
import org.jetbrains.kotlin.fir.builder.escapedStringToCharacter
2931
import java.net.HttpURLConnection
3032
import java.net.URL
3133
import javax.ws.rs.core.MediaType
@@ -49,17 +51,18 @@ class AIModelsCheckWFD : IntegrationTestRestBase() {
4951
}
5052
}
5153

52-
val modelName = "KNN" // Choose "GAUSSIAN", "KNN", "GLM", "KDE", "NN", etc.
53-
val runIterations = 5000
54+
val modelName = "KNN" // Choose "GAUSSIAN", "GLM", "KDE", "KNN", "NN", etc.
55+
val runIterations = 200
5456
val encoderType4Test = EMConfig.EncoderType.RAW
5557
val saveReport: Boolean = false
5658

5759
val decisionMaking = "sendAnyway" // choose "probabilistic" to make the machine decide whether to send the request or not
5860

5961
val baseUrlOfSut = "http://localhost:8080"
60-
val v2orV3 = "v2"
6162

62-
val swaggerUrl = "$baseUrlOfSut/$v2orV3/api-docs"
63+
// val v2orV3 = "v2"
64+
// val swaggerUrl = "$baseUrlOfSut/$v2orV3/api-docs"
65+
val swaggerUrl = "http://localhost:8080/api/v3/openapi.json"
6366

6467

6568
@Inject
@@ -209,8 +212,7 @@ class AIModelsCheckWFD : IntegrationTestRestBase() {
209212
}
210213

211214
fun runClassifierExample() {
212-
val schema = OpenApiAccess.getOpenAPIFromLocation("$baseUrlOfSut/v2/api-docs")
213-
// val schema = OpenApiAccess.getOpenAPIFromLocation("$baseUrlOfSut/v3/api-docs")
215+
val schema = OpenApiAccess.getOpenAPIFromLocation(swaggerUrl)
214216
val restSchema = RestSchema(schema)
215217

216218
val options = RestActionBuilderV3.Options(config)
@@ -242,9 +244,12 @@ class AIModelsCheckWFD : IntegrationTestRestBase() {
242244
println("*************************************************")
243245
println("Path: $endPoint")
244246

247+
println(sampledAction.parameters.joinToString(", ") {
248+
"${it.name}=${it.primaryGene().getValueAsRawString()}" })
249+
245250
val geneValues = sampledAction.parameters
246251
.map { it.primaryGene().getValueAsRawString().replace("EVOMASTER", "") }
247-
println("Input Genes: ${geneValues.joinToString(", ")}")
252+
println("Input Gene: ${geneValues.joinToString(", ")}")
248253
println("Genes Size: ${geneValues.size}")
249254

250255
val individual =
@@ -253,14 +258,24 @@ class AIModelsCheckWFD : IntegrationTestRestBase() {
253258

254259
val encoderTemp = InputEncoderUtilWrapper(action, encoderType = config.aiEncoderType)
255260

256-
//print gene types
257-
println("Expanded genes are: " +
258-
encoderTemp.endPointToGeneList()
259-
.joinToString(", ") { it.getLeafGene()::class.simpleName ?: "Unknown" })
260-
261-
val geneList = encoderTemp.endPointToGeneList()
262-
val typesRow = geneList.joinToString(", ") { gene -> gene.javaClass.simpleName }
263-
println("Genes type in the gene list: $typesRow")
261+
println(
262+
"Expanded genes are: " +
263+
encoderTemp.endPointToGeneList()
264+
.joinToString(", ") { ng ->
265+
"[Name:${ng.gene.name}, Value:${ng.gene.getValueAsRawString()}, Class:${ng.gene::class.simpleName ?: "Unknown"}]"
266+
}
267+
)
268+
269+
// println(
270+
// "Expanded genes are: " +
271+
// encoderTemp.endPointToGeneList()
272+
// .onEach { ng ->
273+
// println("Is ${ng.gene.name} an ObjectGene? ${ng.gene is ObjectGene}")
274+
// }
275+
// .joinToString(", ") { ng ->
276+
// "${ng.gene.name}:${ng.gene.getLeafGene()::class.simpleName ?: "Unknown"}"
277+
// }
278+
// )
264279

265280
val hasUnsupportedGene = !encoderTemp.areAllGenesSupported()
266281
if (hasUnsupportedGene) {

core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/deterministic/Deterministic400EndpointModel.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class Deterministic400EndpointModel(
2222

2323
private val constraints: MutableList<ConstraintFor400> = mutableListOf()
2424

25-
private val modelMetrics: ModelMetrics = ModelMetricsWithTimeWindow(20)
25+
private val modelMetrics: ModelMetrics = ModelMetricsWithTimeWindow(100)
2626

2727
override fun updateModel(
2828
input: RestCallAction,

core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/AbstractProbabilistic400Classifier.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ abstract class AbstractProbabilistic400Classifier<T : AIModel>(
4545
return@getOrPut null
4646
}
4747

48-
val listGenes = encoder.endPointToGeneList().map { it.getLeafGene() }
48+
val listGenes = encoder.endPointToGeneList().map { it.gene.getLeafGene() }
4949
createEndpointModel(endpoint, warmup, listGenes.size, encoderType, randomness)
5050
}
5151

core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/AbstractProbabilistic400EndpointModel.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ abstract class AbstractProbabilistic400EndpointModel(
2929
protected var initialized: Boolean = false
3030

3131
val modelMetricsFullHistory: ModelMetricsFullHistory = ModelMetricsFullHistory()
32-
val modelMetricsWithTimeWindow: ModelMetrics = ModelMetricsWithTimeWindow(1000)
32+
val modelMetricsWithTimeWindow: ModelMetrics = ModelMetricsWithTimeWindow(100)
3333

3434
/** Ensure endpoint matches this model */
3535
protected fun verifyEndpoint(inputEndpoint: Endpoint) {

core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/InputEncoderUtilWrapper.kt

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -47,28 +47,36 @@ class InputEncoderUtilWrapper(
4747
ArrayGene::class
4848
)
4949

50+
data class ParamAndGene(
51+
val paramName: String,
52+
val gene: Gene
53+
)
54+
5055
fun isSupported(g: Gene): Boolean =
5156
supportedGeneTypes.any { it.isInstance(g) }
5257

53-
fun areAllGenesSupported(): Boolean {
54-
val leafs = endPointToGeneList().map { gene -> gene.getLeafGene() }
55-
return leafs.all { isSupported(it) }
56-
}
58+
fun areAllGenesSupported(): Boolean =
59+
endPointToGeneList().all { isSupported(it.gene.getLeafGene()) }
60+
61+
private fun expandGene(g: Gene): List<Gene> {
62+
63+
val gene = g.getLeafGene()
5764

58-
fun expandGene(g: Gene): List<Gene> = when (g) {
59-
is ObjectGene -> {
65+
if (gene is ObjectGene) {
6066
val expanded = mutableListOf<Gene>()
61-
g.fixedFields.forEach { expanded.addAll(expandGene(it)) }
62-
g.additionalFields?.forEach { pair ->
67+
gene.fixedFields.forEach { expanded.addAll(expandGene(it)) }
68+
gene.additionalFields?.forEach { pair ->
6369
expanded.addAll(expandGene(pair.second))
6470
}
65-
expanded
71+
return expanded
6672
}
67-
else -> listOf(g)
73+
74+
return listOf(gene)
6875
}
6976

70-
fun endPointToGeneList(): List<Gene> {
71-
val listGenes = mutableListOf<Gene>()
77+
fun endPointToGeneList(): List<ParamAndGene> {
78+
val paramAndGenes = mutableListOf<ParamAndGene>()
79+
7280
action.parameters
7381
.filter { p ->
7482
val name = p.name
@@ -77,9 +85,13 @@ class InputEncoderUtilWrapper(
7785
}
7886
.forEach { p ->
7987
val g = p.primaryGene()
80-
listGenes.addAll(expandGene(g))
88+
val expanded = expandGene(g)
89+
expanded.forEach { subGene ->
90+
paramAndGenes.add(ParamAndGene(subGene.name, subGene))
91+
}
8192
}
82-
return listGenes
93+
94+
return paramAndGenes
8395
}
8496

8597

@@ -102,12 +114,12 @@ class InputEncoderUtilWrapper(
102114
*/
103115
fun encode(): List<Double> {
104116
val sentinel = -1e6 // for null handling
105-
val listGenes = endPointToGeneList()
117+
val listGenes = endPointToGeneList().map { it.gene }
106118
val rawEncodedFeatures = mutableListOf<Double>()
107119

108120
for (g in listGenes) {
109121

110-
if(g.getValueAsPrintableString()==""){
122+
if(!g.staticCheckIfImpactPhenotype() || g.getValueAsPrintableString()==""){
111123
rawEncodedFeatures.add(sentinel)
112124
continue
113125
}
@@ -150,8 +162,13 @@ class InputEncoderUtilWrapper(
150162
* its effective size.
151163
*/
152164
is ArrayGene<*> -> {
153-
val count = leaf.getViewOfElements()
154-
.count { e -> e.getValueAsPrintableString().isNotBlank() }
165+
val elements = leaf.getViewOfElements()
166+
// If the array is empty, encode as sentinel
167+
val count = if (elements.isEmpty()) {
168+
sentinel
169+
} else {
170+
elements.count { e -> e.getValueAsPrintableString().isNotBlank()}
171+
}
155172
rawEncodedFeatures.add(count.toDouble())
156173
}
157174

core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/kde/KDE400EndpointModel.kt

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,8 @@ class KDE400EndpointModel (
3333
private var density400: KDE? = null
3434
private var densityNot400: KDE? = null
3535

36-
/**
37-
* Optional cap on stored samples per class (0 = unlimited).
38-
* If >0, uses reservoir-style uniform downsampling.
39-
*/
40-
var maxSamplesPerClass: Int = 0
36+
/** Cap on stored samples per class, enforced via reservoir-style uniform downsampling. */
37+
var maxSamplesPerClass: Int = 10000
4138

4239
/** Must be called once to initialize the model properties */
4340
override fun initializeIfNeeded(inputVector: List<Double>) {
@@ -148,7 +145,7 @@ class KDE400EndpointModel (
148145
* Represents a Kernel Density Estimator (KDE) that approximates the distribution of a dataset
149146
* using Gaussian kernels with diagonal bandwidth.
150147
* @property d Dimensionality of the data points.
151-
* @property maxStored Maximum number of samples to store in memory. If the value is <= 0, all samples are stored.
148+
* @property maxStored Maximum number of samples to store in memory; with a value <= 0 the reservoir logic below retains no samples.
152149
*/
153150
class KDE(private val d: Int, private val maxStored: Int = 0) {
154151

@@ -174,18 +171,19 @@ class KDE400EndpointModel (
174171
M2[j] += delta * delta2
175172
}
176173

177-
// Store sample (unbounded or reservoir downsample)
178-
if (maxStored <= 0) {
174+
/**
175+
* Reservoir-style replacement to avoid memory issues:
176+
* - Fill up until we reach maxStored.
177+
* - After that, replace existing samples with decreasing probability
178+
* to maintain a uniform random subset of all seen data.
179+
*/
180+
if (samples.size < maxStored) {
179181
samples.add(x)
180182
} else {
181-
if (samples.size < maxStored) {
182-
samples.add(x)
183-
} else {
184-
// reservoir: replace with decreasing probability
185-
val r = kotlin.random.Random.nextLong(seen)
186-
if (r < maxStored) {
187-
samples[r.toInt()] = x
188-
}
183+
// reservoir: replace with decreasing probability
184+
val r = kotlin.random.Random.nextLong(seen)
185+
if (r < maxStored) {
186+
samples[r.toInt()] = x
189187
}
190188
}
191189
}

core/src/main/kotlin/org/evomaster/core/problem/rest/classifier/probabilistic/knn/KNN400EndpointModel.kt

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class KNN400EndpointModel (
2828

2929
companion object {
3030
private const val NOT_400 = 200
31+
private const val MAX_SAMPLES = 10_000 // Fixed-size buffer for samples to avoid memory overload
3132
}
3233

3334
/**
@@ -38,6 +39,9 @@ class KNN400EndpointModel (
3839
*/
3940
val samples = mutableListOf<Pair<List<Double>, Int>>()
4041

42+
/** Total number of samples observed so far (including discarded ones). */
43+
private var seen: Long = 0L
44+
4145
// Euclidean distance between two points in the feature space
4246
private fun distance(a: List<Double>, b: List<Double>): Double {
4347
return sqrt(a.zip(b).sumOf { (ai, bi) -> (ai - bi) * (ai - bi) })
@@ -112,12 +116,26 @@ class KNN400EndpointModel (
112116
* Store only classes of interest (i.e., 400 and not 400 groups)
113117
*/
114118
val trueStatusCode = output.getStatusCode()
115-
if (trueStatusCode == 400) {
116-
samples.add(inputVector to 400)
119+
val label = if (trueStatusCode == 400) 400 else NOT_400
120+
121+
/**
122+
* Keep the sample list bounded using reservoir sampling.
123+
*
124+
* - If we have not yet filled the reservoir (samples.size < MAX_SAMPLES), add the new sample.
125+
* - Otherwise, replace an existing sample with decreasing probability to maintain
126+
* a uniform random subset of all seen data.
127+
*/
128+
seen++
129+
if (samples.size < MAX_SAMPLES) {
130+
samples.add(inputVector to label)
117131
} else {
118-
samples.add(inputVector to NOT_400)
132+
val r = kotlin.random.Random.nextLong(seen)
133+
if (r < MAX_SAMPLES) {
134+
samples[r.toInt()] = inputVector to label
135+
}
119136
}
120137

138+
121139
}
122140

123141
}

0 commit comments

Comments
 (0)