diff --git a/utils/src/main/java/perfgenie/utils/ArgusQueryT.java b/utils/src/main/java/perfgenie/utils/ArgusQueryT.java index 65cb406..625d269 100644 --- a/utils/src/main/java/perfgenie/utils/ArgusQueryT.java +++ b/utils/src/main/java/perfgenie/utils/ArgusQueryT.java @@ -288,6 +288,89 @@ public static List getCanaryPods(String startquery, String endquery, Str } } + /* Like getArgusMetric, but returns all of the datapoints as a double[] carried by a DatapointsQueryResponse (together with the expanded query). Returns null when pods is empty, when the query cannot be URL-encoded, when the response cannot be parsed, or when it holds no datapoints. + */ + public static DatapointsQueryResponse getArgusMetricDatapoints(String m, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { + if (pods.size() == 0) { + return null; + } + if ((System.currentTimeMillis() - lastUpdated) > 5 * 60 * 1000) {//5 min + updateAccessToken(); + lastUpdated = System.currentTimeMillis(); + } + DatapointsQueryResponse response = new DatapointsQueryResponse(); + String queryT = ac.queries.get(ac.metrics.get(m).get("type")); /* query template selected by the type configured for metric m */ + + String query = queryT.replaceAll("START", String.valueOf(timestampStart)); /* NOTE(review): replaceAll treats both arguments as regex syntax; fine for these placeholders, but a value containing $ or a backslash would be misread */ + query = query.replaceAll("END", String.valueOf(timestampEnd)); + query = query.replaceAll("SCOPE", ac.metrics.get(m).get("scope")); + query = query.replaceAll("METRIC", ac.metrics.get(m).get("metric")); + query = query.replaceAll("INSTANCE", instance); /* INSTANCE and DOMAIN are substituted after SCOPE, so placeholders inside the configured scope are expanded too */ + query = query.replaceAll("DOMAIN", domain); + query = query.replaceAll("CELL", cell); + String podstr = ""; /* pod names joined with | */ + for (int i = 0; i < pods.size(); i++) { + if (i == 0) { + podstr = pods.get(i); + } else { + podstr = podstr + "|" + pods.get(i); + } + } + query = query.replaceAll("POD", podstr); + + response.query = query; /* the response keeps the readable query; the local copy is URL-encoded next */ + + try { + query = URLEncoder.encode(query, StandardCharsets.UTF_8.toString()); + } catch (Exception e) { + System.out.println(cell+ " " +m+"1 " + e.getMessage()); + return null; + } + String metricCommand = "curl -H \"Authorization: Bearer " + accessToken + "\" " + "https://monitoring-api.salesforce.com/argusws/metrics?expression=" + query; + + String metric = ""; + if (accessToken != null) { + 
metric = "{\"array\":" + executeCurlCommand(metricCommand) + "}"; + } else { /* no access token: fall back to the bundled apt.json resource, and only when substrate is null; otherwise metric keeps its initial empty value */ + try { + if (substrate == null) { + metric = "{\"array\":" + Resources.toString(Resources.getResource("apt.json"), StandardCharsets.UTF_8) + "}"; + } + } catch (Exception e) { + metric = "{}"; + System.out.println(cell + " " +m+"2 " + e.getMessage()); + } + } + + ArrayList data = new ArrayList<>(); /* datapoint values of every element of the response array, in the iteration order of the datapoints keys */ + try { + JSONObject jsonObject = new JSONObject(metric); + JSONArray jsonArray = jsonObject.getJSONArray("array"); + for (int i = 0; i < jsonArray.length(); i++) { + JSONObject object = jsonArray.getJSONObject(i); + JSONObject datapoints = object.getJSONObject("datapoints"); + Iterator keys = datapoints.keys(); + while (keys.hasNext()) { + String k = keys.next().toString(); + data.add(datapoints.getDouble(String.valueOf(k))); + } + } + if (!data.isEmpty()) { + response.setDatapoints(data.stream().mapToDouble(Double::doubleValue).toArray()); + return response; + } + } catch (Exception e) { /* marker 3: building the JSON object or reading the datapoints failed */ + System.out.println("query->" + response.query); + System.out.println("metric->" + metric); + System.out.println(cell + " " +m+"3 " + e.getMessage()); + return null; + } + System.out.println("query->" + response.query); /* marker 4 path: no exception was thrown but no datapoints were collected */ + System.out.println("metric->" + metric); + System.out.println(cell + " " +m+"4 "); + return null; + } + public static QueryResponse getArgusMetric(String m, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { if (pods.size() == 0) { return null; @@ -430,6 +513,33 @@ public void setMetric(Double metric) { } } + /* Query response that returns all datapoints in a double array, not just the last one. 
+ */ + static class DatapointsQueryResponse { + double [] datapoints; /* datapoint values; null until setDatapoints is called */ + + public String getQuery() { return query; } + + public void setQuery(String query) { + this.query = query; + } + + String query; /* expanded query text; getArgusMetricDatapoints stores it before URL-encoding */ + + DatapointsQueryResponse() { + datapoints = null; + query = null; + } + + public double [] getDatapoints() { /* returns the stored array itself, not a copy */ + return datapoints; + } + + public void setDatapoints(double [] datapoints) { + this.datapoints = datapoints; + } + } + public static QueryResponse getMetric(String queryT, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { if (pods.size() == 0) { return null; diff --git a/utils/src/main/java/perfgenie/utils/SideBySide.java b/utils/src/main/java/perfgenie/utils/SideBySide.java index 32665d5..79e59c0 100644 --- a/utils/src/main/java/perfgenie/utils/SideBySide.java +++ b/utils/src/main/java/perfgenie/utils/SideBySide.java @@ -4,15 +4,33 @@ import org.json.JSONObject; import java.util.*; +import java.util.stream.DoubleStream; import static perfgenie.utils.Canary.*; +/** Triple for median, confidence and variance. 
+ */ +class VarianceResult { + double median; /* calculateVariance stores the mean of the bootstrap medians here, or 0 when bootstrap is disabled */ + double confidence; /* calculateVariance stores the z-score times the standard error of the bootstrap medians here, or 0 when bootstrap is disabled */ + double variance; /* sum of the per-rank (max - min) spread divided by the longest dataset length */ + + VarianceResult(double median, double confidence, double variance) { + this.median = median; + this.confidence = confidence; + this.variance = variance; + } +} + public class SideBySide { public static long mindiff = 3600000; public static long maxTimeWindow = 5 * 60 * 60 * 1000; // 5 hours due to argus query limitations, need to switch to huron + static int bootstrapCount = 1000; /* number of bootstrap resamples; 0 disables the bootstrap step */ + static int bootstrapSize = 5000; /* number of values drawn for each bootstrap resample */ + public static CanaryResponse processSideBySideCanary(long timestampStart, long timestampEnd, String cell) { return processSideBySideCanaryTask(timestampStart, timestampEnd, podsInstance.get(cell), podsDomain.get(cell), cell); } @@ -101,7 +119,7 @@ public static CanaryResponse processSideBySideCanaryTask(long timestampStart, lo Double metricPercentChange = 100.0 * ((res1.getMetric() / rCount1) - (res2.getMetric() / rCount2)) / (res1.getMetric() / rCount1); record.add(metricPercentChange); header.add(metricList.get(i) + "/r %c:number"); - }else { + } else { record.add(null); header.add(metricList.get(i) + "1:number"); record.add(null); @@ -125,11 +143,149 @@ public static CanaryResponse processSideBySideCanaryTask(long timestampStart, lo record.add(getMetricDashboardURL(canary.finalStart,canary.finalEnd,instance,domain,cell)); header.add("metrics:url"); + VarianceResult varianceZulu = getVarianceOf("cCPUTimePerReq", canary.finalStart, canary.finalEnd, instance, domain, cell, canary.pod1); + VarianceResult varianceZing = getVarianceOf("cCPUTimePerReq", canary.finalStart, canary.finalEnd, instance, domain, cell, canary.pod2); + + record.add(varianceZulu.variance);// variance of cCPUTimePerReq over canary.pod1 + header.add("varianceZulu:number"); + record.add(varianceZing.variance);// variance of cCPUTimePerReq over canary.pod2 + header.add("varianceZing:number"); + return new CanaryResponse(header,record); } return null; } + /* Gets variance info for given metric. 
+ + We get the metric for all selected pods and times and then calculate the variance, which is defined as + sum(max - min) / length across pods and observations with observations sorted from highest to lowest. + + The function also calculates the median value and its confidence using bootstrap, if bootstrap count is greater + than zero. This works by taking N samples from all the pods' values, calculating medians of those datasets + and then reporting the mean and confidence on this distribution. + */ + static VarianceResult getVarianceOf(String metric, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { + ArrayList data = new ArrayList<>(); + + // get the required metric + for (String pod : pods) { + ArgusQueryT.DatapointsQueryResponse res = ArgusQueryT.getArgusMetricDatapoints(metric, timestampStart, timestampEnd, instance, domain, cell, Collections.singletonList(pod)); + if (res == null) + continue; + // we have valid result, create array from the datapoints we got back, sort & reverse so that we go from highest to lowest + double[] arr = res.datapoints; + Arrays.sort(arr); + reverseArray(arr); + data.add(arr); + } + + // calculate the stats + return calculateVariance(data); + } + + /* Calculates the variance of the given set of observations. This is *not* the standard statistical variance, but an + integral metric that sorts each dataset from highest to lowest and then at each observation sums up the + difference between min and max. 
The summarized number is then divided by the number of observations + */ + static VarianceResult calculateVariance(ArrayList from) { + // sort the arrays in descending order + int maxL = 0; + for (double [] a : from) { + Arrays.sort(a); + reverseArray(a); + // and figure out the max length + if (maxL < a.length) + maxL = a.length; + } + double result = 0; + for (int i = 0; i < maxL; i++) { + double min = Double.POSITIVE_INFINITY; + double max = Double.NEGATIVE_INFINITY; + for (double [] a : from) { + if (i < a.length) { + if (a[i] < min) + min = a[i]; + if (a[i] > max) + max = a[i]; + } + } + result = result + (max - min); + } + + // if bootstrap is disabled we are done + if (bootstrapCount == 0) + return new VarianceResult(0, 0, result / maxL); + + // flatten the pod datasets (we don't care they are sorted as we do random sapling anyways) + double[] input = from.stream() + .flatMapToDouble(DoubleStream::of) + .toArray(); + + // the array of medians for the sampled datasets (not expecting normal distribution) + double[] medians = new double[bootstrapCount]; + // create the sampled datasets and fill the medians array + for (int i = 0; i < medians.length; i++) { + double[] dataset = sample(input, bootstrapSize); + medians[i] = calculateMedian(dataset); + } + // calculate array's median (now we expect normal distribution), and standard deviation & error + double medianMean = calculateMean(medians); + double sd = calculateSd(medians, medianMean); + double se = sd / Math.sqrt(medians.length); + + // determine z-score based on the confidence intervals we want and calculate the confidence + double zScore = 3.291; // for 0.999 + //double zScore = 2.576; // for 0.99 + //double zScore = 1.960; // for 0.95 + double confidence = se * zScore; + + return new VarianceResult(medianMean, confidence, result / maxL); + } + + static double[] sample(double[] input, int sampleSize) { + // Concatenate all double arrays into a single double[] + double [] result = new double [sampleSize]; 
+ Random rand = new Random(); + for (int i = 0; i < sampleSize; i++) { + int index = rand.nextInt(input.length); /* uniform index, so values are drawn with replacement; NOTE(review): nextInt rejects a bound of 0, so input must not be empty */ + result[i] = input[index]; + } + return result; + } + + static void reverseArray(double [] a) { /* reverses a in place by swapping mirrored elements */ + for (int i = 0; i < a.length / 2; i++) { + double temp = a[i]; + a[i] = a[a.length - 1 - i]; + a[a.length - 1 - i] = temp; + } + } + + static double calculateMean(double [] from) { /* arithmetic mean; 0 for an empty array */ + if (from.length == 0) + return 0; + double sum = 0.0; + for (double v : from) + sum += v; + return sum / from.length; + } + + static double calculateSd(double [] from, double mean) { /* square root of the mean squared deviation from the supplied mean (divides by length, not length - 1); 0 for an empty array */ + if (from.length == 0) + return 0; + double sd = 0.0; + for (double v : from) + sd += Math.pow(v - mean, 2); + return Math.sqrt(sd / from.length); + } + + static double calculateMedian(double[] from) { /* sorts from in place and returns the element at index length / 2, i.e. the upper of the two middle values for even lengths; an empty array is not handled */ + Arrays.sort(from); + return from[from.length / 2]; + } + + public static String getCanaryDashboardURL(List pod1, List pod2, long curfinalStart, long curfinalEnd, String instance, String domain, String cell) { String URL = "https://moncloud-grafana.sfproxy.monitoring.aws-esvc1-useast2.aws.sfdc.cl/d/evIw19pHz/zulu-zing-canary-falcon-rpulle-automation?orgId=1&"; @@ -164,7 +320,6 @@ public static String getMetricDashboardURL(long curfinalStart, long curfinalEnd, return URL1; } - public static CanaryDetails processZingCanary(String start, String end, String instance, String domain, String cell) { String metric = ArgusQueryT.getGCMetric(start, end, instance, domain, cell); return parse(metric); @@ -425,6 +580,7 @@ private static void generateCombinationsHelper(List list, int k, int sta } } + // run this test to do stuff public static void main(String[] args) { try { int start = 6; diff --git a/utils/src/main/resources/argus.json b/utils/src/main/resources/argus.json index 6c45000..e15ab90 100644 --- a/utils/src/main/resources/argus.json +++ b/utils/src/main/resources/argus.json @@ -3,7 +3,8 @@ "cumulative": 
"DIFF(GROUPBYTAG(START:END:SCOPE:METRIC{cell=CELL,k8s_container_name=coreapp,k8s_pod_name=POD,role=app}:max:all-max,#k8s_container_name#,#SUM#),GROUPBYTAG(START:END:SCOPE:METRIC{cell=CELL,k8s_container_name=coreapp,k8s_pod_name=POD,role=app}:min:all-min,#k8s_container_name#,#SUM#))", "average": "DOWNSAMPLE(ALIASBYREGEX(ALIAS(GROUPBYTAG(START:END:SCOPE:METRIC{cell=CELL,k8s_container_name=coreapp,k8s_pod_name=POD,role=app}:avg:1m-avg,#k8s_container_name#,#AVERAGE#),#:#,#literal#,#CELL#,#literal#),#(.*)::\\{#),#1d-avg#,#0#,#abs#)", "ccumulative": "DIFF(GROUPBYTAG(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}:max:all-max,#k8s_container_name#,#SUM#),GROUPBYTAG(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}:min:all-min,#k8s_container_name#,#SUM#))", - "caverage": "DOWNSAMPLE(ALIASBYREGEX(ALIAS(GROUPBYTAG(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}:avg:1m-avg,#k8s_container_name#,#AVERAGE#),#:#,#literal#,#CELL#,#literal#),#(.*)::\\{#),#1d-avg#,#0#,#abs#)" + "caverage": "DOWNSAMPLE(ALIASBYREGEX(ALIAS(GROUPBYTAG(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}:avg:1m-avg,#k8s_container_name#,#AVERAGE#),#:#,#literal#,#CELL#,#literal#),#(.*)::\\{#),#1d-avg#,#0#,#abs#)", + "cdatapointsPerRequest" : "DIVIDE(DERIVATIVE(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}),LAG(START:END:SCOPE:SFDC_type-ServerMetrics.LogMetric-COUNT{k8s_container_name=coreapp,k8s_pod_name=POD,role=app},1))" }, "metrics": { "rCPUTime" :{ @@ -38,6 +39,14 @@ "type": "ccumulative", "enabled": true }, + "cCPUTimePerReq":{ + "name": "cCPUTimeReq", + "description": "container CPU time / Req (all datapoints)", + "scope": "cadvisor.aws.INSTANCE.DOMAIN", + "metric": "container_cpu_usage_seconds_total", + "type": "cdatapointsPerRequest", + "enabled": true + }, "5xx":{ "name": "5xx", "description": "5xx response count",