Skip to content

Commit 988aba1

Browse files
committed
enable task retry recording
1 parent f870ed1 commit 988aba1

File tree

6 files changed

+72
-6
lines changed

6 files changed

+72
-6
lines changed

lib/recorder.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ const std::string Recorder::REC_START = "|recording started";
1010
const std::string Recorder::SWSS_FNAME = "swss.rec";
1111
const std::string Recorder::SAIREDIS_FNAME = "sairedis.rec";
1212
const std::string Recorder::RESPPUB_FNAME = "responsepublisher.rec";
13-
13+
/* Default file name for the retry recording. */
const std::string Recorder::RETRY_FNAME = "retry.rec";
1414

1515
Recorder& Recorder::Instance()
1616
{
@@ -19,6 +19,17 @@ Recorder& Recorder::Instance()
1919
}
2020

2121

22+
/* Construct the retry recorder and load its default settings. */
RetryRec::RetryRec()
{
    /* Defaults: record flag on, rotate flag off, default directory and file name. */
    this->setRecord(true);
    this->setRotate(false);
    this->setLocation(Recorder::DEFAULT_DIR);
    this->setFileName(Recorder::RETRY_FNAME);
    this->setName("Retry");
}
31+
32+
2233
SwSSRec::SwSSRec()
2334
{
2435
/* Set Default values */

lib/recorder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ class RecWriter : public RecBase {
4848
std::string fname;
4949
};
5050

51+
/* Recorder handler for the retry recording; a RecWriter that only adds its own default constructor. */
class RetryRec : public RecWriter
{
public:
    /* Applies the retry recorder's default settings. */
    RetryRec();
};
55+
5156
class SwSSRec : public RecWriter {
5257
public:
5358
SwSSRec();
@@ -73,12 +78,14 @@ class Recorder {
7378
static const std::string SWSS_FNAME;
7479
static const std::string SAIREDIS_FNAME;
7580
static const std::string RESPPUB_FNAME;
81+
static const std::string RETRY_FNAME;
7682

7783
Recorder() = default;
7884
/* Individual Handlers */
7985
SwSSRec swss;
8086
SaiRedisRec sairedis;
8187
ResPubRec respub;
88+
RetryRec retry;
8289
};
8390

8491
}

orchagent/main.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ extern bool gIsNatSupported;
6262
#define SAIREDIS_RECORD_ENABLE 0x1
6363
#define SWSS_RECORD_ENABLE (0x1 << 1)
6464
#define RESPONSE_PUBLISHER_RECORD_ENABLE (0x1 << 2)
65+
/* Bit of record_type that turns the retry recording on. */
#define RETRY_RECORD_ENABLE (0x1 << 3)
6566

6667
/* orchagent heart beat message interval */
6768
#define HEART_BEAT_INTERVAL_MSECS_DEFAULT 10 * 1000
@@ -109,6 +110,7 @@ void sighup_handler(int signo)
109110
/*
110111
* Don't do any logging since they are using mutexes.
111112
*/
113+
Recorder::Instance().retry.setRotate(true);
112114
Recorder::Instance().swss.setRotate(true);
113115
Recorder::Instance().sairedis.setRotate(true);
114116
Recorder::Instance().respub.setRotate(true);
@@ -361,6 +363,7 @@ int main(int argc, char **argv)
361363
string record_location = Recorder::DEFAULT_DIR;
362364
string swss_rec_filename = Recorder::SWSS_FNAME;
363365
string sairedis_rec_filename = Recorder::SAIREDIS_FNAME;
366+
string retry_rec_filename = Recorder::RETRY_FNAME;
364367
string zmq_server_address = "tcp://127.0.0.1:" + to_string(ORCH_ZMQ_PORT);
365368
string vrf;
366369
bool enable_zmq = false;
@@ -395,7 +398,7 @@ int main(int argc, char **argv)
395398
// Disable all recordings if atoi() fails i.e. returns 0 due to
396399
// invalid command line argument.
397400
record_type = atoi(optarg);
398-
if (record_type < 0 || record_type > 7)
401+
if (record_type < 0 || record_type > 15)
399402
{
400403
usage();
401404
exit(EXIT_FAILURE);
@@ -522,6 +525,13 @@ int main(int argc, char **argv)
522525
Recorder::Instance().respub.setFileName(responsepublisher_rec_filename);
523526
Recorder::Instance().respub.startRec(false);
524527

528+
Recorder::Instance().retry.setRecord(
529+
(record_type & RETRY_RECORD_ENABLE) == RETRY_RECORD_ENABLE
530+
);
531+
Recorder::Instance().retry.setLocation(record_location);
532+
Recorder::Instance().retry.setFileName(retry_rec_filename);
533+
Recorder::Instance().retry.startRec(true);
534+
525535
// Instantiate database connectors
526536
DBConnector appl_db("APPL_DB", 0);
527537
DBConnector config_db("CONFIG_DB", 0);

orchagent/orch.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,12 @@ ConsumerBase* Orch::getConsumerBase(const std::string &executorName)
168168
}
169169

170170
void ConsumerBase::addToRetry(const Task &task, const Constraint &cst) {
171+
Recorder::Instance().retry.record(dumpTuple(task).append(CACHE));
171172
getOrch()->getRetryCache(getName())->cache_failed_task(task, cst);
172173
}
173174

174175
void Orch::addToRetry(const std::string &executorName, const Task &task, const Constraint &cst) {
176+
Recorder::Instance().retry.record(getConsumerBase(executorName)->dumpTuple(task).append(CACHE));
175177
getRetryCache(executorName)->cache_failed_task(task, cst);
176178
}
177179

@@ -216,8 +218,11 @@ void ConsumerBase::addToSync(const KeyOpFieldsValuesTuple &entry, bool onRetry)
216218
string key = kfvKey(entry);
217219
string op = kfvOp(entry);
218220

219-
/* Record incoming tasks */
220-
Recorder::Instance().swss.record(dumpTuple(entry));
221+
if (!onRetry)
222+
/* Record incoming tasks */
223+
Recorder::Instance().swss.record(dumpTuple(entry));
224+
else
225+
Recorder::Instance().retry.record(dumpTuple(entry).append(DECACHE));
221226

222227
auto retryCache = getOrch()->getRetryCache(getName());
223228

orchagent/orch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,9 @@ typedef enum
272272
typedef std::pair<swss::DBConnector *, std::string> TableConnector;
273273
typedef std::pair<swss::DBConnector *, std::vector<std::string>> TablesConnector;
274274

275+
/* Marker appended to a task's retry-record line when the task is put into the retry cache (addToRetry). */
#define CACHE "| ++++ |"
276+
/* Marker appended to a task's retry-record line when addToSync is called with onRetry set. */
#define DECACHE "| ---- |"
277+
275278

276279
class Orch
277280
{

orchagent/retrycache.h

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include <sys/time.h>
77
#include "timestamp.h"
88
#include <fstream>
9+
#include <sstream>
10+
#include "recorder.h"
911

1012
using namespace swss;
1113

@@ -19,6 +21,19 @@ enum ConstraintType
1921
RETRY_CST_ECMP // ecmp resources exhausted
2022
};
2123

24+
25+
/*
 * Stream the symbolic name of a ConstraintType.
 * Any value without a matching case is printed as "UNKNOWN".
 */
inline std::ostream& operator<<(std::ostream& os, ConstraintType t)
{
    const char *label = "UNKNOWN";

    switch (t)
    {
        case ConstraintType::RETRY_CST_DUMMY:
            label = "RETRY_CST_DUMMY";
            break;
        case ConstraintType::RETRY_CST_NHG:
            label = "RETRY_CST_NHG";
            break;
        case ConstraintType::RETRY_CST_NHG_REF:
            label = "RETRY_CST_NHG_REF";
            break;
        case ConstraintType::RETRY_CST_PIC:
            label = "RETRY_CST_PIC";
            break;
        case ConstraintType::RETRY_CST_PIC_REF:
            label = "RETRY_CST_PIC_REF";
            break;
        case ConstraintType::RETRY_CST_ECMP:
            label = "RETRY_CST_ECMP";
            break;
        default:
            break;
    }

    return os << label;
}
36+
2237
using ConstraintData = std::string;
2338
using Constraint = std::pair<ConstraintType, ConstraintData>;
2439

@@ -28,6 +43,11 @@ inline Constraint make_constraint(ConstraintType type, ConstraintData data = "")
2843
return {type, data};
2944
}
3045

46+
/*
 * Stream a std::pair as "(first, second)", using each member's own
 * operator<<.
 */
template<typename T, typename U>
std::ostream& operator<<(std::ostream& os, const std::pair<T, U>& p)
{
    const T &head = p.first;
    const U &tail = p.second;

    os << "(" << head;
    os << ", " << tail;
    return os << ")";
}
50+
3151
typedef swss::KeyOpFieldsValuesTuple Task;
3252
typedef std::pair<Constraint, Task> FailedTask;
3353
typedef std::unordered_map<std::string, FailedTask> RetryMap;
@@ -80,6 +100,9 @@ class RetryCache
80100
else
81101
{
82102
m_resolvedConstraints.emplace(cst.first, cst.second);
103+
std::stringstream ss;
104+
ss << cst << " resolution notified -> " << m_retryKeys[cst].size() << " task(s)";
105+
Recorder::Instance().retry.record(ss.str());
83106
}
84107
}
85108

@@ -144,15 +167,22 @@ class RetryCache
144167
}
145168
}
146169

147-
if (keys.empty()) {
170+
std::stringstream ss;
171+
ss << cst << " | " << m_executorName << " | " << tasks->size() << " retried";
172+
173+
if (keys.empty()) {
148174
m_retryKeys.erase(cst);
149175
m_resolvedConstraints.erase(cst);
176+
} else {
177+
ss << " (rest:" << keys.size() << ")";
150178
}
151179

180+
Recorder::Instance().retry.record(ss.str());
181+
152182
return tasks;
153183
}
154184
};
155185

156186
typedef std::unordered_map<std::string, std::shared_ptr<RetryCache>> RetryCacheMap;
157187

158-
#endif /* SWSS_RETRY_CACHE_H */
188+
#endif /* SWSS_RETRY_CACHE_H */

0 commit comments

Comments
 (0)