-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[TSan][compiler-rt] Defer symbolization of Reports to as late as possible #151120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-compiler-rt-sanitizer Author: Dan Blackwell (DanBlackwell) ChangesThis is the refactoring portion of: #150994. My aim is for this change to replicate current behaviour - just with Symbolization done explicitly (and later than previously). Full diff: https://github.com/llvm/llvm-project/pull/151120.diff 8 Files Affected:
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 795e05394d71a..5d05789aaccdd 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -2146,6 +2146,7 @@ static void ReportErrnoSpoiling(ThreadState *thr, uptr pc, int sig) {
rep.SetSigNum(sig);
if (!IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack)) {
rep.AddStack(stack, true);
+ rep.SymbolizeStackElems();
OutputReport(thr, rep);
}
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp
index befd6a369026d..d16fd4cfb00d9 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp
@@ -446,6 +446,7 @@ static void ReportMutexHeldWrongContext(ThreadState *thr, uptr pc) {
VarSizeStackTrace trace;
ObtainCurrentStack(thr, pc, &trace);
rep.AddStack(trace, true);
+ rep.SymbolizeStackElems();
OutputReport(thr, rep);
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
index 0ea83fb3b5982..3a0a6a1a4564f 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
@@ -185,6 +185,7 @@ static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
ThreadRegistryLock l(&ctx->thread_registry);
ScopedReport rep(ReportTypeSignalUnsafe);
rep.AddStack(stack, true);
+ rep.SymbolizeStackElems();
OutputReport(thr, rep);
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_report.h b/compiler-rt/lib/tsan/rtl/tsan_report.h
index 8975540ddfda2..53bb21964dbba 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_report.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_report.h
@@ -12,6 +12,8 @@
#ifndef TSAN_REPORT_H
#define TSAN_REPORT_H
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
#include "sanitizer_common/sanitizer_symbolizer.h"
#include "sanitizer_common/sanitizer_thread_registry.h"
#include "sanitizer_common/sanitizer_vector.h"
@@ -56,6 +58,7 @@ struct ReportMop {
bool atomic;
uptr external_tag;
Vector<ReportMopMutex> mset;
+ StackTrace stack_trace;
ReportStack *stack;
ReportMop();
@@ -79,6 +82,7 @@ struct ReportLocation {
int fd = 0;
bool fd_closed = false;
bool suppressable = false;
+ StackID stack_id = 0;
ReportStack *stack = nullptr;
};
@@ -89,15 +93,23 @@ struct ReportThread {
ThreadType thread_type;
char *name;
Tid parent_tid;
+ StackID stack_id;
ReportStack *stack;
+ bool suppressable;
};
struct ReportMutex {
int id;
uptr addr;
+ StackID stack_id;
ReportStack *stack;
};
+struct AddedLocationAddr {
+ uptr addr;
+ usize locs_idx;
+};
+
class ReportDesc {
public:
ReportType typ;
@@ -105,6 +117,7 @@ class ReportDesc {
Vector<ReportStack*> stacks;
Vector<ReportMop*> mops;
Vector<ReportLocation*> locs;
+ Vector<AddedLocationAddr> added_location_addrs;
Vector<ReportMutex*> mutexes;
Vector<ReportThread*> threads;
Vector<Tid> unique_tids;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index 46276f20831d0..0d9d3bbf580a3 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -420,6 +420,7 @@ class ScopedReportBase {
void AddSleep(StackID stack_id);
void SetCount(int count);
void SetSigNum(int sig);
+ void SymbolizeStackElems(void);
const ReportDesc *GetReport() const;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
index 2a8aa1915c9ae..0027589939906 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
@@ -62,6 +62,7 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
ObtainCurrentStack(thr, pc, &trace);
rep.AddStack(trace, true);
rep.AddLocation(addr, 1);
+ rep.SymbolizeStackElems();
OutputReport(thr, rep);
}
@@ -539,15 +540,18 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
for (int i = 0; i < r->n; i++) {
for (int j = 0; j < (flags()->second_deadlock_stack ? 2 : 1); j++) {
u32 stk = r->loop[i].stk[j];
+ StackTrace stack;
if (stk && stk != kInvalidStackID) {
- rep.AddStack(StackDepotGet(stk), true);
+ stack = StackDepotGet(stk);
} else {
// Sometimes we fail to extract the stack trace (FIXME: investigate),
// but we should still produce some stack trace in the report.
- rep.AddStack(StackTrace(&dummy_pc, 1), true);
+ stack = StackTrace(&dummy_pc, 1);
}
+ rep.AddStack(stack, true);
}
}
+ rep.SymbolizeStackElems();
OutputReport(thr, rep);
}
@@ -572,6 +576,7 @@ void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr,
return;
rep.AddStack(trace, true);
rep.AddLocation(addr, 1);
+ rep.SymbolizeStackElems();
OutputReport(thr, rep);
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index 0820bf1adee43..f0848919694fa 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
@@ -187,10 +188,8 @@ void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
mop->size = size;
mop->write = !(typ & kAccessRead);
mop->atomic = typ & kAccessAtomic;
- mop->stack = SymbolizeStack(stack);
mop->external_tag = external_tag;
- if (mop->stack)
- mop->stack->suppressable = true;
+ mop->stack_trace = stack;
for (uptr i = 0; i < mset->Size(); i++) {
MutexSet::Desc d = mset->Get(i);
int id = this->AddMutex(d.addr, d.stack_id);
@@ -199,6 +198,80 @@ void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
}
}
+void ScopedReportBase::SymbolizeStackElems() {
+ // symbolize memory ops
+ for (usize i = 0; i < rep_->mops.Size(); i++) {
+ ReportMop *mop = rep_->mops[i];
+ mop->stack = SymbolizeStack(mop->stack_trace);
+ if (mop->stack)
+ mop->stack->suppressable = true;
+ }
+
+ Vector<ReportLocation *> locs_tmp;
+
+ // Repopulate the locations in the order they were added - take care with
+ // the added locations
+ usize locs_idx = 0;
+ for (usize i = 0; i < rep_->added_location_addrs.Size(); i++) {
+ AddedLocationAddr *added_loc_addr = &rep_->added_location_addrs[i];
+
+ for (; locs_idx < added_loc_addr->locs_idx; locs_idx++) {
+ ReportLocation *loc = rep_->locs[locs_idx];
+ loc->stack = SymbolizeStackId(loc->stack_id);
+ locs_tmp.PushBack(loc);
+ }
+
+ if (ReportLocation *added_loc = SymbolizeData(added_loc_addr->addr)) {
+ added_loc->suppressable = true;
+ locs_tmp.PushBack(added_loc);
+ }
+ }
+
+ // Append any remaining locations
+ for (; locs_idx < rep_->locs.Size(); locs_idx++) {
+ ReportLocation *loc = rep_->locs[locs_idx];
+ loc->stack = SymbolizeStackId(loc->stack_id);
+ locs_tmp.PushBack(loc);
+ }
+
+ rep_->locs.Reset();
+ for (usize i = 0; i < locs_tmp.Size(); i++) rep_->locs.PushBack(locs_tmp[i]);
+
+ // symbolize locations
+ for (usize i = 0; i < rep_->locs.Size(); i++) {
+ ReportLocation *loc = rep_->locs[i];
+ loc->stack = SymbolizeStackId(loc->stack_id);
+ }
+
+ usize offset = 0;
+ for (usize i = 0; i < rep_->added_location_addrs.Size(); i++) {
+ struct AddedLocationAddr *added_loc = &rep_->added_location_addrs[i];
+ if (ReportLocation *loc = SymbolizeData(added_loc->addr)) {
+ offset++;
+ loc->suppressable = true;
+ rep_->locs[added_loc->locs_idx] = loc;
+ }
+ }
+
+ // Check that all locs are populated (no-op in release)
+ for (usize i = 0; i < rep_->locs.Size(); i++)
+ CHECK_NE(rep_->locs[i], nullptr);
+
+ // symbolize threads
+ for (usize i = 0; i < rep_->threads.Size(); i++) {
+ ReportThread *rt = rep_->threads[i];
+ rt->stack = SymbolizeStackId(rt->stack_id);
+ if (rt->stack)
+ rt->stack->suppressable = rt->suppressable;
+ }
+
+ // symbolize mutexes
+ for (usize i = 0; i < rep_->mutexes.Size(); i++) {
+ ReportMutex *rm = rep_->mutexes[i];
+ rm->stack = SymbolizeStackId(rm->stack_id);
+ }
+}
+
void ScopedReportBase::AddUniqueTid(Tid unique_tid) {
rep_->unique_tids.PushBack(unique_tid);
}
@@ -216,10 +289,8 @@ void ScopedReportBase::AddThread(const ThreadContext *tctx, bool suppressable) {
rt->name = internal_strdup(tctx->name);
rt->parent_tid = tctx->parent_tid;
rt->thread_type = tctx->thread_type;
- rt->stack = 0;
- rt->stack = SymbolizeStackId(tctx->creation_stack_id);
- if (rt->stack)
- rt->stack->suppressable = suppressable;
+ rt->stack_id = tctx->creation_stack_id;
+ rt->suppressable = suppressable;
}
#if !SANITIZER_GO
@@ -270,7 +341,7 @@ int ScopedReportBase::AddMutex(uptr addr, StackID creation_stack_id) {
rep_->mutexes.PushBack(rm);
rm->id = rep_->mutexes.Size() - 1;
rm->addr = addr;
- rm->stack = SymbolizeStackId(creation_stack_id);
+ rm->stack_id = creation_stack_id;
return rm->id;
}
@@ -288,7 +359,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
loc->fd_closed = closed;
loc->fd = fd;
loc->tid = creat_tid;
- loc->stack = SymbolizeStackId(creat_stack);
+ loc->stack_id = creat_stack;
rep_->locs.PushBack(loc);
AddThread(creat_tid);
return;
@@ -310,7 +381,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
loc->heap_chunk_size = b->siz;
loc->external_tag = b->tag;
loc->tid = b->tid;
- loc->stack = SymbolizeStackId(b->stk);
+ loc->stack_id = b->stk;
rep_->locs.PushBack(loc);
AddThread(b->tid);
return;
@@ -324,11 +395,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
AddThread(tctx);
}
#endif
- if (ReportLocation *loc = SymbolizeData(addr)) {
- loc->suppressable = true;
- rep_->locs.PushBack(loc);
- return;
- }
+ rep_->added_location_addrs.PushBack({addr, rep_->locs.Size()});
}
#if !SANITIZER_GO
@@ -819,6 +886,7 @@ void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
s[1].epoch() <= thr->last_sleep_clock.Get(s[1].sid()))
rep.AddSleep(thr->last_sleep_stack_id);
#endif
+ rep.SymbolizeStackElems();
OutputReport(thr, rep);
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
index c6a8fd2acb6a8..36ee7af1fd973 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
@@ -96,6 +96,7 @@ void ThreadFinalize(ThreadState *thr) {
ScopedReport rep(ReportTypeThreadLeak);
rep.AddThread(leaks[i].tctx, true);
rep.SetCount(leaks[i].count);
+ rep.SymbolizeStackElems();
OutputReport(thr, rep);
}
#endif
|
@thurstond I have split this out from the previous PR now. |
I think that should be #149516 instead? Please add a sentence on what this refactoring enables in the future (a blurb of #149516). |
Vector<ReportLocation *> locs_tmp; | ||
|
||
// Repopulate the locations in the order they were added - take care with | ||
// the added locations | ||
usize locs_idx = 0; | ||
for (usize i = 0; i < rep_->added_location_addrs.Size(); i++) { | ||
AddedLocationAddr *added_loc_addr = &rep_->added_location_addrs[i]; | ||
|
||
for (; locs_idx < added_loc_addr->locs_idx; locs_idx++) { | ||
ReportLocation *loc = rep_->locs[locs_idx]; | ||
loc->stack = SymbolizeStackId(loc->stack_id); | ||
locs_tmp.PushBack(loc); | ||
} | ||
|
||
if (ReportLocation *added_loc = SymbolizeData(added_loc_addr->addr)) { | ||
added_loc->suppressable = true; | ||
locs_tmp.PushBack(added_loc); | ||
} | ||
} | ||
|
||
// Append any remaining locations | ||
for (; locs_idx < rep_->locs.Size(); locs_idx++) { | ||
ReportLocation *loc = rep_->locs[locs_idx]; | ||
loc->stack = SymbolizeStackId(loc->stack_id); | ||
locs_tmp.PushBack(loc); | ||
} | ||
|
||
rep_->locs.Reset(); | ||
for (usize i = 0; i < locs_tmp.Size(); i++) rep_->locs.PushBack(locs_tmp[i]); | ||
|
||
// symbolize locations | ||
for (usize i = 0; i < rep_->locs.Size(); i++) { | ||
ReportLocation *loc = rep_->locs[i]; | ||
loc->stack = SymbolizeStackId(loc->stack_id); | ||
} | ||
|
||
usize offset = 0; | ||
for (usize i = 0; i < rep_->added_location_addrs.Size(); i++) { | ||
struct AddedLocationAddr *added_loc = &rep_->added_location_addrs[i]; | ||
if (ReportLocation *loc = SymbolizeData(added_loc->addr)) { | ||
offset++; | ||
loc->suppressable = true; | ||
rep_->locs[added_loc->locs_idx] = loc; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC this is inserting elements from rep_->added_location_addrs into the correct place in rep_->locs (a small portion is calling the symbolizers), but it's a lot of work. I think it can be simplified by adding placeholders in ScopedReportBase::AddLocation (see comment below).
rep_->locs.PushBack(loc); | ||
return; | ||
} | ||
rep_->added_location_addrs.PushBack({addr, rep_->locs.Size()}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think if we also added a placeholder value here:
rep_->locs.PushBack(nullptr);
then ScopedReportBase::SymbolizeStackElems()
could be greatly simplified: loop through added_location_addrs and overwrite the placeholder values at rep_->locs[added_loc_addr->locs_idx]
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did try doing that originally, IIRC it ended up with me having to add null checks in other places (that felt kind of 'magic') - I'll redo the change and push it up as an additional commit and we can discuss which is the better approach.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now I remember the tricky bit - we need to delete the placeholder if if (ReportLocation *loc = SymbolizeData(added_loc->addr))
evaluates to false (as we wouldn't PushBack in that case previously).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, that's a tricky bit I hadn't considered.
I suspect adding the placeholders here and then deleting them in SymbolizeStackElems() would still be simpler than the current approach. Conditional deletion of placeholders in rep_->locs (effectively, compaction) could be done in-place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup; kept it simple with the null filtering as the internal Vector
type has limited functionality. imo this looks way neater than that mess I had there before.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Forgot to say - just pushed up a commit with the change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, it looks pretty good! Left a few comments.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good to me!
cc'ing @dvyukov and @vitalybuka as they were reviewers on the patch that this was spun off from (#149516).
@@ -199,6 +198,55 @@ void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, | |||
} | |||
} | |||
|
|||
void ScopedReportBase::SymbolizeStackElems() { | |||
// symbolize memory ops | |||
for (usize i = 0; i < rep_->mops.Size(); i++) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nits throughout this function: https://llvm.org/docs/CodingStandards.html#don-t-evaluate-end-every-time-through-a-loop
// Filter out any added location placeholders that could not be symbolized | ||
Vector<ReportLocation *> filtered_locs; | ||
for (usize i = 0; i < rep_->locs.Size(); i++) | ||
if (rep_->locs[i] != nullptr) | ||
filtered_locs.PushBack(rep_->locs[i]); | ||
rep_->locs.Resize(filtered_locs.Size()); | ||
for (usize i = 0; i < filtered_locs.Size(); i++) | ||
rep_->locs[i] = (filtered_locs[i]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW I think the in-place compaction could be done with (untested):
usize j = 0; // Index past the last compacted element
usize size = rep_->locs.Size();
for (usize i = 0; i < size; i++) {
if (rep_->locs[i] != nullptr) {
rep_->locs[j] = rep_->locs[i];
j++;
}
}
rep_->locs.Resize(j);
but I'm ok with the current code.
rep_->locs.PushBack(loc); | ||
return; | ||
} | ||
rep_->added_location_addrs.PushBack({addr, rep_->locs.Size()}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, it looks pretty good! Left a few comments.
// symbolize locations | ||
for (usize i = 0; i < rep_->locs.Size(); i++) { | ||
// added locations have a NULL placeholder - don't dereference them | ||
if (ReportLocation *loc = rep_->locs[i]) | ||
loc->stack = SymbolizeStackId(loc->stack_id); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be simplified by moving this code block after the filtering step (or combining them)
This is the refactoring portion of: #149516. My aim is for this change to replicate current behaviour - just with Symbolization done explicitly (and later than previously).
This change will enable us to perform symboliaztion after releasing the locks in
OutputReport
; this is necessary on Apple platforms in order to avoid a deadlock.