Skip to content

Commit 894e101

Browse files
tieosean-mcmanus
andauthored
Fix C/C++ debug hover on intermediate members of a dereferenced expression (#14540)
* Add EvaluatableExpressionProvider to fix C/C++ debug hover on dereferenced members VS Code's default debug data-tip keeps a leading `*`/`&` and clips on the right, so hovering an intermediate member of e.g. `*a.b.c` evaluates `*a.b` (a dereference of the struct `a.b`) and shows no value. Register an EvaluatableExpressionProvider that, only for that case, returns the expression without the leading operator. Every other expression returns undefined so the default behavior is unchanged. * Move the debug hover provider to the debugger Register the EvaluatableExpressionProvider during debugger activation instead of through the language client, so it also works when IntelliSense is disabled, and move it next to the other debugger code. The expression computation lives in a vscode-free module (evaluatableExpression.ts) so it can be unit tested directly. Registering a provider replaces VS Code's built-in data-tip expression detection, so it reproduces that for ordinary tokens and additionally handles access chains the built-in detection gets wrong for C/C++: - A leading */& binds to the whole access chain (the postfix ., ->, [] operators bind tighter), so it is dropped for an interior member of a dot chain, where *a.b would dereference the struct a.b, and kept on the final member and before ->. - Array subscripts and :: are kept in the token, so members after a subscript (a.b[i].c) and scoped names (ns::var) resolve instead of producing a broken fragment. * Document why the leading-operator drop is .-only The leading */& is kept before -> / [] and at the end of a chain because those left operands are provably pointers/arrays, so *ptr->m, *a.b[i] and &a[i] evaluate without error; it is dropped only before . where the left operand may be a struct. Documents these keep-leading outcomes as deliberate per review feedback. * Decline non-token cursor positions inside a subscript Inside [...] the token spans whitespace and operators (e.g. a[i + j]), so returning the nearest identifier evaluated the wrong expression when the cursor was on an operator or space. Return undefined unless the cursor is on the index identifier itself, and add tests. * Rewrite hover token detection with a balanced-bracket scanner Replace the token regex with a manual scanner so nested subscripts (a[b[i]]) stay in one token, fix hovering past the last identifier truncating a trailing subscript, and decline non-token positions inside [...] (operators/whitespace). Keep the leading * only on the final dereferenced segment (including a final subscript element like *a.b[i]); drop it on interior segments and drop a leading & always. Removes the previous regex (and its ReDoS surface). --------- Co-authored-by: Sean McManus <seanmcm@microsoft.com>
1 parent 4aa35c9 commit 894e101

4 files changed

Lines changed: 373 additions & 1 deletion

File tree

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
/* --------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All Rights Reserved.
3+
* See 'LICENSE' in the project root for license information.
4+
* ------------------------------------------------------------------------------------------ */
5+
6+
// The column range and text of the expression a debug data-tip should evaluate.
7+
export interface EvaluatableExpressionInfo {
8+
readonly startColumn: number;
9+
readonly endColumn: number;
10+
readonly expression: string;
11+
}
12+
13+
const wordChar: RegExp = /[\p{L}\p{N}_]/u;
14+
15+
function isWord(ch: string | undefined): boolean {
16+
return ch !== undefined && wordChar.test(ch);
17+
}
18+
19+
// Index just past the `]` that closes the `[` at `open`, or -1 if it is unbalanced.
20+
function matchingClose(line: string, open: number): number {
21+
let depth: number = 0;
22+
for (let j: number = open; j < line.length; j++) {
23+
if (line[j] === '[') {
24+
depth++;
25+
} else if (line[j] === ']') {
26+
depth--;
27+
if (depth === 0) {
28+
return j + 1;
29+
}
30+
}
31+
}
32+
return -1;
33+
}
34+
35+
// Index of the `[` that opens the `]` at `close`, or -1 if it is unbalanced.
36+
function matchingOpen(line: string, close: number): number {
37+
let depth: number = 0;
38+
for (let j: number = close; j >= 0; j--) {
39+
if (line[j] === ']') {
40+
depth++;
41+
} else if (line[j] === '[') {
42+
depth--;
43+
if (depth === 0) {
44+
return j;
45+
}
46+
}
47+
}
48+
return -1;
49+
}
50+
51+
// Start of the access chain that the subscript opening at `open` applies to, without crossing
52+
// `exprStart` or an enclosing (still-open) `[`.
53+
function primaryStart(line: string, open: number, exprStart: number): number {
54+
let s: number = open;
55+
while (s > exprStart) {
56+
const prev: string = line[s - 1];
57+
if (isWord(prev)) {
58+
while (s > exprStart && isWord(line[s - 1])) {
59+
s--;
60+
}
61+
} else if (prev === '.') {
62+
s--;
63+
} else if (prev === '>' && line[s - 2] === '-') {
64+
s -= 2;
65+
} else if (prev === ':' && line[s - 2] === ':') {
66+
s -= 2;
67+
} else if (prev === ']') {
68+
const open2: number = matchingOpen(line, s - 1);
69+
if (open2 < exprStart) {
70+
break;
71+
}
72+
s = open2;
73+
} else {
74+
break;
75+
}
76+
}
77+
return s;
78+
}
79+
80+
// Computes the expression a debug data-tip should evaluate for the token at `character` in `line`,
81+
// or undefined when the cursor is not on an expression token.
82+
//
83+
// Registering an EvaluatableExpressionProvider replaces VS Code's built-in data-tip expression
84+
// detection, so this reproduces that detection for ordinary tokens and additionally resolves access
85+
// chains involving a leading `*`/`&` or array subscripts, which the built-in detection mishandles:
86+
// - A leading `*` is kept only when hovering the final segment of the chain (the value actually
87+
// dereferenced, e.g. `*a.b.c`); on any interior segment it is dropped, so hovering `b` in
88+
// `*a.b.c` gives `a.b` and hovering `b` in `*a.b[i]` gives `a.b` (not `*a.b`, the dereferenced
89+
// struct/array base). A leading `&` is always dropped so the hovered variable shows its value
90+
// rather than its address.
91+
// - Array subscripts are part of the chain, including nested ones like `a[b[i]]`; hovering `c` in
92+
// `a.b[i].c` evaluates `a.b[i].c` rather than a fragment after the `]`, hovering a subscript
93+
// bracket evaluates the indexed element, and hovering the index evaluates it on its own.
94+
//
95+
// This has no vscode dependency so it can be unit tested directly.
96+
export function computeEvaluatableExpression(line: string, character: number): EvaluatableExpressionInfo | undefined {
97+
// Find the access-chain token containing the cursor: an optional leading run of `*`/`&`, then a
98+
// chain of identifiers, `.`, `->`, `::` and balanced `[...]` subscripts. Brackets are matched by
99+
// depth so nested subscripts stay in one token. The cursor is matched with an inclusive end so a
100+
// token is selected when the cursor is at its trailing edge (VS Code's built-in does the same).
101+
let tokenStart: number = -1;
102+
let tokenEnd: number = -1;
103+
const n: number = line.length;
104+
let i: number = 0;
105+
while (i < n) {
106+
const start: number = i;
107+
while (i < n && (line[i] === '*' || line[i] === '&')) {
108+
i++;
109+
}
110+
let chained: boolean = false;
111+
let advanced: boolean = true;
112+
while (i < n && advanced) {
113+
const c: string = line[i];
114+
if (isWord(c)) {
115+
while (i < n && isWord(line[i])) {
116+
i++;
117+
}
118+
chained = true;
119+
} else if (c === '.') {
120+
i++;
121+
chained = true;
122+
} else if (c === '-' && line[i + 1] === '>') {
123+
i += 2;
124+
chained = true;
125+
} else if (c === ':' && line[i + 1] === ':') {
126+
i += 2;
127+
chained = true;
128+
} else if (c === '[') {
129+
const close: number = matchingClose(line, i);
130+
if (close === -1) {
131+
advanced = false;
132+
} else {
133+
i = close;
134+
chained = true;
135+
}
136+
} else {
137+
advanced = false;
138+
}
139+
}
140+
if (chained && start <= character && character <= i) {
141+
tokenStart = start;
142+
tokenEnd = i;
143+
break;
144+
}
145+
i = chained && i > start ? i : start + 1;
146+
}
147+
if (tokenStart === -1) {
148+
return undefined;
149+
}
150+
151+
const leadingMatch: RegExpMatchArray | null = line.substring(tokenStart, tokenEnd).match(/^[*&]+/u);
152+
const leading: string | null = leadingMatch !== null ? leadingMatch[0] : null;
153+
const exprStart: number = tokenStart + (leading !== null ? leading.length : 0);
154+
155+
// A chain can begin with `.` or `->` when its head was skipped (e.g. a call: `foo().bar` leaves
156+
// `.bar`). Such a fragment is not a valid expression, so decline it.
157+
if (line[exprStart] === '.' || (line[exprStart] === '-' && line[exprStart + 1] === '>')) {
158+
return undefined;
159+
}
160+
161+
// On a subscript bracket, evaluate the indexed element: the subscripted primary through that
162+
// subscript, without the leading `*`/`&`.
163+
const cursorChar: string = line.charAt(character);
164+
if (cursorChar === '[' || cursorChar === ']') {
165+
const open: number = cursorChar === '[' ? character : matchingOpen(line, character);
166+
const close: number = cursorChar === '[' ? matchingClose(line, character) : character + 1;
167+
if (open !== -1 && close !== -1) {
168+
let startColumn: number = Math.max(primaryStart(line, open, exprStart), exprStart);
169+
// Keep a leading `*` when the subscript is the final segment (the dereferenced
170+
// element, e.g. `*a.b[i]`); a leading `&`, or an interior subscript, drops it.
171+
if (close === tokenEnd && startColumn === exprStart && leading !== null && /^\*+$/u.test(leading)) {
172+
startColumn = tokenStart;
173+
}
174+
return { startColumn, endColumn: close, expression: line.substring(startColumn, close) };
175+
}
176+
}
177+
178+
// Locate the identifier under the cursor and the offset just past it.
179+
let clipEnd: number = tokenEnd;
180+
let wordStart: number = tokenStart;
181+
let word: string = '';
182+
const wordRegExp: RegExp = /[\p{L}\p{N}_]+/gu;
183+
const tokenText: string = line.substring(tokenStart, tokenEnd);
184+
for (let w: RegExpExecArray | null = wordRegExp.exec(tokenText); w !== null; w = wordRegExp.exec(tokenText)) {
185+
clipEnd = tokenStart + w.index + w[0].length;
186+
wordStart = tokenStart + w.index;
187+
word = w[0];
188+
if (clipEnd >= character) {
189+
break;
190+
}
191+
}
192+
193+
// An identifier inside a `[...]` is the index; it is evaluated on its own. Inside `[...]` the
194+
// chain also spans operators and whitespace (e.g. `a[i + j]`), so only return the identifier
195+
// when the cursor is actually on it; other positions are not tokens.
196+
let depth: number = 0;
197+
for (let k: number = tokenStart; k < character; k++) {
198+
if (line[k] === '[') {
199+
depth++;
200+
} else if (line[k] === ']') {
201+
depth--;
202+
}
203+
}
204+
if (depth > 0) {
205+
if (character < wordStart || character >= clipEnd) {
206+
return undefined;
207+
}
208+
return { startColumn: wordStart, endColumn: clipEnd, expression: word };
209+
}
210+
211+
// Past the last identifier but still on the token's trailing `]` (or its inclusive trailing
212+
// edge), with no identifier left between the cursor and the end: evaluate the indexed
213+
// element, like hovering that closing bracket, so the clip never cuts a subscript in half.
214+
if (line.charAt(tokenEnd - 1) === ']' && !/[\p{L}\p{N}_]/u.test(line.substring(character, tokenEnd))) {
215+
const open: number = matchingOpen(line, tokenEnd - 1);
216+
if (open !== -1) {
217+
let startColumn: number = Math.max(primaryStart(line, open, exprStart), exprStart);
218+
if (startColumn === exprStart && leading !== null && /^\*+$/u.test(leading)) {
219+
startColumn = tokenStart;
220+
}
221+
return { startColumn, endColumn: tokenEnd, expression: line.substring(startColumn, tokenEnd) };
222+
}
223+
}
224+
225+
// The leading `*`/`&` belongs to the final segment of the chain. A `*` is kept only when the
226+
// cursor is on that final segment (the value actually dereferenced, e.g. `*a.b.c`). On any
227+
// interior segment it is dropped, since `*a.b` would dereference the struct `a.b` and `*a.b[i]`
228+
// the array base rather than the indexed element. A leading `&` is always dropped so hovering
229+
// the variable shows its value, not its address.
230+
const keepLeading: boolean = leading !== null && clipEnd >= tokenEnd && /^\*+$/u.test(leading);
231+
if (!keepLeading) {
232+
return { startColumn: exprStart, endColumn: clipEnd, expression: line.substring(exprStart, clipEnd) };
233+
}
234+
return { startColumn: tokenStart, endColumn: clipEnd, expression: line.substring(tokenStart, clipEnd) };
235+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/* --------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All Rights Reserved.
3+
* See 'LICENSE' in the project root for license information.
4+
* ------------------------------------------------------------------------------------------ */
5+
import * as vscode from 'vscode';
6+
import { computeEvaluatableExpression, EvaluatableExpressionInfo } from './evaluatableExpression';
7+
8+
// Provides the expression a C/C++ debug data-tip evaluates when hovering a variable. The actual
9+
// computation lives in `evaluatableExpression.ts` (no vscode dependency) so it can be unit tested.
10+
export class EvaluatableExpressionProvider implements vscode.EvaluatableExpressionProvider {
11+
public provideEvaluatableExpression(document: vscode.TextDocument, position: vscode.Position): vscode.ProviderResult<vscode.EvaluatableExpression> {
12+
const info: EvaluatableExpressionInfo | undefined = computeEvaluatableExpression(document.lineAt(position.line).text, position.character);
13+
if (info === undefined) {
14+
return undefined;
15+
}
16+
return new vscode.EvaluatableExpression(new vscode.Range(position.line, info.startColumn, position.line, info.endColumn), info.expression);
17+
}
18+
}

Extension/src/Debugger/extension.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@ import { SshTargetsProvider, getActiveSshTarget, initializeSshTargets, selectSsh
1414
import { TargetLeafNode, setActiveSshTarget } from '../SSH/TargetsView/targetNodes';
1515
import { sshCommandToConfig } from '../SSH/sshCommandToConfig';
1616
import { getSshConfiguration, getSshConfigurationFiles, parseFailures, writeSshConfiguration } from '../SSH/sshHosts';
17-
import { pathAccessible } from '../common';
17+
import { documentSelector, pathAccessible } from '../common';
1818
import { instrument } from '../instrumentation';
1919
import { getSshChannel } from '../logger';
2020
import { AttachItemsProvider, AttachPicker, RemoteAttachPicker } from './attachToProcess';
2121
import { ConfigurationAssetProviderFactory, ConfigurationSnippetProvider, DebugConfigurationProvider, IConfigurationAssetProvider } from './configurationProvider';
2222
import { DebuggerType } from './configurations';
2323
import { CppdbgDebugAdapterDescriptorFactory, CppvsdbgDebugAdapterDescriptorFactory } from './debugAdapterDescriptorFactory';
24+
import { EvaluatableExpressionProvider } from './evaluatableExpressionProvider';
2425
import { NativeAttachItemsProviderFactory } from './nativeAttach';
2526

2627
// The extension deactivate method is asynchronous, so we handle the disposables ourselves instead of using extensionContext.subscriptions.
@@ -82,6 +83,9 @@ export async function initialize(context: vscode.ExtensionContext): Promise<void
8283
disposables.push(vscode.debug.registerDebugAdapterDescriptorFactory(DebuggerType.cppvsdbg, new CppvsdbgDebugAdapterDescriptorFactory(context)));
8384
disposables.push(vscode.debug.registerDebugAdapterDescriptorFactory(DebuggerType.cppdbg, new CppdbgDebugAdapterDescriptorFactory(context)));
8485

86+
// Supplies the expression evaluated by debug data-tips when hovering C/C++ source.
87+
disposables.push(vscode.languages.registerEvaluatableExpressionProvider(documentSelector, instrument(new EvaluatableExpressionProvider())));
88+
8589
// SSH Targets View
8690
await initializeSshTargets();
8791
const sshTargetsProvider: SshTargetsProvider = new SshTargetsProvider();
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/* --------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All Rights Reserved.
3+
* See 'LICENSE' in the project root for license information.
4+
* ------------------------------------------------------------------------------------------ */
5+
6+
import { describe, it } from 'mocha';
7+
import { strictEqual } from 'node:assert';
8+
import { computeEvaluatableExpression } from '../../src/Debugger/evaluatableExpression';
9+
10+
// In each input the `|` marks the cursor; it is removed (at that single index) before evaluating.
11+
function evaluate(marked: string): string | undefined {
12+
const character: number = marked.indexOf('|');
13+
const line: string = marked.slice(0, character) + marked.slice(character + 1);
14+
return computeEvaluatableExpression(line, character)?.expression;
15+
}
16+
17+
describe('computeEvaluatableExpression', () => {
18+
it('returns undefined when the cursor is not on a token', () => {
19+
strictEqual(evaluate('a + | b'), undefined);
20+
});
21+
22+
it('evaluates a plain identifier', () => {
23+
strictEqual(evaluate('|x'), 'x');
24+
});
25+
26+
it('drops a leading * for an interior member of a dot chain', () => {
27+
strictEqual(evaluate('*|a.b.c'), 'a');
28+
strictEqual(evaluate('*a.|b.c'), 'a.b');
29+
});
30+
31+
it('keeps a leading * on the final member', () => {
32+
strictEqual(evaluate('*a.b.|c'), '*a.b.c');
33+
});
34+
35+
it('drops a leading * on an interior member before -> and keeps it on the final member', () => {
36+
strictEqual(evaluate('*|ptr->member'), 'ptr');
37+
strictEqual(evaluate('*ptr->|member'), '*ptr->member');
38+
});
39+
40+
it('drops a leading * before a subscript (the array base is not the dereferenced value)', () => {
41+
strictEqual(evaluate('*a.|b[i]'), 'a.b');
42+
strictEqual(evaluate('*dbbolz.|nullzwang_ok[DBBOLZ_A_AUS]'), 'dbbolz.nullzwang_ok');
43+
});
44+
45+
it('drops a leading & so the variable shows its value, not its address', () => {
46+
strictEqual(evaluate('&|nullzwang_ok'), 'nullzwang_ok');
47+
strictEqual(evaluate('&a.b.|c'), 'a.b.c');
48+
});
49+
50+
it('leaves -> chains without a leading operator unchanged', () => {
51+
strictEqual(evaluate('p->|q->r'), 'p->q');
52+
strictEqual(evaluate('p->q->|r'), 'p->q->r');
53+
});
54+
55+
it('keeps array subscripts in the chain', () => {
56+
strictEqual(evaluate('a.|b[i].c'), 'a.b');
57+
strictEqual(evaluate('a.b[i].|c'), 'a.b[i].c');
58+
strictEqual(evaluate('dbbolz.dbbolz_anst[out_idx].|anw_dig'), 'dbbolz.dbbolz_anst[out_idx].anw_dig');
59+
strictEqual(evaluate('dbbolz.dbbolz_anst[out_idx].anw_dig.|stsdig'), 'dbbolz.dbbolz_anst[out_idx].anw_dig.stsdig');
60+
});
61+
62+
it('evaluates the element when on a subscript bracket, without the leading operator', () => {
63+
strictEqual(evaluate('a.b|[i].c'), 'a.b[i]');
64+
strictEqual(evaluate('a.b[i|].c'), 'a.b[i]');
65+
strictEqual(evaluate('&dbbolz.dbbolz_anst[out_idx].fg|[kanal_idx]'), 'dbbolz.dbbolz_anst[out_idx].fg[kanal_idx]');
66+
});
67+
68+
it('evaluates the index on its own when inside a subscript', () => {
69+
strictEqual(evaluate('a.b[|i].c'), 'i');
70+
strictEqual(evaluate('&dbbolz.dbbolz_anst[out_idx].fg[|kanal_idx]'), 'kanal_idx');
71+
});
72+
73+
it('keeps :: scoped names together', () => {
74+
strictEqual(evaluate('ns::|var'), 'ns::var');
75+
strictEqual(evaluate('ns::var::|z'), 'ns::var::z');
76+
});
77+
78+
it('returns undefined for a fragment that begins with a connector', () => {
79+
// The head of the expression (a call) is skipped by the tokenizer, leaving a `.`/`->` start.
80+
strictEqual(evaluate('foo().|bar'), undefined);
81+
strictEqual(evaluate('obj->fn()->|field'), undefined);
82+
strictEqual(evaluate('(*this).|member'), undefined);
83+
});
84+
85+
it('returns undefined when the cursor is on an operator or space inside a subscript', () => {
86+
strictEqual(evaluate('a[i |+ j].c'), undefined);
87+
strictEqual(evaluate('a[i +| j].c'), undefined);
88+
strictEqual(evaluate('a[i +|j].c'), 'j');
89+
strictEqual(evaluate('a[i|+1].c'), undefined);
90+
});
91+
92+
it('treats nested subscripts as balanced brackets', () => {
93+
strictEqual(evaluate('a|[b[i]]'), 'a[b[i]]');
94+
strictEqual(evaluate('a[b|[i]]'), 'b[i]');
95+
strictEqual(evaluate('a[b[|i]]'), 'i');
96+
strictEqual(evaluate('a[b[i]|]'), 'a[b[i]]');
97+
});
98+
99+
it('keeps a trailing subscript whole when hovering past the last identifier', () => {
100+
strictEqual(evaluate('a[i]|'), 'a[i]');
101+
strictEqual(evaluate('*a.b[i]|'), '*a.b[i]');
102+
});
103+
104+
it('keeps a leading * on a final subscript element but drops it on an interior one', () => {
105+
strictEqual(evaluate('*a.b|[i]'), '*a.b[i]');
106+
strictEqual(evaluate('*a.b[i|]'), '*a.b[i]');
107+
strictEqual(evaluate('*a.b|[i].c'), 'a.b[i]');
108+
strictEqual(evaluate('&a|[i]'), 'a[i]');
109+
});
110+
111+
it('does not grab the whole element when hovering an interior connector', () => {
112+
strictEqual(evaluate('*x|.y[i]'), 'x');
113+
strictEqual(evaluate('*dbbolz|.nullzwang_ok[DBBOLZ_A_AUS]'), 'dbbolz');
114+
});
115+
});

0 commit comments

Comments
 (0)