Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add telemetry for how long it takes to parse files with tree-sitter #213565

Merged
merged 43 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
e1ea829
Make space for tree sitter
alexr00 May 7, 2024
150354b
Add the tree sitter wasm file
alexr00 May 13, 2024
1ee4fe2
Very naive tree-sitter syntax highlighting for html, with a layer bre…
alexr00 May 21, 2024
3614235
Update tree when content changes
alexr00 May 22, 2024
2703b6b
WIP for making abstract tokens class
alexr00 May 22, 2024
edde184
Handle theme changes
alexr00 May 22, 2024
5c20e64
Replace entire text model value with parse callback
alexr00 May 22, 2024
4cd3e08
Perf improvements
alexr00 May 23, 2024
fef5067
Add tree-sitter-typescript
alexr00 May 23, 2024
5c65dfa
Add typescript + better initial parsing
alexr00 May 24, 2024
c372e40
Refactor into tree parsing service and fix flaw in parse callback
alexr00 May 27, 2024
d411bfc
Remove things that aren't the parser service
alexr00 May 27, 2024
7c0e5d5
Add yielding
alexr00 May 27, 2024
e32a24d
Remove changes that aren't required for PR
alexr00 May 27, 2024
7f0678a
Remove more file changes
alexr00 May 27, 2024
eb1bba2
Reduce yield to 50 ms
alexr00 Jun 18, 2024
d56de42
Fix incremental parsing
alexr00 Jun 18, 2024
194527c
Merge branch 'main' into alexr00/treeSitterParserService
alexr00 Jun 18, 2024
df28801
Try update node-abi
alexr00 Jun 18, 2024
85ba149
Revert "Try update node-abi"
alexr00 Jun 18, 2024
70e6fdd
Update text buffer chunk api
rebornix Jun 18, 2024
b728bd9
fix build
rebornix Jun 18, 2024
eb265f5
Merge pull request #215471 from microsoft/rebornix/right-cricket
rebornix Jun 18, 2024
0563837
Remove tree-sitter dependency
alexr00 Jun 19, 2024
37f5af6
Merge branch 'main' into alexr00/treeSitterParserService
alexr00 Jul 8, 2024
ab57877
Merge branch 'main' into alexr00/treeSitterParserService
alexr00 Jul 9, 2024
c28ede5
Adopt new, as yet unpublished, `@vscode/tree-sitter-wasm` package
alexr00 Jul 10, 2024
9747616
Use published `@vscode/tree-sitter-wasm` package
alexr00 Jul 11, 2024
8c7116c
Break `TreeSitterTree` and `TreeSitterParserService` into better pieces
alexr00 Jul 12, 2024
3a9a103
Merge remote-tracking branch 'origin/main' into alexr00/treeSitterPar…
alexr00 Jul 15, 2024
7148842
Fix tests
alexr00 Jul 15, 2024
2c2e321
Remove unneeded import
alexr00 Jul 15, 2024
028c770
Fix missing tree-sitter-wasm in web and remote
alexr00 Jul 16, 2024
539a273
Make package.jsons match
alexr00 Jul 16, 2024
e6c2529
Add @vscode/tree-sitter-wasm to web loader config
alexr00 Jul 17, 2024
4f98572
Merge branch 'main' into alexr00/treeSitterParserService
alexr00 Jul 17, 2024
04232a4
Try using importAMDNodeModule
alexr00 Jul 17, 2024
0ca55da
PR feedback
alexr00 Jul 18, 2024
6435924
Add race condition test for changing language while loading language
alexr00 Jul 18, 2024
7e82f06
Use same timeout
alexr00 Jul 18, 2024
13a6a3b
Queue content changes
alexr00 Jul 18, 2024
d08bbb1
Remove override dispose
alexr00 Jul 19, 2024
26edd5c
Move queue into TreeSitterTree
alexr00 Jul 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@
"@vscode/policy-watcher",
"@vscode/proxy-agent",
"@vscode/spdlog",
"@vscode/tree-sitter-wasm",
"@vscode/windows-process-tree",
"assert",
"child_process",
Expand Down Expand Up @@ -755,7 +756,8 @@
"vs/base/~",
"vs/base/parts/*/~",
"vs/platform/*/~",
"vs/editor/~"
"vs/editor/~",
"@vscode/tree-sitter-wasm" // node module allowed even in /common/
]
},
{
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"@vscode/spdlog": "^0.15.0",
"@vscode/sqlite3": "5.1.6-vscode",
"@vscode/sudo-prompt": "9.3.1",
"@vscode/tree-sitter-wasm": "^0.0.1",
"@vscode/vscode-languagedetection": "1.0.21",
"@vscode/windows-mutex": "^0.5.0",
"@vscode/windows-process-tree": "^0.6.0",
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap-window.js
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ const isESM = false;
// using a fallback such as node.js require which does not exist in sandbox
const baseNodeModulesPath = isDev ? '../node_modules' : '../node_modules.asar';
loaderConfig.paths = {
'@vscode/tree-sitter-wasm': `${baseNodeModulesPath}/@vscode/tree-sitter-wasm/wasm/tree-sitter.js`,
'vscode-textmate': `${baseNodeModulesPath}/vscode-textmate/release/main.js`,
'vscode-oniguruma': `${baseNodeModulesPath}/vscode-oniguruma/release/main.js`,
'vsda': `${baseNodeModulesPath}/vsda/index.js`,
Expand Down
263 changes: 263 additions & 0 deletions src/vs/editor/browser/services/treeSitter/treeSitterParserService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

import { TreeSitterTokenizationRegistry } from 'vs/editor/common/languages';
// eslint-disable-next-line local/code-amd-node-module
import { Parser } from '@vscode/tree-sitter-wasm';
import { AppResourcePath, FileAccess, nodeModulesPath } from 'vs/base/common/network';
import { ITreeSitterParserService } from 'vs/editor/common/services/treeSitterParserService';
import { IModelService } from 'vs/editor/common/services/model';
import { Disposable, DisposableMap, DisposableStore, IDisposable } from 'vs/base/common/lifecycle';
import { ITextModel, ITextSnapshot } from 'vs/editor/common/model';
import { IFileService } from 'vs/platform/files/common/files';
import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
import { ITelemetryService } from 'vs/platform/telemetry/common/telemetry';
import { ILogService } from 'vs/platform/log/common/log';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { setTimeout0 } from 'vs/base/common/platform';

const EDITOR_EXPERIMENTAL_PREFER_TREESITTER = 'editor.experimental.preferTreeSitter';
const moduleLocationTreeSitter: AppResourcePath = `${nodeModulesPath}/@vscode/tree-sitter-wasm/wasm`;
const moduleLocationTreeSitterWasm: AppResourcePath = `${moduleLocationTreeSitter}/tree-sitter.wasm`;

/**
 * Groups a tree-sitter parser with the latest tree it produced, the language it is currently
 * configured with, and the text snapshot bookkeeping kept alongside them.
 *
 * Disposing deletes the current tree and the parser and disposes the store passed to the
 * constructor.
 */
export class TreeSitterTree implements IDisposable {
	private _tree: Parser.Tree | undefined;
	private _language: Parser.Language | undefined;
	private _snapshot: ITextSnapshot | undefined;
	private _snapshotChunks: { chunk: string; startOffset: number }[] = [];

	constructor(public readonly parser: Parser, private readonly disposables: DisposableStore) { }

	/** Deletes the tree (if any) and the parser, then disposes the owned disposables. */
	dispose(): void {
		if (this._tree) {
			this._tree.delete();
		}
		if (this.parser) {
			this.parser.delete();
		}
		this.disposables.dispose();
	}

	/** The most recently assigned tree, if any. */
	get tree(): Parser.Tree | undefined {
		return this._tree;
	}

	/** Replaces the tree: the previous one is deleted and the snapshot state is reset. */
	set tree(newTree: Parser.Tree | undefined) {
		if (this._tree) {
			this._tree.delete();
		}
		this._tree = newTree;
		this._snapshot = undefined;
		this._snapshotChunks = [];
	}

	/** The language last assigned to this tree's parser, if any. */
	get language(): Parser.Language | undefined {
		return this._language;
	}

	/** Forwards the language to the parser; clearing the language also clears the tree. */
	set language(newLanguage: Parser.Language | undefined) {
		if (this.parser) {
			this.parser.setLanguage(newLanguage);
		}
		this._language = newLanguage;
		if (newLanguage === undefined) {
			this.tree = undefined;
		}
	}

	/** The snapshot taken by `createSnapshot`, until it is cleared or the tree is replaced. */
	get snapshot(): ITextSnapshot | undefined {
		return this._snapshot;
	}

	/** Takes a fresh snapshot of the given text model. */
	public createSnapshot(textModel: ITextModel) {
		this._snapshot = textModel.createSnapshot();
	}

	/** Drops the current snapshot without touching the recorded chunks. */
	public clearSnapshot() {
		this._snapshot = undefined;
	}

	/** Chunks recorded through `addSnapshotChunk` since the tree was last replaced. */
	get snapshotChunks(): { chunk: string; startOffset: number }[] {
		return this._snapshotChunks;
	}

	/** Records a chunk of text together with the offset at which it starts. */
	public addSnapshotChunk(chunk: string, startOffset: number) {
		const entry = { chunk, startOffset };
		this._snapshotChunks.push(entry);
	}
}

export class TreeSitterParserService extends Disposable implements ITreeSitterParserService {
readonly _serviceBrand: undefined;
private _init: Promise<void>;
private _treeSitterTrees: DisposableMap<ITextModel, TreeSitterTree> = new DisposableMap();
private _languages: Map<string, Parser.Language> = new Map();

/**
 * Creates the service. When the `editor.experimental.preferTreeSitter` setting lists no
 * languages, initialization resolves immediately and no listeners are installed. Otherwise
 * parser initialization is started, the hard-coded grammars are registered and the service
 * begins tracking text models.
 */
constructor(@IModelService private readonly _modelService: IModelService,
	@IFileService private readonly _fileService: IFileService,
	@ITelemetryService private readonly _telemetryService: ITelemetryService,
	@ILogService private readonly _logService: ILogService,
	@IConfigurationService private readonly _configurationService: IConfigurationService
) {
	super();

	const enabledLanguages = this._getSetting();
	if (enabledLanguages.length > 0) {
		// Point the parser runtime at the tree-sitter.wasm that ships in node_modules.
		this._init = Parser.init({
			locateFile: (_file: string, _folder: string) => FileAccess.asBrowserUri(moduleLocationTreeSitterWasm).toString(true)
		});

		// Eventually, this should actually use an extension point to add tree sitter grammars, but for now they are hard coded in core
		if (enabledLanguages.includes('typescript')) {
			this._addGrammar('typescript', 'tree-sitter-typescript');
		}

		this._registerModelServiceListeners();
	} else {
		// Nothing opted in: there is nothing to initialize.
		this._init = Promise.resolve();
	}
}

/**
 * Reads the `editor.experimental.preferTreeSitter` setting.
 *
 * The value is not validated anywhere else in this class, and the constructor calls
 * `.length` / `.includes` on the result, so anything that is not an array is treated as
 * "no languages" and non-string entries are dropped.
 *
 * @returns the configured language ids, or an empty array when unset or malformed.
 */
private _getSetting(): string[] {
	const configured = this._configurationService.getValue<unknown>(EDITOR_EXPERIMENTAL_PREFER_TREESITTER);
	if (!Array.isArray(configured)) {
		return [];
	}
	return configured.filter((entry): entry is string => typeof entry === 'string');
}

/**
 * Returns the folder that holds the grammar files, or `undefined` when no tree-sitter
 * grammar is registered for `languageId`.
 */
getLanguageLocation(languageId: string): AppResourcePath | undefined {
	const registeredGrammar = TreeSitterTokenizationRegistry.get(languageId);
	return registeredGrammar ? moduleLocationTreeSitter : undefined;
}

/**
 * Subscribes to model added/removed events of the model service and starts tracking every
 * model that already exists.
 */
private _registerModelServiceListeners() {
	const addedListener = this._modelService.onModelAdded(added => {
		this._registerModelListeners(added);
	});
	this._register(addedListener);

	// Removing a model disposes the tree-sitter state stored for it.
	const removedListener = this._modelService.onModelRemoved(removed => {
		this._treeSitterTrees.deleteAndDispose(removed);
	});
	this._register(removedListener);

	for (const existing of this._modelService.getModels()) {
		this._registerModelListeners(existing);
	}
}

/**
 * Starts tracking a text model: once initialization has finished, content and language
 * listeners are attached, a parser with a 50 ms timeout is created, and the resulting
 * `TreeSitterTree` is stored for the model before its language and first tree are set up.
 *
 * @param model the text model to track
 */
private async _registerModelListeners(model: ITextModel) {
	await this._init;
	// The model can be disposed while initialization is pending. Its removal was then
	// handled before any entry existed, so nothing would ever clean up the listeners and
	// parser created below.
	if (model.isDisposed()) {
		return;
	}
	const disposables = new DisposableStore();
	disposables.add(model.onDidChangeContent(e => this._onDidChangeContent(model, e)));
	disposables.add(model.onDidChangeLanguage(e => this._onDidChangeLanguage(model, e)));
	const parser = new Parser();
	parser.setTimeoutMicros(50 * 1000); // 50 ms
	const treeSitterTree = new TreeSitterTree(parser, disposables);
	this._treeSitterTrees.set(model, treeSitterTree);
	this._setLanguageAndTree(model, treeSitterTree);
}

/**
 * Loads the grammar for the model's current language, assigns it to the tree and runs the
 * initial parse. Does nothing when no grammar is available for the language.
 *
 * @param model the text model to parse
 * @param treeSitterTree the per-model state that receives the language and the tree
 */
private async _setLanguageAndTree(model: ITextModel, treeSitterTree: TreeSitterTree) {
	const languageId = model.getLanguageId();
	const language = await this._ensureLanguage(languageId);
	if (!language) {
		return;
	}
	// Loading the grammar is asynchronous. If the model was disposed or switched language
	// in the meantime, applying the language captured above would install a stale grammar
	// (a language switch is handled separately by `_onDidChangeLanguage`).
	if (model.isDisposed() || model.getLanguageId() !== languageId) {
		return;
	}
	treeSitterTree.language = language;
	treeSitterTree.tree = await this._doInitialParse(model, treeSitterTree, languageId);
}

/**
 * Runs the tree's parser over the model until `parse` returns a tree, yielding with
 * `setTimeout0` after every pass, then reports the accumulated time and pass count through
 * `sendParseTimeTelemetry`.
 *
 * @param model text model whose text buffer feeds the parse callback
 * @param treeSitterTree supplies the parser and the previous tree handed to `parse`
 * @param language language id; only forwarded to telemetry
 * @param telemetryTag event name forwarded to telemetry
 * @returns the first tree that `parse` returns
 */
private async _parseAndYield(model: ITextModel, treeSitterTree: TreeSitterTree, language: string, telemetryTag: string): Promise<Parser.Tree> {
let tree: Parser.Tree | undefined;
// Total milliseconds spent inside `parse` across all passes.
let time: number = 0;
// Number of `parse` attempts, including the ones that threw.
let passes: number = 0;
do {
const timer = performance.now();
// NOTE(review): the catch below never inspects `e`, so every exception is ignored and the
// loop retries; if `parse` keeps throwing, this loop does not terminate.
try {
tree = treeSitterTree.parser.parse((index: number, position?: Parser.Point) => this._parseCallback(model, index, position, treeSitterTree), treeSitterTree.tree);
} catch (e) {
// parsing can fail when the timeout is reached, will resume upon next loop
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you check the error type and re-throw the error for non-expected errors?

} finally {
// Runs for successful and failed passes alike.
time += performance.now() - timer;
passes++;
}
// Yield before the loop condition is checked; this also happens after the pass that succeeded.
await new Promise<void>(resolve => setTimeout0(resolve));
} while (!tree);
this.sendParseTimeTelemetry(telemetryTag, language, time, passes);
return tree;
}

/**
 * Snapshots the model and starts a full parse, reported to telemetry as `fullParse`.
 *
 * @returns a promise for the parsed tree
 */
private _doInitialParse(model: ITextModel, treeSitterTree: TreeSitterTree, language: string): Promise<Parser.Tree> {
	treeSitterTree.createSnapshot(model);
	return this._parseAndYield(model, treeSitterTree, language, 'fullParse');
}

/**
 * Writes the parse duration to the log and publishes it as a `treeSitter.<eventName>`
 * telemetry event.
 *
 * @param eventName suffix of the telemetry event name, also included in the log line
 * @param languageId language id of the parsed model
 * @param time milliseconds spent parsing
 * @param passes number of parse passes that were needed
 */
private sendParseTimeTelemetry(eventName: string, languageId: string, time: number, passes: number): void {
	type ParseTimeEvent = { languageId: string; time: number; passes: number };
	type ParseTimeClassification = {
		owner: 'alros';
		comment: 'Used to understand how long it takes to parse a tree-sitter tree';
		languageId: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The programming language ID.' };
		time: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The ms it took to parse' };
		passes: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'The number of passes it took to parse' };
	};

	const logLine = `Tree parsing (${eventName}) took ${time} ms and ${passes} passes.`;
	this._logService.info(logLine);

	const payload: ParseTimeEvent = { languageId, time, passes };
	this._telemetryService.publicLog2<ParseTimeEvent, ParseTimeClassification>(`treeSitter.${eventName}`, payload);
}

/**
 * Reacts to a language change of a tracked model: loads the grammar for the new language,
 * applies it to the stored tree state and re-parses from scratch. When there is no grammar
 * for the new language, the language (and with it the tree) is cleared.
 */
private async _onDidChangeLanguage(model: ITextModel, e: IModelLanguageChangedEvent) {
	const treeSitterTree = this._treeSitterTrees.get(model);
	if (!treeSitterTree) {
		return;
	}

	const newLanguage = await this._ensureLanguage(e.newLanguage);
	if (newLanguage) {
		treeSitterTree.language = newLanguage;
		treeSitterTree.tree = await this._doInitialParse(model, treeSitterTree, e.newLanguage);
	} else {
		// not supported for this language
		treeSitterTree.language = undefined;
	}
}

/**
 * Reacts to a content change of a tracked model: every change is applied to the existing
 * tree as an edit (offsets as-is, rows and columns converted to zero-based), then the model
 * is re-parsed, reported to telemetry as `incrementalParse`. Models without a language
 * assigned are ignored.
 */
private async _onDidChangeContent(model: ITextModel, e: IModelContentChangedEvent) {
	const treeSitterTree = this._treeSitterTrees.get(model);
	if (!treeSitterTree?.language) {
		return;
	}

	for (const { range, rangeOffset, rangeLength, text } of e.changes) {
		const newEndOffset = rangeOffset + text.length;
		const newEnd = model.getPositionAt(newEndOffset);
		// TODO @alexr00 need to take into account the previous edits in the loop (text edits class)
		treeSitterTree.tree?.edit({
			startIndex: rangeOffset,
			oldEndIndex: rangeOffset + rangeLength,
			newEndIndex: newEndOffset,
			startPosition: { row: range.startLineNumber - 1, column: range.startColumn - 1 },
			oldEndPosition: { row: range.endLineNumber - 1, column: range.endColumn - 1 },
			newEndPosition: { row: newEnd.lineNumber - 1, column: newEnd.column - 1 }
		});
	}

	const reparsed = await this._parseAndYield(model, treeSitterTree, model.getLanguageId(), 'incrementalParse');
	treeSitterTree.tree = reparsed;
}

/**
 * Reads `<location>/<grammar name>.wasm` through the file service and loads it as a
 * tree-sitter language.
 *
 * @returns the loaded language, or `undefined` when no grammar is registered for `languageId`
 */
private async _fetchLanguage(languageId: string): Promise<Parser.Language | undefined> {
	const grammar = TreeSitterTokenizationRegistry.get(languageId);
	const location = this.getLanguageLocation(languageId);
	if (!(grammar && location)) {
		return undefined;
	}

	const wasmPath: AppResourcePath = `${location}/${grammar.name}.wasm`;
	const wasmUri = FileAccess.asFileUri(wasmPath);
	const wasmFile = await this._fileService.readFile(wasmUri);
	return Parser.Language.load(wasmFile.value.buffer);
}

/**
 * Returns the language for `languageId`, fetching it and caching it on first use.
 *
 * @returns the language, or `undefined` when it could not be fetched
 */
private async _ensureLanguage(languageId: string): Promise<Parser.Language | undefined> {
	const cached = this._languages.get(languageId);
	if (cached) {
		return cached;
	}

	const fetched = await this._fetchLanguage(languageId);
	if (fetched) {
		this._languages.set(languageId, fetched);
	}
	return fetched;
}

/**
 * Input callback handed to the parser: returns the text buffer chunk nearest to `index`.
 * `position` and `treeSitterTree` are accepted but not used.
 */
private _parseCallback(textModel: ITextModel, index: number, position?: Parser.Point, treeSitterTree?: TreeSitterTree): string | null {
	const buffer = textModel.getTextBuffer();
	return buffer.getNearestChunk(index);
}

/**
 * Exposes the initialization promise created by the constructor so callers can wait for it.
 */
public initTreeSitter(): Promise<void> {
	return this._init;
}

/**
 * Returns the tree currently stored for `model`, or `undefined` when the model is not
 * tracked or has no tree yet.
 */
getTree(model: ITextModel): Parser.Tree | undefined {
	const entry = this._treeSitterTrees.get(model);
	return entry?.tree;
}

/**
 * Returns the language currently assigned for `model`, or `undefined` when the model is not
 * tracked or has no language assigned.
 */
getLanguage(model: ITextModel): Parser.Language | undefined {
	const entry = this._treeSitterTrees.get(model);
	return entry?.language;
}

/**
 * Registers `grammarName` as the tree-sitter grammar for `languageId`.
 */
private _addGrammar(languageId: string, grammarName: string) {
	const support = { name: grammarName };
	TreeSitterTokenizationRegistry.register(languageId, support);
}

/**
 * Disposes everything registered on the base class first, then every per-model tree.
 */
public override dispose(): void {
	super.dispose();
	this._treeSitterTrees.dispose();
}
}
30 changes: 21 additions & 9 deletions src/vs/editor/common/languages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ export class EncodedTokenizationResult {
}
}

/**
* An intermediate interface for scaffolding the new tree sitter tokenization support. Not final.
* @internal
*/
export interface ITreeSitterTokenizationSupport {
name: string;
}

/**
* @internal
*/
Expand Down Expand Up @@ -2106,14 +2114,14 @@ export interface ITokenizationSupportChangedEvent {
/**
* @internal
*/
export interface ILazyTokenizationSupport {
get tokenizationSupport(): Promise<ITokenizationSupport | null>;
export interface ILazyTokenizationSupport<TSupport> {
get tokenizationSupport(): Promise<TSupport | null>;
}

/**
* @internal
*/
export class LazyTokenizationSupport implements IDisposable, ILazyTokenizationSupport {
export class LazyTokenizationSupport implements IDisposable, ILazyTokenizationSupport<ITokenizationSupport> {
private _tokenizationSupport: Promise<ITokenizationSupport & IDisposable | null> | null = null;

constructor(private readonly createSupport: () => Promise<ITokenizationSupport & IDisposable | null>) {
Expand All @@ -2140,7 +2148,7 @@ export class LazyTokenizationSupport implements IDisposable, ILazyTokenizationSu
/**
* @internal
*/
export interface ITokenizationRegistry {
export interface ITokenizationRegistry<TSupport> {

/**
* An event triggered when:
Expand All @@ -2158,24 +2166,24 @@ export interface ITokenizationRegistry {
/**
* Register a tokenization support.
*/
register(languageId: string, support: ITokenizationSupport): IDisposable;
register(languageId: string, support: TSupport): IDisposable;

/**
* Register a tokenization support factory.
*/
registerFactory(languageId: string, factory: ILazyTokenizationSupport): IDisposable;
registerFactory(languageId: string, factory: ILazyTokenizationSupport<TSupport>): IDisposable;

/**
* Get or create the tokenization support for a language.
* Returns `null` if not found.
*/
getOrCreate(languageId: string): Promise<ITokenizationSupport | null>;
getOrCreate(languageId: string): Promise<TSupport | null>;

/**
* Get the tokenization support for a language.
* Returns `null` if not found.
*/
get(languageId: string): ITokenizationSupport | null;
get(languageId: string): TSupport | null;

/**
* Returns false if a factory is still pending.
Expand All @@ -2195,8 +2203,12 @@ export interface ITokenizationRegistry {
/**
* @internal
*/
export const TokenizationRegistry: ITokenizationRegistry = new TokenizationRegistryImpl();
export const TokenizationRegistry: ITokenizationRegistry<ITokenizationSupport> = new TokenizationRegistryImpl();

/**
* @internal
*/
export const TreeSitterTokenizationRegistry: ITokenizationRegistry<ITreeSitterTokenizationSupport> = new TokenizationRegistryImpl();

/**
* @internal
Expand Down
Loading
Loading