Skip to content

Commit 4cb1d95

Browse files
leaysgurIWANABETHATGUY
authored andcommitted
fix(semantic): Refactor jsdoc finding (oxc-project#2437)
Partial fix for oxc-project#168 - [x] Fix general finding behavior for leading comments - [x] Accept multiple jsdoc comments per node - [x] Provide `get_one` and also `get_all` - [x] Add `iter_all()` for non-node related usage - [x] Limit AST node kinds to parse
1 parent cdc3bbd commit 4cb1d95

File tree

4 files changed

+222
-71
lines changed

4 files changed

+222
-71
lines changed

crates/oxc_linter/src/context.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@ use std::{cell::RefCell, path::Path, rc::Rc, sync::Arc};
22

33
use oxc_codegen::{Codegen, CodegenOptions};
44
use oxc_diagnostics::Error;
5-
use oxc_semantic::{AstNodes, JSDocComment, ScopeTree, Semantic, SymbolTable};
5+
use oxc_semantic::{AstNodes, JSDoc, ScopeTree, Semantic, SymbolTable};
66
use oxc_span::SourceType;
77

88
use crate::{
99
disable_directives::{DisableDirectives, DisableDirectivesBuilder},
1010
fixer::{Fix, Message},
1111
javascript_globals::GLOBALS,
12-
AstNode, ESLintEnv, ESLintSettings,
12+
ESLintEnv, ESLintSettings,
1313
};
1414

1515
pub struct LintContext<'a> {
@@ -155,7 +155,7 @@ impl<'a> LintContext<'a> {
155155
}
156156

157157
/* JSDoc */
158-
pub fn jsdoc(&self, node: &AstNode<'a>) -> Option<JSDocComment<'a>> {
159-
self.semantic().jsdoc().get_by_node(node)
158+
pub fn jsdoc(&self) -> &JSDoc<'a> {
159+
self.semantic().jsdoc()
160160
}
161161
}

crates/oxc_semantic/src/builder.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -204,9 +204,10 @@ impl<'a> SemanticBuilder<'a> {
204204

205205
fn create_ast_node(&mut self, kind: AstKind<'a>) {
206206
let mut flags = self.current_node_flags;
207-
if self.jsdoc.retrieve_jsdoc_comment(kind) {
207+
if self.jsdoc.retrieve_attached_jsdoc(&kind) {
208208
flags |= NodeFlags::JSDoc;
209209
}
210+
210211
let ast_node = AstNode::new(kind, self.current_scope_id, self.cfg.current_node_ix, flags);
211212
let parent_node_id =
212213
if matches!(kind, AstKind::Program(_)) { None } else { Some(self.current_node_id) };
+191-59
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,127 @@
1-
use std::{collections::BTreeMap, rc::Rc};
1+
use std::collections::BTreeMap;
2+
use std::rc::Rc;
23

3-
use oxc_ast::{AstKind, TriviasMap};
4+
use oxc_ast::{AstKind, Comment, TriviasMap};
45
use oxc_span::{GetSpan, Span};
6+
use rustc_hash::FxHashSet;
57

68
use super::{JSDoc, JSDocComment};
79

810
pub struct JSDocBuilder<'a> {
911
source_text: &'a str,
10-
1112
trivias: Rc<TriviasMap>,
12-
13-
docs: BTreeMap<Span, JSDocComment<'a>>,
13+
attached_docs: BTreeMap<Span, Vec<JSDocComment<'a>>>,
14+
leading_comments_seen: FxHashSet<u32>,
1415
}
1516

1617
impl<'a> JSDocBuilder<'a> {
1718
pub fn new(source_text: &'a str, trivias: &Rc<TriviasMap>) -> Self {
18-
Self { source_text, trivias: Rc::clone(trivias), docs: BTreeMap::default() }
19+
Self {
20+
source_text,
21+
trivias: Rc::clone(trivias),
22+
attached_docs: BTreeMap::default(),
23+
leading_comments_seen: FxHashSet::default(),
24+
}
1925
}
2026

2127
pub fn build(self) -> JSDoc<'a> {
22-
JSDoc::new(self.docs)
28+
let not_attached_docs = self
29+
.trivias
30+
.comments()
31+
.iter()
32+
.filter(|(start, _)| !self.leading_comments_seen.contains(start))
33+
.filter_map(|(start, comment)| self.parse_if_jsdoc_comment(*start, *comment))
34+
.collect::<Vec<_>>();
35+
36+
JSDoc::new(self.attached_docs, not_attached_docs)
2337
}
2438

25-
/// Save the span if the given kind has a jsdoc comment attached
26-
pub fn retrieve_jsdoc_comment(&mut self, kind: AstKind<'a>) -> bool {
27-
if !kind.is_declaration() {
39+
// This process is done in conjunction with the `semantic.build()`.
40+
// This means that it's done "before" each use case (e.g. execute rule in oxlint) runs.
41+
//
42+
// If you need to control this attaching logic (e.g. by rule configurations), one of these would be necessary.
43+
// - 1. Give up pre-attaching here and leave it for use cases
44+
// - 2. Attach it more broadly(= loosely) here (may cause performance impact), so that it can be narrowed down later
45+
//
46+
// Since there is no reliable spec for JSDoc, there are some naive topics to consider:
47+
//
48+
// Q. Which node to attach JSDoc to?
49+
// A. Each implementation decides according to its own use case.
50+
//
51+
// For example, TypeScript tries to target quite broadly nodes.
52+
// > https://github.com/microsoft/TypeScript/blob/d04e3489b0d8e6bc9a8a9396a633632a5a467328/src/compiler/utilities.ts#L4195
53+
//
54+
// In case of `eslint-plugin-jsdoc`, its targets can be freely changed by rule configurations!
55+
// (But, default is only about function related nodes.)
56+
// > https://github.com/gajus/eslint-plugin-jsdoc/blob/e948bee821e964a92fbabc01574eca226e9e1252/src/iterateJsdoc.js#L2517-L2536
57+
//
58+
// Q. How do we attach JSDoc to that node?
59+
// A. Also depends on the implementation.
60+
//
61+
// In the case of TypeScript (they have specific AST node for JSDoc and an `endOfFileToken`),
62+
// some AST nodes have the `jsDoc` property and multiple `JSDocComment`s are attached.
63+
//
64+
// In the case of `eslint-plugin-jsdoc` (`@es-joy/jsdoccomment` is used)
65+
// tries to get a only single nearest comment, with some exception handling.
66+
//
67+
// It is hard to say how we should behave as OXC Semantic, but the current implementation is,
68+
// - Intuitive TypeScript-like attaching strategy
69+
// - Provide `get_one` or `get_all` APIs for each use case
70+
//
71+
// Of course, this can be changed in the future.
72+
pub fn retrieve_attached_jsdoc(&mut self, kind: &AstKind<'a>) -> bool {
73+
// This may be diffed compare to TypeScript's `canHaveJSDoc()`, should adjust if needed
74+
if !(kind.is_statement()
75+
|| kind.is_declaration()
76+
|| matches!(kind, AstKind::ParenthesizedExpression(_)))
77+
{
2878
return false;
2979
}
80+
81+
// 1. Retrieve every kind of leading comments for this node
3082
let span = kind.span();
31-
let comment_text = self.find_jsdoc_comment(span);
32-
if let Some(comment_text) = comment_text {
33-
self.docs.insert(span, JSDocComment::new(comment_text));
83+
let mut leading_comments = vec![];
84+
for (start, comment) in self.trivias.comments().range(..span.start) {
85+
if !self.leading_comments_seen.contains(start) {
86+
leading_comments.push((start, comment));
87+
}
88+
self.leading_comments_seen.insert(*start);
3489
}
35-
comment_text.is_some()
36-
}
3790

38-
/// Find the jsdoc doc in front of this span, a.k.a leading comment
39-
fn find_jsdoc_comment(&self, span: Span) -> Option<&'a str> {
40-
let (start, comment) = self.trivias.comments().range(..span.start).next()?;
91+
// 2. Filter and parse JSDoc comments only
92+
let leading_jsdoc_comments = leading_comments
93+
.iter()
94+
.filter_map(|(start, comment)| self.parse_if_jsdoc_comment(**start, **comment))
95+
.collect::<Vec<_>>();
4196

42-
if comment.kind().is_single_line() {
43-
return None;
97+
// 3. Save and return `true` to mark JSDoc flag
98+
if !leading_jsdoc_comments.is_empty() {
99+
self.attached_docs.insert(span, leading_jsdoc_comments);
100+
return true;
44101
}
45102

46-
let comment_text = Span::new(*start, comment.end()).source_text(self.source_text);
103+
false
104+
}
47105

48-
// Comments beginning with /*, /***, or more than 3 stars will be ignored.
49-
let mut chars = comment_text.chars();
50-
if chars.next() != Some('*') {
51-
return None;
52-
}
53-
if chars.next() == Some('*') {
106+
fn parse_if_jsdoc_comment(
107+
&self,
108+
span_start: u32,
109+
comment: Comment,
110+
) -> Option<JSDocComment<'a>> {
111+
if !comment.is_multi_line() {
54112
return None;
55113
}
56114

57-
// The comment is the leading comment of this span if there is nothing in between.
58-
// +2 to skip `*/` ending
59-
let text_between = Span::new(comment.end() + 2, span.start).source_text(self.source_text);
60-
if text_between.chars().any(|c| !c.is_whitespace()) {
115+
let comment_span = Span::new(span_start, comment.end());
116+
// Inside of marker: /*_CONTENT_*/
117+
let comment_content = comment_span.source_text(self.source_text);
118+
// Should start with "*": /**_CONTENT_*/
119+
if !comment_content.starts_with('*') {
61120
return None;
62121
}
63122

64-
Some(comment_text)
123+
// Should remove the very first `*`?
124+
Some(JSDocComment::new(comment_content))
65125
}
66126
}
67127

@@ -71,29 +131,37 @@ mod test {
71131
use oxc_parser::Parser;
72132
use oxc_span::{SourceType, Span};
73133

74-
use crate::{jsdoc::JSDocComment, SemanticBuilder};
134+
use crate::{jsdoc::JSDocComment, Semantic, SemanticBuilder};
75135

76-
#[allow(clippy::cast_possible_truncation)]
77-
fn get_jsdoc<'a>(
136+
fn build_semantic<'a>(
78137
allocator: &'a Allocator,
79138
source_text: &'a str,
80-
symbol: &'a str,
81139
source_type: Option<SourceType>,
82-
) -> Option<JSDocComment<'a>> {
140+
) -> Semantic<'a> {
83141
let source_type = source_type.unwrap_or_default();
84142
let ret = Parser::new(allocator, source_text, source_type).parse();
85143
let program = allocator.alloc(ret.program);
86144
let semantic = SemanticBuilder::new(source_text, source_type)
87145
.with_trivias(ret.trivias)
88146
.build(program)
89147
.semantic;
90-
let jsdoc = semantic.jsdoc();
91-
let start = source_text.find(symbol).unwrap() as u32;
148+
semantic
149+
}
150+
151+
#[allow(clippy::cast_possible_truncation)]
152+
fn get_jsdoc<'a>(
153+
allocator: &'a Allocator,
154+
source_text: &'a str,
155+
symbol: &'a str,
156+
source_type: Option<SourceType>,
157+
) -> Option<Vec<JSDocComment<'a>>> {
158+
let semantic = build_semantic(allocator, source_text, source_type);
159+
let start = source_text.find(symbol).unwrap_or(0) as u32;
92160
let span = Span::new(start, start + symbol.len() as u32);
93-
jsdoc.get_by_span(span)
161+
semantic.jsdoc().get_all_by_span(span)
94162
}
95163

96-
fn test_jsdoc(source_text: &str, symbol: &str, source_type: Option<SourceType>) {
164+
fn test_jsdoc_found(source_text: &str, symbol: &str, source_type: Option<SourceType>) {
97165
let allocator = Allocator::default();
98166
assert!(
99167
get_jsdoc(&allocator, source_text, symbol, source_type).is_some(),
@@ -112,45 +180,109 @@ mod test {
112180
#[test]
113181
fn not_found() {
114182
let source_texts = [
115-
"function foo() {}",
116-
"/* test */function foo() {}",
117-
"/*** test */function foo() {}",
118-
"/** test */ ; function foo() {}",
119-
"/** test */ function foo1() {} function foo() {}",
183+
("function foo() {}", "function foo() {}"),
184+
("// test", "function foo() {}"),
185+
("function foo() {}", "function foo() {}"),
186+
("/* test */function foo() {}", "function foo() {}"),
187+
("/** test */ ; function foo() {}", "function foo() {}"),
188+
("/** test */ function foo1() {} function foo() {}", "function foo() {}"),
189+
("function foo() {} /** test */", "function foo() {}"),
120190
];
121-
for source_text in source_texts {
122-
test_jsdoc_not_found(source_text, "function foo() {}");
191+
for (source_text, target) in source_texts {
192+
test_jsdoc_not_found(source_text, target);
123193
}
124194
}
125195

126196
#[test]
127197
fn found() {
128198
let source_texts = [
129-
"/** test */function foo() {}",
130-
"
199+
("/** test */function foo() {}", "function foo() {}"),
200+
("/*** test */function foo() {}", "function foo() {}"),
201+
(
202+
"
131203
/** test */
132204
function foo() {}",
133-
"/** test */
205+
"function foo() {}",
206+
),
207+
(
208+
"/** test */
209+
210+
134211
function foo() {}",
135-
"/**
212+
"function foo() {}",
213+
),
214+
(
215+
"/**
136216
* test
137217
* */
138218
function foo() {}",
139-
"/** test */
219+
"function foo() {}",
220+
),
221+
(
222+
"/** test */
223+
function foo() {}",
224+
"function foo() {}",
225+
),
226+
(
227+
"/** test */
228+
// noop
140229
function foo() {}",
230+
"function foo() {}",
231+
),
232+
(
233+
"/** test */
234+
/*noop*/
235+
function foo() {}",
236+
"function foo() {}",
237+
),
238+
("/** foo1 */ function foo1() {} /** test */ function foo() {}", "function foo() {}"),
141239
];
142-
for source_text in source_texts {
143-
test_jsdoc(source_text, "function foo() {}", None);
240+
for (source_text, target) in source_texts {
241+
test_jsdoc_found(source_text, target, None);
144242
}
145243
}
146244

147245
#[test]
148-
fn found_on_property_definition() {
149-
let source = "class Foo {
246+
fn found_ts() {
247+
let source_texts = [(
248+
"class Foo {
150249
/** jsdoc */
151250
bar: string;
152-
}";
251+
}",
252+
"bar: string;",
253+
)];
254+
153255
let source_type = SourceType::default().with_typescript(true);
154-
test_jsdoc(source, "bar: string;", Some(source_type));
256+
for (source_text, target) in source_texts {
257+
test_jsdoc_found(source_text, target, Some(source_type));
258+
}
259+
}
260+
261+
#[test]
262+
fn get_all_jsdoc() {
263+
let allocator = Allocator::default();
264+
let semantic = build_semantic(
265+
&allocator,
266+
r"
267+
// noop
268+
/** 1. ; */
269+
// noop
270+
;
271+
/** 2. class X {} *//** 3. class X {} */
272+
class X {
273+
/** 4. foo */
274+
foo = /** 5. () */ (() => {});
275+
}
276+
277+
/** 6. let x; */
278+
/* noop */
279+
280+
let x;
281+
282+
/** 7. Not attached but collected! */
283+
",
284+
Some(SourceType::default()),
285+
);
286+
assert_eq!(semantic.jsdoc().iter_all().count(), 7);
155287
}
156288
}

0 commit comments

Comments
 (0)