noir-lang · asterite · Feb 7, 2025 · Feb 7, 2025
diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs
@@ -47,7 +47,7 @@
            done: false,
            skip_comments: true,
            skip_whitespaces: true,
            max_integer: BigInt::from_biguint(num_bigint::Sign::Plus, FieldElement::modulus())
                - BigInt::one(),
        }
    }
@@ -103,11 +103,28 @@
     }
 
     fn next_token(&mut self) -> SpannedTokenResult {
+        if !self.skip_comments {
+            return self.next_token_without_checking_comments();
+        }
+
+        // Read tokens and skip comments. This is done like this to avoid recursion
+        // and hitting stack overflow when there are many comments in a row.
+        loop {
+            let token = self.next_token_without_checking_comments()?;
+            if matches!(token.token(), Token::LineComment(_, None) | Token::BlockComment(_, None)) {
+                continue;
+            }
+            return Ok(token);
+        }
+    }
+
+    /// Reads the next token, which might be a comment token (these aren't skipped in this method)
+    fn next_token_without_checking_comments(&mut self) -> SpannedTokenResult {
         match self.next_char() {
             Some(x) if Self::is_code_whitespace(x) => {
                 let spanned = self.eat_whitespace(x);
                 if self.skip_whitespaces {
-                    self.next_token()
+                    self.next_token_without_checking_comments()
                 } else {
                     Ok(spanned)
                 }
@@ -755,10 +772,6 @@
             return Err(LexerErrorKind::NonAsciiComment { span });
         }
 
-        if doc_style.is_none() && self.skip_comments {
-            return self.next_token();
-        }
-
         Ok(Token::LineComment(comment, doc_style).into_span(start, self.position))
     }
 
@@ -804,9 +817,6 @@
                 return Err(LexerErrorKind::NonAsciiComment { span });
             }
 
-            if doc_style.is_none() && self.skip_comments {
-                return self.next_token();
-            }
             Ok(Token::BlockComment(content, doc_style).into_span(start, self.position))
         } else {
             let span = Span::inclusive(start, self.position);
@@ -1405,7 +1415,7 @@
    //   (expected_token_discriminator, strings_to_lex)
    // expected_token_discriminator matches a given token when
    // std::mem::discriminant returns the same discriminant for both.
    fn blns_base64_to_statements(base64_str: String) -> Vec<(Option<Token>, Vec<String>)> {
        use base64::engine::general_purpose;
        use std::borrow::Cow;
        use std::io::Cursor;
@@ -1463,13 +1473,13 @@
    fn test_big_list_of_naughty_strings() {
        use std::mem::discriminant;

        let blns_contents = include_str!("./blns/blns.base64.json");
        let blns_base64: Vec<String> =
            serde_json::from_str(blns_contents).expect("BLNS json invalid");
        for blns_base64_str in blns_base64 {
            let statements = blns_base64_to_statements(blns_base64_str);
            for (token_discriminator_opt, blns_program_strs) in statements {
                for blns_program_str in blns_program_strs {
                    let mut expected_token_found = false;
                    let mut lexer = Lexer::new(&blns_program_str);
                    let mut result_tokens = Vec::new();
@@ -1560,7 +1570,7 @@

    #[test]
    fn test_non_ascii_comments() {
        let cases = vec!["// 🙂", "// schön", "/* in the middle 🙂 of a comment */"];

        for source in cases {
            let mut lexer = Lexer::new(source);

diff --git a/compiler/noirc_frontend/src/tests.rs b/compiler/noirc_frontend/src/tests.rs
@@ -4364,3 +4364,9 @@ fn errors_on_if_without_else_type_mismatch() {
     };
     assert!(matches!(**err, TypeCheckError::TypeMismatch { .. }));
 }
+
+#[test]
+fn does_not_stack_overflow_on_many_comments_in_a_row() {
+    let src = "//\n".repeat(10_000);
+    assert_no_errors(&src);
+}