1
1
from pygments .lexer import RegexLexer , words
2
2
from pygments .token import Comment , Keyword , Name , Number , Operator , String , Whitespace
3
3
4
# Keyword groups of the Safe-DS language. Each group is matched by its own lexer
# rule so that it can be given its own token type and follow-up state.

# Keyword that introduces an annotation declaration.
keywords_annotation = ("annotation",)

# Keywords that introduce a class-like declaration.
keywords_class = (
    "class",
    "enum",
)

# Keywords that introduce a named value.
keywords_constant = (
    "attr",
    "val",
)

# Keywords that introduce a callable declaration.
keywords_function = (
    "fun",
    "pipeline",
    "segment",
)

# Literal keywords.
keywords_literal = (
    "false",
    "null",
    "true",
)
9
27
10
# Keywords that are followed by a (qualified) namespace name.
keywords_namespace = (
    "from",
    "package",
)

# Remaining keywords that need no special follow-up state.
keywords_generic = (
    "as",
    "const",
    "import",
    "in",
    "internal",
    "literal",
    "out",
    "private",
    "schema",
    "static",
    "union",
    "where",
    "yield",
)
38
-
39
49
operators = (
40
50
"and" ,
41
51
"not" ,
44
54
"super" ,
45
55
)
46
56
57
# Names highlighted as built-in types (Name.Builtin) by the lexer.
# NOTE(review): "ListMap" may be meant as two separate entries, "List" and "Map" —
# confirm against the Safe-DS standard library before changing it.
builtins = (
    "Any",
    "Nothing",
    "Boolean",
    "Number",
    "Int",
    "Float",
    "ListMap",
    "String",
)
67
+
68
# Regex building blocks for names; they are interpolated into the lexer rules.

# A plain identifier: a letter or underscore, then letters, digits, or underscores.
identifier_fragment = r"[_a-zA-Z][_a-zA-Z0-9]*"

# An identifier, optionally wrapped in backticks. The alternation is enclosed in a
# non-capturing group so that it stays self-contained when embedded in a larger
# pattern: a prefix such as "@" then applies to both alternatives instead of only
# the first one. No literal whitespace may appear inside the pattern.
identifier_regex = rf"(?:{identifier_fragment}|`{identifier_fragment}`)"

# One or more identifiers separated by dots.
qualified_name_regex = rf"({identifier_regex})(\.({identifier_regex}))*"
71
+
47
72
48
73
class SafeDsLexer (RegexLexer ):
49
74
name = "safe-ds"
@@ -59,17 +84,57 @@ class SafeDsLexer(RegexLexer):
59
84
60
85
tokens = {
61
86
"root" : [
87
+ # Literals
62
88
(r"\b([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)\b" , Number ),
63
89
(r'"|}}' , String , "string" ),
64
- (words (constants , prefix = r"\b" , suffix = r"\b" ), Keyword .Constant ),
65
- (words (keywords , prefix = r"\b" , suffix = r"\b" ), Keyword ),
66
- (words (namespace , prefix = r"\b" , suffix = r"\b" ), Keyword .Namespace ),
90
+ # Keywords
91
+ (
92
+ words (keywords_annotation , prefix = r"\b" , suffix = r"\b" ),
93
+ Keyword ,
94
+ "annotation" ,
95
+ ),
96
+ (words (keywords_class , prefix = r"\b" , suffix = r"\b" ), Keyword , "class" ),
97
+ (
98
+ words (keywords_constant , prefix = r"\b" , suffix = r"\b" ),
99
+ Keyword .Declaration ,
100
+ "placeholder" ,
101
+ ),
102
+ (words (keywords_function , prefix = r"\b" , suffix = r"\b" ), Keyword , "function" ),
103
+ (words (keywords_literal , prefix = r"\b" , suffix = r"\b" ), Keyword .Constant ),
104
+ (
105
+ words (keywords_namespace , prefix = r"\b" , suffix = r"\b" ),
106
+ Keyword .Namespace ,
107
+ "namespace" ,
108
+ ),
109
+ (words (keywords_generic , prefix = r"\b" , suffix = r"\b" ), Keyword ),
110
+ # Operators
67
111
(words (operators , prefix = r"\b" , suffix = r"\b" ), Operator .Word ),
68
- (r"`[_a-zA-Z][_a-zA-Z0-9]*`" , Name ),
112
+ # Builtins
113
+ (words (builtins , prefix = r"\b" , suffix = r"\b" ), Name .Builtin ),
114
+ # Identifiers
115
+ (rf"@{ identifier_regex } " , Name .Decorator ),
116
+ (identifier_regex , Name ),
117
+ # Comments
69
118
(r"//.+?$" , Comment .Single ),
70
119
(r"/\*[\s\S]*?\*/" , Comment .Multiline ),
120
+ # Whitespace
71
121
(r"\s+" , Whitespace ),
72
122
],
123
+ "annotation" : [
124
+ (identifier_regex , Name .Decorator , "#pop" ),
125
+ ],
126
+ "class" : [
127
+ (identifier_regex , Name .Class , "#pop" ),
128
+ ],
129
+ "function" : [
130
+ (identifier_regex , Name .Function , "#pop" ),
131
+ ],
132
+ "namespace" : [
133
+ (qualified_name_regex , Name .Namespace , "#pop" ),
134
+ ],
135
+ "placeholder" : [
136
+ (identifier_regex , Name .Constant , "#pop" ),
137
+ ],
73
138
"string" : [
74
139
(r'([^"{]|\{(?!\{))+' , String ),
75
140
(r'\{\{|"' , String , "#pop" ),
0 commit comments