Gridarta Editor
HTMLTokenMarker.java
Go to the documentation of this file.
1 /*
2  * HTMLTokenMarker.java - HTML token marker
3  * Copyright (C) 1998, 1999 Slava Pestov
4  * Copyright (C) 2000-2023 The Gridarta Developers.
5  *
6  * You may use and modify this package for any purpose. Redistribution is
7  * permitted, in both source and binary form, provided that this notice
8  * remains intact in all source distributions of this package.
9  */
10 
11 package net.sf.gridarta.textedit.textarea.tokenmarker;
12 
13 import javax.swing.text.Segment;
16 import org.jetbrains.annotations.NotNull;
17 
22 public class HTMLTokenMarker extends TokenMarker {
23 
24  private static final byte JAVASCRIPT = Token.INTERNAL_FIRST;
25 
26  @NotNull
27  private final KeywordMap keywords;
28 
29  private final boolean js;
30 
31  private int lastOffset;
32 
33  private int lastKeyword;
34 
35  public HTMLTokenMarker(final boolean js) {
36  this.js = js;
38  }
39 
40  @Override
41  public byte markTokensImpl(final byte token, @NotNull final Segment line) {
42  final char[] array = line.array;
43  final int offset = line.offset;
44  lastOffset = offset;
45  lastKeyword = offset;
46  final int length = line.count + offset;
47  boolean backslash = false;
48 
49  byte currentToken = token;
50 loop:
51  for (int i = offset; i < length; i++) {
52  final int i1 = i + 1;
53 
54  final char c = array[i];
55  if (c == '\\') {
56  backslash = !backslash;
57  continue;
58  }
59 
60  switch (currentToken) {
61  case Token.NULL: // HTML text
62  backslash = false;
63  switch (c) {
64  case '<':
65  addToken(i - lastOffset, currentToken);
66  lastOffset = i;
67  lastKeyword = i;
68  if (SyntaxUtilities.regionMatches(false, line, i1, "!--")) {
69  i += 3;
70  currentToken = Token.COMMENT1;
71  } else if (js && SyntaxUtilities.regionMatches(true, line, i1, "script>")) {
73  i += 8;
74  lastOffset = i;
75  lastKeyword = i;
76  currentToken = JAVASCRIPT;
77  } else {
78  currentToken = Token.KEYWORD1;
79  }
80  break;
81 
82  case '&':
83  addToken(i - lastOffset, currentToken);
84  lastOffset = i;
85  lastKeyword = i;
86  currentToken = Token.KEYWORD2;
87  break;
88  }
89  break;
90 
91  case Token.KEYWORD1: // Inside a tag
92  backslash = false;
93  if (c == '>') {
94  addToken(i1 - lastOffset, currentToken);
95  lastOffset = i1;
96  lastKeyword = i1;
97  currentToken = Token.NULL;
98  }
99  break;
100 
101  case Token.KEYWORD2: // Inside an entity
102  backslash = false;
103  if (c == ';') {
104  addToken(i1 - lastOffset, currentToken);
105  lastOffset = i1;
106  lastKeyword = i1;
107  currentToken = Token.NULL;
108  break;
109  }
110  break;
111 
112  case Token.COMMENT1: // Inside a comment
113  backslash = false;
114  if (SyntaxUtilities.regionMatches(false, line, i, "-->")) {
115  addToken(i + 3 - lastOffset, currentToken);
116  lastOffset = i + 3;
117  lastKeyword = i + 3;
118  currentToken = Token.NULL;
119  }
120  break;
121 
122  case JAVASCRIPT: // Inside a JavaScript
123  switch (c) {
124  case '<':
125  backslash = false;
126  doKeyword(line, i);
127  if (SyntaxUtilities.regionMatches(true, line, i1, "/script>")) {
129  addToken(9, Token.KEYWORD1);
130  i += 9;
131  lastOffset = i;
132  lastKeyword = i;
133  currentToken = Token.NULL;
134  }
135  break;
136 
137  case '"':
138  if (backslash) {
139  backslash = false;
140  } else {
141  doKeyword(line, i);
143  lastOffset = i;
144  lastKeyword = i;
145  currentToken = Token.LITERAL1;
146  }
147  break;
148 
149  case '\'':
150  if (backslash) {
151  backslash = false;
152  } else {
153  doKeyword(line, i);
155  lastOffset = i;
156  lastKeyword = i;
157  currentToken = Token.LITERAL2;
158  }
159  break;
160 
161  case '/':
162  backslash = false;
163  doKeyword(line, i);
164  if (length - i > 1) {
166  lastOffset = i;
167  lastKeyword = i;
168  if (array[i1] == '/') {
170  lastOffset = length;
172  break loop;
173  }
174  if (array[i1] == '*') {
175  currentToken = Token.COMMENT2;
176  }
177  }
178  break;
179 
180  default:
181  backslash = false;
182  if (!Character.isLetterOrDigit(c) && c != '_') {
183  doKeyword(line, i);
184  }
185  break;
186  }
187  break;
188 
189  case Token.LITERAL1: // JavaScript "..."
190  if (backslash) {
191  backslash = false;
192  } else if (c == '"') {
194  lastOffset = i1;
195  lastKeyword = i1;
196  currentToken = JAVASCRIPT;
197  }
198  break;
199 
200  case Token.LITERAL2: // JavaScript '...'
201  if (backslash) {
202  backslash = false;
203  } else if (c == '\'') {
205  lastOffset = i1;
206  lastKeyword = i1;
207  currentToken = JAVASCRIPT;
208  }
209  break;
210 
211  case Token.COMMENT2: // Inside a JavaScript comment
212  backslash = false;
213  if (c == '*' && length - i > 1 && array[i1] == '/') {
214  i += 2;
216  lastOffset = i;
217  lastKeyword = i;
218  currentToken = JAVASCRIPT;
219  }
220  break;
221 
222  default:
223  throw new InternalError("Invalid state: " + currentToken);
224  }
225  }
226 
227  switch (currentToken) {
228  case Token.LITERAL1:
229  case Token.LITERAL2:
231  currentToken = JAVASCRIPT;
232  break;
233 
234  case Token.KEYWORD2:
236  currentToken = Token.NULL;
237  break;
238 
239  case JAVASCRIPT:
240  doKeyword(line, length);
242  break;
243 
244  default:
245  addToken(length - lastOffset, currentToken);
246  break;
247  }
248 
249  return currentToken;
250  }
251 
252  private void doKeyword(final Segment line, final int i) {
253  final int len = i - lastKeyword;
254  final byte id = keywords.lookup(line, lastKeyword, len);
255  if (id != Token.NULL) {
256  if (lastKeyword != lastOffset) {
258  }
259  addToken(len, id);
260  lastOffset = i;
261  }
262  lastKeyword = i + 1;
263  }
264 
265 }
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker.markTokensImpl
byte markTokensImpl(final byte token, @NotNull final Segment line)
An abstract method that splits a line up into tokens.
Definition: HTMLTokenMarker.java:41
net.sf.gridarta.textedit.textarea.Token.INTERNAL_FIRST
static final byte INTERNAL_FIRST
The first id that can be used for internal state in a token marker.
Definition: Token.java:94
net.sf.gridarta.textedit.textarea
This package contains the other part of the script editor.
net.sf.gridarta
Base package of all Gridarta classes.
net.sf.gridarta.textedit.textarea.tokenmarker.TokenMarker.addToken
void addToken(final int length, final byte id)
Adds a token to the token list.
Definition: TokenMarker.java:225
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker.keywords
final KeywordMap keywords
Definition: HTMLTokenMarker.java:27
net.sf.gridarta.textedit.textarea.Token.NULL
static final byte NULL
Normal text token id.
Definition: Token.java:26
net.sf
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker.JAVASCRIPT
static final byte JAVASCRIPT
Definition: HTMLTokenMarker.java:24
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker
HTML token marker.
Definition: HTMLTokenMarker.java:22
net.sf.gridarta.textedit.textarea.tokenmarker.KeywordMap.lookup
byte lookup(final Segment text, final int offset, final int length)
Looks up a key.
Definition: KeywordMap.java:63
net.sf.gridarta.textedit
net.sf.gridarta.textedit.textarea.SyntaxUtilities.regionMatches
static boolean regionMatches(final boolean ignoreCase, final Segment text, final int offset, @NotNull final CharSequence match)
Checks if a sub-region of a Segment matches a character sequence.
Definition: SyntaxUtilities.java:43
net.sf.gridarta.textedit.textarea.tokenmarker.JavaScriptTokenMarker
JavaScript token marker.
Definition: JavaScriptTokenMarker.java:19
net.sf.gridarta.textedit.textarea.Token.LITERAL2
static final byte LITERAL2
Literal 2 token id.
Definition: Token.java:48
net.sf.gridarta.textedit.textarea.Token
A linked list of tokens.
Definition: Token.java:21
net
net.sf.gridarta.textedit.textarea.Token.COMMENT2
static final byte COMMENT2
Comment 2 token id.
Definition: Token.java:36
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker.HTMLTokenMarker
HTMLTokenMarker(final boolean js)
Definition: HTMLTokenMarker.java:35
net.sf.gridarta.textedit.textarea.Token.INVALID
static final byte INVALID
Invalid token id.
Definition: Token.java:84
net.sf.gridarta.textedit.textarea.Token.KEYWORD2
static final byte KEYWORD2
Keyword 2 token id.
Definition: Token.java:66
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker.lastKeyword
int lastKeyword
Definition: HTMLTokenMarker.java:33
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker.js
final boolean js
Definition: HTMLTokenMarker.java:29
net.sf.gridarta.textedit.textarea.Token.KEYWORD1
static final byte KEYWORD1
Keyword 1 token id.
Definition: Token.java:60
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker.lastOffset
int lastOffset
Definition: HTMLTokenMarker.java:31
net.sf.gridarta.textedit.textarea.tokenmarker.KeywordMap
Maps (parts of) Segments to token ids.
Definition: KeywordMap.java:35
net.sf.gridarta.textedit.textarea.SyntaxUtilities
Class with several utility functions used by jEdit's syntax colorizing subsystem.
Definition: SyntaxUtilities.java:27
net.sf.gridarta.textedit.textarea.Token.LITERAL1
static final byte LITERAL1
Literal 1 token id.
Definition: Token.java:42
net.sf.gridarta.textedit.textarea.tokenmarker.JavaScriptTokenMarker.getJavaScriptKeywords
static KeywordMap getJavaScriptKeywords()
Definition: JavaScriptTokenMarker.java:27
net.sf.gridarta.textedit.textarea.tokenmarker.HTMLTokenMarker.doKeyword
void doKeyword(final Segment line, final int i)
Definition: HTMLTokenMarker.java:252
net.sf.gridarta.textedit.textarea.tokenmarker.TokenMarker
A token marker that splits lines of text into tokens.
Definition: TokenMarker.java:32
net.sf.gridarta.textedit.textarea.Token.COMMENT1
static final byte COMMENT1
Comment 1 token id.
Definition: Token.java:31
net.sf.gridarta.textedit.textarea.tokenmarker.TokenMarker.length
int length
The number of lines in the model being tokenized.
Definition: TokenMarker.java:52