Gridarta Editor
HTMLTokenMarker.java
Go to the documentation of this file.
1 /*
2  * HTMLTokenMarker.java - HTML token marker
3  * Copyright (C) 1998, 1999 Slava Pestov
4  * Copyright (C) 2000-2015 The Gridarta Developers.
5  *
6  * You may use and modify this package for any purpose. Redistribution is
7  * permitted, in both source and binary form, provided that this notice
8  * remains intact in all source distributions of this package.
9  */
10 
11 package net.sf.gridarta.textedit.textarea.tokenmarker;
12 
13 import javax.swing.text.Segment;
16 
21 public class HTMLTokenMarker extends TokenMarker {
22 
23  private static final byte JAVASCRIPT = Token.INTERNAL_FIRST;
24 
25  private final KeywordMap keywords;
26 
27  private final boolean js;
28 
29  private int lastOffset;
30 
31  private int lastKeyword;
32 
33  public HTMLTokenMarker(final boolean js) {
34  this.js = js;
36  }
37 
38  @Override
39  public byte markTokensImpl(final byte token, final Segment line) {
40  final char[] array = line.array;
41  final int offset = line.offset;
42  lastOffset = offset;
43  lastKeyword = offset;
44  final int length = line.count + offset;
45  boolean backslash = false;
46 
47  byte currentToken = token;
48 loop:
49  for (int i = offset; i < length; i++) {
50  final int i1 = i + 1;
51 
52  final char c = array[i];
53  if (c == '\\') {
54  backslash = !backslash;
55  continue;
56  }
57 
58  switch (currentToken) {
59  case Token.NULL: // HTML text
60  backslash = false;
61  switch (c) {
62  case '<':
63  addToken(i - lastOffset, currentToken);
64  lastOffset = i;
65  lastKeyword = i;
66  if (SyntaxUtilities.regionMatches(false, line, i1, "!--")) {
67  i += 3;
68  currentToken = Token.COMMENT1;
69  } else if (js && SyntaxUtilities.regionMatches(true, line, i1, "script>")) {
71  i += 8;
72  lastOffset = i;
73  lastKeyword = i;
74  currentToken = JAVASCRIPT;
75  } else {
76  currentToken = Token.KEYWORD1;
77  }
78  break;
79 
80  case '&':
81  addToken(i - lastOffset, currentToken);
82  lastOffset = i;
83  lastKeyword = i;
84  currentToken = Token.KEYWORD2;
85  break;
86  }
87  break;
88 
89  case Token.KEYWORD1: // Inside a tag
90  backslash = false;
91  if (c == '>') {
92  addToken(i1 - lastOffset, currentToken);
93  lastOffset = i1;
94  lastKeyword = i1;
95  currentToken = Token.NULL;
96  }
97  break;
98 
99  case Token.KEYWORD2: // Inside an entity
100  backslash = false;
101  if (c == ';') {
102  addToken(i1 - lastOffset, currentToken);
103  lastOffset = i1;
104  lastKeyword = i1;
105  currentToken = Token.NULL;
106  break;
107  }
108  break;
109 
110  case Token.COMMENT1: // Inside a comment
111  backslash = false;
112  if (SyntaxUtilities.regionMatches(false, line, i, "-->")) {
113  addToken(i + 3 - lastOffset, currentToken);
114  lastOffset = i + 3;
115  lastKeyword = i + 3;
116  currentToken = Token.NULL;
117  }
118  break;
119 
120  case JAVASCRIPT: // Inside a JavaScript
121  switch (c) {
122  case '<':
123  backslash = false;
124  doKeyword(line, i);
125  if (SyntaxUtilities.regionMatches(true, line, i1, "/script>")) {
126  addToken(i - lastOffset, Token.NULL);
127  addToken(9, Token.KEYWORD1);
128  i += 9;
129  lastOffset = i;
130  lastKeyword = i;
131  currentToken = Token.NULL;
132  }
133  break;
134 
135  case '"':
136  if (backslash) {
137  backslash = false;
138  } else {
139  doKeyword(line, i);
140  addToken(i - lastOffset, Token.NULL);
141  lastOffset = i;
142  lastKeyword = i;
143  currentToken = Token.LITERAL1;
144  }
145  break;
146 
147  case '\'':
148  if (backslash) {
149  backslash = false;
150  } else {
151  doKeyword(line, i);
152  addToken(i - lastOffset, Token.NULL);
153  lastOffset = i;
154  lastKeyword = i;
155  currentToken = Token.LITERAL2;
156  }
157  break;
158 
159  case '/':
160  backslash = false;
161  doKeyword(line, i);
162  if (length - i > 1) {
163  addToken(i - lastOffset, Token.NULL);
164  lastOffset = i;
165  lastKeyword = i;
166  if (array[i1] == '/') {
167  addToken(length - i, Token.COMMENT2);
168  lastOffset = length;
169  lastKeyword = length;
170  break loop;
171  } else if (array[i1] == '*') {
172  currentToken = Token.COMMENT2;
173  }
174  }
175  break;
176 
177  default:
178  backslash = false;
179  if (!Character.isLetterOrDigit(c) && c != '_') {
180  doKeyword(line, i);
181  }
182  break;
183  }
184  break;
185 
186  case Token.LITERAL1: // JavaScript "..."
187  if (backslash) {
188  backslash = false;
189  } else if (c == '"') {
190  addToken(i1 - lastOffset, Token.LITERAL1);
191  lastOffset = i1;
192  lastKeyword = i1;
193  currentToken = JAVASCRIPT;
194  }
195  break;
196 
197  case Token.LITERAL2: // JavaScript '...'
198  if (backslash) {
199  backslash = false;
200  } else if (c == '\'') {
201  addToken(i1 - lastOffset, Token.LITERAL1);
202  lastOffset = i1;
203  lastKeyword = i1;
204  currentToken = JAVASCRIPT;
205  }
206  break;
207 
208  case Token.COMMENT2: // Inside a JavaScript comment
209  backslash = false;
210  if (c == '*' && length - i > 1 && array[i1] == '/') {
211  i += 2;
212  addToken(i - lastOffset, Token.COMMENT2);
213  lastOffset = i;
214  lastKeyword = i;
215  currentToken = JAVASCRIPT;
216  }
217  break;
218 
219  default:
220  throw new InternalError("Invalid state: " + currentToken);
221  }
222  }
223 
224  switch (currentToken) {
225  case Token.LITERAL1:
226  case Token.LITERAL2:
227  addToken(length - lastOffset, Token.INVALID);
228  currentToken = JAVASCRIPT;
229  break;
230 
231  case Token.KEYWORD2:
232  addToken(length - lastOffset, Token.INVALID);
233  currentToken = Token.NULL;
234  break;
235 
236  case JAVASCRIPT:
237  doKeyword(line, length);
238  addToken(length - lastOffset, Token.NULL);
239  break;
240 
241  default:
242  addToken(length - lastOffset, currentToken);
243  break;
244  }
245 
246  return currentToken;
247  }
248 
249  private void doKeyword(final Segment line, final int i) {
250  final int i1 = i + 1;
251 
252  final int len = i - lastKeyword;
253  final byte id = keywords.lookup(line, lastKeyword, len);
254  if (id != Token.NULL) {
255  if (lastKeyword != lastOffset) {
256  addToken(lastKeyword - lastOffset, Token.NULL);
257  }
258  addToken(len, id);
259  lastOffset = i;
260  }
261  lastKeyword = i1;
262  }
263 
264 }
int length
The number of lines in the model being tokenized.
static final byte COMMENT2
Comment 2 token id.
Definition: Token.java:36
static final byte INTERNAL_FIRST
The first id that can be used for internal state in a token marker.
Definition: Token.java:94
byte lookup(final Segment text, final int offset, final int length)
Looks up a key.
Definition: KeywordMap.java:63
static final byte COMMENT1
Comment 1 token id.
Definition: Token.java:31
void addToken(final int length, final byte id)
Adds a token to the token list.
static final byte LITERAL1
Literal 1 token id.
Definition: Token.java:42
Base package of all Gridarta classes.
A linked list of tokens.
Definition: Token.java:21
Class with several utility functions used by jEdit's syntax colorizing subsystem. ...
This package contains the other part of the script editor.
static final byte LITERAL2
Literal 2 token id.
Definition: Token.java:48
static boolean regionMatches(final boolean ignoreCase, final Segment text, final int offset, final CharSequence match)
Checks if a sub-region of a Segment is equal to a character sequence.
static final byte NULL
Normal text token id.
Definition: Token.java:26
static final byte KEYWORD2
Keyword 2 token id.
Definition: Token.java:66
A token marker that splits lines of text into tokens.
static final byte KEYWORD1
Keyword 1 token id.
Definition: Token.java:60
byte markTokensImpl(final byte token, final Segment line)
static final byte INVALID
Invalid token id.
Definition: Token.java:84