/**
 * Creates a recognizer that reads Groovy source from a character reader.
 *
 * <p>A fresh {@code GroovyLexer} is wrapped around {@code in} and handed to
 * the lexer-based {@code make} overload.
 *
 * @param in the reader supplying the source text
 * @return the recognizer produced by the lexer-based {@code make} overload
 */
public static GroovyRecognizer make(Reader in) {
    final GroovyLexer readerLexer = new GroovyLexer(in);
    return make(readerLexer);
}

/**
 * Creates a recognizer that reads Groovy source from an input buffer.
 *
 * <p>A fresh {@code GroovyLexer} is wrapped around {@code in} and handed to
 * the lexer-based {@code make} overload.
 *
 * @param in the input buffer supplying the source text
 * @return the recognizer produced by the lexer-based {@code make} overload
 */
public static GroovyRecognizer make(InputBuffer in) {
    final GroovyLexer bufferLexer = new GroovyLexer(in);
    return make(bufferLexer);
}
@Override public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { StringBuilder buffer = sourceCode.getCodeBuffer(); GroovyLexer lexer = new GroovyLexer(new StringReader(buffer.toString())); TokenStream tokenStream = lexer.plumb(); try { Token token = tokenStream.nextToken(); while (token.getType() != Token.EOF_TYPE) { TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine()); tokenEntries.add(tokenEntry); token = tokenStream.nextToken(); } } catch (TokenStreamException err) { // Wrap exceptions of the Groovy tokenizer in a TokenMgrError, so // they are correctly handled // when CPD is executed with the '--skipLexicalErrors' command line // option throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + lexer.getLine() + ", column " + lexer.getColumn() + ". Encountered: " + err.getMessage(), TokenMgrError.LEXICAL_ERROR); } finally { tokenEntries.add(TokenEntry.getEOF()); } } }
lexer.setWhitespaceIncluded(true); while (true) { Token t = lexer.nextToken(); System.out.println(t); if (t == null || t.getType() == Token.EOF_TYPE) break;
/**
 * Lexer rule {@code DIGITS_WITH_UNDERSCORE}.
 *
 * <p>Matches one digit via {@code mDIGIT}; if the next character is a digit
 * or an underscore, the remainder is delegated to
 * {@code mDIGITS_WITH_UNDERSCORE_OPT}.
 *
 * <p>The resulting token (or {@code null}) is stored in {@code _returnToken}.
 *
 * @param _createToken whether a token should be built from the text matched
 *                     by this rule
 * @throws RecognitionException declared by the generated rule signature
 * @throws CharStreamException  declared by the generated rule signature
 * @throws TokenStreamException declared by the generated rule signature
 */
protected final void mDIGITS_WITH_UNDERSCORE(boolean _createToken) throws RecognitionException, CharStreamException, TokenStreamException {
    final int ruleType = DIGITS_WITH_UNDERSCORE;
    // Remember where this rule's text starts inside the shared text buffer.
    final int start = text.length();

    mDIGIT(false);

    // Optional tail: only entered when a digit or '_' follows the first digit.
    final int next = LA(1);
    final boolean tailFollows = (next >= '0' && next <= '9') || next == '_';
    if (tailFollows) {
        mDIGITS_WITH_UNDERSCORE_OPT(false);
    }

    Token produced = null;
    if (_createToken && ruleType != Token.SKIP) {
        produced = makeToken(ruleType);
        produced.setText(new String(text.getBuffer(), start, text.length() - start));
    }
    _returnToken = produced;
}
/**
 * Lexer rule {@code DIGITS_WITH_UNDERSCORE_OPT}.
 *
 * <p>Repeatedly consumes either a digit that is itself followed by another
 * digit or underscore, or a single underscore. When neither applies, the
 * loop ends and one final digit is matched via {@code mDIGIT}.
 *
 * <p>The resulting token (or {@code null}) is stored in {@code _returnToken}.
 *
 * @param _createToken whether a token should be built from the text matched
 *                     by this rule
 * @throws RecognitionException declared by the generated rule signature
 * @throws CharStreamException  declared by the generated rule signature
 * @throws TokenStreamException declared by the generated rule signature
 */
protected final void mDIGITS_WITH_UNDERSCORE_OPT(boolean _createToken) throws RecognitionException, CharStreamException, TokenStreamException {
    final int ruleType = DIGITS_WITH_UNDERSCORE_OPT;
    // Remember where this rule's text starts inside the shared text buffer.
    final int start = text.length();

    while (true) {
        final int first = LA(1);

        // The second lookahead is consulted only when the first is a digit,
        // mirroring the short-circuit evaluation of the generated condition.
        boolean digitWithMoreToCome = false;
        if (first >= '0' && first <= '9') {
            final int second = LA(2);
            digitWithMoreToCome = (second >= '0' && second <= '9') || second == '_';
        }

        if (digitWithMoreToCome) {
            mDIGIT(false);
        } else if (first == '_') {
            match('_');
        } else {
            break;
        }
    }

    // The rule always ends with one mandatory digit.
    mDIGIT(false);

    Token produced = null;
    if (_createToken && ruleType != Token.SKIP) {
        produced = makeToken(ruleType);
        produced.setText(new String(text.getBuffer(), start, text.length() - start));
    }
    _returnToken = produced;
}
_loop669: do { if ((LA(1)=='\\') && (LA(2)=='\n'||LA(2)=='\r') && (true) && (true)) { match('\\'); mONE_NL(false,false); else if ((LA(1)==' ') && (true) && (true) && (true)) { match(' '); else if ((LA(1)=='\t') && (true) && (true) && (true)) { match('\t'); else if ((LA(1)=='\u000c') && (true) && (true) && (true)) { match('\f'); if ( _cnt669>=1 ) { break _loop669; } else {throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn());} _token = makeToken(_ttype); _token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));
Token _token = null; int _ttype = Token.INVALID_TYPE; resetText(); try { // for char stream error handling try { // for lexical error handling switch ( LA(1)) { case '(': mLPAREN(true); theRetToken=_returnToken; break; mRPAREN(true); theRetToken=_returnToken; break; mLBRACK(true); theRetToken=_returnToken; break; mRBRACK(true); theRetToken=_returnToken; break; mLCURLY(true); theRetToken=_returnToken; break; mRCURLY(true);
if (((LA(1)=='\'') && (LA(2)=='\'') && (LA(3)=='\'') && ((LA(4) >= '\u0000' && LA(4) <= '\ufffe')))) { int _m692 = mark(); synPredMatched692 = true; inputState.guessing++; try { match("'''"); rewind(_m692); inputState.guessing--; match("'''"); text.setLength(_saveIndex); switch ( LA(1)) { case '\\': mESC(false); break; match('"'); break; match('$'); break; mSTRING_NL(false,true); break;
if (((LA(1)=='/') && (_tokenSet_6.member(LA(2))) && (true) && (true))&&(allowRegexpLiteral())) { _saveIndex=text.length(); match('/'); text.setLength(_saveIndex); if ( inputState.guessing==0 ) { if (((LA(1)=='$') && (_tokenSet_2.member(LA(2))))&&(!atValidDollarEscape())) { match('$'); tt=mREGEXP_CTOR_END(false,true); else if ((_tokenSet_7.member(LA(1)))) { mREGEXP_SYMBOL(false); tt=mREGEXP_CTOR_END(false,true); else if ((LA(1)=='$') && (true)) { _saveIndex=text.length(); match('$'); text.setLength(_saveIndex); if ( inputState.guessing==0 ) { throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn()); else if ((LA(1)=='/') && (LA(2)=='=') && (true) && (true)) { mDIV_ASSIGN(false); if ( inputState.guessing==0 ) { _ttype = DIV_ASSIGN; else if ((LA(1)=='/') && (true)) { mDIV(false);
if (!(allowRegexpLiteral())) throw new SemanticException("allowRegexpLiteral()"); _saveIndex=text.length(); match("$/"); text.setLength(_saveIndex); if (((LA(1)=='$') && (LA(2)=='/') && ((LA(3) >= '\u0000' && LA(3) <= '\ufffe')) && (true))) { int _m720 = mark(); synPredMatched720 = true; inputState.guessing++; try { match('$'); match('/'); rewind(_m720); inputState.guessing--; mESCAPED_SLASH(false); tt=mDOLLAR_REGEXP_CTOR_END(false,true); if (((LA(1)=='$') && (LA(2)=='$') && ((LA(3) >= '\u0000' && LA(3) <= '\ufffe')) && (true))) { int _m722 = mark(); synPredMatched722 = true; inputState.guessing++; try { match('$'); match('$');
if (!( !atMultiCommentStart() )) throw new SemanticException(" !atMultiCommentStart() "); if (((LA(1)=='/') && (_tokenSet_6.member(LA(2))) && (true) && (true))&&(allowRegexpLiteral())) { _saveIndex=text.length(); match('/'); text.setLength(_saveIndex); if ( inputState.guessing==0 ) { if (((LA(1)=='$') && ((LA(2) >= '\u0000' && LA(2) <= '\ufffe')))&&(!atValidDollarEscape())) { match('$'); tt=mREGEXP_CTOR_END(false,true); else if ((_tokenSet_7.member(LA(1)))) { mREGEXP_SYMBOL(false); tt=mREGEXP_CTOR_END(false,true); else if ((LA(1)=='$') && (true)) { _saveIndex=text.length(); match('$'); text.setLength(_saveIndex); if ( inputState.guessing==0 ) { throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn()); else if ((LA(1)=='/') && (LA(2)=='=') && (true) && (true)) { mDIV_ASSIGN(false); if ( inputState.guessing==0 ) { _ttype = DIV_ASSIGN;
if (((LA(1)=='$') && ((LA(2) >= '\u0000' && LA(2) <= '\ufffe')))&&(!atValidDollarEscape())) { match('$'); else if ((_tokenSet_7.member(LA(1)))) { mREGEXP_SYMBOL(false); switch ( LA(1)) { case '/': match('/'); text.setLength(_saveIndex); if ( inputState.guessing==0 ) { match('$'); text.setLength(_saveIndex); if ( inputState.guessing==0 ) { throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn()); _token = makeToken(_ttype); _token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));
switch ( LA(1)) { case '0': match('0'); if ( inputState.guessing==0 ) { isDecimal = true; switch ( LA(1)) { case 'X': case 'x': switch ( LA(1)) { case 'x': match('x'); break; match('X'); break; throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn()); mHEX_DIGIT(false); if ((_tokenSet_11.member(LA(1))) && (true) && (true) && (true)) { if ((_tokenSet_12.member(LA(1))) && (_tokenSet_11.member(LA(2))) && (true) && (true)) { mHEX_DIGIT(false); else if ((LA(1)=='_')) {
if (((_tokenSet_0.member(LA(1))) && (true) && (true) && (true))&&(stringCtorState == 0)) { if ((LA(1)=='$')) { mDOLLAR(false); else if ((_tokenSet_10.member(LA(1)))) { mLETTER(false); throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn()); switch ( LA(1)) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': mDIGIT(false); break; mDOLLAR(false); break; if ((_tokenSet_10.member(LA(1)))) { mLETTER(false); else if ((_tokenSet_10.member(LA(1))) && (true) && (true) && (true)) { mLETTER(false); if ((_tokenSet_10.member(LA(1)))) { mLETTER(false);
int _saveIndex; mONE_NL(false,true); if (((LA(1)=='\t'||LA(1)=='\n'||LA(1)=='\u000c'||LA(1)=='\r'||LA(1)==' '||LA(1)=='/'||LA(1)=='\\'))&&(!whitespaceIncluded)) { switch ( LA(1)) { case '\n': case '\r': mONE_NL(false,true); break; mWS(false); break; if ((LA(1)=='/') && (LA(2)=='/')) { mSL_COMMENT(false); else if ((LA(1)=='/') && (LA(2)=='*')) { mML_COMMENT(false); if ( _cnt675>=1 ) { break _loop675; } else {throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn());} _token = makeToken(_ttype); _token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));
SourceBuffer sourceBuffer = new SourceBuffer(); try (UnicodeEscapingReader reader = new UnicodeEscapingReader(new StringReader(text), sourceBuffer)) { GroovyLexer lexer = new GroovyLexer(new UnicodeLexerSharedInputState(reader)); lexer.setWhitespaceIncluded(true); TokenStream tokenStream = lexer.plumb(); Token token = tokenStream.nextToken(); while (token.getType() != Token.EOF_TYPE) {
if ((LA(1)=='\\') && (LA(2)=='"'||LA(2)=='$'||LA(2)=='\''||LA(2)=='0'||LA(2)=='1'||LA(2)=='2'||LA(2)=='3'||LA(2)=='4'||LA(2)=='5'||LA(2)=='6'||LA(2)=='7'||LA(2)=='\\'||LA(2)=='b'||LA(2)=='f'||LA(2)=='n'||LA(2)=='r'||LA(2)=='t'||LA(2)=='u')) { _saveIndex=text.length(); match('\\'); text.setLength(_saveIndex); switch ( LA(1)) { case 'n': match('n'); if ( inputState.guessing==0 ) { text.setLength(_begin); text.append("\n"); match('r'); if ( inputState.guessing==0 ) { text.setLength(_begin); text.append("\r"); match('t'); if ( inputState.guessing==0 ) { text.setLength(_begin); text.append("\t"); match('b'); if ( inputState.guessing==0 ) { text.setLength(_begin); text.append("\b"); match('f'); if ( inputState.guessing==0 ) { text.setLength(_begin); text.append("\f"); match('"');
resetText(); try { switch (quoteType) { case SCS_SQ_TYPE: mSTRING_CTOR_END(true, /*fromStart:*/false, false); break; case SCS_TQ_TYPE: mSTRING_CTOR_END(true, /*fromStart:*/false, true); break; case SCS_RE_TYPE: mREGEXP_CTOR_END(true, /*fromStart:*/false); break; case SCS_DRE_TYPE: mDOLLAR_REGEXP_CTOR_END(true, /*fromStart:*/false); break; default: throw new AssertionError(false); Token token = GroovyLexer.this.nextToken(); int lasttype = token.getType(); if (whitespaceIncluded) {
if (!( atMultiCommentStart() )) throw new SemanticException(" atMultiCommentStart() "); match("/*"); if (((LA(1)=='*') && ((LA(2) >= '\u0000' && LA(2) <= '\ufffe')) && ((LA(3) >= '\u0000' && LA(3) <= '\ufffe')) && (true))) { int _m687 = mark(); synPredMatched687 = true; inputState.guessing++; try { match('*'); matchNot('/'); rewind(_m687); inputState.guessing--; match('*'); else if ((LA(1)=='\n'||LA(1)=='\r')) { mONE_NL(false,true); else if ((_tokenSet_2.member(LA(1)))) { match(_tokenSet_2); match("*/"); if ( inputState.guessing==0 ) { if (!whitespaceIncluded) _ttype = Token.SKIP;
int tt=0; if (!(allowRegexpLiteral())) throw new SemanticException("allowRegexpLiteral()"); _saveIndex=text.length(); match("$/"); text.setLength(_saveIndex); if (((LA(1)=='$') && ((LA(2) >= '\u0000' && LA(2) <= '\ufffe')))&&(!atValidDollarEscape())) { match('$'); tt=mDOLLAR_REGEXP_CTOR_END(false,true); else if ((_tokenSet_10.member(LA(1)))) { mDOLLAR_REGEXP_SYMBOL(false); tt=mDOLLAR_REGEXP_CTOR_END(false,true); else if ((LA(1)=='$') && (true)) { _saveIndex=text.length(); match('$'); text.setLength(_saveIndex); if ( inputState.guessing==0 ) { throw new NoViableAltForCharException((char)LA(1), getFilename(), getLine(), getColumn()); _token = makeToken(_ttype); _token.setText(new String(text.getBuffer(), _begin, text.length()-_begin));