// XPathTokenizer.java [plain text]
package gnu.xml.xpath;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Map;
import java.util.TreeMap;
/**
 * Lexical analyser for XPath expressions.
 * Reads characters from a {@link Reader} and hands tokens to the generated
 * parser through the {@link XPathParser.yyInput} callbacks
 * ({@link #advance()}, {@link #token()}, {@link #value()}).
 */
public class XPathTokenizer
  implements XPathParser.yyInput
{

  /**
   * A lexical token: a parser token type and, for literals, digits and
   * names, the associated text.
   */
  static class XPathToken
  {

    /** Token type, one of the <code>XPathParser</code> constants. */
    int type;

    /** Token text, or <code>null</code> for tokens that carry no text. */
    String val;

    /**
     * Creates a token that carries no text.
     *
     * @param type the parser token type
     */
    XPathToken (int type)
    {
      this (type, null);
    }

    /**
     * Creates a token with associated text.
     *
     * @param type the parser token type
     * @param val the token text, may be <code>null</code>
     */
    XPathToken (int type, String val)
    {
      this.type = type;
      this.val = val;
    }

    /**
     * Returns the token text.
     *
     * @return the text, or <code>null</code> if the token carries none
     */
    public String getText ()
    {
      return val;
    }

    /**
     * Returns the token text (which may be <code>null</code>).
     */
    public String toString ()
    {
      return val;
    }

  }

  /**
   * Maps reserved words (axis names, operator names and node type names)
   * to their <code>Integer</code> parser token type.
   */
  static final Map keywords = new TreeMap ();
  static
  {
    // Axis names
    keywords.put ("ancestor", new Integer (XPathParser.ANCESTOR));
    keywords.put ("ancestor-or-self", new Integer (XPathParser.ANCESTOR_OR_SELF));
    keywords.put ("attribute", new Integer (XPathParser.ATTRIBUTE));
    keywords.put ("child", new Integer (XPathParser.CHILD));
    keywords.put ("descendant", new Integer (XPathParser.DESCENDANT));
    keywords.put ("descendant-or-self", new Integer (XPathParser.DESCENDANT_OR_SELF));
    keywords.put ("following", new Integer (XPathParser.FOLLOWING));
    keywords.put ("following-sibling", new Integer (XPathParser.FOLLOWING_SIBLING));
    keywords.put ("namespace", new Integer (XPathParser.NAMESPACE));
    keywords.put ("parent", new Integer (XPathParser.PARENT));
    keywords.put ("preceding", new Integer (XPathParser.PRECEDING));
    keywords.put ("preceding-sibling", new Integer (XPathParser.PRECEDING_SIBLING));
    keywords.put ("self", new Integer (XPathParser.SELF));
    // Operator names
    keywords.put ("div", new Integer (XPathParser.DIV));
    keywords.put ("mod", new Integer (XPathParser.MOD));
    keywords.put ("or", new Integer (XPathParser.OR));
    keywords.put ("and", new Integer (XPathParser.AND));
    // Node type names
    keywords.put ("comment", new Integer (XPathParser.COMMENT));
    keywords.put ("processing-instruction", new Integer (XPathParser.PROCESSING_INSTRUCTION));
    keywords.put ("text", new Integer (XPathParser.TEXT));
    keywords.put ("node", new Integer (XPathParser.NODE));
  }

  /** Character source; always supports mark/reset (see the constructor). */
  Reader in;

  /** The current token, as reported by {@link #token()} and {@link #value()}. */
  XPathToken token;

  /** The token that was current before the last call to {@link #advance()}. */
  XPathToken lastToken;

  /**
   * Creates a tokenizer over the given expression text.
   *
   * @param expr the XPath expression
   */
  public XPathTokenizer (String expr)
  {
    this (new StringReader (expr));
  }

  /**
   * Creates a tokenizer over the given reader. Readers that do not support
   * mark/reset are wrapped in a {@link BufferedReader}, because the
   * tokenizer relies on mark/reset for look-ahead.
   *
   * @param in the character source
   */
  XPathTokenizer (Reader in)
  {
    this.in = in.markSupported () ? in : new BufferedReader (in);
  }

  /**
   * Moves to the next token, skipping any leading whitespace.
   *
   * @return <code>false</code> if the end of input was reached (the current
   * token is then left unchanged), <code>true</code> otherwise
   * @throws IOException if the underlying reader fails
   */
  public boolean advance ()
    throws IOException
  {
    lastToken = token;
    int c = in.read ();
    // Skip whitespace iteratively so that long runs cannot exhaust the stack.
    while (isWhitespace (c))
      {
        c = in.read ();
      }
    switch (c)
      {
      case -1:
        return false;
      case 0x22: // "
      case 0x27: // '
        token = consume_literal (c);
        break;
      case 0x28: // (
        token = new XPathToken (XPathParser.LP);
        break;
      case 0x29: // )
        token = new XPathToken (XPathParser.RP);
        break;
      case 0x5b: // [
        token = new XPathToken (XPathParser.LB);
        break;
      case 0x5d: // ]
        token = new XPathToken (XPathParser.RB);
        break;
      case 0x2c: // ,
        token = new XPathToken (XPathParser.COMMA);
        break;
      case 0x7c: // |
        token = new XPathToken (XPathParser.PIPE);
        break;
      case 0x2f: // / or //
        token = pairToken (0x2f, XPathParser.DOUBLE_SLASH, XPathParser.SLASH);
        break;
      case 0x3d: // =
        token = new XPathToken (XPathParser.EQ);
        break;
      case 0x21: // != ; a lone ! is an error
        token = pairToken (0x3d, XPathParser.NE, XPathParser.yyErrorCode);
        break;
      case 0x3e: // > or >=
        token = pairToken (0x3d, XPathParser.GTE, XPathParser.GT);
        break;
      case 0x3c: // < or <=
        token = pairToken (0x3d, XPathParser.LTE, XPathParser.LT);
        break;
      case 0x2b: // +
        token = new XPathToken (XPathParser.PLUS);
        break;
      case 0x2d: // -
        token = new XPathToken (XPathParser.MINUS);
        break;
      case 0x40: // @
        token = new XPathToken (XPathParser.AT);
        break;
      case 0x2a: // *
        token = new XPathToken (XPathParser.STAR);
        break;
      case 0x24: // $
        token = new XPathToken (XPathParser.DOLLAR);
        break;
      case 0x3a: // : or ::
        token = pairToken (0x3a, XPathParser.DOUBLE_COLON, XPathParser.COLON);
        break;
      case 0x2e: // . or ..
        token = pairToken (0x2e, XPathParser.DOUBLE_DOT, XPathParser.DOT);
        break;
      default:
        if (c >= 0x30 && c <= 0x39)
          {
            token = consume_digits (c);
          }
        else if (c == 0x5f || Character.isLetter ((char) c))
          {
            token = consume_name (c);
          }
        else
          {
            token = new XPathToken (XPathParser.yyErrorCode);
          }
      }
    return true;
  }

  /**
   * Returns the type of the current token.
   * {@link #advance()} must have produced a token first.
   */
  public int token ()
  {
    return token.type;
  }

  /**
   * Returns the text of the current token, or <code>null</code> if it has
   * none. {@link #advance()} must have produced a token first.
   */
  public Object value ()
  {
    return token.val;
  }

  /**
   * Reads a string literal up to (and including) the closing delimiter.
   * The opening delimiter has already been consumed by the caller.
   *
   * @param delimiter the quote character that opened the literal
   * @return a LITERAL token holding the text between the quotes, or an
   * error token if the input ends before the closing delimiter
   * @throws IOException if the underlying reader fails
   */
  XPathToken consume_literal (int delimiter)
    throws IOException
  {
    StringBuffer buf = new StringBuffer ();
    for (int c = in.read (); c != delimiter; c = in.read ())
      {
        if (c == -1)
          {
            // Unterminated literal
            return new XPathToken (XPathParser.yyErrorCode);
          }
        buf.append ((char) c);
      }
    return new XPathToken (XPathParser.LITERAL, buf.toString ());
  }

  /**
   * Reads a run of ASCII decimal digits.
   *
   * @param c the first digit, already consumed by the caller
   * @return a DIGITS token holding the digit string
   * @throws IOException if the underlying reader fails
   */
  XPathToken consume_digits (int c)
    throws IOException
  {
    StringBuffer buf = new StringBuffer ();
    buf.append ((char) c);
    while (true)
      {
        in.mark (1);
        c = in.read ();
        if (c < 0x30 || c > 0x39)
          {
            // Not a digit: push it back and stop.
            in.reset ();
            return new XPathToken (XPathParser.DIGITS, buf.toString ());
          }
        buf.append ((char) c);
      }
  }

  /**
   * Reads a name and classifies it as a plain NAME or as one of the
   * reserved words in {@link #keywords}.
   * <ul>
   * <li>A node type name is only a keyword when followed by <code>(</code>;
   * in that case the parenthesis is consumed as part of the token.</li>
   * <li>An axis name is only a keyword when followed by <code>::</code>;
   * the <code>::</code> itself is left in the input.</li>
   * <li><code>div</code> and <code>mod</code> are names when there is no
   * preceding token or the preceding token cannot end an operand.</li>
   * </ul>
   * Whitespace following a reserved word is consumed while looking ahead;
   * {@link #advance()} would discard it in any case.
   *
   * @param c the first character of the name, already consumed by the caller
   * @return the resulting token
   * @throws IOException if the underlying reader fails
   */
  XPathToken consume_name (int c)
    throws IOException
  {
    StringBuffer buf = new StringBuffer ();
    buf.append ((char) c);
    while (true)
      {
        in.mark (1);
        c = in.read ();
        if (!isNameChar (c))
          {
            in.reset ();
            break;
          }
        buf.append ((char) c);
      }
    String name = buf.toString ();
    Integer keyword = (Integer) keywords.get (name);
    if (keyword == null)
      {
        return new XPathToken (XPathParser.NAME, name);
      }
    int val = keyword.intValue ();
    switch (val)
      {
      case XPathParser.NODE:
      case XPathParser.COMMENT:
      case XPathParser.TEXT:
      case XPathParser.PROCESSING_INSTRUCTION:
        // Node type test only if followed by '(' (which is then consumed).
        skipWhitespace ();
        in.mark (1);
        if (in.read () != 0x28)
          {
            in.reset ();
            return new XPathToken (XPathParser.NAME, name);
          }
        break;
      case XPathParser.CHILD:
      case XPathParser.PARENT:
      case XPathParser.SELF:
      case XPathParser.DESCENDANT:
      case XPathParser.ANCESTOR:
      case XPathParser.DESCENDANT_OR_SELF:
      case XPathParser.ANCESTOR_OR_SELF:
      case XPathParser.ATTRIBUTE:
      case XPathParser.NAMESPACE:
      case XPathParser.FOLLOWING:
      case XPathParser.FOLLOWING_SIBLING:
      case XPathParser.PRECEDING:
      case XPathParser.PRECEDING_SIBLING:
        // Axis name only if followed by '::'. Two characters are peeked,
        // so the read-ahead limit must be 2.
        skipWhitespace ();
        in.mark (2);
        boolean axis = in.read () == 0x3a && in.read () == 0x3a;
        in.reset ();
        if (axis)
          {
            return new XPathToken (val);
          }
        return new XPathToken (XPathParser.NAME, name);
      case XPathParser.DIV:
      case XPathParser.MOD:
        // NOTE(review): "and"/"or" are not disambiguated here and are
        // always returned as operator tokens.
        if (lastToken == null || expectsOperand (lastToken.type))
          {
            return new XPathToken (XPathParser.NAME, name);
          }
        break;
      }
    return new XPathToken (val);
  }

  /**
   * Tests whether a character may appear inside a name after its first
   * character. Accepts '_', '-', '.', ASCII digits, anything for which
   * {@link Character#isLetter(char)} is true, and a fixed table of code
   * points (the table appears to follow the XML 1.0 CombiningChar and
   * Extender productions).
   *
   * @param c the character, or -1 for end of input
   * @return <code>true</code> if the character can continue a name
   */
  boolean isNameChar (int c)
  {
    return (c == 0x5f
            || c == 0x2d
            || c == 0x2e
            || (c >= 0x30 && c <= 0x39)
            || (c >= 0x0300 && c <= 0x0345)
            || (c >= 0x0360 && c <= 0x0361)
            || (c >= 0x0483 && c <= 0x0486)
            || (c >= 0x0591 && c <= 0x05A1)
            || (c >= 0x05A3 && c <= 0x05B9)
            || (c >= 0x05BB && c <= 0x05BD)
            || c == 0x05BF
            || (c >= 0x05C1 && c <= 0x05C2)
            || c == 0x05C4
            || (c >= 0x064B && c <= 0x0652)
            || c == 0x0670
            || (c >= 0x06D6 && c <= 0x06DC)
            || (c >= 0x06DD && c <= 0x06DF)
            || (c >= 0x06E0 && c <= 0x06E4)
            || (c >= 0x06E7 && c <= 0x06E8)
            || (c >= 0x06EA && c <= 0x06ED)
            || (c >= 0x0901 && c <= 0x0903)
            || c == 0x093C
            || (c >= 0x093E && c <= 0x094C)
            || c == 0x094D
            || (c >= 0x0951 && c <= 0x0954)
            || (c >= 0x0962 && c <= 0x0963)
            || (c >= 0x0981 && c <= 0x0983)
            || c == 0x09BC
            || c == 0x09BE
            || c == 0x09BF
            || (c >= 0x09C0 && c <= 0x09C4)
            || (c >= 0x09C7 && c <= 0x09C8)
            || (c >= 0x09CB && c <= 0x09CD)
            || c == 0x09D7
            || (c >= 0x09E2 && c <= 0x09E3)
            || c == 0x0A02
            || c == 0x0A3C
            || c == 0x0A3E
            || c == 0x0A3F
            || (c >= 0x0A40 && c <= 0x0A42)
            || (c >= 0x0A47 && c <= 0x0A48)
            || (c >= 0x0A4B && c <= 0x0A4D)
            || (c >= 0x0A70 && c <= 0x0A71)
            || (c >= 0x0A81 && c <= 0x0A83)
            || c == 0x0ABC
            || (c >= 0x0ABE && c <= 0x0AC5)
            || (c >= 0x0AC7 && c <= 0x0AC9)
            || (c >= 0x0ACB && c <= 0x0ACD)
            || (c >= 0x0B01 && c <= 0x0B03)
            || c == 0x0B3C
            || (c >= 0x0B3E && c <= 0x0B43)
            || (c >= 0x0B47 && c <= 0x0B48)
            || (c >= 0x0B4B && c <= 0x0B4D)
            || (c >= 0x0B56 && c <= 0x0B57)
            || (c >= 0x0B82 && c <= 0x0B83)
            || (c >= 0x0BBE && c <= 0x0BC2)
            || (c >= 0x0BC6 && c <= 0x0BC8)
            || (c >= 0x0BCA && c <= 0x0BCD)
            || c == 0x0BD7
            || (c >= 0x0C01 && c <= 0x0C03)
            || (c >= 0x0C3E && c <= 0x0C44)
            || (c >= 0x0C46 && c <= 0x0C48)
            || (c >= 0x0C4A && c <= 0x0C4D)
            || (c >= 0x0C55 && c <= 0x0C56)
            || (c >= 0x0C82 && c <= 0x0C83)
            || (c >= 0x0CBE && c <= 0x0CC4)
            || (c >= 0x0CC6 && c <= 0x0CC8)
            || (c >= 0x0CCA && c <= 0x0CCD)
            || (c >= 0x0CD5 && c <= 0x0CD6)
            || (c >= 0x0D02 && c <= 0x0D03)
            || (c >= 0x0D3E && c <= 0x0D43)
            || (c >= 0x0D46 && c <= 0x0D48)
            || (c >= 0x0D4A && c <= 0x0D4D)
            || c == 0x0D57
            || c == 0x0E31
            || (c >= 0x0E34 && c <= 0x0E3A)
            || (c >= 0x0E47 && c <= 0x0E4E)
            || c == 0x0EB1
            || (c >= 0x0EB4 && c <= 0x0EB9)
            || (c >= 0x0EBB && c <= 0x0EBC)
            || (c >= 0x0EC8 && c <= 0x0ECD)
            || (c >= 0x0F18 && c <= 0x0F19)
            || c == 0x0F35
            || c == 0x0F37
            || c == 0x0F39
            || c == 0x0F3E
            || c == 0x0F3F
            || (c >= 0x0F71 && c <= 0x0F84)
            || (c >= 0x0F86 && c <= 0x0F8B)
            || (c >= 0x0F90 && c <= 0x0F95)
            || c == 0x0F97
            || (c >= 0x0F99 && c <= 0x0FAD)
            || (c >= 0x0FB1 && c <= 0x0FB7)
            || c == 0x0FB9
            || (c >= 0x20D0 && c <= 0x20DC)
            || c == 0x20E1
            || (c >= 0x302A && c <= 0x302F)
            || c == 0x3099
            || c == 0x309A
            || c == 0x00B7
            || c == 0x02D0
            || c == 0x02D1
            || c == 0x0387
            || c == 0x0640
            || c == 0x0E46
            || c == 0x0EC6
            || c == 0x3005
            || (c >= 0x3031 && c <= 0x3035)
            || (c >= 0x309D && c <= 0x309E)
            || (c >= 0x30FC && c <= 0x30FE)
            || Character.isLetter ((char) c));
  }

  /** Tests for the whitespace characters skipped between tokens. */
  private static boolean isWhitespace (int c)
  {
    return c == 0x20 || c == 0x09 || c == 0x0d || c == 0x0a;
  }

  /**
   * Consumes whitespace, leaving the first non-whitespace character unread.
   * Marks before every single read so the read-ahead limit of 1 is never
   * exceeded, however long the run of whitespace is.
   */
  private void skipWhitespace ()
    throws IOException
  {
    int c;
    do
      {
        in.mark (1);
        c = in.read ();
      }
    while (isWhitespace (c));
    in.reset ();
  }

  /**
   * Builds a token for a symbol that may be one or two characters long.
   * If the next character equals <code>second</code> it is consumed and a
   * token of type <code>matched</code> results; otherwise the character is
   * left unread and a token of type <code>unmatched</code> results.
   */
  private XPathToken pairToken (int second, int matched, int unmatched)
    throws IOException
  {
    in.mark (1);
    if (in.read () == second)
      {
        return new XPathToken (matched);
      }
    in.reset ();
    return new XPathToken (unmatched);
  }

  /**
   * Tests whether a token of the given type leaves the expression waiting
   * for an operand, so that a following <code>div</code> or <code>mod</code>
   * must be a name rather than an operator.
   */
  private static boolean expectsOperand (int type)
  {
    switch (type)
      {
      case XPathParser.LP:
      case XPathParser.LB:
      case XPathParser.COMMA:
      case XPathParser.PIPE:
      case XPathParser.EQ:
      case XPathParser.NE:
      case XPathParser.GT:
      case XPathParser.LT:
      case XPathParser.GTE:
      case XPathParser.LTE:
      case XPathParser.PLUS:
      case XPathParser.MINUS:
      case XPathParser.STAR:
      case XPathParser.AT:
      case XPathParser.DOLLAR:
      case XPathParser.COLON:
      case XPathParser.DOUBLE_COLON:
      case XPathParser.DIV:
      case XPathParser.MOD:
      case XPathParser.OR:
      case XPathParser.AND:
      case XPathParser.SLASH:
      case XPathParser.DOUBLE_SLASH: // so that "//div" yields a name test
        return true;
      default:
        return false;
      }
  }

}