|
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object uk.ac.bham.gloss.StreamTokenizer
public class StreamTokenizer
The StreamTokenizer class represents an object that reads a stream of data and converts it into tokens. This stream is parsed using Java's regular expression methods. A typical method call to get a token will list the types of tokens allowed at that point. Since this depends on the current mode, this list of types may vary. If a token is rejected the input characters that form it may be put back on the input stream; a subsequent call to get a token may extract a different token from the same characters if the types allowed are different. This class also handles comments (starting with ; and continuing to the end of the line) and line continuations (\ immediately followed by newline, though the character \ can be changed). Copyright Richard Kaye 2007-8 for GLOSS, http://gloss.bham.ac.uk Usage permitted according to the GPL. No warranty.
Field Summary | |
---|---|
protected int |
attrvaltsREI
|
protected static java.lang.String |
b64digitre
b64digitre matches a base64 digit |
protected int |
b64dtsREI
|
protected static java.lang.String |
b64re
b64re matches a base64 constant starting with = and ending with = or == for padding if required. |
protected int |
chardtsREI
|
protected static java.lang.String |
charre
charre matches a character constant which is delimited by '...' and contains either a single character or a \c escape combination. |
protected int |
commentnlREI
|
protected int |
cref1REI
|
protected int |
cref2REI
|
protected int |
eltnametsREI
|
protected int |
entrefREI
|
protected int |
fpdatatsREI
|
protected static java.lang.String |
fpre
fpre matches an (arb precision) fp constant |
protected int |
hexdatatsREI
|
protected static java.lang.String |
hexre
hexre matches an (arb precision) signed hex integer, almost according to OpenMath syntax, except that a leading 0 is required (after any - sign) and before the x. |
protected int |
intdatatsREI
|
protected static java.lang.String |
intre
intre matches an (arb precision) signed integer written to base 10, according to the syntax defined in OpenMath. |
protected int |
leadcommREI
|
protected int |
leadlabcommREI
|
protected int |
leadspcommREI
|
protected int |
leadspREI
|
protected static java.lang.String |
namere
namere matches a fully qualified element name. |
protected int |
nlcoREI
|
protected static java.lang.String |
nsre
nsre matches a non-space unicode character in UTF-16 |
protected int |
nsreREI
|
protected int |
parreftsREI
|
protected int |
parvaltsREI
|
protected int |
peltvaltsREI
|
protected int |
pideftsREI
|
protected static java.lang.String |
pnamere
pnamere matches a resolved parameter name. |
protected int |
puncvaltsREI
|
protected int |
slabtabREI
|
protected int |
spcommentnlREI
|
protected int |
speqspREI
|
protected int |
spnlREI
|
protected int |
sptabREI
|
protected int |
stringdtsREI
|
protected static java.lang.String |
stringre1
stringre1 matches a string delimited with " |
protected static java.lang.String |
stringre2
|
protected static java.lang.String |
tsre
tsre matches trailing space |
protected static java.lang.String |
ucre
ucre matches a unicode character in UTF-16 |
protected int |
ucreREI
|
protected static java.lang.String |
uqnre
uqnre matches a namespace prefix. |
protected int |
uridtsREI
|
protected static java.lang.String |
urire
urire matches a uri constant which can be almost anything that uses the safe characters a-z A-Z 0-9 $ - _ . |
protected static java.lang.String |
valuere
valuere matches either stringre1, stringre2 or a string not starting with " or ' and not containing whitespace |
Constructor Summary | |
---|---|
StreamTokenizer(java.lang.String documentname,
java.io.InputStreamReader isr,
Logger l)
Constructor: use null for documentname if it is not available. |
Method Summary | |
---|---|
protected int |
addRE(java.lang.String re)
Adds an RE "re" to the library, precompiling it for future use. |
int |
col()
Returns the column number of the next character to be read. |
protected void |
displayState(java.lang.String message)
Internal method that displays the current state of the Tokenizer. |
java.lang.String |
doc()
Returns the document name or URL. |
protected Token |
getATTR(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getB64(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getCHAR(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getCREF(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getELT(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getEOS(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getEREF(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getFP(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getHEX(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getINT(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getLABEL(int depth,
int thislinen,
int thiscoln)
|
char |
getLineContinuationChar()
Returns the line continuation char, or '\0' if there is none set. |
protected Token |
getNS(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getPDEF(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getPELT(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getPI(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getPREF(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getPUNC(java.lang.String tdata,
int depth,
int thislinen,
int thiscoln)
|
protected java.lang.String |
getRegex(int reIndx)
Internal private method that gets a string matching a regex at the beginning of unread input (inbuff, at inindx) returning it (or returning null if no such match). |
protected Token |
getSTR(int depth,
int thislinen,
int thiscoln)
|
Token |
getToken(java.lang.String accept)
Default form of getToken in which the "disregardComments" and "disregardWhitespace" parameters are true provided UC is not one of the acceptable types, and false otherwise. |
Token |
getToken(java.lang.String accept,
boolean disregardComments,
boolean disregardWhitespace,
boolean allowLinecontinuation,
boolean allowLabel)
Gets the next token, where the argument "allowedtypes" is a bitwise OR (|) of Token.type_TYPE values of allowed token types. |
protected Token |
getUC(int depth,
int thislinen,
int thiscoln)
|
protected Token |
getURI(int depth,
int thislinen,
int thiscoln)
|
int |
lastLineNumber()
Returns the number of the last line read from the input stream, 0 if no line has been read. |
int |
line()
Returns the line number of the next character to be read. |
Logger |
logger()
accessor: get the logger object |
protected boolean |
matches(java.lang.String s,
int reIndx)
Internal private method that tests whether a string matches an RE. |
protected boolean |
notReadEnough(int startindx,
boolean atStart,
boolean disregardComments,
boolean disregardWhitespace,
boolean allowLabel)
Method to determine if we have read in enough data to guarantee a token. |
protected java.lang.String |
replaceAll(java.lang.String s,
int reIndx,
java.lang.String replacement)
Internal private method that performs a replaceAll operation on string s against an re. |
protected java.lang.String |
replaceFirst(java.lang.String s,
int reIndx,
java.lang.String replacement)
Internal private method that performs a replaceFirst operation on string s against an re. |
void |
setLineContinuationChar(char c)
Sets the line continuation char, set to '\0' if none is required. |
void |
setLogger(Logger l)
set the logger object |
protected java.lang.String |
trimLeadingSpace(java.lang.String s)
Returns the string with leading whitespace (tab, space, nl) removed |
protected java.lang.String |
trimTrailingSpace(java.lang.String s)
Returns the string with trailing whitespace (tab, space, nl) removed |
void |
ungetToken()
Restores the state of this Tokenizer to just before the last getToken operation. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected static final java.lang.String uqnre
protected static final java.lang.String namere
protected static final java.lang.String tsre
protected static final java.lang.String stringre1
protected static final java.lang.String stringre2
protected static final java.lang.String valuere
protected static final java.lang.String ucre
protected static final java.lang.String nsre
protected static final java.lang.String charre
protected static final java.lang.String fpre
protected static final java.lang.String hexre
protected static final java.lang.String intre
protected static final java.lang.String b64digitre
protected static final java.lang.String b64re
protected static final java.lang.String urire
protected static final java.lang.String pnamere
protected int commentnlREI
protected int spcommentnlREI
protected int spnlREI
protected int entrefREI
protected int parvaltsREI
protected int stringdtsREI
protected int nsreREI
protected int ucreREI
protected int chardtsREI
protected int b64dtsREI
protected int uridtsREI
protected int attrvaltsREI
protected int peltvaltsREI
protected int eltnametsREI
protected int fpdatatsREI
protected int hexdatatsREI
protected int intdatatsREI
protected int cref1REI
protected int cref2REI
protected int nlcoREI
protected int slabtabREI
protected int leadspREI
protected int leadcommREI
protected int leadspcommREI
protected int leadlabcommREI
protected int sptabREI
protected int speqspREI
protected int puncvaltsREI
protected int parreftsREI
protected int pideftsREI
Constructor Detail |
---|
public StreamTokenizer(java.lang.String documentname, java.io.InputStreamReader isr, Logger l)
Method Detail |
---|
public Logger logger()
public void setLogger(Logger l)
protected int addRE(java.lang.String re)
public void setLineContinuationChar(char c)
public char getLineContinuationChar()
public int lastLineNumber()
public int line()
line
in interface Locator
public int col()
col
in interface Locator
public java.lang.String doc()
doc
in interface Locator
public void ungetToken() throws GlossException
ungetToken
in interface Tokenizer
GlossException
protected void displayState(java.lang.String message)
protected java.lang.String trimTrailingSpace(java.lang.String s)
protected java.lang.String trimLeadingSpace(java.lang.String s)
protected boolean notReadEnough(int startindx, boolean atStart, boolean disregardComments, boolean disregardWhitespace, boolean allowLabel)
protected java.lang.String replaceFirst(java.lang.String s, int reIndx, java.lang.String replacement)
protected java.lang.String replaceAll(java.lang.String s, int reIndx, java.lang.String replacement)
protected boolean matches(java.lang.String s, int reIndx)
protected java.lang.String getRegex(int reIndx)
public Token getToken(java.lang.String accept) throws GlossException
getToken
in interface Tokenizer
GlossException
public Token getToken(java.lang.String accept, boolean disregardComments, boolean disregardWhitespace, boolean allowLinecontinuation, boolean allowLabel) throws GlossException, java.io.IOException
GlossException
java.io.IOException
protected Token getLABEL(int depth, int thislinen, int thiscoln) throws GlossException
GlossException
protected Token getPUNC(java.lang.String tdata, int depth, int thislinen, int thiscoln)
protected Token getEOS(int depth, int thislinen, int thiscoln) throws GlossException
GlossException
protected Token getELT(int depth, int thislinen, int thiscoln)
protected Token getATTR(int depth, int thislinen, int thiscoln)
protected Token getPELT(int depth, int thislinen, int thiscoln)
protected Token getEREF(int depth, int thislinen, int thiscoln)
protected Token getCREF(int depth, int thislinen, int thiscoln)
protected Token getPREF(int depth, int thislinen, int thiscoln)
protected Token getPDEF(int depth, int thislinen, int thiscoln)
protected Token getPI(int depth, int thislinen, int thiscoln)
protected Token getSTR(int depth, int thislinen, int thiscoln)
protected Token getCHAR(int depth, int thislinen, int thiscoln)
protected Token getB64(int depth, int thislinen, int thiscoln)
protected Token getURI(int depth, int thislinen, int thiscoln)
protected Token getHEX(int depth, int thislinen, int thiscoln)
protected Token getFP(int depth, int thislinen, int thiscoln)
protected Token getINT(int depth, int thislinen, int thiscoln)
protected Token getNS(int depth, int thislinen, int thiscoln)
protected Token getUC(int depth, int thislinen, int thiscoln)
|
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |