// RuleBasedCollator.java
package java.text;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
/**
 * One parsed entry of a collation rule string: the text being ordered
 * together with the relation character that introduced it.
 */
final class RBCElement
{
  /** The text argument that followed the relation character. */
  String key;

  /** The relation character that preceded {@link #key} in the rules. */
  char relation;

  /**
   * Creates an entry holding the given text and relation.
   *
   * @param key the text argument taken from the rule string
   * @param relation the relation character that introduced the text
   */
  RBCElement (String key, char relation)
  {
    this.relation = relation;
    this.key = key;
  }
}
/**
 * A {@link Collator} whose ordering is built from a textual rule string.
 *
 * <p>The constructor parses the rules into an ordered list of keys and
 * assigns each key an integer collation order laid out as
 * {@code primary << 16 | secondary << 8 | tertiary}.  Two tables are kept:
 * {@code map} (key to order) and {@code prefixes} (every proper prefix of
 * every multi-character key), the latter letting {@link #ceiNext} find the
 * longest key that matches at a given text position.</p>
 *
 * <p>Recognised rule characters, as implemented by the constructor:</p>
 * <ul>
 * <li>{@code <} - next primary order (secondary and tertiary reset to 0)</li>
 * <li>{@code ;} - next secondary order (tertiary reset to 0)</li>
 * <li>{@code ,} - next tertiary order</li>
 * <li>{@code =} - same order as the preceding entry</li>
 * <li>{@code &} - reset: following entries are inserted directly after the
 *     named, previously seen key</li>
 * <li>{@code @} - sets the French-accents flag</li>
 * </ul>
 */
public class RuleBasedCollator extends Collator
{
  /**
   * Returns a copy of this collator with its own copies of the order table
   * and the prefix table.
   *
   * @return the cloned collator
   */
  public Object clone ()
  {
    RuleBasedCollator c = (RuleBasedCollator) super.clone ();
    c.map = (Hashtable) map.clone ();
    // The prefix table must be cloned from prefixes, not from map;
    // otherwise the clone would treat every full key as a prefix.
    c.prefixes = (Hashtable) prefixes.clone ();
    return c;
  }

  /**
   * Computes the next collation order for the given iterator and advances
   * the iterator's index past the text that was consumed.
   *
   * <p>The longest key starting at the current index is looked up.  When no
   * key matches, the single character at the index is consumed and reported
   * as two successive values: first {@code 0x7fff << 16}, then (from the
   * stored lookahead, on the following call) the character shifted left by
   * eight bits.</p>
   *
   * @param cei the iterator whose state ({@code text}, {@code index},
   *        {@code lookahead}, {@code lookahead_set}) is read and updated
   * @return the next collation order, or
   *         {@link CollationElementIterator#NULLORDER} when the index is
   *         already at or past the end of the text
   */
  int ceiNext (CollationElementIterator cei)
  {
    // A pending second half of an unmapped character takes precedence.
    if (cei.lookahead_set) {
      cei.lookahead_set = false;
      return cei.lookahead;
    }

    int save = cei.index;
    int max = cei.text.length();

    // Nothing left to read.  Without this guard the scan below would not
    // run and a null key would be handed to the hash table.
    if (save >= max) {
      return CollationElementIterator.NULLORDER;
    }

    String s = null;
    boolean found = false;
    int i;
    // Grow the candidate one character at a time for as long as it is
    // still a proper prefix of some key.
    for (i = save + 1; i <= max; ++i) {
      s = cei.text.substring(save, i);
      if (prefixes.get(s) == null) {
        break;
      }
      found = true;
    }
    // If the scan ran off the end of the text without breaking, i is one
    // past the end while s ends exactly at max; bring i back in line with
    // s so the iterator index never exceeds the text length.
    if (i > max) {
      i = max;
    }

    Object obj = map.get(s);
    // We followed prefixes but did not land on a full key: back off one
    // character at a time until a key matches or one character remains.
    while (found && obj == null && s.length() > 1) {
      --i;
      s = cei.text.substring(save, i);
      obj = map.get(s);
    }

    // Update the iterator's state for the next call.
    cei.index = i;

    if (obj == null) {
      // Unmapped character: emit the maximum primary now and stash the
      // character-derived value for the next call.
      cei.lookahead_set = true;
      cei.lookahead = s.charAt(0) << 8;
      return 0x7fff << 16;
    }

    return ((Integer) obj).intValue();
  }

  /**
   * Returns the next order from the iterator that is significant at the
   * given strength, skipping orders that become zero once masked.
   *
   * @param iter the iterator to read from
   * @param strength one of the strength constants used in the switch below
   * @return the masked order, or {@link CollationElementIterator#NULLORDER}
   *         when the iterator is exhausted
   */
  static final int next (CollationElementIterator iter, int strength)
  {
    while (true) {
      int os = iter.next();
      if (os == CollationElementIterator.NULLORDER) {
        return os;
      }

      int c = 0;
      switch (strength) {
        case PRIMARY:
          // Keep only the bits above the low 16.
          c = os & ~0xffff;
          break;
        case SECONDARY:
          // Drop only the low 8 bits.
          c = os & ~0x00ff;
          break;
        case TERTIARY:
        case IDENTICAL:
          c = os;
          break;
      }

      // Orders that vanish at this strength are ignorable; keep reading.
      if (c != 0) {
        return c;
      }
    }
  }

  /**
   * Compares two strings element by element at this collator's strength.
   *
   * @param source the first string
   * @param target the second string
   * @return a negative value if {@code source} sorts first, a positive
   *         value if {@code target} sorts first, and 0 if no difference is
   *         found
   */
  public int compare (String source, String target)
  {
    CollationElementIterator cs, ct;
    cs = new CollationElementIterator (source, this);
    ct = new CollationElementIterator (target, this);

    while (true) {
      int os = next (cs, strength);
      int ot = next (ct, strength);

      if (os == CollationElementIterator.NULLORDER
          && ot == CollationElementIterator.NULLORDER) {
        break;
      } else if (os == CollationElementIterator.NULLORDER) {
        // Source ran out first, so it is the shorter one.
        return -1;
      } else if (ot == CollationElementIterator.NULLORDER) {
        // Target ran out first.
        return 1;
      }

      if (os != ot) {
        return os - ot;
      }
    }

    return 0;
  }

  /**
   * Tests for equality: the other object must be a
   * {@code RuleBasedCollator} accepted by {@code super.equals} with the
   * same French-accents flag and the same rule string.
   *
   * @param obj the object to compare with
   * @return {@code true} if the two collators are considered equal
   */
  public boolean equals (Object obj)
  {
    if (! (obj instanceof RuleBasedCollator) || ! super.equals(obj)) {
      return false;
    }
    RuleBasedCollator rbc = (RuleBasedCollator) obj;
    return (frenchAccents == rbc.frenchAccents
            && rules.equals(rbc.rules));
  }

  /**
   * Creates an element iterator over the given string after passing every
   * character through {@code decomposeCharacter}.
   *
   * @param source the text to iterate over
   * @return an iterator over the expanded text
   */
  public CollationElementIterator getCollationElementIterator (String source)
  {
    StringBuffer expand = new StringBuffer (source.length());
    int max = source.length();
    for (int i = 0; i < max; ++i) {
      decomposeCharacter (source.charAt(i), expand);
    }
    return new CollationElementIterator (expand.toString(), this);
  }

  /**
   * Creates an element iterator over the characters supplied by the given
   * character iterator, read from {@code first()} until {@code DONE}, after
   * passing every character through {@code decomposeCharacter}.
   *
   * @param source the character source; its position is moved
   * @return an iterator over the expanded text
   */
  public CollationElementIterator getCollationElementIterator (CharacterIterator source)
  {
    StringBuffer expand = new StringBuffer ();
    for (char c = source.first ();
         c != CharacterIterator.DONE;
         c = source.next ()) {
      decomposeCharacter (c, expand);
    }
    return new CollationElementIterator (expand.toString(), this);
  }

  /**
   * Builds a collation key for the given string from its element iterator,
   * the string itself and this collator's strength.
   *
   * @param source the string to create a key for
   * @return the collation key
   */
  public CollationKey getCollationKey (String source)
  {
    return new CollationKey (getCollationElementIterator (source), source,
                             strength);
  }

  /**
   * Returns the rule string this collator was constructed from.
   *
   * @return the rules
   */
  public String getRules ()
  {
    return rules;
  }

  /**
   * Returns a hash code combining the French-accents flag, the rule string
   * and both lookup tables.
   *
   * @return the hash code
   */
  public int hashCode ()
  {
    // The conditional must be parenthesised: ?: binds more loosely than ^,
    // so without the parentheses the whole XOR chain becomes the "else"
    // branch and the hash is the constant 1231 whenever the flag is set.
    return ((frenchAccents ? 1231 : 1237)
            ^ rules.hashCode()
            ^ map.hashCode()
            ^ prefixes.hashCode());
  }

  /**
   * Tells whether a character belongs to the ASCII ranges that the rule
   * parser treats as special: U+0009..U+000D, U+0020..U+002F,
   * U+003A..U+0040, U+005B..U+0060 and U+007B..U+007E.
   *
   * @param c the character to test
   * @return {@code true} if {@code c} lies in one of those ranges
   */
  private final boolean is_special (char c)
  {
    return ((c >= 0x0009 && c <= 0x000d)
            || (c >= 0x0020 && c <= 0x002f)
            || (c >= 0x003a && c <= 0x0040)
            || (c >= 0x005b && c <= 0x0060)
            || (c >= 0x007b && c <= 0x007e));
  }

  /**
   * Reads one text argument from the rules starting at {@code index}.
   * Reading stops at the first unquoted special or whitespace character.  A
   * special character written between single quotes (for example
   * {@code '&'}) is taken literally and becomes part of the argument.
   *
   * @param rules the complete rule string
   * @param index the position at which the argument starts
   * @param result receives the argument text; it is cleared first
   * @return the index of the character that ended the argument, or the
   *         length of {@code rules} if the end was reached
   */
  private final int text_argument (String rules, int index,
                                   StringBuffer result)
  {
    result.setLength(0);
    int len = rules.length();
    while (index < len) {
      char c = rules.charAt(index);
      if (c == '\'' && index + 2 < len
          && rules.charAt(index + 2) == '\''
          && is_special (rules.charAt(index + 1))) {
        // Quoted special: keep the character between the quotes (not the
        // quote itself) and leave index on the closing quote so that the
        // increment below steps past it.
        c = rules.charAt(index + 1);
        index += 2;
      } else if (is_special (c) || Character.isWhitespace(c)) {
        return index;
      }
      result.append(c);
      ++index;
    }
    return index;
  }

  /**
   * Finds the position of the entry with the given key.
   *
   * <p>{@code Vector.indexOf} cannot be used for this: the vector holds
   * {@code RBCElement} objects, and a {@code String} is never equal to
   * one.</p>
   *
   * @param vec a vector of {@code RBCElement}
   * @param key the key to search for
   * @return the index of the first entry with that key, or -1 if none
   */
  private static int indexOfKey (Vector vec, String key)
  {
    int size = vec.size();
    for (int i = 0; i < size; ++i) {
      RBCElement r = (RBCElement) vec.elementAt(i);
      if (key.equals(r.key)) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Creates a collator from the given rule string.
   *
   * @param rules the collation rules; see the class description for the
   *        characters that are recognised
   * @throws ParseException if an unexpected character is found where a
   *         relation is expected, if a relation has no argument, or if a
   *         reset ({@code &}) names a key that has not been seen before
   */
  public RuleBasedCollator (String rules) throws ParseException
  {
    this.rules = rules;
    this.frenchAccents = false;

    // Position in vec at which the next entry is inserted.  It normally
    // trails the last inserted entry and is moved by a reset.
    int insertion_index = 0;
    Vector vec = new Vector ();
    StringBuffer argument = new StringBuffer ();

    int len = rules.length();
    for (int index = 0; index < len; ++index) {
      char c = rules.charAt(index);

      // Whitespace between rules is ignored.
      if (Character.isWhitespace(c)) {
        continue;
      }

      // Modifier: French accents.
      if (c == '@') {
        frenchAccents = true;
        continue;
      }

      // Anything else must be a relation or a reset.
      if (! (c == '<' || c == ';' || c == ',' || c == '=' || c == '&')) {
        throw new ParseException ("invalid character", index);
      }

      // Skip whitespace between the relation and its argument.
      ++index;
      while (index < len) {
        if (! Character.isWhitespace(rules.charAt(index))) {
          break;
        }
        ++index;
      }
      if (index == len) {
        throw new ParseException ("missing argument", index);
      }

      int save = index;
      index = text_argument (rules, index, argument);
      if (argument.length() == 0) {
        throw new ParseException ("invalid character", save);
      }
      String arg = argument.toString();
      int item_index = indexOfKey (vec, arg);

      if (c != '&') {
        // A key that is defined again moves to its new position.
        if (item_index != -1) {
          vec.removeElementAt(item_index);
          // Only entries before the insertion point shift it left.  If
          // the removed entry sat exactly at the insertion point, the
          // point itself stays where it is.
          if (insertion_index > item_index) {
            --insertion_index;
          }
        }
        RBCElement r = new RBCElement (arg, c);
        vec.insertElementAt(r, insertion_index);
        ++insertion_index;
      } else {
        // Reset: continue inserting right after the named key.
        if (item_index == -1) {
          throw
            new ParseException ("argument to reset not previously seen",
                                save);
        }
        insertion_index = item_index + 1;
      }

      // text_argument stopped on the terminating character; step back so
      // that the loop increment lands on it again.
      --index;
    }

    // Walk the ordered entries and hand out collation orders.
    int primary = 0;
    int secondary = 0;
    int tertiary = 0;
    this.map = new Hashtable ();
    this.prefixes = new Hashtable ();
    Enumeration e = vec.elements();
    while (e.hasMoreElements()) {
      RBCElement r = (RBCElement) e.nextElement();
      switch (r.relation) {
        case '<':
          ++primary;
          secondary = 0;
          tertiary = 0;
          break;
        case ';':
          ++secondary;
          tertiary = 0;
          break;
        case ',':
          ++tertiary;
          break;
        case '=':
          // Same order as the previous entry.
          break;
      }

      map.put(r.key, new Integer (primary << 16
                                  | secondary << 8 | tertiary));

      // Record every proper prefix of the key so that ceiNext knows when
      // a longer match is still possible.
      for (int i = r.key.length() - 1; i >= 1; --i) {
        prefixes.put(r.key.substring(0, i), Boolean.TRUE);
      }
    }
  }

  /** Set when the rules contain {@code @}. */
  private boolean frenchAccents;

  /** The rule string passed to the constructor. */
  private String rules;

  /** Maps each key (String) to its collation order (Integer). */
  private Hashtable map;

  /** Holds every proper prefix of every key, mapped to Boolean.TRUE. */
  private Hashtable prefixes;
}