/*
 * Decompiled with CFR 0.152.
 */
package org.luwrain.inlandes.util;

import java.util.ArrayList;
import java.util.List;
import org.luwrain.inlandes.Token;
import org.luwrain.inlandes.util.Token;

/**
 * Splits a character stream into a flat list of tokens.
 *
 * <p>Every input character belongs to exactly one class: ASCII digit, ASCII
 * Latin letter, Russian Cyrillic letter, space, or "punctuation" (anything
 * else). A maximal run of characters of the same class becomes one token,
 * except punctuation, where every character is a token of its own.
 *
 * <p>Subclasses supply the character source through {@link #getCh()},
 * {@link #hasCh()} and {@link #backCh(char)}; this class only appends to
 * {@link #output}.
 *
 * <p>NOTE(review): the file imports two types with the simple name
 * {@code Token} ({@code org.luwrain.inlandes.Token} and
 * {@code org.luwrain.inlandes.util.Token}), which looks like a decompiler
 * artifact. Confirm which one {@link #output} and {@link #getOutput()} are
 * meant to use.
 */
public abstract class AbstractTokenizer {
    /** The no-break space (U+00A0); treated as a space character by the tokenizer. */
    public static final char NBSP = '\u00a0';

    /** Tokens produced so far, in input order. {@link #tokenize()} only appends to it. */
    protected final List<Token> output = new ArrayList<Token>();

    /** Character classes the tokenizer distinguishes; the classes are mutually exclusive. */
    private enum CharClass { NUM, LATIN, CYRIL, SPACE, PUNC }

    /**
     * Returns the next input character. The tokenizer calls this only right
     * after {@link #hasCh()} returned {@code true}, and expects each call to
     * consume the character it returns.
     */
    abstract char getCh();

    /** Tells whether at least one more character can be obtained with {@link #getCh()}. */
    public abstract boolean hasCh();

    /**
     * Gives back a character previously obtained with {@link #getCh()}. The
     * tokenizer relies on the next {@link #getCh()} call returning it again,
     * since this is how the character that terminates a run is re-examined.
     *
     * @param var1 the character to give back
     */
    public abstract void backCh(char var1);

    /**
     * Consumes all remaining input and appends the resulting tokens to
     * {@link #output}. Tokens already present in {@link #output} are kept.
     */
    public void tokenize() {
        while (this.hasCh()) {
            final char ch = this.getCh();
            final CharClass cls = classify(ch);
            switch (cls) {
                case NUM:
                    this.output.add(new org.luwrain.inlandes.util.Token(Token.Type.NUM, this.readRun(ch, cls)));
                    break;
                case LATIN:
                    this.output.add(new org.luwrain.inlandes.util.Token(Token.Type.LATIN, this.readRun(ch, cls)));
                    break;
                case CYRIL:
                    this.output.add(new org.luwrain.inlandes.util.Token(Token.Type.CYRIL, this.readRun(ch, cls)));
                    break;
                case SPACE:
                    this.output.add(new org.luwrain.inlandes.util.Token(Token.Type.SPACE, this.readRun(ch, cls)));
                    break;
                default:
                    // Punctuation is never merged: one character, one token.
                    this.output.add(new org.luwrain.inlandes.util.Token(Token.Type.PUNC, String.valueOf(ch)));
                    break;
            }
        }
    }

    /**
     * Reads the maximal run of characters belonging to the given class.
     *
     * @param first the already consumed first character of the run
     * @param cls the class of {@code first}
     * @return the run, starting with {@code first}
     */
    private String readRun(char first, CharClass cls) {
        final StringBuilder run = new StringBuilder();
        run.append(first);
        while (this.hasCh()) {
            final char next = this.getCh();
            if (classify(next) != cls) {
                // The terminating character belongs to the next token.
                this.backCh(next);
                break;
            }
            run.append(next);
        }
        return run.toString();
    }

    /** Maps a character to its class; anything unrecognized is punctuation. */
    private static CharClass classify(char ch) {
        if (ch >= '0' && ch <= '9') {
            return CharClass.NUM;
        }
        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
            return CharClass.LATIN;
        }
        if (isCyrChar(ch)) {
            return CharClass.CYRIL;
        }
        if (isSpaceChar(ch)) {
            return CharClass.SPACE;
        }
        return CharClass.PUNC;
    }

    /**
     * Tells whether the character is a space for tokenizing purposes: one of
     * the five ASCII whitespace characters formerly accepted by the
     * deprecated {@code Character.isSpace(char)}, or {@link #NBSP}.
     */
    private static boolean isSpaceChar(char ch) {
        switch (ch) {
            case ' ':
            case '\t':
            case '\n':
            case '\f':
            case '\r':
            case NBSP:
                return true;
            default:
                return false;
        }
    }

    /**
     * Returns a snapshot of the tokens produced so far.
     *
     * @return a newly allocated array with the content of {@link #output}
     */
    public Token[] getOutput() {
        return this.output.toArray(new Token[this.output.size()]);
    }

    /**
     * Tells whether the character is a letter of the Russian alphabet:
     * U+0410..U+042F (capital), U+0430..U+044F (small), or the letter IO in
     * either case (U+0401, U+0451). Other Cyrillic letters are not included.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is one of the letters listed above
     */
    public static boolean isCyrChar(char ch) {
        if (ch >= '\u0430' && ch <= '\u044f') {
            return true;
        }
        if (ch >= '\u0410' && ch <= '\u042f') {
            return true;
        }
        return ch == '\u0451' || ch == '\u0401';
    }
}

