/*
 * Decompiled with CFR 0.152.
 */
package org.luwrain.nlp.ru;

import java.util.ArrayList;
import java.util.List;
import org.luwrain.nlp.ru.Token;

/**
 * Splits a stream of characters into {@link Token}s of five kinds: runs of
 * ASCII digits, runs of ASCII Latin letters, runs of Russian Cyrillic
 * letters, runs of whitespace, and single "punctuation" characters
 * (everything that belongs to none of the other classes).
 *
 * <p>The character source is supplied by subclasses through
 * {@link #getCh()}, {@link #hasCh()} and {@link #backCh(char)}. Tokens are
 * appended to {@link #output}; the list is never cleared by this class, so
 * repeated calls to {@link #tokenize()} accumulate.
 */
public abstract class AbstractTokenizer {
    /** Character classes used to decide where a token run starts and ends. */
    private static final int CLASS_DIGIT = 0;
    private static final int CLASS_LATIN = 1;
    private static final int CLASS_CYRILLIC = 2;
    private static final int CLASS_SPACE = 3;
    private static final int CLASS_OTHER = 4;

    /** Tokens produced so far, in the order they were recognized. */
    protected final List<Token> output = new ArrayList<Token>();

    /**
     * Returns the next character of the input. Only called by this class
     * after {@link #hasCh()} has returned {@code true}.
     *
     * <p>Note: this method is package-private, so it can only be implemented
     * by subclasses declared in this package.
     */
    abstract char getCh();

    /**
     * Tells whether another character can be obtained with {@link #getCh()}.
     */
    public abstract boolean hasCh();

    /**
     * Called with a character that was read via {@link #getCh()} but does not
     * belong to the token being built. Presumably the implementation must make
     * that character the next one returned by {@link #getCh()} (push-back);
     * the tokenizer relies on this to start the following token correctly.
     *
     * @param var1 the character to return to the input
     */
    public abstract void backCh(char var1);

    /**
     * Consumes the input until {@link #hasCh()} reports no more characters,
     * appending one token to {@link #output} per recognized run (or per single
     * character for the punctuation class).
     */
    public void tokenize() {
        while (this.hasCh()) {
            char ch = this.getCh();
            switch (classify(ch)) {
                case CLASS_DIGIT:
                    this.output.add(new Token(Token.Type.NUM, this.readRun(ch, CLASS_DIGIT)));
                    break;
                case CLASS_LATIN:
                    this.output.add(new Token(Token.Type.LATIN, this.readRun(ch, CLASS_LATIN)));
                    break;
                case CLASS_CYRILLIC:
                    this.output.add(new Token(Token.Type.CYRIL, this.readRun(ch, CLASS_CYRILLIC)));
                    break;
                case CLASS_SPACE:
                    this.output.add(new Token(Token.Type.SPACE, this.readRun(ch, CLASS_SPACE)));
                    break;
                default:
                    // Punctuation is never grouped: each character is its own token.
                    this.output.add(new Token(Token.Type.PUNC, String.valueOf(ch)));
                    break;
            }
        }
    }

    /**
     * Maps a character to one of the {@code CLASS_*} constants. The classes
     * are mutually exclusive, so the order of the checks does not matter.
     */
    private static int classify(char ch) {
        if (ch >= '0' && ch <= '9') {
            return CLASS_DIGIT;
        }
        if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') {
            return CLASS_LATIN;
        }
        // Basic Russian alphabet in both cases, plus the letter "yo" which
        // lies outside the two contiguous ranges.
        if (ch >= '\u0430' && ch <= '\u044f' || ch >= '\u0410' && ch <= '\u042f' || ch == '\u0451' || ch == '\u0401') {
            return CLASS_CYRILLIC;
        }
        if (Character.isWhitespace(ch)) {
            return CLASS_SPACE;
        }
        return CLASS_OTHER;
    }

    /**
     * Collects the maximal run of characters of class {@code charClass} that
     * starts with {@code first}. The first character of a different class, if
     * any, is handed back through {@link #backCh(char)}.
     *
     * @param first     the already-read first character of the run
     * @param charClass the {@code CLASS_*} constant every character must match
     * @return the text of the run, at least one character long
     */
    private String readRun(char first, int charClass) {
        StringBuilder text = new StringBuilder();
        text.append(first);
        while (this.hasCh()) {
            char next = this.getCh();
            if (classify(next) != charClass) {
                this.backCh(next);
                break;
            }
            text.append(next);
        }
        return text.toString();
    }

    /**
     * Returns a snapshot of the tokens produced so far.
     *
     * @return a newly allocated array holding the contents of {@link #output}
     */
    public Token[] getOutput() {
        return this.output.toArray(new Token[this.output.size()]);
    }
}

