package org.biffo.dms.ki.modules.metropostagger;

import java.io.InputStream;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.Span;
import org.biffo.dms.api.Description;
import org.biffo.dms.api.IDocumentFragments;
import org.biffo.dms.api.IFragmentData;
import org.biffo.dms.api.ProcessAuthor;
import org.biffo.dms.api.ProcessName;
import org.biffo.dms.entity.Fragments.Fragment;

/**
 * Runs an OpenNLP name finder over whitespace-tokenized text and turns every span whose
 * probability exceeds {@link #MIN_PROBABILITY} into a {@link Fragment}. The text of each
 * accepted span is additionally matched against a fixed line-item regex; on a match the
 * captured groups are stored in a {@code MetroPos} entry.
 *
 * <p>Usage: call {@link #init()} once to load the model, then {@link #doText(String)} per text.
 * Results of the most recent call are read via {@link #getFragments()} and {@link #getData()}.
 *
 * <p>The model and the name finder are {@code transient}; they are {@code null} on a
 * deserialized instance until {@link #init()} is called again.
 */
@Description("Sucht nach Rechnungspositionen innerhalb einer Metro Rechnung, und liefert entsp. die gewünschten Informationen zurück")
@ProcessName("Metro Position Tagger")
@ProcessAuthor(author = "mlange", url = "www.biffo.de", major = 0, minor = 5)
/* loaded from: input_file:org/biffo/dms/ki/modules/metropostagger/MetroPosTagger.class */
public class MetroPosTagger implements IDocumentFragments, Serializable {
    private static final long serialVersionUID = 1;

    /** File name of the model resource, looked up inside this class's package directory. */
    private static final String MODEL_FILE = "/METRO_POS_NAIVEBAYES.bin";

    /** Spans must have a probability strictly greater than this value to be accepted. */
    private static final double MIN_PROBABILITY = 0.8d;

    /** Matches "digits, digits" so the gap after the comma can be removed. */
    private static final Pattern COMMA_GAP = Pattern.compile("(\\d+), (\\d+)");

    /** Matches a leading "digits. digit" so the gap after the dot can be removed. */
    private static final Pattern LEADING_DOT_GAP = Pattern.compile("^(\\d+)\\. (\\d)");

    private transient TokenNameFinderModel _model;
    private transient NameFinderME _nameDetector;

    /** When {@code true}, spans and candidate texts are printed to {@code System.out}. */
    public boolean debug = false;

    // regExPos and ptPos intentionally remain instance fields so the serialized form of the
    // class is unchanged.
    private final String regExPos = "(\\d+\\.\\d)\\s(\\d+)\\s(.+)\\s(\\D{2})\\s(\\d+,\\d+)\\s(\\d+)\\s(.*?)\\s(\\d+)\\s(.*?)\\s.+";
    private final Pattern ptPos = Pattern.compile(regExPos);
    private List<Fragment> lstFragments = new LinkedList<>();
    private List<IFragmentData> lstFragmentData = new LinkedList<>();

    /**
     * Loads the name finder model from the classpath and creates the name finder.
     *
     * <p>The resource path is this object's runtime package (dots replaced by slashes)
     * followed by {@code /METRO_POS_NAIVEBAYES.bin}.
     *
     * @throws IllegalStateException if the model resource is not found on the classpath
     * @throws Exception if the model cannot be read or the name finder cannot be created
     */
    public void init() throws Exception {
        String resource = getClass().getPackageName().replace('.', '/') + MODEL_FILE;
        // try-with-resources: the stream is closed even when model loading fails.
        try (InputStream in = getClass().getClassLoader().getResourceAsStream(resource)) {
            if (in == null) {
                throw new IllegalStateException("Model resource not found on classpath: " + resource);
            }
            this._model = new TokenNameFinderModel(in);
        }
        this._nameDetector = new NameFinderME(this._model);
    }

    /**
     * Clears the results of the previous call, tags {@code str} and collects fragments and
     * parsed data for every span with a probability above {@link #MIN_PROBABILITY}.
     *
     * <p>Failures do not propagate: they are reported on {@code System.err} and whatever was
     * collected before the failure is kept.
     *
     * @param str text to analyse; {@code null} yields no results
     * @return always {@code false}; use {@link #hasFragments()} / {@link #hasData()} to check
     *     for results. NOTE(review): the constant return value is kept from the original
     *     implementation; confirm what the {@code IDocumentFragments} contract expects.
     */
    public boolean doText(String str) {
        clearAll();
        if (str == null) {
            return false;
        }
        NameFinderME detector = this._nameDetector;
        if (detector == null) {
            // Happens when init() was never called or after deserialization (transient field).
            report("name finder not initialised - call init() before doText()");
            return false;
        }
        try {
            String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(str);
            Span[] spans = detector.find(tokens);
            if (spans != null) {
                for (Span span : spans) {
                    String text = joinTokens(tokens, span.getStart(), span.getEnd());
                    if (this.debug) {
                        System.out.println(span + " " + span.getProb());
                        System.out.println(text);
                    }
                    if (span.getProb() > MIN_PROBABILITY) {
                        parsePosition(text, span);
                        Fragment fragment = new Fragment();
                        fragment.setErstelltVon(getClass().getSimpleName());
                        fragment.setGeaendertVon(fragment.getErstelltVon());
                        fragment.setFragmentname(span.getType());
                        fragment.setFragmentvalue(text);
                        fragment.setFragmenttype("IDocumentFragments");
                        fragment.setProbability(span.getProb());
                        fragment.setPage(-1);
                        fragment.setValid(true);
                        this.lstFragments.add(fragment);
                    }
                }
            }
        } catch (Exception e) {
            // Best effort: keep what was collected so far, but do not hide the failure.
            report("tagging failed: " + e);
        } finally {
            // Reset the detector's adaptive state even after a failure so it cannot
            // influence the next text.
            detector.clearAdaptiveData();
        }
        return false;
    }

    /**
     * Concatenates {@code tokens[start..end)}, appending a single space after every token
     * (including the last one, so a non-empty result ends with a space).
     */
    private static String joinTokens(String[] tokens, int start, int end) {
        StringBuilder sb = new StringBuilder();
        for (int i = start; i < end; i++) {
            sb.append(tokens[i]).append(' ');
        }
        return sb.toString();
    }

    /**
     * Normalises {@code str} and, if it matches the line-item pattern as a whole, stores the
     * nine captured groups in a new {@code MetroPos} appended to the data list.
     *
     * <p>Normalisation trims the text, removes the space in "digits, digits" everywhere and
     * removes the space in a leading "digits. digit". Text that does not match is ignored.
     *
     * @param str candidate text; {@code null} or empty is ignored
     * @param span span the text was taken from (currently unused)
     */
    private void parsePosition(String str, Span span) {
        if (str == null || str.isEmpty()) {
            return;
        }
        String normalised = COMMA_GAP.matcher(str.trim()).replaceAll("$1,$2");
        normalised = LEADING_DOT_GAP.matcher(normalised).replaceAll("$1.$2");
        if (this.debug) {
            System.out.println(normalised);
        }
        Matcher matcher = this.ptPos.matcher(normalised);
        if (!matcher.matches()) {
            return;
        }
        // Capture groups 1..9 map to the MetroPos setters in pattern order.
        MetroPos metroPos = new MetroPos();
        metroPos.setArtNr(matcher.group(1));
        metroPos.setEan(matcher.group(2));
        metroPos.setArtBez(matcher.group(3));
        metroPos.setPack(matcher.group(4));
        metroPos.setEinzelpreis(matcher.group(5));
        metroPos.setInhaltKoli(matcher.group(6));
        metroPos.setKoliPreis(matcher.group(7));
        metroPos.setMenge(matcher.group(8));
        metroPos.setGesamtPreis(matcher.group(9));
        this.lstFragmentData.add(metroPos);
    }

    /** Writes a diagnostic line, prefixed with the simple class name, to {@code System.err}. */
    private void report(String message) {
        System.err.println(getClass().getSimpleName() + ": " + message);
    }

    /** Empties both result lists. */
    private void clearAll() {
        this.lstFragments.clear();
        this.lstFragmentData.clear();
    }

    /**
     * Returns the fragments collected by the most recent {@link #doText(String)} call.
     *
     * @return the internal, live list (not a copy); it is cleared by the next {@code doText} call
     */
    public List<Fragment> getFragments() {
        return this.lstFragments;
    }

    /**
     * @return {@code true} if the most recent {@link #doText(String)} call produced at least
     *     one fragment
     */
    public boolean hasFragments() {
        return !this.lstFragments.isEmpty();
    }

    /**
     * @return {@code true} if the most recent {@link #doText(String)} call produced at least
     *     one parsed data entry
     */
    public boolean hasData() {
        return !this.lstFragmentData.isEmpty();
    }

    /**
     * Returns the parsed data entries collected by the most recent {@link #doText(String)} call.
     *
     * @return the internal, live list (not a copy); it is cleared by the next {@code doText} call
     */
    public List<IFragmentData> getData() {
        return this.lstFragmentData;
    }
}
