package org.apache.tika.eval.core.textstats;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;
import org.apache.tika.eval.core.langid.LanguageIDWrapper;
import org.apache.tika.eval.core.textstats.BytesRefCalculator;
import org.apache.tika.eval.core.tokens.AnalyzerManager;
import org.apache.tika.eval.core.tokens.TokenCounts;
import org.apache.tika.language.detect.LanguageResult;

/* loaded from: input_file:org/apache/tika/eval/core/textstats/CompositeTextStatsCalculator.class */
/**
 * Runs a group of {@link TextStatsCalculator}s over one string and collects their results.
 *
 * <p>Each calculator handed to the constructor is routed by its type:
 * <ul>
 *   <li>{@link StringStatsCalculator}s receive the raw string;</li>
 *   <li>{@link TokenCountStatsCalculator}s receive the {@link TokenCounts} produced by
 *       running the analyzer over the string;</li>
 *   <li>{@link LanguageAwareTokenCountStats} receive the language-id results plus the
 *       token counts;</li>
 *   <li>{@link BytesRefCalculator}s are fed the bytes of every token, with a single
 *       space byte between consecutive tokens.</li>
 * </ul>
 *
 * <p>The string is tokenized at most once per {@link #calculate(String)} call, and only
 * when at least one token-based calculator is registered.
 */
public class CompositeTextStatsCalculator {
    /** Field name handed to the analyzer when creating the token stream. */
    private static final String FIELD = "f";
    /** Token limit used to build the default analyzer in the single-argument constructor. */
    private static final int DEFAULT_MAX_TOKENS = 10000000;

    /** Single space byte inserted between tokens fed to the bytes-ref calculators. */
    private final byte[] whitespace = new byte[]{32};
    private final Analyzer analyzer;
    private final LanguageIDWrapper languageIDWrapper;
    private final List<LanguageAwareTokenCountStats> languageAwareTokenCountStats = new ArrayList<>();
    private final List<TokenCountStatsCalculator> tokenCountStatCalculators = new ArrayList<>();
    private final List<StringStatsCalculator> stringStatCalculators = new ArrayList<>();
    private final List<BytesRefCalculator> bytesRefCalculators = new ArrayList<>();

    /**
     * Creates a composite calculator backed by the general analyzer of a new
     * {@link AnalyzerManager} (limited to {@link #DEFAULT_MAX_TOKENS}) and a new
     * {@link LanguageIDWrapper}.
     *
     * @param list calculators to run
     * @throws IllegalArgumentException if a calculator is of an unsupported type
     */
    public CompositeTextStatsCalculator(List<TextStatsCalculator> list) {
        this(list, AnalyzerManager.newInstance(DEFAULT_MAX_TOKENS).getGeneralAnalyzer(), new LanguageIDWrapper());
    }

    /**
     * Creates a composite calculator with an explicit analyzer and language identifier.
     *
     * @param list calculators to run; each must be a {@link StringStatsCalculator},
     *     {@link LanguageAwareTokenCountStats}, {@link TokenCountStatsCalculator} or
     *     {@link BytesRefCalculator} (checked in that order)
     * @param analyzer analyzer used for tokenization; may be {@code null} only if no
     *     token-based calculator is in {@code list}
     * @param languageIDWrapper language identifier; may be {@code null} only if no
     *     {@link LanguageAwareTokenCountStats} is in {@code list}
     * @throws IllegalArgumentException if a calculator is of an unsupported type, or if
     *     a collaborator required by one of the calculators is {@code null}
     */
    public CompositeTextStatsCalculator(List<TextStatsCalculator> list, Analyzer analyzer, LanguageIDWrapper languageIDWrapper) {
        this.analyzer = analyzer;
        this.languageIDWrapper = languageIDWrapper;
        for (TextStatsCalculator textStatsCalculator : list) {
            // The order of these checks is significant for calculators that implement
            // more than one of the interfaces: the first match wins.
            if (textStatsCalculator instanceof StringStatsCalculator) {
                this.stringStatCalculators.add((StringStatsCalculator) textStatsCalculator);
            } else if (textStatsCalculator instanceof LanguageAwareTokenCountStats) {
                if (languageIDWrapper == null) {
                    throw new IllegalArgumentException("Must specify a LanguageIdWrapper if you want to calculate languageAware stats: " + textStatsCalculator.getClass());
                }
                // Language-aware stats consume token counts too, so tokenization
                // (and therefore the analyzer) is required; fail here rather than
                // with a NullPointerException inside calculate().
                if (analyzer == null) {
                    throw new IllegalArgumentException("Analyzer must not be null if you are using a LanguageAwareTokenCountStats: " + textStatsCalculator.getClass());
                }
                this.languageAwareTokenCountStats.add((LanguageAwareTokenCountStats) textStatsCalculator);
            } else if (textStatsCalculator instanceof TokenCountStatsCalculator) {
                if (analyzer == null) {
                    throw new IllegalArgumentException("Analyzer must not be null if you are using a TokenCountStats: " + textStatsCalculator.getClass());
                }
                this.tokenCountStatCalculators.add((TokenCountStatsCalculator) textStatsCalculator);
            } else if (textStatsCalculator instanceof BytesRefCalculator) {
                if (analyzer == null) {
                    throw new IllegalArgumentException("Analyzer must not be null if you are using a BytesRefCalculator: " + textStatsCalculator.getClass());
                }
                this.bytesRefCalculators.add((BytesRefCalculator) textStatsCalculator);
            } else {
                throw new IllegalArgumentException("I regret I don't yet handle: " + textStatsCalculator.getClass());
            }
        }
    }

    /**
     * Runs every registered calculator over {@code str}.
     *
     * <p>Results are keyed by the calculator's class (for bytes-ref calculators, by the
     * class reported by {@code getOuterClass()}), so two calculators of the same class
     * overwrite each other. When language-aware calculators are present, the language
     * results are also stored under {@code LanguageIDWrapper.class}.
     *
     * @param str text to analyze
     * @return map from calculator class to that calculator's result
     * @throws RuntimeException wrapping any {@link IOException} raised during tokenization
     */
    public Map<Class, Object> calculate(String str) {
        Map<Class, Object> results = new HashMap<>();
        for (StringStatsCalculator stringStatsCalculator : this.stringStatCalculators) {
            results.put(stringStatsCalculator.getClass(), stringStatsCalculator.calculate(str));
        }

        // Tokenize once, and only if some calculator actually needs tokens.
        boolean needsTokens = !this.tokenCountStatCalculators.isEmpty()
                || !this.languageAwareTokenCountStats.isEmpty()
                || !this.bytesRefCalculators.isEmpty();
        TokenCounts tokenCounts = null;
        if (needsTokens) {
            try {
                tokenCounts = tokenize(str, results);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        if (!this.languageAwareTokenCountStats.isEmpty()) {
            List<LanguageResult> languages;
            if (results.containsKey(LanguageIDWrapper.class)) {
                // Reuse the result already stored under LanguageIDWrapper.class by the
                // string-stats pass instead of running language id a second time.
                @SuppressWarnings("unchecked")
                List<LanguageResult> cached = (List<LanguageResult>) results.get(LanguageIDWrapper.class);
                languages = cached;
            } else {
                languages = this.languageIDWrapper.calculate(str);
            }
            results.put(LanguageIDWrapper.class, languages);
            for (LanguageAwareTokenCountStats stats : this.languageAwareTokenCountStats) {
                results.put(stats.getClass(), stats.calculate(languages, tokenCounts));
            }
        }

        for (TokenCountStatsCalculator tokenCountStatsCalculator : this.tokenCountStatCalculators) {
            results.put(tokenCountStatsCalculator.getClass(), tokenCountStatsCalculator.calculate(tokenCounts));
        }
        return results;
    }

    /**
     * Tokenizes {@code str} with the configured analyzer, counting every token and, if
     * bytes-ref calculators are registered, feeding them the token bytes and storing
     * their finished results in {@code map}.
     *
     * <p>The token stream is consumed in the order reset, incrementToken, end, close;
     * try-with-resources guarantees the stream is closed even when tokenization fails.
     *
     * @param str text to tokenize
     * @param map result map that receives the bytes-ref calculators' results
     * @return counts of the tokens emitted by the analyzer
     * @throws IOException if the token stream fails
     */
    private TokenCounts tokenize(String str, Map<Class, Object> map) throws IOException {
        TokenCounts tokenCounts = new TokenCounts();

        if (this.bytesRefCalculators.isEmpty()) {
            try (TokenStream tokenStream = this.analyzer.tokenStream(FIELD, str)) {
                CharTermAttribute termAttribute = tokenStream.getAttribute(CharTermAttribute.class);
                tokenStream.reset();
                while (tokenStream.incrementToken()) {
                    tokenCounts.increment(termAttribute.toString());
                }
                tokenStream.end();
            }
            return tokenCounts;
        }

        List<BytesRefCalculator.BytesRefCalcInstance> instances = new ArrayList<>(this.bytesRefCalculators.size());
        for (BytesRefCalculator bytesRefCalculator : this.bytesRefCalculators) {
            instances.add(bytesRefCalculator.getInstance());
        }

        try (TokenStream tokenStream = this.analyzer.tokenStream(FIELD, str)) {
            TermToBytesRefAttribute bytesAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
            tokenStream.reset();
            boolean firstToken = true;
            while (tokenStream.incrementToken()) {
                BytesRef bytesRef = bytesAttribute.getBytesRef();
                // NOTE(review): relies on the attribute implementation's toString()
                // yielding the term text -- confirm for the analyzers in use.
                tokenCounts.increment(bytesAttribute.toString());
                for (BytesRefCalculator.BytesRefCalcInstance instance : instances) {
                    // Separate consecutive tokens with a single space byte.
                    if (!firstToken) {
                        instance.update(this.whitespace, 0, 1);
                    }
                    instance.update(bytesRef.bytes, bytesRef.offset, bytesRef.length);
                }
                firstToken = false;
            }
            tokenStream.end();
        }

        for (BytesRefCalculator.BytesRefCalcInstance instance : instances) {
            map.put(instance.getOuterClass(), instance.finish());
        }
        return tokenCounts;
    }
}
