package org.lionsoul.jcseg.test;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.Iterator;
import java.util.List;
import jodd.util.StringPool;
import org.lionsoul.jcseg.ISegment;
import org.lionsoul.jcseg.IWord;
import org.lionsoul.jcseg.dic.ADictionary;
import org.lionsoul.jcseg.dic.DictionaryFactory;
import org.lionsoul.jcseg.extractor.KeyphraseExtractor;
import org.lionsoul.jcseg.extractor.KeywordsExtractor;
import org.lionsoul.jcseg.extractor.SummaryExtractor;
import org.lionsoul.jcseg.extractor.impl.TextRankKeyphraseExtractor;
import org.lionsoul.jcseg.extractor.impl.TextRankKeywordsExtractor;
import org.lionsoul.jcseg.extractor.impl.TextRankSummaryExtractor;
import org.lionsoul.jcseg.segmenter.NLPSeg;
import org.lionsoul.jcseg.segmenter.SegmenterConfig;
import org.lionsoul.jcseg.sentence.SentenceSeg;
import org.lionsoul.jcseg.util.ArrayUtil;
import org.testcontainers.shaded.org.bouncycastle.i18n.ErrorBundle;

/* loaded from: input_file:BOOT-INF/lib/jcseg-core-2.6.2.jar:org/lionsoul/jcseg/test/JcsegTest.class */
public class JcsegTest {
    // Configuration used for the interactive tokenizer; also the source the
    // constructor clones to derive the extractor configuration.
    // NOTE(review): the meaning of the `true` argument is not visible here —
    // presumably it asks SegmenterConfig to load its properties file; confirm.
    final SegmenterConfig tokenizerConfig = new SegmenterConfig(true);
    // Dictionary obtained in the constructor from DictionaryFactory and handed
    // to every segmenter this class creates.
    final ADictionary dic;
    // Segmenter used by tokenize(); replaced whenever resetMode() is called.
    ISegment tokenizerSeg;
    // Segmenter built in the constructor from the cloned configuration.
    // It is assigned but never read anywhere else in this class.
    ISegment extractorSeg;
    // Extractors backing keywords(), keyphrase() and sentence()/summary().
    KeywordsExtractor keywordsExtractor;
    KeyphraseExtractor keyphraseExtractor;
    SummaryExtractor summaryExtractor;

    /**
     * Creates the demo: loads the dictionary, builds the tokenizer and extractor
     * segmenters (both in COMPLEX mode), wires up the three TextRank extractors
     * and prints the active configuration to standard output.
     *
     * @throws IOException                declared by the dictionary/segmenter setup calls
     * @throws CloneNotSupportedException declared by the configuration clone call
     */
    public JcsegTest() throws IOException, CloneNotSupportedException {
        // NOTE(review): "m9591clone" looks like a decompiler-renamed clone();
        // confirm against the SegmenterConfig actually on the classpath.
        final SegmenterConfig extractorConfig = this.tokenizerConfig.m9591clone();

        this.dic = DictionaryFactory.createSingletonDictionary(this.tokenizerConfig);
        this.tokenizerSeg = ISegment.COMPLEX.factory.create(this.tokenizerConfig, this.dic);

        // The extractor segmenter gets its own tweaked copy of the configuration.
        extractorConfig.setAppendCJKPinyin(false);
        extractorConfig.setClearStopwords(true);
        extractorConfig.setKeepUnregWords(false);
        this.extractorSeg = ISegment.COMPLEX.factory.create(extractorConfig, this.dic);

        // NOTE(review): the extractors are built on tokenizerSeg, not on the
        // extractorSeg prepared just above — verify this is intentional.
        this.keywordsExtractor = new TextRankKeywordsExtractor(this.tokenizerSeg);
        final TextRankKeyphraseExtractor phraseExtractor = new TextRankKeyphraseExtractor(this.tokenizerSeg);
        this.keyphraseExtractor = phraseExtractor;
        this.summaryExtractor = new TextRankSummaryExtractor(this.tokenizerSeg, new SentenceSeg());

        phraseExtractor.setAutoMinLength(4);
        phraseExtractor.setMaxWordsNum(4);

        System.out.println("jcseg参数设置：");
        System.out.println("当前加载的配置文件：" + this.tokenizerConfig.getPropertieFile());
        System.out.println("最大切分匹配词数：" + this.tokenizerConfig.MAX_LENGTH);
    }

    /**
     * Tokenizes {@code str} with the current tokenizer segmenter and prints the
     * tokens, followed by a statistics line, to standard output.
     *
     * <p>Each token is rendered as {@code value[position,length]}, followed by
     * {@code /partOfSpeech} when the word carries one, and by {@code /entities}
     * (joined with {@code |}) when the active segmenter is an {@link NLPSeg}.
     * Tokens are separated by a single space.
     *
     * @param str the text to tokenize
     * @throws IOException if the segmenter fails while reading the text
     */
    public void tokenize(String str) throws IOException {
        final StringBuilder rendered = new StringBuilder();
        final long startNanos = System.nanoTime();
        final boolean withEntities = this.tokenizerSeg instanceof NLPSeg;
        int tokenCount = 0;

        this.tokenizerSeg.reset(new StringReader(str));
        IWord word;
        while ((word = this.tokenizerSeg.next()) != null) {
            if (tokenCount > 0) {
                rendered.append(' ');
            }
            rendered.append(word.getValue())
                    .append('[')
                    .append(word.getPosition())
                    .append(',')
                    .append(word.getLength())
                    .append(']');
            // Guard against an empty array as well as null before reading index 0.
            if (word.getPartSpeech() != null && word.getPartSpeech().length > 0) {
                rendered.append('/').append(word.getPartSpeech()[0]);
            }
            if (withEntities) {
                rendered.append('/').append(ArrayUtil.implode("|", word.getEntity()));
            }
            tokenCount++;
        }

        // Subtract the long timestamps first: casting each nanoTime value to float
        // rounds it to 24 significant bits and makes the difference meaningless.
        final double elapsedSeconds = (System.nanoTime() - startNanos) / 1.0E9d;

        System.out.println("分词结果：");
        System.out.println(rendered.toString());
        System.out.format("Done, total:%s, tokens:%s, in %.5fsec\n",
                this.tokenizerSeg.getStreamPosition(), tokenCount, elapsedSeconds);
    }

    /**
     * Replaces the tokenizer segmenter with a new one of the given type, built
     * from the existing tokenizer configuration and dictionary.
     *
     * <p>Only {@code tokenizerSeg} is reassigned here; the extractors created in
     * the constructor are not touched.
     *
     * @param str segmenter type name, resolved through {@code ISegment.Type.fromString}
     *            (NOTE(review): how unknown names are handled is not visible here)
     */
    public void resetMode(String str) {
        final ISegment replacement =
                ISegment.Type.fromString(str).factory.create(this.tokenizerConfig, this.dic);
        this.tokenizerSeg = replacement;
    }

    /**
     * Extracts keywords from {@code str} and prints them, together with the
     * time the extraction took, to standard output.
     *
     * @param str the text to analyse
     * @throws IOException if the extractor fails while reading the text
     */
    public void keywords(String str) throws IOException {
        final long startNanos = System.nanoTime();
        final List<String> keywords = this.keywordsExtractor.getKeywordsFromString(str);
        // Long subtraction first; casting the raw timestamps to float loses precision.
        final double elapsedSeconds = (System.nanoTime() - startNanos) / 1.0E9d;

        System.out.println("Top10关键词：");
        System.out.println(keywords);
        System.out.format("Done in %.5fsec\n", elapsedSeconds);
    }

    /**
     * Extracts key phrases from {@code str} and prints them, together with the
     * time the extraction took, to standard output.
     *
     * @param str the text to analyse
     * @throws IOException if the extractor fails while reading the text
     */
    public void keyphrase(String str) throws IOException {
        final long startNanos = System.nanoTime();
        final List<String> phrases = this.keyphraseExtractor.getKeyphraseFromString(str);
        // Long subtraction first; casting the raw timestamps to float loses precision.
        final double elapsedSeconds = (System.nanoTime() - startNanos) / 1.0E9d;

        System.out.println("Top10关键短语：");
        System.out.println(phrases);
        System.out.format("Done in %.5fsec\n", elapsedSeconds);
    }

    /**
     * Extracts the key sentences of {@code str} and prints them as a numbered
     * list (starting at 1), followed by the time the extraction took.
     *
     * @param str the text to analyse
     * @throws IOException if the extractor fails while reading the text
     */
    public void sentence(String str) throws IOException {
        final long startNanos = System.nanoTime();
        final List<String> sentences = this.summaryExtractor.getKeySentenceFromString(str);
        // Long subtraction first; casting the raw timestamps to float loses precision.
        final double elapsedSeconds = (System.nanoTime() - startNanos) / 1.0E9d;

        System.out.println("Top6相关句子：");
        System.out.println("+-Key sentence: ");
        int rank = 1;
        for (String keySentence : sentences) {
            System.out.println(rank + ": " + keySentence);
            rank++;
        }
        System.out.format("Done in %.5fsec\n", elapsedSeconds);
    }

    /**
     * Produces a summary of {@code str} and prints it, together with the time
     * the extraction took, to standard output.
     *
     * @param str the text to summarise
     * @throws IOException if the extractor fails while reading the text
     */
    public void summary(String str) throws IOException {
        final long startNanos = System.nanoTime();
        // NOTE(review): 86 is presumably the maximum summary length — confirm
        // against SummaryExtractor.getSummaryFromString.
        final String summaryText = this.summaryExtractor.getSummaryFromString(str, 86);
        // Long subtraction first; casting the raw timestamps to float loses precision.
        final double elapsedSeconds = (System.nanoTime() - startNanos) / 1.0E9d;

        System.out.println("摘要结果：");
        System.out.println(summaryText);
        System.out.format("Done in %.5fsec\n", elapsedSeconds);
    }

    /**
     * Interactive console demo.
     *
     * <p>Prints and tokenizes a built-in sample text, shows the help menu, then
     * reads lines from standard input until end-of-stream. A line that is one of
     * the known {@code :commands} switches the tokenizer mode or the extraction
     * action (or prints help / quits) and is NOT processed as text. Every other
     * non-blank line, including unrecognised {@code :xyz} input, is handed to the
     * currently selected action.
     *
     * @param strArr command-line arguments (unused)
     * @throws IOException                if reading standard input or processing a line fails
     * @throws CloneNotSupportedException propagated from the {@code JcsegTest} constructor
     */
    public static void main(String[] strArr) throws IOException, CloneNotSupportedException {
        // Action applied to each input line; anything else means "tokenize".
        final int actionTokenize = 0;
        final int actionKeywords = 1;
        final int actionKeyphrase = 2;
        final int actionSentence = 3;
        final int actionSummary = 4;

        // {command name as typed and shown in the prompt, key handed to resetMode}
        final String[][] tokenizerModes = {
            {"complex", "complex"},
            {"simple", "simple"},
            {"most", "most"},
            {"detect", "detect"},
            {"delimiter", "delimiter"},
            {"NLP", "nlp"},
            {"ngram", "ngram"},
        };

        final String demoText = "歧义和同义词:研究生命起源，混合词: 做B超检查身体，x射线本质是什么，今天去奇都ktv唱卡拉ok去，哆啦a梦是一个动漫中的主角，单位和全角: 2009年８月６日开始大学之旅，岳阳今天的气温为38.6℃, 也就是101.48℉, 中文数字/分数: 你分三十分之二, 小陈拿三十分之五,剩下的三十分之二十三全部是我的，那是一九九八年前的事了，四川麻辣烫很好吃，五四运动留下的五四精神。笔记本五折包邮亏本大甩卖。人名识别: 我是陈鑫，也是jcseg的作者，三国时期的诸葛亮是个天才，我们一起给刘翔加油，罗志高兴奋极了因为老吴送了他一台笔记本。冰岛时间7月1日，正在当地拍片的汤姆·克鲁斯通过发言人承认，他与第三任妻子凯蒂·赫尔墨斯（第一二任妻子分别为咪咪·罗杰斯、妮可·基德曼）的婚姻即将结束。配对标点: 本次『畅想杯』黑客技术大赛的得主为电信09-2BF的张三，奖励C++程序设计语言一书和【畅想网络】的『PHP教程』一套。特殊字母: 【Ⅰ】（Ⅱ），英文数字: bug report chenxin619315@gmail.com or visit http://code.google.com/p/jcseg, we all admire the hacker spirit!特殊数字: ① ⑩ ⑽ ㈩.";

        int action = actionTokenize;
        String promptLabel = "tokenizer:complex";
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(System.in));
        JcsegTest jcsegTest = new JcsegTest();

        System.out.println(demoText);
        try {
            jcsegTest.tokenize(demoText);
            printHelpMenu();
        } catch (IOException e) {
            // A failing warm-up run must not prevent the interactive loop from starting.
            e.printStackTrace();
        }

        while (true) {
            System.out.print("jcseg~" + promptLabel + ">> ");
            String readLine = bufferedReader.readLine();
            if (readLine == null) {
                // End of input stream.
                System.out.println("Bye!");
                return;
            }
            String input = readLine.trim();
            if (input.isEmpty()) {
                continue;
            }

            if (input.charAt(0) == ':') {
                final String command = input.substring(1);

                String tokenizerKey = null;
                for (String[] mode : tokenizerModes) {
                    if (mode[0].equals(command)) {
                        tokenizerKey = mode[1];
                        break;
                    }
                }

                boolean handled = true;
                if (tokenizerKey != null) {
                    jcsegTest.resetMode(tokenizerKey);
                    promptLabel = "tokenizer:" + command;
                    action = actionTokenize;
                    System.out.println("Entered " + command + " tokenize mode!");
                } else if ("keywords".equals(command)) {
                    promptLabel = "keywords";
                    action = actionKeywords;
                    System.out.println("Entered keywords extract mode!");
                } else if ("keyphrase".equals(command)) {
                    promptLabel = "keyphrase";
                    action = actionKeyphrase;
                    System.out.println("Entered keyphrase extract mode!");
                } else if ("sentence".equals(command)) {
                    promptLabel = "sentence";
                    action = actionSentence;
                    System.out.println("Entered sentence extract mode!");
                } else if ("summary".equals(command)) {
                    promptLabel = "summary";
                    action = actionSummary;
                    System.out.println("Entered summary extract mode!");
                } else if ("help".equals(command)) {
                    printHelpMenu();
                } else if ("quit".equals(command)) {
                    System.out.println("Thanks for trying jcseg, Bye!");
                    System.exit(0);
                } else {
                    // Not a known command: treat it as ordinary text below.
                    handled = false;
                }
                if (handled) {
                    // A command only changes state; it is never fed to the segmenter.
                    continue;
                }
            }

            switch (action) {
                case actionKeywords:
                    jcsegTest.keywords(input);
                    break;
                case actionKeyphrase:
                    jcsegTest.keyphrase(input);
                    break;
                case actionSentence:
                    jcsegTest.sentence(input);
                    break;
                case actionSummary:
                    jcsegTest.summary(input);
                    break;
                default:
                    jcsegTest.tokenize(input);
                    break;
            }
        }
    }

    /**
     * Prints the boxed help menu listing the available {@code :commands} to
     * standard output, one line per {@code println} call.
     */
    static void printHelpMenu() {
        final String[] menuLines = {
            "+--------Jcseg chinese word tokenizer demo-------------------+",
            "|- @Author chenxin<chenxin619315@gmail.com>                  |",
            "|- :seg_mode  : switch to specified tokenizer mode.          |",
            "|- (:complex,:simple,:most,:detect,:delimiter,:NLP,:ngram)   |",
            "|- :keywords  : switch to keywords extract mode.             |",
            "|- :keyphrase : switch to keyphrase extract mode.            |",
            "|- :sentence  : switch to sentence extract mode.             |",
            "|- :summary   : switch to summary extract mode.              |",
            "|- :help      : print this help menu.                        |",
            "|- :quit      : to exit the program.                         |",
            "+------------------------------------------------------------+",
        };
        for (String menuLine : menuLines) {
            System.out.println(menuLine);
        }
    }
}
