/*
 * Decompiled with CFR 0.152.
 */
package com.xforceplus.ultraman.oqsengine.tokenizer.segmentation;

import com.xforceplus.ultraman.oqsengine.pojo.dto.entity.FieldConfig;
import com.xforceplus.ultraman.oqsengine.tokenizer.EmptyWorkdsIterator;
import com.xforceplus.ultraman.oqsengine.tokenizer.Tokenizer;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.lionsoul.jcseg.ISegment;
import org.lionsoul.jcseg.IWord;
import org.lionsoul.jcseg.dic.ADictionary;
import org.lionsoul.jcseg.dic.DictionaryFactory;
import org.lionsoul.jcseg.segmenter.SegmenterConfig;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;

/**
 * {@link Tokenizer} implementation that splits text into words with the jcseg
 * segmentation library.
 *
 * <p>Every instance builds its own {@link SegmenterConfig} and {@link ADictionary}
 * and loads all lexicon files matching {@code classpath*:/lexicon/*.lex}. The
 * {@link File} and {@link URL} constructors load an additional lexicon source on
 * top of that.
 *
 * <p>Tokenization behavior depends on the requested mode:
 * <ul>
 *   <li>{@code SEARCH} - a lazy iterator over the {@code ISegment.NLP} segmentation.</li>
 *   <li>{@code STORAGE} - the distinct {@code ISegment.NLP} words followed by the
 *       extra {@code ISegment.MOST} words that pass the storage filter.</li>
 *   <li>anything else (including {@code null}) - an empty iterator.</li>
 * </ul>
 */
public class JcsegTokenizer implements Tokenizer {
    private final SegmenterConfig config;
    private final ADictionary dic;

    /**
     * Creates a tokenizer that only uses the lexicon files found on the classpath.
     *
     * @throws IOException if a classpath lexicon cannot be resolved or read
     */
    public JcsegTokenizer() throws IOException {
        this.config = new SegmenterConfig(true);
        this.dic = DictionaryFactory.createDefaultDictionary(this.config, false);
        this.initFromDict();
    }

    /**
     * Creates a tokenizer that, after the classpath lexicons, also loads the
     * given lexicon directory.
     *
     * @param lexDir directory handed to {@code ADictionary.loadDirectory} by absolute path
     * @throws IOException if a lexicon cannot be read
     */
    public JcsegTokenizer(File lexDir) throws IOException {
        this();
        this.dic.loadDirectory(lexDir.getAbsolutePath());
    }

    /**
     * Creates a tokenizer that, after the classpath lexicons, also loads a
     * lexicon from the given URL.
     *
     * @param url location of the additional lexicon; its stream is closed after loading
     * @throws IOException if the URL cannot be opened or a lexicon cannot be read
     */
    public JcsegTokenizer(URL url) throws IOException {
        this();
        try (InputStream in = url.openStream()) {
            this.dic.load(in);
        }
    }

    /**
     * Loads every {@code classpath*:/lexicon/*.lex} resource into the dictionary.
     *
     * @throws IOException if the resources cannot be resolved or read
     */
    private void initFromDict() throws IOException {
        ClassLoader cl = this.getClass().getClassLoader();
        PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(cl);
        for (Resource resource : resolver.getResources("classpath*:/lexicon/*.lex")) {
            // Close the stream here, also when load() fails part-way, instead of
            // relying on the dictionary to do it (same handling as the URL constructor).
            try (InputStream in = resource.getInputStream()) {
                this.dic.load(in);
            }
        }
    }

    /**
     * Splits {@code value} into words according to {@code mode}.
     *
     * @param value text to tokenize; {@code null} or empty yields an empty iterator
     * @param mode  tokenization mode; modes other than {@code SEARCH} and
     *              {@code STORAGE} (including {@code null}) yield an empty iterator
     * @return iterator over the produced words
     * @throws RuntimeException wrapping the {@link IOException} raised by the segmenter
     */
    @Override
    public Iterator<String> tokenize(String value, Tokenizer.TokenizerMode mode) {
        if (value == null || value.isEmpty()) {
            return EmptyWorkdsIterator.getInstance();
        }
        if (Tokenizer.TokenizerMode.SEARCH == mode) {
            return this.segment(ISegment.NLP, value);
        }
        if (Tokenizer.TokenizerMode.STORAGE == mode) {
            return this.tokenizeForStorage(value);
        }
        return EmptyWorkdsIterator.getInstance();
    }

    /**
     * Builds the STORAGE word list: distinct NLP words in first-seen order,
     * followed by the MOST words that survive the storage filter.
     */
    private Iterator<String> tokenizeForStorage(String value) {
        // Both segmenters are created before either one is drained.
        JcsegIterator nlpIter = this.segment(ISegment.NLP, value);
        JcsegIterator mostIter = this.segment(ISegment.MOST, value);

        // Insertion-ordered set: removes duplicates while keeping segmentation order.
        Set<String> nlpWords = new LinkedHashSet<>();
        while (nlpIter.hasNext()) {
            nlpWords.add(nlpIter.next());
        }

        List<String> results = new ArrayList<>(nlpWords);
        while (mostIter.hasNext()) {
            String word = mostIter.next();
            if (nlpWords.contains(word)) {
                // Already emitted by the NLP pass.
                continue;
            }
            if (word.length() == 1 || this.isEnOrNumber(word)) {
                // Single-character words and words containing an ASCII letter or
                // digit are only kept when the NLP pass produced them.
                continue;
            }
            results.add(word);
        }
        return results.iterator();
    }

    /** Creates a word iterator of the given segmentation type, wrapping I/O failures. */
    private JcsegIterator segment(ISegment.Type type, String value) {
        try {
            return new JcsegIterator(this.config, this.dic, type, value);
        } catch (IOException e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Tells whether {@code word} contains at least one ASCII letter or digit.
     * Despite the name, a single matching character is enough.
     */
    private boolean isEnOrNumber(String word) {
        for (char c : word.toCharArray()) {
            if (this.isAlphabets(c) || this.isNumber(c)) {
                return true;
            }
        }
        return false;
    }

    /** Tells whether {@code c} is in {@code A-Z} or {@code a-z}. */
    private boolean isAlphabets(char c) {
        return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z';
    }

    /** Tells whether {@code c} is in {@code 0-9}. */
    private boolean isNumber(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * @return {@code FieldConfig.FuzzyType.SEGMENTATION}
     */
    @Override
    public FieldConfig.FuzzyType support() {
        return FieldConfig.FuzzyType.SEGMENTATION;
    }

    /**
     * Iterator over the words a jcseg segmenter produces for one input string.
     * It always holds one word of look-ahead so {@link #hasNext()} needs no I/O.
     */
    private static class JcsegIterator implements Iterator<String> {
        private final ISegment seg;
        /** Look-ahead word; {@code null} once the segmenter is exhausted. */
        private IWord nextWord;

        /**
         * Creates the segmenter for {@code type}, feeds it {@code value} and
         * reads the first word.
         *
         * @throws IOException if the segmenter fails while resetting or reading
         */
        public JcsegIterator(SegmenterConfig config, ADictionary dic, ISegment.Type type, String value) throws IOException {
            this.seg = type.factory.create(config, dic);
            Reader input = new StringReader(value);
            this.seg.reset(input);
            this.nextWord = this.seg.next();
        }

        @Override
        public boolean hasNext() {
            return this.nextWord != null;
        }

        /**
         * Returns the look-ahead word and advances the segmenter by one word.
         *
         * @throws NoSuchElementException if no word is left
         * @throws RuntimeException       wrapping the {@link IOException} raised by the segmenter
         */
        @Override
        public String next() {
            IWord current = this.nextWord;
            if (current == null) {
                // Honor the Iterator contract instead of failing with a NullPointerException.
                throw new NoSuchElementException();
            }
            try {
                this.nextWord = this.seg.next();
            } catch (IOException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
            return current.getValue();
        }
    }
}

