package edu.stanford.nlp.neural;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.Generics;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.ejml.simple.SimpleMatrix;

/* loaded from: input_file:edu/stanford/nlp/neural/Embedding.class */
/**
 * A table of word embeddings: a map from a word to its vector, stored as a
 * single-column {@link SimpleMatrix}.
 *
 * <p>Vectors can be supplied directly as a map, or loaded from text files. While
 * loading, common spellings of the unknown-word, sentence-start and sentence-end
 * tokens are rewritten to {@link #UNKNOWN_WORD}, {@link #START_WORD} and
 * {@link #END_WORD} respectively.
 */
public class Embedding {
    private Map<String, SimpleMatrix> wordVectors;
    private int embeddingSize;
    static final String START_WORD = "*START*";
    static final String END_WORD = "*END*";
    static final String UNKNOWN_WORD = "*UNK*";
    static final String UNKNOWN_NUMBER = "*NUM*";
    static final String UNKNOWN_CAPS = "*CAPS*";
    static final String UNKNOWN_CHINESE_YEAR = "*ZH_YEAR*";
    static final String UNKNOWN_CHINESE_NUMBER = "*ZH_NUM*";
    static final String UNKNOWN_CHINESE_PERCENT = "*ZH_PERCENT*";
    // The patterns below are not referenced inside this class; they are package-visible,
    // presumably for use by other classes in this package (verify against callers).
    static final Pattern NUMBER_PATTERN = Pattern.compile("-?[0-9][-0-9,.:]*");
    static final Pattern CAPS_PATTERN = Pattern.compile("[a-zA-Z]*[A-Z][a-zA-Z]*");
    static final Pattern CHINESE_YEAR_PATTERN = Pattern.compile("[〇零一二三四五六七八九０１２３４５６７８９]{4}+年");
    static final Pattern CHINESE_NUMBER_PATTERN = Pattern.compile("(?:[〇０零一二三四五六七八九０１２３４５６７８９十百万千亿]+[点多]?)+");
    static final Pattern CHINESE_PERCENT_PATTERN = Pattern.compile("百分之[〇０零一二三四五六七八九０１２３４５６７８９十点]+");
    static final Pattern DG_PATTERN = Pattern.compile(".*DG.*");

    /**
     * Wraps an existing word-to-vector map. The map is stored by reference (not copied).
     *
     * <p>If the map has no {@link #UNKNOWN_WORD} entry but does have one of the recognized
     * unknown-word aliases, an {@link #UNKNOWN_WORD} entry pointing at the same vector is
     * added to the supplied map.
     *
     * @param map word vectors keyed by word
     * @throws RuntimeException if the map contains no recognized unknown-word entry
     */
    public Embedding(Map<String, SimpleMatrix> map) {
        this.wordVectors = map;
        this.embeddingSize = getEmbeddingSize(map);
    }

    /**
     * Loads word vectors from a single file, detecting the embedding size from the file.
     *
     * @param str path of a file whose lines are a word followed by its vector components,
     *            separated by whitespace
     */
    public Embedding(String str) {
        this(str, 0);
    }

    /**
     * Loads word vectors from a single file.
     *
     * @param str path of a file whose lines are a word followed by its vector components,
     *            separated by whitespace
     * @param i   requested embedding size; a value {@code <= 0} means "use the size found in
     *            the file". Longer vectors in the file are truncated to this size.
     * @throws RuntimeException if a line holds fewer components than the requested size
     */
    public Embedding(String str, int i) {
        this.wordVectors = Generics.newHashMap();
        this.embeddingSize = i;
        loadWordVectors(str);
    }

    /**
     * Loads word vectors from a pair of parallel files, detecting the embedding size from
     * the vector file.
     *
     * @param str  path of the word file (one word per line)
     * @param str2 path of the vector file (one whitespace-separated vector per line)
     */
    public Embedding(String str, String str2) {
        this(str, str2, 0);
    }

    /**
     * Loads word vectors from a pair of parallel files: line <i>k</i> of the word file names
     * the word whose vector is on line <i>k</i> of the vector file.
     *
     * @param str  path of the word file (one word per line)
     * @param str2 path of the vector file (one whitespace-separated vector per line)
     * @param i    requested embedding size; a value {@code <= 0} means "use the size found in
     *             the file". Longer vectors in the file are truncated to this size.
     * @throws RuntimeException if a line holds fewer components than the requested size, or
     *                          if the word file has fewer lines than the vector file
     */
    public Embedding(String str, String str2, int i) {
        this.wordVectors = Generics.newHashMap();
        this.embeddingSize = i;
        loadWordVectors(str, str2);
    }

    /**
     * Reads a combined file in which every line is {@code word v1 v2 ... vn}.
     *
     * @param str path of the word vector file, read as UTF-8
     */
    private void loadWordVectors(String str) {
        System.err.println("# Loading embedding ...\n  word vector file = " + str);
        boolean warned = false;
        int numWords = 0;
        for (String line : IOUtils.readLines(str, "utf-8")) {
            String[] fields = line.split("\\s+");
            String word = normalizeSpecialWord(fields[0]);
            // The first field is the word itself; everything after it is the vector.
            int available = fields.length - 1;
            int dimension = fitDimension(available, warned);
            warned = warned || available > dimension;
            this.wordVectors.put(word, toColumnVector(fields, 1, dimension));
            numWords++;
        }
        System.err.println("  num words = " + numWords);
    }

    /**
     * Reads a word file and a vector file in lock step, pairing them line by line.
     * Iteration is driven by the vector file; surplus lines in the word file are ignored.
     *
     * @param str  path of the word file, read as UTF-8
     * @param str2 path of the vector file, read as UTF-8
     */
    private void loadWordVectors(String str, String str2) {
        System.err.println("# Loading embedding ...\n  word file = " + str + "\n  vector file = " + str2);
        boolean warned = false;
        int numWords = 0;
        Iterator<String> words = IOUtils.readLines(str, "utf-8").iterator();
        for (String vectorLine : IOUtils.readLines(str2, "utf-8")) {
            String[] fields = vectorLine.split("\\s+");
            if (!words.hasNext()) {
                // Same exception type an unchecked next() would raise, but with context.
                throw new java.util.NoSuchElementException("Word file " + str + " has only " + numWords
                        + " lines, fewer than vector file " + str2);
            }
            String word = normalizeSpecialWord(words.next());
            int available = fields.length;
            int dimension = fitDimension(available, warned);
            warned = warned || available > dimension;
            this.wordVectors.put(word, toColumnVector(fields, 0, dimension));
            numWords++;
        }
        System.err.println("  num words = " + numWords);
    }

    /**
     * Maps the known spellings of the unknown / sentence-start / sentence-end tokens to this
     * class's canonical constants; any other word is returned unchanged.
     */
    private static String normalizeSpecialWord(String word) {
        if (word.equals("UNKNOWN") || word.equals("UUUNKKK") || word.equals("UNK") || word.equals("*UNKNOWN*")) {
            return UNKNOWN_WORD;
        }
        if (word.equals("<s>")) {
            return START_WORD;
        }
        if (word.equals("</s>")) {
            // End-of-sentence marker: must not share (and overwrite) the START_WORD entry.
            return END_WORD;
        }
        return word;
    }

    /**
     * Reconciles the number of components found on a line with the configured embedding size.
     * If no size is configured yet ({@code <= 0}), the line's size is adopted. A one-time
     * warning is printed when a line has to be truncated.
     *
     * @param available     number of vector components present on the current line
     * @param alreadyWarned whether the truncation warning has already been printed
     * @return the number of components to keep from the line
     * @throws RuntimeException if the line has fewer components than the embedding size
     */
    private int fitDimension(int available, boolean alreadyWarned) {
        if (this.embeddingSize <= 0) {
            this.embeddingSize = available;
            System.err.println("  detected embedding size = " + available);
        }
        if (available < this.embeddingSize) {
            throw new RuntimeException("Word vectors file has dimension too small for requested numHid of " + this.embeddingSize);
        }
        if (available > this.embeddingSize && !alreadyWarned) {
            System.err.println("WARNING: Dimensionality of numHid parameter and word vectors do not match, deleting word vector dimensions to fit!");
        }
        // Here available >= embeddingSize, so the configured size is what we keep.
        return this.embeddingSize;
    }

    /**
     * Parses {@code dimension} consecutive fields, starting at {@code offset}, into a
     * {@code dimension x 1} matrix.
     */
    private static SimpleMatrix toColumnVector(String[] fields, int offset, int dimension) {
        double[][] column = new double[dimension][1];
        for (int row = 0; row < dimension; row++) {
            column[row][0] = Double.parseDouble(fields[offset + row]);
        }
        return new SimpleMatrix(column);
    }

    /** Returns the number of entries in the underlying word-vector map. */
    public int size() {
        return this.wordVectors.size();
    }

    /** Returns the value view of the underlying word-vector map. */
    public Collection<SimpleMatrix> values() {
        return this.wordVectors.values();
    }

    /** Returns the key view of the underlying word-vector map. */
    public Set<String> keySet() {
        return this.wordVectors.keySet();
    }

    /** Returns the entry view of the underlying word-vector map. */
    public Set<Map.Entry<String, SimpleMatrix>> entrySet() {
        return this.wordVectors.entrySet();
    }

    /**
     * Looks up the vector for a word, falling back to the {@link #UNKNOWN_WORD} vector when
     * the word is not a key of the map.
     *
     * @param str the word to look up
     * @return the word's vector, or the unknown-word vector ({@code null} if the map has no
     *         {@link #UNKNOWN_WORD} entry either)
     */
    public SimpleMatrix get(String str) {
        if (this.wordVectors.containsKey(str)) {
            return this.wordVectors.get(str);
        }
        return this.wordVectors.get(UNKNOWN_WORD);
    }

    /** Returns the vector stored under {@link #START_WORD}, or {@code null} if there is none. */
    public SimpleMatrix getStartWordVector() {
        return this.wordVectors.get(START_WORD);
    }

    /** Returns the vector stored under {@link #END_WORD}, or {@code null} if there is none. */
    public SimpleMatrix getEndWordVector() {
        return this.wordVectors.get(END_WORD);
    }

    /** Returns the vector stored under {@link #UNKNOWN_WORD}, or {@code null} if there is none. */
    public SimpleMatrix getUnknownWordVector() {
        return this.wordVectors.get(UNKNOWN_WORD);
    }

    /** Returns the underlying word-vector map itself (not a copy). */
    public Map<String, SimpleMatrix> getWordVectors() {
        return this.wordVectors;
    }

    /** Returns the embedding size currently recorded for this table. */
    public int getEmbeddingSize() {
        return this.embeddingSize;
    }

    /**
     * Replaces the word-vector map and recomputes the embedding size from it. The map is
     * stored by reference and may gain an {@link #UNKNOWN_WORD} entry, as in
     * {@link #Embedding(Map)}.
     *
     * @param map the new word vectors keyed by word
     * @throws RuntimeException if the map contains no recognized unknown-word entry
     */
    public void setWordVectors(Map<String, SimpleMatrix> map) {
        this.wordVectors = map;
        this.embeddingSize = getEmbeddingSize(map);
    }

    /**
     * Derives the embedding size from the element count of the unknown-word vector.
     *
     * <p>If the map lacks {@link #UNKNOWN_WORD}, the aliases {@code UNK}, {@code UUUNKKK},
     * {@code UNKNOWN} and {@code *UNKNOWN*} are tried (a later alias in that list wins when
     * several are present) and the chosen vector is additionally registered in {@code map}
     * under {@link #UNKNOWN_WORD}.
     *
     * @param map word vectors keyed by word; may be modified as described above
     * @return the number of elements of the unknown-word vector
     * @throws RuntimeException if neither {@link #UNKNOWN_WORD} nor any alias is present
     */
    private static int getEmbeddingSize(Map<String, SimpleMatrix> map) {
        if (!map.containsKey(UNKNOWN_WORD)) {
            String alias = "";
            // Same alias set the file loaders accept, so both construction paths agree.
            for (String candidate : new String[] {"UNK", "UUUNKKK", "UNKNOWN", "*UNKNOWN*"}) {
                if (map.containsKey(candidate)) {
                    alias = candidate;
                }
            }
            if (alias.equals("")) {
                throw new RuntimeException("! wordVectors used to initialize Embedding doesn't contain any recognized form of *UNK*");
            }
            map.put(UNKNOWN_WORD, map.get(alias));
        }
        return map.get(UNKNOWN_WORD).getNumElements();
    }
}
