/**
 * Builds a map of 0 matrices mirroring the given map: every key of
 * {@code map} is paired with a newly constructed matrix that has the same
 * number of rows and columns as the matrix stored under that key.
 *
 * @param map the matrices whose keys and dimensions are copied
 * @return a new map from each original key to a fresh matrix of matching size
 */
private static Map<String, SimpleMatrix> initDerivatives(Map<String, SimpleMatrix> map) {
  Map<String, SimpleMatrix> zeroed = Generics.newTreeMap();
  for (Map.Entry<String, SimpleMatrix> source : map.entrySet()) {
    SimpleMatrix original = source.getValue();
    SimpleMatrix blank = new SimpleMatrix(original.numRows(), original.numCols());
    zeroed.put(source.getKey(), blank);
  }
  return zeroed;
}
} // closes a scope that was opened outside this excerpt
static TreeMap<Integer, String> findSeparators(List<Tree> sentence) { TreeMap<Integer, String> separators = Generics.newTreeMap(); for (int index = 0; index < sentence.size(); ++index) { Tree leaf = sentence.get(index).children()[0]; String value = leaf.value(); for (char[] equivalentSeparator : equivalentSeparators) { value = value.replace(equivalentSeparator[0], equivalentSeparator[1]); } if (separatorRegex.matcher(value).matches()) { // TODO: put "value" instead? Perhaps do this next time we rebuild all models separators.put(index, leaf.value()); } } return separators; }
void readWordVectors() { Embedding embedding = new Embedding(op.wordVectors, op.numHid); this.wordVectors = Generics.newTreeMap(); // Map<String, SimpleMatrix> rawWordVectors = NeuralUtils.readRawWordVectors(op.wordVectors, op.numHid); // for (String word : rawWordVectors.keySet()) { for (String word : embedding.keySet()) { // TODO: factor out unknown word vector code from DVParser wordVectors.put(word, embedding.get(word)); } String unkWord = op.unkWord; SimpleMatrix unknownWordVector = wordVectors.get(unkWord); wordVectors.put(UNKNOWN_WORD, unknownWordVector); if (unknownWordVector == null) { throw new RuntimeException("Unknown word vector not specified in the word vector file"); } }
/**
 * Averages, name by name, the unary matrices found across several maps.
 * <br>
 * For each name produced by {@code getUnaryMatrixNames(maps)}, the matrices
 * stored under that name are summed over the maps that contain the name,
 * and the sum is divided by the number of maps that contributed.
 *
 * @param maps the maps whose matrices are averaged
 * @return a new map from each name to its averaged matrix
 */
public static Map<String, SimpleMatrix> averageUnaryMatrices(List<Map<String, SimpleMatrix>> maps) {
  Map<String, SimpleMatrix> result = Generics.newTreeMap();
  for (String name : getUnaryMatrixNames(maps)) {
    SimpleMatrix sum = null;
    int contributors = 0;
    for (Map<String, SimpleMatrix> candidate : maps) {
      if (candidate.containsKey(name)) {
        SimpleMatrix current = candidate.get(name);
        ++contributors;
        // The first contributor seeds the running sum; later ones are added.
        sum = (sum == null) ? current : sum.plus(current);
      }
    }
    result.put(name, sum.divide(contributors));
  }
  return result;
}
/**
 * Fills {@code this.wordVectors} with one vector from
 * {@code randomWordVector()} per distinct word.
 * <br>
 * The vocabulary is {@code UNKNOWN_WORD} plus the label value of every leaf
 * of every training tree, lowercased when {@code op.lowercaseWordVectors}
 * is set.
 *
 * @param trainingTrees trees whose leaves supply the vocabulary
 * @throws RuntimeException if {@code op.numHid} is 0
 */
void initRandomWordVectors(List<Tree> trainingTrees) {
  if (op.numHid == 0) {
    throw new RuntimeException("Cannot create random word vectors for an unknown numHid");
  }

  Set<String> vocabulary = Generics.newHashSet();
  vocabulary.add(UNKNOWN_WORD);
  for (Tree tree : trainingTrees) {
    for (Tree leaf : tree.getLeaves()) {
      String token = leaf.label().value();
      vocabulary.add(op.lowercaseWordVectors ? token.toLowerCase() : token);
    }
  }

  this.wordVectors = Generics.newTreeMap();
  for (String token : vocabulary) {
    wordVectors.put(token, randomWordVector());
  }
}
/**
 * Assembles a SentimentModel from individual matrices and a set of
 * options. Useful for creating a Java version of a model trained in some
 * other manner, such as using the original Matlab code.
 * <br>
 * {@code W}, {@code Wt} and {@code Wcat} are each stored under the empty
 * string key(s) of their map; the binary classification map is left empty.
 *
 * @throws IllegalArgumentException unless both {@code op.combineClassification}
 *         and {@code op.simplifiedModel} are turned on
 */
static SentimentModel modelFromMatrices(SimpleMatrix W, SimpleMatrix Wcat, SimpleTensor Wt, Map<String, SimpleMatrix> wordVectors, RNNOptions op) {
  if (!(op.combineClassification && op.simplifiedModel)) {
    throw new IllegalArgumentException("Can only create a model using this method if combineClassification and simplifiedModel are turned on");
  }

  TwoDimensionalMap<String, String, SimpleMatrix> transforms = TwoDimensionalMap.treeMap();
  TwoDimensionalMap<String, String, SimpleTensor> tensors = TwoDimensionalMap.treeMap();
  TwoDimensionalMap<String, String, SimpleMatrix> binaryClassifiers = TwoDimensionalMap.treeMap();
  Map<String, SimpleMatrix> unaryClassifiers = Generics.newTreeMap();

  transforms.put("", "", W);
  tensors.put("", "", Wt);
  unaryClassifiers.put("", Wcat);

  return new SentimentModel(transforms, tensors, binaryClassifiers, unaryClassifiers, wordVectors, op);
}
numBinaryMatrices = binaryTransform.size(); Map<String, SimpleMatrix> newUnaryTransforms = Generics.newTreeMap(); Map<String, SimpleMatrix> newUnaryScores = Generics.newTreeMap(); for (String unaryRule : unaryRules) { SimpleMatrix transform = unaryTransform.get(unaryRule); numUnaryMatrices = unaryTransform.size(); Map<String, SimpleMatrix> newWordVectors = Generics.newTreeMap(); for (String word : words) { SimpleMatrix wordVector = wordVectors.get(word);
/**
 * Sets up the derivative containers for the given model.
 * <br>
 * The tensor derivatives are initialized from the model only when
 * {@code model.op.useTensors} is set, and the binary classification
 * derivatives only when {@code model.op.combineClassification} is not set;
 * otherwise the corresponding container starts out as an empty map.
 *
 * @param model the model whose matrices determine the derivative containers
 */
public ModelDerivatives(SentimentModel model) {
  binaryTD = initDerivatives(model.binaryTransform);

  if (model.op.useTensors) {
    binaryTensorTD = initTensorDerivatives(model.binaryTensors);
  } else {
    binaryTensorTD = TwoDimensionalMap.treeMap();
  }

  if (model.op.combineClassification) {
    binaryCD = TwoDimensionalMap.treeMap();
  } else {
    binaryCD = initDerivatives(model.binaryClassification);
  }

  unaryCD = initDerivatives(model.unaryClassification);

  // wordVectorD starts empty and is filled on an as-needed basis
  wordVectorD = Generics.newTreeMap();
}
// Initial state (the enclosing method is outside this excerpt): no
// unknownChinesePercentVector yet, a new empty wordVectors map, and the
// numberCount / capsCount counters at zero.
SimpleMatrix unknownChinesePercentVector = null; wordVectors = Generics.newTreeMap(); int numberCount = 0; int capsCount = 0;
// New, empty map from String keys to SimpleMatrix values
// (presumably word -> vector, going by the name; confirm against the caller).
Map<String, SimpleMatrix> wordVectors = Generics.newTreeMap();
// Start unaryTransform, binaryScore and unaryScore out as newly created maps.
unaryTransform = Generics.newTreeMap(); binaryScore = TwoDimensionalMap.treeMap(); unaryScore = Generics.newTreeMap();
// Start unaryClassification out as a newly created map.
unaryClassification = Generics.newTreeMap();
/**
 * Builds a map of 0 matrices mirroring the given map: every key of
 * {@code map} is paired with a newly constructed matrix that has the same
 * number of rows and columns as the matrix stored under that key.
 *
 * @param map the matrices whose keys and dimensions are copied
 * @return a new map from each original key to a fresh matrix of matching size
 */
private static Map<String, SimpleMatrix> initDerivatives(Map<String, SimpleMatrix> map) {
  Map<String, SimpleMatrix> zeroed = Generics.newTreeMap();
  for (Map.Entry<String, SimpleMatrix> source : map.entrySet()) {
    SimpleMatrix original = source.getValue();
    SimpleMatrix blank = new SimpleMatrix(original.numRows(), original.numCols());
    zeroed.put(source.getKey(), blank);
  }
  return zeroed;
}
} // closes a scope that was opened outside this excerpt
/**
 * Locates the separator tokens of a sentence.
 * <br>
 * For every element of {@code sentence}, the value of its first child is
 * normalized by applying each substitution pair in
 * {@code equivalentSeparators} and is then matched against
 * {@code separatorRegex}.
 *
 * @param sentence trees whose first child carries the token value
 * @return map from position in {@code sentence} to the original
 *         (un-normalized) value of every token whose normalized form matched
 */
static TreeMap<Integer, String> findSeparators(List<Tree> sentence) {
  TreeMap<Integer, String> found = Generics.newTreeMap();
  int position = 0;
  for (Tree element : sentence) {
    Tree token = element.children()[0];

    String normalized = token.value();
    for (int pair = 0; pair < equivalentSeparators.length; ++pair) {
      normalized = normalized.replace(equivalentSeparators[pair][0], equivalentSeparators[pair][1]);
    }

    // The map records the token's original value, not the normalized one.
    if (separatorRegex.matcher(normalized).matches()) {
      found.put(position, token.value());
    }
    ++position;
  }
  return found;
}
static TreeMap<Integer, String> findSeparators(List<Tree> sentence) { TreeMap<Integer, String> separators = Generics.newTreeMap(); for (int index = 0; index < sentence.size(); ++index) { Tree leaf = sentence.get(index).children()[0]; String value = leaf.value(); for (char[] equivalentSeparator : equivalentSeparators) { value = value.replace(equivalentSeparator[0], equivalentSeparator[1]); } if (separatorRegex.matcher(value).matches()) { // TODO: put "value" instead? Perhaps do this next time we rebuild all models separators.put(index, leaf.value()); } } return separators; }
static TreeMap<Integer, String> findSeparators(List<Tree> sentence) { TreeMap<Integer, String> separators = Generics.newTreeMap(); for (int index = 0; index < sentence.size(); ++index) { Tree leaf = sentence.get(index).children()[0]; String value = leaf.value(); for (char[] equivalentSeparator : equivalentSeparators) { value = value.replace(equivalentSeparator[0], equivalentSeparator[1]); } if (separatorRegex.matcher(value).matches()) { // TODO: put "value" instead? Perhaps do this next time we rebuild all models separators.put(index, leaf.value()); } } return separators; }
void readWordVectors() { Embedding embedding = new Embedding(op.wordVectors, op.numHid); this.wordVectors = Generics.newTreeMap(); // Map<String, SimpleMatrix> rawWordVectors = NeuralUtils.readRawWordVectors(op.wordVectors, op.numHid); // for (String word : rawWordVectors.keySet()) { for (String word : embedding.keySet()) { // TODO: factor out unknown word vector code from DVParser wordVectors.put(word, embedding.get(word)); } String unkWord = op.unkWord; SimpleMatrix unknownWordVector = wordVectors.get(unkWord); wordVectors.put(UNKNOWN_WORD, unknownWordVector); if (unknownWordVector == null) { throw new RuntimeException("Unknown word vector not specified in the word vector file"); } }
void readWordVectors() { Embedding embedding = new Embedding(op.wordVectors, op.numHid); this.wordVectors = Generics.newTreeMap(); // Map<String, SimpleMatrix> rawWordVectors = NeuralUtils.readRawWordVectors(op.wordVectors, op.numHid); // for (String word : rawWordVectors.keySet()) { for (String word : embedding.keySet()) { // TODO: factor out unknown word vector code from DVParser wordVectors.put(word, embedding.get(word)); } String unkWord = op.unkWord; SimpleMatrix unknownWordVector = wordVectors.get(unkWord); wordVectors.put(UNKNOWN_WORD, unknownWordVector); if (unknownWordVector == null) { throw new RuntimeException("Unknown word vector not specified in the word vector file"); } }
/**
 * Assembles a SentimentModel from individual matrices and a set of
 * options. Useful for creating a Java version of a model trained in some
 * other manner, such as using the original Matlab code.
 * <br>
 * {@code W}, {@code Wt} and {@code Wcat} are each stored under the empty
 * string key(s) of their map; the binary classification map is left empty.
 *
 * @throws IllegalArgumentException unless both {@code op.combineClassification}
 *         and {@code op.simplifiedModel} are turned on
 */
static SentimentModel modelFromMatrices(SimpleMatrix W, SimpleMatrix Wcat, SimpleTensor Wt, Map<String, SimpleMatrix> wordVectors, RNNOptions op) {
  if (!(op.combineClassification && op.simplifiedModel)) {
    throw new IllegalArgumentException("Can only create a model using this method if combineClassification and simplifiedModel are turned on");
  }

  TwoDimensionalMap<String, String, SimpleMatrix> transforms = TwoDimensionalMap.treeMap();
  TwoDimensionalMap<String, String, SimpleTensor> tensors = TwoDimensionalMap.treeMap();
  TwoDimensionalMap<String, String, SimpleMatrix> binaryClassifiers = TwoDimensionalMap.treeMap();
  Map<String, SimpleMatrix> unaryClassifiers = Generics.newTreeMap();

  transforms.put("", "", W);
  tensors.put("", "", Wt);
  unaryClassifiers.put("", Wcat);

  return new SentimentModel(transforms, tensors, binaryClassifiers, unaryClassifiers, wordVectors, op);
}
/**
 * Sets up the derivative containers for the given model.
 * <br>
 * The tensor derivatives are initialized from the model only when
 * {@code model.op.useTensors} is set, and the binary classification
 * derivatives only when {@code model.op.combineClassification} is not set;
 * otherwise the corresponding container starts out as an empty map.
 *
 * @param model the model whose matrices determine the derivative containers
 */
public ModelDerivatives(SentimentModel model) {
  binaryTD = initDerivatives(model.binaryTransform);

  if (model.op.useTensors) {
    binaryTensorTD = initTensorDerivatives(model.binaryTensors);
  } else {
    binaryTensorTD = TwoDimensionalMap.treeMap();
  }

  if (model.op.combineClassification) {
    binaryCD = TwoDimensionalMap.treeMap();
  } else {
    binaryCD = initDerivatives(model.binaryClassification);
  }

  unaryCD = initDerivatives(model.unaryClassification);

  // wordVectorD starts empty and is filled on an as-needed basis
  wordVectorD = Generics.newTreeMap();
}