Goran Glavaš
committed on
Commit
•
1d5d16d
1
Parent(s):
41fc5cc
Fixing relative resources path access (Maven)
Browse files
README.txt
CHANGED
@@ -16,6 +16,7 @@ This repository contains:
|
|
16 |
Usage
|
17 |
========
|
18 |
|
|
|
19 |
The following command with four arguments runs the GraphSeg tool:
|
20 |
|
21 |
java -jar graphseg.jar <input-folder-path> <output-folder-path> <relatedness-threshold> <minimal-segment-size>
|
|
|
16 |
Usage
|
17 |
========
|
18 |
|
19 |
+
To successfully run the GraphSeg tool, you need to have Java 1.8 installed.
|
20 |
The following command with four arguments runs the GraphSeg tool:
|
21 |
|
22 |
java -jar graphseg.jar <input-folder-path> <output-folder-path> <relatedness-threshold> <minimal-segment-size>
|
binary/graphseg.jar
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8933d43d10f8ba885d4df38b3b0a2d1bf7796f7d367b7f9fef6f3925801987c5
|
3 |
+
size 616819715
|
source/src/edu/uma/nlp/graphseg/Start.java
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
package edu.uma.nlp.graphseg;
|
2 |
-
|
3 |
import java.io.File;
|
4 |
import java.io.IOException;
|
|
|
5 |
import java.nio.file.Files;
|
6 |
import java.nio.file.Path;
|
7 |
import java.nio.file.Paths;
|
@@ -73,15 +73,15 @@ public class Start {
|
|
73 |
return;
|
74 |
}
|
75 |
|
76 |
-
|
77 |
-
List<String> stopwords = IOHelper.
|
78 |
|
79 |
-
|
80 |
MemoryStorage.setWordVectorSpace(new WordVectorSpace());
|
81 |
-
MemoryStorage.getWordVectorSpace().load(
|
82 |
|
83 |
-
|
84 |
-
MemoryStorage.setInformationContent(new InformationContent(
|
85 |
|
86 |
|
87 |
SemanticSimilarity.setStopwords(stopwords);
|
|
|
1 |
package edu.uma.nlp.graphseg;
|
|
|
2 |
import java.io.File;
|
3 |
import java.io.IOException;
|
4 |
+
import java.io.InputStream;
|
5 |
import java.nio.file.Files;
|
6 |
import java.nio.file.Path;
|
7 |
import java.nio.file.Paths;
|
|
|
73 |
return;
|
74 |
}
|
75 |
|
76 |
+
InputStream stopwordsStream = Start.class.getClassLoader().getResourceAsStream("stopwords.txt");
|
77 |
+
List<String> stopwords = IOHelper.getAllLinesStream(stopwordsStream);
|
78 |
|
79 |
+
InputStream embeddingsStream = Start.class.getClassLoader().getResourceAsStream("embeddings.txt");
|
80 |
MemoryStorage.setWordVectorSpace(new WordVectorSpace());
|
81 |
+
MemoryStorage.getWordVectorSpace().load(embeddingsStream, null);
|
82 |
|
83 |
+
InputStream freqsStream = Start.class.getClassLoader().getResourceAsStream("freqs.txt");
|
84 |
+
MemoryStorage.setInformationContent(new InformationContent(freqsStream, 1));
|
85 |
|
86 |
|
87 |
SemanticSimilarity.setStopwords(stopwords);
|
source/src/edu/uma/nlp/graphseg/semantics/WordVectorSpace.java
CHANGED
@@ -3,10 +3,10 @@ package edu.uma.nlp.graphseg.semantics;
|
|
3 |
import java.io.BufferedReader;
|
4 |
import java.io.BufferedWriter;
|
5 |
import java.io.File;
|
6 |
-
import java.io.FileInputStream;
|
7 |
import java.io.FileNotFoundException;
|
8 |
import java.io.FileOutputStream;
|
9 |
import java.io.IOException;
|
|
|
10 |
import java.io.InputStreamReader;
|
11 |
import java.io.OutputStreamWriter;
|
12 |
import java.util.ArrayList;
|
@@ -26,11 +26,11 @@ public class WordVectorSpace {
|
|
26 |
return dimension;
|
27 |
}
|
28 |
|
29 |
-
public void load(
|
30 |
{
|
31 |
embeddings = new HashMap<String, double[]>();
|
32 |
|
33 |
-
try (BufferedReader br = new BufferedReader(new InputStreamReader(
|
34 |
String line;
|
35 |
int counter = 0;
|
36 |
while ((line = br.readLine()) != null) {
|
|
|
3 |
import java.io.BufferedReader;
|
4 |
import java.io.BufferedWriter;
|
5 |
import java.io.File;
|
|
|
6 |
import java.io.FileNotFoundException;
|
7 |
import java.io.FileOutputStream;
|
8 |
import java.io.IOException;
|
9 |
+
import java.io.InputStream;
|
10 |
import java.io.InputStreamReader;
|
11 |
import java.io.OutputStreamWriter;
|
12 |
import java.util.ArrayList;
|
|
|
26 |
return dimension;
|
27 |
}
|
28 |
|
29 |
+
public void load(InputStream stream, HashMap<String, Integer> filters) throws FileNotFoundException, IOException
|
30 |
{
|
31 |
embeddings = new HashMap<String, double[]>();
|
32 |
|
33 |
+
try (BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF8"))) {
|
34 |
String line;
|
35 |
int counter = 0;
|
36 |
while ((line = br.readLine()) != null) {
|