Spaces:
Running
Running
change from year to publication year
Browse files
document_qa/document_qa_engine.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import copy
|
2 |
-
import json
|
3 |
import os
|
4 |
from pathlib import Path
|
5 |
from typing import Union, Any
|
6 |
|
|
|
7 |
from grobid_client.grobid_client import GrobidClient
|
8 |
from langchain.chains import create_extraction_chain
|
9 |
from langchain.chains.question_answering import load_qa_chain
|
@@ -13,8 +13,6 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
13 |
from langchain.vectorstores import Chroma
|
14 |
from tqdm import tqdm
|
15 |
|
16 |
-
from document_qa.grobid_processors import GrobidProcessor
|
17 |
-
|
18 |
|
19 |
class DocumentQAEngine:
|
20 |
llm = None
|
@@ -220,7 +218,7 @@ class DocumentQAEngine:
|
|
220 |
biblio_metadata = copy.copy(biblio)
|
221 |
biblio_metadata['type'] = "biblio"
|
222 |
biblio_metadata['section'] = "header"
|
223 |
-
for key in ['title', 'authors', 'year']:
|
224 |
if key in biblio_metadata:
|
225 |
texts.append("{}: {}".format(key, biblio_metadata[key]))
|
226 |
metadatas.append(biblio_metadata)
|
|
|
1 |
import copy
|
|
|
2 |
import os
|
3 |
from pathlib import Path
|
4 |
from typing import Union, Any
|
5 |
|
6 |
+
from document_qa.grobid_processors import GrobidProcessor
|
7 |
from grobid_client.grobid_client import GrobidClient
|
8 |
from langchain.chains import create_extraction_chain
|
9 |
from langchain.chains.question_answering import load_qa_chain
|
|
|
13 |
from langchain.vectorstores import Chroma
|
14 |
from tqdm import tqdm
|
15 |
|
|
|
|
|
16 |
|
17 |
class DocumentQAEngine:
|
18 |
llm = None
|
|
|
218 |
biblio_metadata = copy.copy(biblio)
|
219 |
biblio_metadata['type'] = "biblio"
|
220 |
biblio_metadata['section'] = "header"
|
221 |
+
for key in ['title', 'authors', 'publication_year']:
|
222 |
if key in biblio_metadata:
|
223 |
texts.append("{}: {}".format(key, biblio_metadata[key]))
|
224 |
metadatas.append(biblio_metadata)
|
document_qa/grobid_processors.py
CHANGED
@@ -171,7 +171,7 @@ class GrobidProcessor(BaseProcessor):
|
|
171 |
}
|
172 |
try:
|
173 |
year = dateparser.parse(doc_biblio.header.date).year
|
174 |
-
biblio["year"] = year
|
175 |
except:
|
176 |
pass
|
177 |
|
|
|
171 |
}
|
172 |
try:
|
173 |
year = dateparser.parse(doc_biblio.header.date).year
|
174 |
+
biblio["publication_year"] = year
|
175 |
except:
|
176 |
pass
|
177 |
|