File size: 1,312 Bytes
f870b18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from typing import List, Tuple
import torch
from SciAssist import ReferenceStringParsing

device = "gpu" if torch.cuda.is_available() else "cpu"
rsp_pipeline = ReferenceStringParsing(os_name="nt", device=device)


def rsp_for_str(input, dehyphen=False) -> List[Tuple[str, str]]:
    results = rsp_pipeline.predict(input, type="str", dehyphen=dehyphen)
    output = []
    for res in results:
        for token, tag in zip(res["tokens"], res["tags"]):
            output.append((token, tag))
        output.append(("\n\n", None))
    return output


def rsp_for_file(input, dehyphen=False) -> List[Tuple[str, str]]:
    if input == None:
        return None
    filename = input.name
    # Identify the format of input and parse reference strings
    if filename[-4:] == ".txt":
        results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen, save_results=False)
    elif filename[-4:] == ".pdf":
        results = rsp_pipeline.predict(filename, dehyphen=dehyphen, save_results=False)
    else:
        return [("File Format Error !", None)]
    # Prepare for the input gradio.HighlightedText accepts.
    output = []
    for res in results:
        for token, tag in zip(res["tokens"], res["tags"]):
            output.append((token, tag))
        output.append(("\n\n", None))
    return output