File size: 726 Bytes
58627fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
import ujson

from collections import defaultdict
from colbert.utils.utils import print_message, file_tqdm


def load_collection_(path, retain_titles):
    """Load a tab-separated passage collection into a list of strings.

    Each line of the file must hold exactly three tab-separated columns:
    an identifier (read but ignored), the passage text, and the title.
    Lines are pulled through ``file_tqdm`` (presumably a progress-reporting
    wrapper around the file iterator -- confirm in colbert.utils.utils).

    Args:
        path: Path of the TSV file to read; it is decoded as UTF-8.
        retain_titles: When truthy, each entry is stored as
            ``title + ' | ' + passage`` instead of the bare passage.

    Returns:
        A list with one string per input line, in file order.

    Raises:
        ValueError: If a line does not split into exactly three columns.
    """
    collection = []

    # Decode explicitly so the result does not depend on the platform locale.
    with open(path, encoding='utf-8') as f:
        for line_number, line in enumerate(file_tqdm(f), start=1):
            # Remove only the line terminator before splitting: a blanket
            # strip() would also swallow the tab in front of an empty title
            # and make the three-way unpack fail.
            row = line.rstrip('\r\n')
            fields = row.split('\t')

            if len(fields) != 3:
                # Lines padded with stray leading/trailing whitespace used to
                # parse after a whole-line strip(); keep accepting those.
                fields = row.strip().split('\t')

            if len(fields) != 3:
                raise ValueError(
                    "{}: line {} has {} tab-separated column(s), expected 3"
                    .format(path, line_number, len(fields))
                )

            _, passage, title = fields

            # Matches the trailing-whitespace trimming the whole-line strip()
            # previously applied to the last column.
            title = title.rstrip()

            if retain_titles:
                passage = title + ' | ' + passage

            collection.append(passage)

    return collection


def load_qas_(path):
    """Load reference question/answer records from a JSON-lines file.

    Every non-blank line is parsed with ``ujson.loads`` and its ``qid``,
    ``question`` and ``answers`` entries are collected.

    Args:
        path: Path of the JSON-lines file to read; it is decoded as UTF-8.

    Returns:
        A list of ``(qid, question, answers)`` tuples, in file order.

    Raises:
        KeyError: If a parsed record lacks ``qid``, ``question`` or
            ``answers``. Errors raised by ``ujson.loads`` on malformed JSON
            propagate unchanged.
    """
    print_message("#> Loading the reference QAs from", path)

    triples = []

    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(path, encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of handing an empty document to the JSON parser.
            if not line.strip():
                continue

            qa = ujson.loads(line)
            triples.append((qa['qid'], qa['question'], qa['answers']))

    return triples