Model Maximum Context length possible
#19
by
zera09
- opened
What is the maximum context length that I can provide for this model?
Here is a rudimentary test for finding answers at the end of the context input. It appears to start having trouble beyond around 100,000 characters.
from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
from pathlib import Path

# Probe how far into a long context an extractive-QA model can still find
# an answer: append "My name is James." to progressively longer contexts
# and check whether the pipeline still extracts "James".

# Fix: tokenizers are loaded with .from_pretrained(); calling the
# AutoTokenizer class directly raises a TypeError.
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')
model = AutoModelForQuestionAnswering.from_pretrained('deepset/roberta-base-squad2')
nlp = pipeline('question-answering', tokenizer=tokenizer, model=model)

# the text is the concatenation of the cleaned wikipedia articles for Germany and Greece
# a little less than 167,000 characters.
text = Path('text/germany-greece.txt').read_text()
question = 'What is my name?'

base_ix = 80000
ix_step = 1000
for step in range(85):
    # Trim the context at the last sentence boundary (final '.') before the
    # target length so the answer sentence is appended after complete prose.
    # Fix: the loop referenced undefined names `t` and `q`; the script
    # defines `text` and `question`.
    ctx_len = text[:base_ix + step * ix_step].rfind('.') + 1
    qa_input = {'context': text[:ctx_len] + ' My name is James.', 'question': question}
    answer = nlp(qa_input)
    print(f'{ctx_len:>6}:', answer['answer'], round(answer['score'], 3))
Output:
59995: James 0.378
60923: James 0.24
61954: James 0.037
62912: James 0.654
63888: James 0.753
64923: James 0.48
65924: James 0.832
66671: James 0.89
67914: James 0.92
68915: James 0.869
69932: James 0.756
70956: James 0.52
71981: James 0.89
72944: James 0.887
73993: James 0.544
74968: James 0.864
75882: James 0.902
76958: James 0.389
77962: James 0.916
78964: James 0.939
79963: James 0.854
80969: James 0.796
81861: James 0.893
82886: James 0.51
83990: James 0.817
84894: James 0.807
85833: James 0.714
86918: James 0.363
87988: James 0.865
88963: James 0.525
89803: James 0.609
90990: James 0.686
91963: James 0.662
92873: James 0.658
93963: James 0.916
94998: James 0.575
95846: James 0.768
96986: James 0.813
97753: James 0.895
98780: James 0.847
99812: James 0.71
100818: James 0.887
101808: James 0.889
102918: James 0.648
103956: James 0.729
104633: James 0.81
105863: James 0.887
106906: James 0.789
107749: Ellinikos Stratos, ES 0.471
108949: James 0.88
109977: James 0.89
110954: James 0.802
111948: Ellinikos Stratos, ES 0.471
112951: Ellinikos Stratos, ES 0.471
113877: James 0.612
114935: James 0.617
115979: James 0.849
116755: James 0.906
117825: James 0.907
118983: James 0.927
119839: James 0.913
120955: James 0.728
121934: Ellinikos Stratos, ES 0.471
122876: James 0.535
123960: Ellinikos Stratos, ES 0.471
124970: James 0.918
125002: James 0.921
126989: James 0.899
127993: Ellinikos Stratos, ES 0.471
128939: James 0.676
129958: James 0.923
130987: James 0.782
131757: James 0.752
132956: James 0.836
133853: Ellinikos Stratos, ES 0.471
134975: Ellinikos Stratos, ES 0.471
135899: Ellinikos Stratos, ES 0.471
136932: Ellinikos Stratos, ES 0.471
137930: James 0.684
138860: Ellinikos Stratos, ES 0.471
139932: James 0.475
140903: James 0.896
141886: James 0.765
142992: James 0.54
143991: James 0.502