Henry Qu committed on
Commit
bb448d0
1 Parent(s): f42f51d
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -2,11 +2,12 @@ import gradio as gr
2
  import os
3
  from huggingface_hub import hf_hub_download
4
  from pathlib import Path
5
- from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer
6
  import json
7
 
8
  model = GPT2LMHeadModel.from_pretrained('gpt2')
9
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 
10
 
11
  logits_dict = {}
12
 
@@ -15,14 +16,14 @@ with open(json_file, 'r') as file:
15
  data = json.load(file)
16
  for key, value in data.items():
17
  text_description = value['text_description']
18
- inputs = tokenizer(text_description, return_tensors="pt")
19
  outputs = model(**inputs, labels=inputs["input_ids"])
20
  logits = outputs.logits
21
  logits_dict[key] = logits
22
 
23
 
24
  def search_index(query):
25
- inputs = tokenizer(query, return_tensors="pt")
26
  outputs = model(**inputs, labels=inputs["input_ids"])
27
 
28
  max_similarity = float('-inf')
 
# --- Module-level setup: model, tokenizer, and the logits index store ---
import os
from huggingface_hub import hf_hub_download
from pathlib import Path
from transformers import GPT2Config, GPT2LMHeadModel, GPT2TokenizerFast
import json

# Pretrained GPT-2 language-model head; its logits are used as text features.
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Fast (Rust-backed) tokenizer. GPT-2 ships with no pad token, so reuse EOS
# as the pad token — required for the fixed-length padding done when encoding.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Maps each JSON entry key to the logits tensor of its text description,
# filled while the index is built below.
logits_dict = {}
13
 
 
16
  data = json.load(file)
17
  for key, value in data.items():
18
  text_description = value['text_description']
19
+ inputs = tokenizer(text_description, return_tensors="pt", padding="max_length", max_length=128, truncation=True)
20
  outputs = model(**inputs, labels=inputs["input_ids"])
21
  logits = outputs.logits
22
  logits_dict[key] = logits
23
 
24
 
25
  def search_index(query):
26
+ inputs = tokenizer(query, return_tensors="pt", padding="max_length", max_length=128, truncation=True)
27
  outputs = model(**inputs, labels=inputs["input_ids"])
28
 
29
  max_similarity = float('-inf')