Almaatla committed on
Commit
9f38a4d
1 Parent(s): c96166d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -1,7 +1,6 @@
1
- from PyPDF2 import PdfFileReader
2
- from tiktoken import Tokenizer
3
- from tiktoken.models import GPT2
4
  import gradio as gr
 
 
5
 
6
  def extract_text_from_pdf(file_path):
7
  with open(file_path, "rb") as file:
@@ -12,8 +11,11 @@ def extract_text_from_pdf(file_path):
12
  return text
13
 
14
  def count_tokens(text):
15
- tokenizer = Tokenizer(GPT2())
16
- tokens = tokenizer.tokenize(text)
 
 
 
17
  return len(tokens)
18
 
19
  def count_tokens_in_file(file):
 
 
 
 
1
  import gradio as gr
2
+ from PyPDF4 import PdfFileReader
3
+ import tiktoken
4
 
5
  def extract_text_from_pdf(file_path):
6
  with open(file_path, "rb") as file:
 
11
  return text
12
 
13
  def count_tokens(text):
14
+ tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
15
+ tokens = tokenizer.encode(
16
+ text,
17
+ disallowed_special=()
18
+ )
19
  return len(tokens)
20
 
21
  def count_tokens_in_file(file):