FridayMaster committed on
Commit
68656c4
1 Parent(s): ad06723

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -9,7 +9,6 @@ import nltk
9
  # Download the required NLTK data
10
  nltk.download('punkt')
11
  nltk.download('punkt_tab')
12
-
13
  # Paths to your files
14
  faiss_path = "manual_chunked_faiss_index_500.bin"
15
  manual_path = "ubuntu_manual.txt"
@@ -52,7 +51,7 @@ except Exception as e:
52
  embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
53
 
54
  # OpenAI API key
55
- openai.api_key = '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'
56
 
57
  # Function to create embeddings
58
  def embed_text(text_list):
@@ -79,6 +78,11 @@ def retrieve_chunks(query, k=5):
79
  relevant_chunks = [manual_chunks[i] for i in valid_indices]
80
  return relevant_chunks, distances, indices
81
 
 
 
 
 
 
82
  # Function to perform RAG: Retrieve chunks and generate a response
83
  def rag_response(query, k=5, max_tokens=150):
84
  try:
@@ -87,8 +91,15 @@ def rag_response(query, k=5, max_tokens=150):
87
  if not relevant_chunks:
88
  return "Sorry, I couldn't find relevant information.", distances, indices
89
 
90
- augmented_input = query + "\n" + "\n".join(relevant_chunks)
 
91
 
 
 
 
 
 
 
92
  # Generate response using OpenAI API
93
  response = openai.ChatCompletion.create(
94
  model="gpt-3.5-turbo",
 
9
  # Download the required NLTK data
10
  nltk.download('punkt')
11
  nltk.download('punkt_tab')
 
12
  # Paths to your files
13
  faiss_path = "manual_chunked_faiss_index_500.bin"
14
  manual_path = "ubuntu_manual.txt"
 
51
  embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
52
 
53
  # OpenAI API key
54
+ openai.api_key = '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'
55
 
56
  # Function to create embeddings
57
  def embed_text(text_list):
 
78
  relevant_chunks = [manual_chunks[i] for i in valid_indices]
79
  return relevant_chunks, distances, indices
80
 
81
+ # Function to truncate long inputs
82
+ def truncate_input(text, max_length=16385):
83
+ tokens = tokenizer.encode(text, truncation=True, max_length=max_length, return_tensors="pt")
84
+ return tokens
85
+
86
  # Function to perform RAG: Retrieve chunks and generate a response
87
  def rag_response(query, k=5, max_tokens=150):
88
  try:
 
91
  if not relevant_chunks:
92
  return "Sorry, I couldn't find relevant information.", distances, indices
93
 
94
+ # Combine the query with retrieved chunks
95
+ augmented_input = query + "\n\n" + "\n\n".join(relevant_chunks)
96
 
97
+ # Truncate the input if it exceeds token limits
98
+ input_tokens = tokenizer.encode(augmented_input, return_tensors="pt")
99
+ if input_tokens.shape[1] > 16385:
100
+ # Truncate to fit within the model's maximum input length
101
+ augmented_input = tokenizer.decode(input_tokens[0, :16385])
102
+
103
  # Generate response using OpenAI API
104
  response = openai.ChatCompletion.create(
105
  model="gpt-3.5-turbo",