halyn committed
Commit e2ce39d
1 Parent(s): c8ddd96

remove dotenv

Files changed (2):
  1. app.py +0 -23
  2. requirements.txt +1 -1
app.py CHANGED
@@ -2,7 +2,6 @@ import os
 import io
 import requests
 import streamlit as st
-from dotenv import load_dotenv
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
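For reference, the dropped import is python-dotenv's standard entry point. A minimal sketch of what it did at startup (illustrative, not code from this repo):

    from dotenv import load_dotenv

    # Reads key=value pairs from a local .env file into os.environ.
    # On a hosted Space, secrets and variables are injected by the
    # platform instead, so the call (and the dependency) is unnecessary.
    load_dotenv()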
@@ -11,12 +10,6 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.llms import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
-# Disable WANDB
-os.environ['WANDB_DISABLED'] = "true"
-
-# Constants
-MODEL_PATH = "/home/lab/halyn/gemma/halyn/paper/models/gemma-2-9b-it"
-
 # Global variables
 knowledge_base = None
 qa_chain = None
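With the hard-coded MODEL_PATH (a machine-specific local path) and the WANDB override gone, the model location has to come from elsewhere. A minimal sketch, assuming the path is supplied as an environment variable; the variable name GEMMA_MODEL_PATH and the hub-ID fallback are illustrative, not part of this commit:

    import os

    # Prefer an externally supplied path; fall back to the public hub ID.
    model_path = os.environ.get("GEMMA_MODEL_PATH", "google/gemma-2-9b-it")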
@@ -24,10 +17,6 @@ qa_chain = None
 def load_pdf(pdf_file):
     """
     Load and extract text from a PDF.
-    Args:
-        pdf_file (str): The PDF file.
-    Returns:
-        str: Extracted text from the PDF.
     """
     pdf_reader = PdfReader(pdf_file)
     text = "".join(page.extract_text() for page in pdf_reader.pages)
@@ -36,10 +25,6 @@ def load_pdf(pdf_file):
 def split_text(text):
     """
     Split the extracted text into chunks.
-    Args:
-        text (str): The full text extracted from the PDF.
-    Returns:
-        list: A list of text chunks.
     """
     text_splitter = CharacterTextSplitter(
         separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
@@ -49,10 +34,6 @@ def split_text(text):
 def create_knowledge_base(chunks):
     """
     Create a FAISS knowledge base from text chunks.
-    Args:
-        chunks (list): A list of text chunks.
-    Returns:
-        FAISS: A FAISS knowledge base object.
     """
     embeddings = HuggingFaceEmbeddings()
     return FAISS.from_texts(chunks, embeddings)
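How the returned store is typically queried: similarity_search is LangChain's FAISS API. The texts and question below are illustrative, and HuggingFaceEmbeddings() downloads its default sentence-transformers model on first use:

    kb = create_knowledge_base(
        ["FAISS indexes dense vectors.", "Gemma is a decoder-only LLM."]
    )
    docs = kb.similarity_search("What does FAISS index?", k=1)
    print(docs[0].page_content)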
@@ -60,10 +41,6 @@ def create_knowledge_base(chunks):
 def load_model(model_path):
     """
     Load the HuggingFace model and tokenizer, and create a text-generation pipeline.
-    Args:
-        model_path (str): The path to the pre-trained model.
-    Returns:
-        pipeline: A HuggingFace pipeline for text generation.
     """
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     model = AutoModelForCausalLM.from_pretrained(model_path)
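The hunk ends here; the rest of load_model is not shown in this diff. A hedged sketch of how such a loader usually finishes, assuming it mirrors the imports above (the max_new_tokens value is an arbitrary choice, and load_model_sketch is a hypothetical name, not the repo's code):

    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
    from langchain.llms import HuggingFacePipeline

    def load_model_sketch(model_path):
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(model_path)
        # Wrap model + tokenizer in a text-generation pipeline LangChain can drive.
        pipe = pipeline(
            "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256
        )
        return HuggingFacePipeline(pipeline=pipe)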
 
requirements.txt CHANGED
@@ -5,4 +5,4 @@ transformers==4.31.0
 torch==2.0.1
 faiss-cpu==1.7.4
 requests==2.31.0
-huggingface-hub==0.16.4
+huggingface-hub==0.16.4