remove dotenv
app.py +0 -23
requirements.txt +1 -1
app.py
CHANGED
@@ -2,7 +2,6 @@ import os
 import io
 import requests
 import streamlit as st
-from dotenv import load_dotenv
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
@@ -11,12 +10,6 @@ from langchain.chains.question_answering import load_qa_chain
 from langchain.llms import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
-# Disable WANDB
-os.environ['WANDB_DISABLED'] = "true"
-
-# Constants
-MODEL_PATH = "/home/lab/halyn/gemma/halyn/paper/models/gemma-2-9b-it"
-
 # Global variables
 knowledge_base = None
 qa_chain = None
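Note: with load_dotenv and the hardcoded MODEL_PATH constant both removed, the model location presumably reaches load_model() some other way. A minimal sketch, assuming it now comes from an environment variable (e.g. a Space secret); the variable name and fallback below are hypothetical, not part of this commit:

    import os

    # Assumption: configuration now arrives via the environment rather than a
    # .env file; both the variable name and the fallback are illustrative only.
    model_path = os.environ.get("MODEL_PATH", "google/gemma-2-9b-it")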
@@ -24,10 +17,6 @@ qa_chain = None
 def load_pdf(pdf_file):
     """
     Load and extract text from a PDF.
-    Args:
-        pdf_file (str): The PDF file.
-    Returns:
-        str: Extracted text from the PDF.
     """
     pdf_reader = PdfReader(pdf_file)
     text = "".join(page.extract_text() for page in pdf_reader.pages)
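Note: the hunk context stops mid-function. Judging by the docstring, load_pdf() presumably ends by returning the accumulated text; a reconstruction under that assumption:

    from PyPDF2 import PdfReader

    def load_pdf(pdf_file):
        # Body as shown in the hunk; the final return is assumed, not shown.
        pdf_reader = PdfReader(pdf_file)
        text = "".join(page.extract_text() for page in pdf_reader.pages)
        return text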
@@ -36,10 +25,6 @@ def load_pdf(pdf_file):
 def split_text(text):
     """
     Split the extracted text into chunks.
-    Args:
-        text (str): The full text extracted from the PDF.
-    Returns:
-        list: A list of text chunks.
     """
     text_splitter = CharacterTextSplitter(
         separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
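Note: the hunk cuts off inside the CharacterTextSplitter(...) call. In LangChain this splitter is applied with split_text(), so the function plausibly completes as follows (a sketch, not the commit's verbatim code):

    from langchain.text_splitter import CharacterTextSplitter

    def split_text(text):
        # Constructor arguments as shown in the hunk; the return is assumed.
        text_splitter = CharacterTextSplitter(
            separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
        )
        return text_splitter.split_text(text)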
@@ -49,10 +34,6 @@ def split_text(text):
 def create_knowledge_base(chunks):
     """
     Create a FAISS knowledge base from text chunks.
-    Args:
-        chunks (list): A list of text chunks.
-    Returns:
-        FAISS: A FAISS knowledge base object.
     """
     embeddings = HuggingFaceEmbeddings()
     return FAISS.from_texts(chunks, embeddings)
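Note: create_knowledge_base() is fully visible here. For context, a FAISS store built this way is typically queried with similarity_search; an illustrative usage (the file name and query string are made up):

    # Hypothetical end-to-end usage of the helpers above.
    chunks = split_text(load_pdf("paper.pdf"))
    knowledge_base = create_knowledge_base(chunks)
    docs = knowledge_base.similarity_search("What is the main contribution?", k=4)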
@@ -60,10 +41,6 @@ def create_knowledge_base(chunks):
 def load_model(model_path):
     """
     Load the HuggingFace model and tokenizer, and create a text-generation pipeline.
-    Args:
-        model_path (str): The path to the pre-trained model.
-    Returns:
-        pipeline: A HuggingFace pipeline for text generation.
     """
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     model = AutoModelForCausalLM.from_pretrained(model_path)
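Note: the hunk ends right after the model loads. Given the transformers and HuggingFacePipeline imports above, a typical continuation builds a text-generation pipeline and wraps it for LangChain; a sketch under that assumption (max_new_tokens is illustrative):

    from langchain.llms import HuggingFacePipeline
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    def load_model(model_path):
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(model_path)
        # Assumed continuation: wire model and tokenizer into a generation
        # pipeline and hand it to LangChain.
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                        max_new_tokens=512)
        return HuggingFacePipeline(pipeline=pipe)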
requirements.txt
CHANGED
@@ -5,4 +5,4 @@ transformers==4.31.0
 torch==2.0.1
 faiss-cpu==1.7.4
 requests==2.31.0
-huggingface-hub==0.16.4
+huggingface-hub==0.16.4
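Note: the pinned huggingface-hub version can be sanity-checked after installing from this file (a verification sketch, not part of the commit):

    # Assumes dependencies were installed with: pip install -r requirements.txt
    import huggingface_hub
    assert huggingface_hub.__version__ == "0.16.4", huggingface_hub.__version__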