kskathe committed on
Commit
33873fb
1 Parent(s): 2fe6bd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -35,12 +35,12 @@
35
  # st.write(decoded_output[0])
36
 
37
 
38
-
39
  import streamlit as st
40
  from transformers import pipeline
41
  from peft import AutoPeftModelForCausalLM
42
  from transformers import AutoTokenizer
43
 
 
44
  # Initialize the tokenizer
45
  tokenizer = AutoTokenizer.from_pretrained("kskathe/finetuned-llama-text-summarization")
46
 
@@ -55,14 +55,15 @@ input_text = st.text_area("Enter the article content:")
55
  formatted_input = alpaca_prompt.format(input_text, "")
56
 
57
  if st.button("Generate Highlights"):
58
- # Prepare the input and specify the device as CPU
59
- inputs = tokenizer([formatted_input], return_tensors="pt").to("cpu")
60
-
61
- # Load the model without quantization
62
  text_model = AutoPeftModelForCausalLM.from_pretrained(
63
  "kskathe/finetuned-llama-text-summarization",
64
- load_in_8bit=False, # Ensure quantization is disabled
65
- device_map="cpu" # Force the model to run on CPU
 
66
  )
67
 
68
  # Generate the output
 
35
  # st.write(decoded_output[0])
36
 
37
 
 
38
  import streamlit as st
39
  from transformers import pipeline
40
  from peft import AutoPeftModelForCausalLM
41
  from transformers import AutoTokenizer
42
 
43
+
44
  # Initialize the tokenizer
45
  tokenizer = AutoTokenizer.from_pretrained("kskathe/finetuned-llama-text-summarization")
46
 
 
55
  formatted_input = alpaca_prompt.format(input_text, "")
56
 
57
  if st.button("Generate Highlights"):
58
+ # Prepare the input
59
+ inputs = tokenizer([formatted_input], return_tensors="pt")
60
+
61
+ # Load the model without quantization and force CPU usage
62
  text_model = AutoPeftModelForCausalLM.from_pretrained(
63
  "kskathe/finetuned-llama-text-summarization",
64
+ device_map="cpu", # Force the model to run on CPU
65
+ load_in_8bit=False, # Disable 8-bit quantization if it was enabled
66
+ torch_dtype="float32" # Use float32 precision which is CPU friendly
67
  )
68
 
69
  # Generate the output