Spaces:

jhansi1
/

train

Running

jhansi1 committed 20 days ago

Commit

eb223fd

•

1 Parent(s): f163e0e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,22 +1,26 @@
-# app.py
 import gradio as gr
 import streamlit as st
 from transformers import pipeline
 from datasets import load_dataset
 from huggingface_hub import hf_hub_download
-from datasets import load_dataset
-file_path = hf_hub_download("BEE-spoke-data/survivorslib-law-books", filename="train.parquet")
-ds = load_dataset("parquet", data_files=file_path)
 # Initialize text-generation pipeline with the model
 model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
 pipe = pipeline("text-generation", model=model_name)
-# Load the dataset from the cloned local directory
-# ds = load_dataset("./canadian-legal-data", split="train",verify=False)
 # Gradio Interface setup
 def respond(
     message,

 import gradio as gr
 import streamlit as st
 from transformers import pipeline
 from datasets import load_dataset
 from huggingface_hub import hf_hub_download
+import subprocess
+import os
+# Clone the dataset repository if not already cloned
+repo_url = "https://huggingface.co/datasets/BEE-spoke-data/survivorslib-law-books"
+repo_dir = "./survivorslib-law-books"
+if not os.path.exists(repo_dir):
+    subprocess.run(["git", "clone", repo_url], check=True)
+# Load the dataset from the cloned repository
+dataset_path = os.path.join(repo_dir, "train.parquet")
+ds = load_dataset("parquet", data_files=dataset_path)
 # Initialize text-generation pipeline with the model
 model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
 pipe = pipeline("text-generation", model=model_name)
 # Gradio Interface setup
 def respond(
     message,