jhansi1 committed on
Commit
eb223fd
1 Parent(s): f163e0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -1,22 +1,26 @@
1
- # app.py
2
-
3
  import gradio as gr
4
  import streamlit as st
5
  from transformers import pipeline
6
  from datasets import load_dataset
7
  from huggingface_hub import hf_hub_download
8
- from datasets import load_dataset
 
 
 
 
 
9
 
10
- file_path = hf_hub_download("BEE-spoke-data/survivorslib-law-books", filename="train.parquet")
11
- ds = load_dataset("parquet", data_files=file_path)
 
 
 
 
12
 
13
  # Initialize text-generation pipeline with the model
14
  model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
15
  pipe = pipeline("text-generation", model=model_name)
16
 
17
- # Load the dataset from the cloned local directory
18
- # ds = load_dataset("./canadian-legal-data", split="train",verify=False)
19
-
20
  # Gradio Interface setup
21
  def respond(
22
  message,
 
 
 
1
  import gradio as gr
2
  import streamlit as st
3
  from transformers import pipeline
4
  from datasets import load_dataset
5
  from huggingface_hub import hf_hub_download
6
+ import subprocess
7
+ import os
8
+
9
+ # Clone the dataset repository if not already cloned
10
+ repo_url = "https://huggingface.co/datasets/BEE-spoke-data/survivorslib-law-books"
11
+ repo_dir = "./survivorslib-law-books"
12
 
13
+ if not os.path.exists(repo_dir):
14
+ subprocess.run(["git", "clone", repo_url], check=True)
15
+
16
+ # Load the dataset from the cloned repository
17
+ dataset_path = os.path.join(repo_dir, "train.parquet")
18
+ ds = load_dataset("parquet", data_files=dataset_path)
19
 
20
  # Initialize text-generation pipeline with the model
21
  model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
22
  pipe = pipeline("text-generation", model=model_name)
23
 
 
 
 
24
  # Gradio Interface setup
25
  def respond(
26
  message,