Hawoly18 committed on
Commit
815fa95
1 Parent(s): 493c487

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -47
app.py CHANGED
@@ -1,55 +1,47 @@
 
 
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
- from typing import List, Tuple
4
  import torch
5
 
6
-
7
# Hugging Face Hub repository that holds the fine-tuned checkpoint.
model_name = "Hawoly18/Adia_Llama3.1"

# Check whether a GPU is available; fall back to the CPU otherwise.
# NOTE(review): nothing visible in this file moves the model or its inputs
# onto `device` - confirm whether that was intended.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the tokenizer first, then the causal-LM weights, from the same repo.
tokenizer, model = (
    loader.from_pretrained(model_name)
    for loader in (AutoTokenizer, AutoModelForCausalLM)
)
15
-
16
def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Generate the assistant's reply for one chat turn.

    Args:
        message: The latest user message.
        history: Earlier ``(user, assistant)`` exchanges, oldest first.
        system_message: Text placed at the very start of the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The generated reply with surrounding whitespace removed.
    """
    # Build the transcript-style prompt: system text, past turns, new turn.
    pieces = [system_message]
    pieces.extend(
        f"\nUser: {user_msg}\nAssistant: {assistant_msg}"
        for user_msg, assistant_msg in history
    )
    pieces.append(f"\nUser: {message}\nAssistant:")
    prompt = "".join(pieces)

    # Keep the inputs on whatever device the model currently lives on.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # The UI labels this value "Max new tokens". `max_length` would
            # also count the prompt, so a long history would leave little or
            # no room for the reply.
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
        )

    # Decode only the generated continuation instead of splitting the whole
    # text on "Assistant:", which misfires if the model emits that marker.
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()
    return response
41
 
42
-
43
# Extra controls for the chat UI. They are listed in the same order as the
# `system_message`, `max_tokens`, `temperature` and `top_p` parameters of
# `respond`.
_system_message_box = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
_max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
_temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
_top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    title="Chatbot Interface",
)

# Only start the web server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
 
1
+ import os
2
+ #os.environ['HF_HOME'] = 'E:/huggingface_cache'
3
+
4
  import gradio as gr
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
6
  import torch
7
 
8
# Hub repository that provides both the tokenizer and the model weights.
_CHECKPOINT = "Hawoly18/Adia_Llama3.1"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)

# Reuse the end-of-sequence token for padding when the tokenizer has no
# dedicated pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
14
+
15
+ # Function to generate responses
16
def generate_response(question, max_length=512):
    """Answer *question* with the loaded causal language model.

    The question is wrapped in the Question/Réponse prompt template,
    completed with beam search, and only the completion is returned.

    Args:
        question: The user's question as plain text.
        max_length: Value forwarded to ``model.generate`` as ``max_length``,
            which caps the total sequence length (prompt plus generated
            tokens).

    Returns:
        The generated answer with surrounding whitespace removed.
    """
    input_text = f"Question: {question}\nRéponse:"

    # Calling the tokenizer (instead of `encode`) also returns the attention
    # mask. Deriving the mask from `input_ids != pad_token_id` would mask out
    # genuine EOS tokens whenever the pad token falls back to EOS.
    encoded = tokenizer(input_text, return_tensors="pt", truncation=True)
    # Keep the inputs on whatever device the model currently lives on.
    encoded = encoded.to(model.device)
    prompt_length = encoded["input_ids"].shape[-1]

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output_ids = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            num_beams=5,  # Beam search for better quality
            no_repeat_ngram_size=2,  # Prevent n-gram repetition
            early_stopping=True,
        )

    # Drop the prompt by token position. Removing it with `str.replace` fails
    # when decoding does not reproduce the prompt text exactly, and would also
    # delete any repetition of the prompt inside the answer.
    response = tokenizer.decode(
        output_ids[0][prompt_length:], skip_special_tokens=True
    ).strip()
    return response
35
 
36
# One text box in, one text box out, backed by `generate_response`.
_interface_options = {
    "fn": generate_response,
    "inputs": "text",
    "outputs": "text",
    "title": "Model Q&A Interface",
    "description": "Ask a question related to BSE and entrepreneurship!",
}
interface = gr.Interface(**_interface_options)

# Start the web app; share=True additionally requests a public share link.
interface.launch(share=True)