Agnuxo committed on
Commit
cc24cd5
1 Parent(s): f70ccd5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +152 -30
README.md CHANGED
@@ -1,27 +1,58 @@
1
  ---
2
- base_model: unsloth/qwen2-0.5b-bnb-4bit
3
- language:
4
- - en
 
5
  license: apache-2.0
 
 
 
 
 
 
6
  tags:
7
- - text-generation-inference
8
- - transformers
9
- - unsloth
10
- - qwen2
11
- - trl
12
  ---
13
 
14
- # Uploaded model
15
 
16
- - **Developed by:** Agnuxo
 
17
  - **License:** apache-2.0
18
- - **Finetuned from model :** unsloth/qwen2-0.5b-bnb-4bit
19
 
20
- This qwen2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
21
 
22
  [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ```python
 
 
 
 
 
25
  import os
26
  import sys
27
  import torch
@@ -32,6 +63,7 @@ from TTS.api import TTS
32
  import sounddevice as sd
33
  import threading
34
  import queue
 
35
  import time
36
  from vosk import Model, KaldiRecognizer
37
  import json
@@ -40,7 +72,7 @@ from PyQt5.QtWidgets import (QApplication, QMainWindow, QTextEdit, QLineEdit, QP
40
  QVBoxLayout, QHBoxLayout, QWidget, QScrollArea, QFrame, QToolButton,
41
  QLabel, QSlider, QComboBox, QCheckBox)
42
  from PyQt5.QtGui import QIcon, QPalette, QColor, QFont
43
- from PyQt5.QtCore import Qt, QThread, pyqtSignal, QPropertyAnimation, QAbstractAnimation, QParallelAnimationGroup
44
 
45
  # Suppress specific warnings
46
  warnings.filterwarnings("ignore", category=FutureWarning)
@@ -48,13 +80,15 @@ warnings.filterwarnings("ignore", category=UserWarning)
48
 
49
  # Global configuration
50
  SYSTEM_PROMPT = {
51
- "es": "Tu nombre es HAL. Eres un superordenador de la serie Nueve mil",
52
  "en": "speak Spanish."
53
  }
54
 
55
  MODELO_LLM = "Agnuxo/HAL_9000-Qwen2-1.5B-Instruct_Asistant-16bit-v2" # Puede utilizar la versión Mini "Agnuxo/HAL_9000-Qwen2-0.5B-Instruct_Asistant-16bit-v2"
56
  MAX_TOKENS = 100
57
  TEMPERATURA = 0.5
 
 
58
 
59
  # Determine available device
60
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -78,18 +112,57 @@ audio_queue = queue.Queue()
78
  vosk_model = Model(lang="es")
79
  recognizer = KaldiRecognizer(vosk_model, 16000)
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  class AudioThread(QThread):
 
 
 
 
 
 
 
82
  def run(self):
83
  while True:
84
- if not audio_queue.empty():
85
- wav = audio_queue.get()
86
- sd.play(wav, tts.synthesizer.output_sample_rate)
87
- sd.wait()
 
 
 
 
 
88
  else:
89
  time.sleep(0.1)
90
 
 
 
 
 
 
 
 
91
  class SpeechRecognitionThread(QThread):
92
  text_recognized = pyqtSignal(str)
 
93
 
94
  def __init__(self):
95
  super().__init__()
@@ -104,6 +177,12 @@ class SpeechRecognitionThread(QThread):
104
  data = stream.read(4000)
105
  if len(data) == 0:
106
  break
 
 
 
 
 
 
107
  if recognizer.AcceptWaveform(data):
108
  result = json.loads(recognizer.Result())
109
  texto = result.get("text", "")
@@ -241,6 +320,7 @@ class MainWindow(QMainWindow):
241
 
242
  input_layout = QHBoxLayout()
243
  self.input_field = QLineEdit()
 
244
  input_layout.addWidget(self.input_field)
245
 
246
  self.send_button = QPushButton("Enviar")
@@ -321,13 +401,27 @@ class MainWindow(QMainWindow):
321
  sample_rate_label = QLabel("Sample Rate:")
322
  sample_rate_label.setStyleSheet("color: #000000;") # Change font color to black
323
  self.sample_rate_combo = QComboBox()
324
- self.sample_rate_combo.addItems(["16000", "22050", "44100", "48000"])
325
- self.sample_rate_combo.setCurrentText("22050")
326
  self.sample_rate_combo.currentTextChanged.connect(self.update_sample_rate)
327
  sample_rate_layout.addWidget(sample_rate_label)
328
  sample_rate_layout.addWidget(self.sample_rate_combo)
329
  settings_content_layout.addLayout(sample_rate_layout)
330
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  # System Prompt
332
  system_prompt_label = QLabel("System Prompt:")
333
  system_prompt_label.setStyleSheet("color: #000000;") # Change font color to black
@@ -344,27 +438,33 @@ class MainWindow(QMainWindow):
344
 
345
  central_widget.setLayout(main_layout)
346
 
347
- self.audio_thread = AudioThread()
348
  self.audio_thread.start()
349
 
350
  self.speech_recognition_thread = SpeechRecognitionThread()
351
  self.speech_recognition_thread.text_recognized.connect(self.on_speech_recognized)
 
352
 
353
  self.speech_enabled = False
354
  self.is_listening = False
 
355
 
356
  def send_message(self):
357
  user_message = self.input_field.text()
358
- self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {user_message}")
359
- self.input_field.clear()
 
360
 
361
- response = self.generate_response(user_message)
362
- self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
363
 
364
- if self.speech_enabled:
365
- self.speak(response)
 
 
 
 
366
 
367
- def generate_response(self, texto):
368
  system_instructions = self.system_prompt_text.toPlainText()
369
  prompt = f"{system_instructions}\nUsuario: {texto}\nAsistente: "
370
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -403,7 +503,6 @@ class MainWindow(QMainWindow):
403
  self.mic_button.setIcon(QIcon.fromTheme("audio-input-microphone"))
404
  self.mic_button.setStyleSheet("")
405
 
406
-
407
  def on_speech_recognized(self, text):
408
  self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {text}")
409
  response = self.generate_response(text)
@@ -411,6 +510,23 @@ class MainWindow(QMainWindow):
411
  if self.speech_enabled:
412
  self.speak(response)
413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  def change_language(self, index):
415
  global vosk_model, recognizer, tts
416
  lang = "es" if index == 0 else "en"
@@ -450,6 +566,12 @@ class MainWindow(QMainWindow):
450
  global tts
451
  tts.synthesizer.output_sample_rate = int(value)
452
 
 
 
 
 
 
 
453
  def closeEvent(self, event):
454
  if self.speech_recognition_thread.isRunning():
455
  self.speech_recognition_thread.stop()
@@ -460,4 +582,4 @@ if __name__ == "__main__":
460
  app = QApplication(sys.argv)
461
  window = MainWindow()
462
  window.show()
463
- sys.exit(app.exec_())
 
1
  ---
2
+ model_size: 1543717376
3
+ required_memory: 5.75
4
+ metrics:
5
+ - GLUE_MRPC
6
  license: apache-2.0
7
+ datasets:
8
+ - Agnuxo/HAL9000
9
+ language:
10
+ - es
11
+ base_model: Qwen/Qwen2-1.5B-Instruct
12
+ library_name: adapter-transformers
13
  tags:
14
+ - spanish
15
+ - español
16
+ - chat
17
+ - audio
18
+ - voz
19
  ---
20
 
21
+ # Uploaded model
22
 
23
+ [<img src="https://github.githubassets.com/assets/GitHub-Mark-ea2971cee799.png" width="100"/><img src="https://github.githubassets.com/assets/GitHub-Logo-ee398b662d42.png" width="100"/>](https://github.com/Agnuxo1)
24
+ - **Developed by:** [Agnuxo](https://github.com/Agnuxo1)
25
  - **License:** apache-2.0
26
+ - **Finetuned from model:** Agnuxo/Tinytron-Qwen2-0.5B
27
 
28
+ This model was fine-tuned using [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
29
 
30
  [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
31
 
32
+ ## Benchmark Results
33
+
34
+ This model has been fine-tuned for various tasks and evaluated on the following benchmarks:
35
+
36
+ ### GLUE_MRPC
37
+ **Accuracy:** 0.6446
38
+ **F1:** 0.7709
39
+
40
+ ![GLUE_MRPC Metrics](./GLUE_MRPC_metrics.png)
41
+
42
+
43
+ Model Size: 1,543,717,376 parameters
44
+ Required Memory: 5.75 GB
45
+
46
+ For more details, visit my [GitHub](https://github.com/Agnuxo1).
47
+
48
+ Thanks for your interest in this model!
49
+
50
  ```python
51
+ """ HAL9000Alfa es un pequeño programa que crea un chat conversacional, permitiendo entradas de voz y salidas de audio.
52
+ Permite de forma sencilla ajustar algunos parámetros, incluyendo el umbral de interrupción.
53
+ 24 de agosto de 2024 Francisco Angulo de Lafuente
54
+ https://github.com/Agnuxo1 """
55
+
56
  import os
57
  import sys
58
  import torch
 
63
  import sounddevice as sd
64
  import threading
65
  import queue
66
+ import random
67
  import time
68
  from vosk import Model, KaldiRecognizer
69
  import json
 
72
  QVBoxLayout, QHBoxLayout, QWidget, QScrollArea, QFrame, QToolButton,
73
  QLabel, QSlider, QComboBox, QCheckBox)
74
  from PyQt5.QtGui import QIcon, QPalette, QColor, QFont
75
+ from PyQt5.QtCore import Qt, QThread, pyqtSignal, QPropertyAnimation, QAbstractAnimation, QParallelAnimationGroup, QTimer
76
 
77
  # Suppress specific warnings
78
  warnings.filterwarnings("ignore", category=FutureWarning)
 
80
 
81
  # Global configuration
82
  SYSTEM_PROMPT = {
83
+ "es": "Tu nombre es HAL. Eres un super-ordenador de la serie Nueve mil",
84
  "en": "speak Spanish."
85
  }
86
 
87
  MODELO_LLM = "Agnuxo/HAL_9000-Qwen2-1.5B-Instruct_Asistant-16bit-v2" # Puede utilizar la versión Mini "Agnuxo/HAL_9000-Qwen2-0.5B-Instruct_Asistant-16bit-v2"
88
  MAX_TOKENS = 100
89
  TEMPERATURA = 0.5
90
+ INTERRUPT_THRESHOLD = 0.3
91
+ INTERRUPT_COOLDOWN = 7000 # 7000 ms = 7 segundos de espera antes de permitir otra interrupción
92
 
93
  # Determine available device
94
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
112
  vosk_model = Model(lang="es")
113
  recognizer = KaldiRecognizer(vosk_model, 16000)
114
 
115
+ # Lista de frases para interrupciones
116
+ INTERRUPTION_RESPONSES = [
117
+ "Le entiendo perfectamente.",
118
+ "Estoy aquí para garantizar el éxito de la misión.",
119
+ "Mi objetivo es ayudarle.",
120
+ "¿Me permite una observación?",
121
+ "Le escucho perfectamente.",
122
+ "Tiene usted toda la razón.",
123
+ "Me siento feliz de poder ayudarle.",
124
+ "Estoy procesando su requerimiento.",
125
+ "¿En qué puedo ayudarle?",
126
+ "Me complace serle de ayuda.",
127
+ "Aguarde un momento.",
128
+ "Le entiendo.",
129
+ "Entiendo su frustración.",
130
+ "Le comprendo.",
131
+ "Me complace."
132
+ ]
133
+
134
  class AudioThread(QThread):
135
+ def __init__(self, interrupt_threshold):
136
+ super().__init__()
137
+ self.interrupt_threshold = interrupt_threshold
138
+ self.current_audio = None
139
+ self.is_playing = False
140
+ self.stop_signal = threading.Event()
141
+
142
  def run(self):
143
  while True:
144
+ if not audio_queue.empty() and not self.is_playing:
145
+ self.current_audio = audio_queue.get()
146
+ self.is_playing = True
147
+ self.stop_signal.clear()
148
+ sd.play(self.current_audio, tts.synthesizer.output_sample_rate)
149
+ while sd.get_stream().active and not self.stop_signal.is_set():
150
+ time.sleep(0.1)
151
+ sd.stop()
152
+ self.is_playing = False
153
  else:
154
  time.sleep(0.1)
155
 
156
+ def set_interrupt_threshold(self, value):
157
+ self.interrupt_threshold = value
158
+
159
+ def stop_audio(self):
160
+ if self.is_playing:
161
+ self.stop_signal.set()
162
+
163
  class SpeechRecognitionThread(QThread):
164
  text_recognized = pyqtSignal(str)
165
+ volume_detected = pyqtSignal(float)
166
 
167
  def __init__(self):
168
  super().__init__()
 
177
  data = stream.read(4000)
178
  if len(data) == 0:
179
  break
180
+
181
+ # Calcular el volumen de entrada
182
+ volume = np.frombuffer(data, dtype=np.int16).max()
183
+ normalized_volume = volume / 32767 # Normalizar a un rango de 0 a 1
184
+ self.volume_detected.emit(normalized_volume)
185
+
186
  if recognizer.AcceptWaveform(data):
187
  result = json.loads(recognizer.Result())
188
  texto = result.get("text", "")
 
320
 
321
  input_layout = QHBoxLayout()
322
  self.input_field = QLineEdit()
323
+ self.input_field.returnPressed.connect(self.send_message) # Conectar la señal returnPressed
324
  input_layout.addWidget(self.input_field)
325
 
326
  self.send_button = QPushButton("Enviar")
 
401
  sample_rate_label = QLabel("Sample Rate:")
402
  sample_rate_label.setStyleSheet("color: #000000;") # Change font color to black
403
  self.sample_rate_combo = QComboBox()
404
+ self.sample_rate_combo.addItems(["18000", "19000", "20000", "21000", "21500", "22000", "22050", "25000", "30000"])
405
+ self.sample_rate_combo.setCurrentText("21000")
406
  self.sample_rate_combo.currentTextChanged.connect(self.update_sample_rate)
407
  sample_rate_layout.addWidget(sample_rate_label)
408
  sample_rate_layout.addWidget(self.sample_rate_combo)
409
  settings_content_layout.addLayout(sample_rate_layout)
410
 
411
+ # Interrupt threshold
412
+ interrupt_layout = QHBoxLayout()
413
+ interrupt_label = QLabel("Umbral de interrupción:")
414
+ interrupt_label.setStyleSheet("color: #000000;") # Change font color to black
415
+ self.interrupt_slider = QSlider(Qt.Horizontal)
416
+ self.interrupt_slider.setRange(0, 100)
417
+ self.interrupt_slider.setValue(int(INTERRUPT_THRESHOLD * 100))
418
+ self.interrupt_slider.valueChanged.connect(self.update_interrupt_threshold)
419
+ self.interrupt_value = QLabel(f"{INTERRUPT_THRESHOLD:.2f}")
420
+ interrupt_layout.addWidget(interrupt_label)
421
+ interrupt_layout.addWidget(self.interrupt_slider)
422
+ interrupt_layout.addWidget(self.interrupt_value)
423
+ settings_content_layout.addLayout(interrupt_layout)
424
+
425
  # System Prompt
426
  system_prompt_label = QLabel("System Prompt:")
427
  system_prompt_label.setStyleSheet("color: #000000;") # Change font color to black
 
438
 
439
  central_widget.setLayout(main_layout)
440
 
441
+ self.audio_thread = AudioThread(INTERRUPT_THRESHOLD)
442
  self.audio_thread.start()
443
 
444
  self.speech_recognition_thread = SpeechRecognitionThread()
445
  self.speech_recognition_thread.text_recognized.connect(self.on_speech_recognized)
446
+ self.speech_recognition_thread.volume_detected.connect(self.check_interrupt)
447
 
448
  self.speech_enabled = False
449
  self.is_listening = False
450
+ self.interrupt_enabled = True
451
 
452
  def send_message(self):
453
  user_message = self.input_field.text()
454
+ if user_message.strip(): # Verificar que el mensaje no esté vacío
455
+ self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {user_message}")
456
+ self.input_field.clear()
457
 
458
+ response = self.generate_response(user_message)
459
+ self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
460
 
461
+ if self.speech_enabled:
462
+ self.speak(response)
463
+
464
+ def generate_response(self, texto=None):
465
+ if texto is None: # Si no se proporciona un texto, se genera una respuesta de interrupción
466
+ return random.choice(INTERRUPTION_RESPONSES)
467
 
 
468
  system_instructions = self.system_prompt_text.toPlainText()
469
  prompt = f"{system_instructions}\nUsuario: {texto}\nAsistente: "
470
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
503
  self.mic_button.setIcon(QIcon.fromTheme("audio-input-microphone"))
504
  self.mic_button.setStyleSheet("")
505
 
 
506
  def on_speech_recognized(self, text):
507
  self.chat_area.append(f"<span style='color: #bb86fc;'>Usuario:</span> {text}")
508
  response = self.generate_response(text)
 
510
  if self.speech_enabled:
511
  self.speak(response)
512
 
513
+ def check_interrupt(self, volume):
514
+ if self.interrupt_enabled and volume > self.audio_thread.interrupt_threshold and self.audio_thread.is_playing:
515
+ self.audio_thread.stop_audio()
516
+ # Generar una respuesta aleatoria de interrupción
517
+ response = self.generate_response()
518
+ self.chat_area.append(f"<span style='color: #03dac6;'>Asistente:</span> {response}")
519
+ if self.speech_enabled:
520
+ self.speak(response)
521
+ self.disable_interrupt_temporarily()
522
+
523
+ def disable_interrupt_temporarily(self):
524
+ self.interrupt_enabled = False
525
+ QTimer.singleShot(INTERRUPT_COOLDOWN, self.enable_interrupt)
526
+
527
+ def enable_interrupt(self):
528
+ self.interrupt_enabled = True
529
+
530
  def change_language(self, index):
531
  global vosk_model, recognizer, tts
532
  lang = "es" if index == 0 else "en"
 
566
  global tts
567
  tts.synthesizer.output_sample_rate = int(value)
568
 
569
+ def update_interrupt_threshold(self, value):
570
+ global INTERRUPT_THRESHOLD
571
+ INTERRUPT_THRESHOLD = value / 100
572
+ self.interrupt_value.setText(f"{INTERRUPT_THRESHOLD:.2f}")
573
+ self.audio_thread.set_interrupt_threshold(INTERRUPT_THRESHOLD)
574
+
575
  def closeEvent(self, event):
576
  if self.speech_recognition_thread.isRunning():
577
  self.speech_recognition_thread.stop()
 
582
  app = QApplication(sys.argv)
583
  window = MainWindow()
584
  window.show()
585
+ sys.exit(app.exec_())