Spaces:

shamimjony1000
/

mn

Sleeping

App Files Files Community

shamimjony1000 commited on 27 days ago

Commit

24dab17

•

1 Parent(s): 83fca7e

Upload 9 files

Browse files

Files changed (9) hide show

app.py +6 -0
database.py +97 -0
gemini.py +84 -0
memory.py +148 -0
requests.db +0 -0
requirements.txt +7 -0
text_to_speech.py +11 -0
ui.py +273 -0
voice.py +50 -0

app.py ADDED Viewed

	@@ -0,0 +1,6 @@

+import gradio as gr
+from ui import create_ui
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    app = create_ui()  # create_ui is imported from the ui module
+    app.launch()  # call launch() on the object create_ui() returned

database.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import sqlite3
+from datetime import datetime
+import time
+from contextlib import contextmanager
+class Database:
+    def __init__(self, db_name="requests.db"):
+        self.db_name = db_name
+        self.max_retries = 3
+        self.retry_delay = 1
+        self.initialize_database()
+    @contextmanager
+    def get_connection(self):
+        conn = sqlite3.connect(self.db_name)
+        try:
+            yield conn
+        finally:
+            conn.close()
+    def initialize_database(self):
+        for attempt in range(self.max_retries):
+            try:
+                with self.get_connection() as conn:
+                    conn.execute('PRAGMA encoding="UTF-8"')
+                    cursor = conn.cursor()
+                    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='requests'")
+                    if not cursor.fetchone():
+                        self.create_table(conn)
+                    else:
+                        cursor.execute('PRAGMA table_info(requests)')
+                        columns = [col[1] for col in cursor.fetchall()]
+                        required_columns = ['id', 'timestamp', 'project_number', 'project_name', 'amount', 'reason', 'original_text']
+                        if not all(col in columns for col in required_columns):
+                            cursor.execute('ALTER TABLE requests RENAME TO requests_old')
+                            self.create_table(conn)
+                            cursor.execute('''
+                                INSERT INTO requests (timestamp, project_number, project_name, amount, reason)
+                                SELECT timestamp, project_number, project_name, amount, reason
+                                FROM requests_old
+                            ''')
+                            cursor.execute('DROP TABLE requests_old')
+                            conn.commit()
+                return
+            except sqlite3.OperationalError as e:
+                if attempt < self.max_retries - 1:
+                    time.sleep(self.retry_delay)
+                    continue
+                raise Exception(f"Could not initialize database after {self.max_retries} attempts: {str(e)}")
+    def create_table(self, conn):
+        cursor = conn.cursor()
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS requests (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                timestamp DATETIME,
+                project_number TEXT,
+                project_name TEXT,
+                amount REAL,
+                reason TEXT,
+                original_text TEXT
+            )
+        ''')
+        conn.commit()
+    def add_request(self, project_number, project_name, amount, reason, original_text=""):
+        for attempt in range(self.max_retries):
+            try:
+                with self.get_connection() as conn:
+                    cursor = conn.cursor()
+                    cursor.execute('''
+                        INSERT INTO requests (timestamp, project_number, project_name, amount, reason, original_text)
+                        VALUES (?, ?, ?, ?, ?, ?)
+                    ''', (datetime.now(), project_number, project_name, amount, reason, original_text))
+                    conn.commit()
+                return
+            except sqlite3.OperationalError as e:
+                if attempt < self.max_retries - 1:
+                    time.sleep(self.retry_delay)
+                    continue
+                raise Exception(f"Could not add request after {self.max_retries} attempts: {str(e)}")
+    def get_all_requests(self):
+        for attempt in range(self.max_retries):
+            try:
+                with self.get_connection() as conn:
+                    cursor = conn.cursor()
+                    cursor.execute('SELECT * FROM requests ORDER BY timestamp DESC')
+                    columns = [description[0] for description in cursor.description]
+                    results = cursor.fetchall()
+                    return [dict(zip(columns, row)) for row in results]
+            except sqlite3.OperationalError as e:
+                if attempt < self.max_retries - 1:
+                    time.sleep(self.retry_delay)
+                    continue
+                raise Exception(f"Could not fetch requests after {self.max_retries} attempts: {str(e)}")

gemini.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import google.generativeai as genai
+import os
+import json
+import re
+class GeminiProcessor:
+    def __init__(self):
+        api_key = "AIzaSyCLyDgZNcE_v4wLMFF8SoimKga9bbLSun0"
+        if not api_key:
+            raise ValueError("GOOGLE_API_KEY not found in environment variables")
+        genai.configure(api_key=api_key)
+        self.model = genai.GenerativeModel('gemini-pro')
+        self.config = genai.GenerationConfig(temperature=0)
+    def is_arabic(self, text):
+        arabic_pattern = re.compile('[\u0600-\u06FF]')
+        return bool(arabic_pattern.search(text))
+    def translate_arabic_to_english(self, text):
+        prompt = f"""
+        Translate the following Arabic text to English. If the text is mixed (Arabic and English),
+        translate only the Arabic parts and keep the English parts as is.
+        Keep numbers in their original format.
+        Text to translate: {text}
+        """
+        try:
+            response = self.model.generate_content(prompt)
+            return response.text.strip()
+        except Exception as e:
+            print(f"Translation error: {e}")
+            return text
+    def extract_request_details(self, text, context=""):
+        full_text = f"{context} {text}".strip()
+        is_arabic_input = self.is_arabic(full_text)
+        if is_arabic_input:
+            translated_text = self.translate_arabic_to_english(full_text)
+            processing_text = translated_text
+        else:
+            processing_text = full_text
+        prompt = f"""
+        Extract the following information from this text and previous context.
+        The input has been translated from Arabic if it contained Arabic text.
+        If any information is missing, leave it empty.
+        Format the response exactly as a JSON object with these keys:
+        {{
+            "project_number": "extracted number or empty string",
+            "project_name": "extracted name or empty string",
+            "amount": extracted number or 0,
+            "reason": "extracted reason or empty string",
+            "missing_fields": ["list of missing required fields"],
+            "original_text": "the original input text"
+        }}
+        ##No preamble## Response in VALID JSON ONLY##
+        Text to analyze: {processing_text}
+        """
+        try:
+            response = self.model.generate_content(prompt, generation_config=self.config)
+            result = json.loads(response.text)
+            required_keys = ['project_number', 'project_name', 'amount', 'reason', 'missing_fields']
+            if not all(key in result for key in required_keys):
+                raise ValueError("Missing required keys in response")
+            result['amount'] = float(result.get('amount', 0))
+            result['original_text'] = full_text
+            if is_arabic_input:
+                result['translated_text'] = processing_text
+            return result
+        except json.JSONDecodeError as e:
+            print(f"JSON parsing error: {e}")
+            return None
+        except Exception as e:
+            print(f"Error processing request: {e}")
+            return None

memory.py ADDED Viewed

	@@ -0,0 +1,148 @@

+from datetime import datetime, timedelta
+import json
+from gtts import gTTS
+import io
+class MemoryHandler:
+    def __init__(self):
+        self.conversation_history = []
+        self.max_history = 5
+        self.context_timeout = timedelta(minutes=2)
+        self.last_interaction_time = None
+        self.partial_info = {
+            'project_number': None,
+            'project_name': None,
+            'amount': None,
+            'reason': None,
+            'timestamp': None
+        }
+        self.confidence_scores = {
+            'project_number': 0.0,
+            'project_name': 0.0,
+            'amount': 0.0,
+            'reason': 0.0
+        }
+    def add_interaction(self, text: str, extracted_info: dict = None) -> None:
+        current_time = datetime.now()
+        if self.last_interaction_time and \
+           (current_time - self.last_interaction_time) > self.context_timeout:
+            self.clear_partial_info()
+        if text:
+            self.conversation_history.append({
+                'text': text,
+                'timestamp': current_time.isoformat(),
+                'extracted_info': extracted_info
+            })
+            if len(self.conversation_history) > self.max_history:
+                self.conversation_history.pop(0)
+        if extracted_info:
+            self._update_partial_info(extracted_info, current_time)
+        self.last_interaction_time = current_time
+    def _update_partial_info(self, extracted_info: dict, current_time: datetime) -> None:
+        for key in self.partial_info:
+            if key in extracted_info and extracted_info[key]:
+                new_value = extracted_info[key]
+                current_value = self.partial_info[key]
+                if (current_value is None or
+                    extracted_info.get(f'{key}_confidence', 0.5) >
+                    self.confidence_scores.get(key, 0)):
+                    self.partial_info[key] = new_value
+                    self.confidence_scores[key] = extracted_info.get(f'{key}_confidence', 0.5)
+        self.partial_info['timestamp'] = current_time
+    def get_context(self) -> str:
+        context_parts = []
+        for entry in self.conversation_history:
+            timestamp = datetime.fromisoformat(entry['timestamp']).strftime('%H:%M:%S')
+            context_parts.append(f"[{timestamp}] {entry['text']}")
+        context = " ".join(context_parts)
+        partial_context = []
+        for key, value in self.partial_info.items():
+            if value and key != 'timestamp':
+                confidence = self.confidence_scores.get(key, 0)
+                partial_context.append(f"{key}: {value} (confidence: {confidence:.2f})")
+        if partial_context:
+            context += "\nPartial information: " + ", ".join(partial_context)
+        return context
+    def get_partial_info(self) -> dict:
+        info = {k: v for k, v in self.partial_info.items()
+               if k != 'timestamp' and v is not None}
+        info['confidence_scores'] = self.confidence_scores
+        return info
+    def merge_partial_info(self, new_info: dict) -> None:
+        for key in self.partial_info:
+            if key in new_info and new_info[key] is not None:
+                new_confidence = new_info.get(f'{key}_confidence', 0.5)
+                if (self.partial_info[key] is None or
+                    new_confidence > self.confidence_scores.get(key, 0)):
+                    self.partial_info[key] = new_info[key]
+                    self.confidence_scores[key] = new_confidence
+    def clear_partial_info(self) -> None:
+        self.partial_info = {
+            'project_number': None,
+            'project_name': None,
+            'amount': None,
+            'reason': None,
+            'timestamp': None
+        }
+        self.confidence_scores = {
+            'project_number': 0.0,
+            'project_name': 0.0,
+            'amount': 0.0,
+            'reason': 0.0
+        }
+    def clear_memory(self) -> None:
+        self.conversation_history = []
+        self.clear_partial_info()
+        self.last_interaction_time = None
+        return "Memory cleared!"
+    def get_missing_fields(self) -> list:
+        missing = []
+        confidence_threshold = 0.5
+        for field in ['project_number', 'project_name', 'amount', 'reason']:
+            if (self.partial_info.get(field) is None or
+                self.confidence_scores.get(field, 0) < confidence_threshold):
+                missing.append(field)
+        return missing
+    def get_prompt_for_missing_info(self) -> str:
+        missing = self.get_missing_fields()
+        if not missing:
+            return "All required information has been provided with sufficient confidence."
+        current_info = self.get_partial_info()
+        prompt = "Current information:\n"
+        for key, value in current_info.items():
+            if key != 'confidence_scores' and value is not None:
+                confidence = self.confidence_scores.get(key, 0)
+                prompt += f"- {key}: {value} (confidence: {confidence:.2f})\n"
+        prompt += "\nPlease provide or clarify the following information:\n"
+        for field in missing:
+            current_confidence = self.confidence_scores.get(field, 0)
+            if current_confidence > 0:
+                prompt += f"- {field} (current confidence: {current_confidence:.2f}, needs improvement)\n"
+            else:
+                prompt += f"- {field} (missing)\n"
+        return prompt

requests.db ADDED Viewed

Binary file (12.3 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+pandas
+google-generativeai
+SpeechRecognition
+pydub
+gTTS
+python-dotenv

text_to_speech.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from gtts import gTTS
+import io
+def play_text(text: str) -> tuple[str, str]:
+    try:
+        tts = gTTS(text=text, lang='en')
+        audio_path = "temp_audio.mp3"
+        tts.save(audio_path)
+        return audio_path, None
+    except Exception as e:
+        return None, f"Error generating audio: {str(e)}"

ui.py ADDED Viewed

	@@ -0,0 +1,273 @@

+import gradio as gr
+import pandas as pd
+from database import Database
+from voice import VoiceHandler
+from gemini import GeminiProcessor
+from memory import MemoryHandler
+from text_to_speech import play_text
+def create_ui():
+    # Initialize components
+    db = Database()
+    voice_handler = VoiceHandler()
+    gemini_processor = GeminiProcessor()
+    memory_handler = MemoryHandler()
+    def validate_request(project_number, project_name, amount, reason):
+        if not project_number or not project_name or not amount or not reason:
+            missing_fields = []
+            if not project_number: missing_fields.append("project number")
+            if not project_name: missing_fields.append("project name")
+            if not amount: missing_fields.append("amount")
+            if not reason: missing_fields.append("reason")
+            return False, f"Please provide: {', '.join(missing_fields)}"
+        return True, ""
+    def process_text_input(text, language):
+        if not text:
+            return "Please enter some text first.", None, None, None, None
+        context = memory_handler.get_context()
+        details = gemini_processor.extract_request_details(text, context)
+        if not details:
+            return "Could not extract request details. Please try again.", None, None, None, None
+        memory_handler.add_interaction(text, details)
+        partial_info = memory_handler.get_partial_info()
+        return (
+            f"Text processed! {memory_handler.get_prompt_for_missing_info()}",
+            partial_info.get('project_number', ''),
+            partial_info.get('project_name', ''),
+            partial_info.get('amount', 0),
+            partial_info.get('reason', '')
+        )
+    def process_voice_input(audio_path, language):
+        if not audio_path:
+            return "No audio detected.", None, None, None, None
+        voice_text = voice_handler.process_audio_file(audio_path, language)
+        if voice_text.startswith("Error:"):
+            return voice_text, None, None, None, None
+        context = memory_handler.get_context()
+        details = gemini_processor.extract_request_details(voice_text, context)
+        if not details:
+            return "Could not extract request details. Please try again.", None, None, None, None
+        memory_handler.add_interaction(voice_text, details)
+        partial_info = memory_handler.get_partial_info()
+        return (
+            f"Voice processed! You said: {voice_text}\n\n{memory_handler.get_prompt_for_missing_info()}",
+            partial_info.get('project_number', ''),
+            partial_info.get('project_name', ''),
+            partial_info.get('amount', 0),
+            partial_info.get('reason', '')
+        )
+    def confirm_submission(project_number, project_name, amount, reason):
+        is_valid, message = validate_request(project_number, project_name, amount, reason)
+        if not is_valid:
+            return (
+                message,  # confirmation_output
+                None,    # confirmation_audio
+                gr.update(interactive=False),  # submit_btn
+                gr.update(interactive=True),   # confirm_btn
+                gr.update(interactive=True),   # project_number
+                gr.update(interactive=True),   # project_name
+                gr.update(interactive=True),   # amount
+                gr.update(interactive=True)    # reason
+            )
+        confirmation_text = f"Sir please ensure before submit project number: {project_number}, project name: {project_name}, amount: {amount} riyals, reason for request: {reason} are ok"
+        audio_path, error = play_text(confirmation_text)
+        if error:
+            return (
+                error,   # confirmation_output
+                None,    # confirmation_audio
+                gr.update(interactive=False),  # submit_btn
+                gr.update(interactive=True),   # confirm_btn
+                gr.update(interactive=True),   # project_number
+                gr.update(interactive=True),   # project_name
+                gr.update(interactive=True),   # amount
+                gr.update(interactive=True)    # reason
+            )
+        return (
+            "Please confirm the details you heard.",  # confirmation_output
+            audio_path,                              # confirmation_audio
+            gr.update(interactive=True),             # submit_btn
+            gr.update(interactive=False),            # confirm_btn
+            gr.update(interactive=False),            # project_number
+            gr.update(interactive=False),            # project_name
+            gr.update(interactive=False),            # amount
+            gr.update(interactive=False)             # reason
+        )
+    def submit_request(project_number, project_name, amount, reason):
+        is_valid, message = validate_request(project_number, project_name, amount, reason)
+        if not is_valid:
+            return message, None
+        try:
+            db.add_request(project_number, project_name, float(amount), reason)
+            memory_handler.clear_memory()
+            return "Request successfully added!", get_requests_df()
+        except Exception as e:
+            return f"Error saving request: {str(e)}", None
+    def get_requests_df():
+        try:
+            requests = db.get_all_requests()
+            if requests:
+                df = pd.DataFrame(requests)
+                columns = ['timestamp', 'project_number', 'project_name', 'amount', 'reason']
+                df = df[columns]
+                headers = df.columns.tolist()
+                data = df.values.tolist()
+                return {"headers": headers, "data": data}
+            return {"headers": ['timestamp', 'project_number', 'project_name', 'amount', 'reason'], "data": []}
+        except Exception as e:
+            print(f"Error getting requests: {str(e)}")
+            return {"headers": ['timestamp', 'project_number', 'project_name', 'amount', 'reason'], "data": []}
+    def reset_form():
+        return (
+            gr.update(value=""),             # project_number
+            gr.update(value=""),             # project_name
+            gr.update(value=None),           # amount
+            gr.update(value=""),             # reason
+            gr.update(value=""),             # confirmation_output
+            gr.update(value=None),           # confirmation_audio
+            gr.update(interactive=False),    # submit_btn
+            gr.update(interactive=True),     # confirm_btn
+            gr.update(interactive=True),     # project_number
+            gr.update(interactive=True),     # project_name
+            gr.update(interactive=True),     # amount
+            gr.update(interactive=True),     # reason
+            gr.update(value=""),            # text_input
+            gr.update(value=None),          # audio_input
+            gr.update(value="")             # process_output
+        )
+    # Create UI layout
+    with gr.Blocks(title="AI Agent Money Request System") as app:
+        gr.Markdown("# AI Agent Money Request System")
+        with gr.Tab("Input"):
+            language = gr.Dropdown(
+                choices=["English", "Arabic", "Mixed (Arabic/English)"],
+                value="English",
+                label="Select Language"
+            )
+            with gr.Tab("Voice Input"):
+                audio_input = gr.Audio(
+                    label="Voice Input",
+                    type="filepath",
+                    sources=["microphone"]
+                )
+                voice_process_btn = gr.Button("Process Voice")
+            with gr.Tab("Text Input"):
+                text_input = gr.Textbox(
+                    lines=3,
+                    placeholder="Enter your request here...",
+                    label="Text Input"
+                )
+                text_process_btn = gr.Button("Process Text")
+            process_output = gr.Textbox(label="Processing Result")
+            with gr.Group():
+                project_number = gr.Textbox(label="Project Number")
+                project_name = gr.Textbox(label="Project Name")
+                amount = gr.Number(label="Amount (in riyals)")
+                reason = gr.Textbox(label="Reason for Request")
+                with gr.Row():
+                    confirm_btn = gr.Button("Confirm Details", variant="secondary")
+                    submit_btn = gr.Button("Submit Request", variant="primary", interactive=False)
+                confirmation_output = gr.Textbox(label="Confirmation Message")
+                confirmation_audio = gr.Audio(label="Confirmation Audio", type="filepath")
+            result_text = gr.Textbox(label="Submission Result")
+        with gr.Tab("Existing Requests"):
+            requests_table = gr.DataFrame(
+                headers=["Timestamp", "Project Number", "Project Name", "Amount", "Reason"],
+                label="Existing Requests"
+            )
+            refresh_btn = gr.Button("Refresh")
+        # Event handlers
+        text_process_btn.click(
+            process_text_input,
+            inputs=[text_input, language],
+            outputs=[process_output, project_number, project_name, amount, reason]
+        )
+        voice_process_btn.click(
+            process_voice_input,
+            inputs=[audio_input, language],
+            outputs=[process_output, project_number, project_name, amount, reason]
+        )
+        # Confirm button handler with proper submit button and form field state management
+        confirm_btn.click(
+            confirm_submission,
+            inputs=[project_number, project_name, amount, reason],
+            outputs=[
+                confirmation_output,
+                confirmation_audio,
+                submit_btn,
+                confirm_btn,
+                project_number,
+                project_name,
+                amount,
+                reason
+            ]
+        )
+        # Submit button handler with form reset
+        submit_btn.click(
+            submit_request,
+            inputs=[project_number, project_name, amount, reason],
+            outputs=[result_text, requests_table]
+        ).then(
+            reset_form,
+            outputs=[
+                project_number,
+                project_name,
+                amount,
+                reason,
+                confirmation_output,
+                confirmation_audio,
+                submit_btn,
+                confirm_btn,
+                project_number,
+                project_name,
+                amount,
+                reason,
+                text_input,
+                audio_input,
+                process_output
+            ]
+        )
+        refresh_btn.click(
+            lambda: get_requests_df(),
+            outputs=[requests_table]
+        )
+        # Initialize requests table
+        requests_table.value = get_requests_df()
+    return app

voice.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import speech_recognition as sr
+import os
+from pydub import AudioSegment
+import tempfile
+class VoiceHandler:
+    def __init__(self):
+        self.recognizer = sr.Recognizer()
+        self.recognizer.energy_threshold = 20000
+        self.recognizer.dynamic_energy_threshold = False
+        self.recognizer.pause_threshold = 0.8
+    def process_audio_file(self, audio_path: str, language: str) -> str:
+        try:
+            if not audio_path.endswith('.wav'):
+                audio = AudioSegment.from_file(audio_path)
+                temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+                audio.export(temp_wav.name, format='wav')
+                audio_path = temp_wav.name
+            with sr.AudioFile(audio_path) as source:
+                audio = self.recognizer.record(source)
+                if language == "Arabic":
+                    return self.recognizer.recognize_google(audio, language="ar-SA")
+                elif language == "Mixed (Arabic/English)":
+                    try:
+                        return self.recognizer.recognize_google(audio, language="ar-SA")
+                    except sr.UnknownValueError:
+                        return self.recognizer.recognize_google(audio, language="en-US")
+                else:  # English
+                    return self.recognizer.recognize_google(audio, language="en-US")
+        except sr.RequestError as e:
+            return f"Error: Could not request results from speech service: {str(e)}"
+        except sr.UnknownValueError:
+            return "Error: Could not understand audio. Please speak clearly and try again."
+        except Exception as e:
+            return f"Error: {str(e)}"
+        finally:
+            if 'temp_wav' in locals():
+                os.unlink(temp_wav.name)
+    def check_microphone_access(self) -> bool:
+        try:
+            with sr.Microphone() as source:
+                self.recognizer.adjust_for_ambient_noise(source, duration=0.1)
+                return True
+        except (OSError, AttributeError, sr.RequestError):
+            return False