File size: 3,008 Bytes
070c576
 
 
 
 
 
6449689
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803a58c
6449689
 
 
 
 
f132467
6449689
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
070c576
6449689
 
070c576
 
 
 
 
 
6449689
070c576
6449689
070c576
6449689
 
 
 
 
070c576
6449689
 
070c576
 
 
 
7156cc6
6449689
070c576
6449689
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import os
import pandas as pd
import numpy as np
from groq import Groq

def ask_llm(query, input, client_index):
    messages = [
                {
                    "role": "system",
                    "content": f"You are a helpful assistant. Only show your final response to the **User Query**! Do not provide any explanations or details: \n# User Query:\n{query}."
                },
                {
                    "role": "user",
                    "content": f"{input}",
                }
            ]

    if client_index == 0:
        client = Groq(api_key=os.environ["GROQ_API_KEY"])
        chat_completion = client.chat.completions.create(
            messages=messages,
            model='mixtral-8x7b-32768',
        )
    else:
        client = MistralClient(api_key=os.environ['MISTRAL_API_KEY'])
        chat_completion = client.chat(
            messages=messages,
            model='mistral-small-latest',
        )

    return chat_completion.choices[0].message.content

def filter_df(df, column_name, keywords):
    if len(keywords)>0:
        if column_name in df.columns:
            contains_keyword = lambda x: any(keyword.lower() in (x.lower() if type(x)==str else '') for keyword in keywords)
            filtered_df = df[df[column_name].apply(contains_keyword)]
        else:
            contains_keyword = lambda row: any(keyword.lower() in (str(cell).lower() if isinstance(cell, str) else '') for keyword in keywords for cell in row)
            filtered_df = df[df.apply(contains_keyword, axis=1)]
    else:
        filtered_df = df
    return filtered_df

def chat_with_mistral(source_cols, dest_col, prompt, excel_file, url, search_col, keywords, client):
    print(f'xlsxfile = {excel_file}')
    df = pd.read_excel(excel_file)
    df[dest_col] = ""
    try:
      file_name = url.split("/")[-2] + ".xlsx"
    except:
      file_name = excel_file
    print(f"Keywords: {keywords}")

    filtred_df = filter_df(df, search_col, keywords)

    for index, row in filtred_df.iterrows():
        concatenated_content = "\n\n".join(f"{column_name}: {str(row[column_name])}" for column_name in source_cols)
        llm_answer = ask_llm(prompt, concatenated_content, client)
        print(f"QUERY:\n{prompt}\nCONTENT:\n{concatenated_content[:200]}...\n\nANSWER:\n{llm_answer}")
        df.at[index, dest_col] = llm_answer

    df.to_excel(file_name, index=False)
    return file_name, df.head(5)

def get_columns(file):
    if file is not None:
        df = pd.read_excel(file)
        columns = list(df.columns)
        return gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns), gr.update(choices=columns + [""]), gr.update(choices=columns + ['[ALL]']), df.head(5)
    else:
        return gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), gr.update(choices=[]), pd.DataFrame()