File size: 3,974 Bytes
070c576
 
 
 
 
 
6449689
92d0a3c
 
6449689
 
 
 
 
 
 
 
 
 
 
 
4011ea8
 
 
 
 
 
 
 
 
 
 
 
6449689
4011ea8
803a58c
6449689
 
 
 
4011ea8
f132467
6449689
 
 
 
4011ea8
 
 
 
 
 
 
 
 
 
6449689
 
 
 
 
 
 
 
 
 
 
 
 
 
070c576
6449689
 
070c576
 
 
 
 
 
6449689
070c576
6449689
070c576
6449689
 
4011ea8
 
 
 
 
 
070c576
6449689
 
070c576
4011ea8
 
070c576
 
853deb7
 
070c576
853deb7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gradio as gr
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import os
import pandas as pd
import numpy as np
from groq import Groq



def ask_llm(query, input, client_index):
    """Send one piece of content to the selected LLM provider and return the reply text.

    The same system prompt (which embeds ``query``) is used for every
    provider; ``input`` is sent as the single user message.

    Args:
        query: Instruction inserted into the system prompt after
            "# User Query:".
        input: Content sent as the user message. The name shadows the
            builtin but is kept because it is part of the signature.
        client_index: ``"Groq"`` or ``"Mistral"`` select those providers;
            any other value falls through to the Anthropic client.

    Returns:
        The text of the first choice (Groq / Mistral) or of the first
        content block (Anthropic).

    Raises:
        KeyError: If the API-key environment variable of the selected
            provider (``GROQ_API_KEY``, ``MISTRAL_API_KEY`` or
            ``CLAUDE_API_KEY``) is not set.
    """
    # Built once and shared by all providers.
    system_prompt = f"You are a helpful assistant. Only show your final response to the **User Query**! Do not provide any explanations or details: \n# User Query:\n{query}."

    if client_index in ("Groq", "Mistral"):
        # Groq and Mistral both take the system prompt as the first message.
        messages = [
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": f"{input}",
            },
        ]
        if client_index == "Groq":
            client = Groq(api_key=os.environ["GROQ_API_KEY"])
            chat_completion = client.chat.completions.create(
                messages=messages,
                model='mixtral-8x7b-32768',
            )
        else:
            client = MistralClient(api_key=os.environ['MISTRAL_API_KEY'])
            chat_completion = client.chat(
                messages=messages,
                model='mistral-small-latest',
            )
        return chat_completion.choices[0].message.content

    # Imported here because the module-level imports do not include
    # `anthropic`; without this the fallback branch raises NameError.
    import anthropic

    # Anthropic takes the system prompt as a separate argument and the user
    # content as a list of typed blocks.
    claude_messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"{input}",
                }
            ],
        }
    ]
    client = anthropic.Anthropic(api_key=os.environ['CLAUDE_API_KEY'])
    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=350,
        temperature=0,
        system=system_prompt,
        messages=claude_messages,
    )
    return response.content[0].text

def filter_df(df, column_name, keywords):
    """Return the rows of ``df`` that contain at least one keyword.

    Matching is a case-insensitive substring test against string cells.
    Non-string cells (numbers, NaN, None) are treated as the empty string,
    so they only match an empty-string keyword.

    Args:
        df: DataFrame to filter.
        column_name: Column to search. If it is not one of ``df``'s columns
            (for example a placeholder value), every column is searched.
        keywords: Iterable of keyword strings. ``None`` or an empty
            collection disables filtering.

    Returns:
        ``df`` itself when there are no keywords, otherwise a new DataFrame
        holding only the matching rows (original index preserved).
    """
    # Guard clause: None used to crash on len(); treat it like "no keywords".
    if keywords is None or len(keywords) == 0:
        return df

    # Lower-case the keywords once instead of once per cell.
    needles = [keyword.lower() for keyword in keywords]

    def _cell_matches(cell):
        haystack = cell.lower() if isinstance(cell, str) else ''
        return any(needle in haystack for needle in needles)

    if column_name in df.columns:
        flags = [_cell_matches(cell) for cell in df[column_name]]
    else:
        flags = [
            any(_cell_matches(cell) for cell in row)
            for row in df.itertuples(index=False, name=None)
        ]

    # A plain boolean list with .loc selects rows positionally and also
    # behaves well for an empty frame (empty list -> empty result).
    return df.loc[flags]

def chat_with_mistral(source_cols, dest_col, prompt, excel_file, url, search_col, keywords, client):
    """Run an LLM prompt over selected spreadsheet rows and save the answers.

    Reads ``excel_file``, adds (or resets) the ``dest_col`` column to empty
    strings, keeps the rows selected by ``filter_df(df, search_col,
    keywords)``, and for each of them sends the concatenated ``source_cols``
    values to ``ask_llm``. Answers are written into ``dest_col`` and the
    whole sheet is saved with ``DataFrame.to_excel``.

    Args:
        source_cols: Column names whose values form the content sent to the LLM.
        dest_col: Column that receives the LLM answers.
        prompt: Query passed to ``ask_llm``.
        excel_file: Excel input accepted by ``pandas.read_excel``; also used
            as the output target when no name can be derived from ``url``.
        url: URL whose second-to-last ``/``-separated segment names the
            output file (``<segment>.xlsx``).
        search_col: Column used for keyword filtering (see ``filter_df``).
        keywords: Keywords used for row filtering.
        client: Provider selector forwarded to ``ask_llm``.

    Returns:
        A ``(file_name, preview)`` tuple: the output target and the first
        five rows of the updated DataFrame.
    """
    print(f'xlsxfile = {excel_file}')
    df = pd.read_excel(excel_file)
    df[dest_col] = ""
    try:
        file_name = url.split("/")[-2] + ".xlsx"
    except (AttributeError, IndexError, TypeError):
        # url is missing, not a string, or has fewer than two "/"-separated
        # segments: fall back to the input target. NOTE(review): this
        # overwrites the input file - confirm that is intended.
        file_name = excel_file
    print(f"Keywords: {keywords}")

    filtered_df = filter_df(df, search_col, keywords)

    # Text produced by a row whose source columns are all NaN; such rows
    # are skipped. Loop-invariant, so it is built once.
    all_nan_content = "\n\n".join(f"{column_name}: nan" for column_name in source_cols)

    for index, row in filtered_df.iterrows():
        concatenated_content = "\n\n".join(f"{column_name}: {str(row[column_name])}" for column_name in source_cols)
        if concatenated_content == all_nan_content:
            continue
        llm_answer = ask_llm(prompt, concatenated_content, client)
        print(f"QUERY:\n{prompt}\nCONTENT:\n{concatenated_content[:200]}...\n\nANSWER:\n{llm_answer}")
        # Write into the full frame so unfiltered rows are kept in the output.
        df.at[index, dest_col] = llm_answer

    df.to_excel(file_name, index=False)
    return file_name, df.head(5)


def get_columns(file,progress=gr.Progress()):
    """Read an Excel file's column names and build choice updates from them.

    Args:
        file: Excel input accepted by ``pandas.read_excel``, or ``None``.
        progress: Gradio progress tracker; not used inside this function.

    Returns:
        A 6-tuple: five ``gr.update(choices=...)`` values followed by a
        DataFrame preview. The first three updates carry the column names,
        the fourth adds an empty-string choice, the fifth adds ``'[ALL]'``;
        the preview is the first five rows. When ``file`` is ``None`` all
        five updates have empty choices and the preview is an empty
        DataFrame. (Presumably these feed dropdown components - the wiring
        is not shown in this function.)
    """
    # Nothing uploaded: clear every choice list and show an empty preview.
    if file is None:
        cleared = tuple(gr.update(choices=[]) for _ in range(5))
        return (*cleared, pd.DataFrame())

    frame = pd.read_excel(file)
    names = list(frame.columns)

    plain_updates = tuple(gr.update(choices=names) for _ in range(3))
    with_blank = gr.update(choices=names + [""])
    with_all = gr.update(choices=names + ['[ALL]'])
    return (*plain_updates, with_blank, with_all, frame.head(5))