File size: 4,294 Bytes
e6af5e0
9aefd9e
e355279
e6af5e0
 
 
52077c3
e6af5e0
24af045
82ba273
e6af5e0
592ad4f
 
 
e6af5e0
52077c3
da545f1
 
 
e6af5e0
 
24af045
e6af5e0
 
 
 
 
 
 
 
 
 
 
 
 
 
592ad4f
 
 
 
 
 
e4f9add
592ad4f
 
 
 
 
 
e6af5e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4f9add
e6af5e0
 
269de0b
e6af5e0
 
 
 
 
 
 
 
 
 
 
 
 
 
dc79538
e6af5e0
 
 
 
 
 
 
 
592ad4f
 
 
 
 
 
 
 
 
 
852f1ba
592ad4f
 
 
 
 
e6af5e0
 
 
 
 
e4f9add
 
e6af5e0
 
 
 
 
 
 
bdae1d4
 
e4f9add
1799d45
46ac6e3
e4f9add
e6af5e0
 
e4f9add
b6489dc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import uuid
import json

import gradio as gr

from openai import AzureOpenAI

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

from huggingface_hub import CommitScheduler
from pathlib import Path


# Azure OpenAI client. Key and endpoint come from the environment; indexing
# os.environ directly means a missing variable raises KeyError right here.
client = AzureOpenAI(
    api_key=os.environ["AZURE_OPENAI_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version="2024-02-01",
)

# Embedding model handed to Chroma as its embedding function.
embedding_model = HuggingFaceEmbeddings(model_name="thenlper/gte-small")

# Name of the Chroma collection to open.
tesla_10k_collection = "tesla-10k-2019-to-2023"

# Open the collection from the on-disk store under ./tesla_db.
vectorstore_persisted = Chroma(
    embedding_function=embedding_model,
    persist_directory="./tesla_db",
    collection_name=tesla_10k_collection,
)

# Retriever view over the store: similarity search with k=5.
retriever = vectorstore_persisted.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)

# Set up request logging.
# The log file name carries a fresh UUID generated when this module is loaded.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"

# Background scheduler that commits the contents of log_folder to the "data"
# path of the dataset repo; `every` is the commit interval in minutes.
scheduler = CommitScheduler(
    folder_path=log_folder,
    path_in_repo="data",
    repo_id="document-qna-chroma-anyscale-logs",
    repo_type="dataset",
    every=2,
)

# System prompt: sets the assistant's role, describes the layout of the user
# message (a ###Context section followed by the question inside triple
# backticks), and instructs the model to answer only from that context or
# reply "I don't know".
qna_system_message = """
You are an assistant to a financial services firm who answers user queries on annual reports.
Users will ask questions delimited by triple backticks, that is, ```.
User input will have the context required by you to answer user questions.
This context will begin with the token: ###Context.
The context contains references to specific portions of a document relevant to the user query.
Please answer only using the context provided in the input. However, do not mention anything about the context in your answer. 
If the answer is not found in the context, respond "I don't know".
"""

# User-message template with two str.format placeholders: {context} and
# {question}. The question is wrapped in triple backticks to match the
# delimiter announced in the system prompt.
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question.
{context}
```
{question}
```
"""

# Define the predict function that runs when 'Submit' is clicked or when an API request is made
def predict(user_input: str):
    """Answer a user question from retrieved document chunks and log the exchange.

    The query is passed to the retriever, the page contents of the returned
    chunks are joined into one context string, and the chat model is asked to
    answer using the system and user prompt templates. Each call appends one
    JSON line (input, retrieved context, response) to the log file.

    Args:
        user_input: The question entered by the user.

    Returns:
        The content of the model's reply, or the error message as a string
        if the completion request raised an exception.
    """
    relevant_document_chunks = retriever.invoke(user_input)
    context_for_query = ".".join(
        chunk.page_content for chunk in relevant_document_chunks
    )

    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {
            'role': 'user',
            'content': qna_user_message_template.format(
                context=context_for_query,
                question=user_input
            )
        }
    ]

    try:
        response = client.chat.completions.create(
            model='gpt-4o-mini',
            messages=prompt,
            temperature=0
        )
        prediction = response.choices[0].message.content
    except Exception as e:
        # Report the failure to the user instead of crashing the request.
        # Convert to text: the exception object itself is not
        # JSON-serializable, so logging it below would raise TypeError and
        # hide the original error.
        prediction = str(e)

    # Serialize before taking the lock so the lock is held only for the write.
    log_line = json.dumps(
        {
            'user_input': user_input,
            'retrieved_context': context_for_query,
            'model_response': prediction
        }
    )

    # Append the record while holding the commit scheduler's lock, so the
    # write does not run in parallel with the scheduler's access to the folder.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(log_line)
            f.write("\n")

    return prediction


# Multi-line input box for the user's question.
textbox = gr.Textbox(lines=6, placeholder="Enter your query here")

# Sample rows shown in the UI.
# NOTE(review): every row has two entries while the interface declares a
# single input component — confirm the second entry is intended.
_example_rows = [
    ["What was the total revenue of the company in 2022?", "$ 81.46 Billion"],
    ["Summarize the Management Discussion and Analysis section of the 2021 report in 50 words.", ""],
    ["What was the company's debt level in 2020?", ""],
    ["Identify 5 key risks identified in the 2019 10k report? Respond with bullet point summaries.", ""],
    ["What is the view of the management on the future of electric vehicle batteries?", ""],
]

# Create the interface: one text input routed through predict to a text output.
demo = gr.Interface(
    fn=predict,
    inputs=textbox,
    outputs="text",
    title="AMA on Tesla 10-K statements",
    description="This web API presents an interface to ask questions on contents of the Tesla 10-K reports for the period 2019 - 2023.",
    article="Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
    examples=_example_rows,
    cache_examples=False,
    theme=gr.themes.Base(),
    concurrency_limit=16,
)

# Enable the request queue, then launch behind a username/password login.
# NOTE(review): os.getenv returns None when PASSWD is unset — confirm the
# variable is always defined in the deployment environment.
demo.queue()
demo.launch(auth=("demouser", os.getenv("PASSWD")))