Spaces:
Build error
Build error
Upload 3 files
Browse files
- .gitattributes +1 -0
- app.py +1 -1
- earnings_calls_cleaned_metadata.csv +3 -0
- utils.py +10 -8
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
earnings_calls_sentencewise.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
earnings_calls_sentencewise.csv filter=lfs diff=lfs merge=lfs -text
|
36 |
+
earnings_calls_cleaned_metadata.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -72,7 +72,7 @@ with st.sidebar:
|
|
72 |
|
73 |
# Choose encoder model
|
74 |
|
75 |
-
encoder_models_choice = ["
|
76 |
with st.sidebar:
|
77 |
encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
|
78 |
|
|
|
72 |
|
73 |
# Choose encoder model
|
74 |
|
75 |
+
encoder_models_choice = ["MPNET", "SGPT"]
|
76 |
with st.sidebar:
|
77 |
encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
|
78 |
|
earnings_calls_cleaned_metadata.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c6474da1f710d2a6d2ea65c475baf6821db95a5cb81dd8703eec3c04cd22cbe
|
3 |
+
size 18988194
|
utils.py
CHANGED
@@ -17,7 +17,7 @@ import streamlit_scrollable_textbox as stx
|
|
17 |
|
18 |
@st.experimental_singleton
|
19 |
def get_data():
|
20 |
-
data = pd.read_csv("
|
21 |
return data
|
22 |
|
23 |
|
@@ -72,6 +72,7 @@ def query_pinecone(query, top_k, model, index, year, quarter, ticker, threshold=
|
|
72 |
"Year": int(year),
|
73 |
"Quarter": {"$eq": quarter},
|
74 |
"Ticker": {"$eq": ticker},
|
|
|
75 |
},
|
76 |
include_metadata=True,
|
77 |
)
|
@@ -103,7 +104,7 @@ def sentence_id_combine(data, query_results, lag=2):
|
|
103 |
]
|
104 |
# Create a list of context sentences by joining the sentences corresponding to the lookup IDs
|
105 |
context_list = [
|
106 |
-
"
|
107 |
]
|
108 |
return context_list
|
109 |
|
@@ -114,11 +115,11 @@ def text_lookup(data, sentence_ids):
|
|
114 |
|
115 |
|
116 |
def generate_prompt(query_text, context_list):
|
117 |
-
|
118 |
prompt = f"""
|
119 |
Context information is below:
|
120 |
---------------------
|
121 |
-
{
|
122 |
---------------------
|
123 |
Given the context information and prior knowledge, answer this question:
|
124 |
{query_text}
|
@@ -148,15 +149,16 @@ def retrieve_transcript(data, year, quarter, ticker):
|
|
148 |
(data.Year == int(year))
|
149 |
& (data.Quarter == quarter)
|
150 |
& (data.Ticker == ticker),
|
151 |
-
["
|
152 |
]
|
153 |
.drop_duplicates()
|
154 |
-
.iloc[0]
|
155 |
)
|
|
|
156 |
# convert row to a string and join values with "-"
|
157 |
-
row_str = "-".join(row.astype(str)) + ".txt"
|
158 |
open_file = open(
|
159 |
-
f"Transcripts/{ticker}/{
|
160 |
"r",
|
161 |
)
|
162 |
file_text = open_file.read()
|
|
|
17 |
|
18 |
@st.experimental_singleton
|
19 |
def get_data():
|
20 |
+
data = pd.read_csv("earnings_calls_cleaned_metadata.csv")
|
21 |
return data
|
22 |
|
23 |
|
|
|
72 |
"Year": int(year),
|
73 |
"Quarter": {"$eq": quarter},
|
74 |
"Ticker": {"$eq": ticker},
|
75 |
+
"QA_Flag": {"$eq": "Answer"},
|
76 |
},
|
77 |
include_metadata=True,
|
78 |
)
|
|
|
104 |
]
|
105 |
# Create a list of context sentences by joining the sentences corresponding to the lookup IDs
|
106 |
context_list = [
|
107 |
+
" ".join(data.Text.iloc[lookup_id].to_list()) for lookup_id in lookup_ids
|
108 |
]
|
109 |
return context_list
|
110 |
|
|
|
115 |
|
116 |
|
117 |
def generate_prompt(query_text, context_list):
|
118 |
+
context = " \n".join(context_list)
|
119 |
prompt = f"""
|
120 |
Context information is below:
|
121 |
---------------------
|
122 |
+
{context}
|
123 |
---------------------
|
124 |
Given the context information and prior knowledge, answer this question:
|
125 |
{query_text}
|
|
|
149 |
(data.Year == int(year))
|
150 |
& (data.Quarter == quarter)
|
151 |
& (data.Ticker == ticker),
|
152 |
+
["File_Name"],
|
153 |
]
|
154 |
.drop_duplicates()
|
155 |
+
.iloc[0,0]
|
156 |
)
|
157 |
+
print(row)
|
158 |
# convert row to a string and join values with "-"
|
159 |
+
#row_str = "-".join(row.astype(str)) + ".txt"
|
160 |
open_file = open(
|
161 |
+
f"Transcripts/{ticker}/{row}",
|
162 |
"r",
|
163 |
)
|
164 |
file_text = open_file.read()
|