# !pip install langchain langchain-groq sentence-transformers langchainhub faiss-cpu gradio gradio_client yfinance duckduckgo-search
import pandas as pd
import io
import requests
import os
import json
import ast
import re
import pytz
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
import yfinance as yf
# import langchain libraries
# !pip install langchain langchain-groq langchainhub duckduckgo-search
from langchain import hub
from langchain.agents import (AgentExecutor, Tool, create_react_agent,
create_structured_chat_agent, create_tool_calling_agent)
from langchain.chains import RetrievalQA, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.schema.output_parser import StrOutputParser
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.tools import DuckDuckGoSearchResults, DuckDuckGoSearchRun
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.document_loaders.csv_loader import CSVLoader
#import gradio libraries
# !pip install gradio gradio_client
import gradio as gr
#import vectorstore libraries
# !pip install faiss-cpu
from langchain_community.vectorstores import FAISS
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
############################################
############################################
# # Code steps involved:
# 1. Define the LLM
# 2. Extract data from NSE
# 3. Process the dataframes and store them as CSV files
# 4. Use LangChain CSV loaders to load the CSV data
# 5. Create vector stores
# 6. Create company lists
# 7. Create the LLM functions required
# 8. Create the Python functions for stock data and charting
# 9. Create Gradio Blocks
# 10. Find any recent real-time additions to the NSE data and add them to the vector stores
# 11. Create retrievers and LangChain QA retrieval chains
# 12. Define the default charts
# 13. Build the Gradio app
##########################################
##########################################
# Get the data from the NSE website as pandas dataframes
# Function to fetch the dataframes
# Data is fetched from two pages (NSE announcements and NSE corporate actions), hence two dataframes
groq_api_key = os.getenv('groq')
serp_api_key = os.getenv('serp')
def get_pd(d):
# Get the current date
current_date = datetime.now()
# Get the previous day
previous_day = current_date - timedelta(days=d)
# Format the dates in the required format (dd-mm-yyyy)
current_date_str = current_date.strftime("%d-%m-%Y")
previous_day_str = previous_day.strftime("%d-%m-%Y")
base_url = 'https://www.nseindia.com'
session = requests.Session()
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, '
'like Gecko) '
'Chrome/80.0.3987.149 Safari/537.36',
'accept-language': 'en,gu;q=0.9,hi;q=0.8',
'accept-encoding': 'gzip, deflate, br'}
r = session.get(base_url, headers=headers, timeout=120)
cookies = dict(r.cookies)
# Use the dates in the URL
url1 = f"https://www.nseindia.com/api/corporate-announcements?index=equities&from_date={previous_day_str}&to_date={current_date_str}&csv=true"
url2 = f"https://www.nseindia.com/api/corporates-corporateActions?index=equities&csv=true"
response1 = session.get(url1, timeout=120, headers=headers, cookies=cookies)
response2 = session.get(url2, timeout=120, headers=headers, cookies=cookies)
content1 = response1.content
content2 = response2.content
df=pd.read_csv(io.StringIO(content1.decode('utf-8')))
dfca=pd.read_csv(io.StringIO(content2.decode('utf-8')))
return df, dfca
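# The NSE endpoints above are rate-limited and occasionally time out; the
# init/incremental functions below already catch this and fall back to a CSV
# backup. A retry-with-backoff wrapper is another option -- the sketch below
# is illustrative only and is not wired into the app:
import time

def get_pd_with_retry(d, retries=3, backoff=5):
    for attempt in range(retries):
        try:
            return get_pd(d)
        except requests.RequestException:
            if attempt == retries - 1:
                raise  # give up after the final attempt
            time.sleep(backoff * (attempt + 1))  # linear backoff between attempts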
# Process the dataframes and store them as CSV files
# To speed up RAG processing, I decided to use three separate vector stores:
# the first with all data, the second with minimal data and the third with corporate-action (CA) data
# Owing to the context-window limits of RAG, it is always good to ensure we don't include irrelevant data
df_old, dfca = get_pd(1)
df_back = df_old.copy()
df_back.to_csv("df_backup.csv",index=False)
df_old.drop(['RECEIPT','DISSEMINATION','DIFFERENCE'],axis=1,inplace=True)
df_old2 = df_old.drop(['ATTACHMENT'],axis=1)
# Save it as a CSV file
df_old.to_csv("nse_data_old.csv", index=False)
# df_old1.to_csv("nse_data_old1.csv", index=False)
df_old2.to_csv("nse_data_old2.csv", index=False)
dfca.to_csv("nse_ca.csv", index=False)
# Use Langchain CSV Loaders to load the CSV data
loader = CSVLoader("nse_data_old.csv")
data_old = loader.load()
loader2 = CSVLoader("nse_data_old2.csv")
data_old_2 = loader2.load()
loader3 = CSVLoader("nse_ca.csv")
data_ca = loader3.load()
# Module-level stores; init_process()/incremental_process() re-bind these via their own global declarations
global vectorstore,vectorstore2,vectorstore3, colist, colist_tracked
# Create vector stores - I tried Chroma, but FAISS worked better
vectorstore = FAISS.from_documents(data_old, embedding_function)
vectorstore2 = FAISS.from_documents(data_old_2, embedding_function)
vectorstore3 = FAISS.from_documents(data_ca, embedding_function)
vectorstore.save_local("vectorstore")
vectorstore2.save_local("vectorstore2")
vectorstore3.save_local("vectorstore3")
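# Optional sanity check (illustrative sketch, not used by the app): LangChain's
# FAISS wrapper exposes similarity_search, so a one-off probe can confirm the
# index answers queries as expected.
def probe_vectorstore(query, k=2):
    # Return the k announcement rows nearest to a free-text query
    return vectorstore.similarity_search(query, k=k)
# e.g. probe_vectorstore("dividend announcements")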
###########################
# Create company list
# Load the NIFTY company names - currently hardcoded since the NIFTY list changes rarely, but this could be made dynamic
co1 = pd.read_csv('ind_nifty50list.csv')
# Create company lists required
# Get the column you want to convert to a list
column_name = "Company Name"
# # Convert the column to a list
co_list1 = co1[column_name].tolist()
# # These are the companies that are being tracked - this can be uploaded / hardcoded
co_list_tracked = ['Reliance Industries Limited', 'Infosys Limited','ICICI Bank Ltd', 'Indusind Bank Ltd','Ramco Systems', \
'Zydus Lifesciences Limited','Bharti Airtel Limited',\
'ICICI Bank Limited','TechMahindra Limited', 'Indiabulls Real Estate Limited','Tamilnad Mercantile Bank Limited', \
'Bajaj Finance Limited', 'Apollo Tyres Limited', 'Zydus Lifesciences Limited', 'Indusind Bank Limited', 'Kirloskar Oil Engines Limited']
co_list = co_list1 + co_list_tracked
####################################
##################################
# Define the helper functions required
##################################
# LLM function to get announcement detail
def give_announcement(llm,stock):
if not stock:
return "This company has not made any announcements today or yesterday"
else:
retriever1 = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm,
retriever=retriever1,
return_source_documents=False)
response = qa_chain({"query":f"What are the announcements made by the company {stock}?. If no announcement has been made by that company, \
just say that no announcement has been made by that company."})
return f"Announcements made by {stock}: {response['result']}"
# LLM function to get Corporate Action Detail
def get_ca(llm,stock):
# stock = stock_name
if not stock:
return "This company has not made any announcements today or yesterday"
else:
# resp1 = llm.invoke(f"get all the yahoo finance company name(s) of entity name in {stock}. Just print the ticker(s) alone. Do not print leading sentences.")
# stock = resp1.content
retriever3 = vectorstore3.as_retriever()
qa_chain2 = RetrievalQA.from_chain_type(llm,
retriever=retriever3,
return_source_documents=False)
response = qa_chain2({"query":f"What are the corporate action announcements made by the company {stock}?. If no announcement has been made by that company, do not print any source documents and \
just say that no announcement has been made by that company."})
return response['result']#, response['source_documents']
# Web search tools: DuckDuckGo is defined first, then replaced by SerpAPI
search=DuckDuckGoSearchRun()
os.environ['SERPAPI_API_KEY']= serp_api_key
from langchain_community.utilities import SerpAPIWrapper
search = SerpAPIWrapper()  # this rebinding means SerpAPI, not DuckDuckGo, is what the agents below use
# Fetch stock data from Yahoo Finance
def get_stock_price(ticker,history=5):
# time.sleep(4) #To avoid rate limit error
if "." in ticker:
ticker=ticker.split(".")[0]
ticker=ticker+".NS"
stock = yf.Ticker(ticker)
df = stock.history(period="1y")
df=df[["Close","Volume"]]
df.index=[str(x).split()[0] for x in list(df.index)]
df.index.rename("Date",inplace=True)
df=df[-history:]
# print(df.columns)
return df.to_string()
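# The agents below are asked to report percentage moves from the text table
# that get_stock_price returns. A deterministic, non-LLM cross-check could look
# like this sketch (assumes the same ".NS" Yahoo suffix convention; not wired
# into the app):
def get_pct_change(ticker, history=5):
    if "." in ticker:
        ticker = ticker.split(".")[0]
    closes = yf.Ticker(ticker + ".NS").history(period="1y")["Close"][-history:]
    # percentage move from the first to the last close in the window
    return round((closes.iloc[-1] / closes.iloc[0] - 1) * 100, 2)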
# get stock price movements
def get_movements(llm,stock):
if not stock:
return "This company has not made any announcements today or yesterday"
else:
stock = stock[0]
dfc = pd.read_csv('nse_data_old.csv')
stockdesc = dfc[dfc['COMPANY NAME'] == stock]['COMPANY NAME'].iloc[0]
stock1 = dfc[dfc['COMPANY NAME'] == stock]['SYMBOL'].iloc[0]
stock = get_ticker(stock1)
print("stock is ",stock)
tools=[
Tool(
name="get stock data",
func=get_stock_price,
description=f"Use this tool to get stock price data. This tool will return three values: date, volume and closing price of the stock \
for the period of 5 days. stock = {stock}"
),
Tool(
name="SerpAPI search",
func=search.run,
description=f"Use this tool for for web search for searching details about stock like broker sentiment. You can also get recent stock \
related news. stock symbol = {stock} and stockname = {stockdesc}"
),
]
prompt = hub.pull("hwchase17/structured-chat-agent")
# Construct the Tools agent
agent = create_structured_chat_agent(llm, tools, prompt)
try:
# Create an agent executor by passing in the agent and tools
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
response = agent_executor.invoke({"input": f"How much the stock price of stock {stock} with name {stockdesc} moved in the last few days?. Give the prices \
over the last few days and also percentage change. For example, If the stock has not moved in single direction, \
you can say the stock has been volatile. But if it has moved up over five days, you can say so with percentage movement"})
return f"Answer for {stock} - {response['output']}"
except Exception as e:
return f"An error occurred: {str(e)}"
#####################################
# get stock sentiments
#####################################
prompt1 = """Hello, I need broker sentiment data for a specific stock. Please search and summarize current market analyses, broker reports, \
and overall sentiment regarding the given stock:\Focus on information from credible sources like financial news, broker reports, and investment research firms. \
Provide key insights, including:\
Recent broker recommendations (buy, hold, sell), \
Notable broker analyses or reports, \
General trends in broker sentiment, \
Any major news or events impacting the stock's sentiment. \
Please ensure the data is up-to-date and from reputable sources. Provide a concise summary with relevant details and any supporting context to understand the current sentiment.\
Please note that you are not chat agent, but meant for single usage, so do not conclude with any greetings or asking for further assistance etc!.\
"""
def get_sentiments(llm,stock):
if not stock:
return "This company has not made any announcements today or yesterday"
else:
print("st1",stock)
stock = stock[0]
print("af ",stock)
#####
dfc = pd.read_csv('nse_data_old.csv')
stockdesc = dfc[dfc['COMPANY NAME'] == stock]['COMPANY NAME'].iloc[0]
stock1 = dfc[dfc['COMPANY NAME'] == stock]['SYMBOL'].iloc[0]
stock = get_ticker(stock1)
tools=[
Tool(
name="get stock data",
func=get_stock_price,
description=f"Use this tool to get stock price data. This tool will return three values: date, volume and closing price of the stock \
for the period of 5 days. stock = {stock}"
),
Tool(
name="SerpAPI Search",
func=search.run,
description=f"Use this tool for for web search for searching details about stock like broker sentiment. You can also get recent stock \
related news. stock name = {stockdesc}"
),
]
prompt = hub.pull("hwchase17/structured-chat-agent")
# Construct the Tools agent
agent = create_structured_chat_agent(llm, tools, prompt)
try:
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
response = agent_executor.invoke({"input": f"Get broker sentiment for the stock {stock} and stock name {stockdesc}"})
return f"Broker sentiment analysis for {stock}. - {response['output']}"
except Exception as e:
return f"An error occurred: {str(e)}"
#################
# Fetch financial statements from Yahoo Finance
def get_balancesheet(ticker):
# time.sleep(4) #To avoid rate limit error
if "." in ticker:
ticker=ticker.split(".")[0]
else:
ticker=ticker
ticker=ticker+".NS"
company = yf.Ticker(ticker)
df = company.balance_sheet
# df = df.head(30)
df.ffill(inplace=True)
df.dropna(inplace=True)
return df
def get_incomestatement(ticker):
# time.sleep(4) #To avoid rate limit error
if "." in ticker:
ticker=ticker.split(".")[0]
else:
ticker=ticker
ticker=ticker+".NS"
company = yf.Ticker(ticker)
df = company.financials
# df = df.head(30)
df.ffill(inplace=True)
df.dropna(inplace=True)
return df
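# For reference: yfinance returns these statements as DataFrames whose columns
# are period-end Timestamps and whose index holds line items such as
# "Total Assets" or "Total Revenue" (exact names vary by company and yfinance
# version); get_financialratio() below transposes them so each row is a year.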
def get_ticker(company_name):
com=company_name+".NS"
ticker = yf.Ticker(com)
return ticker.info['symbol']
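# Note: ticker.info can be slow or can raise when Yahoo has no data for a
# symbol. A more defensive variant (illustrative sketch, not used by the app)
# could fall back to the raw ".NS" symbol:
def get_ticker_safe(company_name):
    com = company_name + ".NS"
    try:
        return yf.Ticker(com).info['symbol']
    except Exception:
        return com  # assume NSE symbol + ".NS" is already the Yahoo ticker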
def get_financialratio(model, input,stock):
stock_name = get_companynames(stock)
llm = get_model(model)
if not stock_name:
return "This company has not made any announcements"
else:
stockname = stock_name[0]
print("stock1 ",stockname)
dfc = pd.read_csv('nse_data_old.csv')
stock1 = dfc[dfc['COMPANY NAME'] == stockname]['SYMBOL'].iloc[0]
print("staock1 ",stock1)
stock = get_ticker(stock1)
print("stock is ",stock)
if input == '':
return "No query has been entered!"
else:
resp = llm.invoke(f"You have to answer either 'A' or 'B' without any leading sentences - check whether the input {input} pertains \
to financial ratio query. If it pertains to financial ratio query, \
respond with letter 'A', else with letter 'B' if it contains only something like company name")
print("nature of query ",resp)
if resp.content == 'B':
return "Enter a query pertaining to financial ratios!"
else:
# resp1 = llm.invoke(f"get yahoo finance ticker name of entity name in {input}. Just print the ticker alone. Do not print leading sentences.")
# stock = resp1.content
# resp2 = llm.invoke(f"to answer the query {input}, whether balance sheet or income statement required? If balance sheet, answer A, else B")
resp2 = llm.invoke(f"Answer A, if balance sheet or B, if income statement. To answer the query {input}, \
whether balance sheet or income statement required - If balance sheet, answer A, else B")
if resp2.content=='A':
df1 = get_balancesheet(f'{stock}')
print("balance sheet")
else:
df1 = get_incomestatement(f'{stock}')
print("income statement")
df=df1.T
print("the df is ",df)
cols= df.columns.tolist()
resp3 = llm.invoke(f"List the column names, as python list, in {cols} needed for {input} calculation. Do not output any sentence other than column names.\
For example, do not output leading answer statements like: Here are the column names needed for ..")
message=resp3.content
def extract_df(df, message):
c = ast.literal_eval(message)
return df[c]
df_new=extract_df(df,message)
# prompt1 = f"List the column names, as python list, in {cols} needed for {data} calculation. Do not output any sentence other than column names.\
# For example, do not output leading answer statements like: Here are the column names needed for .."
# prompt = f"What is the current ratio of {stock}?. Use {df_new}. Give only year and current ratio for that year in JSON format"
parser = JsonOutputParser()
prompt = PromptTemplate(
template="Answer the user query.\n{format_instructions}\n{query}\n",
input_variables=["query"],
partial_variables={"format_instructions": parser.get_format_instructions()},
)
chain = prompt | llm | parser
try:
response= chain.invoke( f"Using {df_new}, {input}?")
return f"For the company: {stockname}, Here are the details: {response}"
except Exception as e:
return f"An error occurred: {str(e)}"
##########################
# Functions to plot a chart over ratios - this has scope for major enhancements!
def plot_chart(data):
# 'data' arrives as a Python dict parsed from the LLM's JSON output
# Get the first key in the dictionary
try:
key = list(data.keys())[0]
# Create a plot
plt.figure(figsize=(8, 6))
plt.bar(data[key].keys(), data[key].values())
plt.title(f"{key} Over Years")
plt.xlabel("Year")
plt.ylabel(key)
plt.tight_layout()
# Return the plot
return plt
except Exception as e:
return None
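# plot_chart expects the parsed LLM output to be shaped like this hypothetical
# example: {"Current Ratio": {"2021": 1.2, "2022": 1.4, "2023": 1.1}} -- one
# metric name mapping year -> value; only the first key is plotted.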
def get_chart(model,input,stock):
stock_name = get_companynames(stock)
if stock_name:
response = get_financialratio(model,input,stock)
# Extract the dictionary part using regex
dict_match = re.search(r"\{.*\}", response) # Search for content within curly braces
# Convert the extracted string to a dictionary
if dict_match:
extracted_dict_str = dict_match.group(0) # Get the matching text
extracted_dict = ast.literal_eval(extracted_dict_str) # Convert string to dictionary
else:
extracted_dict = None # No dictionary found
print("extrated tic ", extracted_dict)
plt = plot_chart(extracted_dict)
return plt
else: return None
def combined_ratio(model, input,stock):
return get_financialratio(model,input,stock), get_chart(model, input,stock)
###############################
###############################
# Create the Gradio Blocks interface with a title and description
###############
def get_colist2():
dfco = pd.read_csv('dfco.csv')
dfco1 = dfco[['COMPANY NAME']]
dfco2 = dfco1.drop_duplicates()
# Save the result to a new CSV file
dfco2.to_csv('companies.csv', index=False)
dfco3 = dfco2.head(10)
co_list3 = dfco3['COMPANY NAME'].unique().tolist()
filtered_df = dfco2[dfco2['COMPANY NAME'].isin(co_list)]
co_list2 = filtered_df['COMPANY NAME'].tolist()
return co_list2, co_list3
######################################
def get_timestampmessage(flag):
dfco = pd.read_csv('dfco.csv')
timestamp = dfco[['BROADCAST DATE/TIME']].max().values.tolist()[0]
if flag == 1:
message = f"There is an NSE timeout error. The latest filing information is available up to {timestamp}"
else: message = f"Latest filing information is available up to {timestamp}"
return message
global message_init, message_update
def update():
# Declare the globals so the refreshed message is visible to give_time()
global message_init, message_update
flag, message_update = incremental_process()
message_init = message_update
return message_update
def give_time():
return message_init
def give_timestamp():
dfco = pd.read_csv("dfco.csv")
timestamp = dfco[['BROADCAST DATE/TIME']].max().values.tolist()[0]
return timestamp
##################################
# Define the IST timezone
ist_timezone = pytz.timezone("Asia/Kolkata")
# Define UTC for server-side time
utc_timezone = pytz.utc
def refresh():
# Get the client-side timestamp (assuming it is in IST)
timestamp_str = give_timestamp() # The format returned should match the expected format
timest = give_time()
given_time = datetime.strptime(timestamp_str, "%d-%b-%Y %H:%M:%S")
given_time_ist = ist_timezone.localize(given_time) # Localize to IST
# Get the current server time in UTC
current_time_utc = datetime.now(tz=utc_timezone)
# Convert the client-side time to UTC for consistent comparison
given_time_utc = given_time_ist.astimezone(utc_timezone)
# Calculate the time difference
time_difference = current_time_utc - given_time_utc
print("the time diff is ", time_difference)
# Check if the time difference is greater than five minutes
if time_difference > timedelta(minutes=5):
message1 = update()
print("Incremental update run")
else:
message1 = f"Refresh allowed only if data is stale for more than 5 minutes. Current timestamp: {timest}"
return message1
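# For reference, give_timestamp() returns NSE's broadcast timestamp in
# "%d-%b-%Y %H:%M:%S" form (e.g. "21-Jun-2024 18:30:05" -- an example value,
# not real data); refresh() localizes it to IST before comparing with UTC.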
##########################################################################
def plot1_top_20():
df = pd.read_csv('nse_data_old.csv')
subjects = ['Acquisition',
'Alteration Of Capital and Fund Raising-XBRL',
'Analysts/Institutional Investor Meet/Con. Call Updates',
'Board Meeting Intimation',
'Book Closure',
'Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent',
'Change in Management',
'Credit Rating',
'Disclosure of material issue',
'Dividend',
'Financial Result Updates',
'Investor Presentation',
'Notice Of Shareholders Meetings-XBRL',
'Related Party Transactions',
'Resignation',
'Rights Issue',
'Shareholders meeting',
'Spurt in Volume',
'Update-Acquisition/Scheme/Sale/Disposal-XBRL',
]
df = df[df['SUBJECT'].isin(subjects)]
# Shorten the longest subject label for readability
df['SUBJECT'] = df['SUBJECT'].replace('Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent', 'Change in Key Managerial Personnel')
value_counts = df['SUBJECT'].value_counts()
plt.figure(figsize=(10, 6))
plt.barh(value_counts.index, value_counts.values)
plt.xlabel('Count')
plt.ylabel('Announcements')
plt.title('NSE Corporate Announcements - A Glance')
plt.tight_layout()
# plt.close()
return plt
## Function to create company list specific chart
def plot2_top_20():
co_list2,_ = get_colist2()
# global co_list2
# Get the counts of each label
df = pd.read_csv('nse_data_old.csv')
subjects = ['Acquisition',
'Alteration Of Capital and Fund Raising-XBRL',
'Analysts/Institutional Investor Meet/Con. Call Updates',
'Board Meeting Intimation',
'Book Closure',
'Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent',
'Change in Management',
'Credit Rating',
'Disclosure of material issue',
'Dividend',
'Financial Result Updates',
'Investor Presentation',
'Notice Of Shareholders Meetings-XBRL',
'Related Party Transactions',
'Resignation',
'Rights Issue',
'Shareholders meeting',
'Spurt in Volume',
'Update-Acquisition/Scheme/Sale/Disposal-XBRL',
]
# Restrict to tracked / NIFTY companies that have made filings
df = df[df['COMPANY NAME'].isin(co_list2)]
# Shorten the longest subject label for readability
df['SUBJECT'] = df['SUBJECT'].replace('Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent', 'Change in Key Managerial Personnel')
value_counts = df['SUBJECT'].value_counts()
plt.figure(figsize=(10, 6))
plt.barh(value_counts.index, value_counts.values)
plt.xlabel('Count')
plt.ylabel('Announcements')
plt.title('NSE Corporate Announcements - Tracked Companies')
plt.tight_layout()
# plt.close()
return plt
def get_companynames(stock):
print("get companynames", stock)
df = pd.read_csv('nse_data_old.csv')
if stock:
print('fi stock', stock)
# Match rows where 'COMPANY NAME' contains the keyword (case-insensitive, literal match)
matched_rows = df[df['COMPANY NAME'].str.contains(stock, case=False, na=False, regex=False)]
# Get unique company names
unique_companies = matched_rows['COMPANY NAME'].unique()
return list(unique_companies)
else:
return []  # empty list keeps callers' truthiness checks ("if not ...") working
# A combined function to be used in Gradio output box
def print_model(llm):
co_list2,_ = get_colist2()
if co_list2:
return f"You are using {llm.model_name} model for this session. \n \n" \
f"These are the companies you track: {co_list_tracked}. \n \n" \
f"These are the companies, including those in NIFTY, that have filed any information with NSE either today / yesterday - {co_list2}"
else:
return f"You are using {llm.model_name} model for this session. \n \n" \
f"Your are tracking these companies: {co_list_tracked}, \n \n"\
f"None of the tracked companies or NIFTY 50 have filed any information with NSE on either today or yesterday"
def print_model1(llm):
return f"You are using {llm.model_name} model for this session. \n \n [Note: There is NSE timeout error preventing fetching of latest data. So, results may not be real-time / up-to-date]"
def combined_function1(model,stock):
global flag
llm = get_model(model)
stock = get_companynames(stock)
if flag == 0:
return print_model(llm), give_announcement(llm,stock),get_ca(llm,stock),get_movements(llm,stock), get_sentiments(llm,stock)
else:
return print_model1(llm), give_announcement(llm,stock),get_ca(llm,stock),get_movements(llm,stock), get_sentiments(llm,stock)
def get_model(model_name):
llm = ChatGroq(
api_key=groq_api_key,
model=model_name,
max_tokens = 8192,
# model = 'gemma-7b-it',
temperature = 0
# model = 'mixtral-8x7B-32768'
)
return llm
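# Example usage (sketch): llm = get_model("llama3-70b-8192"); the model names
# in the Gradio dropdown below are passed here verbatim.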
# This function is defined here because the company list is dynamic
def give_names():
global co_list_tracked
co_list2, co_list3 = get_colist2()
return f"Apart from NIFTY, these are the companies you track: \n \n" \
f" {co_list_tracked}. \n \n" \
f"These are the tracked companies that have made announcements: \n \n" \
f"{co_list2}. \n \n" \
f"These are latest 10 companies that have made announcements: \n \n " \
f"{co_list3}"
##############################
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
###############################
# This function handles chat queries. It is defined here because the retriever depends on the vector stores above
def chat_chain(model,query):
llm = get_model(model)
if query=='':
return "Please enter a query!"
else:
combine_docs_chain = create_stuff_documents_chain(
llm, retrieval_qa_chat_prompt)
retriever2 = vectorstore2.as_retriever()
retrieval_chain = create_retrieval_chain(retriever2, combine_docs_chain)
response = retrieval_chain.invoke({"input": query})
return response['answer']
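# The retrieval chain's response dict contains "input", "context" (the
# retrieved documents) and "answer"; only the answer text is surfaced in the UI.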
#################################
## NEW
## Update the vector stores with the latest data
global flag
def init_process():
global vectorstore,vectorstore2,vectorstore3, flag
try:
df_new, _ = get_pd(1)
flag = 0
except Exception:
# NSE timed out or blocked the request; fall back to the backed-up data
df_new = pd.read_csv("df_backup.csv")
flag = 1
df_new.to_csv("df_new.csv",index=False)
print("length of df_new ",len(df_new))
print("length of df_old ", len(df_old))
#drop unnecessary common columns
df_new.drop(['RECEIPT','DISSEMINATION','DIFFERENCE'],axis=1,inplace=True)
# Find the new rows and add them incrementally to the first store
df_merged = df_new.merge(df_old, how='left', indicator=True)
# Keep rows that are unique to df_new (i.e., where '_merge' is 'left_only')
df_add1= df_merged[df_merged['_merge'] == 'left_only'].drop(columns=['_merge'])
# Save it as a CSV file
df_add1.to_csv("nse_data_add1.csv", index=False)
#drop unnecessary columns for second vector store
df_new2 = df_new.drop(['ATTACHMENT'],axis=1)
# add increment for second store
df_merged = df_new2.merge(df_old2, how='left', indicator=True)
df_add2 = df_merged[df_merged['_merge'] == 'left_only'].drop(columns=['_merge'])
# Save it as a CSV file
df_add2.to_csv("nse_data_add2.csv", index=False)
#####################
# Load the first CSV file
dfold = pd.read_csv('nse_data_old.csv')
# Load the second CSV file
dfadd = pd.read_csv('nse_data_add1.csv')
# print("df old",dfold)
# print("######")
# print("df add ",dfadd)
if dfadd.empty:
dfco = dfold.copy()
else:
# Append df2 at the end of df1
dfco = pd.concat([dfold, dfadd], ignore_index=True)
dfco.to_csv("dfco.csv",index=False)
# Incremental RAG: newly fetched rows are embedded and merged into the existing vector stores
loader = CSVLoader("nse_data_add1.csv")
data_new1 = loader.load()
loader = CSVLoader("nse_data_add2.csv")
data_new2 = loader.load()
print("original size ",vectorstore.index.ntotal)
len1 = len(pd.read_csv('nse_data_old.csv')) + len(pd.read_csv('nse_data_add1.csv'))
print("len1 old + new csv ",len1)
len2 = vectorstore.index.ntotal
if len1!=len2:
print("old size ",vectorstore.index.ntotal)
# for first store
vectorstore_add1 = FAISS.from_documents(data_new1, embedding_function)
print("incremental size ",vectorstore_add1.index.ntotal)
vectorstore_new1 = FAISS.load_local("vectorstore",embedding_function,allow_dangerous_deserialization=True)
vectorstore_new1.merge_from(vectorstore_add1)
vectorstore_new1.save_local("vectorstore")
print("new size ",vectorstore_new1.index.ntotal)
print("new old size ",vectorstore.index.ntotal)
# retrieverx = vectorstore_new.as_retriever()
# for second store
vectorstore_add2 = FAISS.from_documents(data_new2, embedding_function)
print("incremental size ",vectorstore_add2.index.ntotal)
vectorstore_new2 = FAISS.load_local("vectorstore2",embedding_function,allow_dangerous_deserialization=True)
vectorstore_new2.merge_from(vectorstore_add2)
vectorstore_new2.save_local("vectorstore2")
print("new size ",vectorstore_new2.index.ntotal)
print("new old size ",vectorstore2.index.ntotal)
# retrieverx = vectorstore_new2.as_retriever()
##########################
# Define updated vector stores, retrievers and QA chains
##########################
vectorstore = FAISS.load_local("vectorstore",embedding_function,allow_dangerous_deserialization=True)
print("final size store 1",vectorstore.index.ntotal)
vectorstore2 = FAISS.load_local("vectorstore2",embedding_function,allow_dangerous_deserialization=True)
print("final size store 2",vectorstore2.index.ntotal)
vectorstore3 = FAISS.load_local("vectorstore3",embedding_function,allow_dangerous_deserialization=True)
print("final size store 3",vectorstore3.index.ntotal)
message_init = get_timestampmessage(flag)
print("timeout flag",flag)
print("message at init ", message_init)
return flag, message_init
def incremental_process():
# Identical refresh logic to init_process(); delegate to it rather than
# maintaining two copies of the fetch-diff-merge pipeline.
flag, message_update = init_process()
return flag, message_update
#################################
## Update the vector stores with the latest data at startup
flag,message_init = init_process()
###########################################################################
###########################################################################
with gr.Blocks() as demo:
# Add a Markdown block for the description
gr.Markdown("""<h1 style='color: blue;'>Chat and Analyze with NSE Filings Information</h1>""")
gr.Markdown("""Powered by Gradio, Groq, Llama3, FAISS, Langchain, YahooFinance""")
gr.Markdown(
"""
<img src="https://upload.wikimedia.org/wikipedia/commons/1/12/NSE_Exchange_Plaza.jpg" width=500px>
Enter any company name to see its recent NSE filings in real time. This app can track a list of companies for corporate announcements \
filed with NSE (currently the NIFTY 50 list is hard-coded). To check whether any tracked company has made an announcement yesterday or today, \
enter the company name and submit. The first output box lists all the tracked companies that have made an announcement today. \
The second box provides details about the announcement. You can also do ratio analysis and chat with the filings information (beta).
"""
)
txt_output = gr.Text(give_time(),label = "Opening Data - Timestamp of latest Filing")
txt_output = gr.Text(give_names(),label = "Announcements for tracked companies")
# This is for defaulting charts when app is launched
plot_output1 = gr.Plot(plot1_top_20(), label="Chart") # Call the function to create the plot
plt.close()
plot_output2 = gr.Plot(plot2_top_20(), label="Chart") # Call the function to create the plot
plt.close()
gr.Markdown("""<h2 style='color: blue;'>Fetch Announcements/Corporate Actions/Price Movements/Broker Sentiments</h2>""")
# Use a Column to structure the inputs and outputs
with gr.Column():
outputs5 = [gr.Textbox(label="Latest Filing Timestamp",placeholder="Refresh data if stale for more than an hour")]
button5 = gr.Button("Refresh Data")
button5.click(refresh, inputs=None, outputs=outputs5)
# Create a dropdown box for selecting the operation
operation_dropdown = gr.Dropdown(
label="Select a model",
choices=['llama3-70b-8192','llama3-8b-8192', 'gemma-7b-it','mixtral-8x7B-32768' ], # Options for the dropdown
value='llama3-70b-8192', # Default value
)
# First text input and button
text_input1 = gr.Textbox(
label="Enter Company Name",
placeholder="Enter a company name; e.g., Zydus Lifesciences Limited",
lines=1
)
button1 = gr.Button("Start Analysis")
outputs1 = [
gr.Textbox(label="Selected Model",show_copy_button=True),
gr.Textbox(label="Announcement Detail", max_lines=100,show_copy_button=True),
gr.Textbox(label="Any Corporate Actions during last week?", max_lines=100,show_copy_button=True),
gr.Textbox(label="Stock Price Movement", max_lines=100,show_copy_button=True),
gr.Textbox(label="Broker Sentiment", max_lines=100,show_copy_button=True),
]
button1.click(combined_function1, inputs=[operation_dropdown,text_input1], outputs=outputs1)
gr.Markdown("""<h1 style='color: green;'>Analyse the Financial Statements of the above Company</h1>""")
text_input3 = gr.Textbox(
label="Enter Query",
placeholder="Enter your query: e.g., What is the current ratio of the stock over three years?",
lines=1)
button3 = gr.Button("Analyse")
outputs3 = [
gr.Textbox(label="Chat Response", max_lines=100,show_copy_button=True),
gr.Plot(label = "Chart")]
button3.click(combined_ratio, inputs=[operation_dropdown,text_input3,text_input1], outputs=outputs3)
gr.Markdown("""<h1 style='color: orange;'>Chat With the NSE Filings Information</h1>""")
# Second text input and button
text_input2 = gr.Textbox(
label="Enter Chat Query",
placeholder="Enter your query: e.g., List the companies that have recently made acquisitions",
lines=2
)
button2 = gr.Button("Chat")
outputs2 = [gr.Textbox(label="Chat Response", max_lines=100,lines=10,show_copy_button=True)]
# gr.Plot(label = "Categories")]
button2.click(chat_chain, inputs=[operation_dropdown,text_input2], outputs=outputs2)
# Launch the Gradio app
demo.launch()