# !pip install langchain langchain-groq sentence-transformers langchainhub faiss-cpu gradio gradio_client yfinance duckduckgo-search
import pandas as pd
import io
import requests
import os
import json
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
import yfinance as yf
import ast
import re
import pytz
# import langchain libraries
# !pip install langchain langchain-groq langchainhub duckduckgo-search
from langchain.agents import AgentExecutor
from langchain.agents import create_react_agent
from langchain.agents import create_structured_chat_agent
from langchain import hub
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import Tool
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.schema.output_parser import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.chains import RetrievalQA
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain.agents import create_tool_calling_agent
#import gradio libraries
# !pip install gradio gradio_client
import gradio as gr
#import vectorstore libraries
# !pip install faiss-cpu
from langchain_community.vectorstores import FAISS
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
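# Note: all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings; the same embedding_function
# must be passed when the FAISS indexes built below are later reloaded with FAISS.load_local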
############################################
############################################
# # Code steps involved:
# 1. Define the LLM
# 2. Extract data from NSE
# 3. Process the dataframes and store them as CSV files
# 4. Use Langchain CSV Loaders to load the CSV data
# 5. Create vector stores
# 6. Create company lists
# 7. Create the LLM functions required
# 8. Create the Python functions for stock data and charting
# 9. Create Gradio Blocks
# 10. Find any recent real-time additions to NSE data and add them to the vector stores
# 11. Create retrievers and Langchain QA retrieval chains
# 12. Define default charts
# 13. Gradio app
##########################################
##########################################
# Define the LLM - we use ChatGroq (Groq platform) with Llama3 70B
# This llm definition is redundant as models are now chosen by the user; kept for reference
# llm = ChatGroq(
#     api_key=os.environ.get("GROQ_API_KEY"),  # read the key from the environment; do not hardcode secrets
#     model="llama3-70b-8192",
#     # model = 'gemma-7b-it',
#     # model = 'mixtral-8x7B-32768',
#     temperature = 0
# )
# Get the data from NSE as pandas dataframe
# Function to get dataframe from NSE website
# Data is fetched from two NSE endpoints (corporate announcements and corporate actions), hence two dataframes
def get_pd(d):
# Get the current date
current_date = datetime.now()
# Get the previous day
previous_day = current_date - timedelta(days=d)
# Format the dates in the required format (dd-mm-yyyy)
current_date_str = current_date.strftime("%d-%m-%Y")
previous_day_str = previous_day.strftime("%d-%m-%Y")
base_url = 'https://www.nseindia.com'
session = requests.Session()
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, '
'like Gecko) '
'Chrome/80.0.3987.149 Safari/537.36',
'accept-language': 'en,gu;q=0.9,hi;q=0.8',
'accept-encoding': 'gzip, deflate, br'}
r = session.get(base_url, headers=headers, timeout=120)
cookies = dict(r.cookies)
# Use the dates in the URL
url1 = f"https://www.nseindia.com/api/corporate-announcements?index=equities&from_date={previous_day_str}&to_date={current_date_str}&csv=true"
url2 = f"https://www.nseindia.com/api/corporates-corporateActions?index=equities&csv=true"
response1 = session.get(url1, timeout=120, headers=headers, cookies=cookies)
response2 = session.get(url2, timeout=120, headers=headers, cookies=cookies)
content1 = response1.content
content2 = response2.content
df=pd.read_csv(io.StringIO(content1.decode('utf-8')))
dfca=pd.read_csv(io.StringIO(content2.decode('utf-8')))
return df, dfca
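# Illustrative usage (commented out): fetch filings for the last 1 day;
# the first dataframe holds corporate announcements, the second corporate actions
# df_ann, df_corp_actions = get_pd(1)
# print(df_ann.head())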
# Process the dataframes and store them as CSV files
# To speed up processing in RAG, I decided to use three separate vector stores:
# the first with all data, the second with minimal data and the third with corporate-action (CA) data
# Owing to the context-window limits of RAG, it is always good to ensure that we don't include irrelevant data
df_old, dfca = get_pd(1)
df_back = df_old.copy()
df_back.to_csv("df_backup.csv",index=False)
df_old.drop(['RECEIPT','DISSEMINATION','DIFFERENCE'],axis=1,inplace=True)
df_old2 = df_old.drop(['ATTACHMENT'],axis=1)
# Save it as a CSV file
df_old.to_csv("nse_data_old.csv", index=False)
# df_old1.to_csv("nse_data_old1.csv", index=False)
df_old2.to_csv("nse_data_old2.csv", index=False)
dfca.to_csv("nse_ca.csv", index=False)
# Use Langchain CSV Loaders to load the CSV data
loader = CSVLoader("nse_data_old.csv")
data_old = loader.load()
loader2 = CSVLoader("nse_data_old2.csv")
data_old_2 = loader2.load()
loader3 = CSVLoader("nse_ca.csv")
data_ca = loader3.load()
global vectorstore,vectorstore2,vectorstore3, colist, colist_tracked
# Create vectorstores - I tried Chroma, but FAISS worked better here
vectorstore = FAISS.from_documents(data_old, embedding_function)
vectorstore2 = FAISS.from_documents(data_old_2, embedding_function)
vectorstore3 = FAISS.from_documents(data_ca, embedding_function)
vectorstore.save_local("vectorstore")
vectorstore2.save_local("vectorstore2")
vectorstore3.save_local("vectorstore3")
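# Optional sanity check (illustrative, commented out): query the announcements index directly
# docs = vectorstore.similarity_search("dividend declared", k=2)
# print(docs[0].page_content)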
###########################
# Create company list
# Load the NIFTY 50 company names - this list is currently hardcoded since NIFTY constituents change infrequently, but it can be made dynamic
co1 = pd.read_csv('ind_nifty50list.csv')
# Create company lists required
# Get the column you want to convert to a list
column_name = "Company Name"
# # Convert the column to a list
co_list1 = co1[column_name].tolist()
# # These are the companies that are being tracked - this can be uploaded / hardcoded
co_list_tracked = ['Reliance Industries Limited', 'Infosys Limited', 'ICICI Bank Ltd', 'Indusind Bank Ltd', 'Ramco Systems', \
                   'Zydus Lifesciences Limited', 'Bharti Airtel Limited', \
                   'ICICI Bank Limited', 'TechMahindra Limited', 'Indiabulls Real Estate Limited', 'Tamilnad Mercantile Bank Limited', \
                   'Bajaj Finance Limited', 'Apollo Tyres Limited', 'Indusind Bank Limited', 'Kirloskar Oil Engines Limited']
co_list = co_list1 + co_list_tracked
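# Note: co_list may contain near-duplicate names (e.g. 'Ltd' vs 'Limited'); if exact
# de-duplication is ever needed, something like the following (commented out) would do:
# co_list = list(dict.fromkeys(co_list))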
####################################
##################################
# Let us create some functions required
##################################
# LLM function to get announcement detail
def give_announcement(llm,stock):
if not stock:
return "This company has not made any announcements today or yesterday"
else:
retriever1 = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm,
retriever=retriever1,
return_source_documents=False)
response = qa_chain({"query":f"What are the announcements made by the company {stock}?. If no announcement has been made by that company, \
just say that no announcement has been made by that company."})
return f"Announcements made by {stock}: {response['result']}"
# LLM function to get Corporate Action Detail
def get_ca(llm,stock):
# stock = stock_name
if not stock:
return "This company has not made any announcements today or yesterday"
else:
# resp1 = llm.invoke(f"get all the yahoo finance company name(s) of entity name in {stock}. Just print the ticker(s) alone. Do not print leading sentences.")
# stock = resp1.content
retriever3 = vectorstore3.as_retriever()
qa_chain2 = RetrievalQA.from_chain_type(llm,
retriever=retriever3,
return_source_documents=False)
response = qa_chain2({"query":f"What are the corporate action announcements made by the company {stock}?. If no announcement has been made by that company, do not print any source documents and \
just say that no announcement has been made by that company."})
return response['result']#, response['source_documents']
# a web search tool
search=DuckDuckGoSearchRun()
# Fetch stock data from Yahoo Finance
def get_stock_price(ticker,history=5):
# time.sleep(4) #To avoid rate limit error
if "." in ticker:
ticker=ticker.split(".")[0]
ticker=ticker+".NS"
stock = yf.Ticker(ticker)
df = stock.history(period="1y")
df=df[["Close","Volume"]]
df.index=[str(x).split()[0] for x in list(df.index)]
df.index.rename("Date",inplace=True)
df=df[-history:]
# print(df.columns)
return df.to_string()
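# Illustrative example (commented out): the ".NS" suffix is appended automatically, so
# get_stock_price("RELIANCE") returns the last 5 rows of Close/Volume for RELIANCE.NS
# print(get_stock_price("RELIANCE"))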
# get stock price movements
def get_movements(llm,stock):
if not stock:
return "This company has not made any announcements today or yesterday"
else:
stock = stock[0]
dfc = pd.read_csv('nse_data_old.csv')
stockdesc = dfc[dfc['COMPANY NAME'] == stock]['COMPANY NAME'].iloc[0]
stock1 = dfc[dfc['COMPANY NAME'] == stock]['SYMBOL'].iloc[0]
stock = get_ticker(stock1)
print("stock is ",stock)
tools=[
Tool(
name="get stock data",
func=get_stock_price,
description=f"Use this tool to get stock price data. This tool will return three values: date, volume and closing price of the stock \
for the period of 5 days. stock = {stock}"
),
Tool(
name="DuckDuckGo Search",
func=search.run,
description=f"Use this tool for for web search for searching details about stock like broker sentiment. You can also get recent stock \
related news. stock symbol = {stock} and stockname = {stockdesc}"
),
]
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"You are a helpful stock market analysis assistant. Make sure to use the tools given for information.",
),
("placeholder", "{chat_history}"),
("human", "{input}"),
("placeholder", "{agent_scratchpad}"),
]
)
# Construct the Tools agent
agent = create_tool_calling_agent(llm, tools, prompt)
# Create an agent executor by passing in the agent and tools
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
response = agent_executor.invoke({"input": f"How much the stock price of stock {stock} with name {stockdesc} moved in the last few days?. Give the prices \
over the last few days and also percentage change. For example, If the stock has not moved in single direction, \
you can say the stock has been volatile. But if it has moved up over five days, you can say so with percentage movement"})
return f"Answer for {stock} - {response['output']}"
#####################################
# get stock sentiments
#####################################
prompt1 = """Hello, I need broker sentiment data for a specific stock. Please search and summarize current market analyses, broker reports, \
and overall sentiment regarding the given stock:\Focus on information from credible sources like financial news, broker reports, and investment research firms. \
Provide key insights, including:\
Recent broker recommendations (buy, hold, sell), \
Notable broker analyses or reports, \
General trends in broker sentiment, \
Any major news or events impacting the stock's sentiment. \
Please ensure the data is up-to-date and from reputable sources. Provide a concise summary with relevant details and any supporting context to understand the current sentiment.\
Please note that you are not chat agent, but meant for single usage, so do not conclude with any greetings or asking for further assistance etc!.\
"""
def get_sentiments(llm,stock):
if not stock:
return "This company has not made any announcements today or yesterday"
else:
print("st1",stock)
stock = stock[0]
print("af ",stock)
#####
dfc = pd.read_csv('nse_data_old.csv')
stockdesc = dfc[dfc['COMPANY NAME'] == stock]['COMPANY NAME'].iloc[0]
stock1 = dfc[dfc['COMPANY NAME'] == stock]['SYMBOL'].iloc[0]
stock = get_ticker(stock1)
tools=[
Tool(
name="get stock data",
func=get_stock_price,
description=f"Use this tool to get stock price data. This tool will return three values: date, volume and closing price of the stock \
for the period of 5 days. stock = {stock}"
),
Tool(
name="DuckDuckGo Search",
func=search.run,
description=f"Use this tool for for web search for searching details about stock like broker sentiment. You can also get recent stock \
related news. stock name = {stockdesc}"
),
]
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
f"{prompt1}",
),
("placeholder", "{chat_history}"),
("human", "{input}"),
("placeholder", "{agent_scratchpad}"),
]
)
        # The tools agent and executor are constructed inside the try/except block below
try:
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
response = agent_executor.invoke({"input": f"Get broker sentiment for the stock {stock} and stock name {stockdesc}"})
return f"Broker sentiment analysis for {stock}. - {response['output']}"
except Exception as e:
return f"An error occurred: {str(e)}"
#################
# Fetch financial statements from Yahoo Finance
def get_balancesheet(ticker):
# time.sleep(4) #To avoid rate limit error
if "." in ticker:
ticker=ticker.split(".")[0]
else:
ticker=ticker
ticker=ticker+".NS"
company = yf.Ticker(ticker)
df = company.balance_sheet
# df = df.head(30)
    df.ffill(inplace=True)
df.dropna(inplace=True)
return df
def get_incomestatement(ticker):
# time.sleep(4) #To avoid rate limit error
if "." in ticker:
ticker=ticker.split(".")[0]
else:
ticker=ticker
ticker=ticker+".NS"
company = yf.Ticker(ticker)
df = company.financials
# df = df.head(30)
    df.ffill(inplace=True)
df.dropna(inplace=True)
return df
def get_ticker(company_name):
com=company_name+".NS"
ticker = yf.Ticker(com)
return ticker.info['symbol']
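# Note: get_ticker assumes an NSE symbol (e.g. "RELIANCE") and returns the corresponding
# Yahoo Finance symbol with the ".NS" suffix; yfinance may raise if .info is unavailable
# for a symbol, so callers could wrap this in try/except if more robustness is needed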
def get_financialratio(model, input,stock):
stock_name = get_companynames(stock)
llm = get_model(model)
if not stock_name:
return "This company has not made any announcements"
else:
stockname = stock_name[0]
print("stock1 ",stockname)
dfc = pd.read_csv('nse_data_old.csv')
stock1 = dfc[dfc['COMPANY NAME'] == stockname]['SYMBOL'].iloc[0]
print("staock1 ",stock1)
stock = get_ticker(stock1)
print("stock is ",stock)
if input == '':
return "No query has been entered!"
else:
resp = llm.invoke(f"You have to answer either 'A' or 'B' without any leading sentences - check whether the input {input} pertains \
to financial ratio query. If it pertains to financial ratio query, \
respond with letter 'A', else with letter 'B' if it contains only something like company name")
print("nature of query ",resp)
if resp.content == 'B':
return "Enter a query pertaining to financial ratios!"
else:
# resp1 = llm.invoke(f"get yahoo finance ticker name of entity name in {input}. Just print the ticker alone. Do not print leading sentences.")
# stock = resp1.content
# resp2 = llm.invoke(f"to answer the query {input}, whether balance sheet or income statement required? If balance sheet, answer A, else B")
resp2 = llm.invoke(f"Answer A, if balance sheet or B, if income statement. To answer the query {input}, \
whether balance sheet or income statement required - If balance sheet, answer A, else B")
if resp2.content=='A':
df1 = get_balancesheet(f'{stock}')
print("balance sheet")
else:
df1 = get_incomestatement(f'{stock}')
print("income statement")
df=df1.T
print("the df is ",df)
cols= df.columns.tolist()
resp3 = llm.invoke(f"List the column names, as python list, in {cols} needed for {input} calculation. Do not output any sentence other than column names.\
For example, do not output leading answer statements like: Here are the column names needed for ..")
message=resp3.content
def extract_df(df, message):
c = ast.literal_eval(message)
return df[c]
df_new=extract_df(df,message)
# prompt1 = f"List the column names, as python list, in {cols} needed for {data} calculation. Do not output any sentence other than column names.\
# For example, do not output leading answer statements like: Here are the column names needed for .."
# prompt = f"What is the current ratio of {stock}?. Use {df_new}. Give only year and current ratio for that year in JSON format"
parser = JsonOutputParser()
prompt = PromptTemplate(
template="Answer the user query.\n{format_instructions}\n{query}\n",
input_variables=["query"],
partial_variables={"format_instructions": parser.get_format_instructions()},
)
# prompt = ChatPromptTemplate.from_messages(
# [
# (
# "system",
# "You are a helpful financial data analysis assistant.",
# ),
# ("placeholder", "{chat_history}"),
# ("human", f"Answer the user using df_new and input: question:{input}, dataframe: {df_new}, \
# format_instructions: parser.get_format_instructions()"\
# ),
# ("placeholder", "{agent_scratchpad}"),
# ]
# )
chain = prompt | llm | parser
try:
response= chain.invoke( f"Using {df_new}, {input}?")
# Print only the results. Print the output in Json format.")
return f"For the company: {stockname}, Here are the details: {response}"
except Exception as e:
return f"An error occurred: {str(e)}"
##########################
# Functions to plot a chart over ratios - this has scope for major enhancements!
def plot_chart(data):
# Load the JSON string into a Python object
# data = json.loads(json_str)
# Get the first key in the dictionary
try:
key = list(data.keys())[0]
# Create a plot
plt.figure(figsize=(8, 6))
plt.bar(data[key].keys(), data[key].values())
plt.title(f"{key} Over Years")
plt.xlabel("Year")
plt.ylabel(key)
plt.tight_layout()
# Return the plot
return plt
except Exception as e:
return None
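# Expected input shape for plot_chart (illustrative): a dict keyed by the ratio name with
# year -> value pairs, e.g. {"Current Ratio": {"2022": 1.2, "2023": 1.4}};
# anything else falls through to the except branch and returns None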
# def get_chart(input):
# response = get_financialratio(model,input)
# plt = plot_chart(response)
# return plt
def get_chart(model,input,stock):
stock_name = get_companynames(stock)
if stock_name:
response = get_financialratio(model,input,stock)
# Extract the dictionary part using regex
dict_match = re.search(r"\{.*\}", response) # Search for content within curly braces
# Convert the extracted string to a dictionary
if dict_match:
extracted_dict_str = dict_match.group(0) # Get the matching text
extracted_dict = ast.literal_eval(extracted_dict_str) # Convert string to dictionary
else:
extracted_dict = None # No dictionary found
print("extrated tic ", extracted_dict)
plt = plot_chart(extracted_dict)
return plt
else: return None
def combined_ratio(model, input,stock):
return get_financialratio(model,input,stock), get_chart(model, input,stock)
###############################
###############################
# Create the Gradio Blocks interface with a title and description
##################################
global flag
def incremental_process():
global vectorstore,vectorstore2,vectorstore3, flag
try:
df_new, _ = get_pd(1)
flag = 0
    except Exception:
df_new = pd.read_csv("df_backup.csv")
flag = 1
df_new.to_csv("df_new.csv",index=False)
print("length of df_new ",len(df_new))
print("length of df_old ", len(df_old))
#drop unnecessary common columns
df_new.drop(['RECEIPT','DISSEMINATION','DIFFERENCE'],axis=1,inplace=True)
# #find the difference and add incrementally for first store
df_merged = df_new.merge(df_old, how='left', indicator=True)
# Filter rows that are unique to 'n' (i.e., where '_merge' is 'left_only')
df_add1= df_merged[df_merged['_merge'] == 'left_only'].drop(columns=['_merge'])
# Save it as a CSV file
df_add1.to_csv("nse_data_add1.csv", index=False)
#drop unnecessary columns for second vector store
df_new2 = df_new.drop(['ATTACHMENT'],axis=1)
# add increment for second store
df_merged = df_new2.merge(df_old2, how='left', indicator=True)
df_add2 = df_merged[df_merged['_merge'] == 'left_only'].drop(columns=['_merge'])
# Save it as a CSV file
df_add2.to_csv("nse_data_add2.csv", index=False)
#####################
# Load the first CSV file
dfold = pd.read_csv('nse_data_old.csv')
# Load the second CSV file
dfadd = pd.read_csv('nse_data_add1.csv')
# print("df old",dfold)
# print("######")
# print("df add ",dfadd)
if dfadd.empty:
dfco = dfold.copy()
else:
# Append df2 at the end of df1
dfco = pd.concat([dfold, dfadd], ignore_index=True)
dfco.to_csv("dfco.csv",index=False)
# Here incremental RAG is achieved by adding additional data dynamically to vectorstore
loader = CSVLoader("nse_data_add1.csv")
data_new1 = loader.load()
loader = CSVLoader("nse_data_add2.csv")
data_new2 = loader.load()
print("original size ",vectorstore.index.ntotal)
len1 = len(pd.read_csv('nse_data_old.csv')) + len(pd.read_csv('nse_data_add1.csv'))
print("len1 old + new csv ",len1)
len2 = vectorstore.index.ntotal
if len1!=len2:
print("old size ",vectorstore.index.ntotal)
# for first store
vectorstore_add1 = FAISS.from_documents(data_new1, embedding_function)
print("incremental size ",vectorstore_add1.index.ntotal)
vectorstore_new1 = FAISS.load_local("vectorstore",embedding_function,allow_dangerous_deserialization=True)
vectorstore_new1.merge_from(vectorstore_add1)
vectorstore_new1.save_local("vectorstore")
print("new size ",vectorstore_new1.index.ntotal)
print("new old size ",vectorstore.index.ntotal)
# retrieverx = vectorstore_new.as_retriever()
# for second store
vectorstore_add2 = FAISS.from_documents(data_new2, embedding_function)
print("incremental size ",vectorstore_add2.index.ntotal)
vectorstore_new2 = FAISS.load_local("vectorstore2",embedding_function,allow_dangerous_deserialization=True)
vectorstore_new2.merge_from(vectorstore_add2)
vectorstore_new2.save_local("vectorstore2")
print("new size ",vectorstore_new2.index.ntotal)
print("new old size ",vectorstore2.index.ntotal)
# retrieverx = vectorstore_new2.as_retriever()
##########################
# Define updated vector stores, retrievers and QA chains
##########################
vectorstore = FAISS.load_local("vectorstore",embedding_function,allow_dangerous_deserialization=True)
print("final size store 1",vectorstore.index.ntotal)
vectorstore2 = FAISS.load_local("vectorstore2",embedding_function,allow_dangerous_deserialization=True)
print("final size store 2",vectorstore2.index.ntotal)
vectorstore3 = FAISS.load_local("vectorstore3",embedding_function,allow_dangerous_deserialization=True)
print("final size store 3",vectorstore3.index.ntotal)
return flag
def get_colist2():
dfco = pd.read_csv('dfco.csv')
dfco1 = dfco[['COMPANY NAME']]
dfco2 = dfco1.drop_duplicates()
# Save the result to a new CSV file
dfco2.to_csv('companies.csv', index=False)
dfco3 = dfco2.head(10)
co_list3 = dfco3['COMPANY NAME'].unique().tolist()
filtered_df = dfco2[dfco2['COMPANY NAME'].isin(co_list)]
co_list2 = filtered_df['COMPANY NAME'].tolist()
return co_list2, co_list3
def get_timestampmessage(flag):
dfco = pd.read_csv('dfco.csv')
timestamp = dfco[['BROADCAST DATE/TIME']].max().values.tolist()[0]
if flag == 1:
message = f"There is NSE timeout error. The latest filing information is available upto {timestamp}"
else: message = f"Lastest filing information is available upto {timestamp}"
return message
def update():
global flag
flag = incremental_process()
message = get_timestampmessage(flag)
return message
def give_time():
dfco = pd.read_csv("dfco.csv")
timestamp = dfco[['BROADCAST DATE/TIME']].max().values.tolist()[0]
return timestamp
# Define the IST timezone
ist_timezone = pytz.timezone("Asia/Kolkata")
# Define UTC for server-side time
utc_timezone = pytz.utc
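# NSE broadcast timestamps are expected in "%d-%b-%Y %H:%M:%S" format
# (e.g. "21-Jun-2024 16:05:33"); refresh() below parses them with this assumption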
def refresh():
# Get the client-side timestamp (assuming it is in IST)
timestamp_str = give_time() # The format returned should match the expected format
given_time = datetime.strptime(timestamp_str, "%d-%b-%Y %H:%M:%S")
given_time_ist = ist_timezone.localize(given_time) # Localize to IST
# Get the current server time in UTC
current_time_utc = datetime.now(tz=utc_timezone)
# Convert the client-side time to UTC for consistent comparison
given_time_utc = given_time_ist.astimezone(utc_timezone)
# Calculate the time difference
time_difference = current_time_utc - given_time_utc
print("the time diff is ", time_difference)
# Check if the time difference is greater than one hour
if time_difference > timedelta(hours=1):
message1 = update()
print("Incremental update run")
else:
message1 = f"Refresh allowed only if data is stale for more than one hour. Current client timestamp: {timestamp_str}"
return message1
##########################################################################
def plot1_top_20():
df = pd.read_csv('nse_data_old.csv')
subjects = ['Acquisition',
'Alteration Of Capital and Fund Raising-XBRL',
'Analysts/Institutional Investor Meet/Con. Call Updates',
'Board Meeting Intimation',
'Book Closure',
'Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent',
'Change in Management',
'Credit Rating',
'Disclosure of material issue',
'Dividend',
'Financial Result Updates',
'Investor Presentation',
'Notice Of Shareholders Meetings-XBRL',
'Related Party Transactions',
'Resignation',
'Rights Issue',
'Shareholders meeting',
'Spurt in Volume',
'Update-Acquisition/Scheme/Sale/Disposal-XBRL',
]
# companies = co_list2
# df = df[df['COMPANY NAME'].isin(co_list2)]
df = df[df['SUBJECT'].isin(subjects)]
df['SUBJECT'] = df['SUBJECT'].replace('Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent', 'Change in Key Managerial Personnel')
value_counts = df['SUBJECT'].value_counts()
# Get the top 10 labels by count
# top_20_value_counts = value_counts[:20]
plt.figure(figsize=(10, 6))
plt.barh(value_counts.index, value_counts.values)
plt.xlabel('Count')
plt.ylabel('Announcements')
plt.title('NSE Corporate Announcements - A Glance')
plt.tight_layout()
# plt.close()
return plt
## Function to create company list specific chart
def plot2_top_20():
co_list2,_ = get_colist2()
# global co_list2
# Get the counts of each label
df = pd.read_csv('nse_data_old.csv')
subjects = ['Acquisition',
'Alteration Of Capital and Fund Raising-XBRL',
'Analysts/Institutional Investor Meet/Con. Call Updates',
'Board Meeting Intimation',
'Book Closure',
'Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent',
'Change in Management',
'Credit Rating',
'Disclosure of material issue',
'Dividend',
'Financial Result Updates',
'Investor Presentation',
'Notice Of Shareholders Meetings-XBRL',
'Related Party Transactions',
'Resignation',
'Rights Issue',
'Shareholders meeting',
'Spurt in Volume',
'Update-Acquisition/Scheme/Sale/Disposal-XBRL',
]
# companies = co_list2
df = df[df['COMPANY NAME'].isin(co_list2)]
# df = df[df['COMPANY NAME'].isin(co_list_tracked)]
# df = df[df['SUBJECT'].isin(subjects)]
df['SUBJECT'] = df['SUBJECT'].replace('Change in Directors/ Key Managerial Personnel/ Auditor/ Compliance Officer/ Share Transfer Agent', 'Change in Key Managerial Personnel')
value_counts = df['SUBJECT'].value_counts()
# Get the top 10 labels by count
# top_20_value_counts = value_counts[:20]
plt.figure(figsize=(10, 6))
plt.barh(value_counts.index, value_counts.values)
plt.xlabel('Count')
plt.ylabel('Announcements')
plt.title('NSE Corporate Announcements - Tracked Companies')
plt.tight_layout()
# plt.close()
return plt
def get_companynames(stock):
df = pd.read_csv('nse_data_old.csv')
if stock:
        # Get rows where 'COMPANY NAME' contains the keyword (case-insensitive, literal match)
        matched_rows = df[df['COMPANY NAME'].str.contains(stock, case=False, regex=False)]
# Get unique company names
unique_companies = matched_rows['COMPANY NAME'].unique()
return list(set(unique_companies))
else: return None
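# Illustrative usage (commented out): matching is partial and case-insensitive, so
# get_companynames("zydus") would return ['Zydus Lifesciences Limited'] if that company
# appears in today's filings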
# A combined function to be used in Gradio output box
def print_model(llm):
co_list2,_ = get_colist2()
if co_list2:
return f"You are using {llm.model_name} model for this session. \n \n" \
f"These are the companies you track: {co_list_tracked}. \n \n" \
f"These are the companies, including those in NIFTY, that have filed any information with NSE either today / yesterday - {co_list2}"
else:
return f"You are using {llm.model_name} model for this session. \n \n" \
f"Your are tracking these companies: {co_list_tracked}, \n \n"\
f"None of the tracked companies or NIFTY 50 have filed any information with NSE on either today or yesterday"
def print_model1(llm):
return f"You are using {llm.model_name} model for this session. \n \n [Note: There is NSE timeout error preventing fetching of latest data. So, results may not be real-time / up-to-date]"
def combined_function1(model,stock):
global flag
llm = get_model(model)
stock = get_companynames(stock)
if flag == 0:
return print_model(llm), give_announcement(llm,stock),get_ca(llm,stock),get_movements(llm,stock), get_sentiments(llm,stock)
else:
return print_model1(llm), give_announcement(llm,stock),get_ca(llm,stock),get_movements(llm,stock), get_sentiments(llm,stock)
def get_model(model_name):
    llm = ChatGroq(
        # Read the Groq API key from the environment (set GROQ_API_KEY); avoid hardcoding secrets
        api_key=os.environ.get("GROQ_API_KEY"),
        model=model_name,
        max_tokens=8192,
        temperature=0
    )
return llm
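# Usage note (assumption): set GROQ_API_KEY in the hosting environment before launching the app;
# ChatGroq also reads this environment variable automatically if api_key is not passed explicitly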
# This function is defined here because the company list is dynamic
def give_names():
global co_list_tracked
co_list2, co_list3 = get_colist2()
return f"Apart from NIFTY, these are the companies you track: \n \n" \
f" {co_list_tracked}. \n \n" \
f"These are the tracked companies that have made announcements: \n \n" \
f"{co_list2}. \n \n" \
f"These are latest 10 companies that have made announcements: \n \n " \
f"{co_list3}"
##############################
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
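# "langchain-ai/retrieval-qa-chat" is the standard retrieval-QA chat prompt on LangChain Hub;
# it expects "context" and "input" variables, which create_retrieval_chain supplies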
###############################
# This function handles chat queries; it is placed here because the retriever it uses is built from the updated vector store
def chat_chain(model,query):
llm = get_model(model)
if query=='':
return "Please enter a query!"
else:
combine_docs_chain = create_stuff_documents_chain(
llm, retrieval_qa_chat_prompt)
retriever2 = vectorstore2.as_retriever()
retrieval_chain = create_retrieval_chain(retriever2, combine_docs_chain)
response = retrieval_chain.invoke({"input": query})
return response['answer']
#################################
## Update the vector stores with the latest data
flag = incremental_process()
###########################################################################
with gr.Blocks() as demo:
# Add a Markdown block for the description
gr.Markdown("""<h1 style='color: blue;'>Chat and Analyze with NSE Filings Information</h1>""")
gr.Markdown("""Powered by Gradio, Groq, Llama3, FAISS, Langchain, YahooFinance""")
gr.Markdown(
"""
<img src="https://upload.wikimedia.org/wikipedia/commons/1/12/NSE_Exchange_Plaza.jpg" width=500px>
Enter any company name to know its recent filings with NSE in real time. This app can track a list of companies for any corporate announcements \
filed with NSE (currently the NIFTY 50 list is hard-coded). If you want to know whether any of the tracked companies has made an announcement either yesterday or today, \
enter the company name and submit. The first output box lists all the tracked companies that have made an announcement today. \
The second box provides details about the announcement. You can also do ratio analysis and chat with the filings information (beta).
"""
)
txt_output = gr.Text(give_time(),label = "Opening Data - Timestamp of latest Filing")
txt_output = gr.Text(give_names(),label = "Announcements for tracked companies")
    # Default charts displayed when the app is launched
plot_output1 = gr.Plot(plot1_top_20(), label="Chart") # Call the function to create the plot
plt.close()
plot_output2 = gr.Plot(plot2_top_20(), label="Chart") # Call the function to create the plot
plt.close()
gr.Markdown("""<h2 style='color: blue;'>Fetch Announcements/Corporate Actions/Price Movements/Broker Sentiments</h2>""")
# Use a Column to structure the inputs and outputs
with gr.Column():
outputs5 = [gr.Textbox(label="Latest Filing Timestamp",placeholder="Refresh data if stale for more than an hour")]
button5 = gr.Button("Refresh Data")
# button5.click(lambda: refresh(dfco), inputs=None, outputs=outputs5)
button5.click(lambda: refresh(), inputs=None, outputs=outputs5)
# Create a dropdown box for selecting the operation
operation_dropdown = gr.Dropdown(
label="Select a model",
choices=['llama3-70b-8192','llama3-8b-8192', 'gemma-7b-it','mixtral-8x7B-32768' ], # Options for the dropdown
value='llama3-70b-8192', # Default value
)
# First text input and button
text_input1 = gr.Textbox(
label="Enter Company Name",
placeholder="Enter a company name; e.g., Zydus Lifesciences Limited",
lines=1
)
button1 = gr.Button("Start Analysis")
outputs1 = [
gr.Textbox(label="Selected Model",show_copy_button=True),
gr.Textbox(label="Announcement Detail", max_lines=100,show_copy_button=True),
gr.Textbox(label="Any Corporate Actions during last week?", max_lines=100,show_copy_button=True),
gr.Textbox(label="Stock Price Movement", max_lines=100,show_copy_button=True),
gr.Textbox(label="Broker Sentiment", max_lines=100,show_copy_button=True),
]
button1.click(lambda x,y: combined_function1(x,y), inputs=[operation_dropdown,text_input1], outputs=outputs1)
gr.Markdown("""<h1 style='color: green;'>Analyse the Financial Statements of the above Company</h1>""")
text_input3 = gr.Textbox(
label="Enter Query",
placeholder="Enter your query: e.g., What is the current ratio of the stock over three years?",
lines=1)
button3 = gr.Button("Analyse")
outputs3 = [
gr.Textbox(label="Chat Response", max_lines=100,show_copy_button=True),
gr.Plot(label = "Chart")]
button3.click(combined_ratio, inputs=[operation_dropdown,text_input3,text_input1], outputs=outputs3)
gr.Markdown("""<h1 style='color: orange;'>Chat With the NSE Filings Information</h1>""")
# Second text input and button
text_input2 = gr.Textbox(
label="Enter Chat Query",
placeholder="Enter your query: e.g., List the companies that have recently made acquisitions",
lines=2
)
button2 = gr.Button("Chat")
outputs2 = [gr.Textbox(label="Chat Response", max_lines=100,lines=10,show_copy_button=True)]
# gr.Plot(label = "Categories")]
button2.click(chat_chain, inputs=[operation_dropdown,text_input2], outputs=outputs2)
# Launch the Gradio app
demo.launch()