def read_and_split_file(filename, chunk_size=1200, chunk_overlap=200): with open(filename, 'r') as f: text = f.read() text_splitter = RecursiveCharacterTextSplitter( chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function = len, separators=[" ", ",", "\n"] ) # st.write(f'Financial report char len: {len(text)}') texts = text_splitter.create_documents([text]) return texts if __name__ == '__main__': # Comments and ideas to implement: # 1. Try sending list of inputs to the Inference API. import streamlit as st from sys import exit from pprint import pprint from collections import Counter from itertools import zip_longest from random import choice import requests from re import sub from rouge import Rouge from time import sleep, perf_counter import os from textwrap import wrap from multiprocessing import Pool, freeze_support from tqdm import tqdm from stqdm import stqdm from langchain.document_loaders import TextLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.schema.document import Document # from langchain.schema import Document from langchain.chat_models import ChatOpenAI from langchain.llms import OpenAI from langchain.schema import AIMessage, HumanMessage, SystemMessage from langchain.prompts import PromptTemplate from datasets import Dataset, load_dataset from sklearn.preprocessing import LabelEncoder from test_models.train_classificator import MLP from safetensors.torch import load_model, save_model from sentence_transformers import SentenceTransformer from torch.utils.data import DataLoader, TensorDataset import torch.nn.functional as F import torch import torch.nn as nn import sys sys.path.append(os.path.abspath(os.path.join(os.getcwd(), 'test_models/'))) sys.path.append(os.path.abspath(os.path.join(os.getcwd(), 'test_models/financial-roberta'))) st.set_page_config( page_title="Financial advisor", page_icon="๐ณ๐ฐ", layout="wide", ) # st.session_state.summarized = False with st.sidebar: "# How to use๐" """ โจThis is a holiday version of the web-UI with the magic ๐, allowing you to unwrap label predictions for a company based on its financial report text! ๐โจ The prediction enchantment is performed using the sophisticated embedding classifier approach. ๐๐ฎ """ center_style = "