"""Streamlit app that classifies a news headline with a TFLite DistilBERT model."""

import os

import cloudpickle
import numpy as np
import pandas as pd
import streamlit as st
import tensorflow as tf
from transformers import DistilBertTokenizerFast
from unidecode import unidecode

# Artifacts are resolved relative to the current working directory.
MODEL_PATH = os.path.join("models", "news_classification_hf_distilbert.tflite")
LABEL_ENCODER_PATH = os.path.join("models", "news_classification_labelencoder.bin")
TOKENIZER_CHECKPOINT = "distilbert-base-uncased"

# Every headline is padded/truncated to this many tokens before inference.
MAX_SEQUENCE_LENGTH = 80

# Returned by inference() when there is no usable text to classify.
NO_PREDICTION = "Can't Predict"


def load_model():
    """Load the TFLite interpreter, the label encoder and the tokenizer.

    Returns:
        A ``(interpreter, label_encoder, tokenizer)`` tuple. The interpreter
        already has its tensors allocated and is ready for ``set_tensor`` /
        ``invoke``.
    """
    interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)
    # Input shapes never change between requests, so allocate once here
    # instead of on every inference call.
    interpreter.allocate_tensors()

    # SECURITY: cloudpickle.load executes arbitrary code embedded in the file.
    # Only ship a label-encoder artifact that comes from a trusted source.
    with open(LABEL_ENCODER_PATH, "rb") as label_encoder_file:
        label_encoder = cloudpickle.load(label_encoder_file)

    tokenizer = DistilBertTokenizerFast.from_pretrained(TOKENIZER_CHECKPOINT)
    return interpreter, label_encoder, tokenizer


# NOTE(review): Streamlit re-executes the script on each interaction, so this
# load is presumably repeated per rerun. Caching it (e.g. st.cache_resource)
# would share one interpreter between sessions -- confirm that is safe before
# adding it.
interpreter, label_encoder, tokenizer = load_model()


def inference(text):
    """Classify a headline and return a display string.

    Args:
        text: The headline entered by the user.

    Returns:
        ``"LABEL (score)"`` where LABEL is the upper-cased class name decoded
        by the label encoder and score is the model output at the winning
        index rounded to 5 decimals, or ``"Can't Predict"`` when ``text`` is
        empty or contains only whitespace.
    """
    # Blank or whitespace-only input carries nothing to classify.
    if not text or not text.strip():
        return NO_PREDICTION

    tokens = tokenizer(
        text,
        max_length=MAX_SEQUENCE_LENGTH,
        padding="max_length",
        truncation=True,
        return_tensors="tf",
    )

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()[0]

    # NOTE(review): assumes the converted model lists attention_mask as input
    # 0 and input_ids as input 1 -- verify against input_details[i]["name"].
    interpreter.set_tensor(input_details[0]["index"], tokens["attention_mask"])
    interpreter.set_tensor(input_details[1]["index"], tokens["input_ids"])
    interpreter.invoke()

    # The output is batched; take the single row for our one headline.
    scores = interpreter.get_tensor(output_details["index"])[0]
    best_index = np.argmax(scores)
    label = label_encoder.inverse_transform([best_index])[0].upper()
    score_text = str(np.round(scores[best_index], 5))
    return f"{label} ({score_text})"


def main():
    """Render the page: title, category list, input box and prediction."""
    st.title("News Headline Classification")
    lang_trained = 'TECHNOLOGY, HEALTH, WORLD, ENTERTAINMENT, SPORTS, BUSINESS, NATION, SCIENCE'
    st.write(f'Model is fine-tuned on the following categories \n{lang_trained}')

    review = st.text_area("Enter a news headline:", "", height=100)

    if st.button("Submit"):
        result = inference(review)
        st.write(result)


if __name__ == "__main__":
    main()