"""Streamlit app that generates a text caption for an uploaded JPG image."""

import tempfile
from pathlib import Path

import streamlit as st
from transformers import pipeline

_CAPTION_MODEL = "Salesforce/blip-image-captioning-base"


@st.cache_resource
def _load_captioner():
    """Build the image-to-text pipeline once and reuse it across reruns."""
    # Streamlit re-executes the script on every interaction; without the
    # cache the model would be re-instantiated for each uploaded image.
    return pipeline("image-to-text", model=_CAPTION_MODEL)


def image2test(image):
    """Return the caption generated for *image*.

    Args:
        image: Input forwarded unchanged to the image-to-text pipeline;
            ``main`` passes the path of the saved upload as a string.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    outputs = _load_captioner()(image)
    return outputs[0]["generated_text"]


def main():
    """Render the page: upload a JPG, display it, and show its caption."""
    # NOTE(review): the title says "Text to Speech" although this script only
    # produces a caption -- confirm whether that wording is intended.
    st.set_page_config(
        page_title="Text to Speech",
        page_icon=":speech_balloon:",
        layout="wide",
    )
    st.header("Text to Speech")

    uploaded_file = st.file_uploader("Choose a file", type="jpg")
    if uploaded_file is None:
        return

    st.image(uploaded_file, caption="Uploaded Image.", use_column_width=True)

    # Save the upload under a fixed name inside a private temporary directory
    # rather than under the client-supplied file name in the working
    # directory: that could overwrite local files and was never cleaned up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = Path(tmp_dir) / "upload.jpg"
        image_path.write_bytes(uploaded_file.getvalue())
        text = image2test(str(image_path))

    with st.expander("text"):
        st.write(text)


if __name__ == "__main__":
    main()