import os
import tempfile

import streamlit as st
from transformers import pipeline

def _load_captioner():
    """Build the BLIP image-captioning pipeline."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def image2test(image):
    """Return the caption generated for ``image``.

    Args:
        image: Image input forwarded unchanged to the ``image-to-text``
            pipeline (``main`` passes a file path).

    Returns:
        The ``"generated_text"`` field of the pipeline's first output.
    """
    # Streamlit re-runs the whole script on every interaction, so building the
    # pipeline here unconditionally would reload the model each time. When
    # available, st.cache_resource keeps a single shared instance instead.
    # The wrapper is applied at call time (not as a decorator) so that merely
    # defining this function does not require Streamlit to be importable.
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        load = cache_resource(_load_captioner)
    else:
        # Older Streamlit without cache_resource: same behavior as before.
        load = _load_captioner

    captioner = load()
    outputs = captioner(image)
    return outputs[0]["generated_text"]


def main():
    """Render the page: upload a JPG, display it, and show its caption.

    The uploaded bytes are written to a temporary file whose path is handed
    to ``image2test``; the temporary file is removed afterwards, even if
    captioning raises.
    """
    st.set_page_config(page_title="Text to Speech", page_icon=":speech_balloon:", layout="wide")
    st.header("Text to Speech")
    uploaded_file = st.file_uploader("Choose a file", type="jpg")

    # Nothing to do until the user has picked a file.
    if uploaded_file is None:
        return

    st.image(uploaded_file, caption="Uploaded Image.", use_column_width=True)

    # Use a private temp file rather than the client-supplied file name, so an
    # upload can neither overwrite files in the working directory nor leave
    # stray copies behind. delete=False lets the file be closed before it is
    # reopened by path (required on Windows); cleanup is done explicitly below.
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name

    try:
        text = image2test(tmp_path)
    finally:
        os.remove(tmp_path)

    with st.expander("text"):
        st.write(text)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()