# caption_api / caption_api.py
# Uploaded by divyanshu1807gupta ("Upload 4 files", commit c402060).
from flask import Flask,request
import google.generativeai as palm
import re
import pickle
import numpy as np
import requests
from PIL import Image
from io import BytesIO
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add
from tensorflow.keras.models import load_model
# Disabled alternatives that restore a pickled tokenizer / a saved VGG model:
# tokenizer = pickle.load(open('tokenizer.pkl', 'rb'))
# vgg_model = load_model('vgg_model.h5')

# Caption model loaded from disk, and the length its text input is padded to.
model = load_model('best_model.h5')
max_len = 35

# Read the caption listing, discarding its first line.
with open('captions.txt', 'r') as f:
    next(f)
    caption_file = f.read()

# Map image id (file name up to its first '.') -> list of raw caption strings.
captions = {}
for line in caption_file.split('\n'):
    values = line.split(",")
    if len(line) < 2:
        # Blank or one-character lines carry no record.
        continue
    image_id = values[0].split('.')[0]
    # Commas inside the caption text were split apart above; the pieces are
    # rejoined with single spaces.
    caption = " ".join(values[1:])
    captions.setdefault(image_id, []).append(caption)
def clean(captions):
    """Normalise every caption in ``captions`` in place.

    Each caption is lower-cased, every character outside ``a-z``/``A-Z`` is
    replaced by a space, whitespace runs are collapsed, single-character
    words are dropped, and the result is wrapped as
    ``"startseq <words> endseq"``.

    Args:
        captions: Mapping of key -> mutable list of caption strings. The
            lists are modified in place.

    Returns:
        None.
    """
    # Raw-string patterns: the original non-raw '\s+' is an invalid string
    # escape that recent Python versions warn about.
    non_letters = re.compile(r'[^a-zA-Z]')
    whitespace_run = re.compile(r'\s+')

    for caption_list in captions.values():
        for position, caption in enumerate(caption_list):
            caption = caption.lower()
            caption = non_letters.sub(' ', caption)
            caption = whitespace_run.sub(' ', caption)
            caption = " ".join(
                word for word in caption.split() if len(word) > 1
            )
            # Sequence boundary markers; the decoding loop starts from
            # 'startseq' and stops at 'endseq'.
            caption_list[position] = "startseq " + caption + " endseq"
# Normalise all captions in place before building the vocabulary.
clean(captions)

# Flatten the per-image caption lists into a single corpus.
all_captions = [
    text
    for caption_list in captions.values()
    for text in caption_list
]

# Fit the word <-> index vocabulary on the cleaned corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)

# Image feature extractor: VGG16 re-wired to output its second-to-last layer.
vgg_model = VGG16()
vgg_model = Model(
    inputs=vgg_model.inputs,
    outputs=vgg_model.layers[-2].output,
)
def index_to_word(indx, tokenizer):
    """Return the word whose vocabulary index equals ``indx``.

    Args:
        indx: Integer index to look up.
        tokenizer: Object exposing a ``word_index`` mapping of word -> index.

    Returns:
        The first word (in ``word_index`` iteration order) mapped to
        ``indx``, or ``None`` when no word has that index.
    """
    matching_words = (
        word
        for word, index in tokenizer.word_index.items()
        if index == indx
    )
    return next(matching_words, None)
def predict_captions(model, image, tokenizer, max_len):
    """Greedily decode a caption, one word per step.

    Starting from ``'startseq'``, the text so far is tokenised and padded to
    ``max_len``, fed to ``model`` together with ``image``, and the
    highest-scoring index is mapped back to a word and appended.

    Args:
        model: Object whose ``predict([image, sequence], verbose=0)`` returns
            scores over vocabulary indices.
        image: Image feature array; a 3-D array gets a leading batch axis.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index``.
        max_len: Padded sequence length, also the maximum number of steps.

    Returns:
        The generated text, beginning with ``'startseq'`` and ending with
        ``'endseq'`` when the model emitted it within ``max_len`` steps.
    """
    # The image input is the same on every step, so add the batch axis once
    # rather than re-checking inside the loop.
    if len(image.shape) == 3:
        image = np.expand_dims(image, axis=0)

    in_text = 'startseq'
    for _ in range(max_len):
        token_ids = tokenizer.texts_to_sequences([in_text])[0]
        # pad_sequences already yields a (1, max_len) batch.
        padded = pad_sequences([token_ids], max_len)
        scores = model.predict([image, padded], verbose=0)
        word = index_to_word(np.argmax(scores), tokenizer)
        if word is None:
            # Predicted index has no vocabulary entry (e.g. padding index).
            break
        in_text += " " + word
        if word == 'endseq':
            break
    return in_text
def caption_generator(url):
    """Download the image at ``url`` and generate a caption for it.

    The image is converted to RGB, resized to 224x224, preprocessed for
    VGG16, passed through ``vgg_model`` to obtain a feature vector, and
    decoded with ``predict_captions``.

    Args:
        url: Address of the image to caption.

    Returns:
        The caption text as returned by ``predict_captions`` (it starts with
        ``'startseq'``).

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an HTTP error status.
        PIL.UnidentifiedImageError: The response body is not a readable image.
    """
    # NOTE(security): ``url`` originates from the request payload, so this
    # fetch can be pointed at arbitrary hosts; restrict it if the service is
    # exposed publicly.
    # A timeout keeps an unresponsive host from blocking the worker forever.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Force three channels: RGBA, palette, or grayscale images would otherwise
    # produce an array VGG16 cannot accept.
    picture = Image.open(BytesIO(response.content)).convert('RGB')
    picture = picture.resize((224, 224))

    # (224, 224, 3) -> (1, 224, 224, 3) batch, then VGG16 preprocessing.
    pixels = img_to_array(picture)
    batch = preprocess_input(np.expand_dims(pixels, axis=0))

    feature = vgg_model.predict(batch, verbose=0)
    return predict_captions(model, feature, tokenizer, max_len)
# Application object on which this module's routes are registered.
app = Flask(__name__)


@app.route('/')
def home():
    """Answer requests to the root path with a fixed greeting."""
    greeting = "HELLO WORLD"
    return greeting
@app.route('/predict', methods=['POST'])
def predict():
    """Caption the image named in the JSON body and rewrite it with PaLM.

    Expects a JSON object with a ``url`` key. The image is captioned with
    ``caption_generator``, and that caption is sent to the
    ``models/text-bison-001`` text model to produce an Instagram-style
    caption.

    Returns:
        The generated caption text. If the text model yields no result, the
        plain image caption is returned instead. A JSON error with status 400
        is returned for a missing/invalid ``url``, and status 500 when the
        ``GOOGLE_API_KEY`` environment variable is not set.
    """
    import os  # function-scope import keeps this fix self-contained

    payload = request.get_json(silent=True)
    image_url = payload.get('url') if isinstance(payload, dict) else None
    if not isinstance(image_url, str) or not image_url:
        return {'error': "JSON body must contain a non-empty 'url' string"}, 400

    # The API key must come from the environment, never from source control.
    api_key = os.environ.get('GOOGLE_API_KEY')
    if not api_key:
        return {'error': 'GOOGLE_API_KEY is not configured on the server'}, 500

    result = caption_generator(image_url)
    # Drop the decoder's sequence markers so they do not leak into the prompt.
    description = " ".join(
        word for word in str(result).split()
        if word not in ('startseq', 'endseq')
    )

    palm.configure(api_key=api_key)
    # Named palm_model so it does not shadow the module-level Keras ``model``.
    palm_model = "models/text-bison-001"
    prompt = (
        "Generate a creative & attractive instagram caption of 10-30 words for: "
        + description
    )
    completion = palm.generate_text(
        model=palm_model,
        prompt=prompt,
        temperature=0,
        # The maximum length of the response
        max_output_tokens=100,
    )

    # Returning None from a Flask view raises; fall back to the plain caption.
    if completion.result is None:
        return description
    return completion.result
if __name__ == '__main__':
    import os

    # Debug mode turns on the interactive debugger, which allows code
    # execution from the browser; enable it only on request (FLASK_DEBUG=1).
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')