divyanshu1807gupta committed on
Commit
c402060
1 Parent(s): af74ede

Upload 4 files

Files changed (4)
  1. Dockerfile +11 -0
  2. caption_api.py +123 -0
  3. captions.txt +0 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+
+ CMD ["python", "caption_api.py"]
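To build and run the image locally, a minimal sketch (the tag caption-api is an arbitrary name, and PALM_API_KEY is the environment variable the app reads its API key from):

docker build -t caption-api .
docker run -p 7860:7860 -e PALM_API_KEY=<your-key> caption-api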
caption_api.py ADDED
@@ -0,0 +1,123 @@
+ import os
+ import re
+ import numpy as np
+ import requests
+ from flask import Flask, request
+ import google.generativeai as palm
+ from PIL import Image
+ from io import BytesIO
+ from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
+ from tensorflow.keras.preprocessing.image import img_to_array
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.models import Model, load_model
+
+ # Trained captioning model (VGG16 image features + LSTM decoder).
+ model = load_model('best_model.h5')
+ max_len = 35
+
+ # Rebuild the tokenizer from the training captions so word indices
+ # match the ones the model was trained with.
+ with open('captions.txt', 'r') as f:
+     next(f)  # skip the header line
+     caption_file = f.read()
+
+ captions = {}
+ for line in caption_file.split('\n'):
+     if len(line) < 2:
+         continue
+     values = line.split(",")
+     # the image id is the file name without its extension
+     image_id = values[0].split('.')[0]
+     # the caption is everything after the first comma
+     caption = " ".join(values[1:])
+     if image_id not in captions:
+         captions[image_id] = []
+     captions[image_id].append(caption)
+
+ def clean(captions):
+     # lower-case, strip non-letters, drop 1-char tokens, add start/end markers
+     for key, caption_list in captions.items():
+         for i in range(len(caption_list)):
+             caption = caption_list[i].lower()
+             caption = re.sub(r'[^a-zA-Z]', ' ', caption)
+             caption = re.sub(r'\s+', ' ', caption)
+             caption = " ".join([word for word in caption.split() if len(word) > 1])
+             caption_list[i] = "startseq " + caption + " endseq"
+
+ clean(captions)
+
+ all_captions = []
+ for key, caption_list in captions.items():
+     all_captions.extend(caption_list)
+
+ tokenizer = Tokenizer()
+ tokenizer.fit_on_texts(all_captions)
+
+ # Load VGG16 and drop the classification head: the second-to-last layer
+ # yields the feature vector the captioning model expects.
+ vgg_model = VGG16()
+ vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
+
+ def index_to_word(indx, tokenizer):
+     for word, index in tokenizer.word_index.items():
+         if index == indx:
+             return word
+     return None
+
+ def predict_captions(model, image, tokenizer, max_len):
+     # greedy decoding: feed the sequence so far, append the argmax word, repeat
+     in_text = 'startseq'
+     for i in range(max_len):
+         seq = tokenizer.texts_to_sequences([in_text])[0]
+         seq = pad_sequences([seq], maxlen=max_len)[0]
+         if len(image.shape) == 3:
+             image = np.expand_dims(image, axis=0)
+         y_pred = model.predict([image, np.expand_dims(seq, axis=0)], verbose=0)
+         y_pred = np.argmax(y_pred)
+         word = index_to_word(y_pred, tokenizer)
+         if word is None:
+             break
+         in_text += " " + word
+         if word == 'endseq':
+             break
+     return in_text
+
+ def caption_generator(url):
+     # download the image and prepare it for VGG16
+     response = requests.get(url)
+     image = Image.open(BytesIO(response.content)).convert('RGB')
+     image = image.resize((224, 224))
+     image = img_to_array(image)
+     image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
+     image = preprocess_input(image)
+     # extract the feature vector and decode a caption from it
+     feature = vgg_model.predict(image, verbose=0)
+     return predict_captions(model, feature, tokenizer, max_len)
+
+ app = Flask(__name__)
+
+ @app.route('/')
+ def home():
+     return "HELLO WORLD"
+
+ @app.route('/predict', methods=['POST'])
+ def predict():
+     data = request.get_json()
+     result = caption_generator(data['url'])
+     # read the PaLM API key from the environment (PALM_API_KEY is an
+     # arbitrary variable name) rather than hard-coding it in the source
+     palm.configure(api_key=os.environ['PALM_API_KEY'])
+     prompt = "Generate a creative & attractive instagram caption of 10-30 words for " + str(result)
+     completion = palm.generate_text(
+         model="models/text-bison-001",
+         prompt=prompt,
+         temperature=0,
+         # the maximum length of the response
+         max_output_tokens=100,
+     )
+     return completion.result
+
+ if __name__ == '__main__':
+     # bind to all interfaces on the port the Dockerfile exposes
+     app.run(host='0.0.0.0', port=7860)
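A minimal sketch of calling the /predict endpoint once the server is up (the image URL is a placeholder):

import requests

resp = requests.post(
    "http://localhost:7860/predict",
    json={"url": "https://example.com/photo.jpg"},  # any public image URL
)
print(resp.text)  # the generated Instagram-style caption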
captions.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ Flask
+ google-generativeai
+ numpy
+ Pillow
+ protobuf
+ Requests
+ tensorflow
+ tensorflow_intel
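To run the API outside Docker, a sketch (PALM_API_KEY is the environment variable name assumed in caption_api.py above):

pip install -r requirements.txt
export PALM_API_KEY=<your-key>
python caption_api.py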