Spaces · ydshieh · Runtime error

Commit 6f0178d · ydshieh committed
Parent(s): 5dfe197

update UI and samples

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- app.py +31 -26
- model.py +14 -4
- samples/COCO_val2014_000000581632.jpg +0 -0
- samples/COCO_val2014_000000581683.jpg +0 -0
- samples/COCO_val2014_000000581702.jpg +0 -0
- samples/COCO_val2014_000000581717.jpg +0 -0
- samples/COCO_val2014_000000581726.jpg +0 -0
- samples/COCO_val2014_000000581736.jpg +0 -0
- samples/COCO_val2014_000000581781.jpg +0 -0
- samples/COCO_val2014_000000581827.jpg +0 -0
- samples/COCO_val2014_000000581829.jpg +0 -0
- samples/COCO_val2014_000000581863.jpg +0 -0
- samples/COCO_val2014_000000581899.jpg +0 -0
- samples/COCO_val2017_000000006771.jpg +0 -0
- samples/COCO_val2017_000000021903.jpg +0 -0
- samples/COCO_val2017_000000030213.jpg +0 -0
- samples/COCO_val2017_000000039956.jpg +0 -0
- samples/COCO_val2017_000000045472.jpg +0 -0
- samples/COCO_val2017_000000053505.jpg +0 -0
- samples/COCO_val2017_000000057597.jpg +0 -0
- samples/COCO_val2017_000000059386.jpg +0 -0
- samples/COCO_val2017_000000067406.jpg +0 -0
- samples/COCO_val2017_000000069795.jpg +0 -0
- samples/COCO_val2017_000000084431.jpg +0 -0
- samples/COCO_val2017_000000088432.jpg +0 -0
- samples/COCO_val2017_000000100238.jpg +0 -0
- samples/COCO_val2017_000000104619.jpg +0 -0
- samples/COCO_val2017_000000104803.jpg +0 -0
- samples/COCO_val2017_000000124442.jpg +0 -0
- samples/COCO_val2017_000000125936.jpg +0 -0
- samples/COCO_val2017_000000132703.jpg +0 -0
- samples/COCO_val2017_000000146155.jpg +0 -0
- samples/COCO_val2017_000000149770.jpg +0 -0
- samples/COCO_val2017_000000152120.jpg +0 -0
- samples/COCO_val2017_000000154431.jpg +0 -0
- samples/COCO_val2017_000000161609.jpg +0 -0
- samples/COCO_val2017_000000163258.jpg +0 -0
- samples/COCO_val2017_000000168593.jpg +0 -0
- samples/COCO_val2017_000000170116.jpg +0 -0
- samples/COCO_val2017_000000172330.jpg +0 -0
- samples/COCO_val2017_000000173371.jpg +0 -0
- samples/COCO_val2017_000000175535.jpg +0 -0
- samples/COCO_val2017_000000178469.jpg +0 -0
- samples/COCO_val2017_000000180188.jpg +0 -0
- samples/COCO_val2017_000000180296.jpg +0 -0
- samples/COCO_val2017_000000181969.jpg +0 -0
- samples/COCO_val2017_000000190676.jpg +0 -0
- samples/COCO_val2017_000000199055.jpg +0 -0
- samples/COCO_val2017_000000204186.jpg +0 -0
- samples/COCO_val2017_000000213547.jpg +0 -0
app.py
CHANGED
@@ -1,4 +1,5 @@
 import streamlit as st
+import requests
 
 
 # Designing the interface
@@ -7,56 +8,60 @@ st.write("[Yih-Dar SHIEH](https://huggingface.co/ydshieh)")
 
 st.sidebar.markdown(
     """
-    An image captioning model
-
-
+    An image captioning model by combining ViT model with GPT2 model.
+    The encoder (ViT) and decoder (GPT2) are combined using Hugging Face transformers' [Vision-To-Text Encoder-Decoder
+    framework](https://huggingface.co/transformers/master/model_doc/visionencoderdecoder.html).
     The pretrained weights of both models are loaded, with a set of randomly initialized cross-attention weights.
     The model is trained on the COCO 2017 dataset for about 6900 steps (batch_size=256).
+    [Follow-up work of [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/).]\n
     """
 )
 
-#image = Image.open('samples/val_000000039769.jpg')
-#show = st.image(image, use_column_width=True)
-#show.image(image, 'Preloaded Image', use_column_width=True)
-
-
 with st.spinner('Loading and compiling ViT-GPT2 model ...'):
-
     from model import *
-    # st.sidebar.write(f'Vit-GPT2 model loaded :)')
 
-st.sidebar.title("Select a sample image")
 
-
-
-
+st.sidebar.title("Select a sample image")
+image_id = st.sidebar.selectbox(
+    "Please choose a sample image",
+    sample_image_ids
 )
 
-
+random_image_id = None
+if st.sidebar.button("Random COCO 2017 (val) images"):
+    random_image_id = get_random_image_id()
+
+if random_image_id is not None:
+    image_id = random_image_id
+
+st.write(image_id)
+
+sample_name = f"COCO_val2017_{str(image_id).zfill(12)}.jpg"
 sample_path = os.path.join(sample_dir, sample_name)
 
-
-
-
+if os.path.isfile(sample_path):
+    image = Image.open(sample_path)
+else:
+    url = f"http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg"
+    image = Image.open(requests.get(url, stream=True).raw)
+
+resized = image.resize(size=(384, 384))
+show = st.image(resized, width=384)
+show.image(resized, '\n\nSelected Image', width=384)
+resized.close()
 
 # For newline
 st.sidebar.write('\n')
 
-
 with st.spinner('Generating image caption ...'):
 
     caption = predict(image)
 
     caption_en = caption
-    st.header(f'
-
-    # caption_en = translator.translate(caption, src='fr', dest='en').text
-    # st.header(f'**Prediction (in French) **{caption}')
-    # st.header(f'**English Translation**: {caption_en}')
-
+    st.header(f'Predicted caption:\n\n')
+    st.subheader(caption_en)
 
 st.sidebar.header("ViT-GPT2 predicts:")
 st.sidebar.write(f"**English**: {caption}")
 
-
 image.close()
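The rewritten flow ends in `predict(image)`, which `app.py` pulls in via `from model import *`. The `model.py` diff below only shows the imports and the new sampling helpers, not `predict` itself, so the following is a minimal sketch of what a caption function built from the classes `model.py` imports (FlaxVisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer) typically looks like. The checkpoint name and the generation settings are assumptions for illustration, not part of this commit:

```python
from PIL import Image
from transformers import FlaxVisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer

# Hypothetical checkpoint name; the Space actually assembles its weights under ./models/.
ckpt = "ydshieh/vit-gpt2-coco-en"
model = FlaxVisionEncoderDecoderModel.from_pretrained(ckpt)
feature_extractor = ViTFeatureExtractor.from_pretrained(ckpt)
tokenizer = AutoTokenizer.from_pretrained(ckpt)

def predict(image: Image.Image) -> str:
    # The feature extractor resizes/normalizes to the fixed input size ViT expects.
    pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
    # Beam-search settings here are illustrative; the commit does not show them.
    output_ids = model.generate(pixel_values, max_length=16, num_beams=4).sequences
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
```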
model.py
CHANGED
@@ -1,12 +1,13 @@
+import json
 import os, shutil
+import random
+
+
 from PIL import Image
 import jax
 from transformers import FlaxVisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
 from huggingface_hub import hf_hub_download
 
-from googletrans import Translator
-translator = Translator()
-
 
 # create target model directory
 model_dir = './models/'
@@ -65,4 +66,13 @@ _compile()
 
 
 sample_dir = './samples/'
-
+sample_image_ids = tuple([int(f.replace('COCO_val2017_', '').replace('.jpg', '')) for f in os.listdir(sample_dir) if f.startswith('COCO_val2017_')])
+
+with open(os.path.join(sample_dir, "coco-val2017-img-ids.json"), "r", encoding="UTF-8") as fp:
+    coco_2017_val_image_ids = json.load(fp)
+
+
+def get_random_image_id():
+
+    image_id = random.sample(coco_2017_val_image_ids, k=1)[0]
+    return image_id
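The new helpers in model.py and the lookup in app.py share one filename convention: COCO ids zero-padded to twelve digits. A short usage sketch of the round trip (the example id is taken from the sample files added in this commit):

```python
# Usage sketch tying model.py's new helpers to app.py's filename scheme.
image_id = 6771                                   # or: get_random_image_id()
sample_name = f"COCO_val2017_{str(image_id).zfill(12)}.jpg"
# -> "COCO_val2017_000000006771.jpg", one of the files under ./samples/

# sample_image_ids inverts the same mapping when scanning the directory:
assert int(sample_name.replace('COCO_val2017_', '').replace('.jpg', '')) == image_id

# If the file is not cached locally, app.py falls back to the COCO server:
url = f"http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg"
```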
samples/COCO_val2014_000000581632.jpg DELETED · Binary file (212 kB)
samples/COCO_val2014_000000581683.jpg DELETED · Binary file (231 kB)
samples/COCO_val2014_000000581702.jpg DELETED · Binary file (214 kB)
samples/COCO_val2014_000000581717.jpg DELETED · Binary file (155 kB)
samples/COCO_val2014_000000581726.jpg DELETED · Binary file (238 kB)
samples/COCO_val2014_000000581736.jpg DELETED · Binary file (180 kB)
samples/COCO_val2014_000000581781.jpg DELETED · Binary file (246 kB)
samples/COCO_val2014_000000581827.jpg DELETED · Binary file (297 kB)
samples/COCO_val2014_000000581829.jpg DELETED · Binary file (226 kB)
samples/COCO_val2014_000000581863.jpg DELETED · Binary file (196 kB)
samples/COCO_val2014_000000581899.jpg DELETED · Binary file (218 kB)
samples/COCO_val2017_000000006771.jpg
ADDED
samples/COCO_val2017_000000021903.jpg
ADDED
samples/COCO_val2017_000000030213.jpg
ADDED
samples/COCO_val2017_000000039956.jpg
ADDED
samples/COCO_val2017_000000045472.jpg
ADDED
samples/COCO_val2017_000000053505.jpg
ADDED
samples/COCO_val2017_000000057597.jpg
ADDED
samples/COCO_val2017_000000059386.jpg
ADDED
samples/COCO_val2017_000000067406.jpg
ADDED
samples/COCO_val2017_000000069795.jpg
ADDED
samples/COCO_val2017_000000084431.jpg
ADDED
samples/COCO_val2017_000000088432.jpg
ADDED
samples/COCO_val2017_000000100238.jpg
ADDED
samples/COCO_val2017_000000104619.jpg
ADDED
samples/COCO_val2017_000000104803.jpg
ADDED
samples/COCO_val2017_000000124442.jpg
ADDED
samples/COCO_val2017_000000125936.jpg
ADDED
samples/COCO_val2017_000000132703.jpg
ADDED
samples/COCO_val2017_000000146155.jpg
ADDED
samples/COCO_val2017_000000149770.jpg
ADDED
samples/COCO_val2017_000000152120.jpg
ADDED
samples/COCO_val2017_000000154431.jpg
ADDED
samples/COCO_val2017_000000161609.jpg
ADDED
samples/COCO_val2017_000000163258.jpg
ADDED
samples/COCO_val2017_000000168593.jpg
ADDED
samples/COCO_val2017_000000170116.jpg
ADDED
samples/COCO_val2017_000000172330.jpg
ADDED
samples/COCO_val2017_000000173371.jpg
ADDED
samples/COCO_val2017_000000175535.jpg
ADDED
samples/COCO_val2017_000000178469.jpg
ADDED
samples/COCO_val2017_000000180188.jpg
ADDED
samples/COCO_val2017_000000180296.jpg
ADDED
samples/COCO_val2017_000000181969.jpg
ADDED
samples/COCO_val2017_000000190676.jpg
ADDED
samples/COCO_val2017_000000199055.jpg
ADDED
samples/COCO_val2017_000000204186.jpg
ADDED
samples/COCO_val2017_000000213547.jpg
ADDED