File size: 4,616 Bytes
fea0eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <title>Image to Text - Hugging Face Transformers.js</title>

    <script type="module">
        // Import the pipeline factory from Transformers.js.
        // NOTE(review): the version specifier in this URL was unreadable in the
        // checked-in file (it contained a space and could not be loaded). It is
        // pinned here to 2.17.2 - confirm this is the intended release.
        import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';

        // The inline script at the bottom of the page is a classic (non-module)
        // script and cannot use `import`, so publish the factory on `window`.
        window.pipeline = pipeline;
    </script>

    <!-- NOTE(review): the Bootstrap version in this URL was unreadable; 5.3.0 is assumed - confirm. -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">

    <link rel="stylesheet" href="css/styles.css"></head>
<body>
    <div class="container-main">

        <!-- Back to Home button -->
        <div class="row mt-5">
            <div class="col-md-12 text-center">
                <a href="index.html" class="btn btn-outline-secondary"
                    style="color: #3c650b; border-color: #3c650b;">Back to Main Page</a>
            </div>
        </div>

        <!-- Content -->
        <div class="container mt-5">
            <!-- Centered Titles -->
            <div class="text-center">
                <h2>Computer Vision</h2>
                <h4>Image to Text</h4>
            </div>

            <!-- Actual Content of this page -->
            <div id="image-to-text-container" class="container mt-4">
                <h5>Generate a Caption for an Image w/ Xenova/vit-gpt2-image-captioning:</h5>
                <div class="d-flex align-items-center">
                    <label for="imageToTextURLText" class="mb-0 text-nowrap" style="margin-right: 15px;">Enter
                        URL of image to caption:</label>
                    <input type="text" class="form-control flex-grow-1" id="imageToTextURLText"
                        value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg"
                        placeholder="Enter image" style="margin-right: 15px; margin-left: 15px;">
                    <button id="ImagetoTextButton" class="btn btn-primary" onclick="captionImage()">Caption</button>
                </div>
                <div class="mt-4">
                    <h4>Output:</h4>
                    <pre id="outputArea"></pre>
                </div>
            </div>

            <hr> <!-- Line Separator -->

            <div id="image-to-text-local-container" class="container mt-4">
                <h5>Generate a Caption for a Local Image:</h5>
                <div class="d-flex align-items-center">
                    <label for="imagetoTextLocalFile" class="mb-0 text-nowrap"
                        style="margin-right: 15px;">Select Local Image:</label>
                    <input type="file" id="imagetoTextLocalFile" accept="image/*" />
                    <button id="CaptionButtonLocal" class="btn btn-primary"
                        onclick="captionImageLocal()">Caption</button>
                </div>
                <div class="mt-4">
                    <h4>Output:</h4>
                    <pre id="outputAreaLocal"></pre>
                </div>
            </div>

            <!-- Back to Home button -->
            <div class="row mt-5">
                <div class="col-md-12 text-center">
                    <a href="index.html" class="btn btn-outline-secondary"
                        style="color: #3c650b; border-color: #3c650b;">Back to Main Page</a>
                </div>
            </div>
        </div>
    </div>

    <script>

        // Image-captioning pipeline shared by the two caption handlers below.
        // It is undefined until initializeModel() has assigned it.
        let captioner;

        /**
         * Load the image-captioning pipeline ('Xenova/vit-gpt2-image-captioning')
         * and store it in the shared `captioner` variable.
         *
         * Relies on the global `pipeline` factory that the module script in <head>
         * assigns to `window.pipeline`.
         *
         * Loading failures (missing `pipeline`, failed model download, ...) are
         * logged instead of being left as an unhandled promise rejection; in that
         * case `captioner` is left unchanged.
         *
         * @returns {Promise<void>}
         */
        async function initializeModel() {
            try {
                captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
            } catch (error) {
                // This function is used directly as an event listener, so nothing
                // upstream would ever observe a rejection.
                console.error('Failed to load the image-captioning model:', error);
            }
        }

        /**
         * Caption the image whose URL is typed into #imageToTextURLText and write
         * the pretty-printed JSON result into #outputArea.
         *
         * Instead of throwing, an explanatory message is written to #outputArea
         * when the URL field is empty, when the model has not been loaded yet, or
         * when the captioning call fails.
         *
         * @returns {Promise<void>}
         */
        async function captionImage() {
            const outputArea = document.getElementById("outputArea");
            const imageUrl = document.getElementById("imageToTextURLText").value.trim();

            if (imageUrl === "") {
                outputArea.innerText = "Please enter an image URL first.";
                return;
            }

            // The button can be clicked before the model has finished loading.
            if (typeof captioner !== "function") {
                outputArea.innerText = "The captioning model is still loading. Please try again in a moment.";
                return;
            }

            try {
                const result = await captioner(imageUrl);
                outputArea.innerText = JSON.stringify(result, null, 2);
            } catch (error) {
                console.error("Image captioning failed:", error);
                const reason = error instanceof Error ? error.message : String(error);
                outputArea.innerText = "Error: " + reason;
            }
        }

        /**
         * Caption the image file chosen in #imagetoTextLocalFile and write the
         * pretty-printed JSON result into #outputAreaLocal.
         *
         * Shows an alert when no file is selected. When the model has not been
         * loaded yet or the captioning call fails, an explanatory message is
         * written to #outputAreaLocal instead of throwing.
         *
         * @returns {Promise<void>}
         */
        async function captionImageLocal() {
            const outputArea = document.getElementById("outputAreaLocal");
            const file = document.getElementById("imagetoTextLocalFile").files[0];

            if (!file) {
                alert('Please select an image file first.');
                return;
            }

            // The button can be clicked before the model has finished loading.
            if (typeof captioner !== "function") {
                outputArea.innerText = "The captioning model is still loading. Please try again in a moment.";
                return;
            }

            // Hand the file to the pipeline through a temporary blob URL.
            const url = URL.createObjectURL(file);
            try {
                const result = await captioner(url);
                outputArea.innerText = JSON.stringify(result, null, 2);
            } catch (error) {
                console.error("Image captioning failed:", error);
                const reason = error instanceof Error ? error.message : String(error);
                outputArea.innerText = "Error: " + reason;
            } finally {
                // Blob URLs keep the file alive until revoked; release it once
                // the pipeline has finished with it, whether it succeeded or not.
                URL.revokeObjectURL(url);
            }
        }

        // Start loading the captioning model once the DOM has been parsed.
        // initializeModel() uses the global `pipeline` that the module script in
        // <head> assigns to window.pipeline. The promise it returns is not
        // awaited here.
        window.addEventListener("DOMContentLoaded", initializeModel);
    </script>
</body>

</html>