Skip to content

Commit a42c412

Browse files
Add new AI samples and update existing to use new API shape (#1474)
* Add new AI samples and update existing to use new API shape * address comments
1 parent 0a67271 commit a42c412

23 files changed

Lines changed: 1942 additions & 21 deletions

File tree

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# On-device multimodal AI with Gemini Nano - image understanding
2+
3+
This sample demonstrates how to use the image understanding capabilities of the multimodal Gemini Nano API preview together with [Chrome's translation API](https://developer.chrome.com/docs/ai/translator-api). To learn more about the API and how to sign up for the origin trial, head over to [Built-in AI on developer.chrome.com](https://developer.chrome.com/docs/extensions/ai/prompt-api).
4+
5+
## Overview
6+
7+
This extension adds a context menu entry for images on the web to generate an alt text description that is displayed in a popup window.
8+
9+
## Running this extension
10+
11+
1. Clone this repository.
12+
1. Load this directory in Chrome as an [unpacked extension](https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked).
13+
1. Right-click an image on a webpage and select "Generate alt text".
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Adds the "Generate alt text" entry to the context menu shown for images.
// Runs whenever chrome.runtime.onInstalled fires.
const registerAltTextMenu = () => {
  const menuItem = {
    id: 'generateAltText',
    title: 'Generate alt text',
    contexts: ['image']
  };
  chrome.contextMenus.create(menuItem);
};

chrome.runtime.onInstalled.addListener(registerAltTextMenu);
8+
/**
 * Generates an alt text description for an image using the built-in
 * language model.
 *
 * The image is downloaded and decoded first, so a broken URL fails fast
 * without creating a model session. The session and the decoded bitmap are
 * always released, whether or not the prompt succeeds.
 *
 * @param {string} imgSrc - URL of the image to describe.
 * @returns {Promise<string>} The model's response to the description prompt.
 * @throws {Error} If the image request returns a non-OK HTTP status. Errors
 *   from decoding the image, creating the session (availability is not
 *   checked up front) or running the prompt propagate unchanged.
 */
async function generateAltText(imgSrc) {
  // Create an image bitmap to pass it to the prompt
  const response = await fetch(imgSrc);
  if (!response.ok) {
    // Without this check an HTTP error page would be handed to the image
    // decoder and surface as an unrelated decoding error.
    throw new Error(`Failed to fetch image: HTTP ${response.status}`);
  }
  const blob = await response.blob();
  const imageBitmap = await createImageBitmap(blob);

  let session;
  try {
    // Create the model (we're not checking availability here, but will
    // simply fail with an exception)
    session = await self.LanguageModel.create({
      temperature: 0.8,
      topK: 1.0,
      expectedInputs: [{ type: 'image' }]
    });

    // Run the prompt
    const prompt = [
      `Please provide a functional, objective description of the provided image in no more than around 30 words so that someone who could not see it would be able to imagine it. If possible, follow an “object-action-context” framework. The object is the main focus. The action describes what’s happening, usually what the object is doing. The context describes the surrounding environment. If there is text found in the image, do your best to transcribe the important bits, even if it extends the word count beyond 30 words. It should not contain quotation marks, as those tend to cause issues when rendered on the web. If there is no text found in the image, then there is no need to mention it. You should not begin the description with any variation of “The image”.`,
      { type: 'image', content: imageBitmap }
    ];
    // `await` inside the try so cleanup runs only after the prompt settles.
    return await session.prompt(prompt);
  } finally {
    // `session` is still undefined when create() itself failed.
    session?.destroy();
    imageBitmap.close();
  }
}
28+
29+
chrome.contextMenus.onClicked.addListener(async (info, tab) => {
30+
if (info.menuItemId === 'generateAltText' && info.srcUrl) {
31+
// Start opening the popup
32+
const [result] = await Promise.allSettled([
33+
generateAltText(info.srcUrl),
34+
chrome.action.openPopup()
35+
]);
36+
chrome.runtime.sendMessage({
37+
action: 'alt-text',
38+
text: result.value === 'fulfilled' ? result.value : result.reason.message
39+
});
40+
}
41+
});
1.03 KB
Loading
170 Bytes
Loading
277 Bytes
Loading
462 Bytes
Loading
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"manifest_version": 3,
3+
"name": "Alt Texter",
4+
"version": "1.0",
5+
"description": "Generates alt text for images using the Prompt API.",
6+
"permissions": ["contextMenus", "clipboardWrite"],
7+
"host_permissions": ["<all_urls>"],
8+
"minimum_chrome_version": "138",
9+
"background": {
10+
"service_worker": "background.js"
11+
},
12+
"action": {
13+
"default_popup": "popup.html"
14+
},
15+
"icons": {
16+
"16": "icons/icon16.png",
17+
"32": "icons/icon32.png",
18+
"48": "icons/icon48.png",
19+
"128": "icons/icon128.png"
20+
}
21+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Alt Text Generator</title>
    <style>
      @import 'https://unpkg.com/open-props';
      @import 'https://unpkg.com/open-props/normalize.min.css';
      @import 'https://unpkg.com/open-props/buttons.min.css';
      @import 'https://unpkg.com/open-props/theme.light.switch.min.css';
      @import 'https://unpkg.com/open-props/theme.dark.switch.min.css';

      :root {
        --font-size-00: 0.6rem;
      }
      body {
        margin: auto;
        width: 500px;
        /* Single padding declaration; the earlier var(--size-2) value was
           always overridden by this one. */
        padding: 10px;
      }
      h4 {
        margin-bottom: var(--size-2);
      }
      textarea {
        width: 100%;
      }
      button {
        margin-right: 5px;
      }
      /* Effective size of both text areas (the former 100px height on
         textarea was overridden by this rule). */
      #loading,
      textarea {
        margin: 16px 0;
        height: 200px;
      }
    </style>
  </head>
  <body>
    <h4>Alt Texter</h4>
    <label
      >Target language:
      <select id="lang">
        <option value="bn">Bengali</option>
        <option value="en" selected>English</option>
        <option value="de">German</option>
        <option value="fr">French</option>
        <option value="hi">Hindi</option>
        <option value="ja">Japanese</option>
        <option value="zh">Mandarin Chinese (Simplified)</option>
        <option value="pt">Portuguese</option>
        <option value="ru">Russian</option>
        <option value="es">Spanish</option>
        <option value="zh-Hant">Taiwanese Mandarin (Traditional)</option>
        <option value="tr">Turkish</option>
        <option value="vi">Vietnamese</option>
      </select>
    </label>
    <!-- Placeholder shown while the alt text is generated or translated;
         read-only so it cannot be mistaken for the editable result. -->
    <textarea id="loading" readonly>Generating alt text ...</textarea>
    <textarea id="altText" hidden></textarea>
    <button type="button" id="copyClose">Copy and close</button>
    <!-- There is no enclosing form to reset; popup.js closes the window. -->
    <button type="button" id="discard">Discard</button>
    <script src="popup.js"></script>
  </body>
</html>
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/* global Translator */
2+
const altTextInput = document.getElementById('altText');
const loading = document.getElementById('loading');
const lang = document.getElementById('lang');

// Language the received alt text is assumed to be written in; used as the
// translation source and as the "no translation needed" selection.
const SOURCE_LANGUAGE = 'en';

// Alt text exactly as received from the service worker. It is never
// overwritten with a translation (or a translation error), so every
// language change translates from the original text.
let text = '';

// Incremented for every render so that a slow, outdated translation cannot
// overwrite the result of a newer one.
let latestRender = 0;

/**
 * Translates a string from the source language into the language currently
 * selected in the dropdown.
 *
 * @param {string} string - Text to translate.
 * @returns {Promise<string>} The translation, or the error message if the
 *   translator could not be created or the translation failed.
 */
async function translate(string) {
  try {
    const translator = await Translator.create({
      sourceLanguage: SOURCE_LANGUAGE,
      targetLanguage: lang.value
    });
    // Awaited here so translation failures are caught below as well.
    return await translator.translate(string);
  } catch (e) {
    console.error(e);
    return e.message;
  }
}

/**
 * Shows the given text in the result text area and hides the loading
 * placeholder.
 *
 * @param {string} [value] - Text to display; defaults to the untranslated
 *   alt text.
 */
function showAltText(value = text) {
  altTextInput.value = value;
  loading.setAttribute('hidden', true);
  altTextInput.removeAttribute('hidden');
}

/**
 * Displays the alt text in the selected language, showing the loading
 * placeholder while a translation is in progress.
 *
 * @returns {Promise<void>} Resolves once the text is shown or the render
 *   has been superseded by a newer one.
 */
async function renderAltText() {
  latestRender += 1;
  const renderId = latestRender;

  altTextInput.setAttribute('hidden', true);
  loading.removeAttribute('hidden');

  const output =
    lang.value === SOURCE_LANGUAGE ? text : await translate(text);

  // The language or the text changed while translating; the newer render
  // is responsible for updating the UI.
  if (renderId !== latestRender) {
    return;
  }
  showAltText(output);
}

lang.addEventListener('change', () => {
  // Nothing to translate until the alt text has arrived.
  if (text === '') {
    return;
  }
  // translate() reports its own errors, so this promise does not reject.
  void renderAltText();
});

chrome.runtime.onMessage.addListener((request) => {
  if (request.action === 'alt-text') {
    text = request.text;
    // translate() reports its own errors, so this promise does not reject.
    void renderAltText();
  }
});
42+
43+
// Copies whatever is currently in the result text area to the clipboard and
// then closes the popup.
const copyAndClose = async () => {
  await navigator.clipboard.writeText(altTextInput.value);
  window.close();
};

// Closes the popup without copying anything.
const discardAndClose = () => {
  window.close();
};

document.getElementById('copyClose').addEventListener('click', copyAndClose);
document.getElementById('discard').addEventListener('click', discardAndClose);
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dist

0 commit comments

Comments
 (0)