Processing images
Some language models can generate images, take images as input, or both. The following examples show how to use these capabilities in your application.
For a list of models capable of image input or output, refer to the multimodal models on the Models page. There you can also look up model-specific parameters.
Generating images
Generating an image can be fun, and it can also be a useful capability in your product.
The following is a basic example of generating an image from a prompt and saving it to a file.
from opperai import AsyncOpper
from opperai.types import CallConfiguration, ImageOutput
from tempfile import NamedTemporaryFile
import asyncio

opper = AsyncOpper()

def save_file(data: bytes, path: str = None) -> str:
    # Write the image bytes to the given path, or to a new temporary .png file
    if path is None:
        with NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
            path = temp_file.name

    with open(path, "wb") as f:
        f.write(data)

    return path

async def generate_image(description: str) -> ImageOutput:
    # Model-specific parameters (here, aspectRatio) are passed via CallConfiguration
    image, _ = await opper.call(
        name="generate_image",
        output_type=ImageOutput,
        input=description,
        model="gcp/imagen-3.0-generate-001-eu",
        configuration=CallConfiguration(
            model_parameters={
                "aspectRatio": "9:16",
            }
        ),
    )
    return image

async def main():
    description = "wide-angle photo of a person holding a presentation about AI in a room full of people"
    image = await generate_image(description)
    path = save_file(image.bytes)
    print(path)

asyncio.run(main())
# /var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpave568r8.png
import Client from "opperai";
import fs from "fs";
import path from "path";
import os from "os";

const client = new Client();

(async () => {

    const image = await client.generateImage({
        model: "gcp/imagen-3.0-generate-001-eu",
        prompt: "wide-angle photo of a person holding a presentation about AI in a room full of people",
        parameters: {
            aspectRatio: "9:16",
        },
    });

    // Write the generated image bytes to a uniquely named temporary file
    const tempFilePath = path.join(os.tmpdir(), `image-${Date.now()}-${Math.random().toString(36).substring(2, 15)}.png`);
    fs.writeFileSync(tempFilePath, image.bytes);
    console.log(`image written to temporary file: ${tempFilePath}`);
    // image written to temporary file: /var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/image-1727768092084-z07q0xpw5zp.png
})();
Using images as input
Processing images by using them as input to your models can unlock a variety of applications, such as image captioning, classification, or generating detailed descriptions based on the image content.
from opperai import Opper
from opperai.types import ImageInput

opper = Opper()

def describe_image(path: str) -> str:
    description, response = opper.call(
        name="describe_image",
        instructions="Describe the content of the image",
        output_type=str,
        input=ImageInput.from_path(path),
        model="openai/gpt-4o",
    )
    return description

image_path = "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpave568r8.png"
description = describe_image(image_path)
print(f"Image description: {description}")
# Image description: The image shows a person giving a presentation in a dark room. The presenter is pointing at a screen displaying a visual related to artificial intelligence (AI). The screen has the text 'AI' and 'AI RORENGE' along with circuit-like graphics. Several people are seated, watching the presentation attentively.
import Client, { OpperMediaHandler } from "opperai";

const client = new Client();

(async () => {
    const image = new OpperMediaHandler("/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/image-1727768092084-z07q0xpw5zp.png");

    const { message } = await client.call({
        name: "describe_image",
        instructions: "Describe the content of the image",
        input: image.getInput(),
        model: "openai/gpt-4o",
    });

    console.log(`Image description: ${message}`);
    // Image description: The image shows a person giving a presentation in a dark room. The presenter is pointing at a screen displaying a visual related to artificial intelligence (AI). The screen has the text 'AI' and 'AI RORENGE' along with circuit-like graphics. Several people are seated, watching the presentation attentively.
})();
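Because calls accept a structured output type, image input can also drive classification rather than free-text description. The following is a minimal sketch of that idea: the ImageCategory model and its labels are hypothetical, and it assumes the Python SDK accepts a Pydantic model as output_type in the same way it accepts str above.

from opperai import Opper
from opperai.types import ImageInput
from pydantic import BaseModel
from typing import Literal

opper = Opper()

# Hypothetical output model for illustration; choose fields and labels for your own use case
class ImageCategory(BaseModel):
    category: Literal["people", "nature", "technology", "other"]
    reasoning: str

def classify_image(path: str) -> ImageCategory:
    result, _ = opper.call(
        name="classify_image",
        instructions="Classify the image into one of the given categories",
        output_type=ImageCategory,
        input=ImageInput.from_path(path),
        model="openai/gpt-4o",
    )
    return result

category = classify_image("/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpave568r8.png")
print(f"Category: {category.category}")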
Common denominator
In this example, we pass in multiple images and ask the LLM to find the most common denominator among them.
from opperai import Opper
from opperai.types import ImageInput
from typing import List

opper = Opper()

def common_denominator(paths: List[str]) -> str:
    images = [ImageInput.from_path(path) for path in paths]
    description, response = opper.call(
        name="common_denominator",
        instructions="given a list of images, return the most common denominator of the images",
        output_type=str,
        input=images,
        model="openai/gpt-4o",
    )
    return description

image_paths = [
    "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpid366821.png",
    "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpuhl0wyg4.png",
    "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpijk_v13m.png",
    "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpqglezg1g.png",
]
description = common_denominator(image_paths)
print(f"Image description: {description}")
# Image description: People using VR headsets in a forest
import Client, { OpperMediaHandler } from "opperai";

const client = new Client();

(async () => {
    async function commonDenominator(paths: string[]): Promise<string> {
        const images = paths.map((path) => {
            const image = new OpperMediaHandler(path);

            return image.getInput();
        });

        const { message } = await client.call({
            name: "common_denominator",
            instructions: "given a list of images, return the most common denominator of the images",
            input: { images },
            model: "openai/gpt-4o",
        });
        return message;
    }

    const imagePaths = [
        "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpid366821.png",
        "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpuhl0wyg4.png",
        "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpijk_v13m.png",
        "/var/folders/dx/lwwkf62n61j9z928d87z5txm0000gn/T/tmpqglezg1g.png",
    ];

    const description = await commonDenominator(imagePaths);
    console.log(`Image description: ${description}`);
    // Image description: The most common denominator among the images is that they all feature a person using a virtual reality (VR) headset in an outdoor forest environment.
})();
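To tie the two halves of this page together, here is a rough end-to-end sketch that generates an image, writes it to a temporary file, and feeds it back in for description. It simply combines the generation and image-input examples above, and assumes the async client accepts ImageInput the same way the synchronous client does.

import asyncio
from tempfile import NamedTemporaryFile

from opperai import AsyncOpper
from opperai.types import CallConfiguration, ImageInput, ImageOutput

opper = AsyncOpper()

async def main():
    # Generate an image, as in the generation example above
    image, _ = await opper.call(
        name="generate_image",
        output_type=ImageOutput,
        input="wide-angle photo of a person holding a presentation about AI in a room full of people",
        model="gcp/imagen-3.0-generate-001-eu",
        configuration=CallConfiguration(model_parameters={"aspectRatio": "9:16"}),
    )

    # Write the bytes to a temporary file so the image can be loaded as input
    with NamedTemporaryFile(delete=False, suffix=".png") as f:
        f.write(image.bytes)
        path = f.name

    # Feed the generated image back in and describe it
    description, _ = await opper.call(
        name="describe_image",
        instructions="Describe the content of the image",
        output_type=str,
        input=ImageInput.from_path(path),
        model="openai/gpt-4o",
    )
    print(description)

asyncio.run(main())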