First_agent_template / tools /image_generation.py
sayed99's picture
taught the agent to generate and caption images (connect things together)
e104f68
from gradio_client import Client
from typing import Any
from smolagents.tools import Tool
import os
import shutil
from pathlib import Path
import uuid
from PIL import Image
class ImageGenerationTool(Tool):
    """Agent tool that generates an image from a text prompt.

    ``forward`` sends the prompt to the "mukaist/Midjourney" endpoint through
    ``gradio_client``, copies the returned image file into a ``generations``
    directory under the current working directory, and returns the loaded
    image together with the path of the saved copy.
    """

    name = "image_generation"
    # The description text is kept flush-left so the string content carries no
    # source-code indentation.
    description = """
Generates an image based on the given prompt and saves it locally at generations dir.
Args:
prompt (str): The prompt for image generation.
Returns:
- Image.Image: The generated image.
- str: The path where the image is saved.
"""
    inputs = {'prompt': {'type': 'string',
                         'description': 'The prompt for image generation.'}}
    output_type = "any"

    def forward(self, prompt: str) -> Any:
        """Generate an image for ``prompt`` and save a local copy.

        Args:
            prompt: The prompt for image generation.

        Returns:
            A ``(image, path)`` tuple: the fully loaded ``PIL.Image.Image`` and
            the path of the saved PNG as a ``str`` (as the tool description
            states).

        Raises:
            RuntimeError: If the remote call returns a result that cannot be
                indexed as ``result[0][0]['image']`` (for example an empty
                gallery).
        """
        client = Client("mukaist/Midjourney")
        result = client.predict(
            prompt=prompt,
            negative_prompt="(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck",
            use_negative_prompt=True,
            style="2560 x 1440",
            seed=0,
            width=1024,
            height=1024,
            guidance_scale=6,
            randomize_seed=True,
            api_name="/run"
        )

        # Fail with a descriptive message instead of a bare IndexError /
        # KeyError when the endpoint returns nothing usable.
        try:
            image_path = result[0][0]['image']
        except (IndexError, KeyError, TypeError) as err:
            raise RuntimeError(
                f"Image generation returned an unexpected result: {result!r}"
            ) from err

        output_dir = Path(os.getcwd()) / "generations"
        output_dir.mkdir(parents=True, exist_ok=True)
        # A random hex suffix keeps successive generations from overwriting
        # each other.
        save_path = output_dir / f"generated_image_{uuid.uuid4().hex}.png"
        shutil.copy(image_path, save_path)
        print(f"Image saved at: {save_path}")

        # Image.open is lazy and keeps the file open; load the pixel data now
        # and let the context manager release the file handle, so the returned
        # image does not pin an open descriptor.
        with Image.open(save_path) as image:
            image.load()
        return image, str(save_path)

    def __init__(self, *args, **kwargs):
        # Only records that the tool has not been set up yet; positional and
        # keyword arguments are accepted but unused here.
        self.is_initialized = False