@@ -4,7 +4,7 @@ author: eric-urban
ms.author: eur
ms.service: azure-ai-openai
ms.topic: include
-ms.date: 1/21/2025
+ms.date: 3/20/2025
---
## Prerequisites
@@ -38,10 +38,10 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
npm init -y
```
-1. Install the real-time audio client library for JavaScript with:
+1. Install the OpenAI client library for JavaScript with:
```console
- npm install https://github.com/Azure-Samples/aoai-realtime-audio-sdk/releases/download/js/v0.5.2/rt-client-0.5.2.tgz
+    npm install openai ws
```
1. For the **recommended** keyless authentication with Microsoft Entra ID, install the `@azure/identity` package with:
@@ -50,6 +50,7 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
npm install @azure/identity
```
+
## Retrieve resource information
[!INCLUDE [resource authentication](resource-authentication.md)]
@@ -64,55 +65,70 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
1. Create the `index.js` file with the following code:
```javascript
- import { DefaultAzureCredential } from "@azure/identity";
- import { LowLevelRTClient } from "rt-client";
- import dotenv from "dotenv";
- dotenv.config();
- async function text_in_audio_out() {
- // Set environment variables or edit the corresponding values here.
- const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "YourEndpoint";
- const deployment = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
- if (!endpoint || !deployment) {
- throw new Error("You didn't set the environment variables.");
- }
- const client = new LowLevelRTClient(new URL(endpoint), new DefaultAzureCredential(), { deployment: deployment });
- try {
- await client.send({
- type: "response.create",
- response: {
- modalities: ["audio", "text"],
- instructions: "Please assist the user."
- }
+ import { OpenAIRealtimeWS } from "openai/beta/realtime/ws";
+    import { AzureOpenAI } from "openai";
+ import { DefaultAzureCredential, getBearerTokenProvider } from "@azure/identity";
+ async function main() {
+ // You will need to set these environment variables or edit the following values
+ const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "AZURE_OPENAI_ENDPOINT";
+ // Required Azure OpenAI deployment name and API version
+ const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
+ const apiVersion = process.env.OPENAI_API_VERSION || "2024-10-01-preview";
+ // Keyless authentication
+ const credential = new DefaultAzureCredential();
+ const scope = "https://cognitiveservices.azure.com/.default";
+ const azureADTokenProvider = getBearerTokenProvider(credential, scope);
+ const azureOpenAIClient = new AzureOpenAI({
+ azureADTokenProvider,
+ apiVersion: apiVersion,
+ deployment: deploymentName,
+ endpoint: endpoint,
+ });
+ const realtimeClient = await OpenAIRealtimeWS.azure(azureOpenAIClient);
+ realtimeClient.socket.on("open", () => {
+ console.log("Connection opened!");
+ realtimeClient.send({
+ type: "session.update",
+ session: {
+ modalities: ["text", "audio"],
+ model: "gpt-4o-mini-realtime-preview",
+ },
+ });
+ realtimeClient.send({
+ type: "conversation.item.create",
+ item: {
+ type: "message",
+ role: "user",
+ content: [{ type: "input_text", text: "Please assist the user" }],
+ },
});
- for await (const message of client.messages()) {
- switch (message.type) {
- case "response.done": {
- break;
- }
- case "error": {
- console.error(message.error);
- break;
- }
- case "response.audio_transcript.delta": {
- console.log(`Received text delta: ${message.delta}`);
- break;
- }
- case "response.audio.delta": {
- const buffer = Buffer.from(message.delta, "base64");
- console.log(`Received ${buffer.length} bytes of audio data.`);
- break;
- }
- }
- if (message.type === "response.done" || message.type === "error") {
- break;
- }
- }
- }
- finally {
- client.close();
- }
+ realtimeClient.send({ type: "response.create" });
+ });
+ realtimeClient.on("error", (err) => {
+ // Instead of throwing the error, you can log it
+ // and continue processing events.
+ throw err;
+ });
+ realtimeClient.on("session.created", (event) => {
+ console.log("session created!", event.session);
+ console.log();
+ });
+ realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
+ realtimeClient.on("response.audio.delta", (event) => {
+ const buffer = Buffer.from(event.delta, "base64");
+ console.log(`Received ${buffer.length} bytes of audio data.`);
+ });
+ realtimeClient.on("response.audio_transcript.delta", (event) => {
+ console.log(`Received text delta:${event.delta}.`);
+ });
+ realtimeClient.on("response.text.done", () => console.log());
+ realtimeClient.on("response.done", () => realtimeClient.close());
+ realtimeClient.socket.on("close", () => console.log("\nConnection closed!"));
}
- await text_in_audio_out();
+ main().catch((err) => {
+ console.error("The sample encountered an error:", err);
+ });
+ export { main };
```
1. Sign in to Azure with the following command:
@@ -132,56 +148,66 @@ For the recommended keyless authentication with Microsoft Entra ID, you need to:
1. Create the `index.js` file with the following code:
```javascript
- import { AzureKeyCredential } from "@azure/core-auth";
- import { LowLevelRTClient } from "rt-client";
- import dotenv from "dotenv";
- dotenv.config();
- async function text_in_audio_out() {
- // Set environment variables or edit the corresponding values here.
+ import { OpenAIRealtimeWS } from "openai/beta/realtime/ws";
+    import { AzureOpenAI } from "openai";
+ async function main() {
+ // You will need to set these environment variables or edit the following values
+ const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "AZURE_OPENAI_ENDPOINT";
const apiKey = process.env.AZURE_OPENAI_API_KEY || "Your API key";
- const endpoint = process.env.AZURE_OPENAI_ENDPOINT || "Your endpoint";
- const deployment = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
- if (!endpoint || !deployment) {
- throw new Error("You didn't set the environment variables.");
- }
- const client = new LowLevelRTClient(new URL(endpoint), new AzureKeyCredential(apiKey), { deployment: deployment });
- try {
- await client.send({
- type: "response.create",
- response: {
- modalities: ["audio", "text"],
- instructions: "Please assist the user."
- }
+ // Required Azure OpenAI deployment name and API version
+ const deploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME || "gpt-4o-mini-realtime-preview";
+ const apiVersion = process.env.OPENAI_API_VERSION || "2024-10-01-preview";
+ const azureOpenAIClient = new AzureOpenAI({
+ apiKey: apiKey,
+ apiVersion: apiVersion,
+ deployment: deploymentName,
+ endpoint: endpoint,
+ });
+ const realtimeClient = await OpenAIRealtimeWS.azure(azureOpenAIClient);
+ realtimeClient.socket.on("open", () => {
+ console.log("Connection opened!");
+ realtimeClient.send({
+ type: "session.update",
+ session: {
+ modalities: ["text", "audio"],
+ model: "gpt-4o-mini-realtime-preview",
+ },
});
- for await (const message of client.messages()) {
- switch (message.type) {
- case "response.done": {
- break;
- }
- case "error": {
- console.error(message.error);
- break;
- }
- case "response.audio_transcript.delta": {
- console.log(`Received text delta: ${message.delta}`);
- break;
- }
- case "response.audio.delta": {
- const buffer = Buffer.from(message.delta, "base64");
- console.log(`Received ${buffer.length} bytes of audio data.`);
- break;
- }
- }
- if (message.type === "response.done" || message.type === "error") {
- break;
- }
- }
- }
- finally {
- client.close();
- }
+ realtimeClient.send({
+ type: "conversation.item.create",
+ item: {
+ type: "message",
+ role: "user",
+ content: [{ type: "input_text", text: "Please assist the user" }],
+ },
+ });
+ realtimeClient.send({ type: "response.create" });
+ });
+ realtimeClient.on("error", (err) => {
+ // Instead of throwing the error, you can log it
+ // and continue processing events.
+ throw err;
+ });
+ realtimeClient.on("session.created", (event) => {
+ console.log("session created!", event.session);
+ console.log();
+ });
+ realtimeClient.on("response.text.delta", (event) => process.stdout.write(event.delta));
+ realtimeClient.on("response.audio.delta", (event) => {
+ const buffer = Buffer.from(event.delta, "base64");
+ console.log(`Received ${buffer.length} bytes of audio data.`);
+ });
+ realtimeClient.on("response.audio_transcript.delta", (event) => {
+ console.log(`Received text delta:${event.delta}.`);
+ });
+ realtimeClient.on("response.text.done", () => console.log());
+ realtimeClient.on("response.done", () => realtimeClient.close());
+ realtimeClient.socket.on("close", () => console.log("\nConnection closed!"));
}
- await text_in_audio_out();
+ main().catch((err) => {
+ console.error("The sample encountered an error:", err);
+ });
+ export { main };
```
1. Run the JavaScript file.
@@ -201,22 +227,25 @@ The script gets a response from the model and prints the transcript and audio da
The output will look similar to the following:
```console
-Received text delta: Hello
-Received text delta: !
-Received text delta: How
-Received text delta: can
-Received text delta: I
+Received text delta:Of.
+Received text delta: course.
+Received text delta:!.
+Received text delta: How.
+Received text delta: can.
Received 4800 bytes of audio data.
Received 7200 bytes of audio data.
-Received text delta: help
+Received text delta: I.
Received 12000 bytes of audio data.
-Received text delta: you
-Received text delta: today
-Received text delta: ?
+Received text delta: help.
+Received text delta: you.
+Received text delta: today.
+Received text delta:?.
Received 12000 bytes of audio data.
Received 12000 bytes of audio data.
Received 12000 bytes of audio data.
-Received 24000 bytes of audio data.
+Received 26400 bytes of audio data.
+
+Connection closed!
```
## Web application sample