32 KiB
32 KiB
Pre-Recorded Audio
POST https://api.deepgram.com/v1/listen Content-Type: application/json
Transcribe audio and video using Deepgram's speech-to-text REST API
Reference: https://developers.deepgram.com/reference/speech-to-text/listen-pre-recorded
OpenAPI Specification
openapi: 3.1.1
info:
title: Transcribe and analyze pre-recorded audio and video
version: endpoint_listen/v1/media.transcribe
paths:
/v1/listen:
post:
operationId: transcribe
summary: Transcribe and analyze pre-recorded audio and video
description: Transcribe audio and video using Deepgram's speech-to-text REST API
tags:
- subpackage_listen
- subpackage_listen/v1
- subpackage_listen/v1/media
parameters:
- name: callback
in: query
description: URL to which we'll make the callback request
required: false
schema:
type: string
- name: callback_method
in: query
description: HTTP method by which the callback request will be made
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersCallbackMethod'
- name: extra
in: query
description: >-
Arbitrary key-value pairs that are attached to the API response for
usage in downstream processing
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersExtra'
- name: sentiment
in: query
description: Recognizes the sentiment throughout a transcript or text
required: false
schema:
type: boolean
default: false
- name: summarize
in: query
description: >-
Summarize content. For Listen API, supports string version option.
For Read API, accepts boolean only.
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersSummarize'
- name: tag
in: query
description: >-
Label your requests for the purpose of identification during usage
reporting
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersTag'
- name: topics
in: query
description: Detect topics throughout a transcript or text
required: false
schema:
type: boolean
default: false
- name: custom_topic
in: query
description: >-
Custom topics you want the model to detect within your input audio
or text if present Submit up to `100`.
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersCustomTopic'
- name: custom_topic_mode
in: query
description: >-
Sets how the model will interpret strings submitted to the
`custom_topic` param. When `strict`, the model will only return
topics submitted using the `custom_topic` param. When `extended`,
the model will return its own detected topics in addition to those
submitted using the `custom_topic` param
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersCustomTopicMode'
- name: intents
in: query
description: Recognizes speaker intent throughout a transcript or text
required: false
schema:
type: boolean
default: false
- name: custom_intent
in: query
description: >-
Custom intents you want the model to detect within your input audio
if present
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersCustomIntent'
- name: custom_intent_mode
in: query
description: >-
Sets how the model will interpret intents submitted to the
`custom_intent` param. When `strict`, the model will only return
intents submitted using the `custom_intent` param. When `extended`,
the model will return its own detected intents in the
`custom_intent` param.
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersCustomIntentMode'
- name: detect_entities
in: query
description: Identifies and extracts key entities from content in submitted audio
required: false
schema:
type: boolean
default: false
- name: detect_language
in: query
description: Identifies the dominant language spoken in submitted audio
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersDetectLanguage'
- name: diarize
in: query
description: >-
Recognize speaker changes. Each word in the transcript will be
assigned a speaker number starting at 0
required: false
schema:
type: boolean
default: false
- name: dictation
in: query
description: Dictation mode for controlling formatting with dictated speech
required: false
schema:
type: boolean
default: false
- name: encoding
in: query
description: Specify the expected encoding of your submitted audio
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersEncoding'
- name: filler_words
in: query
description: >-
Filler Words can help transcribe interruptions in your audio, like
"uh" and "um"
required: false
schema:
type: boolean
default: false
- name: keyterm
in: query
description: >-
Key term prompting can boost or suppress specialized terminology and
brands. Only compatible with Nova-3
required: false
schema:
type: array
items:
type: string
- name: keywords
in: query
description: Keywords can boost or suppress specialized terminology and brands
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersKeywords'
- name: language
in: query
description: >-
The [BCP-47 language tag](https://tools.ietf.org/html/bcp47) that
hints at the primary spoken language. Depending on the Model and API
endpoint you choose only certain languages are available
required: false
schema:
type: string
default: en
- name: measurements
in: query
description: >-
Spoken measurements will be converted to their corresponding
abbreviations
required: false
schema:
type: boolean
default: false
- name: model
in: query
description: AI model used to process submitted audio
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersModel'
- name: multichannel
in: query
description: Transcribe each audio channel independently
required: false
schema:
type: boolean
default: false
- name: numerals
in: query
description: Numerals converts numbers from written format to numerical format
required: false
schema:
type: boolean
default: false
- name: paragraphs
in: query
description: Splits audio into paragraphs to improve transcript readability
required: false
schema:
type: boolean
default: false
- name: profanity_filter
in: query
description: >-
Profanity Filter looks for recognized profanity and converts it to
the nearest recognized non-profane word or removes it from the
transcript completely
required: false
schema:
type: boolean
default: false
- name: punctuate
in: query
description: Add punctuation and capitalization to the transcript
required: false
schema:
type: boolean
default: false
- name: redact
in: query
description: Redaction removes sensitive information from your transcripts
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersRedact'
- name: replace
in: query
description: Search for terms or phrases in submitted audio and replaces them
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersReplace'
- name: search
in: query
description: Search for terms or phrases in submitted audio
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersSearch'
- name: smart_format
in: query
description: >-
Apply formatting to transcript output. When set to true, additional
formatting will be applied to transcripts to improve readability
required: false
schema:
type: boolean
default: false
- name: utterances
in: query
description: Segments speech into meaningful semantic units
required: false
schema:
type: boolean
default: false
- name: utt_split
in: query
description: >-
Seconds to wait before detecting a pause between words in submitted
audio
required: false
schema:
type: number
format: double
default: 0.8
- name: version
in: query
description: Version of an AI model to use
required: false
schema:
$ref: '#/components/schemas/V1ListenPostParametersVersion'
- name: mip_opt_out
in: query
description: >-
Opts out requests from the Deepgram Model Improvement Program. Refer
to our Docs for pricing impacts before setting this to true.
https://dpgr.am/deepgram-mip
required: false
schema:
type: boolean
default: false
- name: Authorization
in: header
description: Header authentication of the form `Token <token>`
required: true
schema:
type: string
responses:
'200':
description: >-
Returns either transcription results, or a request_id when using a
callback.
content:
application/json:
schema:
$ref: '#/components/schemas/listen_v1_media_transcribe_Response_200'
'400':
description: Invalid Request
content: {}
requestBody:
description: Transcribe an audio or video file
content:
application/json:
schema:
$ref: '#/components/schemas/ListenV1RequestUrl'
components:
schemas:
V1ListenPostParametersCallbackMethod:
type: string
enum:
- POST
- PUT
default: POST
V1ListenPostParametersExtra:
oneOf:
- type: string
- type: array
items:
type: string
V1ListenPostParametersSummarize0:
type: string
enum:
- v2
V1ListenPostParametersSummarize:
oneOf:
- $ref: '#/components/schemas/V1ListenPostParametersSummarize0'
- type: boolean
default: false
V1ListenPostParametersTag:
oneOf:
- type: string
- type: array
items:
type: string
V1ListenPostParametersCustomTopic:
oneOf:
- type: string
- type: array
items:
type: string
V1ListenPostParametersCustomTopicMode:
type: string
enum:
- extended
- strict
default: extended
V1ListenPostParametersCustomIntent:
oneOf:
- type: string
- type: array
items:
type: string
V1ListenPostParametersCustomIntentMode:
type: string
enum:
- extended
- strict
default: extended
V1ListenPostParametersDetectLanguage:
oneOf:
- type: boolean
default: false
- type: array
items:
type: string
V1ListenPostParametersEncoding:
type: string
enum:
- linear16
- flac
- mulaw
- amr-nb
- amr-wb
- opus
- speex
- g729
V1ListenPostParametersKeywords:
oneOf:
- type: string
- type: array
items:
type: string
V1ListenPostParametersModel0:
type: string
enum:
- nova-3
- nova-3-general
- nova-3-medical
- nova-2
- nova-2-general
- nova-2-meeting
- nova-2-finance
- nova-2-conversationalai
- nova-2-voicemail
- nova-2-video
- nova-2-medical
- nova-2-drivethru
- nova-2-automotive
- nova
- nova-general
- nova-phonecall
- nova-medical
- enhanced
- enhanced-general
- enhanced-meeting
- enhanced-phonecall
- enhanced-finance
- base
- meeting
- phonecall
- finance
- conversationalai
- voicemail
- video
V1ListenPostParametersModel:
oneOf:
- $ref: '#/components/schemas/V1ListenPostParametersModel0'
- type: string
V1ListenPostParametersRedactSchemaOneOf1Items:
type: string
enum:
- pci
- pii
- numbers
V1ListenPostParametersRedact1:
type: array
items:
$ref: '#/components/schemas/V1ListenPostParametersRedactSchemaOneOf1Items'
V1ListenPostParametersRedact:
oneOf:
- type: string
- $ref: '#/components/schemas/V1ListenPostParametersRedact1'
V1ListenPostParametersReplace:
oneOf:
- type: string
- type: array
items:
type: string
V1ListenPostParametersSearch:
oneOf:
- type: string
- type: array
items:
type: string
V1ListenPostParametersVersion0:
type: string
enum:
- latest
V1ListenPostParametersVersion:
oneOf:
- $ref: '#/components/schemas/V1ListenPostParametersVersion0'
- type: string
ListenV1RequestUrl:
type: object
properties:
url:
type: string
format: uri
required:
- url
ListenV1ResponseMetadataModelInfo:
type: object
properties: {}
ListenV1ResponseMetadataSummaryInfo:
type: object
properties:
model_uuid:
type: string
input_tokens:
type: number
format: double
output_tokens:
type: number
format: double
ListenV1ResponseMetadataSentimentInfo:
type: object
properties:
model_uuid:
type: string
input_tokens:
type: number
format: double
output_tokens:
type: number
format: double
ListenV1ResponseMetadataTopicsInfo:
type: object
properties:
model_uuid:
type: string
input_tokens:
type: number
format: double
output_tokens:
type: number
format: double
ListenV1ResponseMetadataIntentsInfo:
type: object
properties:
model_uuid:
type: string
input_tokens:
type: number
format: double
output_tokens:
type: number
format: double
ListenV1ResponseMetadata:
type: object
properties:
transaction_key:
type: string
default: deprecated
request_id:
type: string
format: uuid
sha256:
type: string
created:
type: string
format: date-time
duration:
type: number
format: double
channels:
type: number
format: double
models:
type: array
items:
type: string
model_info:
$ref: '#/components/schemas/ListenV1ResponseMetadataModelInfo'
summary_info:
$ref: '#/components/schemas/ListenV1ResponseMetadataSummaryInfo'
sentiment_info:
$ref: '#/components/schemas/ListenV1ResponseMetadataSentimentInfo'
topics_info:
$ref: '#/components/schemas/ListenV1ResponseMetadataTopicsInfo'
intents_info:
$ref: '#/components/schemas/ListenV1ResponseMetadataIntentsInfo'
tags:
type: array
items:
type: string
required:
- request_id
- sha256
- created
- duration
- channels
- models
- model_info
ListenV1ResponseResultsChannelsItemsSearchItemsHitsItems:
type: object
properties:
confidence:
type: number
format: double
start:
type: number
format: double
end:
type: number
format: double
snippet:
type: string
ListenV1ResponseResultsChannelsItemsSearchItems:
type: object
properties:
query:
type: string
hits:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsSearchItemsHitsItems
ListenV1ResponseResultsChannelsItemsAlternativesItemsWordsItems:
type: object
properties:
word:
type: string
start:
type: number
format: double
end:
type: number
format: double
confidence:
type: number
format: double
ListenV1ResponseResultsChannelsItemsAlternativesItemsParagraphsParagraphsItemsSentencesItems:
type: object
properties:
text:
type: string
start:
type: number
format: double
end:
type: number
format: double
ListenV1ResponseResultsChannelsItemsAlternativesItemsParagraphsParagraphsItems:
type: object
properties:
sentences:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsAlternativesItemsParagraphsParagraphsItemsSentencesItems
speaker:
type: number
format: double
num_words:
type: number
format: double
start:
type: number
format: double
end:
type: number
format: double
ListenV1ResponseResultsChannelsItemsAlternativesItemsParagraphs:
type: object
properties:
transcript:
type: string
paragraphs:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsAlternativesItemsParagraphsParagraphsItems
ListenV1ResponseResultsChannelsItemsAlternativesItemsEntitiesItems:
type: object
properties:
label:
type: string
value:
type: string
raw_value:
type: string
confidence:
type: number
format: double
start_word:
type: number
format: double
end_word:
type: number
format: double
ListenV1ResponseResultsChannelsItemsAlternativesItemsSummariesItems:
type: object
properties:
summary:
type: string
start_word:
type: number
format: double
end_word:
type: number
format: double
ListenV1ResponseResultsChannelsItemsAlternativesItemsTopicsItems:
type: object
properties:
text:
type: string
start_word:
type: number
format: double
end_word:
type: number
format: double
topics:
type: array
items:
type: string
ListenV1ResponseResultsChannelsItemsAlternativesItems:
type: object
properties:
transcript:
type: string
confidence:
type: number
format: double
words:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsAlternativesItemsWordsItems
paragraphs:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsAlternativesItemsParagraphs
entities:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsAlternativesItemsEntitiesItems
summaries:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsAlternativesItemsSummariesItems
topics:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsAlternativesItemsTopicsItems
ListenV1ResponseResultsChannelsItems:
type: object
properties:
search:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsSearchItems
alternatives:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsChannelsItemsAlternativesItems
detected_language:
type: string
ListenV1ResponseResultsChannels:
type: array
items:
$ref: '#/components/schemas/ListenV1ResponseResultsChannelsItems'
ListenV1ResponseResultsUtterancesItemsWordsItems:
type: object
properties:
word:
type: string
start:
type: number
format: double
end:
type: number
format: double
confidence:
type: number
format: double
speaker:
type: number
format: double
speaker_confidence:
type: number
format: double
punctuated_word:
type: string
ListenV1ResponseResultsUtterancesItems:
type: object
properties:
start:
type: number
format: double
end:
type: number
format: double
confidence:
type: number
format: double
channel:
type: number
format: double
transcript:
type: string
words:
type: array
items:
$ref: >-
#/components/schemas/ListenV1ResponseResultsUtterancesItemsWordsItems
speaker:
type: number
format: double
id:
type: string
format: uuid
ListenV1ResponseResultsUtterances:
type: array
items:
$ref: '#/components/schemas/ListenV1ResponseResultsUtterancesItems'
ListenV1ResponseResultsSummary:
type: object
properties:
result:
type: string
short:
type: string
SharedTopicsResultsTopicsSegmentsItemsTopicsItems:
type: object
properties:
topic:
type: string
confidence_score:
type: number
format: double
SharedTopicsResultsTopicsSegmentsItems:
type: object
properties:
text:
type: string
start_word:
type: number
format: double
end_word:
type: number
format: double
topics:
type: array
items:
$ref: >-
#/components/schemas/SharedTopicsResultsTopicsSegmentsItemsTopicsItems
SharedTopicsResultsTopics:
type: object
properties:
segments:
type: array
items:
$ref: '#/components/schemas/SharedTopicsResultsTopicsSegmentsItems'
SharedTopicsResults:
type: object
properties:
topics:
$ref: '#/components/schemas/SharedTopicsResultsTopics'
SharedTopics:
type: object
properties:
results:
$ref: '#/components/schemas/SharedTopicsResults'
SharedIntentsResultsIntentsSegmentsItemsIntentsItems:
type: object
properties:
intent:
type: string
confidence_score:
type: number
format: double
SharedIntentsResultsIntentsSegmentsItems:
type: object
properties:
text:
type: string
start_word:
type: number
format: double
end_word:
type: number
format: double
intents:
type: array
items:
$ref: >-
#/components/schemas/SharedIntentsResultsIntentsSegmentsItemsIntentsItems
SharedIntentsResultsIntents:
type: object
properties:
segments:
type: array
items:
$ref: '#/components/schemas/SharedIntentsResultsIntentsSegmentsItems'
SharedIntentsResults:
type: object
properties:
intents:
$ref: '#/components/schemas/SharedIntentsResultsIntents'
SharedIntents:
type: object
properties:
results:
$ref: '#/components/schemas/SharedIntentsResults'
SharedSentimentsSegmentsItems:
type: object
properties:
text:
type: string
start_word:
type: number
format: double
end_word:
type: number
format: double
sentiment:
type: string
sentiment_score:
type: number
format: double
SharedSentimentsAverage:
type: object
properties:
sentiment:
type: string
sentiment_score:
type: number
format: double
SharedSentiments:
type: object
properties:
segments:
type: array
items:
$ref: '#/components/schemas/SharedSentimentsSegmentsItems'
average:
$ref: '#/components/schemas/SharedSentimentsAverage'
ListenV1ResponseResults:
type: object
properties:
channels:
$ref: '#/components/schemas/ListenV1ResponseResultsChannels'
utterances:
$ref: '#/components/schemas/ListenV1ResponseResultsUtterances'
summary:
$ref: '#/components/schemas/ListenV1ResponseResultsSummary'
topics:
$ref: '#/components/schemas/SharedTopics'
intents:
$ref: '#/components/schemas/SharedIntents'
sentiments:
$ref: '#/components/schemas/SharedSentiments'
required:
- channels
ListenV1Response:
type: object
properties:
metadata:
$ref: '#/components/schemas/ListenV1ResponseMetadata'
results:
$ref: '#/components/schemas/ListenV1ResponseResults'
required:
- metadata
- results
ListenV1AcceptedResponse:
type: object
properties:
request_id:
type: string
format: uuid
description: Unique identifier for tracking the asynchronous request
required:
- request_id
listen_v1_media_transcribe_Response_200:
oneOf:
- $ref: '#/components/schemas/ListenV1Response'
- $ref: '#/components/schemas/ListenV1AcceptedResponse'
SDK Code Examples
import requests

# Deepgram pre-recorded transcription endpoint.
DEEPGRAM_URL = "https://api.deepgram.com/v1/listen"

# Hosted audio file to transcribe; replace <apiKey> with a Deepgram API key.
payload = {"url": "https://dpgr.am/spacewalk.wav"}
headers = {
    "Authorization": "<apiKey>",
    "Content-Type": "application/json",
}

response = requests.post(DEEPGRAM_URL, json=payload, headers=headers)
print(response.json())
// Deepgram pre-recorded transcription request (top-level await; run as a module).
const endpoint = 'https://api.deepgram.com/v1/listen';

const requestInit = {
  method: 'POST',
  headers: {
    Authorization: '<apiKey>',
    'Content-Type': 'application/json',
  },
  body: '{"url":"https://dpgr.am/spacewalk.wav"}',
};

try {
  const response = await fetch(endpoint, requestInit);
  const payload = await response.json();
  console.log(payload);
} catch (err) {
  console.error(err);
}
package main

import (
	"fmt"
	"io"
	"net/http"
	"strings"
)

// main sends a pre-recorded transcription request to Deepgram's /v1/listen
// endpoint and prints the HTTP response and its body.
//
// Fix over the original snippet: errors from http.NewRequest, the client Do
// call, and io.ReadAll were silently discarded with `_`, which could lead to
// a nil-pointer dereference on `res` when the request fails. Each error is
// now checked before the result is used.
func main() {
	url := "https://api.deepgram.com/v1/listen"
	payload := strings.NewReader("{\n \"url\": \"https://dpgr.am/spacewalk.wav\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("request error:", err)
		return
	}
	// Replace <apiKey> with a Deepgram API key.
	req.Header.Add("Authorization", "<apiKey>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("http error:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("read error:", err)
		return
	}
	fmt.Println(res)
	fmt.Println(string(body))
}
require 'uri'
require 'net/http'

# Deepgram pre-recorded transcription request.
endpoint = URI("https://api.deepgram.com/v1/listen")

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Replace <apiKey> with a Deepgram API key.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = '<apiKey>'
post["Content-Type"] = 'application/json'
post.body = "{\n \"url\": \"https://dpgr.am/spacewalk.wav\"\n}"

puts client.request(post).read_body
// Deepgram pre-recorded transcription request using the Unirest HTTP client.
// NOTE(review): statement fragment — assumes the Unirest library is on the
// classpath and that this runs inside a method; replace <apiKey> with a
// Deepgram API key before running.
HttpResponse<String> response = Unirest.post("https://api.deepgram.com/v1/listen")
.header("Authorization", "<apiKey>")
.header("Content-Type", "application/json")
.body("{\n \"url\": \"https://dpgr.am/spacewalk.wav\"\n}")
.asString();
<?php
// Deepgram pre-recorded transcription request via the Guzzle HTTP client.
$http = new \GuzzleHttp\Client();

// Replace <apiKey> with a Deepgram API key.
$result = $http->request('POST', 'https://api.deepgram.com/v1/listen', [
'headers' => [
'Authorization' => '<apiKey>',
'Content-Type' => 'application/json',
],
'body' => '{
"url": "https://dpgr.am/spacewalk.wav"
}',
]);

echo $result->getBody();
// Deepgram pre-recorded transcription request using RestSharp.
// NOTE(review): Method.POST and IRestResponse belong to the legacy RestSharp
// API (v106 and earlier); newer versions use Method.Post and RestResponse.
// Replace <apiKey> with a Deepgram API key before running.
var client = new RestClient("https://api.deepgram.com/v1/listen");
var request = new RestRequest(Method.POST);
request.AddHeader("Authorization", "<apiKey>");
request.AddHeader("Content-Type", "application/json");
request.AddParameter("application/json", "{\n \"url\": \"https://dpgr.am/spacewalk.wav\"\n}", ParameterType.RequestBody);
IRestResponse response = client.Execute(request);
import Foundation

// Deepgram pre-recorded transcription request.
// Replace <apiKey> with a Deepgram API key.
let headers = [
    "Authorization": "<apiKey>",
    "Content-Type": "application/json"
]

// Hosted audio file to transcribe.
// Fix: the original snippet's URL string was accidentally duplicated
// ("...spacewalk.wavhttps://dpgr.am/spacewalk.wav").
let parameters = ["url": "https://dpgr.am/spacewalk.wav"] as [String : Any]

// Fix: JSONSerialization.data(withJSONObject:) is a throwing call; the
// original omitted `try` and would not compile.
let postData = try JSONSerialization.data(withJSONObject: parameters, options: [])

var request = URLRequest(url: URL(string: "https://api.deepgram.com/v1/listen")!,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "POST"
request.allHTTPHeaderFields = headers
request.httpBody = postData

let dataTask = URLSession.shared.dataTask(with: request) { data, response, error in
    if let error = error {
        print(error)
    } else if let httpResponse = response as? HTTPURLResponse {
        // Fix: unwrap before printing instead of printing an Optional.
        print(httpResponse)
    }
}
dataTask.resume()