[{"@type":"PropertyValue","name":"Format","value":"44.1kHz,16bit, uncompressed wav, mono channel;"},{"@type":"PropertyValue","name":"Recording condition","value":"Low background noise(indoor), without echo ;"},{"@type":"PropertyValue","name":"Content category","value":"10 types of emotional scripts;"},{"@type":"PropertyValue","name":"Recording device","value":"Hi-Fi microphone;"},{"@type":"PropertyValue","name":"Speaker","value":"20 American, 50% male and 50% female;"},{"@type":"PropertyValue","name":"Country","value":"the United States(USA);"},{"@type":"PropertyValue","name":"Language(Region) Code","value":"en-US;"},{"@type":"PropertyValue","name":"Language","value":"English;"},{"@type":"PropertyValue","name":"Features of annotation","value":"Transcription text;"},{"@type":"PropertyValue","name":"Accuracy Rate","value":"Sentence Accuracy Rate (SAR) 95%"}]
{"id":179,"datatype":"1","titleimg":"https://res.datatang.com/asset/productNew/APY170501038.png?Expires=2007353643&OSSAccessKeyId=LTAI5tQwXnJZbubgVfVa1ep9&Signature=jpO6r9ECU4FnWP1aWyce52ul3TM%3D","type1":"165","type1str":null,"type2":"166","type2str":null,"dataname":"English Speech Emotion Dataset – Labeled Audio from 20 Native Speakers with 10 Emotions","datazy":[{"title":"Format","content":"44.1kHz,16bit, uncompressed wav, mono channel;","desc":"Format"},{"title":"Recording condition","content":"Low background noise(indoor), without echo ;","desc":"Recording condition"},{"title":"Content category","content":"10 types of emotional scripts;","desc":"Content category"},{"title":"Recording device","content":"Hi-Fi microphone;","desc":"Recording device"},{"title":"Speaker","content":"20 American, 50% male and 50% female;","desc":"Speaker"},{"title":"Country","content":"the United States(USA);","desc":"Country"},{"title":"Language(Region) Code","content":"en-US;","desc":"Language(Region) Code"},{"title":"Language","content":"English;","desc":"Language"},{"title":"Features of annotation","content":"Transcription text;","desc":"Features of annotation"},{"title":"Accuracy Rate","content":"Sentence Accuracy Rate (SAR) 95%","desc":"Accuracy Rate"}],"datatag":"English,USA,Emotion,Microphone,Reading,Scripted Monologue","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":null,"samplePresentation":[{"name":"/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0019S00012.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0019S00012.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=YEN3feEep80qwCr44tq0eyHmsWs%3D","intro":"I am so 
happy{-laughter=mmm-} to hear that!","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0006S00004.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0006S00004.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=7J%2FQyF6hY0QE77qfJHgbDTT0jxA%3D","intro":"I was overcome with joy{-laughter=haha-} when hearing the news.","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0018S00004.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0018S00004.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=9QLCsJCRKF3EI4ZXSQV8H%2Facuxc%3D","intro":"{-laughter=yeah-}I'm okay.","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0001S00775.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0001S00775.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=OxieiPydyK%2Fd4N8uVauVSI4CiEU%3D","intro":"This game is amazing.","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0003S00001.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170501038_demo1706954405836/apy170501038/T0001G0003S00001.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=XjwQkXR%2BEV2Y3bxfnLP0RwyR6UQ%3D","intro":"I was so happy{-laughter=aww-}to hear the good news.","size":0,"progress":100,"type":"mp3"}],"officialSummary":"This English Emotional Speech Dataset features recordings from 20 
native American English speakers. Each participant performed scripted monologues expressing 10 distinct emotions, including anger, happiness, sadness, fear, disgust, and others, simulating real-world scenarios. The recordings were captured via high-quality microphones and are accompanied by accurate transcriptions and relevant metadata.The dataset is ideal for training and evaluating speech emotion recognition (SER) systems, emotional TTS, affective computing, and conversational AI applications. Its geographic and speaker diversity enhances generalizability in real-life environments.All data was collected in compliance with international data privacy laws including GDPR, CCPA, and PIPL, ensuring legal and ethical use in both research and commercial settings. The dataset has been validated by multiple AI companies for performance benchmarking.","dataexampl":null,"datakeyword":["emotional speech dataset","English speech emotion dataset","speech emotion recognition","SER dataset","emotional audio data","labeled emotional speech","affective computing dataset","native English speakers","AI speech dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Data Type,Language","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechRec","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
English Speech Emotion Dataset – Labeled Audio from 20 Native Speakers with 10 Emotions
emotional speech dataset
English speech emotion dataset
speech emotion recognition
SER dataset
emotional audio data
labeled emotional speech
affective computing dataset
native English speakers
AI speech dataset
This English Emotional Speech Dataset features recordings from 20 native American English speakers. Each participant performed scripted monologues expressing 10 distinct emotions, including anger, happiness, sadness, fear, disgust, and others, simulating real-world scenarios. The recordings were captured via high-quality microphones and are accompanied by accurate transcriptions and relevant metadata. The dataset is ideal for training and evaluating speech emotion recognition (SER) systems, emotional TTS, affective computing, and conversational AI applications. Its geographic and speaker diversity enhances generalizability in real-life environments. All data was collected in compliance with international data privacy laws including GDPR, CCPA, and PIPL, ensuring legal and ethical use in both research and commercial settings. The dataset has been validated by multiple AI companies for performance benchmarking.
This is a paid dataset for commercial use, research purposes, and more. Licensed, ready-made datasets help jump-start AI projects.
Specifications
Format
44.1kHz,16bit, uncompressed wav, mono channel;
Recording condition
Low background noise (indoor), without echo;
Content category
10 types of emotional scripts;
Recording device
Hi-Fi microphone;
Speaker
20 American, 50% male and 50% female;
Country
the United States(USA);
Language(Region) Code
en-US;
Language
English;
Features of annotation
Transcription text;
Accuracy Rate
Sentence Accuracy Rate (SAR) 95%
Sample
Audio
I am so happy{-laughter=mmm-} to hear that!
Audio
I was overcome with joy{-laughter=haha-} when hearing the news.
Audio
{-laughter=yeah-}I'm okay.
Audio
This game is amazing.
Audio
I was so happy{-laughter=aww-}to hear the good news.