[{"@type":"PropertyValue","name":"Format","value":"16kHz, 16bit, uncompressed wav, mono channel;"},{"@type":"PropertyValue","name":"Recording condition","value":"Mixed(quiet,noisy);"},{"@type":"PropertyValue","name":"Content category","value":"wake words;"},{"@type":"PropertyValue","name":"Recording device","value":"Android Smartphone(honor 8, Huawei G9);"},{"@type":"PropertyValue","name":"Speaker","value":"200 Chinese, 47% male and 53% female;"},{"@type":"PropertyValue","name":"Country","value":"China(CHN);"},{"@type":"PropertyValue","name":"Language(Region) Code","value":"zh-CN;"},{"@type":"PropertyValue","name":"Language","value":"Mandarin Chinese;"},{"@type":"PropertyValue","name":"Features of annotation","value":"Transcription text;"},{"@type":"PropertyValue","name":"Accuracy Rate","value":"Sentence Accuracy Rate (SAR) 99%"}]
{"id":177,"datatype":"1","titleimg":"https://res.datatang.com/asset/productNew/APY170401036_W.png?Expires=2007353641&OSSAccessKeyId=LTAI5tQwXnJZbubgVfVa1ep9&Signature=Ab%2BaAZzyD7iQfvWoz43zgDg0Log%3D","type1":"165","type1str":null,"type2":"165","type2str":null,"dataname":"200 People - Mandarin Chinese(China) Wake Words Scripted Monologue Smartphone speech dataset","datazy":[{"title":"Format","value":"16kHz, 16bit, uncompressed wav, mono channel;"},{"title":"Recording condition","value":"Mixed(quiet,noisy);"},{"title":"Content category","value":"wake words;"},{"title":"Recording device","value":"Android Smartphone(honor 8, Huawei G9);"},{"title":"Speaker","value":"200 Chinese, 47% male and 53% female;"},{"title":"Country","value":"China(CHN);"},{"title":"Language(Region) Code","value":"zh-CN;"},{"title":"Language","value":"Mandarin Chinese;"},{"title":"Features of annotation","value":"Transcription text;"},{"title":"Accuracy Rate","value":"Sentence Accuracy Rate (SAR) 99%"}],"datatag":"Wake word,Smartphone,Noise,In-car,Street,Scripted Monologue","technologydoc":null,"downurl":null,"datainfo":"For the Mobile phone voice acquisition data of Chinese wake-up words by 200 people, 53% of all speakers are women. It was recorded in diverse environments. 
The recorded text includes wake-up words and colloquial sentences, and the speakers came from seven dialect areas.","standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":["200 people, 180 sentences per person","24.6 hours","covering seven dialect regions"],"samplePresentation":[["mp3","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0001S0122.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=SvpogIj6%2BHBolE83ygpBzXeIp8k%3D","/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0001S0122.wav","T0253G0001S0122.wavfgf成吉思汗"],["mp3","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0004S0148.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=YbOSl5dcTu6cCClrY2gkjmmh3U8%3D","/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0004S0148.wav","T0253G0004S0148.wavfgf天下无敌"],["mp3","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0149S0152.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=TLp%2Bwn11mhwOfMOMMtjOVOPUwBM%3D","/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0149S0152.wav","T0253G0149S0152.wavfgf天下无敌"],["mp3","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0149S0168.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=nCUAYtsC%2B%2B%2B%2BlBnOwy9sB8ukCJI%3D","/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0149S0168.wav","T025
3G0149S0168.wavfgf天下无敌"],["mp3","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0001S0169.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=LzMLMf6vP4gk7Ne8ieF4GRZ%2BcVY%3D","/data/apps/damp/temp/ziptemp/APY170401036_W_demo1712829601487/APY170401036_W/T0253G0001S0169.wav","T0253G0001S0169.wavfgf天下无敌"]],"officialSummary":"Mandarin Chinese(China) Wake Words Scripted Monologue Smartphone speech dataset, collected from monologue based on given scripts, covering variety wake words. Transcribed with text content and other attributes. Our dataset was collected from extensive and diversify speakers(200 Chinese), geographicly speaking, enhancing model performance in real and complex tasks.rnQuality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes, our datasets are all GDPR, CCPA, PIPL complied.","dataexampl":"","datakeyword":["Chinese wake-up words audio data captured by mobile phone"," Chinese wake-up words collection"," wake-up words data"," wake-up words audio data"," wake-up words audio collection"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Language,Data Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechRec","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"],"single":"no"}
200 People - Mandarin Chinese(China) Wake Words Scripted Monologue Smartphone speech dataset
Chinese wake-up words audio data captured by mobile phone
Chinese wake-up words collection
wake-up words data
wake-up words audio data
wake-up words audio collection
Mandarin Chinese(China) Wake Words Scripted Monologue Smartphone speech dataset, collected from monologues based on given scripts, covering a variety of wake words. Transcribed with text content and other attributes. Our dataset was collected from an extensive and geographically diverse group of speakers (200 Chinese), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
This is a paid dataset for commercial use, research purposes, and more. Licensed, ready-made datasets help jump-start AI projects.
Indonesian(Indonesia) Spontaneous Dialogue Smartphone speech dataset, collected from dialogues based on given topics, covering 20+ domains. Transcribed with text content, speaker's ID, gender, age and other attributes. Our dataset was collected from an extensive and geographically diverse group of speakers (412 native speakers), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
audio data dataset conversational asr data Indonesian
Filipino(the Philippines) Spontaneous Dialogue Smartphone speech dataset, collected from dialogues based on given topics, covering 20+ domains. Transcribed with text content, speaker's ID, gender, age and other attributes. Our dataset was collected from an extensive and geographically diverse group of speakers (140 native speakers), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
audio data dataset conversational asr data Filipino
Spanish(Spain) Spontaneous Dialogue Telephony speech dataset, collected from dialogues based on given topics, covering 20+ domains. Transcribed with text content, speaker's ID, gender, age and other attributes. Our dataset was collected from an extensive and geographically diverse group of speakers (600 native speakers), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
French(France) Spontaneous Dialogue Telephony speech dataset, collected from dialogues based on given topics, covering 20+ domains. Transcribed with text content, speaker's ID, gender, age and other attributes. Our dataset was collected from an extensive and geographically diverse group of speakers (964 native speakers), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
Italian(Italy) Spontaneous Dialogue Telephony speech dataset, collected from dialogues based on given topics, covering 20+ domains. Transcribed with text content, speaker's ID, gender, age and other attributes. Our dataset was collected from an extensive and geographically diverse group of speakers (676 native speakers), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
Italian Conversational telephone
444,202 Korean Pronunciation Dictionary
The data contains 444,202 entries. All words and pronunciations are produced by Korean linguists. It can be used in the research and development of Korean ASR technology.
Thai(Thailand) Spontaneous Dialogue Telephony speech dataset, collected from dialogues based on given topics, covering 20+ domains. Transcribed with text content, speaker's ID, gender, age and other attributes. Our dataset was collected from an extensive and geographically diverse group of speakers (1,986 native speakers), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
Portuguese(Brazil) Spontaneous Dialogue Smartphone speech dataset, collected from dialogues based on given topics, covering 20+ domains. Transcribed with text content, speaker's ID, gender, age and other attributes. Our dataset was collected from an extensive and geographically diverse group of speakers (142 native speakers), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
Conversational speechPortuguese asr data russian asr dataset Brazilian Portuguese