[{"@type":"PropertyValue","name":"Format","value":"16kHz, 16 bit, wav, mono channel;"},{"@type":"PropertyValue","name":"Content category","value":"Recorders in free conversation without a set topic;"},{"@type":"PropertyValue","name":"Recording condition","value":"Low background noise (indoor);"},{"@type":"PropertyValue","name":"Recording device","value":"Android smartphone, iPhone;"},{"@type":"PropertyValue","name":"Speaker","value":"About 200 people;"},{"@type":"PropertyValue","name":"Country","value":"Korea;"},{"@type":"PropertyValue","name":"Language","value":"Korean;"},{"@type":"PropertyValue","name":"Features of annotation","value":"Transcription text, timestamp, speaker ID, gender."},{"@type":"PropertyValue","name":"Accuracy Rate","value":"Word Accuracy Rate (WAR) 98%"}]
{"id":1704,"datatype":"1","titleimg":"https://www.nexdata.ai/shujutang/static/image/index/datatang_yuyin_default.webp","type1":"165","type1str":null,"type2":"166","type2str":null,"dataname":"200 Hours Korean Full-Duplex Multi-Channel Speech Dataset","datazy":[{"title":"Format","content":"16kHz, 16 bit, wav, mono channel;"},{"title":"Content category","content":"Recorders in free conversation without a set topic;"},{"title":"Recording condition","content":"Low background noise (indoor);"},{"title":"Recording device","content":"Android smartphone, iPhone;"},{"title":"Speaker","content":"About 200 people;"},{"title":"Country","content":"Korea;"},{"title":"Language","content":"Korean;"},{"title":"Features of annotation","content":"Transcription text, timestamp, speaker ID, gender."},{"title":"Accuracy Rate","content":"Word Accuracy Rate (WAR) 98%"}],"datatag":"korean,korea,multi-stream,Dialogue ,full duplex","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":null,"samplePresentation":[{"name":"00005_002_1_phone-2.wav","url":"https://storage-product.datatang.com/damp/product/samplePresentation_ipad/20250709103104/00005_002_1_phone-2.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=NlsqZnHGAvbSvVH1OEhwiTk5wzY%3D","intro":"오케이 오케이.","size":43724,"progress":100,"type":"mp3"},{"name":"00005_002_1_phone-3.wav","url":"https://storage-product.datatang.com/damp/product/samplePresentation_ipad/20250709103104/00005_002_1_phone-3.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=zkhIVWAhS3uKo2h%2BkJ%2BWwrsCANQ%3D","intro":"여행 여행 요즘 뭐 여행가고 싶은데 
없어?","size":223820,"progress":100,"type":"mp3"},{"name":"00005_002_2_phone-1.wav","url":"https://storage-product.datatang.com/damp/product/samplePresentation_ipad/20250709103104/00005_002_2_phone-1.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=SE4CYmf6ut37dAFxAogB7QNagKo%3D","intro":"어, 네, 저는 목소리가 엄청 작게 들리는데 원래 이런가?","size":160700,"progress":100,"type":"mp3"},{"name":"00005_002_2_phone-2.wav","url":"https://storage-product.datatang.com/damp/product/samplePresentation_ipad/20250709103104/00005_002_2_phone-2.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=wNJqEzby78LskY9GS8bSn0VrYow%3D","intro":"아, 그래요? 이건 어쩔 수 없나 봐요, 끊기진 않은데 저는.","size":167180,"progress":100,"type":"mp3"},{"name":"00005_002_2_phone-6.wav","url":"https://storage-product.datatang.com/damp/product/samplePresentation_ipad/20250709103104/00005_002_2_phone-6.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=eNXO3T2Q0J8MypsJ1mFZGhLYv1o%3D","intro":"저번에 여행 갔다 오고 나서 이제","size":153452,"progress":100,"type":"mp3"}],"officialSummary":"This 200 Hours Korean Full-Duplex Multi-Channel Speech Dataset features multi-stream audio recorded via smartphones, simulating natural conversations across a range of everyday topics. Each dialogue is annotated with transcripts, speaker ID, gender, and age. Collected from diverse speakers across various regions in Korea, the dataset enhances AI model robustness in real-world applications. Ideal for training automatic speech recognition (ASR), conversational AI, multilingual dialogue systems, and natural speech processing models. 
All data complies with GDPR, CCPA, and PIPL privacy standards, ensuring safe and ethical AI training.","dataexampl":null,"datakeyword":["Korean speech dataset","spontaneous dialogue Korean","multi-stream audio dataset","conversational Korean speech","smartphone-recorded audio","dual-speaker dataset","real-world Korean conversation","full-duplex speech dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Data Type,Language","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechRec","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
200 Hours Korean Full-Duplex Multi-Channel Speech Dataset
Korean speech dataset
spontaneous dialogue Korean
multi-stream audio dataset
conversational Korean speech
smartphone-recorded audio
dual-speaker dataset
real-world Korean conversation
full-duplex speech dataset
This 200 Hours Korean Full-Duplex Multi-Channel Speech Dataset features multi-stream audio recorded via smartphones, simulating natural conversations across a range of everyday topics. Each dialogue is annotated with transcripts, speaker ID, gender, and age. Collected from diverse speakers across various regions in Korea, the dataset enhances AI model robustness in real-world applications. Ideal for training automatic speech recognition (ASR), conversational AI, multilingual dialogue systems, and natural speech processing models. All data complies with GDPR, CCPA, and PIPL privacy standards, ensuring safe and ethical AI training.
This is a paid dataset for commercial use, research purposes, and more. Licensed, ready-made datasets help jump-start AI projects.
Specifications
Format
16kHz, 16 bit, wav, mono channel;
Content category
Recorders in free conversation without a set topic;