[{"@type":"PropertyValue","name":"Format","value":"24kHz, 16bit, uncompressed wav, mono channel;"},{"@type":"PropertyValue","name":"Recording Environment","value":"quiet indoor environment, without echo;"},{"@type":"PropertyValue","name":"Recording content","value":"dozens of topics are specified, and the speakers make dialogue under those topics while the recording is performed;"},{"@type":"PropertyValue","name":"Demographics","value":"about 500 speakers; balanced gender ratio among speakers, with age distribution ranging from 18 to 60 years old;"},{"@type":"PropertyValue","name":"Annotation","value":"extract and annotate individual sentences with their start and end timestamps, speaker identification, and spoken text content; noise annotation;"},{"@type":"PropertyValue","name":"Device","value":"Android mobile phone, iPhone;"},{"@type":"PropertyValue","name":"Language","value":"Guilin dialect;"},{"@type":"PropertyValue","name":"Application scenarios","value":"speech recognition; voiceprint recognition;"},{"@type":"PropertyValue","name":"Accuracy rate","value":"word accuracy rate of 98%."}]
{"id":1682,"datatype":"1","titleimg":"https://www.nexdata.ai/shujutang/static/image/index/datatang_yuyin_default.webp","type1":"165","type1str":null,"type2":"166","type2str":null,"dataname":"500 Hours - Guilin Dialect Conversation (Bilingual Annotated) Speech Data by Mobile Phone","datazy":[{"title":"Format","content":"24kHz, 16bit, uncompressed wav, mono channel;"},{"title":"Recording Environment","content":"quiet indoor environment, without echo;"},{"title":"Recording content","content":"dozens of topics are specified, and the speakers make dialogue under those topics while the recording is performed;"},{"title":"Demographics","content":"about 500 speakers; balanced gender ratio among speakers, with age distribution ranging from 18 to 60 years old;"},{"title":"Annotation","content":"extract and annotate individual sentences with their start and end timestamps, speaker identification, and spoken text content; noise annotation;"},{"title":"Device","content":"Android mobile phone, iPhone;"},{"title":"Language","content":"Guilin dialect;"},{"title":"Application scenarios","content":"speech recognition; voiceprint recognition;"},{"title":"Accuracy rate","content":"word accuracy rate of 98%."}],"datatag":"ast,Dialect 
,guilin","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":null,"samplePresentation":[{"name":"wea_001_00428_16k-1.wav","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20250728104926/wea_001_00428_16k-1.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=hrwhkGRxXISVobVkRDURULYmswY%3D","intro":"就是特别特别冷滴时候,那你,出去,\t就是特别特别冷的时候,那你,出去,","size":77164,"progress":100,"type":"mp3"},{"name":"wea_001_00428_16k-2.wav","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20250728104926/wea_001_00428_16k-2.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=0TOE62FZNkDOIAh0%2BBeujeXl8rA%3D","intro":"开电动车倒是没有。\t骑电动车倒是没有。","size":45516,"progress":100,"type":"mp3"},{"name":"wea_001_00428_16k-3.wav","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20250728104926/wea_001_00428_16k-3.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=gLsxIqegpSGeAMVvHbtWQsMlhFc%3D","intro":"那不是就是穿起多多哩。\t那不是就是穿得多多的。","size":61196,"progress":100,"type":"mp3"},{"name":"wea_001_00428_16k-4.wav","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20250728104926/wea_001_00428_16k-4.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=2Q39u6rWZn7DJFQ8tbymuIxF1OY%3D","intro":"然后桂林南方这种就是湿冷天气,这不[OVERLAP/]是,不[/OVERLAP]是你穿好多就能够解决问题哩。\t然后桂林南方这种就是湿冷天气,这不是,不是你穿很多就能够解决问题的。","size":179116,"progress":100,"type":"mp3"},{"name":"wea_001_00428_16k-5.wav","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20250728104926/wea_001_00428_16k-5.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=OrylBVGjLZr3bMPU0J6mcMNT34w%3D","intro":"但是我们去去北方,去北方其实北方也是,北方是真滴冷但人家室内有有地暖。\t但是我们去去北方,去北方其实北方也是,北方是真的冷但人家室内有有地暖。","size":17
6780,"progress":100,"type":"mp3"}],"officialSummary":"Guilin Dialect(China) Spontaneous Dialogue Smartphone speech dataset, transcribed with text content, timestamp, speaker's ID, gender and other attributes. Our dataset was collected from extensive and diversify speakers, geographicly speaking, enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes, our datasets are all GDPR, CCPA, PIPL complied.","dataexampl":null,"datakeyword":["ast","Dialect ","guilin"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Data Type,Language","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechRec","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
500 Hours - Guilin Dialect Conversation (Bilingual Annotated) Speech Data by Mobile Phone
ast
Dialect
guilin
Guilin Dialect (China) spontaneous-dialogue smartphone speech dataset, transcribed with text content, timestamps, speaker ID, gender, and other attributes. Our dataset was collected from an extensive and geographically diverse pool of speakers, enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes; our datasets are all GDPR, CCPA, and PIPL compliant.
This is a paid dataset for commercial use, research purposes, and more. Licensed ready-made datasets help jump-start AI projects.
Specifications
Format
24kHz, 16bit, uncompressed wav, mono channel;
Recording Environment
quiet indoor environment, without echo;
Recording content
dozens of topics are specified, and the speakers hold conversations on those topics while being recorded;
Demographics
about 500 speakers; balanced gender ratio among speakers, with age distribution ranging from 18 to 60 years old;
Annotation
extract and annotate individual sentences with their start and end timestamps, speaker identification, and spoken text content; noise annotation;