[{"@type":"PropertyValue","name":"Format","value":"16k Hz, 16 bit, wav, mono channel"},{"@type":"PropertyValue","name":"Recording condition","value":"Normal environment"},{"@type":"PropertyValue","name":"Content category","value":"Chat and comment,etc."},{"@type":"PropertyValue","name":"Country","value":"India(IND)"},{"@type":"PropertyValue","name":"Language","value":"Gujarati"},{"@type":"PropertyValue","name":"Accuracy","value":"Word Accuracy Rate 95%"},{"@type":"PropertyValue","name":"Features of annotation","value":"Transcription text"},{"@type":"PropertyValue","name":"Language(Region) Code","value":"gu_IN,etc."}]
{"id":1736,"datatype":"1","titleimg":"https://www.nexdata.ai/shujutang/static/image/index/datatang_yuyin_default.webp","type1":"165","type1str":null,"type2":"166","type2str":null,"dataname":"161 Hours Gujarati Speech Dataset with Transcriptions for ASR Training","datazy":[{"title":"Format","content":"16k Hz, 16 bit, wav, mono channel","desc":"Format"},{"title":"Recording condition","content":"Normal environment","desc":"Recording condition"},{"title":"Content category","content":"Chat and comment,etc.","desc":"Content category"},{"title":"Country","content":"India(IND)","desc":"Country"},{"title":"Language","content":"Gujarati","desc":"Language"},{"title":"Accuracy","content":"Word Accuracy Rate 95%","desc":"Accuracy"},{"title":"Features of annotation","content":"Transcription text","desc":"Features of annotation"},{"title":"Language(Region) Code","content":"gu_IN,etc.","desc":"Language(Region) Code"}],"datatag":"Gujarati,Reading,Scripted Monologue","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[{"name":"000002.wav","url":"https://storage-product.datatang.com/damp/product/sample_presentation/20250718180131/000002.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=1s98DW4P4modnkieUyrDfSPg%2BUY%3D","intro":"અયાન મુખર્જીની ફિલ્મ બ્રહ્માસ્ત્રમાં એક સાથે કામ કરવાનો મોકો મળ્યો.","size":216044,"progress":100,"type":"mp3"},{"name":"000004.wav","url":"https://storage-product.datatang.com/damp/product/sample_presentation/20250718180131/000004.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=850CeZXrcMV%2FMUsTocRCA6GwcXU%3D","intro":"અમેરિકીઓને અમદાવાદના મેન્યુફેકચરીંગ અને ટેકસ ટાઈલ્સ ક્ષેત્રે વધુ રોકાણની તકો દેખાઈ રહી 
છે.","size":246764,"progress":100,"type":"mp3"},{"name":"000005.wav","url":"https://storage-product.datatang.com/damp/product/sample_presentation/20250718180131/000005.wav?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=d4GJdGZAA7IVX0sAWJfHrERKZfU%3D","intro":"આ માટે પાલિકાના આઠ હજાર કર્મચારીઓને વિવિધ ફરજ પર તહેનાત કરાયા છે.","size":236204,"progress":100,"type":"mp3"}],"officialSummary":"This dataset covers general domains, reflects real-world interactions, and contains transcribed text content. Our dataset will help improve model performance on real-world, complex tasks. Its quality has been validated by multiple AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes, our datasets are all GDPR, CCPA, PIPL complied.","dataexampl":null,"datakeyword":["gujarati speech dataset","gujarati asr dataset","gujarati language dataset","indian language speech dataset","low resource language dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Data Type,Language","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechRec","dataShowType":"[{\"code\":\"0\",\"language\":\"ZH\"},{\"code\":\"1\",\"language\":\"ZH\"},{\"code\":\"2\",\"language\":\"EN,PT,DE,KO,FR,ES\"},{\"code\":\"3\",\"language\":\"EN\"},{\"code\":\"4\",\"language\":\"JP\"}]","productNameEn":"161 Hours - Gujarati(India) Scripted Monologue speech dataset","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
161 Hours Gujarati Speech Dataset with Transcriptions for ASR Training
gujarati speech dataset
gujarati asr dataset
gujarati language dataset
indian language speech dataset
low resource language dataset
This dataset covers general domains, reflects real-world interactions, and contains transcribed text content. Our dataset will help improve model performance on real-world, complex tasks. Its quality has been validated by multiple AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes. Our datasets are all GDPR, CCPA, and PIPL compliant.
This is a paid dataset for commercial use, research purposes, and more. Licensed, ready-made datasets help jump-start AI projects.
Specifications
Format
16k Hz, 16 bit, wav, mono channel
Recording condition
Normal environment
Content category
Chat and comment,etc.
Country
India(IND)
Language
Gujarati
Accuracy
Word Accuracy Rate 95%
Features of annotation
Transcription text
Language(Region) Code
gu_IN,etc.
Sample
Audio
અયાન મુખર્જીની ફિલ્મ બ્રહ્માસ્ત્રમાં એક સાથે કામ કરવાનો મોકો મળ્યો.
Audio
અમેરિકીઓને અમદાવાદના મેન્યુફેકચરીંગ અને ટેકસ ટાઈલ્સ ક્ષેત્રે વધુ રોકાણની તકો દેખાઈ રહી છે.
Audio
આ માટે પાલિકાના આઠ હજાર કર્મચારીઓને વિવિધ ફરજ પર તહેનાત કરાયા છે.