{"id":1390,"datatype":"1","titleimg":"https://www.nexdata.ai/shujutang/static/image/index/datatang_yuyin_default.webp","type1":"165","type1str":null,"type2":"166","type2str":null,"dataname":"203 Hours Tamil Speech Dataset – Conversation & Monologue Audio","datazy":[{"title":"Format","desc":"Format","content":"16k Hz, 16 bit, wav, mono channel;"},{"title":"Recording environment","desc":"Recording environment","content":"Low background noise;"},{"title":"Country","desc":"Country","content":"India(IND);"},{"title":"Language(Region) Code","desc":"Language(Region) Code","content":"ta-IN;"},{"title":"Language","desc":"Language","content":"Tamil;"},{"title":"Features of annotation","desc":"Features of annotation","content":"Transcription text, timestamp, speaker ID, gender, noise."},{"title":"Accuracy Rate","desc":"Accuracy Rate","content":"Word Accuracy Rate (WAR) 98%"}],"datatag":"Tamil,India","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[{"name":"/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-8.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-8.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=foGBcxzZ6YZUzUGekvu5FfUve0I%3D","intro":"தமிழ்நாட்டு வரலாற்றில், இது தான் முதல் முறை ஒரு அரசாங்கம் கடனை வாங்கி வட்டி 
கட்டுது.","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-2.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-2.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=fNREn52miYKDMbPUp1tVAOHQEdo%3D","intro":"உறுப்பினர் சொல்ற கருத்து, நல்ல கருத்து. ஏற்கனவே எங்களுக்கு ஒரு idea இருஞ்சு. என்ன சொல்ல மறந்துட்டாரு? இதே கருத்தை நான் ரெண்டாயிரத்தி பதினேழில் கூறினேன்.","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-10.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-10.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=esuHdkwAgKa0FUymtrzUUKG3axM%3D","intro":"ஒரே ஒரு நிமிஷம் பதினஞ்சாவது நிதிக்குழு இந்த அதிமுகவின்","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-1.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-1.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=zWCa8C6RzIbZKg764Mx0i%2FEbJps%3D","intro":"வீட்டிக்குள்ள பூட்டி வச்சுருந்திங்கன்னா, இது யாருங்க சொத்து? மக்கள் சொத்து இல்லையா? எதற்காக இது செய்யப்பட்டது? 
சட்டமன்றத்துல சொல்றிங்க மாண்புமிகு","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-6.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY240315002_demo1725271202051/APY240315002_demo/000003-1-6.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=tZnJh1ssOZfbfPebKrl8N6BkS%2FA%3D","intro":"யாருக்கு வெற்றி?","size":0,"progress":100,"type":"mp3"}],"officialSummary":"203 hours of real-world Tamil speech data featuring both casual conversations and scripted monologues. All audio was recorded from native Tamil speakers across various regions, reflecting real-world linguistic and acoustic diversity. Each sample is manually transcribed and annotated with speaker ID, gender, and other metadata, making it highly suitable for automatic speech recognition (ASR), speech synthesis (TTS), speaker identification, and natural language processing (NLP) applications. The dataset has been validated by leading AI companies and is particularly valuable for training robust AI models for underrepresented languages. 
All data collection, processing, and usage comply strictly with global data privacy laws including GDPR, CCPA, and PIPL, ensuring legal and ethical use.","dataexampl":null,"datakeyword":["Tamil conversation dataset","real-world Tamil speech","transcribed Tamil audio","speech recognition in Tamil","multilingual audio training","Tamil ASR data","Tamil voice corpus","Tamil monologue dataset","smartphone Tamil dataset","speech recognition Tamil dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Data Type,Language","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechRec","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
203 Hours Tamil Speech Dataset – Conversation & Monologue Audio
Tamil conversation dataset
real-world Tamil speech
transcribed Tamil audio
speech recognition in Tamil
multilingual audio training
Tamil ASR data
Tamil voice corpus
Tamil monologue dataset
smartphone Tamil dataset
speech recognition Tamil dataset
203 hours of real-world Tamil speech data featuring both casual conversations and scripted monologues. All audio was recorded from native Tamil speakers across various regions, reflecting real-world linguistic and acoustic diversity. Each sample is manually transcribed and annotated with speaker ID, gender, and other metadata, making it highly suitable for automatic speech recognition (ASR), speech synthesis (TTS), speaker identification, and natural language processing (NLP) applications. The dataset has been validated by leading AI companies and is particularly valuable for training robust AI models for underrepresented languages. All data collection, processing, and usage comply strictly with global data privacy laws including GDPR, CCPA, and PIPL, ensuring legal and ethical use.
This is a paid dataset for commercial use, research purposes, and more. Licensed ready-made datasets help jump-start AI projects.
தமிழ்நாட்டு வரலாற்றில், இது தான் முதல் முறை ஒரு அரசாங்கம் கடனை வாங்கி வட்டி கட்டுது.
Audio
உறுப்பினர் சொல்ற கருத்து, நல்ல கருத்து. ஏற்கனவே எங்களுக்கு ஒரு idea இருஞ்சு. என்ன சொல்ல மறந்துட்டாரு? இதே கருத்தை நான் ரெண்டாயிரத்தி பதினேழில் கூறினேன்.
Audio
ஒரே ஒரு நிமிஷம் பதினஞ்சாவது நிதிக்குழு இந்த அதிமுகவின்
Audio
வீட்டிக்குள்ள பூட்டி வச்சுருந்திங்கன்னா, இது யாருங்க சொத்து? மக்கள் சொத்து இல்லையா? எதற்காக இது செய்யப்பட்டது? சட்டமன்றத்துல சொல்றிங்க மாண்புமிகு