[{"@type":"PropertyValue","name":"Data content","value":"Traditional Chinese SMS corpus text data"},{"@type":"PropertyValue","name":"Data size","value":"10 million"},{"@type":"PropertyValue","name":"Collecting period","value":"The year 2014"},{"@type":"PropertyValue","name":"Storage format","value":"txt"},{"@type":"PropertyValue","name":"Language","value":"Chinese"}]
{"id":182,"datatype":"1","titleimg":"https://res.datatang.com/asset/productNew/APY170501235.png?Expires=2007353643&OSSAccessKeyId=LTAI5tQwXnJZbubgVfVa1ep9&Signature=VwdnvmZ0I1U9Oi9Pg4H7tsVGm4Y%3D","type1":"183","type1str":null,"type2":"184","type2str":null,"dataname":"Traditional Chinese SMS Corpus – 10 Million Conversational Texts","datazy":[{"title":"Data content","content":"Traditional Chinese SMS corpus text data","desc":"Data content"},{"title":"Data size","content":"10 million","desc":"Data size"},{"title":"Collecting period","content":"The year 2,014","desc":"Collecting period"},{"title":"Storage format","content":"txt","desc":"Storage format"},{"title":"Language","content":"Chinese","desc":"Language"}],"datatag":"Traditional Chinese,SMS","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":["10 million pieces","traditional Chinese SMS corpus","stored in txt format"],"samplePresentation":[{"name":"/data/apps/damp/temp/ziptemp/APY170501235_demo1714039200265/APY170501235_demo/APY170501235_demo.jpg","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170501235_demo1714039200265/APY170501235_demo/APY170501235_demo.jpg?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=Qfj2xXvZeZcrZ0ANe5GbhFz3ahs%3D","intro":"","size":0,"progress":100,"type":"jpg"}],"officialSummary":"This dataset is a large-scale Traditional Chinese conversational text dataset consisting of 10 million real-world SMS messages written in spoken-style Traditional Chinese. 
All content is provided in plain text (TXT) format, the dataset is well suited for training and evaluating large language models, dialogue systems, Chinese conversational text analysis and related tasks.","dataexampl":null,"datakeyword":["Traditional Chinese SMS corpus","NLP Text Dataset","Traditional Chinese dataset","Chinese NLP dataset","Chinese text corpus","NLU training data","SMS corpus"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"nlu","dataShowType":"[{\"code\":\"1\",\"language\":\"ZH\"},{\"code\":\"2\",\"language\":\"EN,JP,PT,DE,KO,FR,ES\"},{\"code\":\"3\",\"language\":\"EN\"}]","productNameEn":"10 Million Traditional Chinese Oral Message Data","BGimg":"","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
https://www.nexdata.ai/shujutang/static/image/index/datatang_wenben_default.webp
[{"@type":"ImageObject","embedUrl":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170501235_demo1714039200265/APY170501235_demo/APY170501235_demo.jpg?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=Qfj2xXvZeZcrZ0ANe5GbhFz3ahs%3D"}]
Traditional Chinese SMS Corpus – 10 Million Conversational Texts
Traditional Chinese SMS corpus
NLP Text Dataset
Traditional Chinese dataset
Chinese NLP dataset
Chinese text corpus
NLU training data
SMS corpus
This dataset is a large-scale Traditional Chinese conversational text dataset consisting of 10 million real-world SMS messages written in spoken-style Traditional Chinese. All content is provided in plain text (TXT) format, and the dataset is well suited for training and evaluating large language models, dialogue systems, Chinese conversational text analysis, and related tasks.
This is a paid dataset for commercial use, research purposes, and more. Licensed ready-made datasets help jump-start AI projects.
![Specifications]()
Specifications
Data content
Traditional Chinese SMS corpus text data
Collecting period
The year 2014
![Sample]()
Sample
![Recommended Datasets]()
Recommended Datasets
Tell Us Your Special Needs
5a621cba-b9e5-4e33-a37c-c5c3ab2ca8dc