[{"@type":"PropertyValue","name":"Storage format","value":"TXT"},{"@type":"PropertyValue","name":"Data content","value":"Chinese-English Parallel Corpus Data"},{"@type":"PropertyValue","name":"Data size","value":"80.12 million pairs of Chinese-English Parallel Corpus Data."},{"@type":"PropertyValue","name":"Language","value":"Chinese, English"},{"@type":"PropertyValue","name":"Application scenario","value":"machine translation"}]
{"id":147,"datatype":"1","titleimg":"https://res.datatang.com/asset/productNew/APY170101223.png?Expires=2007353638&OSSAccessKeyId=LTAI5tQwXnJZbubgVfVa1ep9&Signature=uu%2BOjBbZoOeVHqwYc1zHqgdwXhE%3D","type1":"183","type1str":null,"type2":"185","type2str":null,"dataname":"Chinese-English Parallel Corpus Dataset (80,120,000 Sentence Pairs) – Translation & NLP","datazy":[{"title":"Storage format","desc":"Storage format","content":"TXT"},{"title":"Data content","desc":"Data content","content":"Chinese-English Parallel Corpus Data"},{"title":"Data size","desc":"Data size","content":"80.12 million pairs of Chinese-English Parallel Corpus Data."},{"title":"Language","desc":"Language","content":"Chinese, English"},{"title":"Application scenario","desc":"Application scenario","content":"machine translation"}],"datatag":"Chinese-English,Parallel Corpus","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":["3,062,170pairs","Chinese, English","4-25 words for each pair"],"samplePresentation":[{"name":"/data/apps/damp/temp/ziptemp/APY170101223_demo1709805600140/APY170101223-demo/zh-en ????.png","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY170101223_demo1709805600140/APY170101223-demo/zh-en%20%3F%3F%3F%3F.png?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=hV7rToYxKOSn4Bomdmt%2Bqtp2maY%3D","intro":"","size":0,"progress":100,"type":"jpg"}],"officialSummary":"This dataset contains 80 million Chinese-English parallel sentences, covering domains such as travel, medicine, daily conversation, and TV scripts. It is stored in txt format, cleaned, desensitized, and quality-checked. 
It can be used as a fundamental dataset for machine translation, bilingual NLP tasks, and other text processing applications.","dataexampl":null,"datakeyword":["Chinese English parallel corpus","Chinese English translation dataset","Chinese English machine translation data","Chinese English bilingual corpus","Chinese English parallel dataset","Chinese English text dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"nlu","BGimg":"","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
This dataset contains 80 million Chinese-English parallel sentences, covering domains such as travel, medicine, daily conversation, and TV scripts. It is stored in txt format, cleaned, desensitized, and quality-checked. It can be used as a fundamental dataset for machine translation, bilingual NLP tasks, and other text processing applications.
This is a paid dataset for commercial use, research purposes, and more. Licensed ready-made datasets help jump-start AI projects.
Specifications
Storage format
TXT
Data content
Chinese-English Parallel Corpus Data
Data size
80.12 million pairs of Chinese-English Parallel Corpus Data.