[{"@type":"PropertyValue","name":"Data size","value":"29,954 images, including 8,798 images in Khmer (Cambodian), 11,575 images in Lao, and 9,581 images in Burmese"},{"@type":"PropertyValue","name":"Collecting environment","value":"Natural scenes: shop signs, posters, warnings, road signs, food packages, billboards, street views, etc. Document Photograph: cards, receipts, newspapers, books (documents, newspapers, books, test pap"},{"@type":"PropertyValue","name":"Data diversity","value":"multiple languages, multiple collection types, multiple shooting angles"},{"@type":"PropertyValue","name":"Device","value":"cellphone, computer"},{"@type":"PropertyValue","name":"Data format","value":"the image format is a common one such as .png"},{"@type":"PropertyValue","name":"Accuracy rate","value":"according to the collection requirements, the collection accuracy is not less than 95%"}]
{"id":1758,"datatype":"1","titleimg":"https://www.nexdata.ai/shujutang/static/image/index/datatang_tuxiang_default.webp","type1":"147","type1str":null,"type2":"150","type2str":null,"dataname":"29,954 Images - Southeast Asian OCR Dataset (Khmer, Lao, Burmese)","datazy":[{"title":"Data size","content":"29,954 images, including 8,798 images in Khmer (Cambodian), 11,575 images in Lao, and 9,581 images in Burmese","desc":"Data size"},{"title":"Collecting environment","content":"Natural scenes: shop signs, posters, warnings, road signs, food packages, billboards, street views, etc. Document Photograph: cards, receipts, newspapers, books (documents, newspapers, books, test pap","desc":"Collecting environment"},{"title":"Data diversity","content":"multiple languages, multiple collection types, multiple shooting angles","desc":"Data diversity"},{"title":"Device","content":"cellphone, computer","desc":"Device"},{"title":"Data format","content":"the image format is a common one such as .png","desc":"Data format"},{"title":"Accuracy rate","content":"according to the collection requirements, the collection accuracy is not less than 95%","desc":"Accuracy rate"}],"datatag":"OCR,Southeast Asian Languages,Natural Scenes,Document Photograph,Electronic Scenes","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[{"name":"000403_Khmer.png","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20251203171931/000403_Khmer.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=L2J7muvDBy0QPwH2rM1u6k6w5JM%3D","intro":"","size":227016,"progress":100,"type":"jpg"},{"name":"002617_Khmer.png","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20251203171931/002617_Khmer.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=ROYTGM4k6aYF0orFqlmCCSxQMx8%3D","intro":"","size":9170388,"progress":100,"type":"jpg"},{"name":"016759_Burmese.png","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20251203171931/016759_Burmese.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=XBOxiiJt6plkUNjHNn3x3FcjZ1k%3D","intro":"","size":1105131,"progress":100,"type":"jpg"}],"officialSummary":"29,954 Images - OCR Collection Data in Southeast Asian Languages, including Khmer (Cambodia), Lao and Burmese. The diversity of collection includes multiple languages, multiple collection types, multiple shooting angles. This set of data can be used for Southeast Asian language OCR tasks.","dataexampl":null,"datakeyword":["Southeast Asian OCR dataset","Khmer OCR data","Lao OCR dataset","Burmese OCR dataset","natural scene OCR","minority language text recognition","multilingual OCR dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Data Type,Language","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"ocr","dataShowType":"[{\"code\":\"0\",\"language\":\"ZH\"},{\"code\":\"1\",\"language\":\"ZH\"},{\"code\":\"2\",\"language\":\"EN,PT,DE\"},{\"code\":\"3\",\"language\":\"EN\"},{\"code\":\"4\",\"language\":\"JP\"}]","productNameEn":"29,954 Images - OCR Collection Data in Southeast Asian Languages","BGimg":"","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"],"firstList":[{"name":"030294_Laos.png","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20251203171931/030294_Laos.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=eJt2N7fokJRVZfF9OWTx3%2BOWy0A%3D","intro":"","size":8952795,"progress":100,"type":"jpg"}]}
29,954 Images - Southeast Asian OCR Dataset (Khmer, Lao, Burmese)
Southeast Asian OCR dataset
Khmer OCR data
Lao OCR dataset
Burmese OCR dataset
natural scene OCR
minority language text recognition
multilingual OCR dataset
29,954 Images - OCR Collection Data in Southeast Asian Languages, including Khmer (Cambodia), Lao and Burmese. The diversity of collection includes multiple languages, multiple collection types, multiple shooting angles. This set of data can be used for Southeast Asian language OCR tasks.
This is a paid datasets for commercial use, research purpose and more. Licensed ready made datasets help jump-start AI projects.
Specifications
Data size
29,954 images, including 8,798 images in Khmer (Cambodian), 11,575 images in Lao, and 9,581 images in Burmese
Collecting environment
Natural scenes: shop signs, posters, warnings, road signs, food packages, billboards, street views, etc. Document Photograph: cards, receipts, newspapers, books (documents, newspapers, books, test pap