[{"@type":"PropertyValue","name":"Data size","value":"120,000 questions"},{"@type":"PropertyValue","name":"Image resolution","value":"short side resolution ≥ 500 pixels"},{"@type":"PropertyValue","name":"Subject categories","value":"arts, business, science, medicine, humanities and social sciences, engineering"},{"@type":"PropertyValue","name":"QA length","value":"question length ≥ 10 Chinese characters, and the answer and analysis length ≥ 40 characters"},{"@type":"PropertyValue","name":"Collection equipment","value":"mobile phone, scanner"},{"@type":"PropertyValue","name":"Language","value":"Chinese"},{"@type":"PropertyValue","name":"Diversity","value":"multiple disciplines, multiple image types, multiple types of questions"},{"@type":"PropertyValue","name":"Data format","value":".jpg, .png, .json"}]
{"id":1751,"datatype":"1","titleimg":"https://www.nexdata.ai/shujutang/static/image/index/datatang_tuxiang_default.webp","type1":"226","type1str":null,"type2":"254","type2str":null,"dataname":"120,000 Questions - Multi-discipline Multi-modal Understanding & Reasoning Data","datazy":[{"title":"Data size","content":"120,000 questions"},{"title":"Image resolution","content":"short side resolution ≥ 500 pixels"},{"title":"Subject categories","content":"arts, business, science, medicine,humanities and social sciences, engineering"},{"title":"QA length","content":"question length ≥ 10 Chinese characters, and the answer and analysis length ≥ 40 characters"},{"title":"Collection equipment","content":"mobile phone, scanner"},{"title":"Language","content":"Chinese"},{"title":"Diversity","content":"multiple disciplines, multiple image types, multiple types of questions"},{"title":"Data format","content":".jpg, .png, .josn"}],"datatag":"VQA,VLM,LLM,MMMU","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":null,"samplePresentation":[],"officialSummary":"120,000 Questions - Multi-discipline Multi-modal Understanding & Reasoning Data, covering six major academic fields such as art, engineering, and medicine, as well as various specialized sub-domains. It includes professional-level mixed text and visual questions (e.g., charts, engineering drawings, and art analysis) that have been quality-checked by specialists. This dataset enables deep understanding of textual and visual information and can enhance the logical reasoning and knowledge application capabilities of large models. We strictly adhere to data protection laws and privacy regulations, ensuring the protection of users' privacy and legal rights throughout the data collection, storage, and usage processes. 
All data comply with GDPR, CCPA, and PIPL.","dataexampl":null,"datakeyword":["VQA","VLM","LLM","MMMU"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"llm","BGimg":"","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
120,000 Questions - Multi-discipline Multi-modal Understanding & Reasoning Data
VQA
VLM
LLM
MMMU
120,000 Questions - Multi-discipline Multi-modal Understanding & Reasoning Data, covering six major academic fields such as art, engineering, and medicine, as well as various specialized sub-domains. It includes professional-level mixed text and visual questions (e.g., charts, engineering drawings, and art analysis) that have been quality-checked by specialists. This dataset enables deep understanding of textual and visual information and can enhance the logical reasoning and knowledge application capabilities of large models. We strictly adhere to data protection laws and privacy regulations, ensuring the protection of users' privacy and legal rights throughout the data collection, storage, and usage processes. All data comply with GDPR, CCPA, and PIPL.
This is a paid dataset for commercial use, research purposes, and more. Licensed ready-made datasets help jump-start AI projects.
Specifications
Data size
120,000 questions
Image resolution
short side resolution ≥ 500 pixels
Subject categories
arts, business, science, medicine, humanities and social sciences, engineering
QA length
question length ≥ 10 Chinese characters, and the answer and analysis length ≥ 40 characters
Collection equipment
mobile phone, scanner
Language
Chinese
Diversity
multiple disciplines, multiple image types, multiple types of questions