[{"@type":"PropertyValue","name":"Content","value":"Science subjects questions text;"},{"@type":"PropertyValue","name":"Data Size","value":"About 32 million;"},{"@type":"PropertyValue","name":"Data Fields","value":"Contains title, answer, parse, subject, grade, question type;"},{"@type":"PropertyValue","name":"Subject categories","value":"Primary school, middle school, high school and university science subjects;"},{"@type":"PropertyValue","name":"Format","value":"Jsonl;"},{"@type":"PropertyValue","name":"Language","value":"Chinese;"},{"@type":"PropertyValue","name":"Data processing","value":"Subject, questions, parse and answers were analyzed, formula conversion and table format conversion were done, and content was also cleaned"}]
{"id":1735,"datatype":"1","titleimg":"https://www.nexdata.ai/shujutang/static/image/index/datatang_tuxiang_default.webp","type1":"226","type1str":null,"type2":"227","type2str":null,"dataname":"32M Science QA Dataset – Answers & Parsing for LLMs","datazy":[{"title":"Content","content":"Science subjects questions text;","desc":"Content"},{"title":"Data Size","content":"About 32 million;","desc":"Data Size"},{"title":"Data Fields","content":"Contains title, answer, parse, subject, grade, question type;","desc":"Data Fields"},{"title":"Subject categories","content":"Primary school, middle school, high school and university science subjects;","desc":"Subject categories"},{"title":"Format","content":"Jsonl;","desc":"Format"},{"title":"Language","content":"Chinese;","desc":"Language"},{"title":"Data processing","content":"Subject, questions, parse and answers were analyzed, formula conversion and table format conversion were done, and content was also cleaned","desc":"Data processing"}],"datatag":"Science Subjects 
Questions,LLM,Text","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[{"name":"1.png","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20250714165128/1.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=HwitiNqnx0KPlNvVZQYQ8z2l2d8%3D","intro":"","size":306218,"progress":100,"type":"jpg"},{"name":"2.png","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20250714165128/2.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=MUg8o9HyNARMc23IFIgnHm6hU0g%3D","intro":"","size":330327,"progress":100,"type":"jpg"},{"name":"3.png","url":"https://storage-product.datatang.com/damp/product/instructions_zh/20250714165128/3.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=JWnDfi%2Fszq2dcEba4kbjjUzM1ng%3D","intro":"","size":71115,"progress":100,"type":"jpg"}],"officialSummary":"32 million structured science questions covering mathematics, physics, chemistry, and biology across primary, middle, high school, and university levels. Each question entry includes a title, answer, solution parsing, question type, subject category, and corresponding grade level. The dataset is designed to support AI training tasks such as large language model development, subject-specific knowledge enhancement, machine reading comprehension, and question-answering systems. It provides a rich resource for educational NLP applications and has been validated for quality and completeness. 
All data complies with global data protection standards including GDPR, CCPA, and PIPL.","dataexampl":null,"datakeyword":["science question dataset","STEM QA dataset","math physics chemistry biology questions","education NLP dataset","AI training data","structured question answer dataset","academic QA dataset","question parsing dataset","K-12 science dataset","university level questions dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"llm","BGimg":"","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
32M Science QA Dataset – Answers & Parsing for LLMs
science question dataset
STEM QA dataset
math physics chemistry biology questions
education NLP dataset
AI training data
structured question answer dataset
academic QA dataset
question parsing dataset
K-12 science dataset
university level questions dataset
32 million structured science questions covering mathematics, physics, chemistry, and biology across primary, middle, high school, and university levels. Each question entry includes a title, answer, solution parsing, question type, subject category, and corresponding grade level. The dataset is designed to support AI training tasks such as large language model development, subject-specific knowledge enhancement, machine reading comprehension, and question-answering systems. It provides a rich resource for educational NLP applications and has been validated for quality and completeness. All data complies with global data protection standards including GDPR, CCPA, and PIPL.
This is a paid dataset for commercial use, research purposes, and more. Licensed, ready-made datasets help jump-start AI projects.