[{"@type":"PropertyValue","name":"Content","value":"Code questions text;"},{"@type":"PropertyValue","name":"Data Size","value":"About 1 million;"},{"@type":"PropertyValue","name":"Data Fields","value":"Contains title, answer, parse and language;"},{"@type":"PropertyValue","name":"Data Categories","value":"c, c++, java, python, javascript;"},{"@type":"PropertyValue","name":"Format","value":"Jsonl;"},{"@type":"PropertyValue","name":"Language","value":"Chinese;"},{"@type":"PropertyValue","name":"Data processing","value":"Subject, questions, parse and answers were analyzed, and content was also cleaned"}]
{"id":1738,"datatype":"1","titleimg":"https://www.nexdata.ai/shujutang/static/image/index/datatang_tuxiang_default.webp","type1":"226","type1str":null,"type2":"227","type2str":null,"dataname":"1M Chinese Coding Questions Dataset – Python/Java/C++","datazy":[{"title":"Content","content":"Code questions text;","desc":"Content"},{"title":"Data Size","content":"About 1 million;","desc":"Data Size"},{"title":"Data Fields","content":"Contains title, answer, parse and language;","desc":"Data Fields"},{"title":"Data Categories","content":"c, c++, java, python, javascript;","desc":"Data Categories"},{"title":"Format","content":"Jsonl;","desc":"Format"},{"title":"Language","content":"Chinese;","desc":"Language"},{"title":"Data processing","content":"Subject, questions, parse and answers were analyzed, and content was also cleaned","desc":"Data processing"}],"datatag":"Code Questions,LLM,Text","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[{"name":"1.png","url":"https://storage-product.datatang.com/damp/product/sample_presentation/20250715145112/1.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=AcAwLSjMBTpjcuucVZx7%2FNfbngE%3D","intro":"","size":327523,"progress":100,"type":"jpg"},{"name":"2.png","url":"https://storage-product.datatang.com/damp/product/sample_presentation/20250715145112/2.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=RfRkQc1UU%2BvxnyGOUKoCfInKRcU%3D","intro":"","size":90592,"progress":100,"type":"jpg"},{"name":"3.png","url":"https://storage-product.datatang.com/damp/product/sample_presentation/20250715145112/3.png?Expires=4102415999&OSSAccessKeyId=LTAI5tEBeSWUJiqjXvBMsxEu&Signature=ZpdioBOYCwvS2yN1hZOvjNS26Lw%3D","intro":"","size":117057,"progress":100,"type":"jpg"}],"officialSummary
":"This dataset contains 1 million Chinese programming questions with corresponding answers, detailed parses (explanations), and programming language labels. It includes a wide range of questions in C, C++, Python, Java, and JavaScript, making it ideal for training large language models (LLMs) on multilingual code understanding and generation. The questions cover fundamental to advanced topics, supporting AI applications such as code completion, bug fixing, and programming reasoning. This structured dataset enhances model performance in natural language programming tasks and helps reinforce code logic skills in AI systems. All data complies with international privacy regulations including GDPR, CCPA, and PIPL.","dataexampl":null,"datakeyword":["Chinese coding questions dataset","programming QA data","parsed coding problems","Python Java C++ dataset","code generation LLM dataset","Chinese code questions"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"llm","BGimg":"","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
1M Chinese Coding Questions Dataset – Python/Java/C++
Chinese coding questions dataset
programming QA data
parsed coding problems
Python Java C++ dataset
code generation LLM dataset
Chinese code questions
This dataset contains 1 million Chinese programming questions with corresponding answers, detailed parses (explanations), and programming language labels. It includes a wide range of questions in C, C++, Python, Java, and JavaScript, making it ideal for training large language models (LLMs) on multilingual code understanding and generation. The questions cover fundamental to advanced topics, supporting AI applications such as code completion, bug fixing, and programming reasoning. This structured dataset enhances model performance in natural language programming tasks and helps reinforce code logic skills in AI systems. All data complies with international privacy regulations including GDPR, CCPA, and PIPL.
This is a paid dataset for commercial use, research purposes, and more. Licensed, ready-made datasets help jump-start AI projects.
Specifications
Content
Code questions text;
Data Size
About 1 million;
Data Fields
Contains title, answer, parse and language;
Data Categories
C, C++, Java, Python, JavaScript;
Format
JSONL;
Language
Chinese;
Data processing
Subject, questions, parse and answers were analyzed, and content was also cleaned