XMLParser.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. /*
  2. * Author: Patrick-Christopher Mattulat
  3. * Company: Lynar Studios
  4. * E-Mail: webmaster@lynarstudios.com
  5. * Created: 2020-11-26
  6. * Changed: 2021-04-23
  7. *
  8. * */
  9. #include <ls_std/io/xml/XMLParser.hpp>
  10. #include <ls_std/exception/IllegalArgumentException.hpp>
  11. #include <ls_std/boxing/String.hpp>
  12. ls_std::XMLParser::XMLParser(const std::shared_ptr<ls_std::XMLDocument> &_document) : ls_std::Class("XMLParser")
  13. {
  14. this->_assignDocument(_document);
  15. this->_reset();
  16. }
  17. std::shared_ptr<ls_std::XMLDocument> ls_std::XMLParser::getDocument()
  18. {
  19. return this->document;
  20. }
  21. void ls_std::XMLParser::parse(const ls_std::byte_field &_data)
  22. {
  23. this->_parse(_data);
  24. this->_mergeNodes();
  25. this->_reset();
  26. }
  27. void ls_std::XMLParser::setDocument(const std::shared_ptr<ls_std::XMLDocument> &_document)
  28. {
  29. this->_assignDocument(_document);
  30. }
  31. std::pair<std::string, std::string> ls_std::XMLParser::_readAttribute_(const ls_std::byte_field &_data)
  32. {
  33. return ls_std::XMLParser::_parseAttribute(_data);
  34. }
  35. std::list<std::pair<std::string, std::string>> ls_std::XMLParser::_readAttributes_(ls_std::byte_field _data)
  36. {
  37. return ls_std::XMLParser::_parseAttributes(std::move(_data));
  38. }
  39. void ls_std::XMLParser::_analyze(const ls_std::byte_field &_data, std::string::size_type _index)
  40. {
  41. this->_isDeclaration(_data, _index);
  42. this->_isClosingTag(_data, _index);
  43. this->_isOpeningTag(_data, _index);
  44. this->_isValue(_data, _index);
  45. }
  46. void ls_std::XMLParser::_assignDocument(const std::shared_ptr<ls_std::XMLDocument> &_document)
  47. {
  48. if (_document == nullptr)
  49. {
  50. throw ls_std::IllegalArgumentException{};
  51. }
  52. this->document = _document;
  53. }
  54. std::shared_ptr<ls_std::XMLDeclaration> ls_std::XMLParser::_createDeclaration(const std::list<std::pair<std::string, std::string>> &_attributes)
  55. {
  56. std::shared_ptr<ls_std::XMLDeclaration> declaration = std::make_shared<ls_std::XMLDeclaration>("1.0");
  57. std::pair<std::string, std::string> attribute = ls_std::XMLParser::_findAttribute(_attributes, "version");
  58. if (!attribute.first.empty())
  59. {
  60. declaration->setVersion(attribute.second);
  61. }
  62. attribute = ls_std::XMLParser::_findAttribute(_attributes, "encoding");
  63. if (!attribute.first.empty())
  64. {
  65. declaration->setEncoding(attribute.second);
  66. }
  67. attribute = ls_std::XMLParser::_findAttribute(_attributes, "standalone");
  68. if (!attribute.first.empty())
  69. {
  70. declaration->setStandalone(attribute.second);
  71. }
  72. return declaration;
  73. }
  74. std::shared_ptr<ls_std::XMLNode> ls_std::XMLParser::_createNode(const std::list<std::pair<std::string, std::string>> &_attributes, const std::string &_name)
  75. {
  76. std::shared_ptr<ls_std::XMLNode> node = std::make_shared<ls_std::XMLNode>(_name);
  77. std::shared_ptr<ls_std::XMLAttribute> attribute{};
  78. for (const auto &parsedAttribute : _attributes)
  79. {
  80. attribute = std::make_shared<ls_std::XMLAttribute>(parsedAttribute.first);
  81. attribute->setValue(parsedAttribute.second);
  82. node->addAttributeToEnd(attribute);
  83. }
  84. return node;
  85. }
  86. std::pair<std::string, std::string> ls_std::XMLParser::_findAttribute(const std::list<std::pair<std::string, std::string>> &_attributes, const std::string &_name)
  87. {
  88. std::pair<std::string, std::string> attribute{};
  89. for (const auto &currentAttribute : _attributes)
  90. {
  91. if (currentAttribute.first == _name)
  92. {
  93. attribute = currentAttribute;
  94. break;
  95. }
  96. }
  97. return attribute;
  98. }
  99. size_t ls_std::XMLParser::_findAttributeEndPosition(const ls_std::byte_field &_data)
  100. {
  101. std::string::size_type position = std::string::npos;
  102. std::string::size_type counter{};
  103. for (char letter : _data)
  104. {
  105. if (letter == '"')
  106. {
  107. counter++;
  108. }
  109. if (counter == 2)
  110. {
  111. break;
  112. }
  113. position++;
  114. }
  115. return position;
  116. }
  117. ls_std::byte_field ls_std::XMLParser::_getNextTagString(const ls_std::byte_field &_data, std::string::size_type _index)
  118. {
  119. ls_std::byte_field tag{};
  120. size_t closingCharacterPosition = _index + _data.substr(_index).find('>');
  121. if (closingCharacterPosition != std::string::npos)
  122. {
  123. tag = _data.substr(_index, (closingCharacterPosition - _index) + 1);
  124. }
  125. return tag;
  126. }
  127. void ls_std::XMLParser::_isClosingTag(const ls_std::byte_field &_data, std::string::size_type _index)
  128. {
  129. if (this->mode == XML_PARSE_MODE_ANALYZE && _data.substr(_index, 2) == "</")
  130. {
  131. this->mode = XML_PARSE_MODE_CLOSING_TAG;
  132. }
  133. }
  134. void ls_std::XMLParser::_isDeclaration(const ls_std::byte_field &_data, std::string::size_type _index)
  135. {
  136. if (_data.substr(_index, 5) == "<?xml")
  137. {
  138. this->mode = XML_PARSE_MODE_DECLARATION;
  139. }
  140. }
  141. void ls_std::XMLParser::_isOpeningTag(const ls_std::byte_field &_data, std::string::size_type _index)
  142. {
  143. if (this->mode == XML_PARSE_MODE_ANALYZE && _data.substr(_index, 1) == "<")
  144. {
  145. this->mode = XML_PARSE_MODE_OPENING_TAG;
  146. }
  147. }
  148. void ls_std::XMLParser::_isValue(const ls_std::byte_field &_data, std::string::size_type _index)
  149. {
  150. if (this->mode == XML_PARSE_MODE_ANALYZE)
  151. {
  152. std::string::size_type end = _data.substr(_index).find('<');
  153. bool isValue = _data[_index - 1] == '>' && end != std::string::npos && end > 0;
  154. if (isValue)
  155. {
  156. ls_std::String value{_data.substr(_index, end)};
  157. if (!value.contains("\n") && !value.contains("\r\n"))
  158. {
  159. this->mode = XML_PARSE_MODE_VALUE;
  160. }
  161. }
  162. }
  163. }
  164. void ls_std::XMLParser::_mergeNodes()
  165. {
  166. while (this->maxLevel > 1)
  167. {
  168. this->_mergeNodesOnCurrentLevel();
  169. this->maxLevel -= 1;
  170. }
  171. this->document->setRootElement(this->parseData.front().node);
  172. }
  173. void ls_std::XMLParser::_mergeChildrenToParentNode(const std::shared_ptr<ls_std::XMLNode> &_parent, std::list<ls_std::XMLParseData>::iterator &_iterator, uint8_t _parentLevel)
  174. {
  175. do
  176. {
  177. _iterator++;
  178. if (_iterator == this->parseData.end())
  179. {
  180. break;
  181. }
  182. else
  183. {
  184. if (_iterator->level == this->maxLevel)
  185. {
  186. _parent->addChildToEnd(_iterator->node);
  187. }
  188. }
  189. } while (_iterator->level > _parentLevel);
  190. }
  191. void ls_std::XMLParser::_mergeNodesOnCurrentLevel()
  192. {
  193. auto iterator = this->parseData.begin();
  194. uint8_t parentLevel = this->maxLevel - 1;
  195. while (iterator != this->parseData.end())
  196. {
  197. if (iterator->level == parentLevel)
  198. {
  199. this->_mergeChildrenToParentNode(iterator->node, iterator, parentLevel);
  200. }
  201. else
  202. {
  203. iterator++;
  204. }
  205. }
  206. }
  207. void ls_std::XMLParser::_parse(const ls_std::byte_field &_data)
  208. {
  209. for (std::string::size_type index = 0; index < _data.size(); index++)
  210. {
  211. switch (this->mode)
  212. {
  213. case XML_PARSE_MODE_ANALYZE:
  214. {
  215. this->_analyze(_data, index);
  216. }
  217. break;
  218. case XML_PARSE_MODE_DECLARATION:
  219. {
  220. --index;
  221. index = this->_parseDeclaration(_data, index);
  222. this->mode = XML_PARSE_MODE_ANALYZE;
  223. }
  224. break;
  225. case XML_PARSE_MODE_OPENING_TAG:
  226. {
  227. --index;
  228. index = ls_std::XMLParser::_parseOpeningTag(_data, index);
  229. this->mode = XML_PARSE_MODE_ANALYZE;
  230. }
  231. break;
  232. case XML_PARSE_MODE_VALUE:
  233. {
  234. --index;
  235. index = ls_std::XMLParser::_parseValue(_data, index);
  236. this->mode = XML_PARSE_MODE_ANALYZE;
  237. }
  238. break;
  239. case XML_PARSE_MODE_CLOSING_TAG:
  240. {
  241. --index;
  242. index = ls_std::XMLParser::_parseClosingTag(_data, index);
  243. this->mode = XML_PARSE_MODE_ANALYZE;
  244. }
  245. break;
  246. }
  247. }
  248. }
  249. std::pair<std::string, std::string> ls_std::XMLParser::_parseAttribute(const ls_std::byte_field &_data)
  250. {
  251. std::pair<std::string, std::string> parsedAttribute{};
  252. parsedAttribute.first = _data.substr(0, _data.find('='));
  253. parsedAttribute.second = _data.substr(_data.find('"') + 1);
  254. parsedAttribute.second.pop_back();
  255. return parsedAttribute;
  256. }
  257. std::list<std::pair<std::string, std::string>> ls_std::XMLParser::_parseAttributes(ls_std::byte_field _data)
  258. {
  259. std::list<std::pair<std::string, std::string>> attributes{};
  260. size_t position = _data.find(' ');
  261. _data = position == std::string::npos ? "" : _data.substr(position);
  262. while (!_data.empty())
  263. {
  264. do
  265. {
  266. position = _data.find(' ') + 1;
  267. } while (_data[position] == ' ');
  268. if (_data.size() <= 3 && ls_std::String{_data}.endsWith(">"))
  269. {
  270. break;
  271. }
  272. std::string attributeString = _data.substr(position, ls_std::XMLParser::_findAttributeEndPosition(_data) + 1);
  273. attributes.push_back(ls_std::XMLParser::_parseAttribute(attributeString));
  274. _data = _data.substr(position + attributeString.size());
  275. }
  276. return attributes;
  277. }
  278. size_t ls_std::XMLParser::_parseClosingTag(const ls_std::byte_field &_data, std::string::size_type _index)
  279. {
  280. std::string tagString = ls_std::XMLParser::_getNextTagString(_data, _index);
  281. this->currentLevel -= 1;
  282. return tagString.empty() ? _index : _index + (tagString.size() - 1);
  283. }
  284. size_t ls_std::XMLParser::_parseDeclaration(const ls_std::byte_field &_data, std::string::size_type _index)
  285. {
  286. std::string tagString = ls_std::XMLParser::_getNextTagString(_data, _index);
  287. bool isValidTagString = !tagString.empty();
  288. if (isValidTagString)
  289. {
  290. std::shared_ptr<ls_std::XMLDeclaration> declaration = this->_createDeclaration(ls_std::XMLParser::_parseAttributes(tagString));
  291. this->document->setDeclaration(declaration);
  292. }
  293. return !isValidTagString ? _index : _index + (tagString.size() - 1);
  294. }
  295. size_t ls_std::XMLParser::_parseOpeningTag(const ls_std::byte_field &_data, std::string::size_type _index)
  296. {
  297. ls_std::String tagString{ls_std::XMLParser::_getNextTagString(_data, _index)};
  298. bool isValidTagString = !tagString.toString().empty();
  299. ls_std::XMLParseData singleParseData{};
  300. if (isValidTagString)
  301. {
  302. std::shared_ptr<ls_std::XMLNode> node = ls_std::XMLParser::_createNode(ls_std::XMLParser::_parseAttributes(tagString), ls_std::XMLParser::_parseTagName(tagString));
  303. singleParseData.level = this->currentLevel;
  304. singleParseData.node = node;
  305. this->parseData.push_back(singleParseData);
  306. if (!tagString.endsWith("/>"))
  307. {
  308. this->currentLevel += 1;
  309. this->_setMaxLevel();
  310. }
  311. }
  312. return !isValidTagString ? _index : _index + (tagString.toString().size() - 1);
  313. }
  314. ls_std::byte_field ls_std::XMLParser::_parseTagName(const ls_std::byte_field &_data)
  315. {
  316. std::string::size_type position = _data.find(' ');
  317. if (position == std::string::npos)
  318. {
  319. position = _data.find('>');
  320. }
  321. return _data.substr(1, position - 1);
  322. }
  323. size_t ls_std::XMLParser::_parseValue(const ls_std::byte_field &_data, std::string::size_type _index)
  324. {
  325. ls_std::byte_field value = _data.substr(_index, _data.substr(_index).find('<'));
  326. this->parseData.back().node->setValue(value);
  327. return _index + (value.size() - 1);
  328. }
  329. void ls_std::XMLParser::_reset()
  330. {
  331. this->currentLevel = 1;
  332. this->maxLevel = 1;
  333. this->mode = XML_PARSE_MODE_ANALYZE;
  334. this->parseData.clear();
  335. }
  336. void ls_std::XMLParser::_setMaxLevel()
  337. {
  338. if (this->currentLevel > this->maxLevel)
  339. {
  340. this->maxLevel = this->currentLevel;
  341. }
  342. }