XmlParser.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /*
  2. * Author: Patrick-Christopher Mattulat
  3. * Co-Author: Claude Sonnet 4.6 (LLM)
  4. * Company: Lynar Studios
  5. * E-Mail: webmaster@lynarstudios.com
  6. * Created: 2020-11-26
  7. * Changed: 2026-06-23
  8. *
  9. * */
  10. #include <ls-std/core/evaluator/NullPointerArgumentEvaluator.hpp>
  11. #include <ls-std/io/xml/XmlParser.hpp>
  12. using ls::standard::core::Class;
  13. using ls::standard::core::NullPointerArgumentEvaluator;
  14. using ls::standard::core::type::byte_field;
  15. using ls::standard::io::XmlAttribute;
  16. using ls::standard::io::XmlDeclaration;
  17. using ls::standard::io::XmlDocument;
  18. using ls::standard::io::XmlNode;
  19. using ls::standard::io::XmlParseMode;
  20. using ls::standard::io::XmlParseParameter;
  21. using ls::standard::io::XmlParser;
  22. using std::find_if;
  23. using std::list;
  24. using std::make_shared;
  25. using std::move;
  26. using std::pair;
  27. using std::shared_ptr;
  28. using std::string;
  29. using std::string_view;
  30. XmlParser::XmlParser(const shared_ptr<XmlDocument> &_document) : Class("XmlParser")
  31. {
  32. this->_assignDocument(_document);
  33. this->_reset();
  34. }
  35. XmlParser::~XmlParser() noexcept = default;
  36. shared_ptr<XmlDocument> XmlParser::getDocument() const
  37. {
  38. return this->document;
  39. }
  40. void XmlParser::parse(const byte_field &_data)
  41. {
  42. this->_parse(_data);
  43. this->_mergeNodes();
  44. this->_reset();
  45. }
  46. void XmlParser::setDocument(const shared_ptr<XmlDocument> &_document)
  47. {
  48. this->_assignDocument(_document);
  49. }
  50. pair<string, string> XmlParser::_readAttribute_(const byte_field &_data)
  51. {
  52. return _parseAttribute(_data);
  53. }
  54. list<pair<string, string>> XmlParser::_readAttributes_(byte_field _data)
  55. {
  56. return _parseAttributes(::move(_data));
  57. }
  58. void XmlParser::_analyze(const byte_field &_data, const string::size_type _index)
  59. {
  60. this->_isDeclaration(_data, _index);
  61. this->_isClosingTag(_data, _index);
  62. this->_isOpeningTag(_data, _index);
  63. this->_isValue(_data, _index);
  64. }
  65. void XmlParser::_assignDocument(const shared_ptr<XmlDocument> &_document)
  66. {
  67. NullPointerArgumentEvaluator{_document, "passed document reference is null!"}.evaluate();
  68. this->document = _document;
  69. }
  70. bool XmlParser::_contains(const string_view _text, const string_view _searchText)
  71. {
  72. return _text.find(_searchText) != string::npos;
  73. }
  74. shared_ptr<XmlDeclaration> XmlParser::_createDeclaration(const list<pair<string, string>> &_attributes)
  75. {
  76. auto declaration = make_shared<XmlDeclaration>("1.0");
  77. pair<string, string> attribute = _findAttribute(_attributes, "version");
  78. if (!attribute.first.empty())
  79. {
  80. declaration->setVersion(attribute.second);
  81. }
  82. attribute = _findAttribute(_attributes, "encoding");
  83. if (!attribute.first.empty())
  84. {
  85. declaration->setEncoding(attribute.second);
  86. }
  87. attribute = _findAttribute(_attributes, "standalone");
  88. if (!attribute.first.empty())
  89. {
  90. declaration->setStandalone(attribute.second);
  91. }
  92. return declaration;
  93. }
  94. shared_ptr<XmlNode> XmlParser::_createNode(const list<pair<string, string>> &_attributes, const string_view _name)
  95. {
  96. auto node = make_shared<XmlNode>(string{_name});
  97. shared_ptr<XmlAttribute> attribute{};
  98. for (const auto &[attributeName, attributeValue] : _attributes)
  99. {
  100. attribute = make_shared<XmlAttribute>(attributeName);
  101. attribute->setValue(attributeValue);
  102. node->addAttributeToEnd(attribute);
  103. }
  104. return node;
  105. }
  106. bool XmlParser::_endsWith(const string_view _text, const string_view _ending)
  107. {
  108. return _text.rfind(_ending) == (_text.size() - _ending.size());
  109. }
  110. pair<string, string> XmlParser::_findAttribute(const list<pair<string, string>> &_attributes, const string &_name)
  111. {
  112. const auto &iterator = find_if(_attributes.begin(), _attributes.end(), [&_name](const pair<string, string> &_attribute) { return _attribute.first == _name; });
  113. return iterator != _attributes.end() ? *iterator : pair<string, string>{};
  114. }
  115. size_t XmlParser::_findAttributeEndPosition(const byte_field &_data)
  116. {
  117. string::size_type position = string::npos;
  118. string::size_type counter{};
  119. for (const char letter : _data)
  120. {
  121. if (letter == '"')
  122. {
  123. counter++;
  124. }
  125. if (counter == 2)
  126. {
  127. break;
  128. }
  129. position++;
  130. }
  131. return position;
  132. }
  133. byte_field XmlParser::_getNextTagString(const string_view _data, const string::size_type _index)
  134. {
  135. byte_field tag{};
  136. if (const size_t closingCharacterPosition = _index + _data.substr(_index).find('>'); closingCharacterPosition != string::npos)
  137. {
  138. tag = _data.substr(_index, (closingCharacterPosition - _index) + 1);
  139. }
  140. return tag;
  141. }
  142. void XmlParser::_isClosingTag(const string_view _data, const string::size_type _index)
  143. {
  144. if (this->mode == XmlParseMode::XML_PARSE_MODE_ANALYZE && _data.substr(_index, 2) == "</")
  145. {
  146. this->mode = XmlParseMode::XML_PARSE_MODE_CLOSING_TAG;
  147. }
  148. }
  149. void XmlParser::_isDeclaration(const string_view _data, const string::size_type _index)
  150. {
  151. if (_data.substr(_index, 5) == "<?xml")
  152. {
  153. this->mode = XmlParseMode::XML_PARSE_MODE_DECLARATION;
  154. }
  155. }
  156. void XmlParser::_isOpeningTag(const string_view _data, const string::size_type _index)
  157. {
  158. if (this->mode == XmlParseMode::XML_PARSE_MODE_ANALYZE && _data.substr(_index, 1) == "<")
  159. {
  160. this->mode = XmlParseMode::XML_PARSE_MODE_OPENING_TAG;
  161. }
  162. }
  163. void XmlParser::_isValue(const byte_field &_data, const string::size_type _index)
  164. {
  165. if (this->mode == XmlParseMode::XML_PARSE_MODE_ANALYZE)
  166. {
  167. const string::size_type end = _data.substr(_index).find('<');
  168. const bool isValue = _data[_index - 1] == '>' && end != string::npos && end > 0; // kept as dedicated variable for readability
  169. if (isValue)
  170. {
  171. const string value{_data.substr(_index, end)};
  172. if (!_contains(string_view{value}, string_view{"\n"}) && !_contains(string_view{value}, string_view{"\r\n"}))
  173. {
  174. this->mode = XmlParseMode::XML_PARSE_MODE_VALUE;
  175. }
  176. }
  177. }
  178. }
  179. void XmlParser::_mergeNodes()
  180. {
  181. while (this->maxLevel > 1)
  182. {
  183. this->_mergeNodesOnCurrentLevel();
  184. this->maxLevel -= 1;
  185. }
  186. this->document->setRootElement(this->parseParameters.front().getNode());
  187. }
  188. void XmlParser::_mergeChildrenToParentNode(const shared_ptr<XmlNode> &_parent, list<XmlParseParameter>::iterator &_iterator, const uint8_t _parentLevel)
  189. {
  190. do
  191. {
  192. ++_iterator;
  193. if (_iterator == this->parseParameters.end())
  194. {
  195. break;
  196. }
  197. else
  198. {
  199. if (_iterator->getLevel() == this->maxLevel)
  200. {
  201. _parent->addChildToEnd(_iterator->getNode());
  202. }
  203. }
  204. } while (_iterator->getLevel() > _parentLevel);
  205. }
  206. void XmlParser::_mergeNodesOnCurrentLevel()
  207. {
  208. auto iterator = this->parseParameters.begin();
  209. const uint8_t parentLevel = this->maxLevel - 1;
  210. while (iterator != this->parseParameters.end())
  211. {
  212. if (iterator->getLevel() == parentLevel)
  213. {
  214. this->_mergeChildrenToParentNode(iterator->getNode(), iterator, parentLevel);
  215. }
  216. else
  217. {
  218. ++iterator;
  219. }
  220. }
  221. }
  222. void XmlParser::_parse(const byte_field &_data)
  223. {
  224. for (string::size_type index = 0; index < _data.size(); index++)
  225. {
  226. index = this->_parseMode(index, _data);
  227. }
  228. }
  229. pair<string, string> XmlParser::_parseAttribute(string_view _data)
  230. {
  231. pair<string, string> parsedAttribute{};
  232. parsedAttribute.first = _data.substr(0, _data.find('='));
  233. parsedAttribute.second = _data.substr(_data.find('"') + 1);
  234. parsedAttribute.second.pop_back();
  235. return parsedAttribute;
  236. }
  237. list<pair<string, string>> XmlParser::_parseAttributes(byte_field _data)
  238. {
  239. list<pair<string, string>> attributes{};
  240. size_t position = _data.find(' ');
  241. _data = position == string::npos ? "" : _data.substr(position);
  242. while (!_data.empty())
  243. {
  244. do
  245. {
  246. position = _data.find(' ') + 1;
  247. } while (_data[position] == ' ');
  248. if (_data.size() <= 3 && _endsWith(string_view{_data}, ">"))
  249. {
  250. break;
  251. }
  252. string attributeString = _data.substr(position, _findAttributeEndPosition(_data) + 1);
  253. attributes.push_back(_parseAttribute(attributeString));
  254. _data = _data.substr(position + attributeString.size());
  255. }
  256. return attributes;
  257. }
  258. size_t XmlParser::_parseClosingTag(const byte_field &_data, const string::size_type _index)
  259. {
  260. const string tagString = _getNextTagString(_data, _index);
  261. this->currentLevel -= 1;
  262. return tagString.empty() ? _index : _index + (tagString.size() - 1);
  263. }
  264. size_t XmlParser::_parseDeclaration(const byte_field &_data, const string::size_type _index) const
  265. {
  266. const string tagString = _getNextTagString(_data, _index);
  267. const bool isValidTagString = !tagString.empty();
  268. if (isValidTagString)
  269. {
  270. const shared_ptr<XmlDeclaration> declaration = _createDeclaration(_parseAttributes(tagString));
  271. this->document->setDeclaration(declaration);
  272. }
  273. return !isValidTagString ? _index : _index + (tagString.size() - 1);
  274. }
  275. string::size_type XmlParser::_parseMode(const string::size_type _index, const byte_field &_data)
  276. {
  277. string::size_type index = _index;
  278. switch (this->mode)
  279. {
  280. case XmlParseMode::XML_PARSE_MODE_ANALYZE:
  281. {
  282. this->_analyze(_data, index);
  283. }
  284. break;
  285. case XmlParseMode::XML_PARSE_MODE_DECLARATION:
  286. {
  287. --index;
  288. index = this->_parseDeclaration(_data, index);
  289. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  290. }
  291. break;
  292. case XmlParseMode::XML_PARSE_MODE_OPENING_TAG:
  293. {
  294. --index;
  295. index = _parseOpeningTag(_data, index);
  296. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  297. }
  298. break;
  299. case XmlParseMode::XML_PARSE_MODE_VALUE:
  300. {
  301. --index;
  302. index = _parseValue(_data, index);
  303. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  304. }
  305. break;
  306. case XmlParseMode::XML_PARSE_MODE_CLOSING_TAG:
  307. {
  308. --index;
  309. index = _parseClosingTag(_data, index);
  310. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  311. }
  312. break;
  313. }
  314. return index;
  315. }
  316. size_t XmlParser::_parseOpeningTag(const byte_field &_data, const string::size_type _index)
  317. {
  318. const string tagString{_getNextTagString(_data, _index)};
  319. const bool isValidTagString = !tagString.empty();
  320. XmlParseParameter singleParseParameter{};
  321. if (isValidTagString)
  322. {
  323. const shared_ptr<XmlNode> node = _createNode(_parseAttributes(tagString), _parseTagName(tagString));
  324. singleParseParameter.setLevel(this->currentLevel);
  325. singleParseParameter.setNode(node);
  326. this->parseParameters.push_back(singleParseParameter);
  327. if (!_endsWith(tagString, "/>"))
  328. {
  329. this->currentLevel += 1;
  330. this->_setMaxLevel();
  331. }
  332. }
  333. return !isValidTagString ? _index : _index + (tagString.size() - 1);
  334. }
  335. string_view XmlParser::_parseTagName(string_view _data)
  336. {
  337. string::size_type position = _data.find(' ');
  338. if (position == string::npos)
  339. {
  340. position = _data.find('>');
  341. }
  342. return _data.substr(1, position - 1);
  343. }
  344. size_t XmlParser::_parseValue(const string_view _data, const string::size_type _index) const
  345. {
  346. const string_view value = _data.substr(_index, _data.substr(_index).find('<'));
  347. this->parseParameters.back().getNode()->setValue(string{value});
  348. return _index + (value.size() - 1);
  349. }
  350. void XmlParser::_reset()
  351. {
  352. this->currentLevel = 1;
  353. this->maxLevel = 1;
  354. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  355. this->parseParameters.clear();
  356. }
  357. void XmlParser::_setMaxLevel()
  358. {
  359. if (this->currentLevel > this->maxLevel)
  360. {
  361. this->maxLevel = this->currentLevel;
  362. }
  363. }