// File: XmlParser.cpp
/*
 * Author: Patrick-Christopher Mattulat
 * Company: Lynar Studios
 * E-Mail: webmaster@lynarstudios.com
 * Created: 2020-11-26
 * Changed: 2023-05-16
 *
 * */
  9. #include <ls-std/core/evaluator/NullPointerArgumentEvaluator.hpp>
  10. #include <ls-std/io/xml/XmlParser.hpp>
  11. using ls::std::core::Class;
  12. using ls::std::core::NullPointerArgumentEvaluator;
  13. using ls::std::core::type::byte_field;
  14. using ls::std::io::XmlAttribute;
  15. using ls::std::io::XmlDeclaration;
  16. using ls::std::io::XmlDocument;
  17. using ls::std::io::XmlNode;
  18. using ls::std::io::XmlParseMode;
  19. using ls::std::io::XmlParseParameter;
  20. using ls::std::io::XmlParser;
  21. using std::find_if;
  22. using std::list;
  23. using std::make_shared;
  24. using std::move;
  25. using std::pair;
  26. using std::shared_ptr;
  27. using std::string;
  28. using std::string_view;
  29. XmlParser::XmlParser(const shared_ptr<XmlDocument> &_document) : Class("XmlParser")
  30. {
  31. this->_assignDocument(_document);
  32. this->_reset();
  33. }
// Defaulted destructor: no manual cleanup is written in this file.
XmlParser::~XmlParser() noexcept = default;
  35. shared_ptr<XmlDocument> XmlParser::getDocument() const
  36. {
  37. return this->document;
  38. }
  39. void XmlParser::parse(const byte_field &_data)
  40. {
  41. this->_parse(_data);
  42. this->_mergeNodes();
  43. this->_reset();
  44. }
  45. void XmlParser::setDocument(const shared_ptr<XmlDocument> &_document)
  46. {
  47. this->_assignDocument(_document);
  48. }
  49. pair<string, string> XmlParser::_readAttribute_(const byte_field &_data)
  50. {
  51. return XmlParser::_parseAttribute(_data);
  52. }
  53. list<pair<string, string>> XmlParser::_readAttributes_(byte_field _data)
  54. {
  55. return XmlParser::_parseAttributes(::move(_data));
  56. }
  57. void XmlParser::_analyze(const byte_field &_data, string::size_type _index)
  58. {
  59. this->_isDeclaration(_data, _index);
  60. this->_isClosingTag(_data, _index);
  61. this->_isOpeningTag(_data, _index);
  62. this->_isValue(_data, _index);
  63. }
  64. void XmlParser::_assignDocument(const shared_ptr<XmlDocument> &_document)
  65. {
  66. NullPointerArgumentEvaluator{_document, "passed document reference is null!"}.evaluate();
  67. this->document = _document;
  68. }
  69. bool XmlParser::_contains(string_view _text, string_view _searchText)
  70. {
  71. return _text.find(_searchText) != string::npos;
  72. }
  73. shared_ptr<XmlDeclaration> XmlParser::_createDeclaration(const list<pair<string, string>> &_attributes)
  74. {
  75. auto declaration = make_shared<XmlDeclaration>("1.0");
  76. pair<string, string> attribute = XmlParser::_findAttribute(_attributes, "version");
  77. if (!attribute.first.empty())
  78. {
  79. declaration->setVersion(attribute.second);
  80. }
  81. attribute = XmlParser::_findAttribute(_attributes, "encoding");
  82. if (!attribute.first.empty())
  83. {
  84. declaration->setEncoding(attribute.second);
  85. }
  86. attribute = XmlParser::_findAttribute(_attributes, "standalone");
  87. if (!attribute.first.empty())
  88. {
  89. declaration->setStandalone(attribute.second);
  90. }
  91. return declaration;
  92. }
  93. shared_ptr<XmlNode> XmlParser::_createNode(const list<pair<string, string>> &_attributes, string_view _name)
  94. {
  95. auto node = make_shared<XmlNode>(string{_name});
  96. shared_ptr<XmlAttribute> attribute{};
  97. for (const auto &[attributeName, attributeValue] : _attributes)
  98. {
  99. attribute = make_shared<XmlAttribute>(attributeName);
  100. attribute->setValue(attributeValue);
  101. node->addAttributeToEnd(attribute);
  102. }
  103. return node;
  104. }
  105. bool XmlParser::_endsWith(string_view _text, string_view _ending)
  106. {
  107. return _text.rfind(_ending) == (_text.size() - _ending.size());
  108. }
  109. pair<string, string> XmlParser::_findAttribute(const list<pair<string, string>> &_attributes, const string &_name)
  110. {
  111. const auto &iterator = find_if(_attributes.begin(), _attributes.end(), [&_name](const pair<string, string> &_attribute) { return _attribute.first == _name; });
  112. return iterator != _attributes.end() ? *iterator : pair<string, string>{};
  113. }
  114. size_t XmlParser::_findAttributeEndPosition(const byte_field &_data)
  115. {
  116. string::size_type position = string::npos;
  117. string::size_type counter{};
  118. for (char letter : _data)
  119. {
  120. if (letter == '"')
  121. {
  122. counter++;
  123. }
  124. if (counter == 2)
  125. {
  126. break;
  127. }
  128. position++;
  129. }
  130. return position;
  131. }
  132. byte_field XmlParser::_getNextTagString(string_view _data, string::size_type _index)
  133. {
  134. byte_field tag{};
  135. if (size_t closingCharacterPosition = _index + _data.substr(_index).find('>'); closingCharacterPosition != string::npos)
  136. {
  137. tag = _data.substr(_index, (closingCharacterPosition - _index) + 1);
  138. }
  139. return tag;
  140. }
  141. void XmlParser::_isClosingTag(string_view _data, string::size_type _index)
  142. {
  143. if (this->mode == XmlParseMode::XML_PARSE_MODE_ANALYZE && _data.substr(_index, 2) == "</")
  144. {
  145. this->mode = XmlParseMode::XML_PARSE_MODE_CLOSING_TAG;
  146. }
  147. }
  148. void XmlParser::_isDeclaration(string_view _data, string::size_type _index)
  149. {
  150. if (_data.substr(_index, 5) == "<?xml")
  151. {
  152. this->mode = XmlParseMode::XML_PARSE_MODE_DECLARATION;
  153. }
  154. }
  155. void XmlParser::_isOpeningTag(string_view _data, string::size_type _index)
  156. {
  157. if (this->mode == XmlParseMode::XML_PARSE_MODE_ANALYZE && _data.substr(_index, 1) == "<")
  158. {
  159. this->mode = XmlParseMode::XML_PARSE_MODE_OPENING_TAG;
  160. }
  161. }
  162. void XmlParser::_isValue(const byte_field &_data, string::size_type _index)
  163. {
  164. if (this->mode == XmlParseMode::XML_PARSE_MODE_ANALYZE)
  165. {
  166. string::size_type end = _data.substr(_index).find('<');
  167. bool isValue = _data[_index - 1] == '>' && end != string::npos && end > 0;
  168. if (isValue)
  169. {
  170. string value{_data.substr(_index, end)};
  171. if (!XmlParser::_contains(string_view{value}, string_view{"\n"}) && !XmlParser::_contains(string_view{value}, string_view{"\r\n"}))
  172. {
  173. this->mode = XmlParseMode::XML_PARSE_MODE_VALUE;
  174. }
  175. }
  176. }
  177. }
  178. void XmlParser::_mergeNodes()
  179. {
  180. while (this->maxLevel > 1)
  181. {
  182. this->_mergeNodesOnCurrentLevel();
  183. this->maxLevel -= 1;
  184. }
  185. this->document->setRootElement(this->parseParameters.front().getNode());
  186. }
  187. void XmlParser::_mergeChildrenToParentNode(const shared_ptr<XmlNode> &_parent, list<XmlParseParameter>::iterator &_iterator, uint8_t _parentLevel)
  188. {
  189. do
  190. {
  191. _iterator++;
  192. if (_iterator == this->parseParameters.end())
  193. {
  194. break;
  195. }
  196. else
  197. {
  198. if (_iterator->getLevel() == this->maxLevel)
  199. {
  200. _parent->addChildToEnd(_iterator->getNode());
  201. }
  202. }
  203. } while (_iterator->getLevel() > _parentLevel);
  204. }
  205. void XmlParser::_mergeNodesOnCurrentLevel()
  206. {
  207. auto iterator = this->parseParameters.begin();
  208. uint8_t parentLevel = this->maxLevel - 1;
  209. while (iterator != this->parseParameters.end())
  210. {
  211. if (iterator->getLevel() == parentLevel)
  212. {
  213. this->_mergeChildrenToParentNode(iterator->getNode(), iterator, parentLevel);
  214. }
  215. else
  216. {
  217. iterator++;
  218. }
  219. }
  220. }
  221. void XmlParser::_parse(const byte_field &_data)
  222. {
  223. for (string::size_type index = 0; index < _data.size(); index++)
  224. {
  225. index = this->_parseMode(index, _data);
  226. }
  227. }
  228. pair<string, string> XmlParser::_parseAttribute(string_view _data)
  229. {
  230. pair<string, string> parsedAttribute{};
  231. parsedAttribute.first = _data.substr(0, _data.find('='));
  232. parsedAttribute.second = _data.substr(_data.find('"') + 1);
  233. parsedAttribute.second.pop_back();
  234. return parsedAttribute;
  235. }
  236. list<pair<string, string>> XmlParser::_parseAttributes(byte_field _data)
  237. {
  238. list<pair<string, string>> attributes{};
  239. size_t position = _data.find(' ');
  240. _data = position == string::npos ? "" : _data.substr(position);
  241. while (!_data.empty())
  242. {
  243. do
  244. {
  245. position = _data.find(' ') + 1;
  246. } while (_data[position] == ' ');
  247. if (_data.size() <= 3 && XmlParser::_endsWith(string_view{_data}, ">"))
  248. {
  249. break;
  250. }
  251. string attributeString = _data.substr(position, XmlParser::_findAttributeEndPosition(_data) + 1);
  252. attributes.push_back(XmlParser::_parseAttribute(attributeString));
  253. _data = _data.substr(position + attributeString.size());
  254. }
  255. return attributes;
  256. }
  257. size_t XmlParser::_parseClosingTag(const byte_field &_data, string::size_type _index)
  258. {
  259. string tagString = XmlParser::_getNextTagString(_data, _index);
  260. this->currentLevel -= 1;
  261. return tagString.empty() ? _index : _index + (tagString.size() - 1);
  262. }
  263. size_t XmlParser::_parseDeclaration(const byte_field &_data, string::size_type _index) const
  264. {
  265. string tagString = XmlParser::_getNextTagString(_data, _index);
  266. bool isValidTagString = !tagString.empty();
  267. if (isValidTagString)
  268. {
  269. shared_ptr<XmlDeclaration> declaration = this->_createDeclaration(XmlParser::_parseAttributes(tagString));
  270. this->document->setDeclaration(declaration);
  271. }
  272. return !isValidTagString ? _index : _index + (tagString.size() - 1);
  273. }
  274. string::size_type XmlParser::_parseMode(string::size_type _index, const byte_field &_data)
  275. {
  276. string::size_type index = _index;
  277. switch (this->mode)
  278. {
  279. case XmlParseMode::XML_PARSE_MODE_ANALYZE:
  280. {
  281. this->_analyze(_data, index);
  282. }
  283. break;
  284. case XmlParseMode::XML_PARSE_MODE_DECLARATION:
  285. {
  286. --index;
  287. index = this->_parseDeclaration(_data, index);
  288. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  289. }
  290. break;
  291. case XmlParseMode::XML_PARSE_MODE_OPENING_TAG:
  292. {
  293. --index;
  294. index = XmlParser::_parseOpeningTag(_data, index);
  295. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  296. }
  297. break;
  298. case XmlParseMode::XML_PARSE_MODE_VALUE:
  299. {
  300. --index;
  301. index = XmlParser::_parseValue(_data, index);
  302. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  303. }
  304. break;
  305. case XmlParseMode::XML_PARSE_MODE_CLOSING_TAG:
  306. {
  307. --index;
  308. index = XmlParser::_parseClosingTag(_data, index);
  309. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  310. }
  311. break;
  312. }
  313. return index;
  314. }
  315. size_t XmlParser::_parseOpeningTag(const byte_field &_data, string::size_type _index)
  316. {
  317. string tagString{XmlParser::_getNextTagString(_data, _index)};
  318. bool isValidTagString = !tagString.empty();
  319. XmlParseParameter singleParseParameter{};
  320. if (isValidTagString)
  321. {
  322. shared_ptr<XmlNode> node = XmlParser::_createNode(XmlParser::_parseAttributes(tagString), XmlParser::_parseTagName(tagString));
  323. singleParseParameter.setLevel(this->currentLevel);
  324. singleParseParameter.setNode(node);
  325. this->parseParameters.push_back(singleParseParameter);
  326. if (!XmlParser::_endsWith(tagString, "/>"))
  327. {
  328. this->currentLevel += 1;
  329. this->_setMaxLevel();
  330. }
  331. }
  332. return !isValidTagString ? _index : _index + (tagString.size() - 1);
  333. }
  334. string_view XmlParser::_parseTagName(string_view _data)
  335. {
  336. string::size_type position = _data.find(' ');
  337. if (position == string::npos)
  338. {
  339. position = _data.find('>');
  340. }
  341. return _data.substr(1, position - 1);
  342. }
  343. size_t XmlParser::_parseValue(string_view _data, string::size_type _index)
  344. {
  345. string_view value = _data.substr(_index, _data.substr(_index).find('<'));
  346. this->parseParameters.back().getNode()->setValue(string{value});
  347. return _index + (value.size() - 1);
  348. }
  349. void XmlParser::_reset()
  350. {
  351. this->currentLevel = 1;
  352. this->maxLevel = 1;
  353. this->mode = XmlParseMode::XML_PARSE_MODE_ANALYZE;
  354. this->parseParameters.clear();
  355. }
  356. void XmlParser::_setMaxLevel()
  357. {
  358. if (this->currentLevel > this->maxLevel)
  359. {
  360. this->maxLevel = this->currentLevel;
  361. }
  362. }