// Patch metadata carried over from the cgit export of this file:
//   From 9d53d8857eaa1c9405894a88ca75bc4657e42f35 Mon Sep 17 00:00:00 2001
//   From: msglm
//   Date: Sat, 14 Jan 2023 05:31:48 -0600
//   Subject: Inital Commit
//   ---
//   C++/String Processing/String Processing.cpp | 88 +++++++++++++++++++++++++++++
//   1 file changed, 88 insertions(+)
//   create mode 100644 C++/String Processing/String Processing.cpp
//   (limited to 'C++/String Processing')
//   diff --git a/C++/String Processing/String Processing.cpp b/C++/String Processing/String Processing.cpp
//   new file mode 100644
//   index 0000000..4e35a80
//   --- /dev/null
//   +++ b/C++/String Processing/String Processing.cpp
//   @@ -0,0 +1,88 @@

// Name: msglm
// Date: September 2nd, 2022
// Program Name: HTML Parser
// Description: removes HTML from inputted string.

//any code with /// is merely commented out debug code.

// <iostream> provides cin/cout/endl, <string> provides string and getline.
#include <iostream>
#include <string>
using namespace std;

//Takes input from user
//
//Reads one line from standard input, up to (and not including) the first
//'\n', and returns it. If nothing can be read (e.g. stdin is already at
//end-of-file) the returned string is empty.
string in() {
    //I personally dislike C++'s streaming capabilities
    //They aren't like Bash pipes and that's a blunder
    //I wish I could do getline(cin, '\n') >> return; or getline(cin, '\n') | return;
    string userInput;
    getline(cin, userInput, '\n');
    return userInput;
}

//processes the input data
//this is the only process in the entire program thus the
//term "proc" is accurate. We don't process data in in() or out().
//
//How this works is it takes a string, and goes through it char-by-char,
//keeping track of where the most recent < was seen.
//once a > turns up after such a <, the whole span from that <
//up to and including the > is treated as a tag and sliced out of the string.
//
//this keeps going until no < ... > pair is left, so brackets that only
//become a pair after an inner tag was removed are sliced out as well.
//a < that is never closed, or a > that was never opened, is left alone.
+ +string proc(string userInput) { + bool insideATag = false; + bool metTheTagTerminator = false; + int beginBracketPos; + int endBracketPos; + int tagLength; + + for (int charPos=0; charPos<=userInput.length();charPos++) { + if (insideATag && metTheTagTerminator) { + ///cout << "tag found at " << beginBracketPos << " and " << endBracketPos << endl; + ///cout << "Full String:" << userInput << endl; + + //Tag length only exists because std::String.erase() doesn't accept + //a range, but rather a start and the chars to go. it's accursed. + tagLength = (endBracketPos-beginBracketPos)+1; + userInput.erase(beginBracketPos, tagLength); + charPos = 0; + insideATag = false; + metTheTagTerminator = false; + } + if (userInput[charPos] == '<') { + insideATag = true; + beginBracketPos = charPos; + } + + if (userInput[charPos] == '>' && insideATag) { + metTheTagTerminator = true; + endBracketPos = charPos; + } + + ///cout << "char pos: " << charPos << ";" << "char: " << userInput[charPos] << endl; + + } + return userInput; +} + +//Outs to screen +void out(string processedString) { + cout << "sanitized: " << processedString << endl; +} + +int main() { + //LISP called, they want their () back. + out(proc(in())); +} + +/* + This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License Version 3 ONLY as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with this program. If not, see . + */ -- cgit v1.2.3