Have you ever wondered how to display text on a web form with its HTML tags stripped out, while still accepting HTML tags when the form is saved? Sounds confusing?
To make it clear, here is an example. Let's say you have a form with a rich text box (FTB or FCK) into which users can cut and paste content containing HTML tags, so that it can be displayed properly, as in a blog article. In some cases, however, you want the HTML tags stripped out, such as when displaying a summary in a grid. My solution is to strip the HTML before displaying the text wherever that is needed.
So when you copy this
Hello, World!
it won't show on your grid as this
<html> <head> <title> Hello World </title> </head> <body> <font size ="4" color="blue"> Hello, World! </font> </body> </html>
but as this
"Hello, World!"
Now, with a mix of Replace and regular expressions, I created a method to handle that, and here it is:
/// <summary>
/// Converts an HTML fragment into plain text by removing tags and
/// translating a fixed set of HTML entities into text equivalents.
/// </summary>
/// <param name="sInputString">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text. Table cells become tabs, and br/li/div/tr/p tags become
/// carriage returns. The input is returned unchanged when it is null or empty,
/// or when any step of the clean-up throws.
/// </returns>
/// <remarks>
/// The result is not HTML-safe: entities such as &amp;lt; are decoded to literal
/// characters, so the output must still be HTML-encoded before it is rendered
/// in a page.
/// </remarks>
public string StripHTML(string sInputString)
{
    // Nothing to strip; this also avoids relying on the catch-all for null input.
    if (string.IsNullOrEmpty(sInputString))
    {
        return sInputString;
    }

    try
    {
        // Initial cleaning: line breaks in the markup are not significant, so
        // turn them into spaces and drop tabs. Real breaks and tabs are
        // re-introduced below from <br>, <li>, <div>, <tr>, <p> and <td>.
        string result = sInputString
            .Replace("\r", " ")
            .Replace("\n", " ")
            .Replace("\t", string.Empty);

        // Apply the cleaning rules strictly in iID order. Select() returns the
        // rows sorted, whereas setting DefaultView.Sort does not reorder Rows.
        DataTable rules = GetTableDefinition();
        foreach (DataRow rule in rules.Select(string.Empty, "iID ASC"))
        {
            string pattern = rule["sOriginalString"].ToString();
            string replacement = rule["sReplacementString"].ToString();
            result = Regex.Replace(result, pattern, replacement, RegexOptions.IgnoreCase);
        }

        // Cap runs of line breaks at two and runs of tabs at four.
        result = Regex.Replace(result, "\r{3,}", "\r\r");
        result = Regex.Replace(result, "\t{5,}", "\t\t\t\t");

        return result;
    }
    catch
    {
        // Deliberate best-effort: callers never see an exception from this
        // method and get the original text back instead.
        return sInputString;
    }
}

/// <summary>
/// Builds the ordered list of regular-expression replacements used by
/// <see cref="StripHTML"/>.
/// </summary>
/// <returns>
/// A table with the columns iID (int, the order of application),
/// sOriginalString (regex pattern) and sReplacementString (replacement text).
/// </returns>
private DataTable GetTableDefinition()
{
    DataTable rules = new DataTable();
    rules.Columns.Add("iID", typeof(int));
    rules.Columns.Add("sOriginalString", typeof(string));
    rules.Columns.Add("sReplacementString", typeof(string));

    // Replace repeating spaces with a single space
    rules.Rows.Add(1, @"( )+", " ");

    // Normalize the head tags, then remove each head element with its content.
    // The match is lazy so text between two such elements is not swallowed.
    rules.Rows.Add(2, @"<( )*head([^>])*>", "<head>");
    rules.Rows.Add(3, @"(<( )*(/)( )*head( )*>)", "</head>");
    rules.Rows.Add(4, @"(<head>).*?(</head>)", string.Empty);

    // Same for script elements
    rules.Rows.Add(5, @"<( )*script([^>])*>", "<script>");
    rules.Rows.Add(6, @"(<( )*(/)( )*script( )*>)", "</script>");
    rules.Rows.Add(7, @"(<script>).*?(</script>)", string.Empty);

    // Same for style elements
    rules.Rows.Add(8, @"<( )*style([^>])*>", "<style>");
    rules.Rows.Add(9, @"(<( )*(/)( )*style( )*>)", "</style>");
    rules.Rows.Add(10, @"(<style>).*?(</style>)", string.Empty);

    // Replace <td> with a tab
    rules.Rows.Add(11, @"<( )*td([^>])*>", "\t");

    // Replace <br> and <li> with a line break
    rules.Rows.Add(12, @"<( )*br( )*>", "\r");
    rules.Rows.Add(13, @"<( )*li( )*>", "\r");

    // Replace <div>, <tr> and <p> with a double line break
    rules.Rows.Add(14, @"<( )*div([^>])*>", "\r\r");
    rules.Rows.Add(15, @"<( )*tr([^>])*>", "\r\r");
    rules.Rows.Add(16, @"<( )*p([^>])*>", "\r\r");

    // Remove any remaining tags enclosed in < >
    rules.Rows.Add(17, @"<[^>]*>", string.Empty);

    // Replace known entities with plain-text equivalents
    rules.Rows.Add(18, @"&nbsp;", " ");
    rules.Rows.Add(19, @"&bull;", " * ");
    rules.Rows.Add(20, @"&lsaquo;", "<");
    rules.Rows.Add(21, @"&rsaquo;", ">");
    rules.Rows.Add(22, @"&trade;", "(tm)");
    rules.Rows.Add(23, @"&frasl;", "/");
    rules.Rows.Add(24, @"&lt;", "<");
    rules.Rows.Add(25, @"&gt;", ">");
    rules.Rows.Add(26, @"&copy;", "(c)");
    rules.Rows.Add(27, @"&reg;", "(r)");
    rules.Rows.Add(28, @"&frac14;", "1/4");
    rules.Rows.Add(29, @"&frac12;", "1/2");
    rules.Rows.Add(30, @"&frac34;", "3/4");
    rules.Rows.Add(31, @"&lsquo;", "'");
    rules.Rows.Add(32, @"&rsquo;", "'");
    rules.Rows.Add(33, @"&ldquo;", "\"");
    rules.Rows.Add(34, @"&rdquo;", "\"");

    // Remove all other remaining entities that have no replacement above
    rules.Rows.Add(35, @"&(.{2,6});", string.Empty);

    // Remove spaces and tabs trapped between line breaks and tabs
    rules.Rows.Add(36, "(\r)( )+(\r)", "\r\r");
    rules.Rows.Add(37, "(\t)( )+(\t)", "\t\t");
    rules.Rows.Add(38, "(\t)( )+(\r)", "\t\r");
    rules.Rows.Add(39, "(\r)( )+(\t)", "\r\t");
    rules.Rows.Add(40, "(\r)(\t)+(\r)", "\r\r");
    rules.Rows.Add(41, "(\r)(\t)+", "\r\t");

    return rules;
}