شركة التطبيقات المتكاملة لتصميم النظم البرمجية الخاصة

Integrated Applications Programming Company

Home » Code Library » Html

Public general use code classes and xml files that we've compiled and used over the years:

Handles HTML encoding and decoding functions.

   1:  using System.Text.RegularExpressions;
   2:  using System.Web;
   3:   
   4:  namespace Ia.Cl.Model
   5:  {
   6:      ////////////////////////////////////////////////////////////////////////////
   7:   
   8:      /// <summary publish="true">
   9:      /// Handle HTML encoding, decoding functions.
  10:      /// </summary>
  11:      /// <remarks> 
  12:      /// Copyright © 2001-2018 Jasem Y. Al-Shamlan (info@ia.com.kw), Integrated Applications - Kuwait. All Rights Reserved.
  13:      ///
  14:      /// This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
  15:      /// the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  16:      ///
  17:      /// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  18:      /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  19:      /// 
  20:      /// You should have received a copy of the GNU General Public License along with this library. If not, see http://www.gnu.org/licenses.
  21:      /// 
  22:      /// Copyright notice: This notice may not be removed or altered from any source distribution.
  23:      /// </remarks> 
  24:      public class Html
  25:      {
  26:          private static readonly Regex regexBetweenTags = new Regex(@">\s+<", RegexOptions.Compiled);
  27:          private static readonly Regex regexLineBreaks = new Regex(@"\n\s+", RegexOptions.Compiled);
  28:   
  29:          ////////////////////////////////////////////////////////////////////////////
  30:   
  31:          /// <summary>
  32:          ///
  33:          /// </summary>
  34:          public Html() { }
  35:   
  36:          ////////////////////////////////////////////////////////////////////////////
  37:   
  38:          ///<summary>
  39:          ///
  40:          /// </summary>
  41:          /// <param name="s"></param>
  42:          /// <returns></returns>
  43:          /// <remark>http://www.west-wind.com/weblog/posts/2009/Feb/05/Html-and-Uri-String-Encoding-without-SystemWeb</remark>
  44:          public static string HtmlEncode(string s)
  45:          {
  46:  #if WFA
  47:   
  48:              if (s == null) return null;
  49:   
  50:              StringBuilder sb = new StringBuilder(s.Length);
  51:   
  52:              int len = s.Length;
  53:   
  54:              for (int i = 0; i < len; i++)
  55:              {
  56:                  switch (s[i])
  57:                  {
  58:                      case '<': sb.Append("&lt;"); break;
  59:                      case '>': sb.Append("&gt;"); break;
  60:                      case '"': sb.Append("&quot;"); break;
  61:                      case '&': sb.Append("&amp;"); break;
  62:                      default:
  63:                          if (s[i] > 159)
  64:                          {
  65:                              // decimal numeric entity
  66:                              sb.Append("&#");
  67:                              sb.Append(((int)s[i]).ToString(CultureInfo.InvariantCulture));
  68:                              sb.Append(";");
  69:                          }
  70:                          else sb.Append(s[i]);
  71:                          break;
  72:                  }
  73:              }
  74:   
  75:              return sb.ToString();
  76:  #else
  77:              return HttpUtility.HtmlEncode(s);
  78:  #endif
  79:          }
  80:   
  81:          ////////////////////////////////////////////////////////////////////////////
  82:   
  83:          ///<summary>
  84:          ///
  85:          /// </summary>
  86:          public static string HtmlDecode(string s)
  87:          {
  88:  #if WFA
  89:              s = s.Replace("&lt;","<");
  90:              s = s.Replace("&gt;",">");
  91:              s = s.Replace("&quot;",@"""");
  92:              s = s.Replace("&amp;","&");
  93:   
  94:              return s;
  95:  #else
  96:              return HttpUtility.HtmlDecode(s);
  97:  #endif
  98:          }
  99:   
 100:          ////////////////////////////////////////////////////////////////////////////
 101:   
 102:          /// <summary>
 103:          ///
 104:          /// </summary>
 105:          public static string Encode(string s)
 106:          {
 107:              s = HtmlEncode(s);
 108:   
 109:              // database requirement:
 110:              s = s.Replace(@"'", @"_#039_");
 111:              s = s.Replace(@"?", @"_#063_");
 112:   
 113:              return s;
 114:          }
 115:   
 116:          ////////////////////////////////////////////////////////////////////////////
 117:   
 118:          /// <summary>
 119:          ///
 120:          /// </summary>
 121:          public static string Decode(string s)
 122:          {
 123:              // database requirement:
 124:              s = s.Replace(@"_#063_", @"?");
 125:              s = s.Replace(@"_#039_", @"'");
 126:   
 127:              s = HtmlDecode(s);
 128:   
 129:              return s;
 130:          }
 131:   
 132:          ////////////////////////////////////////////////////////////////////////////
 133:   
 134:          /// <summary>
 135:          ///
 136:          /// </summary>
 137:          public static string DecodeRemoveNLLF(string s)
 138:          {
 139:              // database requirement:
 140:   
 141:              s = s.Replace(@"_#063_", @"?");
 142:              s = s.Replace(@"_#039_", @"'");
 143:   
 144:              s = HtmlDecode(s);
 145:   
 146:              s = s.Replace("\n\r", " ");
 147:              s = s.Replace("\r\n", " ");
 148:              s = s.Replace("\n", " ");
 149:              s = s.Replace("\r", " ");
 150:   
 151:              return s;
 152:          }
 153:   
 154:          ////////////////////////////////////////////////////////////////////////////
 155:   
 156:          /// <summary>
 157:          ///
 158:          /// </summary>
 159:          public static string XmlEncode(string s)
 160:          {
 161:              s = HtmlEncode(s);
 162:   
 163:              s = s.Replace(@"'", @"_#039_");
 164:              s = s.Replace(@"\", @"_#092_");
 165:              s = s.Replace(@"?", @"_#063_");
 166:   
 167:              /*
 168:              &amp;  =  &
 169:              &lt;   =  <
 170:              &gt;   =  >
 171:              &quot; =  "
 172:              &apos; =  '
 173:              */
 174:   
 175:              // XML requirement:
 176:              s = s.Replace("&", "_amp_");
 177:              s = s.Replace(">", "_gt_");
 178:              s = s.Replace("<", "_lt_");
 179:   
 180:              return s;
 181:          }
 182:   
 183:          ////////////////////////////////////////////////////////////////////////////
 184:   
 185:          /// <summary>
 186:          ///
 187:          /// </summary>
 188:          public static string XmlDecode(string s)
 189:          {
 190:              // XML requirement
 191:              s = s.Replace("_gt_", ">");
 192:              s = s.Replace("_lt_", "<");
 193:              s = s.Replace("_amp_", "&");
 194:   
 195:              s = s.Replace(@"_#039_", @"'");
 196:              s = s.Replace(@"_#092_", @"\");
 197:              s = s.Replace(@"_#063_", @"?");
 198:   
 199:              s = HtmlDecode(s);
 200:              return s;
 201:          }
 202:   
 203:          ////////////////////////////////////////////////////////////////////////////
 204:   
 205:          /// <summary>
 206:          ///
 207:          /// </summary>
 208:          public static string Code(string code)
 209:          {
 210:              // this displays an HTML code in regular text
 211:              /*
 212:              s=s.Replace("_gt_",">");
 213:              s=s.Replace("_lt_","<");
 214:              s=s.Replace("_amp_","&");
 215:   
 216:              s=s.Replace(@"_#039_",@"'");
 217:              s=s.Replace(@"_#092_",@"\");
 218:              s=s.Replace(@"_#063_",@"?");
 219:              */
 220:   
 221:              code = HtmlEncode(code);
 222:              return code;
 223:          }
 224:   
 225:          ////////////////////////////////////////////////////////////////////////////
 226:   
 227:          /// <summary>
 228:          ///
 229:          /// </summary>
 230:          public static string StripHtml(string source)
 231:          {
 232:              try
 233:              {
 234:                  string result;
 235:   
 236:                  // Remove HTML Development formatting
 237:                  // Replace line breaks with space
 238:                  // because browsers inserts space
 239:                  result = source.Replace("\r", " ");
 240:   
 241:                  // Replace line breaks with space
 242:                  // because browsers inserts space
 243:                  result = result.Replace("\n", " ");
 244:   
 245:                  // Remove step-formatting
 246:                  result = result.Replace("\t", string.Empty);
 247:   
 248:                  // Remove repeating speces becuase browsers ignore them
 249:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"( )+", " ");
 250:   
 251:                  // Remove the header (prepare first by clearing attributes)
 252:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*head([^>])*>", "<head>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 253:   
 254:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*head( )*>)", "</head>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 255:   
 256:                  result = System.Text.RegularExpressions.Regex.Replace(result, "(<head>).*(</head>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 257:   
 258:                  // remove all scripts (prepare first by clearing attributes)
 259:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*script([^>])*>", "<script>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 260:   
 261:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*script( )*>)", "</script>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 262:   
 263:                  //result = System.Text.RegularExpressions.Regex.Replace(result, @"(<script>)([^(<script>\.</script>)])*(</script>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 264:   
 265:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"(<script>).*(</script>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 266:   
 267:                  // remove all styles (prepare first by clearing attributes)
 268:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*style([^>])*>", "<style>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 269:   
 270:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*style( )*>)", "</style>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 271:   
 272:                  result = System.Text.RegularExpressions.Regex.Replace(result, "(<style>).*(</style>)", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 273:   
 274:                  // insert tabs in spaces of <td> tags
 275:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*td([^>])*>", "\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 276:   
 277:                  // insert line breaks in places of <BR> and <LI> tags
 278:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*br( )*>", "\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 279:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*li( )*>", "\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 280:   
 281:                  // insert line paragraphs (double line breaks) in place
 282:                  // if <P>, <DIV> and <TR> tags
 283:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*div([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 284:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*tr([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 285:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*p([^>])*>", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 286:   
 287:                  // Remove remaining tags like <a>, links, images, // comments etc - anything thats enclosed inside < >
 288:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"<[^>]*>", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 289:   
 290:                  // replace special characters:
 291:                  result = System.Text.RegularExpressions.Regex.Replace(result, @" ", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 292:   
 293:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&bull;", " * ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 294:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&lsaquo;", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 295:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&rsaquo;", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 296:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&trade;", "(tm)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 297:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&frasl;", "/", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 298:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&lt;", "<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 299:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&gt;", ">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 300:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&copy;", "(c)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 301:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&reg;", "(r)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 302:   
 303:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&nbsp;", " ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 304:   
 305:                  // Remove all others. More can be added, see
 306:                  // http://hotwired.lycos.com/webmonkey/reference/special_characters/
 307:                  result = System.Text.RegularExpressions.Regex.Replace(result, @"&(.{2,6});", string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 308:   
 309:                  // for testng
 310:                  //System.Text.RegularExpressions.Regex.Replace(result, 
 311:                  //      this.txtRegex.Text,string.Empty, 
 312:                  //      System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 313:   
 314:                  // make line breaking consistent
 315:                  result = result.Replace("\n", "\r");
 316:   
 317:                  // Remove extra line breaks and tabs:
 318:                  // replace over 2 breaks with 2 and over 4 tabs with 4. 
 319:                  // Prepare first to remove any whitespaces inbetween
 320:                  // the escaped characters and remove redundant tabs inbetween linebreaks
 321:                  result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\r)", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 322:                  result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\t)", "\t\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 323:                  result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\r)", "\t\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 324:                  result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\t)", "\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 325:   
 326:                  // Remove redundant tabs
 327:                  result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+(\r)", "\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 328:   
 329:                  // Remove multible tabs followind a linebreak with just one tab
 330:                  result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+", "\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
 331:   
 332:                  // Initial replacement target string for linebreaks
 333:                  string breaks = "\r\r\r";
 334:                  // Initial replacement target string for tabs
 335:                  string tabs = "\t\t\t\t\t";
 336:   
 337:                  for (int index = 0; index < result.Length; index++)
 338:                  {
 339:                      result = result.Replace(breaks, "\r\r");
 340:                      result = result.Replace(tabs, "\t\t\t\t");
 341:                      breaks = breaks + "\r";
 342:                      tabs = tabs + "\t";
 343:                  }
 344:   
 345:                  // Thats it.
 346:                  return result;
 347:   
 348:              }
 349:              catch
 350:              {
 351:                  //MessageBox.Show("Error");
 352:                  return null;
 353:              }
 354:          }
 355:   
 356:          ////////////////////////////////////////////////////////////////////////////
 357:   
 358:          /// <summary>
 359:          ///
 360:          /// </summary>
 361:          public static string TextToHtml(string source)
 362:          {
 363:              // clean regular text format pages and return an equivalent html format
 364:   
 365:              string s;
 366:   
 367:              s = Decode(source);
 368:              //s = global::Ia.Cl.Model.Html.Html_Strip(s);
 369:              s = Regex.Replace(s, @"\.", @". ");
 370:              s = Regex.Replace(s, @"[ ]+", @" ");
 371:              s = s.Replace("\r", "");
 372:              s = s.Replace("\n+", "\n");
 373:              //s = "<p>" + s.Replace("\n", "</p>\n<p>") + "</p>";
 374:              /*
 375:              s = s.Replace("\n", "</p>\n<p>");
 376:   
 377:              // clean up
 378:              u = sb.ToString();
 379:              u = Regex.Replace(u, @"^\s+", "");
 380:              u = Regex.Replace(u, @">\s+", ">");
 381:              u = Regex.Replace(u, @"\s+<", "<");
 382:              u = Regex.Replace(u, @"\s+", " ");
 383:              u = Regex.Replace(u, @"\n+", @"<br/>"); // keep newlines
 384:              //u = Regex.Replace(u, @"</ul>(.+?)</ul>", "</ul><p>$1</p></ul>");
 385:              //u = Regex.Replace(u, @"</ul>(.+?)</p>", "</ul><p>$1</p></p>");
 386:              //u = u.Replace(@"�", "<p/>&nbsp;&nbsp;&nbsp;�&nbsp;");
 387:              */
 388:   
 389:              return s;
 390:          }
 391:   
 392:          ////////////////////////////////////////////////////////////////////////////
 393:   
 394:          /// <summary>
 395:          ///
 396:          /// </summary>
 397:          public static string TextToHtml2(string source)
 398:          {
 399:              // clean regular text format pages and return an equivalent html format
 400:   
 401:              string s;
 402:   
 403:              s = Decode(source);
 404:              //s = global::Ia.Cl.Model.Html.Html_Strip(s);
 405:              s = Regex.Replace(s, @"\.", @". ");
 406:              s = Regex.Replace(s, @"[ ]+", @" ");
 407:              s = s.Replace("\r", "");
 408:              s = s.Replace("\n+", "\n");
 409:              s = "<p>" + s.Replace("\n", "</p>\n<p>") + "</p>";
 410:   
 411:              /*
 412:              s = s.Replace("\n", "</p>\n<p>");
 413:   
 414:              // clean up
 415:              u = sb.ToString();
 416:              u = Regex.Replace(u, @"^\s+", "");
 417:              u = Regex.Replace(u, @">\s+", ">");
 418:              u = Regex.Replace(u, @"\s+<", "<");
 419:              u = Regex.Replace(u, @"\s+", " ");
 420:              u = Regex.Replace(u, @"\n+", @"<br/>"); // keep newlines
 421:              //u = Regex.Replace(u, @"</ul>(.+?)</ul>", "</ul><p>$1</p></ul>");
 422:              //u = Regex.Replace(u, @"</ul>(.+?)</p>", "</ul><p>$1</p></p>");
 423:              //u = u.Replace(@"�", "<p/>&nbsp;&nbsp;&nbsp;�&nbsp;");
 424:              */
 425:   
 426:              return s;
 427:          }
 428:   
 429:          ////////////////////////////////////////////////////////////////////////////
 430:   
 431:          /// <summary>
 432:          ///
 433:          /// </summary>
 434:          public static string TextToHtmlAndOl_Ul_LiToBr(string source)
 435:          {
 436:              // clean regular text format pages and return an equivalent html format
 437:   
 438:              string s;
 439:   
 440:              s = Decode(source);
 441:              s = Regex.Replace(s, @"\.", @". ");
 442:              s = Regex.Replace(s, @"[ ]+", @" ");
 443:              s = s.Replace("\r", "");
 444:              s = s.Replace("\n+", "\n");
 445:   
 446:              s = s.Replace("<ol>", "<br/> <br/>");
 447:              s = s.Replace("</ol>", "");
 448:              s = s.Replace("<ul>", "<br/> <br/>");
 449:              s = s.Replace("</ul>", "");
 450:              s = s.Replace("<li>", "-");
 451:              s = s.Replace("</li>", "<br/>");
 452:   
 453:              return s;
 454:          }
 455:   
 456:          ////////////////////////////////////////////////////////////////////////////
 457:   
 458:          /// <summary>
 459:          ///
 460:          /// <see cref="http://madskristensen.net/post/remove-whitespace-from-your-pages"/>
 461:          /// </summary>
 462:          public static string RemoveWhitespaceFromHtml(string html)
 463:          {
 464:              // for now we will skip if page has <pre>
 465:   
 466:              if (!html.Contains("<pre>"))
 467:              {
 468:                  html = regexBetweenTags.Replace(html, "> <");
 469:                  html = regexLineBreaks.Replace(html, string.Empty);
 470:              }
 471:   
 472:              return html.Trim();
 473:          }
 474:   
 475:          ////////////////////////////////////////////////////////////////////////////
 476:          ////////////////////////////////////////////////////////////////////////////
 477:      }
 478:  }