Run Code
|
API
|
Code Wall
|
Misc
|
Feedback
|
Login
|
Theme
|
Privacy
|
Patreon
WebScrape using Regex
//Rextester.Program.Main is the entry point for your code. Don't change it.
//Compiler version 4.0.30319.17929 for Microsoft (R) .NET Framework 4.5

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Net;

namespace Rextester
{
    /// <summary>
    /// Downloads the Wikipedia main page and prints every hyperlink found in it.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Entry point: fetches the page HTML, extracts the links with
        /// <see cref="LinkFinder.Find"/> and writes each one to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // 1.
            // URL: http://en.wikipedia.org/wiki/Main_Page
            string html;
            // WebClient is IDisposable; release it as soon as the download completes.
            using (WebClient client = new WebClient())
            {
                html = client.DownloadString("http://en.wikipedia.org/wiki/Main_Page");
            }

            // 2.
            foreach (LinkItem link in LinkFinder.Find(html))
            {
                Console.WriteLine(link);
            }
        }
    }

    /// <summary>
    /// One hyperlink extracted from an HTML document.
    /// </summary>
    public struct LinkItem
    {
        /// <summary>Value of the anchor's href attribute; null when the anchor has none.</summary>
        public string Href;

        /// <summary>Link text with all inner tags (and the whitespace around them) removed.</summary>
        public string Text;

        /// <summary>
        /// Formats the link as the href on one line followed by the tab-indented text on the next.
        /// </summary>
        public override string ToString()
        {
            return Href + "\n\t" + Text;
        }
    }

    /// <summary>
    /// Regex-based extractor for &lt;a&gt; elements. This is a lightweight scraper,
    /// not an HTML parser: it does not decode entities or handle malformed markup.
    /// </summary>
    static class LinkFinder
    {
        // Matches a complete <a ...>...</a> element. Requiring either '>' or whitespace
        // directly after "<a" keeps tags such as <abbr>, <article> or <audio> from being
        // mistaken for anchors. The "attrs" group holds the raw attribute text of the
        // opening tag (empty when the tag has no attributes).
        private static readonly Regex AnchorPattern = new Regex(
            @"<a(?<attrs>\s[^>]*)?>.*?</a>",
            RegexOptions.Singleline | RegexOptions.IgnoreCase);

        // Matches href="..." or href='...'. The look-behind rejects attributes that merely
        // end in "href", e.g. data-href. Both alternatives capture into the same "url" group.
        private static readonly Regex HrefPattern = new Regex(
            @"(?<![\w-])href\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)')",
            RegexOptions.Singleline | RegexOptions.IgnoreCase);

        // Matches any tag together with the whitespace immediately surrounding it.
        private static readonly Regex TagPattern = new Regex(
            @"\s*<.*?>\s*",
            RegexOptions.Singleline);

        /// <summary>
        /// Finds every anchor element in the given HTML text.
        /// </summary>
        /// <param name="file">The HTML source to scan.</param>
        /// <returns>
        /// One <see cref="LinkItem"/> per anchor, in document order. The list is empty
        /// when no anchors are present.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
        public static List<LinkItem> Find(string file)
        {
            if (file == null)
            {
                throw new ArgumentNullException("file");
            }

            List<LinkItem> list = new List<LinkItem>();

            // 1.
            // Find all anchor elements in file.
            // 2.
            // Loop over each match.
            foreach (Match anchor in AnchorPattern.Matches(file))
            {
                LinkItem item = new LinkItem();

                // 3.
                // Get href attribute. Only the opening tag's attributes are searched so
                // that "href=" appearing inside the link text is never picked up.
                Match href = HrefPattern.Match(anchor.Groups["attrs"].Value);
                if (href.Success)
                {
                    item.Href = href.Groups["url"].Value;
                }

                // 4.
                // Remove inner tags from text.
                item.Text = TagPattern.Replace(anchor.Value, "");

                list.Add(item);
            }

            return list;
        }
    }
}
run
|
edit
|
history
|
help
0
Test DateTimeStyles.AdjustToUniversal
((((STReaming))))%^%% The 2022 RBC Canadian Open Golf PGA Tour event
Métodos y cómo llamarlos
|= Caching
2.2 gz
simult 3x3
Uri Parts
p link gener tor
Codility CyclicRotation
codejam1a1