It looks like you're new here. If you want to get involved, click one of these buttons!
Imports System.Net.WebClient
Imports System.Text.RegularExpressions
' Simple immutable pairing of a URL and the file at the end of it (if any)
' Example: New Link("http://me.h/bl.a").File - returns "bl.a"
' Example 2: New Link("http://me.h/bl.a").URL - returns "http://me.h/bl.a"
' Example 3: New Link("http://me.h").File - returns "" (a bare host has no file)
' Example 4: New Link("http://me.h/a.php?x=1").File - returns "a.php"
Public Class Link
    Private ReadOnly link_url As String
    Private ReadOnly link_file As String

    ' url: the address to wrap; it is stored exactly as given.
    ' Throws System.ArgumentNullException when url is Nothing.
    Public Sub New(ByVal url As String)
        If url Is Nothing Then
            Throw New System.ArgumentNullException("url")
        End If
        link_url = url
        link_file = ExtractFileName(url)
    End Sub

    ' The URL exactly as it was passed to the constructor.
    Public ReadOnly Property URL() As String
        Get
            Return link_url
        End Get
    End Property

    ' The text after the last "/" of the URL's path, or "" when there is none.
    Public ReadOnly Property File() As String
        Get
            Return link_file
        End Get
    End Property

    ' Returns the last path segment of url, ignoring any query string or fragment.
    Private Shared Function ExtractFileName(ByVal url As String) As String
        Dim path As String = url

        ' Drop "?query" and "#fragment" first so a "/" inside them is not
        ' mistaken for a path separator.
        Dim cut As Integer = path.IndexOfAny(New Char() {"?"c, "#"c})
        If cut >= 0 Then
            path = path.Substring(0, cut)
        End If

        Dim last_slash As Integer = path.LastIndexOf("/"c)
        Dim scheme_end As Integer = path.IndexOf("://", System.StringComparison.Ordinal)

        ' When the last "/" belongs to the "://" separator the URL has no path,
        ' so what follows is the host name rather than a file.
        If scheme_end >= 0 AndAlso last_slash < scheme_end + 3 Then
            Return String.Empty
        End If

        ' LastIndexOf returns -1 when there is no "/", which makes this the whole string.
        Return path.Substring(last_slash + 1)
    End Function
End Class
' Class for extracting links from HTML over HTTP
' Builds a list of Link objects representing both the full URL and file (if any)
' Supports only absolute links (ie. 'http://' or 'https://')
Public Class LinkReaper
    ' Captures the href value of an <a ...> tag.
    '  - "<a\s" keeps other tags that merely start with "a" (abbr, area, ...) out.
    '  - "[^>]*?" cannot cross ">", so the href must belong to the same tag.
    '  - Group 1 is the opening quote (single or double), group 2 is the URL.
    ' Built once and shared, instead of being re-parsed on every call.
    Private Shared ReadOnly anchor_pattern As New Regex("<a\s(?:[^>]*?\s)?href\s*=\s*([""'])(.*?)\1", RegexOptions.IgnoreCase)

    Private link_list As ArrayList

    ' True when candidate starts with an http or https scheme, compared
    ' ordinally and without regard to case.
    Private Shared Function IsAbsoluteHttpUrl(ByVal candidate As String) As Boolean
        Return candidate.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) _
            OrElse candidate.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase)
    End Function

    ' Scans html for anchor tags and appends a Link to link_list for every
    ' absolute http/https href found. Relative links are skipped.
    Private Sub ParseURLsFromHTML(ByVal html As String)
        For Each found As Match In anchor_pattern.Matches(html)
            ' Surrounding whitespace inside the quotes is not part of the URL.
            Dim match_url As String = found.Groups(2).Value.Trim()
            If IsAbsoluteHttpUrl(match_url) Then
                link_list.Add(New Link(match_url))
            End If
        Next
    End Sub

    ' Downloads the page at url and collects its links.
    ' Exceptions raised by WebClient.DownloadString (e.g. for an unreachable
    ' address) are not caught here and propagate to the caller.
    Public Sub New(ByVal url As String)
        link_list = New ArrayList()
        ' WebClient is IDisposable; release it as soon as the download finishes.
        Using client As New System.Net.WebClient()
            ParseURLsFromHTML(client.DownloadString(url))
        End Using
    End Sub

    ' The Link objects collected by the constructor, in document order.
    ' This is the internal list itself, not a copy.
    ReadOnly Property GetLinks() As ArrayList
        Get
            Return link_list
        End Get
    End Property
End Class
' Example usage: fetch a page and print every absolute link found on it.
' Executable statements must live inside a procedure in VB, hence the module.
Module LinkReaperDemo
    Sub Main()
        Dim links As LinkReaper = New LinkReaper("http://google.co.uk")
        ' The loop variable is not called "link": VB is case-insensitive, so
        ' that name would shadow the Link type inside the loop.
        For Each found_link As Link In links.GetLinks()
            System.Console.WriteLine(found_link.URL) ' full URL
            System.Console.WriteLine(found_link.File) ' file at the end of the URL (if any)
        Next
    End Sub
End Module