royrolls81
New Member
>>> use code - tag <<<
Code:
Sub WebScrape()
'Variable declarations
Dim HTTPReq As Object
Dim HTMLDoc As Object
Dim tbl As Object
Dim row As Object
Dim cell As Object
Dim rowNum As Long
Dim colNum As Long
Dim csvLine As String
Dim csvFilePath As String
Dim link As Object
Dim subLink As Object
'Set the URL to be scraped
Url = "https://www.soccerstats.com/matches.asp?matchday=2&listing=1"
'Set the CSV file name
csvFilePath = "scrapedData.csv"
'Create an instance of the MSXML2.XMLHTTP object and make the HTTP request
Set HTTPReq = CreateObject("MSXML2.XMLHTTP")
HTTPReq.Open "GET", Url, False
HTTPReq.setTimeouts 60000, 60000, 60000, 60000 'Set the time-out period to 1 minute
HTTPReq.send
'Create an instance of the HTML document and parse the response text
Set HTMLDoc = CreateObject("HTMLfile")
HTMLDoc.body.innerHTML = HTTPReq.responseText
'Get all the links on the page and scrape data from each link
For Each link In HTMLDoc.getElementsByTagName("a")
'Get the href attribute of the link
linkHref = link.getAttribute("href")
'Check if the link is valid
If linkHref <> "" And Not linkHref Like "mailto:*" Then
'Create an instance of the MSXML2.XMLHTTP object and make the HTTP request
Set HTTPReq = CreateObject("MSXML2.XMLHTTP")
HTTPReq.Open "GET", linkHref, False
HTTPReq.setTimeouts 60000, 60000, 60000, 60000 'Set the time-out period to 1 minute
HTTPReq.send
'Create an instance of the HTML document and parse the response text
Set HTMLDoc = CreateObject("HTMLfile")
HTMLDoc.body.innerHTML = HTTPReq.responseText
'Get all the sub links on the page and scrape data from each sub link
For Each subLink In HTMLDoc.getElementsByTagName("a")
'Get the href attribute of the sub link
subLinkHref = subLink.getAttribute("href")
'Check if the sub link is valid
If subLinkHref <> "" And Not subLinkHref Like "mailto:*" Then
'Create an instance of the MSXML2.XMLHTTP object and make the HTTP request
Set HTTPReq = CreateObject("MSXML2.XMLHTTP")
HTTPReq.Open "GET", subLinkHref, False
HTTPReq.setTimeouts 60000, 60000, 60000, 60000 'Set the time-out period to 1 minute
HTTPReq.send
'Create an instance of the HTML document and parse the response text
Set HTMLDoc = CreateObject("HTMLfile")
HTMLDoc.body.innerHTML = HTTPReq.responseText
'Get all the tables on the page and write data to CSV file
For Each tbl In HTMLDoc.getElementsByTagName("table")
'Write the table headers to the CSV file
Dim fso As Object
Set fso = CreateObject("Scripting.FileSystemObject")
Dim file As Object
Set file = fso.CreateTextFile(csvFilePath, True, True) 'create the file with write access
For Each cell In tbl.Rows(0).Cells
csvLine = csvLine & cell.innerText & ","
Next cell
csvLine = Left(csvLine, Len(csvLine) - 1)
file.WriteLine csvLine
csvLine = ""
'Loop through each row in the table and write to the CSV file
For rowNum = 1 To tbl.Rows.Length - 1
For colNum = 0 To tbl.Rows(rowNum).Cells.Length - 1
csvLine = csvLine & tbl.Rows(rowNum).Cells(colNum).innerText & ","
Next colNum
csvLine = Left(csvLine, Len(csvLine) - 1)
file.WriteLine csvLine
csvLine = ""
Next rowNum
'Close the csv file
file.Close
Next tbl
End If
Next subLink
End If
Next link
'Release the object references
Set tbl = Nothing
Set file = Nothing
Set fso = Nothing
Set HTTPReq = Nothing
Set HTMLDoc = Nothing
'Notify the user that the task is complete
MsgBox "Data scraping and csv creation complete."
End Sub
Last edited by a moderator: