shahin
Active Member
Hi there! Good day. I've made a scraper which is parsing data as I wanted it to. However, the problem is that the code I've written is looking damn messy cause i had to overwrite my code to get the data as in some elements data were not present rather it was the next element where I found it. There must be any easy way or well organized code that should be replaced with what I've written. Thanks in advance. Here I'm pasting what I've written and attaching an excel file to show that it is working nicely.
Code:
Sub houzzData()
Const url = "http://www.houzz.com/pro/brickmoondesign/brickmoon-design"
Dim html As New HTMLDocument, cel As Range
Dim topics As Object, posts As Object, data As HTMLHtmlElement
x = 2
For Each cel In Range("A2:A7")
With CreateObject("MSXML2.serverXMLHTTP")
.Open "GET", cel, False
.send
html.body.innerHTML = .responseText
End With
Set topics = html.getElementsByClassName("container profile-carded")
On Error Resume Next
For i = 0 To topics.Length - 1
Set data = topics(i)
Cells(x, 2) = data.getElementsByClassName("profile-full-name")(0).innerText
Cells(x, 3) = Replace(data.getElementsByClassName("info-list-text")(1).innerText, "Contact: ", "")
Cells(x, 4) = data.getElementsByClassName("info-list-text")(1).getElementsByTagName("span")(0).innerText
Cells(x, 5) = data.getElementsByClassName("info-list-text")(1).getElementsByTagName("span")(1).innerText
Cells(x, 6) = data.getElementsByClassName("info-list-text")(1).getElementsByTagName("span")(2).innerText
Cells(x, 7) = data.getElementsByClassName("info-list-text")(1).getElementsByTagName("span")(3).innerText
Cells(x, 8) = data.getElementsByClassName("info-list-text")(1).getElementsByTagName("span")(4).innerText
Cells(x, 9) = data.getElementsByClassName("info-list-text")(1).getElementsByTagName("span")(5).innerText
Cells(x, 4) = data.getElementsByClassName("info-list-text")(2).getElementsByTagName("span")(0).innerText
Cells(x, 5) = data.getElementsByClassName("info-list-text")(2).getElementsByTagName("span")(1).innerText
Cells(x, 6) = data.getElementsByClassName("info-list-text")(2).getElementsByTagName("span")(2).innerText
Cells(x, 7) = data.getElementsByClassName("info-list-text")(2).getElementsByTagName("span")(3).innerText
Cells(x, 8) = data.getElementsByClassName("info-list-text")(2).getElementsByTagName("span")(4).innerText
Cells(x, 9) = data.getElementsByClassName("info-list-text")(2).getElementsByTagName("span")(5).innerText
Cells(x, 10) = data.getElementsByClassName("pro-contact-text")(0).innerText
Cells(x, 11) = data.getElementsByClassName("proWebsiteLink")(0).href
x = x + 1
Next i
Next cel
End Sub
Attachments
Last edited: