I have this macro with which I can search by Xpath if the HTML is simple
Sub GetByXpathFromHTML_String()
Dim XDoc As Object, myList As Object
Dim myStr As String
Set XDoc = CreateObject("MSXML2.DOMDocument.6.0")
XDoc.async = False
XDoc.SetProperty "SelectionLanguage", "XPath"
XDoc.validateOnParse = False
myStr = "<html> " & _
"<body> " & _
"<div>This is a division " & _
"<a target='_top' href='default.asp'>HTML Home</a>" & _
"<a target='_top' href='default.asp'>HTML Introduction</a> " & _
"<a target='_top' href='default.asp'>HTML Editors</a> " & _
"<a target='_top' href='default.asp'>HTML Basic</a>" & _
"</div> " & _
"<span class='color_h1'> HTML Tutorial</span>" & _
"</body>" & _
"</html>"
XDoc.LoadXML (myStr)
XDoc.Save (ThisWorkbook.Path & "\Test.xml")
Set myList = XDoc.SelectNodes("//a[contains(.,'HTML Editors')]")
If XDoc.parseError.ErrorCode <> 0 Then
MsgBox "Error at Line No: " & XDoc.parseError.Line & vbCrLf & vbCrLf & "Error : " & XDoc.parseError.reason
GoTo SafeExit:
End If
If myList.Length = 0 Then GoTo SafeExit:
MsgBox "Number of found items= " & myList.Length
MsgBox "Found item: " & myList(0).Text
SafeExit:
If myList.Length = 0 Then
MsgBox "Not found....", vbCritical
End If
Set myList = Nothing
Set XDoc = Nothing
End Sub
Now, if I want to use the same Xpath but with the source change to an URL, I get the error "DTD is prohibited".
Why is happening this and how to fix it?
I can't use 3rd party tool like Selenium since I cannot use or installed exe files due to security policy in company.
Thanks in advance for any help
Sub GetXpathFromWeb()
Dim strURL As String, objHTTP As Object, XDoc As Object, myList As Object, Num As Integer, i As Integer
Dim myStr As String
strURL = "https://www.w3schools.com/html/"
Set objHTTP = CreateObject("MSXML2.XMLHTTP.6.0")
objHTTP.Open "GET", strURL, False
objHTTP.send
Set XDoc = CreateObject("MSXML2.DOMDocument.6.0")
XDoc.async = False
XDoc.SetProperty "SelectionLanguage", "XPath"
XDoc.validateOnParse = False
XDoc.LoadXML (objHTTP.responseText)
XDoc.Save (ThisWorkbook.Path & "\Test.xml")
Set myList = XDoc.SelectNodes("//a[contains(.,'HTML Editors')]")
If XDoc.parseError.ErrorCode <> 0 Then
MsgBox "Error at Line No: " & XDoc.parseError.Line & vbCrLf & vbCrLf & "Error : " & XDoc.parseError.reason
GoTo SafeExit:
End If
If myList.Length = 0 Then GoTo SafeExit:
SafeExit:
If myList.Length = 0 Then
MsgBox "Not found....", vbCritical, objHTTP.Status
End If
Set myList = Nothing
Set XDoc = Nothing
Set objHTTP = Nothing
End Sub
Bookmarks