+ Reply to Thread
Results 1 to 12 of 12

Data Scraping from the WebSite

Hybrid View

  1. #1
    Registered User
    Join Date
    04-30-2007
    Posts
    26

    Data Scraping from the WebSite

    Hi,

    Web scraping from the following site fails to get data

    http://www.religareonline.com/resear...arch-reports/4

    Anything iam really missing or missing the Element ID

    Sathis


    
    Sub Data()
    
        Dim xmlHttp As Object
        Dim TR_col As Object, TR As Object
        Dim TD_col As Object, TD As Object
        Dim row As Long, col As Long
    
        Set xmlHttp = CreateObject("MSXML2.XMLHTTP.6.0")
        xmlHttp.Open "GET", "http://www.religareonline.com/research/research-reports/currency-research-reports/4", False
        xmlHttp.setRequestHeader "Content-Type", "text/xml"
        xmlHttp.send
    
        Dim html As Object
        Set html = CreateObject("htmlfile")
        html.body.innerHTML = xmlHttp.ResponseText
    
        Dim tbl As Object
        Set tbl = html.getElementById("divReportListingContent")
    
        row = 1
        col = 1
    
        Set TR_col = html.getelementsbytagname("TR")
        For Each TR In TR_col
            Set TD_col = TR.getelementsbytagname("TD")
            For Each TD In TD_col
                Cells(row, col) = TD.innerText
                col = col + 1
            Next
            col = 1
            row = row + 1
        Next
    End Sub

  2. #2
    Forum Expert
    Join Date
    03-28-2012
    Location
    TBA
    MS-Off Ver
    Office 365
    Posts
    12,454

    Re: Data Scraping from the WebSite

    I do not know what was the expected output, but the code seems to work and get some data. See attached.
    Attached Files Attached Files

  3. #3
    Registered User
    Join Date
    04-30-2007
    Posts
    26

    Re: Data Scraping from the WebSite

    Quote Originally Posted by AB33 View Post
    I do not know what was the expected output, but the code seems to work and get some data. See attached.
    Hi,
    Thanks for the Reply,
    The data below the headers (All report,.........) need to be queried.

    Any Insights.

    Sathish

  4. #4
    Forum Expert JasperD's Avatar
    Join Date
    05-07-2013
    Location
    Netherlands
    MS-Off Ver
    Excel 2016
    Posts
    1,393

    Re: Data Scraping from the WebSite

    You're looking for something like this:
    Set reference to Microsoft Scriptcontrol

    Sub ExcelForum()
    
        Dim json As String
        Dim Tables As Object, Rows As Object, TableKeys As Object, key
        Dim ScriptEngine As ScriptControl
        
        Set ScriptEngine = New ScriptControl
        With ScriptEngine
            .Language = "JScript"
            .AddCode "function getKeys(jsonObj) { var keys = []; for (var i in jsonObj) { keys.push(i); } return keys; } "
        End With
        
        ActiveSheet.Cells.ClearContents
        
        With CreateObject("WinHTTP.WinHTTPRequest.5.1")
            .Open "POST", "http://www.religareonline.com/Research/DefaultAPI.aspx?action=RES-REP-GETRESLIST", False
            .setrequestheader "Referer", "http://www.religareonline.com/research/research-reports/currency-research-reports/4"
            .setrequestheader "Cookie", "_ga=GA1.2.1071975320.1437286029; activeMenu=n3Research-menu_r_Res_Rep-submenu_res_Currency; _gat=1"
            .send "p_pageno=1&p_code=&p_asset=4&p_assettype=Currency&p_broker=&p_status=0&p_usertype=0&p_title=&p_expiredonly=&p_todate=&p_fromdate=&p_id=&p_exchangeName=&p_exchange=&p_cname=&p_InsType=&p_type=&p_Symbol=&p_ExpiryDate=&p_featured=&p_OptionType=&p_StrikePrice=&EquityTag=&IPOTag=&MFTag=&DerTag=&comTag=&curTag=&p_PassiveModFlag=&p_DeliveryID=1&p_TimehorizonID=&p_RCommID=&p_RepType=&p_assettypeID=4"
            json = .responseText
        End With
        
        Set Tables = ScriptEngine.Eval("(" & json & ")")
        json = CallByName(Tables, "data", VbGet)
        Set Tables = ScriptEngine.Eval("(" & json & ")")
        Set Tables = CallByName(Tables, "response", VbGet)
        Set Tables = CallByName(Tables, "reportlist", VbGet)
        Set Tables = CallByName(Tables, "report", VbGet)
        
        Set TableKeys = ScriptEngine.Run("getKeys", Tables)
        
        For Each key In TableKeys
            Set Rows = CallByName(Tables, key, VbGet)
            If CallByName(Rows, "title", VbGet) = Replace(CallByName(Rows, "desc", VbGet), "+", " ") Then GoTo nextkey
            ActiveSheet.Cells(ActiveSheet.Rows.Count, 1).End(xlUp).Offset(1) = CallByName(Rows, "publisheddate", VbGet)
            ActiveSheet.Cells(ActiveSheet.Rows.Count, 1).End(xlUp).Offset(, 1) = CallByName(Rows, "title", VbGet)
            ActiveSheet.Cells(ActiveSheet.Rows.Count, 1).End(xlUp).Offset(1) = Replace(CallByName(Rows, "desc", VbGet), "+", " ")
    nextkey:
        Next key
        ActiveSheet.Cells.WrapText = False
        
    json = vbNullString
    Set Rows = Nothing
    Set Tables = Nothing
    Set TableKeys = Nothing
    Set ScriptEngine = Nothing
    
    
    End Sub
    As always - props to Kyle123 for originally coming up with this
    Please click the * below if this helps

  5. #5
    Registered User
    Join Date
    04-30-2007
    Posts
    26

    Re: Data Scraping from the WebSite

    Hi,
    Thanks again for your reply.
    I was struggling for the code for the past 3 days and it worked.
    You are doing great.

    Sathis

    Quote Originally Posted by JasperD View Post
    You're looking for something like this:
    Set reference to Microsoft Scriptcontrol

    Sub ExcelForum()
    
        Dim json As String
        Dim Tables As Object, Rows As Object, TableKeys As Object, key
        Dim ScriptEngine As ScriptControl
        
        Set ScriptEngine = New ScriptControl
        With ScriptEngine
            .Language = "JScript"
            .AddCode "function getKeys(jsonObj) { var keys = []; for (var i in jsonObj) { keys.push(i); } return keys; } "
        End With
        
        ActiveSheet.Cells.ClearContents
        
        With CreateObject("WinHTTP.WinHTTPRequest.5.1")
            .Open "POST", "http://www.religareonline.com/Research/DefaultAPI.aspx?action=RES-REP-GETRESLIST", False
            .setrequestheader "Referer", "http://www.religareonline.com/research/research-reports/currency-research-reports/4"
            .setrequestheader "Cookie", "_ga=GA1.2.1071975320.1437286029; activeMenu=n3Research-menu_r_Res_Rep-submenu_res_Currency; _gat=1"
            .send "p_pageno=1&p_code=&p_asset=4&p_assettype=Currency&p_broker=&p_status=0&p_usertype=0&p_title=&p_expiredonly=&p_todate=&p_fromdate=&p_id=&p_exchangeName=&p_exchange=&p_cname=&p_InsType=&p_type=&p_Symbol=&p_ExpiryDate=&p_featured=&p_OptionType=&p_StrikePrice=&EquityTag=&IPOTag=&MFTag=&DerTag=&comTag=&curTag=&p_PassiveModFlag=&p_DeliveryID=1&p_TimehorizonID=&p_RCommID=&p_RepType=&p_assettypeID=4"
            json = .responseText
        End With
        
        Set Tables = ScriptEngine.Eval("(" & json & ")")
        json = CallByName(Tables, "data", VbGet)
        Set Tables = ScriptEngine.Eval("(" & json & ")")
        Set Tables = CallByName(Tables, "response", VbGet)
        Set Tables = CallByName(Tables, "reportlist", VbGet)
        Set Tables = CallByName(Tables, "report", VbGet)
        
        Set TableKeys = ScriptEngine.Run("getKeys", Tables)
        
        For Each key In TableKeys
            Set Rows = CallByName(Tables, key, VbGet)
            If CallByName(Rows, "title", VbGet) = Replace(CallByName(Rows, "desc", VbGet), "+", " ") Then GoTo nextkey
            ActiveSheet.Cells(ActiveSheet.Rows.Count, 1).End(xlUp).Offset(1) = CallByName(Rows, "publisheddate", VbGet)
            ActiveSheet.Cells(ActiveSheet.Rows.Count, 1).End(xlUp).Offset(, 1) = CallByName(Rows, "title", VbGet)
            ActiveSheet.Cells(ActiveSheet.Rows.Count, 1).End(xlUp).Offset(1) = Replace(CallByName(Rows, "desc", VbGet), "+", " ")
    nextkey:
        Next key
        ActiveSheet.Cells.WrapText = False
        
    json = vbNullString
    Set Rows = Nothing
    Set Tables = Nothing
    Set TableKeys = Nothing
    Set ScriptEngine = Nothing
    
    
    End Sub
    As always - props to Kyle123 for originally coming up with this

  6. #6
    Forum Expert
    Join Date
    02-22-2013
    Location
    London, UK
    MS-Off Ver
    Office 365
    Posts
    1,218

    Re: Data Scraping from the WebSite

    @JAsperD, will look into your version more in details, looks interesting...

    Alternatively, using Internet Explorer for scraping seems to work in this case...

    For instance, try:
    Sub test()
    
        Dim a(1 To 15), b(1 To 15, 1 To 3), e, i As Integer, c As Range
    
        With CreateObject("InternetExplorer.Application")
    
            .Navigate "http://www.religareonline.com/research/research-reports/currency-research-reports/4"
            .Visible = True
    
            While .busy Or .Readystate <> 4: DoEvents: Wend
    
            For Each e In .document.getElementById("divReportListingContent").getElementsByTagName("div")
                If e.classname Like "mT10*" Then
                    i = i + 1
                    a(i) = e.Children(0).Children(0).getElementsByTagName("a")(0)
                    b(i, 1) = e.Children(0).Children(0).innertext
                    b(i, 2) = Trim$(Split(e.Children(0).Children(1).innertext, "|")(1))
                    b(i, 3) = e.Children(0).Children(2).innertext
                End If
            Next
    
            With Sheets.Add.Cells(1).Resize(UBound(b, 1), UBound(b, 2))
                .Value = b
                For i = 1 To UBound(a, 1)
                    .Parent.Hyperlinks.Add .Cells(i, 1), a(i), .Cells(i, 1).Value
                Next
                .EntireColumn.AutoFit
            End With
    
        End With
    
    End Sub
    where the output will be stored a new sheet; Col.A contains report name w/ hyperlink, Col.B date, Col.C description.

    Cheers,
    berlan

  7. #7
    Registered User
    Join Date
    04-30-2007
    Posts
    26

    Re: Data Scraping from the WebSite

    Hi,

    Thanks for your reply
    Your code works like a charm.
    Sathish

    Quote Originally Posted by berlan View Post
    @JAsperD, will look into your version more in details, looks interesting...

    Alternatively, using Internet Explorer for scraping seems to work in this case...

    For instance, try:
    Sub test()
    
        Dim a(1 To 15), b(1 To 15, 1 To 3), e, i As Integer, c As Range
    
        With CreateObject("InternetExplorer.Application")
    
            .Navigate "http://www.religareonline.com/research/research-reports/currency-research-reports/4"
            .Visible = True
    
            While .busy Or .Readystate <> 4: DoEvents: Wend
    
            For Each e In .document.getElementById("divReportListingContent").getElementsByTagName("div")
                If e.classname Like "mT10*" Then
                    i = i + 1
                    a(i) = e.Children(0).Children(0).getElementsByTagName("a")(0)
                    b(i, 1) = e.Children(0).Children(0).innertext
                    b(i, 2) = Trim$(Split(e.Children(0).Children(1).innertext, "|")(1))
                    b(i, 3) = e.Children(0).Children(2).innertext
                End If
            Next
    
            With Sheets.Add.Cells(1).Resize(UBound(b, 1), UBound(b, 2))
                .Value = b
                For i = 1 To UBound(a, 1)
                    .Parent.Hyperlinks.Add .Cells(i, 1), a(i), .Cells(i, 1).Value
                Next
                .EntireColumn.AutoFit
            End With
    
        End With
    
    End Sub
    where the output will be stored a new sheet; Col.A contains report name w/ hyperlink, Col.B date, Col.C description.

    Cheers,
    berlan

  8. #8
    Registered User
    Join Date
    04-30-2007
    Posts
    26

    Re: Data Scraping from the WebSite

    Hi,

    Thanks all for your posts
    Closing the IE is missing,

    Quiet.IE
    Sub test()
    
        Dim a(1 To 15), b(1 To 15, 1 To 3), e, i As Integer, c As Range
    
        With CreateObject("InternetExplorer.Application")
    
            .Navigate "http://www.religareonline.com/research/research-reports/currency-research-reports/4"
            .Visible = True
    
            While .busy Or .Readystate <> 4: DoEvents: Wend
    
            For Each e In .document.getElementById("divReportListingContent").getElementsByTagName("div")
                If e.classname Like "mT10*" Then
                    i = i + 1
                    a(i) = e.Children(0).Children(0).getElementsByTagName("a")(0)
                    b(i, 1) = e.Children(0).Children(0).innertext
                    b(i, 2) = Trim$(Split(e.Children(0).Children(1).innertext, "|")(1))
                    b(i, 3) = e.Children(0).Children(2).innertext
                End If
            Next
    
            With Sheets.Add.Cells(1).Resize(UBound(b, 1), UBound(b, 2))
                .Value = b
                For i = 1 To UBound(a, 1)
                    .Parent.Hyperlinks.Add .Cells(i, 1), a(i), .Cells(i, 1).Value
                Next
                .EntireColumn.AutoFit
            End With
    
        End With
    
    End Sub

    Sathis
    Last edited by sathis; 07-19-2015 at 10:16 AM.

  9. #9
    Forum Expert
    Join Date
    03-28-2012
    Location
    TBA
    MS-Off Ver
    Office 365
    Posts
    12,454

    Re: Data Scraping from the WebSite

    Jasper and Berlan,
    I suspect the OP is looking to download the data embedded with in JAVA and PDf file, not simply the data on the body of the page.
    I am unable to run Jasper's code as VBA keep tells me "Active component can not create object" despite I am connected to the Script control (Ticked). I am sure there is something missing from my reference library.

  10. #10
    Forum Expert
    Join Date
    02-22-2013
    Location
    London, UK
    MS-Off Ver
    Office 365
    Posts
    1,218

    Re: Data Scraping from the WebSite

    You're welcome, and thanks for the feedback!

  11. #11
    Registered User
    Join Date
    04-30-2007
    Posts
    26

    Re: Data Scraping from the WebSite

    Hi,

    Web scraping from the following site got error , while using the following code.

    Need your help to resolve.

    Sub DataBSE()
    
        Dim a(1 To 15), b(1 To 15, 1 To 3), E, i As Integer, c As Range
    
        With CreateObject("InternetExplorer.Application")
    
            .Navigate "http://www.bseindia.com/markets/currencyDerivatives/CurrDeriArchiveSum.aspx?expandable=4"
            .Visible = False
    
            While .busy Or .Readystate <> 4: DoEvents: Wend
    
            For Each E In .Document.getElementById("ctl00_ContentPlaceHolder1_ddlContracts").getElementsByTagName("div")
                If E.classname Like "mT10*" Then
                    i = i + 1
                    a(i) = E.Children(0).Children(0).getElementsByTagName("a")(0)
                    b(i, 1) = E.Children(0).Children(0).innertext
                    b(i, 2) = Trim$(Split(E.Children(0).Children(1).innertext, "|")(1))
                    b(i, 3) = E.Children(0).Children(2).innertext
                End If
            Next
    
            With Sheets("USDINR").Cells(1).Resize(UBound(b, 1), UBound(b, 2))
                .Value = b
                For i = 1 To UBound(a, 1)
                    .Parent.Add .Cells(i, 1), a(i), .Cells(i, 1).Value  
                Next
                .EntireColumn.AutoFit
            End With
        
            End With
    
        Dim Shell As Object
        Dim IE As Object
     
        Set Shell = CreateObject("Shell.Application")
     
        For Each IE In Shell.Windows
            If TypeName(IE.Document) = "HTMLDocument" Then
                IE.Quit
            End If
        Next
    
        
    End Sub

  12. #12
    Forum Expert
    Join Date
    02-22-2013
    Location
    London, UK
    MS-Off Ver
    Office 365
    Posts
    1,218

    Re: Data Scraping from the WebSite

    Hi,

    change,
    .Parent.Add .Cells(i, 1), a(i), .Cells(i, 1).Value
    to
    .Parent.Hyperlinks.Add .Cells(i, 1), a(i), .Cells(i, 1).Value
    or feel free delete the whole loop for adding hyperlinks..

    Cheers,
    berlan

+ Reply to Thread

Thread Information

Users Browsing this Thread

There are currently 1 users browsing this thread. (0 members and 1 guests)

Similar Threads

  1. problem while scraping data from website
    By dps700 in forum Excel Programming / VBA / Macros
    Replies: 0
    Last Post: 12-09-2014, 09:46 AM
  2. Automated data scraping from website into excel
    By kavin in forum Excel Programming / VBA / Macros
    Replies: 1
    Last Post: 09-10-2014, 01:42 PM
  3. Scraping website into excel
    By eodsolo in forum Excel Programming / VBA / Macros
    Replies: 7
    Last Post: 07-12-2014, 06:25 AM
  4. Replies: 1
    Last Post: 02-17-2014, 11:21 PM
  5. Scraping website data when Get External Data from Web doesnt work
    By Zipping2010 in forum Excel Programming / VBA / Macros
    Replies: 1
    Last Post: 02-01-2013, 12:16 AM
  6. scraping data from a website
    By redpanda in forum Excel Programming / VBA / Macros
    Replies: 9
    Last Post: 07-05-2012, 01:20 PM
  7. Scraping website data into Excel
    By oliver79 in forum Excel General
    Replies: 0
    Last Post: 06-13-2010, 02:15 PM

Tags for this Thread

Bookmarks

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts

Search Engine Friendly URLs by vBSEO 3.6.0 RC 1