WinBatch Tech Support Home

Database Search

If you can't find the information using the categories below, post a question over in our WinBatch Tech Support Forum.

TechHome

OLE with MSIE
plus

Can't find the information you are looking for here? Then leave a message over on our WinBatch Tech Support Forum.

WebPage Link Lister


;***************************************************************************
;**
;**          Web Page Scraper
;**
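;** Purpose: Extract information (URL, links, anchors, images, body) from a webpage
;** Inputs:  strUrl - address of the page to load (set in the main script below)
;** Outputs: Dialogs showing the data extracted from the page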
;**
;** Deana Falk
;** Revisions: 2013.10.22 Initial Release
;**
;**
;***************************************************************************


#DefineFunction udfIEPageLoadWait( objIE )
    ; Waits until the browser object, and then its document, report a
    ; readyState of 'complete' or 4, polling every 0.1 seconds.
    ;   objIE   browser object to wait on
    ; Returns 1 once both are ready. If objIE is not an object, a message
    ; is shown and the script exits. There is no timeout.
    If !udfIsObject(objIE)
      Pause('udfIEPageLoadWait','Not a Valid Object')
      Exit
    EndIf

    ; Phase 1: wait on the browser object itself.
    bBrowserReady = @FALSE
    While !bBrowserReady
       curState = objIE.readyState
       If curState == 'complete' || curState == 4
          bBrowserReady = @TRUE
       Else
          TimeDelay(0.1)
       EndIf
    EndWhile

    ; Phase 2: wait on the document, which is only queried after phase 1.
    bDocReady = @FALSE
    While !bDocReady
       curState = objIE.document.readyState
       If curState == 'complete' || curState == 4
          bDocReady = @TRUE
       Else
          TimeDelay(0.1)
       EndIf
    EndWhile
    Return 1
#EndFunction

#DefineFunction udfIECreate( strUrl )
   ; Creates a visible InternetExplorer.Application instance, navigates it
   ; to strUrl and waits for the page to finish loading.
   ;   strUrl   address to navigate to
   ; Returns the browser object. If the created value is not an object,
   ; a message is shown and the script exits.
   objBrowser = ObjectCreate( 'InternetExplorer.Application')
   If udfIsObject(objBrowser)
      objBrowser.visible = @TRUE
      objBrowser.navigate( strUrl )
      ; Do not hand the browser back until the page is fully loaded.
      udfIEPageLoadWait( objBrowser )
      Return objBrowser
   EndIf
   Pause('udfIECreate','Not a Valid Object')
   Exit
#EndFunction

#DefineFunction udfIEAttach( strMode, strString )
   ; Scans the open shell windows for a browser whose page contains
   ; strString and returns the first matching window object.
   ;
   ; strMode (case-insensitive)
   ;   title strString is (part of) the title of the page you are trying to access
   ;   url   strString is (part of) the url of the page you are trying to access
   ;   text  strString is some text of the page you are trying to access
   ;   html  strString is some html of the page you are trying to access
   ;
   ; Returns the matching window object, or 0 when no window matches.
   ; An unrecognized strMode shows a message and exits the script.
   strMode = StrLower(strMode)

   ; Validate the mode once, up front. The original relied on a
   ; 'Case strMode' default inside 'Switch @TRUE', which does not fire
   ; for an unrecognized mode string.
   If strMode != 'title' && strMode != 'url' && strMode != 'text' && strMode != 'html'
      Pause('udfIEAttach','Invalid Mode Specified')
      Exit
   EndIf

   objShell = ObjectCreate('Shell.Application')
   objShellWindows = objShell.Windows(); collection of all ShellWindows (IE and File Explorer)
   ;ForEach objWindow In objShellWindows
   For x = 0 To objShellWindows.count-1
      objWindow = objShellWindows.Item(x)
      ; Check window object is a valid browser, if not, skip it
      bIsBrowser = @TRUE

      ; Probe 1: is .type a valid property?
      ; ret is reset first so a value left over from an earlier probe
      ; cannot mask a failed read.
      ; NOTE(review): assumes a suppressed error leaves ret unassigned or 0 - confirm.
      ret = 0
      ErrorMode(@OFF)
      ret = objWindow.type
      ErrorMode(@CANCEL)
      If ret == 0 Then bIsBrowser = @FALSE

      ; Probe 2: does the object have a .document and .title property?
      If bIsBrowser
         ret = 0
         ErrorMode(@OFF)
         ret = objWindow.document.title
         ErrorMode(@CANCEL)
         If ret == 0 Then bIsBrowser = @FALSE
      EndIf

      If bIsBrowser
         Switch @TRUE
            Case strMode =='title'
               ; Search for strString inside the title (arguments were
               ; reversed in the original, unlike every other mode).
               If StrIndex( objWindow.document.title, strString, 1, @FWDSCAN ) > 0
                  Return objWindow
               EndIf
               Break
            Case strMode =='url'
               If StrIndex(objWindow.LocationURL, strString, 1 , @FWDSCAN) > 0
                  Return objWindow
               EndIf
               Break
            Case strMode =='text'
               If StrIndex(objWindow.document.body.innerText, strString, 1 ,@FWDSCAN) > 0
                  Return objWindow
               EndIf
               Break
            Case strMode =='html'
               ; StrIndex needs the start position and scan direction too.
               If StrIndex(objWindow.document.body.innerHTML, strString, 1, @FWDSCAN) > 0
                  Return objWindow
               EndIf
               Break
         EndSwitch
      EndIf
   Next
   Return 0
#EndFunction

#DefineFunction udfGetURL( objIE )
   ; Returns the LocationURL property of the given browser object.
   ;   objIE   browser object to query
   ; If objIE is not an object, a message is shown and the script exits.
   If !udfIsObject( objIE )
      ; Report this function's own name (the original said 'udfIECreate').
      Pause('udfGetURL','Not a Valid Object')
      Exit
   EndIf
   strUrl = objIE.LocationURL
   Return strUrl
#EndFunction

#DefineFunction udfIsObject( obj )
   ; Returns @TRUE when VarType(obj) is 1024 or greater, otherwise @FALSE.
   ; NOTE(review): 1024 is presumably the VarType code for a COM object -
   ; confirm against the WIL VarType documentation.
   nTypeCode = VarType(obj)
   Return (nTypeCode >= 1024)
#EndFunction


#DefineFunction udfListLinks( objIE )
   ; Builds a tab-delimited list of the href of every entry in the
   ; document's Links collection.
   ;   objIE   browser object whose document is inspected
   ; Returns the list ('' when there are no links). If objIE is not an
   ; object, a message is shown and the script exits.
   If !udfIsObject(objIE)
      Pause('udfListLinks','Not a Valid Object')
      Exit
   EndIf
   objDoc = objIE.Document
   objLinkSet = objDoc.Links
   nLastIndex = objLinkSet.Length - 1
   strResult = ''
   For i = 0 To nLastIndex
      objLink = objDoc.Links(i)
      strHref = objLink.href
      If strResult == ''
         ; Nothing collected so far: start the list without a delimiter.
         strResult = strHref
      Else
         strResult = strResult : @TAB : strHref
      EndIf
   Next
   Return strResult
#EndFunction

#DefineFunction udfListAnchors( objIE )
   ; Builds a tab-delimited list of the name of every entry in the
   ; document's anchors collection.
   ;   objIE   browser object whose document is inspected
   ; Returns the list ('' when there are no anchors). If objIE is not an
   ; object, a message is shown and the script exits.
   If !udfIsObject( objIE )
      Pause('udfListAnchors','Not a Valid Object')
      Exit
   EndIf
   objDoc = objIE.Document
   objAnchorSet = objDoc.anchors
   nLastIndex = objAnchorSet.length - 1
   strResult = ''
   For i = 0 To nLastIndex
      objItem = objDoc.anchors(i)
      strName = objItem.name
      If strResult == ''
         ; Nothing collected so far: start the list without a delimiter.
         strResult = strName
      Else
         strResult = strResult : @TAB : strName
      EndIf
   Next
   Return strResult
#EndFunction

#DefineFunction udfListImages( objIE )
   ; Builds a tab-delimited list with one "src --- alt" entry for every
   ; item in the document's Images collection. An empty alt text is
   ; shown as two spaces.
   ;   objIE   browser object whose document is inspected
   ; Returns the list ('' when there are no images). If objIE is not an
   ; object, a message is shown and the script exits.
   If !udfIsObject( objIE )
      Pause('udfListImages','Not a Valid Object')
      Exit
   EndIf
   objDoc = objIE.Document
   objImageSet = objDoc.Images
   nLastIndex = objImageSet.Length - 1
   strResult = ''
   For i = 0 To nLastIndex
      objPic = objDoc.Images(i)
      strAlt = objPic.alt
      If strAlt == '' Then strAlt = '  '
      strSrc = objPic.src
      strEntry = strSrc : ' --- ' : strAlt
      If strResult == ''
         ; Nothing collected so far: start the list without a delimiter.
         strResult = strEntry
      Else
         strResult = strResult : @TAB : strEntry
      EndIf
   Next
   Return strResult
#EndFunction

#DefineFunction udfGetBody(objIE, nOption)
   ; Returns the contents of the document body.
   ;   objIE     browser object whose document body is read
   ;   nOption   0 = body innertext, 1 = body innerhtml
   ; Any other nOption shows an 'Invalid Option' message and returns 0.
   ; If objIE is not an object, a message is shown and the script exits.
   If !udfIsObject( objIE )
      Pause( 'udfGetBody', 'Not a Valid Object' )
      Exit
   EndIf
   ; The body is fetched before the option is examined, as in the original.
   objPageBody = objIE.Document.Body
   Switch nOption
      Case 0
         strResult = objPageBody.innertext
         Return strResult
      Case 1
         strResult = objPageBody.innerhtml
         Return strResult
   EndSwitch
   ; Reached only when nOption matched neither case above.
   Pause( 'udfGetBody', 'Invalid Option' )
   Return 0
#EndFunction


; --- Main script: load a page, then show what was extracted from it ---
strStartPage = 'http://www.winbatch.com/'
objBrowser = udfIECreate( strStartPage )
If objBrowser == 0
   Pause('udfIECreate','Unable to create browser')
   Exit
EndIf

; Alternative: attach to an existing browser that is showing this url
;objBrowser = udfIEAttach('url', 'http://www.winbatch.com/')
;if objBrowser == 0
;   Pause('udfIEAttach','Unable to locate browser using this mode')
;   Exit
;Endif

; Address the browser is currently showing
Pause( 'Current Url', udfGetURL( objBrowser ) )

; Tab-delimited lists, each shown in a single-selection list dialog
AskItemlist( 'ListLinks', udfListLinks( objBrowser ), @TAB, @UNSORTED, @SINGLE )

AskItemlist( 'ListAnchors', udfListAnchors( objBrowser ), @TAB, @UNSORTED, @SINGLE )

AskItemlist( 'ListImages', udfListImages( objBrowser ), @TAB, @UNSORTED, @SINGLE )

; Body contents: option 0 = inner text, option 1 = inner html
Pause( 'Body Inner Text', udfGetBody( objBrowser, 0 ) )

Pause( 'Body Inner HTML', udfGetBody( objBrowser, 1 ) )

Article ID:   W16138
File Created: 2013:10:22:15:28:40
Last Updated: 2013:10:22:15:28:40