Can't find the information you are looking for here? Then leave a message over on our WinBatch Tech Support Forum.
;***************************************************************************
;**
;**   Web Page Scraper
;**
;** Purpose: Extract information from a webpage
;** Inputs:  url
;** Outputs: Messages containing data
;**
;** Deana Falk
;** Revisions: 2013.10.22 Initial Release
;**
;**
;***************************************************************************

;---------------------------------------------------------------------------
; udfIEPageLoadWait( objIE )
;   Blocks until both the browser object and its document report a
;   readyState of 'complete' (or the numeric equivalent 4).
;   objIE   : browser COM object.
;   Returns : 1 once loading has finished.
;   Exits the script (after a Pause dialog) if objIE is not an object.
;   NOTE: there is no timeout; a page that never completes will wait forever.
;---------------------------------------------------------------------------
#DefineFunction udfIEPageLoadWait( objIE )
   If !udfIsObject(objIE)
      Pause('udfIEPageLoadWait','Not a Valid Object')
      Exit
   EndIf
   ; Wait for the browser itself to finish navigating
   While !(objIE.readyState == 'complete' || objIE.readyState == 4 )
      TimeDelay(0.1)
   EndWhile
   ; Then wait for the document to finish loading
   While !(objIE.document.readyState == 'complete' || objIE.document.readyState == 4 )
      TimeDelay(0.1)
   EndWhile
   Return 1
#EndFunction

;---------------------------------------------------------------------------
; udfIECreate( strUrl )
;   Creates a visible InternetExplorer.Application instance, navigates it
;   to strUrl and waits for the page to finish loading.
;   strUrl  : address to navigate to.
;   Returns : the browser COM object.
;   Exits the script (after a Pause dialog) if the object cannot be created.
;---------------------------------------------------------------------------
#DefineFunction udfIECreate( strUrl )
   objIE = ObjectCreate( 'InternetExplorer.Application')
   If !udfIsObject(objIE)
      Pause('udfIECreate','Not a Valid Object')
      Exit
   EndIf
   objIE.visible = @TRUE
   objIE.navigate( strUrl )
   udfIEPageLoadWait( objIE )
   Return objIE
#EndFunction

;---------------------------------------------------------------------------
; udfIEAttach( strMode, strString )
;   Searches the open shell windows for a browser whose page matches
;   strString and returns that window object.
;   strMode (case-insensitive):
;     title   strString is (part of) the title of the page you are trying to access
;     url     strString is (part of) the url of the page you are trying to access
;     text    strString is some text of the page you are trying to access
;     html    strString is some html of the page you are trying to access
;   Returns : the matching window object, or 0 if no window matches.
;   Exits the script (after a Pause dialog) if strMode is not one of the
;   modes above and at least one browser window was examined.
;---------------------------------------------------------------------------
#DefineFunction udfIEAttach( strMode, strString )
   strMode = StrLower(strMode)
   objShell = ObjectCreate('Shell.Application')
   objShellWindows = objShell.Windows() ; collection of all ShellWindows (IE and File Explorer)
   ;ForEach objWindow In objShellWindows
   For x = 0 To objShellWindows.count-1
      objWindow = objShellWindows.Item(x)

      ; Check window object is a valid browser, if not, skip it
      bIsBrowser = @TRUE

      ; Check conditions to verify that the object is a browser
      If bIsBrowser
         ; Reset before probing so a value left over from an earlier probe or
         ; loop iteration cannot be mistaken for a successful read.
         ; NOTE(review): relies on a suppressed failure leaving ret at 0 - confirm.
         ret = 0
         ErrorMode(@OFF)
         ret = objWindow.type ; Is .type a valid property?
         ErrorMode(@CANCEL)
         If ret == 0 Then bIsBrowser = @FALSE
      EndIf

      If bIsBrowser
         ret = 0
         ErrorMode(@OFF)
         ret = objWindow.document.title ; Does object have a .document and .title property?
         ErrorMode(@CANCEL)
         If ret == 0 Then bIsBrowser = @FALSE
      EndIf

      If bIsBrowser
         Switch @TRUE
            Case strMode == 'title'
               ; Search for strString inside the window title (same
               ; haystack/needle order as the other modes).
               If StrIndex( objWindow.document.title, strString, 1, @FWDSCAN ) > 0
                  Return objWindow
               EndIf
               Break
            Case strMode == 'url'
               If StrIndex( objWindow.LocationURL, strString, 1, @FWDSCAN ) > 0
                  Return objWindow
               EndIf
               Break
            Case strMode == 'text'
               If StrIndex( objWindow.document.body.innerText, strString, 1, @FWDSCAN ) > 0
                  Return objWindow
               EndIf
               Break
            Case strMode == 'html'
               ; StrIndex needs the start position and scan direction as well
               If StrIndex( objWindow.document.body.innerHTML, strString, 1, @FWDSCAN ) > 0
                  Return objWindow
               EndIf
               Break
            Case @TRUE ; Invalid Mode - catch-all must equal the Switch value to fire
               Pause('udfIEAttach','Invalid Mode Specified')
               Exit
         EndSwitch
      EndIf
   Next
   Return 0
#EndFunction

;---------------------------------------------------------------------------
; udfGetURL( objIE )
;   Returns the LocationURL property of the browser object.
;   Exits the script (after a Pause dialog) if objIE is not an object.
;---------------------------------------------------------------------------
#DefineFunction udfGetURL( objIE )
   If !udfIsObject( objIE )
      Pause('udfGetURL','Not a Valid Object')
      Exit
   EndIf
   strUrl = objIE.LocationURL
   Return strUrl
#EndFunction

;---------------------------------------------------------------------------
; udfIsObject( obj )
;   Returns @TRUE when VarType(obj) is 1024 or greater, which this script
;   uses as its test for "obj holds a COM object"; otherwise @FALSE.
;---------------------------------------------------------------------------
#DefineFunction udfIsObject( obj )
   Return(VarType(obj)>=1024)
#EndFunction

;---------------------------------------------------------------------------
; udfListLinks( objIE )
;   Builds a tab-delimited list of the href of every entry in the
;   document's Links collection.
;   Returns : tab-delimited string ('' when the page has no links).
;   Exits the script (after a Pause dialog) if objIE is not an object.
;---------------------------------------------------------------------------
#DefineFunction udfListLinks( objIE )
   If !udfIsObject(objIE)
      Pause('udfListLinks','Not a Valid Object')
      Exit
   EndIf
   objBrowserDoc = objIE.Document
   objLinks = objBrowserDoc.Links
   strLinkList = ''
   numberofLinks = objLinks.Length - 1
   For x = 0 To numberofLinks
      ; Use a separate variable for the item so the collection is not overwritten
      objLink = objBrowserDoc.Links(x)
      If strLinkList == ''
         strLinkList = objLink.href
      Else
         strLinkList = strLinkList : @TAB : objLink.href
      EndIf
   Next
   Return strLinkList
#EndFunction

;---------------------------------------------------------------------------
; udfListAnchors( objIE )
;   Builds a tab-delimited list of the name of every entry in the
;   document's anchors collection.
;   Returns : tab-delimited string ('' when the page has no anchors).
;   Exits the script (after a Pause dialog) if objIE is not an object.
;---------------------------------------------------------------------------
#DefineFunction udfListAnchors( objIE )
   If !udfIsObject( objIE )
      Pause('udfListAnchors','Not a Valid Object')
      Exit
   EndIf
   objBrowserDoc = objIE.Document
   objAnchors = objBrowserDoc.anchors
   strAnchorList = ''
   numberofAnchors = objAnchors.length - 1
   For x = 0 To numberofAnchors
      objAnchor = objBrowserDoc.anchors(x)
      If strAnchorList == ''
         strAnchorList = objAnchor.name
      Else
         strAnchorList = strAnchorList : @TAB : objAnchor.name
      EndIf
   Next
   Return strAnchorList
#EndFunction

;---------------------------------------------------------------------------
; udfListImages( objIE )
;   Builds a tab-delimited list with one 'src --- alt' entry for every
;   entry in the document's Images collection. An empty alt text is
;   replaced by a single space.
;   Returns : tab-delimited string ('' when the page has no images).
;   Exits the script (after a Pause dialog) if objIE is not an object.
;---------------------------------------------------------------------------
#DefineFunction udfListImages( objIE )
   If !udfIsObject( objIE )
      Pause('udfListImages','Not a Valid Object')
      Exit
   EndIf
   objBrowserDoc = objIE.Document
   objImages = objBrowserDoc.Images
   strImageList = ''
   numberofImages = objImages.Length - 1
   For x = 0 To numberofImages
      objImage = objBrowserDoc.Images(x)
      strAltText = objImage.alt
      If strAltText == '' Then strAltText = ' '
      strSource = objImage.src
      ;nSize=objImage.size
      If strImageList == ''
         strImageList = strSource : ' --- ' : strAltText
      Else
         strImageList = strImageList : @TAB : strSource : ' --- ' : strAltText
      EndIf
   Next
   Return strImageList
#EndFunction

;---------------------------------------------------------------------------
; udfGetBody( objIE, nOption )
;   Returns the contents of the document body.
;   nOption : 0 = body innerText, 1 = body innerHTML.
;   Returns : the requested contents, or 0 (after a Pause dialog) when
;             nOption is neither 0 nor 1.
;   Exits the script (after a Pause dialog) if objIE is not an object.
;---------------------------------------------------------------------------
#DefineFunction udfGetBody(objIE, nOption)
   If !udfIsObject( objIE )
      Pause( 'udfGetBody', 'Not a Valid Object' )
      Exit
   EndIf
   objBrowserDoc = objIE.Document
   objBody = objBrowserDoc.Body
   Switch nOption
      Case 0
         strContents = objBody.innertext
         Break
      Case 1
         strContents = objBody.innerhtml
         Break
      Case nOption ; always equals the Switch value, so this acts as the default
         Pause( 'udfGetBody', 'Invalid Option' )
         Return 0
   EndSwitch
   Return strContents
#EndFunction

;---------------------------------------------------------------------------
; Main script: open a page and display its url, links, anchors, images and
; body contents.
;---------------------------------------------------------------------------
strUrl = 'http://www.winbatch.com/'
objIE = udfIECreate( strUrl )
If objIE == 0
   Pause('udfIECreate','Unable to create browser')
   Exit
EndIf

; Attach to existing browser with this url
;objIE = udfIEAttach('url', 'http://www.winbatch.com/')
;if objIE == 0
;   Pause('udfIEAttach','Unable to locate browser using this mode')
;   Exit
;Endif

url = udfGetURL( objIE )
Pause( 'Current Url', url )

ListLinks = udfListLinks( objIE )
AskItemlist( 'ListLinks', ListLinks, @TAB, @UNSORTED, @SINGLE )

ListAnchors = udfListAnchors( objIE )
AskItemlist( 'ListAnchors', ListAnchors, @TAB, @UNSORTED, @SINGLE )

ListImages = udfListImages( objIE )
AskItemlist( 'ListImages', ListImages, @TAB, @UNSORTED, @SINGLE )

URLBodyTxt = udfGetBody( objIE, 0 ) ;inner text
Pause( 'Body Inner Text', URLBodyTxt )

URLBodyHTML = udfGetBody( objIE, 1 ) ;inner html
Pause( 'Body Inner HTML', URLBodyHTML )
Article ID: W16138
File Created: 2013:10:22:15:28:40
Last Updated: 2013:10:22:15:28:40