WinBatch Tech Support Home

Database Search

If you can't find the information using the categories below, post a question over in our WinBatch Tech Support Forum.

TechHome

XML
plus
plus

Can't find the information you are looking for here? Then leave a message over on our WinBatch Tech Support Forum.

Large XML File Discussion

 Keywords: GB XML Large Huge Parse  

Trials and tribulations of reading a 40GB XML file. Apparently MSXML, XMLOLEDB and the DOM Extender cannot handle an XML file this large. You could read it line by line, but that takes forever!

Here are the results of research on this issue:

Hope this info is helpful to someone.


;Jim Taylor (jtaylor@jtdata.com)
;Always the license questions so...
;Use this however you want except I'd prefer you not bundle it up and sell it or distribute it for money in any way.

;This will split XML files into smaller chunks.  To date it is by far the fastest solution and I've tried several.
;Also the size of the file doesn't matter. Most solutions will not work with truly large files.
;I've tested with 40Gb files and it works great.
;Thanks to Detlev Dalitz for showing me how to use Large Numbers with the Binary Functions.
;The splitting code can be found in the last two functions.  The rest is the interface code.

; ---------------------------------------------------------------------------
; Script start-up: load the extender, locate the settings file, define the
; subroutines/functions, then set the dialog-related options and constants.
; ---------------------------------------------------------------------------

; Presumably supplies the huge_* arithmetic functions used by BinSplit -- TODO confirm.
AddExtender ("WWHUG34i.DLL")

Home_Path = DirScript()
; Settings are kept in an "xml_splitter" folder under the AppData shortcut directory.
Data_Path = ShortCutDir("AppData",0,@TRUE):"xml_splitter\"
If !DirExist(Data_Path) Then DirMake(Data_Path)
System_Ini = Data_Path:"xml_splitter.ini"
WinHide("")
; Create an empty INI file on first run so that it always exists.
If !FileExist(System_Ini) Then FilePut(System_Ini,"")
DirChange(Home_Path)
; Executes the #Define... blocks that follow the :Load_Routines label; they must be
; defined before Init_Dialog_Constants() and the dialog callback can be called.
GoSub Load_Routines
; IntControl 49 is what enables the "close clicked" dialog message (see MSG_CLOSEVIA49).
IntControl(49,3,0,0,0)
; NOTE(review): IntControl 4 presumably controls whether a dialog with a file list box
; may return without a selected file -- confirm against the IntControl documentation.
IntControl(4,0,0,0,0)
Init_Dialog_Constants()


; ---------------------------------------------------------------------------
; "SPLIT" dialog template. Dialog("SPLIT") below builds the dialog from the
; SPLIT* variables; SPLITProcedure names XMLSPLIT as its callback routine.
; NOTE(review): each SPLITnnn string appears to follow the Dialog Editor 6.2
; field order (x, y, width, height, control type, control name, variable,
; text, value, tab order, style, font, text color, background color) --
; confirm against the WIL Dialog documentation before editing by hand.
; ---------------------------------------------------------------------------
SPLITFormat=`WWWDLGED,6.2`

SPLITCaption=`XML Splitter`
SPLITX=-01
SPLITY=-01
SPLITWidth=258
SPLITHeight=330
SPLITNumControls=026
SPLITProcedure=`XMLSPLIT`
SPLITFont=`DEFAULT`
SPLITTextColor=`DEFAULT`
SPLITBackground=`DEFAULT,0|0|91`
SPLITConfig=0

; Exit button and the file-selection controls (filter edit box, "Up" button, file list).
SPLIT001=`219,003,034,012,PUSHBUTTON,"pb_SPL_Exit",DEFAULT,"E&xit",1,3,32,"Microsoft Sans Serif|6656|70|34","0|0|0",DEFAULT`
SPLIT002=`003,005,042,012,STATICTEXT,"st_SPL_FiletoSplit",DEFAULT,"File to Split",DEFAULT,6,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT003=`077,003,024,012,EDITBOX,"eb_SPL_file_filter",file_filter,"file_filter",DEFAULT,9,DEFAULT,DEFAULT,DEFAULT,DEFAULT`
SPLIT004=`105,003,014,012,PUSHBUTTON,"pb_SPL_Up",DEFAULT,"Up",3,12,DEFAULT,DEFAULT,DEFAULT,DEFAULT`
SPLIT005=`003,017,116,302,FILELISTBOX,"fl_SPL_files",files,DEFAULT,DEFAULT,15,256,DEFAULT,DEFAULT,DEFAULT`
; Split parameters. The 64 in the chunk-size entry matches DCSTYLE_DIGITSONLY.
SPLIT006=`125,017,038,012,STATICTEXT,"st_SPL_ChunkSize",DEFAULT,"Chunk Size:",DEFAULT,18,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT007=`167,017,052,012,EDITBOX,"eb_SPL_chunk_size",chunk_size,"chunk_size",DEFAULT,21,64,DEFAULT,DEFAULT,DEFAULT`
SPLIT008=`125,035,038,012,STATICTEXT,"st_SPL_RootNode",DEFAULT,"Root Node:",DEFAULT,24,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT009=`167,033,052,012,EDITBOX,"eb_SPL_root_node",root_node,"root_node",DEFAULT,27,DEFAULT,DEFAULT,DEFAULT,DEFAULT`
SPLIT010=`125,051,038,012,STATICTEXT,"st_SPL_ItemNode",DEFAULT,"Item Node:",DEFAULT,30,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT011=`167,049,052,012,EDITBOX,"eb_SPL_item_node",item_node,"item_node",DEFAULT,33,DEFAULT,DEFAULT,DEFAULT,DEFAULT`
SPLIT012=`219,067,034,012,PUSHBUTTON,"pb_SPL_Split",DEFAULT,"Split",4,36,DEFAULT,"Microsoft Sans Serif|6656|70|34","0|0|0",DEFAULT`
; "Output" group: output file name parts, output directory, list of written chunk files
; and a sample output file name.
SPLIT013=`121,079,132,242,GROUPBOX,"gb_SPL_Output",DEFAULT,"Output",DEFAULT,39,DEFAULT,"Microsoft Sans Serif|6656|40|34","0|0|0",DEFAULT`
SPLIT014=`125,091,020,012,STATICTEXT,"st_SPL_Base",DEFAULT,"Base:",DEFAULT,42,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT015=`125,103,074,012,EDITBOX,"eb_SPL_output_base",output_base,"output_base",DEFAULT,45,DEFAULT,DEFAULT,DEFAULT,DEFAULT`
SPLIT016=`203,091,024,012,STATICTEXT,"st_SPL_CntrLength",DEFAULT,"Length:",DEFAULT,48,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT017=`203,103,020,012,EDITBOX,"eb_SPL_output_counter",output_counter,"output_counter",DEFAULT,51,64,DEFAULT,DEFAULT,DEFAULT`
SPLIT018=`229,091,012,012,STATICTEXT,"st_SPL_Extension",DEFAULT,"Ext:",DEFAULT,54,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT019=`229,103,018,012,EDITBOX,"eb_SPL_output_ext",output_ext,"output_ext",DEFAULT,57,DEFAULT,DEFAULT,DEFAULT,DEFAULT`
SPLIT020=`125,119,034,012,PUSHBUTTON,"pb_SPL_OutputDir",DEFAULT,"Output Dir.",2,60,DEFAULT,DEFAULT,DEFAULT,DEFAULT`
; The 8 in the output-directory entry matches DCSTYLE_READONLY.
SPLIT021=`125,133,122,012,EDITBOX,"eb_SPL_output_dir",output_dir,"output_dir",DEFAULT,63,8,DEFAULT,DEFAULT,DEFAULT`
SPLIT022=`125,149,122,140,ITEMBOX,"ib_SPL_output_files",output_files,DEFAULT,DEFAULT,66,256,DEFAULT,DEFAULT,DEFAULT`
SPLIT023=`125,293,046,012,STATICTEXT,"st_SPL_OutputFileExample",DEFAULT,"File Example:",DEFAULT,69,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT024=`125,305,122,012,VARYTEXT,"vt_SPL_output_sample",output_sample,"output_sample",DEFAULT,72,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
SPLIT025=`221,019,014,012,STATICTEXT,"StaticText_1",DEFAULT,"Mb",DEFAULT,250,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`
; NOTE(review): this entry carries the same value (6) as SPLIT002 in what looks like the
; tab-order field; both are static text, so it is probably harmless -- confirm.
SPLIT026=`057,005,018,012,STATICTEXT,"st_SPL_Filter",DEFAULT,"Filter:",DEFAULT,6,DEFAULT,"Microsoft Sans Serif|6656|40|34","255|255|255",DEFAULT`

; Show the dialog; all interaction is handled by the XMLSPLIT callback.
ButtonPushed=Dialog("SPLIT")



; The script ends here; everything below is only reached through "GoSub Load_Routines".
Exit

; Target of the GoSub near the top of the script. The definitions that follow are
; executed once, and the Return after the last definition goes back to the caller.
:Load_Routines

#DefineSubRoutine SPLIT_Get_List_Rtn()

  ; Copies the current value of every dialog control into the script variable of
  ; the same name and derives the closing tags for the root and item nodes.
  ; Being a subroutine it runs in the caller's scope: SPL_Handle and the dc_*
  ; constants must already be defined there.

  file_filter               = DialogControlGet(SPL_Handle,"eb_SPL_file_filter",dc_editbox)          ; EDITBOX
  files                     = DialogControlGet(SPL_Handle,"fl_SPL_files",dc_itemboxselect)          ; FILELISTBOX
  chunk_size                = DialogControlGet(SPL_Handle,"eb_SPL_chunk_size",dc_editbox)           ; EDITBOX
  root_node                 = DialogControlGet(SPL_Handle,"eb_SPL_root_node",dc_editbox)            ; EDITBOX
  item_node                 = DialogControlGet(SPL_Handle,"eb_SPL_item_node",dc_editbox)            ; EDITBOX
  output_base               = DialogControlGet(SPL_Handle,"eb_SPL_output_base",dc_editbox)          ; EDITBOX
  output_counter            = DialogControlGet(SPL_Handle,"eb_SPL_output_counter",dc_editbox)       ; EDITBOX
  output_ext                = DialogControlGet(SPL_Handle,"eb_SPL_output_ext",dc_editbox)           ; EDITBOX
  output_dir                = DialogControlGet(SPL_Handle,"eb_SPL_output_dir",dc_editbox)           ; EDITBOX
  output_files              = DialogControlGet(SPL_Handle,"ib_SPL_output_files",dc_itemboxselect)   ; ITEMBOX
  output_sample             = DialogControlGet(SPL_Handle,"vt_SPL_output_sample",dc_title)          ; VARYTEXT

  ; Build the closing tag from the opening tag: drop the leading "<" and put "</"
  ; in front, so "<catalog>" becomes "</catalog>". The published listing had lost
  ; the "</" prefix, which produced "catalog>" and made BinSplit's offset
  ; arithmetic (StrLen(root_node_end)-1) two bytes short.
  ; An empty node name yields an empty closing tag; the Split button rejects
  ; empty node names before any splitting is done.
  root_node_end = ""
  If root_node != "" Then root_node_end = "</":StrSub(root_node,2,StrLen(root_node))
  item_node_end = ""
  If item_node != "" Then item_node_end = "</":StrSub(item_node,2,StrLen(item_node))

#EndSubRoutine


#DefineSubRoutine SPLIT_Set_List_Rtn()

  ; Pushes the script variables back into the dialog controls (the inverse of
  ; SPLIT_Get_List_Rtn). The file list box "fl_SPL_files" is deliberately not
  ; touched here; its contents and selection are managed by the dialog callback.

  ; --- split parameters ---
  DialogControlSet(SPL_Handle, "eb_SPL_chunk_size", dc_editbox, chunk_size)
  DialogControlSet(SPL_Handle, "eb_SPL_root_node",  dc_editbox, root_node)
  DialogControlSet(SPL_Handle, "eb_SPL_item_node",  dc_editbox, item_node)

  ; --- file filter ---
  DialogControlSet(SPL_Handle, "eb_SPL_file_filter", dc_editbox, file_filter)

  ; --- output file naming ---
  DialogControlSet(SPL_Handle, "eb_SPL_output_base",    dc_editbox, output_base)
  DialogControlSet(SPL_Handle, "eb_SPL_output_counter", dc_editbox, output_counter)
  DialogControlSet(SPL_Handle, "eb_SPL_output_ext",     dc_editbox, output_ext)
  DialogControlSet(SPL_Handle, "eb_SPL_output_dir",     dc_editbox, output_dir)

  ; --- output file list: load the contents first, then restore the selection ---
  DialogControlSet(SPL_Handle, "ib_SPL_output_files", dc_itemboxcontents, output_files_list)
  DialogControlSet(SPL_Handle, "ib_SPL_output_files", dc_itemboxselect,   output_files)

  ; --- sample output file name ---
  DialogControlSet(SPL_Handle, "vt_SPL_output_sample", dc_title, output_sample)

#EndSubRoutine

#DefineSubRoutine Init_Dialog_Constants()

   ; Defines symbolic names for the numeric codes used with the dialog functions
   ; (DialogProcOptions, DialogControlState, DialogControlGet/Set, DialogObject)
   ; and for the dialog callback return values. Being a subroutine, the names are
   ; created in the caller's scope.

   ;DialogProcOptions Constants

   MSG_INIT               = 0    ; The one-time initialization
   MSG_TIMER              = 1    ; Timer event
   MSG_BUTTONPUSHED       = 2    ; Pushbutton or Picturebutton
   MSG_RADIOPUSHED        = 3    ; Radiobutton clicked
   MSG_CHECKBOX           = 4    ; Checkbox clicked
   MSG_EDITBOX            = 5    ; Editbox or Multilinebox
   MSG_FILESELECT         = 6    ; Filelistbox
   MSG_ITEMSELECT         = 7    ; Itembox
   MSG_COMBOCHANGE        = 8    ; Combobox/Droplistbox
   MSG_CALENDAR           = 9    ; Calendar date change
   MSG_SPINNER            = 10   ; Spinner number change
   MSG_CLOSEVIA49         = 11   ; Close clicked (Enabled via IntControl 49)
   MSG_FILEBOXDOUBLECLICK = 12   ; Get double-click message on a FileListBox
   MSG_ITEMBOXDOUBLECLICK = 13   ; Get double-click message on an ItemBox
   MSG_COMEVENT           = 14   ; Presumably a COM control event (the original comment was a copy of the ItemBox line) - confirm

   DPO_DISABLESTATE       = 1000 ; codes -1=GetSetting 0=EnableDialog 1=DisableDialog (this script also passes 2, which it labels "Wait cursor")
   DPO_CHANGEBACKGROUND   = 1001 ; -1=GetSetting otherwise bitmap or color string
   DPO_CHANGESYSMENU      = 1002 ; -1=Get Current 0=none 1=close 2=close/min 3=close/max 4=close/min/max
   DPO_CHANGETITLE        = 1003 ; -1=Get Current otherwise new title
   DPO_GETCONTROLNAME     = 1004 ; Get Control Name from Number
   DPO_GETCONTROLNUM      = 1005 ; Get Control Number from Name

   ;DialogControlState Constants
   DCSTATE_SETFOCUS       = 1    ; Give Control Focus
   DCSTATE_QUERYSTYLE     = 2    ; Query control's style
   DCSTATE_ADDSTYLE       = 3    ; Add control style
   DCSTATE_REMOVESTYLE    = 4    ; Remove control style
   DCSTATE_GETFOCUS       = 5    ; Get control that has focus
   DCSTYLE_INVISIBLE      = 1    ; Set Control Invisible
   DCSTYLE_DISABLED       = 2    ; Set Control Disabled
   DCSTYLE_NOUSERDATA     = 4    ; Note: Settable via the DialogControlState function only; SPINNER control only
   DCSTYLE_READONLY       = 8    ; Sets control to read-only (user cannot type in data) EDITBOX MULTILINEBOX SPINNER
   DCSTYLE_PASSWORD       = 16   ; Sets 'password mode' where only *'s are displayed EDITBOX
   DCSTYLE_DEFAULTBUTTON  = 32   ; Sets a button as the default button PUSHBUTTON PICTUREBUTTON
   DCSTYLE_DIGITSONLY     = 64   ; Set edit box to accept digits only EDITBOX MULTILINEBOX
   DCSTYLE_FLAT           = 128  ; Makes a 'flat' hyperlink-looking button PUSHBUTTON PICTUREBUTTON
   DCSTYLE_HEIGHT         = 256  ; Turns off automatic height adjustment on ItemBoxes and FileListBoxes
   DCSTYLE_CENTER         = 512  ; Center Text in VARYTEXT and STATICTEXT Controls
   DCSTYLE_RIGHT          = 1024 ; Right Justify Text in VARYTEXT and STATICTEXT Controls
   DCSTYLE_NOSELCURLEFT   = 2048 ; No selection, cursor left EDITBOX MULTILINEBOX
   DCSTYLE_NOSELCURRIGHT  = 4096 ; No selection, cursor right EDITBOX MULTILINEBOX

   ;DialogControlSet / DialogControlGet Constants
   DC_CHECKBOX            = 1    ; CHECKBOX
   DC_RADIOBUTTON         = 2    ; RADIOBUTTON
   DC_EDITBOX             = 3    ; EDITBOX MULTILINEBOX
   DC_TITLE               = 4    ; PICTURE RADIOBUTTON CHECKBOX PICTUREBUTTON VARYTEXT STATICTEXT GROUPBOX PUSHBUTTON
   DC_ITEMBOXCONTENTS     = 5    ; ITEMBOX FILELISTBOX DROPLISTBOX
   DC_ITEMBOXSELECT       = 6    ; ITEMBOX FILELISTBOX DROPLISTBOX
   DC_CALENDAR            = 7    ; CALENDAR
   DC_SPINNER             = 8    ; SPINNER
   DC_MULTITABSTOPS       = 9    ; MULTILINEBOX
   DC_ITEMSCROLLPOS       = 10   ; ITEMBOX FILELISTBOX
   DC_BACKGROUNDCOLOR     = 11   ; RADIOBUTTON CHECKBOX VARYTEXT STATICTEXT GROUPBOX PUSHBUTTON ITEMBOX FILELISTBOX DROPLISTBOX SPINNER EDITBOX MULTILINEBOX
   DC_PICTUREBITMAP       = 12   ; PICTURE PICTUREBUTTON
   DC_TEXTCOLOR           = 13   ; RADIOBUTTON CHECKBOX VARYTEXT STATICTEXT GROUPBOX PUSHBUTTON ITEMBOX FILELISTBOX DROPLISTBOX SPINNER EDITBOX MULTILINEBOX
   DC_ITEMBOXADD          = 14   ; ITEMBOX FILELISTBOX DROPLISTBOX
   DC_ITEMBOXREMOVE       = 15   ; ITEMBOX FILELISTBOX DROPLISTBOX
   DC_RADIOCONTROL        = 16   ; RADIOBUTTON
   DC_POSITION            = 17   ; NOTE(review): original comment said RADIOBUTTON, which looks copy-pasted - confirm which controls this applies to

   ;DialogObject constants

   DLGOBJECT_ADDEVENT     = 1    ; Call dialog callback when the specified event occurs
   DLGOBJECT_REMOVEEVENT  = 2    ; Stop calling dialog callback for an event previously requested with DLGOBJECT_ADDEVENT
   DLGOBJECT_GETOBJECT    = 3    ; Return an object reference to the specified control
   DLGOBJECT_GETPICOBJECT = 4    ; Create and return an object reference to a picture object

   ;Return code constants
   RET_DO_CANCEL          =  0   ; Cancels dialog
   RET_DO_DEFAULT         = -1   ; Continue with default processing for control
   RET_DO_NOT_EXIT        = -2   ; Do not exit the dialog

#EndSubRoutine



#DefineSubRoutine System_Init_Var_Null()

  ; Resets every dialog-backed variable to an empty string and points both INI
  ; section names at the default "XML_Splitter" section.
  ;   profile - section used for the file filter and the last selected file
  ;   profilf - section used for the per-file split settings

  ; INI section names
  profile           = "XML_Splitter"
  profilf           = "XML_Splitter"

  ; file selection
  file_filter       = ""
  files             = ""

  ; split parameters and the closing tags derived from them
  chunk_size        = ""
  root_node         = ""
  root_node_end     = ""
  item_node         = ""
  item_node_end     = ""

  ; output naming and output list
  output_base       = ""
  output_counter    = ""
  output_ext        = ""
  output_dir        = ""
  output_files      = ""
  output_files_list = ""
  output_sample     = ""

#EndSubRoutine


#DefineSubRoutine System_Ini_Read_Rtn()

  ; Loads the saved settings from the INI file named by System_Ini.
  ; The filter and the last selected file come from the [profile] section; the
  ; split settings come from the [profilf] section, falling back to the defaults
  ; given here when a key is missing.

  ; per-file split settings
  output_dir     = IniReadPvt(profilf, "output_dir",     ".\",    System_Ini)
  output_ext     = IniReadPvt(profilf, "output_ext",     "xml",   System_Ini)
  output_counter = IniReadPvt(profilf, "output_counter", 3,       System_Ini)
  output_base    = IniReadPvt(profilf, "output_base",    "books", System_Ini)
  item_node      = IniReadPvt(profilf, "item_node",      "",      System_Ini)
  root_node      = IniReadPvt(profilf, "root_node",      "",      System_Ini)
  chunk_size     = IniReadPvt(profilf, "chunk_size",     10,      System_Ini)

  ; global settings
  files          = IniReadPvt(profile, "files",       "", System_Ini)
  file_filter    = IniReadPvt(profile, "file_filter", "", System_Ini)

#EndSubRoutine


#DefineSubRoutine System_Ini_Save_Rtn()

  ; Writes the current settings to the INI file named by System_Ini.
  ; When the selection in "files" contains a "[" nothing is written; the split
  ; settings are blanked instead.
  ; (Presumably "[" marks a directory or drive entry of the file list box -- confirm.)

  If ItemCount(files,"[") > 1
    ; nothing is saved for such a selection -- just clear the split settings
    output_dir     = ""
    output_ext     = ""
    output_counter = ""
    output_base    = ""
    item_node      = ""
    root_node      = ""
    chunk_size     = ""
  Else
    ; global settings
    IniWritePvt(profile, "file_filter", file_filter, System_Ini)
    IniWritePvt(profile, "files",       files,       System_Ini)

    ; per-file split settings
    IniWritePvt(profilf, "chunk_size",     chunk_size,     System_Ini)
    IniWritePvt(profilf, "root_node",      root_node,      System_Ini)
    IniWritePvt(profilf, "item_node",      item_node,      System_Ini)
    IniWritePvt(profilf, "output_base",    output_base,    System_Ini)
    IniWritePvt(profilf, "output_counter", output_counter, System_Ini)
    IniWritePvt(profilf, "output_ext",     output_ext,     System_Ini)
    IniWritePvt(profilf, "output_dir",     output_dir,     System_Ini)
  EndIf

#EndSubRoutine


#DefineSubRoutine SPLIT_Apply_Filter_Rtn()

  ; Helper for XMLSPLIT: normalizes file_filter into a wildcard pattern and
  ; reloads the file list box with it. An empty filter means "*.*"; otherwise
  ; the text is wrapped in "*", and doubled "." and "*" characters are collapsed.
  If file_filter == "" Then file_filter = "*.*"
  file_filter = StrReplace(StrReplace("*":file_filter:"*","..","."),"**","*")
  If file_filter == "*.*.*" Then file_filter = "*.*"
  DialogControlSet(SPL_Handle,"fl_SPL_files",dc_itemboxcontents,file_filter)

#EndSubRoutine


#DefineSubRoutine XMLSPLIT(SPL_Handle,DMsg,DCID,resvd4,resvd5)

; Dialog callback for the "SPLIT" dialog (named by SPLITProcedure).
;   SPL_Handle     - dialog handle passed in by the dialog
;   DMsg           - message code; compared against the msg_* constants
;   DCID           - identifier of the control the message refers to
;   resvd4, resvd5 - not used by this routine
; Returns 9 when the dialog is closed through the close box, -1 (default
; processing) when Exit is pushed, and -2 (do not exit) for everything else.
; The routine is a subroutine, so it reads and writes the script's variables.

Switch (DMsg)
  Case msg_init                 ; Dialog Initialization
    DialogProcOptions(SPL_Handle, msg_timer,0)                            ; TimerEvent (0- Off).
    DialogProcOptions(SPL_Handle, msg_closevia49,1)                       ; Close selected (IntControl(49....) (1-On, 0-Off).
    DialogProcOptions(SPL_Handle, dpo_disablestate,0)                     ; Dialog Disable (1-Disable, 2-Wait cursor, 0-Enable).
    DialogProcOptions(SPL_Handle, dpo_changebackground,-1)                ; Change Dialog Background (Bitmap File or RGB String).
    DialogProcOptions(SPL_Handle, msg_buttonpushed,1)                     ; PushButton/PictureButton.
    DialogProcOptions(SPL_Handle, msg_editbox,1)                          ; EditBox or Multi-LineBox.
    DialogProcOptions(SPL_Handle, msg_fileselect,1)                       ; FileList.
    DialogProcOptions(SPL_Handle, msg_fileboxdoubleclick,1)               ; FileList Double-Clicked.
    DialogProcOptions(SPL_Handle, msg_itemselect,1)                       ; ItemBox.
    DialogProcOptions(SPL_Handle, msg_itemboxdoubleclick,1)               ; ItemBox Double-Clicked.

    System_Init_Var_Null()
    System_Ini_Read_Rtn()

    SPLIT_Apply_Filter_Rtn()
    SPLIT_Set_List_Rtn()
    files_list = DialogControlGet(SPL_Handle,"fl_SPL_files",dc_itemboxcontents)
    ; Re-select the file remembered from the last run, or the first file if it is gone.
    files = IniReadPvt(profile,"files","",System_Ini)
    If ItemLocate(files,files_list,@TAB) == 0 Then files = ItemExtract(1,files_list,@TAB)
    DialogControlSet(SPL_Handle,"fl_SPL_files",dc_itemboxselect,files)
    ; The split settings are stored per file, in a section named after the file root.
    profilf = FileRoot(files)
    System_Ini_Read_Rtn()
    SPLIT_Set_List_Rtn()
    Sample_File()
    DialogProcOptions(SPL_Handle, dpo_changetitle,"XML Splitter - ":DirGet())

    Break
  Case msg_timer                ; TimerEvent
    Break
  Case msg_closevia49           ; Close
    SPLIT_Get_List_Rtn()
    If ItemCount(files,"[") > 1 Then DialogControlSet(SPL_Handle,"fl_SPL_files",dc_itemboxselect,"")
    If files == "" Then
      files = ItemExtract(1,files_list,@TAB)
      DialogControlSet(SPL_Handle,"fl_SPL_files",dc_itemboxselect,files)
    Else
      System_Ini_Save_Rtn()
    EndIf
    Return 9
    Break
  Case msg_editbox              ; Edit/MultiLine Box
    Switch(DialogProcOptions(SPL_Handle,dpo_getcontrolnum,DCID))
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"eb_SPL_file_filter")
        ; Re-filter the file list as the user types and select its first entry.
        file_filter = DialogControlGet(SPL_Handle,"eb_SPL_file_filter",dc_editbox)
        SPLIT_Apply_Filter_Rtn()
        files_list = DialogControlGet(SPL_Handle,"fl_SPL_files",dc_itemboxcontents)
        files = ItemExtract(1,files_list,@TAB)
        DialogControlSet(SPL_Handle,"fl_SPL_files",dc_itemboxselect,files)
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"eb_SPL_chunk_size")
        chunk_size = DialogControlGet(SPL_Handle,"eb_SPL_chunk_size",dc_editbox)
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"eb_SPL_root_node")
        root_node = DialogControlGet(SPL_Handle,"eb_SPL_root_node",dc_editbox)
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"eb_SPL_item_node")
        item_node = DialogControlGet(SPL_Handle,"eb_SPL_item_node",dc_editbox)
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"eb_SPL_output_base")
        output_base = DialogControlGet(SPL_Handle,"eb_SPL_output_base",dc_editbox)
        Sample_File()
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"eb_SPL_output_counter")
        output_counter = DialogControlGet(SPL_Handle,"eb_SPL_output_counter",dc_editbox)
        Sample_File()
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"eb_SPL_output_ext")
        output_ext = DialogControlGet(SPL_Handle,"eb_SPL_output_ext",dc_editbox)
        Sample_File()
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"eb_SPL_output_dir")
        output_dir = DialogControlGet(SPL_Handle,"eb_SPL_output_dir",dc_editbox)
        Break
    EndSwitch
    Break
  Case msg_fileselect           ; File List
    Switch(DialogProcOptions(SPL_Handle,dpo_getcontrolnum,DCID))
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"fl_SPL_files")
        ; Save the settings of the previously selected file, then load those of the new one.
        System_Ini_Save_Rtn()
        files = DialogControlGet(SPL_Handle,"fl_SPL_files",dc_itemboxselect)
        files_list = DialogControlGet(SPL_Handle,"fl_SPL_files",dc_itemboxcontents)
        profilf = FileRoot(files)
        System_Ini_Read_Rtn()
        SPLIT_Set_List_Rtn()
        file_count = 1
        Sample_File()
        Break
    EndSwitch
    Break
  Case msg_fileboxdoubleclick   ; FileList DC
    Switch(DialogProcOptions(SPL_Handle,dpo_getcontrolnum,DCID))
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"fl_SPL_files")
        files = DialogControlGet(SPL_Handle,"fl_SPL_files",dc_itemboxselect)
        files_list = DialogControlGet(SPL_Handle,"fl_SPL_files",dc_itemboxcontents)
        DialogProcOptions(SPL_Handle, dpo_changetitle,"XML Splitter - ":DirGet())
        Break
    EndSwitch
    Break
  Case msg_itemboxdoubleclick   ; ItemBox DC
    Switch(DialogProcOptions(SPL_Handle,dpo_getcontrolnum,DCID))
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"ib_SPL_output_files")
        output_files = DialogControlGet(SPL_Handle,"ib_SPL_output_files",dc_itemboxselect)
        output_files_list = DialogControlGet(SPL_Handle,"ib_SPL_output_files",dc_itemboxcontents)
        Break
    EndSwitch
    Break
  Case msg_itemselect           ; Item Box
    Switch(DialogProcOptions(SPL_Handle,dpo_getcontrolnum,DCID))
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"ib_SPL_output_files")
        output_files = DialogControlGet(SPL_Handle,"ib_SPL_output_files",dc_itemboxselect)
        output_files_list = DialogControlGet(SPL_Handle,"ib_SPL_output_files",dc_itemboxcontents)
        Break
    EndSwitch
    Break
  Case msg_buttonpushed         ; PushButton
    Switch(DialogProcOptions(SPL_Handle,dpo_getcontrolnum,DCID))
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"pb_SPL_Exit")
        nSelection = DialogControlGet(SPL_Handle,"pb_SPL_Exit",dc_title)
        SPLIT_Get_List_Rtn()
        If ItemCount(files,"[") > 1 Then DialogControlSet(SPL_Handle,"fl_SPL_files",dc_itemboxselect,"")
        If files == "" Then
          files = ItemExtract(1,files_list,@TAB)
          DialogControlSet(SPL_Handle,"fl_SPL_files",dc_itemboxselect,files)
        Else
          System_Ini_Save_Rtn()
        EndIf
        Return -1
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"pb_SPL_Up")
        nSelection = DialogControlGet(SPL_Handle,"pb_SPL_Up",dc_title)
        System_Ini_Save_Rtn()
        ; DirGet() ends with a backslash, so the first ItemRemove drops the empty
        ; trailing item and the second drops the current directory name.
        file_path = DirGet()
        file_path = ItemRemove(-1,file_path,"\")
        file_path = ItemRemove(-1,file_path,"\")
        ; At a drive root nothing is left: stay where we are instead of calling
        ; DirChange(""). When only the drive ("C:") is left, add the backslash,
        ; because a bare drive means "current directory on that drive" and
        ; would not move up to the root.
        If file_path != "" Then
          If ItemCount(file_path,"\") == 1 Then file_path = file_path:"\"
          DirChange(file_path)
        EndIf
        SPLIT_Apply_Filter_Rtn()
        files_list = DialogControlGet(SPL_Handle,"fl_SPL_files",dc_itemboxcontents)
        DialogProcOptions(SPL_Handle, dpo_changetitle,"XML Splitter - ":DirGet())
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"pb_SPL_Split")
        nSelection = DialogControlGet(SPL_Handle,"pb_SPL_Split",dc_title)
        SPLIT_Get_List_Rtn()
        split_file = DirGet():files
        ; Validate the input before handing over to BinSplit.
        If FileExist(split_file) != 1 Then
          Message("Note","This file doesn't exist or is in use by another program.  Please correct and try again.")
          Break
        EndIf
        If !IsNumber(chunk_size) Then
          Message("Note","Chunk size must have a value.  Please correct and try again.")
          Break
        EndIf
        If root_node == "" || item_node == "" Then
          Message("Note","Root Node and Item Node must have a value.  Please correct and try again.")
          Break
        EndIf
        DialogControlSet(SPL_Handle,"ib_SPL_output_files",dc_itemboxcontents,"")
        DialogProcOptions(SPL_Handle, dpo_disablestate,2)                 ; disable with wait cursor while splitting
        BinSplit()
        DialogProcOptions(SPL_Handle, dpo_disablestate,0)
        Break
      Case DialogProcOptions(SPL_Handle,dpo_getcontrolnum,"pb_SPL_OutputDir")
        nSelection = DialogControlGet(SPL_Handle,"pb_SPL_OutputDir",dc_title)
        output_dir =  Get_Dir_Name(output_dir)
        SPLIT_Set_List_Rtn()
        Break
    EndSwitch
    Break
EndSwitch
Return -2

#EndSubRoutine



#DefineSubRoutine Sample_File()

  ; Refreshes the "File Example" text of the dialog.
  ; When the selection in "files" contains a "[" the sample is blanked.
  ; Otherwise the dialog values are re-read and the sample is built as
  ; <base>_<counter>.<ext> with the counter shown as 1, left-padded with "0"
  ; to output_counter characters. As a side effect output_base2 and
  ; output_ext2 are set to the effective base name and extension
  ; (file root and "xml" when the fields are empty).

  If ItemCount(files,"[") > 1 Then
    output_sample = ""
  Else
    SPLIT_Get_List_Rtn()
    If !IsNumber(output_counter) Then output_counter = 3

    ; effective base name: the edit box value, or the root of the selected file
    output_base2 = output_base
    If output_base2 == "" Then output_base2 = FileRoot(files)

    ; effective extension: the edit box value, or "xml"
    output_ext2 = output_ext
    If output_ext2 == "" Then output_ext2 = "xml"

    output_sample = StrCat(output_base2,"_",StrFixLeft(1,"0",output_counter),".",output_ext2)
  EndIf

  DialogControlSet(SPL_Handle,"vt_SPL_output_sample",dc_title,output_sample)

#EndSubRoutine


#DefineFunction Get_Dir_Name(get_file)
  ; Lets the user pick a directory, starting at get_file when that directory
  ; exists and at the script's directory otherwise.
  ;   get_file - directory that is currently selected
  ; Returns the chosen directory. If the chosen directory does not exist the
  ; user is asked whether to create it; declining returns "". Cancelling the
  ; selection returns get_file unchanged.

  ; NOTE(review): IntControl 72 with p1=2 presumably routes a cancelled prompt to
  ; the :CANCEL label below -- confirm against the IntControl documentation.
  IntControl(72,2,0,0,0)
  If !DirExist(get_file) Then
    File_Path = DirScript()
  Else
    File_Path = get_file
  EndIf
  get_file_tmp=AskDirectory("DIRECTORY SELECTION","",File_Path,"Directory Selected Was...",1|2|4)
  If !DirExist(get_file_tmp) && get_file_tmp != "" Then
    ; The published script called AskYesNoJim here, which is not defined anywhere
    ; in it; the built-in AskYesNo returns the same @YES / @NO answer.
    answer = AskYesNo("Create Directory","The directory '":get_file_tmp:"' does not exist.  Create? ")
    If answer == @YES Then
      DirMake(get_file_tmp)
    Else
      get_file_tmp = ""
    EndIf
  EndIf
  Return(get_file_tmp)
  :CANCEL
  ; Cancelled: hand back the directory that was passed in. The original assigned
  ; it to a variable and then returned nothing, so the caller lost its value.
  Return(get_file)
#EndFunction


#DefineFunction UDFCvtFloatToHuge(f)

  ; Rewrites a floating point value given in scientific notation as a plain
  ; string of digits, so that it can be used with the huge_* functions.
  ;   f - number to convert
  ; Returns the text of f unchanged when it contains no exponent part;
  ; otherwise the mantissa digits padded with zeros according to the exponent,
  ; with a leading "-" for negative values.

  sci_text = StrUpper(f + 0.0)
  mant     = ItemExtract(1,sci_text,"E")
  expo     = ItemExtract(2,sci_text,"E")

  ; no exponent part: nothing to expand
  If expo == "" Then Return(mant)

  ; remember the sign and continue with the absolute value
  is_negative = @FALSE
  If mant<0
    is_negative = @TRUE
    mant = -mant
  EndIf

  ; keep only the digits of the mantissa
  mant = StrReplace(mant,".","")

  If expo<0
    ; negative exponent: left-pad with zeros behind "0."
    mant = StrCat("0.",StrFixLeft(mant,0,-(expo)+StrLen(mant)-1))
  Else
    ; positive exponent: right-pad with zeros up to exponent+1 digits
    mant = StrFix(mant,0,Max(StrLen(mant) , expo+1))
  EndIf

  If is_negative Then Return(StrCat("-",mant))
  Return(mant)

#EndFunction

#DefineSubRoutine BinSplit()

   ; Splits split_file into chunk files of about chunk_size megabytes each.
   ; Every chunk is cut after the last complete item (item_node_end) found in
   ; the buffer and closed with root_node_end; every chunk after the first is
   ; prefixed with root_node. File offsets and sizes are kept as huge-number
   ; strings (huge_* functions) so that files beyond the integer range work.
   ; Uses the script variables split_file, chunk_size, root_node, root_node_end,
   ; item_node_end, output_dir and output_counter, and SPL_Handle for progress
   ; display. The written file names are added to the output item box.

   split_file_size = FileSize(split_file)
   ; Large sizes come back in scientific notation; expand them to plain digits.
   If ItemCount(split_file_size,"e") > 1 Then
     split_file_size = UDFCvtFloatToHuge(split_file_size)
   EndIf

   ; Buffer size = chunk size in bytes + room for the root tag and its closing tag.
   chunk_size_byte = huge_Multiply(1 << 20, chunk_size)
   chunk_size_byte = huge_Add(chunk_size_byte,StrLen(root_node))
   chunk_size_byte = huge_Add(chunk_size_byte,StrLen(root_node_end))

   If StrSub(huge_Subtract(split_file_size,chunk_size_byte),1,1) == "-" Then
     Message("Note","Chunk Size is bigger than the file.  Please select a smaller chunk size and try again.")
     Return
   EndIf

   big_binary_buffer = BinaryAlloc(chunk_size_byte) ; Physical memory is the limit.
   ; From here on chunk_size_byte is the number of bytes read from the file per chunk.
   chunk_size_byte = huge_Subtract(chunk_size_byte,StrLen(root_node))

   root_node_offset = 0
   file_count = 0
   ; Sample_File() re-reads the dialog values and sets output_base2 / output_ext2.
   Sample_File()
   split_file_offset = 0

   ; Make sure the output directory ends with a path separator before the file
   ; name is appended; a directory without one would otherwise be glued to the
   ; file name. An empty directory or a bare drive ("C:") is left as it is.
   split_out_dir = output_dir
   If split_out_dir != "" Then
     split_out_last = StrSub(split_out_dir,StrLen(split_out_dir),1)
     If split_out_last != "\" && split_out_last != ":" Then split_out_dir = split_out_dir:"\"
   EndIf

   While @TRUE
     file_count = file_count + 1
     bytes_read = "" : BinaryReadEx(big_binary_buffer, root_node_offset, split_file, split_file_offset, chunk_size_byte)

     If file_count == 1 Then
       bytes_read_save = bytes_read
       ; The first chunk already starts with the file's own root tag. Later chunks
       ; are read behind a gap of StrLen(root_node) bytes that receives root_node.
       ; (The original derived this from StrLen(root_node_end)-1, which is only the
       ; same value when root_node_end is "</" plus the tag name.)
       root_node_offset = StrLen(root_node)
     Else
       BinaryPokeStr(big_binary_buffer,0,root_node)
     EndIf

     ; Find the end of the last complete item in the buffer, searching backwards.
     inode_offset = BinaryIndexEx(big_binary_buffer, chunk_size_byte-1, item_node_end, @BACKSCAN, @TRUE)
     If inode_offset == -1 Then
       Message("Note","Node not found")
       Break
     EndIf
     inode_offset = inode_offset + StrLen(item_node_end)
     ; Bytes behind the last complete item; they are read again with the next chunk.
     inode_diff = huge_Subtract(chunk_size_byte,inode_offset)

     ; Cut the buffer after the last complete item plus the closing root tag, then
     ; write that tag. (The original added StrLen(root_node_end) to the return value
     ; of BinaryEodSet instead of to the offset, where it had no effect.)
     ; NOTE(review): if an item ends exactly at the end of the searched range this
     ; can reach past the allocated buffer by a byte -- the original had the same limit.
     BinaryEodSet(big_binary_buffer,inode_offset+StrLen(root_node_end))
     BinaryPokeStr(big_binary_buffer, inode_offset, root_node_end)
     output_file = split_out_dir:output_base2:"_":StrFixLeft(file_count,"0",output_counter):".":output_ext2

     ; Short read (end of file): the data ends where the bytes that were read end.
     If bytes_read < bytes_read_save Then BinaryEodSet(big_binary_buffer,bytes_read+root_node_offset)

     BinaryWrite(big_binary_buffer,output_file)

     ; Progress: chunk number in the title, file name appended to the output list.
     DialogProcOptions(SPL_Handle, dpo_changetitle,"XML Splitter - Chunk#: ":file_count)
     DialogControlSet(SPL_Handle,"ib_SPL_output_files",dc_itemboxadd,ItemExtract(-1,output_file,"\"))
     DialogControlSet(SPL_Handle,"ib_SPL_output_files",dc_itemscrollpos,-1)

     ; Advance the file offset to just behind the last item that was written.
     split_file_offset_save = split_file_offset
     If file_count == 1 Then
       split_file_offset = huge_Add(split_file_offset, chunk_size_byte)
     Else
       split_file_offset = huge_Add(split_file_offset, huge_Subtract(chunk_size_byte,StrLen(root_node)))
     EndIf
     split_file_offset = huge_Subtract(split_file_offset, inode_diff)

     ; Keep going while the offset has not passed the end of the file.
     If StrSub(huge_Subtract(split_file_offset, split_file_size),1,1) == "-" || huge_Subtract(split_file_offset, split_file_size) == 0 Then Continue

     Break

   EndWhile

   big_binary_buffer = BinaryFree (big_binary_buffer)

   WinHide("")

#EndSubRoutine


; End of the :Load_Routines section - goes back to the statement after "GoSub Load_Routines".
Return

Article ID:   W18491
Filename:   Large XML File Discussion.txt
File Created: 2009:08:31:13:21:02
Last Updated: 2009:08:31:13:21:02