BDG Scenario 3

Main (Form1.frm)

VERSION 5.00
Object = "{EAB22AC0-30C1-11CF-A7EB-0000C05BAE0B}#1.1#0"; "shdocvw.dll"
Object = "{831FDD16-0C5C-11D2-A9FC-0000F8754DA1}#2.0#0"; "MSCOMCTL.OCX"
Object = "{F9043C88-F6F2-101A-A3C9-08002B2F49FB}#1.2#0"; "comdlg32.ocx"
Object = "{298D967A-4550-11D1-957C-444553540000}#7.0#0"; "CJSplit32.OCX"
Begin VB.Form Main 
   Caption         =   "HTML Data Refinery"
   ClientHeight    =   7485
   ClientLeft      =   165
   ClientTop       =   735
   ClientWidth     =   9390
   LinkTopic       =   "Form1"
   ScaleHeight     =   7485
   ScaleWidth      =   9390
   StartUpPosition =   3  'Windows Default
   Begin VB.Timer Timer1 
      Left            =   8880
      Top             =   0
   End
   Begin MSComctlLib.StatusBar StatusBar1 
      Align           =   2  'Align Bottom
      Height          =   255
      Left            =   0
      TabIndex        =   0
      Top             =   7230
      Width           =   9390
      _ExtentX        =   16563
      _ExtentY        =   450
      _Version        =   393216
      BeginProperty Panels {8E3867A5-8586-11D1-B16A-00C0F0283628} 
         NumPanels       =   5
         BeginProperty Panel1 {8E3867AB-8586-11D1-B16A-00C0F0283628} 
            AutoSize        =   1
            Object.Width           =   9604
            MinWidth        =   5080
         EndProperty
         BeginProperty Panel2 {8E3867AB-8586-11D1-B16A-00C0F0283628} 
            Style           =   4
            Alignment       =   2
            Enabled         =   0   'False
            Object.Width           =   1270
            MinWidth        =   1270
            TextSave        =   "SCRL"
         EndProperty
         BeginProperty Panel3 {8E3867AB-8586-11D1-B16A-00C0F0283628} 
            Style           =   1
            Alignment       =   2
            Enabled         =   0   'False
            Object.Width           =   1270
            MinWidth        =   1270
            TextSave        =   "CAPS"
         EndProperty
         BeginProperty Panel4 {8E3867AB-8586-11D1-B16A-00C0F0283628} 
            Style           =   2
            Alignment       =   2
            Enabled         =   0   'False
            Object.Width           =   1270
            MinWidth        =   1270
            TextSave        =   "NUM"
         EndProperty
         BeginProperty Panel5 {8E3867AB-8586-11D1-B16A-00C0F0283628} 
            Style           =   5
            Alignment       =   2
            TextSave        =   "4:28 PM"
         EndProperty
      EndProperty
   End
   Begin MSComctlLib.ImageList ImageList1 
      Left            =   8760
      Top             =   1080
      _ExtentX        =   1005
      _ExtentY        =   1005
      BackColor       =   -2147483643
      MaskColor       =   12632256
      _Version        =   393216
   End
   Begin MSComDlg.CommonDialog Dialogs1 
      Left            =   8880
      Top             =   480
      _ExtentX        =   847
      _ExtentY        =   847
      _Version        =   393216
   End
   Begin CreativeSplitterProj.CreativeSplitter CreativeSplitter1 
      Height          =   7215
      Left            =   0
      TabIndex        =   1
      Top             =   0
      Width           =   8655
      _ExtentX        =   15266
      _ExtentY        =   12726
      FirstControl    =   "TreeView1"
      SecondControl   =   "TabStrip1"
      SplitPosition   =   2753
      Begin VB.TextBox Text1 
         Height          =   3735
         Left            =   5040
         Locked          =   -1  'True
         MultiLine       =   -1  'True
         ScrollBars      =   3  'Both
         TabIndex        =   5
         Top             =   2040
         Visible         =   0   'False
         Width           =   3015
      End
      Begin SHDocVwCtl.WebBrowser WebBrowser1 
         Height          =   3615
         Left            =   3480
         TabIndex        =   4
         Top             =   960
         Width           =   3015
         ExtentX         =   5318
         ExtentY         =   6376
         ViewMode        =   0
         Offline         =   0
         Silent          =   0
         RegisterAsBrowser=   0
         RegisterAsDropTarget=   1
         AutoArrange     =   0   'False
         NoClientEdge    =   0   'False
         AlignLeft       =   0   'False
         ViewID          =   "{0057D0E0-3573-11CF-AE69-08002B2E1262}"
         Location        =   "res://E:\WINNT\system32\shdoclc.dll/dnserror.htm#http:///"
      End
      Begin MSComctlLib.TabStrip TabStrip1 
         Height          =   7215
         Left            =   2813
         TabIndex        =   3
         Top             =   0
         Width           =   5842
         _ExtentX        =   10292
         _ExtentY        =   12726
         _Version        =   393216
         BeginProperty Tabs {1EFB6598-857C-11D1-B16A-00C0F0283628} 
            NumTabs         =   2
            BeginProperty Tab1 {1EFB659A-857C-11D1-B16A-00C0F0283628} 
               Caption         =   "HTML"
               Object.ToolTipText     =   "HTML Page View"
               ImageVarType    =   2
            EndProperty
            BeginProperty Tab2 {1EFB659A-857C-11D1-B16A-00C0F0283628} 
               Caption         =   "Data"
               Object.ToolTipText     =   "Data Extraction"
               ImageVarType    =   2
            EndProperty
         EndProperty
      End
      Begin MSComctlLib.TreeView TreeView1 
         Height          =   7215
         Left            =   0
         TabIndex        =   2
         Top             =   0
         Width           =   2753
         _ExtentX        =   4868
         _ExtentY        =   12726
         _Version        =   393217
         Indentation     =   441
         LabelEdit       =   1
         Style           =   6
         Appearance      =   1
      End
   End
   Begin VB.Menu mnuFile 
      Caption         =   "&File"
      Begin VB.Menu mnuFileOpen 
         Caption         =   "&Open HTML..."
         Shortcut        =   ^O
      End
      Begin VB.Menu mnuFileSave 
         Caption         =   "&Save HTML As..."
         Shortcut        =   ^S
      End
      Begin VB.Menu mnuSep 
         Caption         =   "-"
      End
      Begin VB.Menu mnuFileExit 
         Caption         =   "E&xit"
      End
   End
   Begin VB.Menu mnuConvert 
      Caption         =   "&Convert"
      Begin VB.Menu mnuConvertURL 
         Caption         =   "&URL..."
         Shortcut        =   ^U
      End
      Begin VB.Menu mnuConvertBuild 
         Caption         =   "&Build All Projects"
      End
      Begin VB.Menu mnuSep2 
         Caption         =   "-"
      End
      Begin VB.Menu mnuOptions 
         Caption         =   "&Options..."
      End
   End
   Begin VB.Menu mnuHelp 
      Caption         =   "&Help"
      Begin VB.Menu mnuHelpAbout 
         Caption         =   "&About"
      End
   End
   Begin VB.Menu mnuPropertiesPopup 
      Caption         =   "PopupProperties"
      Visible         =   0   'False
      Begin VB.Menu mnuProperties 
         Caption         =   "&Properties"
      End
      Begin VB.Menu mnuExpandAll 
         Caption         =   "Expand/Collapse &All"
      End
   End
End
Attribute VB_Name = "Main"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Option Explicit

Private m_aFiles() As String
Private m_nCurrentFile As Integer

Private Sub Form_Load()
   Set g_Options = New Options
   g_Options.LoadDefaults
   Timer1.Interval = 750
   ReDim m_aFiles(1)
   m_aFiles(1) = g_Options.GetRecentURL(1)
   Call BeginProcessing
End Sub

Private Sub Form_QueryUnload(Cancel As Integer, UnloadMode As Integer)
    Dim bYes As Boolean
    'bYes = (MsgBox("Unload OK?", vbYesNo, "Unload Me") = vbYes)
    'If Not bYes Then Cancel = True
End Sub

Private Sub Form_Unload(Cancel As Integer)
    Call g_Options.SaveDefaults
End Sub

Private Sub Form_Resize()
   If Me.ScaleHeight > 255 Then
      CreativeSplitter1.Move Me.ScaleLeft, Me.ScaleTop, _
                   Me.ScaleWidth, Me.ScaleHeight - StatusBar1.height
      Call CreativeSplitter1_Resize
   End If
End Sub

Private Sub CreativeSplitter1_Resize()
    WebBrowser1.Move TabStrip1.ClientLeft, TabStrip1.ClientTop, _
                     TabStrip1.ClientWidth, TabStrip1.ClientHeight
    Text1.Move TabStrip1.ClientLeft, TabStrip1.ClientTop, _
                   TabStrip1.ClientWidth, TabStrip1.ClientHeight
End Sub

Private Sub CreativeSplitter1_Paint()
    Call CreativeSplitter1_Resize
End Sub

Private Sub mnuFileExit_Click()
    Unload Me
End Sub

Private Sub mnuFileOpen_Click()
   On Error GoTo ErrHandler
   Dialogs1.CancelError = True
   Dialogs1.DialogTitle = "Open Document"
   Dialogs1.Filename = ""
   Dialogs1.Filter = "HTML Files (*.htm,*.html)|*.htm;*.html|All Files (*.*)|*.*"
   Dialogs1.Flags = cdlOFNFileMustExist + cdlOFNPathMustExist + cdlOFNAllowMultiselect + cdlOFNExplorer
   Dialogs1.DefaultExt = "*.htm"
OpenAgain:
   Dialogs1.ShowOpen
   
   ' Capture multiple selection in array
   Call BuildFileList(Dialogs1.Filename)
   
   If UBound(m_aFiles) > 1 Then
      If Not (vbYes = MsgBox("You have selected more than one file, but only one file may be examined at a time." & vbCrLf & _
             "Click Yes to process all files (saving the results as XML), or No to reconsider.", vbYesNo + vbQuestion)) Then
         GoTo ErrHandler
      End If
   End If
   
   Call BeginProcessing
   Exit Sub
   
ErrHandler:
   If Err = 20476 Then
      MsgBox "Too many files selected. Please try again with fewer files.", vbCritical, "Too many files"
      GoTo OpenAgain
   End If
   UpdateUser "Open canceled."
   Err.Clear
End Sub

Private Sub mnuConvertBuild_Click()
   Dim bitem As BuildListItem, i&
   For i = 1 To g_Options.ProjectCount
      Set bitem = g_Options.GetProject(i)
      If bitem.Enabled Then
         If Not bitem.Process Then Exit For
      End If
   Next i
End Sub

Private Sub mnuFileSave_Click()
   On Error GoTo ErrHandler
   Dim f As Integer
   Dialogs1.CancelError = True
   Dialogs1.DialogTitle = "Save Document"
   Dialogs1.Filter = "HTML Files (*.htm,*.html)|*.htm;*.html|XML Files (*.xml)|*.xml|All Files (*.*)|*.*"
   Dialogs1.Flags = cdlOFNOverwritePrompt + cdlOFNPathMustExist + cdlOFNNoReadOnlyReturn
   Dialogs1.DefaultExt = "*.htm"
   Dialogs1.ShowSave
   TabStrip1.Tabs(2).Selected = True
   f = FreeFile
   Open Dialogs1.Filename For Output Lock Read Write As #f
   Print #f, Text1.text
   Close #f
   Exit Sub
ErrHandler:
   MsgBox "Save canceled."
   Err.Clear
End Sub

Private Sub mnuConvertURL_Click()
   CenterForm LoadURL
   LoadURL.Show vbModal
   If Not LoadURL.m_bCancel Then
      ReDim m_aFiles(1)
      m_aFiles(1) = g_Options.GetRecentURL(1)
      m_nCurrentFile = 0
      Call BeginProcessing
   End If
End Sub

Private Sub mnuHelpAbout_Click()
   CenterForm About
   About.Show vbModal
End Sub

Private Sub mnuOptions_Click()
   CenterForm Settings
   Settings.Show vbModal
End Sub

Private Sub TabStrip1_Click()
    Select Case TabStrip1.SelectedItem.Index
    Case 1:
        Text1.Visible = False
        WebBrowser1.Visible = True
    Case 2:
        WebBrowser1.Visible = False
        Text1.Visible = True
        Call ParseText
    End Select
End Sub

Private Sub TreeView1_KeyPress(KeyAscii As Integer)
   If KeyAscii = Asc(vbCr) Then TreeView1_DblClick
End Sub

Private Sub TreeView1_MouseDown(Button As Integer, Shift As Integer, x As Single, y As Single)
   Dim nd As MSComctlLib.Node
   If Button = vbRightButton Then
      Set nd = TreeView1.SelectedItem
      mnuExpandAll.Enabled = IIf(nd.children > 0, True, False)
      PopupMenu mnuPropertiesPopup
   End If
End Sub

Private Sub TreeView1_DblClick()
   Dim nd As MSComctlLib.Node
   Set nd = TreeView1.SelectedItem
   If nd.children = 0 Then mnuProperties_Click
End Sub

Private Sub mnuProperties_Click()
   Dim nd As MSComctlLib.Node
   Set nd = TreeView1.SelectedItem
   Set Properties.Element = nd.tag
   Properties.Show vbModal
End Sub

Private Sub mnuExpandAll_Click()
   Dim nd As MSComctlLib.Node
   Set nd = TreeView1.SelectedItem
   nd.Expanded = Not nd.Expanded
   ExpandAll nd, nd.Expanded
End Sub

Private Sub ExpandAll(ByVal parent As MSComctlLib.Node, ByVal exp As Boolean)
   Dim nd As MSComctlLib.Node
   Set nd = parent.Child
   Do Until nd Is Nothing
      nd.Expanded = exp
      ExpandAll nd, exp
      Set nd = nd.Next
   Loop
End Sub

Private Sub Timer1_Timer()
   ' On Error Resume Next
   Dim hFile As Integer
   Dim szString1, szString2 As String
   Dim oTemp1, oTemp2 As Object
   Dim fs As Object
   Dim Count As Long

   Select Case WebBrowser1.ReadyState
   
   Case READYSTATE_UNINITIALIZED:
       'If we are on an empty IE (just initialized, then jump to loadnextpage
       WebBrowser1.Navigate "http://www.microsoft.com/ms.htm"
       UpdateUser "Begin loading..."
       Exit Sub

   Case READYSTATE_INTERACTIVE, READYSTATE_LOADING:
       UpdateUser "Still loading..."
       Exit Sub
       
   Case READYSTATE_COMPLETE, READYSTATE_LOADED:
       'If not finished then wait till next loop.
       UpdateUser "Loaded: " & g_Options.GetRecentURL(1)

   End Select
   
   Timer1.Enabled = False
   
   If UBound(m_aFiles) > 1 Then TabStrip1.Tabs(2).Selected = True
   If m_nCurrentFile = UBound(m_aFiles) Then Call LoadTree

End Sub

Private Sub ParseText()
    If Text1.text = "" Then
      Set Convert.m_rootElem = WebBrowser1.Document.documentElement
      CenterForm Convert
      Convert.Show vbModal
      Text1.text = Convert.m_strHTML
   End If
   ' Complete the loop, if other files are waiting
   If UBound(m_aFiles) > 1 Then
      Call SaveXMLFile
      Call BeginProcessing
   End If
End Sub

Private Sub LoadTree()
   Dim nd As MSComctlLib.Node
   Dim item As Variant
   Dim str As String
      
   If Not WebBrowser1.Document Is Nothing Then
      TreeView1.Nodes.Clear
      For Each item In WebBrowser1.Document.All
         If item.parentElement Is Nothing Then
            Set nd = TreeView1.Nodes.Add
         Else
            Set nd = TreeView1.Nodes.Add(item.parentElement.uniqueID, tvwChild)
         End If
         Set nd.tag = item
         nd.Key = item.uniqueID
         nd.text = item.nodeName
       Next item
   End If
End Sub

Private Sub BeginProcessing()
   Dim file$
   If m_nCurrentFile < UBound(m_aFiles) Then
      m_nCurrentFile = m_nCurrentFile + 1
      file = m_aFiles(m_nCurrentFile)
      g_Options.AddRecentURL file
      TabStrip1.Tabs(1).Selected = True
      Text1.text = ""
      WebBrowser1.Navigate file
      Timer1.Enabled = True
   End If
End Sub

' Always saves the currently opened file with .XML extension
Private Sub SaveXMLFile()
   Dim f&, file$, x&
   x = InStrRev(m_aFiles(m_nCurrentFile), ".")
   If x > 0 Then
      file = Left(m_aFiles(m_nCurrentFile), x) & "xml"
   Else
      file = m_aFiles(m_nCurrentFile) & ".xml"
   End If
   ' drop the "file://" extension
   If Left(file, 7) = "file://" Then file = Mid(file, 8)
   f = FreeFile
   Open file For Output Lock Read Write As #f
   Print #f, Text1.text
   Close #f
End Sub

' Separates multiple selection filename into m_aFiles array
Private Sub BuildFileList(Filename As String)
   Dim i&, x&, y&, path$
   
   ' Capture multiple selection in array
   y = InStr(Filename, Chr(0))
   If y > 0 Then
      path = "file://" & Left(Filename, y - 1)
      i = 1
      x = y + 1
      y = InStr(x, Filename, Chr(0))
      Do Until y = 0
         ReDim Preserve m_aFiles(i)
         m_aFiles(i) = path & "\" & Mid(Filename, x, y - x)
         i = i + 1
         x = y + 1
         y = InStr(x, Filename, Chr(0))
      Loop
      ReDim Preserve m_aFiles(i)
      m_aFiles(i) = path & "\" & Mid(Filename, x)
   Else ' only one file selected
      ReDim m_aFiles(1)
      m_aFiles(1) = "file://" & Filename
   End If
   
   ' reset current file counter
   m_nCurrentFile = 0

End Sub