メモ

主にプログラミング系の備忘録

TripAdviserの一覧からスポット情報をスクレイピング

先に記載した関数で取得したURLから、個々のスポット情報を取得

Public Function getSpotList(sUrl As String) As Variant
  Dim ie As InternetExplorer: Set ie = New InternetExplorer
  Dim sRet As String
  
  Dim dom As HTMLDocument
  Dim el As IHTMLElement
  Dim vtmp As Variant
  Dim f As Boolean
  Dim i As Long
  Dim j As Long
  Dim nPage As Long
  Dim ie2 As InternetExplorer
  Dim sFullAddress As String
  Dim sZipCode As String
  ie.navigate sUrl
  nPage = 1
  Call MySleep(1000)
  Call ieCheck(ie)
  Set dom = ie.Document
  f = True
  
  Do While f
    For Each el In dom.getElementsByClassName("near_listing")
      sRet = sRet & ConvTSV(el.getElementsByClassName("location_name")(0).innerText)
      sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("location_name")(0).getElementsByTagName("a")(0).getAttribute("href"))
      sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("address_box")(0).innerText)
      sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("distance")(0).getElementsByTagName("b")(0).innerText)

'ここで住所詳細も取得しよう。
'ただ、一覧だけだと7アクセスで済んだのが 個別ページまでアクセスすると207アクセス必要になるため
'やらない方がいいかも…。

      Set ie2 = New InternetExplorer: ie2.navigate "https://www.tripadvisor.jp" & el.getElementsByClassName("location_name")(0).getElementsByTagName("a")(0).getAttribute("href")
      Call MySleep(1000)
      Call ieCheck(ie2)
      
      sFullAddress = "": sZipCode = ""
      
      If ie2.Document.getElementsByClassName("address").length > 0 Then
        sFullAddress = Trim(ie2.Document.getElementsByClassName("address")(0).innerText)
        If ie2.Document.getElementsByClassName("postal-code").length > 0 Then sZipCode = Trim(ie2.Document.getElementsByClassName("postal-code")(0).innerText)
        sFullAddress = Replace(sFullAddress, sZipCode, "") & "\t" & sZipCode
        sRet = sRet & vbTab & ConvTSV(sFullAddress)
      End If
      ie2.Quit
      
'listinfo→評価が付かなくても入っていることがある…。
      If el.getElementsByClassName("listInfo").length > 0 Then
        sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("listInfo")(0).innerText)
'          ws.Range("G" & i).Value = el.getElementsByClassName("listInfo")(0).innerText
      End If
'ui_bubble_rating
      If el.getElementsByClassName("ui_bubble_rating").length > 0 Then
        vtmp = Split(el.getElementsByClassName("ui_bubble_rating")(0).getAttribute("alt"), " ")
        For j = LBound(vtmp) To UBound(vtmp)
          sRet = sRet & vbTab & ConvTSV(CStr(vtmp(j)))
        Next j
      End If
'more(口コミ件数)
      If el.getElementsByClassName("more").length > 0 Then
        sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("more")(0).innerText)
      End If
'popranking
      If el.getElementsByClassName("popRanking").length > 0 Then
        vtmp = Split(el.getElementsByClassName("popRanking")(0).innerText, " ")
        For j = LBound(vtmp) To UBound(vtmp)
          sRet = sRet & vbTab & ConvTSV(CStr(vtmp(j)))
        Next j
      End If
      sRet = sRet & vbCrLf
    Next el

'次のページ存在チェック    
    f = False
    For Each el In dom.getElementsByClassName("sprite-pageNext")
      If el.tagName = "A" Then
        f = True
        el.Click
        el.FireEvent ("onclick")
        Call ieCheck(ie)
        Call MySleep(1000)
        nPage = nPage + 1
        Exit For
      End If
    Next el
  Loop
  ie.Quit
  
  getSpotList = sRet
  
End Function

おまけ:タブ区切りテキストで出力するための変換関数

Public Function ConvTSV(ByVal str As String) As String
  Dim sRet As String
  sRet = Replace(Trim(str), """", """""") 'ダブルクオートのエスケープ & トリム
  sRet = Replace(sRet, vbCr, "\r") 'CRを\rに変換
  sRet = Replace(sRet, vbLf, "\n") 'LFを\nに変換
  sRet = """" & sRet & """"  '前後にダブルクオートをつける
  ConvTSV = sRet
  
End Function

サンプルプログラム:

Public Sub test_getSpotList(ParamArray vKeyWords())
  Dim fs As FileSystemObject: Set fs = New FileSystemObject
  Dim vtemp As Variant: vtemp = vKeyWords()
  
  Dim sFileName As String
  Dim i As Long
  For i = LBound(vKeyWords) To UBound(vKeyWords)
    If sFileName <> "" Then sFileName = sFileName & "_"
    sFileName = sFileName & CStr(vKeyWords(i))
  Next i
  Dim ts As TextStream: Set ts = fs.CreateTextFile("C:\" & sFileName & Format(Now(), "yymmddhhnnss") & ".txt")
  ts.Write getSpotList(getTAURL(vtemp))
  ts.Close
  Set fs = Nothing
End Sub