TripAdviserの一覧からスポット情報をスクレイピング
先に記載した関数で取得したURLから、個々のスポット情報を取得
Public Function getSpotList(sUrl As String) As Variant Dim ie As InternetExplorer: Set ie = New InternetExplorer Dim sRet As String Dim dom As HTMLDocument Dim el As IHTMLElement Dim vtmp As Variant Dim f As Boolean Dim i As Long Dim j As Long Dim nPage As Long Dim ie2 As InternetExplorer Dim sFullAddress As String Dim sZipCode As String ie.navigate sUrl nPage = 1 Call MySleep(1000) Call ieCheck(ie) Set dom = ie.Document f = True Do While f For Each el In dom.getElementsByClassName("near_listing") sRet = sRet & ConvTSV(el.getElementsByClassName("location_name")(0).innerText) sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("location_name")(0).getElementsByTagName("a")(0).getAttribute("href")) sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("address_box")(0).innerText) sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("distance")(0).getElementsByTagName("b")(0).innerText) 'ここで住所詳細も取得しよう。 'ただ、一覧だけだと7アクセスで済んだのが 個別ページまでアクセスすると207アクセス必要になるため 'やらない方がいいかも…。 Set ie2 = New InternetExplorer: ie2.navigate "https://www.tripadvisor.jp" & el.getElementsByClassName("location_name")(0).getElementsByTagName("a")(0).getAttribute("href") Call MySleep(1000) Call ieCheck(ie2) sFullAddress = "": sZipCode = "" If ie2.Document.getElementsByClassName("address").length > 0 Then sFullAddress = Trim(ie2.Document.getElementsByClassName("address")(0).innerText) If ie2.Document.getElementsByClassName("postal-code").length > 0 Then sZipCode = Trim(ie2.Document.getElementsByClassName("postal-code")(0).innerText) sFullAddress = Replace(sFullAddress, sZipCode, "") & "\t" & sZipCode sRet = sRet & vbTab & ConvTSV(sFullAddress) End If ie2.Quit 'listinfo→評価が付かなくても入っていることがある…。 If el.getElementsByClassName("listInfo").length > 0 Then sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("listInfo")(0).innerText) ' ws.Range("G" & i).Value = el.getElementsByClassName("listInfo")(0).innerText End If 'ui_bubble_rating If el.getElementsByClassName("ui_bubble_rating").length > 0 Then vtmp = Split(el.getElementsByClassName("ui_bubble_rating")(0).getAttribute("alt"), " ") For j = LBound(vtmp) To UBound(vtmp) sRet = sRet & vbTab & ConvTSV(CStr(vtmp(j))) Next j End If 'more(口コミ件数) If el.getElementsByClassName("more").length > 0 Then sRet = sRet & vbTab & ConvTSV(el.getElementsByClassName("more")(0).innerText) End If 'popranking If el.getElementsByClassName("popRanking").length > 0 Then vtmp = Split(el.getElementsByClassName("popRanking")(0).innerText, " ") For j = LBound(vtmp) To UBound(vtmp) sRet = sRet & vbTab & ConvTSV(CStr(vtmp(j))) Next j End If sRet = sRet & vbCrLf Next el '次のページ存在チェック f = False For Each el In dom.getElementsByClassName("sprite-pageNext") If el.tagName = "A" Then f = True el.Click el.FireEvent ("onclick") Call ieCheck(ie) Call MySleep(1000) nPage = nPage + 1 Exit For End If Next el Loop ie.Quit getSpotList = sRet End Function
おまけ:タブ区切りテキストで出力するための変換関数
Public Function ConvTSV(ByVal str As String) As String Dim sRet As String sRet = Replace(Trim(str), """", """""") 'ダブルクオートのエスケープ & トリム sRet = Replace(sRet, vbCr, "\r") 'CRを\rに変換 sRet = Replace(sRet, vbLf, "\n") 'LFを\nに変換 sRet = """" & sRet & """" '前後にダブルクオートをつける ConvTSV = sRet End Function
サンプルプログラム:
Public Sub test_getSpotList(ParamArray vKeyWords()) Dim fs As FileSystemObject: Set fs = New FileSystemObject Dim vtemp As Variant: vtemp = vKeyWords() Dim sFileName As String Dim i As Long For i = LBound(vKeyWords) To UBound(vKeyWords) If sFileName <> "" Then sFileName = sFileName & "_" sFileName = sFileName & CStr(vKeyWords(i)) Next i Dim ts As TextStream: Set ts = fs.CreateTextFile("C:\" & sFileName & Format(Now(), "yymmddhhnnss") & ".txt") ts.Write getSpotList(getTAURL(vtemp)) ts.Close Set fs = Nothing End Sub