从具有特定 id 的 div 中提取文本内容
Taggy-lens 允许我们使用 lens（透镜）来解析和检查 HTML 文档。
#!/usr/bin/env stack
-- stack --resolver lts-7.0 --install-ghc runghc --package text --package lens --package taggy-lens
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Text.Lazy as TL
import qualified Data.Text.IO as T
import Text.Taggy.Lens
import Control.Lens
-- | Sample HTML document queried by 'main'.
--
-- It contains three @div@ elements: one without an @id@ attribute, one with
-- @id="thediv"@ and one with @id="not-thediv"@. The closing @body@ and @html@
-- tags are omitted from the markup.
someHtml :: TL.Text
someHtml =
  -- The literal is split over several lines with string gaps (backslash,
  -- whitespace, backslash); a gap contributes no characters to the value.
  -- The continuation lines are indented so that the layout rule keeps them
  -- inside this definition instead of starting a new top-level declaration.
  "\
  \<!doctype html><html><body>\
  \<div>first div</div>\
  \<div id=\"thediv\">second div</div>\
  \<div id=\"not-thediv\">third div</div>"
-- | Print the text content of the element in 'someHtml' whose @id@ attribute
-- equals @"thediv"@.
--
-- The optic chain reads left to right:
--
-- * 'html' views the lazy text as a parsed HTML node;
-- * 'allAttributed' selects elements whose attribute map satisfies the given
--   fold, here @ix "id" . only "thediv"@ (an @id@ entry equal to @"thediv"@);
-- * 'contents' focuses on the text held by the selected elements.
--
-- NOTE(review): the optics can focus zero or several targets, so '^.' is
-- expected to combine the matches through the 'Monoid' instance of the result
-- text (an empty line when nothing matches). For the sample input the
-- expected output is @second div@ -- confirm against the taggy-lens docs.
main :: IO ()
main =
  -- The argument is indented so it stays part of the 'T.putStrLn' application.
  T.putStrLn $
    someHtml ^. html . allAttributed (ix "id" . only "thediv") . contents