用 Go 和 Metadata 编写的 StackExchange.Exceptional 收集器

可以将以下 Go 文件编译为连续的外部收集器,该收集器将查询使用 StackExchange.Exceptional 模式的 MSSQL 服务器数据库。它将查询多个服务器/数据库以查找自 UTC 00:00 以来的所有异常,以将原始条目转换为计数器。它还使用 bosun.org/metadata 包来包含 exceptions.exceptions.count 指标的元数据。

/*
Exceptional is an scollector external collector for StackExchange.Exceptional.
*/
package main

import (
    "database/sql"
    "encoding/json"
    "fmt"
    "log"
    "strings"
    "time"

    "bosun.org/metadata"
    "bosun.org/opentsdb"

    _ "github.com/denisenkom/go-mssqldb"
)

func mssqlConnect(server, database, user, pass, port string) (*sql.DB, error) {
    dsn := fmt.Sprintf("server=%s;port=%s;database=%s;user id=%s;password=%s", server, port, database, user, pass)
    return sql.Open("mssql", dsn)
}

type Exceptions struct {
    GUID            string
    ApplicationName string
    MachineName     string
    CreationDate    time.Time
    Type            string
    IsProtected     int
    Host            string
    Url             string
    HTTPMethod      string
    IPAddress       string
    Source          string
    Message         string
    Detail          string
    StatusCode      int
    SQL             string
    DeletionDate    time.Time
    FullJson        string
    ErrorHash       int
    DuplicateCount  int
}

type ExceptionsCount struct {
    ApplicationName string
    MachineName     string
    Count           int64
    Source          string
}

type ExceptionsDB struct {
    Server     string
    DBName     string
    DBPassword string
    DBPort     string
    Source     string
}

const (
    defaultPassword = "EnterPasswordHere"
    defaultPort     = "1433"

    metric     = "exceptional.exceptions.count"
    descMetric = "The number of exceptions thrown per second by applications and machines. Data is queried from multiple sources. See status instances for details on exceptions."
)

func main() {
    mds := []metadata.Metasend{
        {
            Metric: metric,
            Name:   "rate",
            Value:  "counter",
        },
        {
            Metric: metric,
            Name:   "unit",
            Value:  metadata.Error,
        },
        {
            Metric: metric,
            Name:   "desc",
            Value:  descMetric,
        },
    }
    for _, m := range mds {
        b, err := json.Marshal(m)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println(string(b))
    }
    instances := [...]ExceptionsDB{
        {"NY_AG", "NY.Exceptions", defaultPassword, defaultPort, "NY_Status"},
        {"CO-SQL", "CO.Exceptions", defaultPassword, defaultPort, "CO_Status"},
        {"NY-INTSQL", "Int.Exceptions", defaultPassword, defaultPort, "INT_Status"},
    }
    for _, exdb := range instances {
        go run(exdb)
    }
    select {}
}

func run(exdb ExceptionsDB) {
    const interval = time.Second * 30

    query := func() {
        // Database name is the same as the username
        db, err := mssqlConnect(exdb.Server, exdb.DBName, exdb.DBName, exdb.DBPassword, exdb.DBPort)
        if err != nil {
            log.Println(err)
        }
        defer db.Close()
        var results []ExceptionsCount
        sqlQuery := `
        SELECT ApplicationName, MachineName, MAX(Count) as Count FROM
        (
            --New since UTC rollover
            SELECT ApplicationName, MachineName, Sum(DuplicateCount) as Count from Exceptions
            WHERE CreationDate > CONVERT (date, GETUTCDATE())
            GROUP BY MachineName, ApplicationName
            UNION --Zero out any app/machine combos that had exceptions in last 24 hours
            SELECT DISTINCT ex.ApplicationName, ex.MachineName, 0 as Count from Exceptions ex WHERE ex.CreationDate Between Convert(Date, GETUTCDATE()-1) And Convert(Date, GETUTCDATE())
        ) as T
        GROUP By T.MachineName, T.ApplicationName`
        rows, err := db.Query(sqlQuery)
        if err != nil {
            log.Println(err)
            return
        }
        defer rows.Close()
        for rows.Next() {
            var r ExceptionsCount
            if err := rows.Scan(&r.ApplicationName, &r.MachineName, &r.Count); err != nil {
                log.Println(err)
                continue
            }
            r.Source = exdb.Source
            results = append(results, r)
        }
        if err := rows.Err(); err != nil {
            log.Println(err)
        }
        if len(results) > 0 {
            now := time.Now().Unix()
            for _, r := range results {
                application, err := opentsdb.Clean(r.ApplicationName)
                if err != nil {
                    log.Println(err)
                    continue
                }
                db := opentsdb.DataPoint{
                    Metric:    metric,
                    Timestamp: now,
                    Value:     r.Count,
                    Tags: opentsdb.TagSet{
                        "application": application,
                        "machine":     strings.ToLower(r.MachineName),
                        "source":      r.Source,
                    },
                }
                b, err := db.MarshalJSON()
                if err != nil {
                    log.Println(err)
                    continue
                }
                fmt.Println(string(b))
            }
        }
    }

    for {
        wait := time.After(interval)
        query()
        <-wait
    }
}