diff --git a/README.md b/README.md
index bc2c826..8dc0bf4 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,10 @@
[](http://www.apache.org/licenses/LICENSE-2.0.html)
-distributed monitoring system
+ OWL 是由国内领先的第三方数据智能服务商 [TalkingData]() 开源的一款企业级分布式监控告警系统,目前由 Tech Operation Team 持续开发更新维护。
+
+ OWL 后台组件全部使用 [Go](https://golang.org/) 语言开发,Go 语言是 Google 开发的一种静态强类型、编译型、并发型,并具有垃圾回收功能的编程语言,它的并发机制可以充分利用多核,同平台一次编译可以到处运行,运维成本极低,更多的信息可以参考[官方文档](https://golang.org/doc/)。前端页面使用 [iView]() 开发,iView 同样是由 TalkingData 开源的一套基于 Vue.js 的 UI 组件库,主要服务于 PC 界面的中后台产品。
-OWL是TalkingData公司推出的一款开源分布式监控系统
## Features
@@ -13,15 +14,38 @@ OWL是TalkingData公司推出的一款开源分布式监控系统
- 分布式,支持多机房
- 多维的数据模型,类opentsdb
- 支持多种报警算法,支持多条件组合、时间范围、报警模板等
-- 灵活的插件机制,支持任意语言编写,支持传参,自动同步
-- 丰富的报警渠道,邮件、微信、短信、电话、自定义
-- 原始数据永久存储,支持发送到opentsdb、kairosdb、kafka
-- 自带web管理界面以及强大的自定义图表功能
+- 灵活的插件机制,支持任意语言编写,支持传参,自动同步到客户端
+- 丰富的报警渠道,邮件、企业微信、短信、电话以及自定义脚本
+- 原始数据永久存储,支持发送到 opentsdb、kairosdb、kafka
+- 自带 web 管理界面以及强大的自定义图表功能
## Architecture

+## Components
+
+**agent**:安装在每台被监控机器上,用于采集监控数据
+
+**netcollect**:通过 SNMP V2 采集网络设备的接口数据
+
+**repeater**:接收 `agent` 发送过来的监控数据,并写入后端存储
+
+**cfc**:维护客户端需要执行的插件列表,主机名、IP 地址更新以及采集到的指标列表
+
+**controller**:从数据库加载告警策略,生成任务发送给 `inspector`,并且根据执行结果进行告警
+
+**inspector**:从 `controller` 获取监控任务,根据 `tsdb` 中的数据进行计算,并将结果返回 `controller`
+
+**api**:对外提供 HTTP REST API 接口,web 页面就是通过它来获取数据
+
+**MySQL**:所有配置信息的持久化存储,包含主机信息,告警策略,主机组,人员等
+
+**TSDB**:时序数据库(time series database),用于存储采集到的监控数据
+
+**frontend**:web 管理页面,可以方便的进行系统管理维护工作
+
+
## Demo
http://54.223.127.87/
diff --git a/api/data.go b/api/data.go
index 8f77f44..af47ebf 100644
--- a/api/data.go
+++ b/api/data.go
@@ -35,7 +35,7 @@ func queryTimeSeriesData(c *gin.Context) {
metric := c.Query("metric")
tags := c.Query("tags")
tagMap := types.ParseTags(tags)
- if groupName, exist := tagMap["host_group"]; exist {
+ if groupNames, exist := tagMap["host_group"]; exist {
productIDStr, ok := c.GetQuery("product_id")
if !ok {
response["code"] = http.StatusNotFound
@@ -49,14 +49,21 @@ func queryTimeSeriesData(c *gin.Context) {
return
}
delete(tagMap, "host_group")
- hostSet := getHostnameTagsFromProductGroup(productID, groupName)
+ var hostSet []string
+ for _, groupName := range strings.Split(groupNames, "|") {
+ hostSet = append(hostSet, getHostnameTagsFromProductGroup(productID, groupName)...)
+ }
if len(hostSet) == 0 {
response["code"] = http.StatusBadRequest
- response["message"] = groupName + " has no host"
+ response["message"] = "all group has no host"
return
}
-
- tagMap["host"] = strings.Join(hostSet, "|")
+ hosts := strings.Join(hostSet, "|")
+ // 如果存在 tag host, merge
+ if host, ok := tagMap["host"]; ok {
+ hosts = hosts + "|" + host
+ }
+ tagMap["host"] = hosts
tags = Tags2String(tagMap)
}
diff --git a/api/host_groups.go b/api/host_groups.go
index d1f624c..6184ccf 100644
--- a/api/host_groups.go
+++ b/api/host_groups.go
@@ -61,10 +61,15 @@ func listNotInProductHostGroupHosts(c *gin.Context) {
func listProductHostGroups(c *gin.Context) {
response := gin.H{"code": http.StatusOK}
defer c.JSON(http.StatusOK, response)
+ var username string
+ if c.DefaultQuery("my", "false") == "true" {
+ username = c.GetString("username")
+ }
total, hostGroups := mydb.getProductHostGroups(
c.GetInt("product_id"),
c.GetBool("paging"),
c.GetString("query"),
+ username,
c.GetString("order"),
c.GetInt("offset"),
c.GetInt("limit"),
diff --git a/api/mysql.go b/api/mysql.go
index 333786e..654c68b 100644
--- a/api/mysql.go
+++ b/api/mysql.go
@@ -450,7 +450,7 @@ func (d *db) GetAlarmRecords(eventID int64, order, limit string) (records []*Ala
// GetTriggersRecords 获取报警事件下的表达式组
func (d *db) GetTriggersRecords(eventID int64, count int) []*TriggerEventRecord {
triggers := []*TriggerEventRecord{}
- rawSQL := "SELECT * FROM trigger_event_record WHERE strategy_event_id = ? AND count = ?"
+ rawSQL := "SELECT * FROM trigger_event_record WHERE strategy_event_id = ? AND count = ? AND triggered=TRUE"
if err := d.Select(&triggers, rawSQL, eventID, count); err != nil {
log.Println(err)
return nil
@@ -1551,13 +1551,14 @@ func (d *db) removeHostsFromProduct(productID int, ids []string) (err error) {
}
//获取产品线下的主机组
-func (d *db) getProductHostGroups(productID int, paging bool, query string, order string, offset, limit int) (int, []WarpHostGroup) {
+func (d *db) getProductHostGroups(productID int, paging bool, query string, user string, order string, offset, limit int) (int, []WarpHostGroup) {
var (
groups = make([]WarpHostGroup, 0)
err error
cnt int
+ rawSQL string
)
- rawSQL := fmt.Sprintf("select hg.id, hg.name, hg.description, hg.creator, DATE_FORMAT(hg.create_at,'%s') as create_at,"+
+ rawSQL = fmt.Sprintf("select hg.id, hg.name, hg.description, hg.creator, DATE_FORMAT(hg.create_at,'%s') as create_at,"+
"DATE_FORMAT(hg.update_at,'%s') as update_at, count(distinct host_group_plugin.id) as plugin_cnt, "+
"count(distinct host_group_host.id) as host_cnt, count(distinct strategy_group.id) as strategy_cnt "+
" from host_group as hg left join host_group_plugin on hg.id = host_group_plugin.group_id left join host_group_host "+
@@ -1565,6 +1566,10 @@ func (d *db) getProductHostGroups(productID int, paging bool, query string, orde
" where hg.product_id=%d",
dbDateFormat, dbDateFormat, productID)
cntSQL := fmt.Sprintf("select count(*) from host_group where product_id = %d", productID)
+ if len(user) > 0 {
+ rawSQL = fmt.Sprintf("%s and hg.creator='%s'", rawSQL, user)
+ cntSQL = fmt.Sprintf("%s and creator='%s'", cntSQL, user)
+ }
if len(query) > 0 {
rawSQL = fmt.Sprintf("%s and hg.name like '%%%s%%'", rawSQL, query)
cntSQL = fmt.Sprintf("%s and name like '%%%s%%'", cntSQL, query)
diff --git a/client/builtin/fd.go b/client/builtin/fd.go
index 6093a61..f84ecd5 100644
--- a/client/builtin/fd.go
+++ b/client/builtin/fd.go
@@ -30,6 +30,7 @@ func fdMetrics(cycle int) []*types.TimeSeriesData {
if err != nil {
return nil
}
+ defer fd.Close()
ts := time.Now().Unix()
r := bufio.NewReader(fd)
line, err := r.ReadString('\n')
diff --git a/common/chanMonitor/monitor.go b/common/chanMonitor/monitor.go
new file mode 100755
index 0000000..b57f1b5
--- /dev/null
+++ b/common/chanMonitor/monitor.go
@@ -0,0 +1,80 @@
+package chanMonitor
+
+import (
+ "fmt"
+ "reflect"
+ "sync"
+)
+
+// chans holds every registered channel, keyed by its (name, instance) pair.
+var chans = make(map[key]interface{})
+// chmu guards all reads and writes of chans.
+var chmu sync.RWMutex
+
+// AddNamed registers channel for monitoring under the given name and instance.
+// Several channels may share a name as long as their instance differs; each
+// (name, instance) pair can be registered only once.
+//
+// It returns an error when channel is nil or not of kind chan, or when the
+// pair is already being monitored.
+func AddNamed(name, instance string, channel interface{}) error {
+	// reflect.TypeOf returns a nil Type for a nil interface value, so check it
+	// before calling Kind to avoid a panic.
+	t := reflect.TypeOf(channel)
+	if t == nil || t.Kind() != reflect.Chan {
+		return fmt.Errorf("invalid input type %T for input param channel, must be of type chan", channel)
+	}
+
+	chmu.Lock()
+	defer chmu.Unlock()
+
+	k := key{name: name, instance: instance}
+	if _, found := chans[k]; found {
+		return fmt.Errorf("channel with name %q (instance %q) is already being monitored", name, instance)
+	}
+	chans[k] = channel
+
+	return nil
+}
+
+// ChanState is a snapshot of one monitored channel, serialised to JSON with
+// the keys "length", "capacity" and "instance".
+type ChanState struct {
+ Len int `json:"length"` // filled from reflect.Value.Len of the channel
+ Cap int `json:"capacity"` // filled from reflect.Value.Cap of the channel
+ Instance string `json:"instance"` // instance label the channel was registered under
+}
+
+// key identifies a registered channel in the chans map.
+type key struct {
+ name string // channel name passed to AddNamed
+ instance string // instance label passed to AddNamed
+}
+
+// Get reports the current length and capacity of the channel registered under
+// the given name and instance. It returns nil when no such channel exists.
+func Get(name, instance string) *ChanState {
+	chmu.RLock()
+	defer chmu.RUnlock()
+
+	registered, ok := chans[key{name: name, instance: instance}]
+	if !ok {
+		return nil
+	}
+
+	// The concrete channel type is only known at run time, hence reflection.
+	state := new(ChanState)
+	state.Len = reflect.ValueOf(registered).Len()
+	state.Cap = reflect.ValueOf(registered).Cap()
+	state.Instance = instance
+	return state
+}
+
+// GetAll returns the state of every monitored channel, keyed by channel name.
+//
+// Channels registered under the same name but different instances map to the
+// same key, so only one of them is present in the result.
+func GetAll() map[string]*ChanState {
+	chmu.RLock()
+	defer chmu.RUnlock()
+
+	// Read chans directly rather than calling Get: Get acquires chmu.RLock
+	// again, and a recursive read lock can deadlock once a writer is waiting.
+	results := make(map[string]*ChanState, len(chans))
+	for k, ch := range chans {
+		v := reflect.ValueOf(ch)
+		results[k.name] = &ChanState{
+			Len:      v.Len(),
+			Cap:      v.Cap(),
+			Instance: k.instance,
+		}
+	}
+
+	return results
+}
diff --git a/common/chanMonitor/service.go b/common/chanMonitor/service.go
new file mode 100755
index 0000000..100ee66
--- /dev/null
+++ b/common/chanMonitor/service.go
@@ -0,0 +1,62 @@
+package chanMonitor
+
+import (
+ "encoding/json"
+ "log"
+ "net/http"
+)
+
+// Service exposes the monitored channel states over HTTP.
+type Service struct {
+ url string // passed to http.ListenAndServe as the listen address
+ name string // reported as the "service" field of the /channels response
+}
+
+// New returns a Service that reports itself as serviceName and uses url as
+// its listen address.
+func New(serviceName string, url string) *Service {
+	svc := new(Service)
+	svc.name = serviceName
+	svc.url = url
+	return svc
+}
+
+// Start registers the /channels handler via http.HandleFunc and then serves
+// in a background goroutine. The goroutine panics if serving returns an error.
+func (this *Service) Start() {
+	http.HandleFunc("/channels", this.chanHandler)
+
+	serve := func() {
+		err := this.start()
+		if err != nil {
+			panic(err)
+		}
+	}
+	go serve()
+}
+
+// start blocks serving HTTP on the configured address with the default
+// handler and returns whatever error http.ListenAndServe reports.
+func (this *Service) start() error {
+	addr := this.url
+	return http.ListenAndServe(addr, nil)
+}
+
+// chanHandler writes the state of all monitored channels as a JSON document.
+// When the state cannot be encoded it logs the error and replies with status
+// 500 and an empty body.
+func (this *Service) chanHandler(w http.ResponseWriter, r *http.Request) {
+	resp := &ServiceChannelsStatus{
+		Service:  this.name,
+		Channels: GetAll(),
+	}
+
+	jsonResp, err := json.Marshal(resp)
+	if err != nil {
+		log.Printf("Error: %#v", err)
+		w.WriteHeader(http.StatusInternalServerError)
+		// Nothing more to send; returning avoids setting headers and writing
+		// a body after the error status has been committed.
+		return
+	}
+
+	// Headers must be set before the first Write, which commits them.
+	w.Header().Set("Content-Type", "application/json")
+	if _, err := w.Write(jsonResp); err != nil {
+		log.Printf("Error: %#v", err)
+	}
+}
+
+// ServiceChannelsStatus is the JSON payload returned by the /channels handler.
+type ServiceChannelsStatus struct {
+ Service string `json:"service"` // name of the reporting service
+ Channels map[string]*ChanState `json:"channels"` // channel states as returned by GetAll
+}
+
+// Config carries a name and a URL.
+// NOTE(review): not referenced in the visible code; presumably these mirror
+// the serviceName and url arguments of New — confirm against callers.
+type Config struct {
+ Name string
+ Url string
+}
diff --git a/inspector/main.go b/inspector/main.go
index 35b33cc..792be23 100644
--- a/inspector/main.go
+++ b/inspector/main.go
@@ -6,6 +6,7 @@ package main
import (
"fmt"
"os"
+ chm "owl/common/chanMonitor"
"path/filepath"
"runtime"
)
@@ -33,5 +34,9 @@ func main() {
fmt.Println("failed to init inspector:", err)
return
}
+
+ chm.AddNamed("inspector.resultPool.results", "owl-inspector", inspector.resultPool.results)
+ chm.AddNamed("inspector.taskPool.tasks", "owl-inspector", inspector.taskPool.tasks)
+ chm.New("owl-inspector", ":20001").Start()
select {}
}