Grouping + Aggregation

This commit is contained in:
cdricms
2024-04-28 21:05:13 +02:00
parent e43c4b129c
commit 081a914560
2 changed files with 168 additions and 15 deletions

26
main.go
View File

@@ -1,12 +1,9 @@
package main package main
import ( import (
// "encoding/csv"
"fmt" "fmt"
// "os"
"git.cems.dev/cdricms/bdooc/proto" "git.cems.dev/cdricms/bdooc/proto"
// "git.cems.dev/cdricms/bdooc/parsing"
) )
func main() { func main() {
@@ -27,7 +24,6 @@ func main() {
// data := parsing.MapToProto(employees) // data := parsing.MapToProto(employees)
// data.SaveToFile("employee_data.bin") // data.SaveToFile("employee_data.bin")
// fmt.Println(employees[:10]) // fmt.Println(employees[:10])
data, err := proto.LoadFromFile("employee_data.bin") data, err := proto.LoadFromFile("employee_data.bin")
@@ -35,10 +31,22 @@ func main() {
fmt.Println(err) fmt.Println(err)
} }
result, err := data.QueryEmployeesByColumn(proto.FieldYearsOfService, "2.48") // fmt.Println('F')
if err != nil {
fmt.Println(err)
}
fmt.Println(result, len(result)) // _, err = data.QueryEmployeesByColumn(proto.FieldGender, "70")
// if err != nil {
// fmt.Println(err)
// }
r := proto.AggregateByColumn(data, proto.FieldRegion, func(ep proto.EmployeePList) float64 {
return proto.Max(ep, proto.FieldSalary)
})
fmt.Println(r)
r = proto.AggregateByColumn(data, proto.FieldRegion, func(ep proto.EmployeePList) float64 {
return proto.Min(ep, proto.FieldSalary)
})
fmt.Println(r)
// for key := range result {
// fmt.Println(key)
// }
} }

View File

@@ -1,9 +1,26 @@
package proto package proto
import ( import (
"fmt"
"math"
"reflect" "reflect"
) )
// GetMax returns the larger of a and b.
//
// The constraint uses approximation elements (~T) so that named numeric
// types such as `type Salary float64` are accepted in addition to the
// built-in numeric types themselves.
//
// When the two values compare equal, or when the comparison a > b is false
// for any other reason (for example a floating-point NaN operand), b is
// returned.
func GetMax[T ~float64 | ~float32 | ~int | ~uint | ~int8 | ~uint8 | ~int16 | ~uint16 | ~int32 | ~uint32 | ~int64 | ~uint64](a, b T) T {
	if a > b {
		return a
	}
	return b
}
// GetMin returns the smaller of a and b.
//
// The constraint uses approximation elements (~T) so that named numeric
// types such as `type Salary float64` are accepted in addition to the
// built-in numeric types themselves.
//
// When the two values compare equal, or when the comparison a < b is false
// for any other reason (for example a floating-point NaN operand), b is
// returned.
func GetMin[T ~float64 | ~float32 | ~int | ~uint | ~int8 | ~uint8 | ~int16 | ~uint16 | ~int32 | ~uint32 | ~int64 | ~uint64](a, b T) T {
	if a < b {
		return a
	}
	return b
}
type EmployeeField string type EmployeeField string
const ( const (
@@ -91,10 +108,138 @@ func (el *EmployeeList) QueryEmployeesByColumn(column EmployeeField, query strin
for _, employee := range el.GetEmployees() { for _, employee := range el.GetEmployees() {
value := reflect.ValueOf(employee).Elem().FieldByName(string(column)) value := reflect.ValueOf(employee).Elem().FieldByName(string(column))
if value.String() == query { if fmt.Sprintf("%v", value) == query {
employees = append(employees, employee) employees = append(employees, employee)
} }
} }
return employees, nil return employees, nil
} }
// Groups maps the formatted value of a grouping column to the employees
// sharing that value.
type Groups map[string][]*Employee

// GroupByColumn partitions the employees of el by the value of the Employee
// struct field named by column. Each field value is rendered with the "%v"
// verb and used as the group key.
//
// If column does not name a field of Employee, an empty (non-nil) Groups is
// returned rather than lumping every employee under the placeholder key that
// fmt produces for an invalid reflect.Value. Nil entries in the employee
// slice are skipped, because reflecting through a nil pointer would panic.
func (el *EmployeeList) GroupByColumn(column EmployeeField) Groups {
	groups := make(Groups)

	// Validate the column once, on the type, before touching any employee.
	// The type is obtained through a nil pointer so no Employee value has to
	// be constructed or copied just to inspect its fields.
	fieldName := string(column)
	if _, ok := reflect.TypeOf((*Employee)(nil)).Elem().FieldByName(fieldName); !ok {
		return groups
	}

	for _, employee := range el.GetEmployees() {
		if employee == nil {
			continue
		}
		value := reflect.ValueOf(employee).Elem().FieldByName(fieldName)
		key := fmt.Sprintf("%v", value)
		groups[key] = append(groups[key], employee)
	}
	return groups
}
// FilterFunction reports whether an employee should be kept.
type FilterFunction func(*Employee) bool

// HavingByColumn returns a new Groups holding, for every group in grps, only
// the employees for which cb returns true. The relative order of employees
// inside a group is preserved. A group in which no employee passes cb does
// not appear in the result at all. The receiver is not modified.
func (grps Groups) HavingByColumn(cb FilterFunction) Groups {
	filtered := Groups{}
	for name, members := range grps {
		var kept []*Employee
		for i := range members {
			if candidate := members[i]; cb(candidate) {
				kept = append(kept, candidate)
			}
		}
		// Only materialize a key when at least one employee survived, so
		// fully filtered-out groups stay absent from the result.
		if len(kept) > 0 {
			filtered[name] = kept
		}
	}
	return filtered
}
// ForEachGroup applies cb to the employee list of every group in grps and
// returns the results keyed by group name. The returned map is always
// non-nil and has exactly one entry per group.
func ForEachGroup[T any](grps Groups, cb func(ep EmployeePList) T) map[string]T {
	results := make(map[string]T, len(grps))
	for name := range grps {
		results[name] = cb(grps[name])
	}
	return results
}
// AggregateByColumn groups the employees of el by column (via GroupByColumn)
// and reduces every group with cb (via ForEachGroup). The result maps each
// group key to the value cb produced for that group.
func AggregateByColumn[T any](el *EmployeeList, column EmployeeField, cb func(ep EmployeePList) T) map[string]T {
	return ForEachGroup(el.GroupByColumn(column), cb)
}
// EmployeePList is a list of employee pointers, used as the input of the
// aggregation helpers.
type EmployeePList []*Employee

// Sum adds up the values of the Employee field named by column across ep and
// returns the total as a float64.
//
// Signed integer, unsigned integer and floating-point fields are supported;
// each value is converted to float64 before being added. If column does not
// name a field of Employee, 0 is returned. If the field has any other kind,
// the total accumulated so far is returned immediately.
func Sum(ep EmployeePList, column EmployeeField) float64 {
	total := 0.0
	fieldName := string(column)

	if _, known := reflect.TypeOf(Employee{}).FieldByName(fieldName); !known {
		return total
	}

	for i := range ep {
		field := reflect.ValueOf(ep[i]).Elem().FieldByName(fieldName)

		// Normalize the field to float64 first, then accumulate in one place.
		var amount float64
		switch field.Kind() {
		case reflect.Float32, reflect.Float64:
			amount = field.Float()
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			amount = float64(field.Int())
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			amount = float64(field.Uint())
		default:
			return total
		}
		total += amount
	}
	return total
}
// Average returns the arithmetic mean of the Employee field named by column
// across ep, computed as Sum(ep, column) divided by the number of employees.
//
// An empty list yields 0 instead of the NaN that dividing 0 by 0 would
// produce, which keeps the result usable in further arithmetic and matches
// what Sum returns for an empty list.
func Average(ep EmployeePList, column EmployeeField) float64 {
	count := len(ep)
	if count == 0 {
		return 0
	}
	return Sum(ep, column) / float64(count)
}
// Max returns the largest value of the Employee field named by column across
// ep, converted to float64.
//
// Signed integer, unsigned integer and floating-point fields are supported.
// 0 is returned when ep is empty, when column does not name a field of
// Employee, or when the field is not numeric.
//
// The running maximum starts at negative infinity rather than 0, so a column
// whose values are all negative reports its true maximum instead of 0.
func Max(ep EmployeePList, column EmployeeField) float64 {
	fieldName := string(column)
	if _, fieldFound := reflect.TypeOf(Employee{}).FieldByName(fieldName); !fieldFound {
		return 0
	}
	if len(ep) == 0 {
		return 0
	}

	best := math.Inf(-1)
	for _, employee := range ep {
		value := reflect.ValueOf(employee).Elem().FieldByName(fieldName)
		switch value.Kind() {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			best = GetMax(best, float64(value.Int()))
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			best = GetMax(best, float64(value.Uint()))
		case reflect.Float64, reflect.Float32:
			best = GetMax(best, value.Float())
		default:
			// Non-numeric column: keep the documented "no result" value
			// instead of leaking the negative-infinity seed.
			return 0
		}
	}
	return best
}
// Min returns the smallest value of the Employee field named by column
// across ep, converted to float64.
//
// Signed integer, unsigned integer and floating-point fields are supported.
// The running minimum starts at math.MaxFloat64, so that value is what the
// caller receives when ep is empty or when column does not name a field of
// Employee. If the field has any other kind, the minimum accumulated so far
// is returned immediately.
func Min(ep EmployeePList, column EmployeeField) float64 {
	lowest := math.MaxFloat64
	fieldName := string(column)

	if _, known := reflect.TypeOf(Employee{}).FieldByName(fieldName); !known {
		return lowest
	}

	for i := range ep {
		field := reflect.ValueOf(ep[i]).Elem().FieldByName(fieldName)

		// Normalize the field to float64 first, then compare in one place.
		var candidate float64
		switch field.Kind() {
		case reflect.Float32, reflect.Float64:
			candidate = field.Float()
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			candidate = float64(field.Int())
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			candidate = float64(field.Uint())
		default:
			return lowest
		}
		lowest = GetMin(lowest, candidate)
	}
	return lowest
}