// Log is a single recorded message.
type Log struct {
	// Msg is the text of the entry.
	Msg string
}

// Logs is an ordered collection of Log entries, oldest first.
type Logs []Log

// LogList is the package-level log that the rest of the program appends to.
var LogList Logs

// Log appends msg to the list as a new entry.
func (ll *Logs) Log(msg string) {
	*ll = append(*ll, Log{Msg: msg})
}

// ToString returns every message in insertion order, each one terminated by
// a newline. An empty (or nil) list yields the empty string.
func (ll Logs) ToString() string {
	// Size the buffer up front so the result is built with one allocation
	// instead of re-copying the whole string for every entry.
	size := 0
	for _, entry := range ll {
		size += len(entry.Msg) + 1
	}

	buf := make([]byte, 0, size)
	for _, entry := range ll {
		buf = append(buf, entry.Msg...)
		buf = append(buf, '\n')
	}
	return string(buf)
}
+Done! diff --git a/ex2/main.go b/ex2/main.go new file mode 100644 index 0000000..b9a2c9d --- /dev/null +++ b/ex2/main.go @@ -0,0 +1,113 @@ +package main + +import ( + "fmt" + "net/http" + "os" + "strings" + + "encoding/csv" + + "time" + "golang.org/x/net/html" +) + +type Book struct { + Title string + Price string + Available bool +} + +func FindFirstElement(root *html.Node, name string) *html.Node { + if root.Type == html.ElementNode && root.Data == name { + return root + } + for c := root.FirstChild; c != nil; c = c.NextSibling { + el := FindFirstElement(c, name) + if el != nil { + return el + } + } + return nil +} + +func Scrape(url string) { + res, err := http.Get(url) + if err != nil { + LogList.Log(err.Error()) + return + } + defer res.Body.Close() + + root, err := html.Parse(res.Body) + if err != nil { + LogList.Log(err.Error()) + return + } + + ol := FindFirstElement(root, "ol") + + books := []Book{} + + for c := ol.FirstChild; c != nil; c = c.NextSibling { + if c.Type == html.ElementNode && c.Data == "li" { + article := FindFirstElement(c, "article") + article.RemoveChild(FindFirstElement(article, "div")) + h3 := FindFirstElement(article, "h3") + title := h3.FirstChild.Attr[1].Val + productPrice := FindFirstElement(article, "div") + price := productPrice.FirstChild.NextSibling.FirstChild.Data + isAvailable := productPrice.FirstChild.NextSibling.NextSibling.NextSibling.FirstChild.NextSibling.NextSibling.Data + isAvailable = strings.ReplaceAll(isAvailable, "\n", "") + isAvailable = strings.ReplaceAll(isAvailable, " ", "") + + if len(title) > 0 && len(price) > 0 && len(isAvailable) > 0 { + books = append(books, Book{Title: title, Price: price, Available: isAvailable == "Instock"}) + } else { + LogList.Log(fmt.Sprintf("Error: %s %s %s", title, price, isAvailable)) + return + } + } + } + + file, err := os.Create("books.csv") + if err != nil { + LogList.Log(err.Error()) + return + } + defer file.Close() + + writer := csv.NewWriter(file) + defer writer.Flush() + 
+ err = writer.Write([]string{"Title", "Price", "Available"}) + if err != nil { + LogList.Log(err.Error()) + return + } + + for _, book := range books { + err = writer.Write([]string{book.Title, book.Price, fmt.Sprintf("%t", book.Available)}) + if err != nil { + LogList.Log(err.Error()) + return + } + } + +} + +func main() { + url := "https://books.toscrape.com/catalogue/category/books/travel_2/index.html" + Scrape(url) + LogList.Log("Done!") + f, err := os.Create("logs.txt") + if err != nil { + fmt.Println(err) + } + defer f.Close() + _, err = f.Write([]byte(time.Now().String() + ":" + url + "\n" + LogList.ToString())) + if err != nil { + fmt.Println(err) + } +} +