Exercise 2 completed
This commit is contained in:
12
ex2/books.csv
Normal file
12
ex2/books.csv
Normal file
@@ -0,0 +1,12 @@
|
||||
Title,Price,Available
|
||||
It's Only the Himalayas,£45.17,true
|
||||
Full Moon over Noah’s Ark: An Odyssey to Mount Ararat and Beyond,£49.43,true
|
||||
See America: A Celebration of Our National Parks & Treasured Sites,£48.87,true
|
||||
Vagabonding: An Uncommon Guide to the Art of Long-Term World Travel,£36.94,true
|
||||
Under the Tuscan Sun,£37.33,true
|
||||
A Summer In Europe,£44.34,true
|
||||
The Great Railway Bazaar,£30.54,true
|
||||
A Year in Provence (Provence #1),£56.88,true
|
||||
The Road to Little Dribbling: Adventures of an American in Britain (Notes From a Small Island #2),£23.21,true
|
||||
Neither Here nor There: Travels in Europe,£38.95,true
|
||||
"1,000 Places to See Before You Die",£26.08,true
|
||||
|
5
ex2/go.mod
Normal file
5
ex2/go.mod
Normal file
@@ -0,0 +1,5 @@
|
||||
module git.cems.dev/cdricms/bdooc/ex2
|
||||
|
||||
go 1.22.0
|
||||
|
||||
require golang.org/x/net v0.24.0
|
||||
2
ex2/go.sum
Normal file
2
ex2/go.sum
Normal file
@@ -0,0 +1,2 @@
|
||||
golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
|
||||
golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
|
||||
21
ex2/logger.go
Normal file
21
ex2/logger.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package main
|
||||
|
||||
// Log is a single recorded message.
type Log struct {
	Msg string
}

// Logs is an ordered list of log entries, oldest first.
type Logs []Log

// LogList is the package-level list that collects messages during a run.
var LogList Logs

// Log appends msg to the end of the list as a new entry.
func (ll *Logs) Log(msg string) {
	*ll = append(*ll, Log{Msg: msg})
}

// ToString returns every message in insertion order, each one terminated by
// a newline. An empty (or nil) list yields the empty string.
func (ll Logs) ToString() string {
	// Measure first so the output is built in one allocation instead of
	// re-copying the whole result on every iteration.
	size := 0
	for _, entry := range ll {
		size += len(entry.Msg) + 1
	}

	buf := make([]byte, 0, size)
	for _, entry := range ll {
		buf = append(buf, entry.Msg...)
		buf = append(buf, '\n')
	}
	return string(buf)
}
|
||||
|
||||
2
ex2/logs.txt
Normal file
2
ex2/logs.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
2024-04-30 02:34:53.450332367 +0200 CEST m=+0.919809694:https://books.toscrape.com/catalogue/category/books/travel_2/index.html
|
||||
Done!
|
||||
113
ex2/main.go
Normal file
113
ex2/main.go
Normal file
@@ -0,0 +1,113 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"encoding/csv"
|
||||
|
||||
"time"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// Book is one catalogue entry extracted from a listing page.
type Book struct {
	// Title and Price hold the text as scraped; Price is stored as a string
	// and is not parsed into a number.
	Title, Price string
	// Available is true when the entry's availability text identified it as
	// in stock.
	Available bool
}
|
||||
|
||||
func FindFirstElement(root *html.Node, name string) *html.Node {
|
||||
if root.Type == html.ElementNode && root.Data == name {
|
||||
return root
|
||||
}
|
||||
for c := root.FirstChild; c != nil; c = c.NextSibling {
|
||||
el := FindFirstElement(c, name)
|
||||
if el != nil {
|
||||
return el
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func Scrape(url string) {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
LogList.Log(err.Error())
|
||||
return
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
root, err := html.Parse(res.Body)
|
||||
if err != nil {
|
||||
LogList.Log(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
ol := FindFirstElement(root, "ol")
|
||||
|
||||
books := []Book{}
|
||||
|
||||
for c := ol.FirstChild; c != nil; c = c.NextSibling {
|
||||
if c.Type == html.ElementNode && c.Data == "li" {
|
||||
article := FindFirstElement(c, "article")
|
||||
article.RemoveChild(FindFirstElement(article, "div"))
|
||||
h3 := FindFirstElement(article, "h3")
|
||||
title := h3.FirstChild.Attr[1].Val
|
||||
productPrice := FindFirstElement(article, "div")
|
||||
price := productPrice.FirstChild.NextSibling.FirstChild.Data
|
||||
isAvailable := productPrice.FirstChild.NextSibling.NextSibling.NextSibling.FirstChild.NextSibling.NextSibling.Data
|
||||
isAvailable = strings.ReplaceAll(isAvailable, "\n", "")
|
||||
isAvailable = strings.ReplaceAll(isAvailable, " ", "")
|
||||
|
||||
if len(title) > 0 && len(price) > 0 && len(isAvailable) > 0 {
|
||||
books = append(books, Book{Title: title, Price: price, Available: isAvailable == "Instock"})
|
||||
} else {
|
||||
LogList.Log(fmt.Sprintf("Error: %s %s %s", title, price, isAvailable))
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
file, err := os.Create("books.csv")
|
||||
if err != nil {
|
||||
LogList.Log(err.Error())
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
writer := csv.NewWriter(file)
|
||||
defer writer.Flush()
|
||||
|
||||
err = writer.Write([]string{"Title", "Price", "Available"})
|
||||
if err != nil {
|
||||
LogList.Log(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
for _, book := range books {
|
||||
err = writer.Write([]string{book.Title, book.Price, fmt.Sprintf("%t", book.Available)})
|
||||
if err != nil {
|
||||
LogList.Log(err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func main() {
|
||||
url := "https://books.toscrape.com/catalogue/category/books/travel_2/index.html"
|
||||
Scrape(url)
|
||||
LogList.Log("Done!")
|
||||
f, err := os.Create("logs.txt")
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
defer f.Close()
|
||||
_, err = f.Write([]byte(time.Now().String() + ":" + url + "\n" + LogList.ToString()))
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user