package main

import (
	"encoding/csv"
	"fmt"
	"net/http"
	"os"
	"strings"
	"time"

	"golang.org/x/net/html"
)

// Book holds the fields scraped for each product.
type Book struct {
	Title     string
	Price     string
	Available bool
}

// FindFirstElement walks the parsed HTML tree depth-first and returns the
// first element node whose tag matches name, or nil if none is found.
func FindFirstElement(root *html.Node, name string) *html.Node {
	if root.Type == html.ElementNode && root.Data == name {
		return root
	}
	for c := root.FirstChild; c != nil; c = c.NextSibling {
		if el := FindFirstElement(c, name); el != nil {
			return el
		}
	}
	return nil
}

// Scrape downloads the category page, extracts the title, price and
// availability of every listed book, and writes the results to books.csv.
// The node navigation below is tied to the books.toscrape.com markup: each
// book sits in an <li><article> containing an image <div>, an <h3> title
// link and a product_price <div>.
func Scrape(url string) {
	res, err := http.Get(url)
	if err != nil {
		LogList.Log(err.Error())
		return
	}
	defer res.Body.Close()

	root, err := html.Parse(res.Body)
	if err != nil {
		LogList.Log(err.Error())
		return
	}

	// The product list is the first <ol> on the page.
	ol := FindFirstElement(root, "ol")
	if ol == nil {
		LogList.Log("no <ol> product list found on page")
		return
	}

	books := []Book{}
	for c := ol.FirstChild; c != nil; c = c.NextSibling {
		if c.Type != html.ElementNode || c.Data != "li" {
			continue
		}
		article := FindFirstElement(c, "article")

		// Drop the image container <div> so that the next <div> lookup
		// lands on the product_price block.
		article.RemoveChild(FindFirstElement(article, "div"))

		// The <a> inside <h3> carries the full title in its second
		// attribute (title="...").
		h3 := FindFirstElement(article, "h3")
		title := h3.FirstChild.Attr[1].Val

		// The first <p> of the product_price div holds the price text; the
		// second <p> holds the availability text after an icon element.
		productPrice := FindFirstElement(article, "div")
		price := productPrice.FirstChild.NextSibling.FirstChild.Data
		isAvailable := productPrice.FirstChild.NextSibling.NextSibling.NextSibling.FirstChild.NextSibling.NextSibling.Data
		isAvailable = strings.ReplaceAll(isAvailable, "\n", "")
		isAvailable = strings.ReplaceAll(isAvailable, " ", "")

		if len(title) > 0 && len(price) > 0 && len(isAvailable) > 0 {
			// With whitespace stripped, "In stock" becomes "Instock".
			books = append(books, Book{Title: title, Price: price, Available: isAvailable == "Instock"})
		} else {
			LogList.Log(fmt.Sprintf("Error: %s %s %s", title, price, isAvailable))
			return
		}
	}

	file, err := os.Create("books.csv")
	if err != nil {
		LogList.Log(err.Error())
		return
	}
	defer file.Close()

	writer := csv.NewWriter(file)
	defer writer.Flush()

	if err := writer.Write([]string{"Title", "Price", "Available"}); err != nil {
		LogList.Log(err.Error())
		return
	}
	for _, book := range books {
		if err := writer.Write([]string{book.Title, book.Price, fmt.Sprintf("%t", book.Available)}); err != nil {
			LogList.Log(err.Error())
			return
		}
	}
}

func main() {
	url := "https://books.toscrape.com/catalogue/category/books/travel_2/index.html"
	Scrape(url)
	LogList.Log("Done!")

	// Persist the collected log entries together with a timestamp and the URL.
	f, err := os.Create("logs.txt")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer f.Close()

	if _, err := f.Write([]byte(time.Now().String() + ":" + url + "\n" + LogList.ToString())); err != nil {
		fmt.Println(err)
	}
}
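
// The listing above references LogList without defining it. What follows is a
// minimal sketch of an assumed implementation, added only so the file
// compiles: an in-memory list of log lines exposing the Log and ToString
// methods the code relies on. If the original project ships its own logger,
// use that instead.

// logList collects log messages in memory until they are flushed to logs.txt.
type logList struct {
	entries []string
}

// Log appends a single message to the in-memory log.
func (l *logList) Log(msg string) {
	l.entries = append(l.entries, msg)
}

// ToString joins all collected messages into one newline-separated string.
func (l *logList) ToString() string {
	return strings.Join(l.entries, "\n")
}

// LogList is the package-level logger instance used throughout the program.
var LogList = &logList{}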