Exercise 2 completed
This commit is contained in:
12
ex2/books.csv
Normal file
12
ex2/books.csv
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
Title,Price,Available
|
||||||
|
It's Only the Himalayas,£45.17,true
|
||||||
|
Full Moon over Noah’s Ark: An Odyssey to Mount Ararat and Beyond,£49.43,true
|
||||||
|
See America: A Celebration of Our National Parks & Treasured Sites,£48.87,true
|
||||||
|
Vagabonding: An Uncommon Guide to the Art of Long-Term World Travel,£36.94,true
|
||||||
|
Under the Tuscan Sun,£37.33,true
|
||||||
|
A Summer In Europe,£44.34,true
|
||||||
|
The Great Railway Bazaar,£30.54,true
|
||||||
|
A Year in Provence (Provence #1),£56.88,true
|
||||||
|
The Road to Little Dribbling: Adventures of an American in Britain (Notes From a Small Island #2),£23.21,true
|
||||||
|
Neither Here nor There: Travels in Europe,£38.95,true
|
||||||
|
"1,000 Places to See Before You Die",£26.08,true
|
||||||
|
5
ex2/go.mod
Normal file
5
ex2/go.mod
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
module git.cems.dev/cdricms/bdooc/ex2
|
||||||
|
|
||||||
|
go 1.22.0
|
||||||
|
|
||||||
|
require golang.org/x/net v0.24.0
|
||||||
2
ex2/go.sum
Normal file
2
ex2/go.sum
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
|
||||||
|
golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
|
||||||
21
ex2/logger.go
Normal file
21
ex2/logger.go
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Log is a single recorded message.
type Log struct {
	Msg string
}

// Logs is an ordered list of Log entries; new entries are appended at the end.
type Logs []Log

// LogList is the package-level list that the program records its messages in.
var LogList Logs

// Log appends msg to the list as a new entry.
func (ll *Logs) Log(msg string) {
	*ll = append(*ll, Log{Msg: msg})
}

// ToString returns every message in insertion order, each followed by a
// newline. An empty (or nil) list yields the empty string.
func (ll Logs) ToString() string {
	// Size the buffer up front so the result is assembled with a single
	// allocation instead of re-copying the whole string on every entry.
	size := 0
	for _, entry := range ll {
		size += len(entry.Msg) + 1
	}

	buf := make([]byte, 0, size)
	for _, entry := range ll {
		buf = append(buf, entry.Msg...)
		buf = append(buf, '\n')
	}
	return string(buf)
}
|
||||||
|
|
||||||
2
ex2/logs.txt
Normal file
2
ex2/logs.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
2024-04-30 02:34:53.450332367 +0200 CEST m=+0.919809694:https://books.toscrape.com/catalogue/category/books/travel_2/index.html
|
||||||
|
Done!
|
||||||
113
ex2/main.go
Normal file
113
ex2/main.go
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"encoding/csv"
|
||||||
|
|
||||||
|
"time"
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Book is one scraped catalogue entry, written out as a single CSV row.
type Book struct {
	// Title is the book's title as read from the listing.
	Title string
	// Price is the price text exactly as scraped, currency symbol included
	// (for example "£45.17"); it is not parsed into a number.
	Price string
	// Available is true when the listing's availability text is "In stock".
	Available bool
}
|
||||||
|
|
||||||
|
func FindFirstElement(root *html.Node, name string) *html.Node {
|
||||||
|
if root.Type == html.ElementNode && root.Data == name {
|
||||||
|
return root
|
||||||
|
}
|
||||||
|
for c := root.FirstChild; c != nil; c = c.NextSibling {
|
||||||
|
el := FindFirstElement(c, name)
|
||||||
|
if el != nil {
|
||||||
|
return el
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func Scrape(url string) {
|
||||||
|
res, err := http.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
LogList.Log(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer res.Body.Close()
|
||||||
|
|
||||||
|
root, err := html.Parse(res.Body)
|
||||||
|
if err != nil {
|
||||||
|
LogList.Log(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ol := FindFirstElement(root, "ol")
|
||||||
|
|
||||||
|
books := []Book{}
|
||||||
|
|
||||||
|
for c := ol.FirstChild; c != nil; c = c.NextSibling {
|
||||||
|
if c.Type == html.ElementNode && c.Data == "li" {
|
||||||
|
article := FindFirstElement(c, "article")
|
||||||
|
article.RemoveChild(FindFirstElement(article, "div"))
|
||||||
|
h3 := FindFirstElement(article, "h3")
|
||||||
|
title := h3.FirstChild.Attr[1].Val
|
||||||
|
productPrice := FindFirstElement(article, "div")
|
||||||
|
price := productPrice.FirstChild.NextSibling.FirstChild.Data
|
||||||
|
isAvailable := productPrice.FirstChild.NextSibling.NextSibling.NextSibling.FirstChild.NextSibling.NextSibling.Data
|
||||||
|
isAvailable = strings.ReplaceAll(isAvailable, "\n", "")
|
||||||
|
isAvailable = strings.ReplaceAll(isAvailable, " ", "")
|
||||||
|
|
||||||
|
if len(title) > 0 && len(price) > 0 && len(isAvailable) > 0 {
|
||||||
|
books = append(books, Book{Title: title, Price: price, Available: isAvailable == "Instock"})
|
||||||
|
} else {
|
||||||
|
LogList.Log(fmt.Sprintf("Error: %s %s %s", title, price, isAvailable))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
file, err := os.Create("books.csv")
|
||||||
|
if err != nil {
|
||||||
|
LogList.Log(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
writer := csv.NewWriter(file)
|
||||||
|
defer writer.Flush()
|
||||||
|
|
||||||
|
err = writer.Write([]string{"Title", "Price", "Available"})
|
||||||
|
if err != nil {
|
||||||
|
LogList.Log(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, book := range books {
|
||||||
|
err = writer.Write([]string{book.Title, book.Price, fmt.Sprintf("%t", book.Available)})
|
||||||
|
if err != nil {
|
||||||
|
LogList.Log(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
url := "https://books.toscrape.com/catalogue/category/books/travel_2/index.html"
|
||||||
|
Scrape(url)
|
||||||
|
LogList.Log("Done!")
|
||||||
|
f, err := os.Create("logs.txt")
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
_, err = f.Write([]byte(time.Now().String() + ":" + url + "\n" + LogList.ToString()))
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Reference in New Issue
Block a user