diff --git a/config.go b/config.go index 14c67dc..ffe2d2e 100644 --- a/config.go +++ b/config.go @@ -34,7 +34,7 @@ import ( ) const ( - VERSION string = "0.3.3" + VERSION string = "0.3.4" Baseuri string = "https://www.kleinanzeigen.de" Listuri string = "/s-bestandsliste.html" Defaultdir string = "." @@ -65,6 +65,8 @@ const ( WIN string = "windows" ) +var DirsVisited map[string]int + const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. Usage: kleingebaeck [-dvVhmoclu] [,...] @@ -77,7 +79,7 @@ Options: -l --limit Limit the ads to download to , default: load all. -c --config Use config file (default: ~/.kleingebaeck). --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. --f --force Download images even if they already exist. +-f --force Overwrite images and ads even if they already exist. -m --manual Show manual. -h --help Show usage. -V --version Show program version. diff --git a/go.mod b/go.mod index 8170540..d52b032 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/lmittmann/tint v1.0.4 github.com/mattn/go-isatty v0.0.20 github.com/spf13/pflag v1.0.5 - github.com/tlinden/yadu v0.1.1 + github.com/tlinden/yadu v0.1.2 golang.org/x/sync v0.5.0 ) @@ -33,7 +33,7 @@ require ( github.com/pelletier/go-toml v1.9.5 // indirect github.com/pkg/errors v0.9.1 // indirect golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect - golang.org/x/sys v0.14.0 // indirect + golang.org/x/sys v0.17.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 3cadaea..5e88119 100644 --- a/go.sum +++ b/go.sum @@ -66,6 +66,8 @@ github.com/tlinden/yadu v0.1.0 h1:qtCi1jxg392qVRLFyrJ2LYu6/PiKSp1LT02EX+mNLME= github.com/tlinden/yadu v0.1.0/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA= github.com/tlinden/yadu v0.1.1 h1:116oEUy9b4PcMF5wLL2dCFA/sn/praYutOnao07MROw= github.com/tlinden/yadu v0.1.1/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA= +github.com/tlinden/yadu v0.1.2 
h1:TYYVnUJwziRJ9YPbIbRf9ikmDw0Q8Ifixm+J/kBQFh8= +github.com/tlinden/yadu v0.1.2/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -81,6 +83,8 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/kleingebaeck.1 b/kleingebaeck.1 index 350063b..e920378 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2024-01-25" "1" "User Commands" +.TH KLEINGEBAECK 1 "2024-02-10" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -152,7 +152,7 @@ kleingebaeck \- kleinanzeigen.de backup tool \& \-l \-\-limit Limit the ads to download to , default: load all. \& \-c \-\-config Use config file (default: ~/.kleingebaeck). \& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. 
-\& \-f \-\-force Download images even if they already exist. +\& \-f \-\-force Overwrite images and ads even if they already exist. \& \-m \-\-manual Show manual. \& \-h \-\-help Show usage. \& \-V \-\-version Show program version. diff --git a/kleingebaeck.go b/kleingebaeck.go index 992232d..247d4c8 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -14,7 +14,7 @@ SYNOPSYS -l --limit Limit the ads to download to , default: load all. -c --config Use config file (default: ~/.kleingebaeck). --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. - -f --force Download images even if they already exist. + -f --force Overwrite images and ads even if they already exist. -m --manual Show manual. -h --help Show usage. -V --version Show program version. diff --git a/kleingebaeck.pod b/kleingebaeck.pod index b8fdd99..50dfd7e 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -13,7 +13,7 @@ kleingebaeck - kleinanzeigen.de backup tool -l --limit Limit the ads to download to , default: load all. -c --config Use config file (default: ~/.kleingebaeck). --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. - -f --force Download images even if they already exist. + -f --force Overwrite images and ads even if they already exist. -m --manual Show manual. -h --help Show usage. -V --version Show program version. 
diff --git a/main.go b/main.go index c489634..6c62f71 100644 --- a/main.go +++ b/main.go @@ -123,6 +123,9 @@ func Main(output io.Writer) int { return Die(err) } + // setup ad dir registry, needed to check for duplicates + DirsVisited = make(map[string]int) + switch { case len(conf.Adlinks) >= 1: // directly backup ad listing[s] diff --git a/scrape.go b/scrape.go index 755ed08..ed5cadf 100644 --- a/scrape.go +++ b/scrape.go @@ -126,6 +126,11 @@ func ScrapeAd(fetch *Fetcher, uri string) error { advertisement.CalculateExpire() + proceed := CheckAdVisited(fetch.Config, advertisement.Slug) + if !proceed { + return nil + } + // write listing addir, err := WriteAd(fetch.Config, advertisement) if err != nil { diff --git a/store.go b/store.go index e3d4e76..0b0f2e2 100644 --- a/store.go +++ b/store.go @@ -133,3 +133,24 @@ func fileExists(filename string) bool { return !info.IsDir() } + +// check if an addir has already been processed by current run and +// decide what to do +func CheckAdVisited(conf *Config, adname string) bool { + if Exists(DirsVisited, adname) { + if conf.ForceDownload { + slog.Warn("an ad with the same name has already been downloaded, overwriting", "addir", adname) + return true + } + + // don't overwrite + slog.Warn("an ad with the same name has already been downloaded, skipping (use -f to overwrite)", "addir", adname) + return false + } + + // register + DirsVisited[adname] = 1 + + // overwrite + return true +} diff --git a/t/httproot/templates/render.sh b/t/httproot/templates/render.sh index f1d4cb5..48aadc4 100755 --- a/t/httproot/templates/render.sh +++ b/t/httproot/templates/render.sh @@ -1,5 +1,7 @@ #!/bin/sh -x base="../kleinanzeigen" + +rm -rf $base mkdir -p $base echo "Generating /s-bestandsliste.html" diff --git a/util.go b/util.go index a416be2..e96af63 100644 --- a/util.go +++ b/util.go @@ -74,3 +74,12 @@ func IsNoTty() bool { func GetThrottleTime() time.Duration { return time.Duration(rand.Intn(MaxThrottle-MinThrottle+1)+MinThrottle) * 
time.Millisecond } + +// look if a key in a map exists, generic variant +func Exists[K comparable, V any](m map[K]V, v K) bool { + if _, ok := m[v]; ok { + return true + } + + return false +}