Skip to content

Commit

Permalink
fix #74: warn if about to write to already visited ad, overwrite if -f
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas von Dein authored and TLINDEN committed Feb 10, 2024
1 parent ed78731 commit 612ed2a
Show file tree
Hide file tree
Showing 11 changed files with 54 additions and 8 deletions.
6 changes: 4 additions & 2 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
)

const (
VERSION string = "0.3.3"
VERSION string = "0.3.4"
Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html"
Defaultdir string = "."
Expand Down Expand Up @@ -65,6 +65,8 @@ const (
WIN string = "windows"
)

// DirsVisited is the registry of ad names already handled. It is
// allocated in Main before any scraping starts and is read and
// written by CheckAdVisited to detect duplicates; every registered
// name is stored with the value 1.
var DirsVisited map[string]int

const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
Usage: kleingebaeck [-dvVhmoclu] [<ad-listing-url>,...]
Expand All @@ -77,7 +79,7 @@ Options:
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if the already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/lmittmann/tint v1.0.4
github.com/mattn/go-isatty v0.0.20
github.com/spf13/pflag v1.0.5
github.com/tlinden/yadu v0.1.1
github.com/tlinden/yadu v0.1.2
golang.org/x/sync v0.5.0
)

Expand All @@ -33,7 +33,7 @@ require (
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/sys v0.14.0 // indirect
golang.org/x/sys v0.17.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect

)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ github.com/tlinden/yadu v0.1.0 h1:qtCi1jxg392qVRLFyrJ2LYu6/PiKSp1LT02EX+mNLME=
github.com/tlinden/yadu v0.1.0/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
github.com/tlinden/yadu v0.1.1 h1:116oEUy9b4PcMF5wLL2dCFA/sn/praYutOnao07MROw=
github.com/tlinden/yadu v0.1.1/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
github.com/tlinden/yadu v0.1.2 h1:TYYVnUJwziRJ9YPbIbRf9ikmDw0Q8Ifixm+J/kBQFh8=
github.com/tlinden/yadu v0.1.2/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
Expand All @@ -81,6 +83,8 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
4 changes: 2 additions & 2 deletions kleingebaeck.1
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2024-01-25" "1" "User Commands"
.TH KLEINGEBAECK 1 "2024-02-10" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
Expand All @@ -152,7 +152,7 @@ kleingebaeck \- kleinanzeigen.de backup tool
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
\& \-f \-\-force Download images even if they already exist.
\& \-f \-\-force Overwrite images and ads even if they already exist.
\& \-m \-\-manual Show manual.
\& \-h \-\-help Show usage.
\& \-V \-\-version Show program version.
Expand Down
2 changes: 1 addition & 1 deletion kleingebaeck.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SYNOPSYS
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if the already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
Expand Down
2 changes: 1 addition & 1 deletion kleingebaeck.pod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ kleingebaeck - kleinanzeigen.de backup tool
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if they already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
Expand Down
3 changes: 3 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ func Main(output io.Writer) int {
return Die(err)
}

// setup ad dir registry, needed to check for duplicates
DirsVisited = make(map[string]int)

switch {
case len(conf.Adlinks) >= 1:
// directly backup ad listing[s]
Expand Down
5 changes: 5 additions & 0 deletions scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ func ScrapeAd(fetch *Fetcher, uri string) error {

advertisement.CalculateExpire()

proceed := CheckAdVisited(fetch.Config, advertisement.Slug)
if !proceed {
return nil
}

// write listing
addir, err := WriteAd(fetch.Config, advertisement)
if err != nil {
Expand Down
21 changes: 21 additions & 0 deletions store.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,24 @@ func fileExists(filename string) bool {

return !info.IsDir()
}

// CheckAdVisited reports whether the ad named adname should be written.
//
// The first time a name is seen it is recorded in DirsVisited and true
// is returned. For a name that is already recorded a warning is logged
// and the result follows conf.ForceDownload: true (overwrite) when it
// is set, false (skip) otherwise.
func CheckAdVisited(conf *Config, adname string) bool {
	// first encounter: register the name and let the caller proceed
	if _, seen := DirsVisited[adname]; !seen {
		DirsVisited[adname] = 1

		return true
	}

	// duplicate name without force: leave the existing ad alone
	if !conf.ForceDownload {
		slog.Warn("an ad with the same name has already been downloaded, skipping (use -f to overwrite)", "addir", adname)

		return false
	}

	// duplicate name with force: caller may overwrite
	slog.Warn("an ad with the same name has already been downloaded, overwriting", "addir", adname)

	return true
}
2 changes: 2 additions & 0 deletions t/httproot/templates/render.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/sh -x
base="../kleinanzeigen"

rm -rf $base
mkdir -p $base

echo "Generating /s-bestandsliste.html"
Expand Down
9 changes: 9 additions & 0 deletions util.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,12 @@ func IsNoTty() bool {
func GetThrottleTime() time.Duration {
return time.Duration(rand.Intn(MaxThrottle-MinThrottle+1)+MinThrottle) * time.Millisecond
}

// Exists reports whether the key v is present in the map m. Only the
// presence of the key matters, not the value stored under it; a nil
// map contains no keys.
func Exists[K comparable, V any](m map[K]V, v K) bool {
	_, found := m[v]

	return found
}

0 comments on commit 612ed2a

Please sign in to comment.