Skip to content

Commit

Permalink
switch bibtex parser and improve upload instructions (resolves #1343)
Browse files Browse the repository at this point in the history
  • Loading branch information
nics committed Dec 15, 2023
1 parent 8dd878b commit 78af539
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 106 deletions.
144 changes: 49 additions & 95 deletions backends/bibtex/decoder.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
package bibtex

import (
"bufio"
"io"
"regexp"
"strings"
"unicode"

"github.com/dimchansky/utfbom"
"github.com/nickng/bibtex"
"github.com/ugent-library/biblio-backoffice/backends"
"github.com/ugent-library/biblio-backoffice/models"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"github.com/ugent-library/bibtex"
)

var (
Expand All @@ -22,71 +16,33 @@ var (
)

type Decoder struct {
r io.Reader
bibtex *bibtex.BibTex
i int
parser *bibtex.Parser
}

func NewDecoder(r io.Reader) backends.PublicationDecoder {
return &Decoder{r: r}
return &Decoder{parser: bibtex.NewParser(r)}
}

func (d *Decoder) parse() error {
// cleanup
var r io.Reader
// remove utf8 bom
r = utfbom.SkipOnly(d.r)
// remove unicode non spacing marks
// note that the parser doesn't actually fail on combined grave, acute, circumflex, umlaut accents in field values
r = transform.NewReader(r, transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC))
// skip file preambles, comments, etc until we encounter the first entry
b := bufio.NewReader(r)
for {
c, _, err := b.ReadRune()
if err != nil {
return err
}
if c == '@' {
b.UnreadRune()
break
}
}

bib, err := bibtex.Parse(b)
func (d *Decoder) Decode(p *models.Publication) error {
entry, err := d.parser.Next()
if err != nil {
return err
}
d.bibtex = bib

return nil
}

func (d *Decoder) Decode(p *models.Publication) error {
if d.bibtex == nil {
if err := d.parse(); err != nil {
return err
}
}

if len(d.bibtex.Entries) == 0 || d.i >= len(d.bibtex.Entries) {
if entry == nil {
return io.EOF
}

entry := d.bibtex.Entries[d.i]
d.i++

mapEntry(entry, p)

return nil
}

func mapEntry(e *bibtex.BibEntry, p *models.Publication) {
func mapEntry(e *bibtex.Entry, p *models.Publication) {
p.Type = "journal_article"

// field names may have capitals
entries := map[string]string{}
for key, val := range e.Fields {
entries[strings.ToLower(key)] = val.String()
fields := make(map[string]string, len(e.Fields))
for _, f := range e.Fields {
fields[f.Name] = f.Value
}

switch e.Type {
Expand All @@ -108,89 +64,87 @@ func mapEntry(e *bibtex.BibEntry, p *models.Publication) {
p.MiscellaneousType = "report"
}

if f, ok := entries["title"]; ok {
for _, name := range e.Authors {
nameParts := reSplit.Split(name, -1)
lastName := nameParts[0]
firstName := "[missing]" // TODO
if len(nameParts) > 1 {
firstName = nameParts[1]
}
p.Author = append(p.Author, models.ContributorFromFirstLastName(firstName, lastName))
}

for _, name := range e.Editors {
nameParts := reSplit.Split(name, -1)
lastName := nameParts[0]
firstName := "[missing]" // TODO
if len(nameParts) > 1 {
firstName = nameParts[1]
}
p.Editor = append(p.Editor, models.ContributorFromFirstLastName(firstName, lastName))
}

if f, ok := fields["title"]; ok {
p.Title = f
}
if f, ok := entries["year"]; ok {
if f, ok := fields["year"]; ok {
p.Year = f
}
if f, ok := entries["pages"]; ok {
if f, ok := fields["pages"]; ok {
pageParts := reSplitPages.Split(f, -1)
p.PageFirst = pageParts[0]
if len(pageParts) > 1 {
p.PageLast = pageParts[1]
}
}
if f, ok := entries["keywords"]; ok {
if f, ok := fields["keywords"]; ok {
p.Keyword = reSplit.Split(f, -1)
}
if f, ok := entries["abstract"]; ok {
if f, ok := fields["abstract"]; ok {
p.AddAbstract(&models.Text{Text: f, Lang: "und"})
}
if f, ok := entries["volume"]; ok {
if f, ok := fields["volume"]; ok {
p.Volume = f
}
if f, ok := entries["number"]; ok {
if f, ok := fields["number"]; ok {
p.Issue = f
}
if f, ok := entries["address"]; ok {
if f, ok := fields["address"]; ok {
p.PlaceOfPublication = f
}
if f, ok := entries["doi"]; ok {
if f, ok := fields["doi"]; ok {
p.DOI = f
}
if f, ok := entries["issn"]; ok {
if f, ok := fields["issn"]; ok {
p.ISSN = []string{f}
}
if f, ok := entries["isbn"]; ok {
if f, ok := fields["isbn"]; ok {
p.ISBN = []string{f}
}
if f, ok := entries["series"]; ok {
if f, ok := fields["series"]; ok {
p.SeriesTitle = f
}
if f, ok := entries["journal"]; ok {
if f, ok := fields["journal"]; ok {
p.Publication = f
}
if f, ok := entries["booktitle"]; ok {
if f, ok := fields["booktitle"]; ok {
p.Publication = f
}
if f, ok := entries["school"]; ok {
if f, ok := fields["school"]; ok {
p.Publisher = f
}
if f, ok := entries["publisher"]; ok {
if f, ok := fields["publisher"]; ok {
p.Publisher = f
}
if f, ok := entries["author"]; ok {
for _, v := range strings.Split(f, " and ") {
nameParts := reSplit.Split(v, -1)
lastName := nameParts[0]
firstName := "[missing]" // TODO
if len(nameParts) > 1 {
firstName = nameParts[1]
}
p.Author = append(p.Author, models.ContributorFromFirstLastName(firstName, lastName))
}
}
if f, ok := entries["editor"]; ok {
for _, v := range strings.Split(f, " and ") {
nameParts := reSplit.Split(v, -1)
lastName := nameParts[0]
firstName := "[missing]" // TODO
if len(nameParts) > 1 {
firstName = nameParts[1]
}
p.Editor = append(p.Editor, models.ContributorFromFirstLastName(firstName, lastName))
}
}

// WoS bibtex records
if f, ok := entries["journal-iso"]; ok {
if f, ok := fields["journal-iso"]; ok {
p.PublicationAbbreviation = f
}
if f, ok := entries["keywords-plus"]; ok {
if f, ok := fields["keywords-plus"]; ok {
p.Keyword = append(p.Keyword, reSplit.Split(f, -1)...)
}
if f, ok := entries["unique-id"]; ok {
if f, ok := fields["unique-id"]; ok {
if strings.HasPrefix(f, "ISI:") {
p.WOSID = strings.TrimPrefix(f, "ISI:")
}
Expand Down
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ require (
github.com/blevesearch/bleve/v2 v2.3.10
github.com/bluele/gcache v0.0.2
github.com/caltechlibrary/doitools v0.0.1
github.com/dimchansky/utfbom v1.1.1
github.com/elastic/go-elasticsearch/v6 v6.8.10
github.com/go-chi/chi/v5 v5.0.10
github.com/go-playground/form/v4 v4.2.1
Expand All @@ -24,7 +23,6 @@ require (
github.com/jackc/pgx/v4 v4.18.1
github.com/joho/godotenv v1.5.1
github.com/jpillora/ipfilter v1.2.9
github.com/nickng/bibtex v1.2.0
github.com/ory/graceful v0.1.3
github.com/pkg/errors v0.9.1
github.com/rvflash/elapsed v0.3.0
Expand Down Expand Up @@ -166,6 +164,7 @@ require (
github.com/subosito/gotenv v1.6.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/ugent-library/bibtex v0.0.0-20231215144723-4e5925e47a69
github.com/ugent-library/bind v0.0.0-20231106145128-3b6630797063
github.com/ugent-library/friendly v0.0.0-20230811091914-e5eb37f47072
github.com/ugent-library/httperror v0.0.0-20230123152913-d3d289bcce20
Expand Down
8 changes: 2 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,6 @@ github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7Do
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U=
github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/elastic/go-elasticsearch/v6 v6.8.10 h1:2lN0gJ93gMBXvkhwih5xquldszpm8FlUwqG5sPzr6a8=
Expand Down Expand Up @@ -441,10 +439,6 @@ github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
github.com/nickng/bibtex v1.2.0 h1:b+buRQja8xk27I9nCd3o+OujB8E8CRKbubAgHsrd4Kk=
github.com/nickng/bibtex v1.2.0/go.mod h1:4BJ3ka/ZjGVXcHOlkzlRonex6U17L3kW6ICEsygP2bg=
github.com/nics/ich v0.0.0-20231128153104-da30f0e02497 h1:2KefPDytkKZDOj4GgC4i+AhlbQ5lSHiIGEaYGy8XDZc=
github.com/nics/ich v0.0.0-20231128153104-da30f0e02497/go.mod h1:VOU/223ZShi9iLORheJXhd60DpLKnOtw/CsXTDPa0Qg=
github.com/nics/ich v0.0.0-20231128155525-f5704f956383 h1:af13T+6ta5pXDK/0JpkNPcvjPZ4hS7kATlr0unhBiGw=
github.com/nics/ich v0.0.0-20231128155525-f5704f956383/go.mod h1:AsuZcCw75zKhstoYa507cGjVzV262w8KX6gaFqAEHmA=
github.com/ogen-go/ogen v0.75.0 h1:4p9zxJOPS/lizvKfbqqzRf8jYuzr2NrMJRy69tva6uY=
Expand Down Expand Up @@ -536,6 +530,8 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tomasen/realip v0.0.0-20180522021738-f0c99a92ddce h1:fb190+cK2Xz/dvi9Hv8eCYJYvIGUTN2/KLq1pT6CjEc=
github.com/tomasen/realip v0.0.0-20180522021738-f0c99a92ddce/go.mod h1:o8v6yHRoik09Xen7gje4m9ERNah1d1PPsVq1VEx9vE4=
github.com/ugent-library/bibtex v0.0.0-20231215144723-4e5925e47a69 h1:jU4/3Q3NXjIV0ncULkiLSV1GaCvKsxFNl0x3+dRdY3k=
github.com/ugent-library/bibtex v0.0.0-20231215144723-4e5925e47a69/go.mod h1:UX+eTPyILtVo9OmXGgO+1Pl0NU4nj/KxB0JxgR/4LNo=
github.com/ugent-library/bind v0.0.0-20231106145128-3b6630797063 h1:uU2vnCNHaJF8Lx+exuV4PcZgZQX/PJ5hcgQCZEvg+Fg=
github.com/ugent-library/bind v0.0.0-20231106145128-3b6630797063/go.mod h1:QN9gGk5/ux2xoIia5EQbkw7XdO7yYQ5dG7/p6WSvp0o=
github.com/ugent-library/crypt v0.0.0-20230630063634-8c02106fd40e h1:LnsNQmbNBSNTNd2mdozNfy87fbUOZ5ZxjBxA+T8h0lg=
Expand Down
6 changes: 3 additions & 3 deletions views/publication/pages/add_bibtex.gohtml
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@
<div class="c-file-upload">
<input type="file" name="file">
<div class="c-file-upload__content">
<p>Drag and drop your .txt file or</p>
<button class="btn btn-outline-primary">Upload .txt file
<p>Drag and drop your .bib file or</p>
<button class="btn btn-outline-primary">Upload .bib file
<div class="spinner-border">
<span class="visually-hidden"></span>
</div>
</button>
</div>
</div>
<small class="form-text text-muted my-3"><a href="https://onderzoektips.ugent.be/en/tips/00002068/" target="_blank">BIBtex import instructions</a></small>
<small class="form-text text-muted my-3"><a href="https://onderzoektips.ugent.be/en/tips/00002068/" target="_blank">BibTeX import instructions</a></small>
</form>
</div>
</div>
Expand Down

0 comments on commit 78af539

Please sign in to comment.