From 17fb6add26291b31f7020e3551a7c8487130a747 Mon Sep 17 00:00:00 2001 From: "F.O." Date: Sun, 16 Feb 2025 17:56:08 +0100 Subject: genesi --- document/document.go | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 document/document.go (limited to 'document/document.go') diff --git a/document/document.go b/document/document.go new file mode 100644 index 0000000..a036eee --- /dev/null +++ b/document/document.go @@ -0,0 +1,236 @@ +package document + +import ( + "github.com/scrotadamus/ghligh/go-poppler" + + "os" + "sync" + + "strings" + + "fmt" +) + +const ghlighFilter = "ghligh-Y2lhbm5v:" + +// This is different from poppler's annot_mapping +// it is the list of annotations mapped to the page index +type AnnotsMap map[int][]AnnotJSON + +type GhlighDoc struct { + doc *poppler.Document + mu sync.Mutex + + Path string `json:"file"` + HashBuffer string `json:"hash"` + AnnotsBuffer AnnotsMap `json:"highlights,omitempty"` +} + +type HighlightedText struct { + Page int `json:"page"` + Text string `json:"text"` + Contents string `json:"contents,omitempty"` +} + +func Open(filename string) (*GhlighDoc, error) { + var err error + + g := &GhlighDoc{} + + g.doc, err = poppler.Open(filename) + if err != nil { + fmt.Errorf("%s: error opening pdf %v", os.Args[0], err) + return nil, err + } + g.Path = filename + // HashDoc?? + + return g, nil +} + +func (d *GhlighDoc) Close() { + d.AnnotsBuffer = nil + d.HashBuffer = "" + if d.doc != nil { + d.doc.Close() + } +} + +func (d *GhlighDoc) Info() poppler.DocumentInfo { + return d.doc.Info() +} + +func (d *GhlighDoc) tagExists(text string) bool { + for _, tag := range d.GetTags() { + if tag == text { + return true + } + } + return false +} + +func (d *GhlighDoc) Tag(text string) { + if !d.tagExists(text) { + d.doc.Tag(ghlighFilter + text) + } else { + fmt.Fprintf(os.Stderr, "warning: tag %s already exist inside %s, i don't do anything\n", text, d.Path) + } +} + +func (d *GhlighDoc) GetTags() []string { + var tags []string + annots := d.doc.GetTags(ghlighFilter) + for _, annot := range annots { + contents := strings.TrimPrefix(annot.Contents(), ghlighFilter) + tags = append(tags, contents) + } + return tags +} + +func (d *GhlighDoc) RemoveTags(tags []string) int { + zeroPage := d.doc.GetPage(0) + var removedTags int + + annots := d.doc.GetTags(ghlighFilter) + for _, annot := range annots { + contents := strings.TrimPrefix(annot.Contents(), ghlighFilter) + for _, tag := range tags { + if tag == contents { + zeroPage.RemoveAnnot(*annot) + removedTags += 1 + break + } + } + } + return removedTags +} + +func (d *GhlighDoc) Import(annotsMap AnnotsMap) (int, error) { + d.mu.Lock() + defer d.mu.Unlock() + annots_count := 0 + + var err error + d.AnnotsBuffer = annotsMap + + for key := range d.AnnotsBuffer { + page := d.doc.GetPage(key) + for _, annot := range d.AnnotsBuffer[key] { + a := d.jsonToAnnot(annot) + if !isInPage(a, page) { + annots_count += 1 + page.AddAnnot(*a) + } + + } + page.Close() + } + + d.AnnotsBuffer = nil + return annots_count, err +} + +func integrityCheck(tizio *GhlighDoc, caio *GhlighDoc) { + +} + +func (d *GhlighDoc) Save() (bool, error) { + d.mu.Lock() + defer d.mu.Unlock() + tempFile, err := os.CreateTemp("", ".ghligh_*.pdf") + if err != nil { + return false, err + } + defer os.Remove(tempFile.Name()) + + ok, err := d.doc.Save(tempFile.Name()) + if !ok { + return false, err + } + + /* integrity check */ + newDoc, err := Open(tempFile.Name()) + if err != nil { + return false, err + } + + if newDoc.HashDoc() != d.HashDoc() { + return false, fmt.Errorf("After saving document %s to %s its hash doesn't correspond the the old one", d.Path, tempFile.Name()) + } + + err = os.Rename(tempFile.Name(), d.Path) + if err != nil { + return false, err + } + + return true, nil +} + +func (d *GhlighDoc) Cat() []HighlightedText { + var highlights []HighlightedText + + n_pages := d.doc.GetNPages() + for i := 0; i < n_pages; i++ { + page := d.doc.GetPage(i) + annots := page.GetAnnots() + for _, annot := range annots { + if annot.Type() == poppler.AnnotHighlight { + annotText := page.AnnotText(*annot) + + highlights = append(highlights, HighlightedText{Page: i, Text: annotText, Contents: annot.Contents()}) + } + } + + page.Close() + } + return highlights +} + +func (d *GhlighDoc) HasHighlights() bool { + // check if is tagged with ls + if d.tagExists("ls") { + return true + } + + // check if it has highlights + n_pages := d.doc.GetNPages() + for i := 0; i < n_pages; i++ { + page := d.doc.GetPage(i) + annots := page.GetAnnots() + for _, annot := range annots { + if annot.Type() == poppler.AnnotHighlight { + return true + } + } + + page.Close() + } + return false +} + +func (d *GhlighDoc) GetAnnotsBuffer() AnnotsMap { + annots_json_of_page := make(AnnotsMap) + + n := d.doc.GetNPages() + var annots_json []AnnotJSON + for i := 0; i < n; i++ { + annots_json = nil + page := d.doc.GetPage(i) + + annots := page.GetAnnots() + for _, annot := range annots { + if annot.Type() == poppler.AnnotHighlight { + annot_json := annotToJson(*annot) + annots_json = append(annots_json, annot_json) + } + } + + page.Close() + + if len(annots_json) > 0 { + annots_json_of_page[i] = annots_json + } + } + + return annots_json_of_page +} -- cgit v1.2.3