summaryrefslogtreecommitdiff
path: root/document/document.go
diff options
context:
space:
mode:
Diffstat (limited to 'document/document.go')
-rw-r--r--document/document.go236
1 files changed, 236 insertions, 0 deletions
diff --git a/document/document.go b/document/document.go
new file mode 100644
index 0000000..a036eee
--- /dev/null
+++ b/document/document.go
@@ -0,0 +1,236 @@
+package document
+
+import (
+ "github.com/scrotadamus/ghligh/go-poppler"
+
+ "os"
+ "sync"
+
+ "strings"
+
+ "fmt"
+)
+
+// ghlighFilter is the prefix prepended to a tag's text when it is stored in
+// the document (see Tag) and stripped again when tags are read back (see
+// GetTags and RemoveTags).
+const ghlighFilter = "ghligh-Y2lhbm5v:"
+
+// AnnotsMap maps a page index to the list of annotations, in their AnnotJSON
+// form, that belong to that page. It is not the same thing as poppler's
+// annot_mapping.
+type AnnotsMap map[int][]AnnotJSON
+
+// GhlighDoc wraps a poppler document together with the fields that are
+// serialized to JSON for it.
+type GhlighDoc struct {
+ // doc is the underlying poppler document; it is unexported and therefore
+ // not part of the JSON form.
+ doc *poppler.Document
+ // mu is held by Import and Save for the duration of each call.
+ mu sync.Mutex
+
+ // Path is the file name the document was opened from (set by Open).
+ Path string `json:"file"`
+ // HashBuffer is emitted as "hash"; Close resets it to "".
+ // NOTE(review): presumably filled by HashDoc, which is not shown here — confirm.
+ HashBuffer string `json:"hash"`
+ // AnnotsBuffer is emitted as "highlights" and omitted when empty. Import
+ // sets it while an import runs, and Close resets it to nil.
+ AnnotsBuffer AnnotsMap `json:"highlights,omitempty"`
+}
+
+// HighlightedText is one highlight annotation as reported by Cat.
+type HighlightedText struct {
+ // Page is the index of the page the highlight was found on, counted from 0.
+ Page int `json:"page"`
+ // Text is the text poppler returns for the highlight annotation.
+ Text string `json:"text"`
+ // Contents is the annotation's contents; omitted from JSON when empty.
+ Contents string `json:"contents,omitempty"`
+}
+
+func Open(filename string) (*GhlighDoc, error) {
+ var err error
+
+ g := &GhlighDoc{}
+
+ g.doc, err = poppler.Open(filename)
+ if err != nil {
+ fmt.Errorf("%s: error opening pdf %v", os.Args[0], err)
+ return nil, err
+ }
+ g.Path = filename
+ // HashDoc??
+
+ return g, nil
+}
+
+// Close drops the cached hash and annotation buffers and closes the
+// underlying poppler document, if there is one.
+//
+// The document handle is cleared afterwards, so calling Close again is a
+// no-op rather than a second close of the same poppler document.
+func (d *GhlighDoc) Close() {
+	d.AnnotsBuffer = nil
+	d.HashBuffer = ""
+	if d.doc == nil {
+		return
+	}
+	d.doc.Close()
+	d.doc = nil
+}
+
+// Info returns the poppler.DocumentInfo reported by the underlying document.
+func (d *GhlighDoc) Info() poppler.DocumentInfo {
+	info := d.doc.Info()
+	return info
+}
+
+// tagExists reports whether text is one of the tags returned by GetTags.
+func (d *GhlighDoc) tagExists(text string) bool {
+	tags := d.GetTags()
+	for i := range tags {
+		if tags[i] == text {
+			return true
+		}
+	}
+	return false
+}
+
+// Tag adds text as a tag on the document, stored with the ghlighFilter
+// prefix. If the tag is already present nothing is added and a warning is
+// printed to standard error.
+func (d *GhlighDoc) Tag(text string) {
+	if d.tagExists(text) {
+		fmt.Fprintf(os.Stderr, "warning: tag %s already exist inside %s, i don't do anything\n", text, d.Path)
+		return
+	}
+	d.doc.Tag(ghlighFilter + text)
+}
+
+// GetTags returns the text of every tag annotation poppler finds for
+// ghlighFilter, with the ghlighFilter prefix stripped. The result is nil
+// when there are none.
+func (d *GhlighDoc) GetTags() []string {
+	var out []string
+	for _, a := range d.doc.GetTags(ghlighFilter) {
+		out = append(out, strings.TrimPrefix(a.Contents(), ghlighFilter))
+	}
+	return out
+}
+
+func (d *GhlighDoc) RemoveTags(tags []string) int {
+ zeroPage := d.doc.GetPage(0)
+ var removedTags int
+
+ annots := d.doc.GetTags(ghlighFilter)
+ for _, annot := range annots {
+ contents := strings.TrimPrefix(annot.Contents(), ghlighFilter)
+ for _, tag := range tags {
+ if tag == contents {
+ zeroPage.RemoveAnnot(*annot)
+ removedTags += 1
+ break
+ }
+ }
+ }
+ return removedTags
+}
+
+func (d *GhlighDoc) Import(annotsMap AnnotsMap) (int, error) {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ annots_count := 0
+
+ var err error
+ d.AnnotsBuffer = annotsMap
+
+ for key := range d.AnnotsBuffer {
+ page := d.doc.GetPage(key)
+ for _, annot := range d.AnnotsBuffer[key] {
+ a := d.jsonToAnnot(annot)
+ if !isInPage(a, page) {
+ annots_count += 1
+ page.AddAnnot(*a)
+ }
+
+ }
+ page.Close()
+ }
+
+ d.AnnotsBuffer = nil
+ return annots_count, err
+}
+
+// integrityCheck is an empty placeholder: it accepts two documents and
+// currently does nothing with them.
+func integrityCheck(a, b *GhlighDoc) {}
+
+// Save writes the document to a temporary file, reopens that file to verify
+// that its HashDoc value equals the in-memory document's, and then renames
+// the temporary file over d.Path.
+//
+// It returns true with a nil error on success; every failure yields false
+// together with a non-nil error. The receiver's mutex is held for the whole
+// call.
+//
+// NOTE(review): the temporary file is created in the default temp directory,
+// so the final os.Rename may fail when d.Path is on a different filesystem.
+func (d *GhlighDoc) Save() (bool, error) {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	tempFile, err := os.CreateTemp("", ".ghligh_*.pdf")
+	if err != nil {
+		return false, err
+	}
+	tempPath := tempFile.Name()
+	// After a successful rename the path no longer exists and this removal
+	// simply fails, which is harmless.
+	defer os.Remove(tempPath)
+
+	// Only the reserved name is needed: the document is saved by path, not
+	// through this handle, so release the descriptor instead of leaking it.
+	if err := tempFile.Close(); err != nil {
+		return false, err
+	}
+
+	ok, err := d.doc.Save(tempPath)
+	if !ok {
+		if err == nil {
+			// Never report failure with a nil error.
+			err = fmt.Errorf("saving document %s to %s failed", d.Path, tempPath)
+		}
+		return false, err
+	}
+
+	/* integrity check */
+	newDoc, err := Open(tempPath)
+	if err != nil {
+		return false, err
+	}
+	newHash := newDoc.HashDoc()
+	// The reopened copy is only needed for its hash; close it before the
+	// file is moved.
+	newDoc.Close()
+
+	if newHash != d.HashDoc() {
+		return false, fmt.Errorf("after saving document %s to %s its hash doesn't correspond to the old one", d.Path, tempPath)
+	}
+
+	if err := os.Rename(tempPath, d.Path); err != nil {
+		return false, err
+	}
+
+	return true, nil
+}
+
+// Cat collects every highlight annotation in the document, page by page, and
+// returns for each one the page index, the text poppler reports for the
+// annotation, and the annotation's contents. The result is nil when the
+// document has no highlights.
+func (d *GhlighDoc) Cat() []HighlightedText {
+	var out []HighlightedText
+
+	for pageIdx, total := 0, d.doc.GetNPages(); pageIdx < total; pageIdx++ {
+		page := d.doc.GetPage(pageIdx)
+		for _, annot := range page.GetAnnots() {
+			if annot.Type() != poppler.AnnotHighlight {
+				continue
+			}
+			out = append(out, HighlightedText{
+				Page:     pageIdx,
+				Text:     page.AnnotText(*annot),
+				Contents: annot.Contents(),
+			})
+		}
+		page.Close()
+	}
+	return out
+}
+
+// HasHighlights reports whether the document is tagged "ls" or has at least
+// one highlight annotation on any page.
+func (d *GhlighDoc) HasHighlights() bool {
+	// A document tagged "ls" counts as highlighted without looking at its pages.
+	if d.tagExists("ls") {
+		return true
+	}
+
+	nPages := d.doc.GetNPages()
+	for i := 0; i < nPages; i++ {
+		page := d.doc.GetPage(i)
+
+		found := false
+		for _, annot := range page.GetAnnots() {
+			if annot.Type() == poppler.AnnotHighlight {
+				found = true
+				break
+			}
+		}
+
+		// Close before any return so the page is also released when a
+		// highlight is found, not only when the page has none.
+		page.Close()
+		if found {
+			return true
+		}
+	}
+	return false
+}
+
+// GetAnnotsBuffer builds an AnnotsMap from the document: every page with at
+// least one highlight annotation maps its index to those annotations
+// converted with annotToJson. Pages without highlights get no entry. The
+// map is freshly built on each call; d.AnnotsBuffer is neither read nor
+// written.
+func (d *GhlighDoc) GetAnnotsBuffer() AnnotsMap {
+	byPage := make(AnnotsMap)
+
+	for pageIdx, total := 0, d.doc.GetNPages(); pageIdx < total; pageIdx++ {
+		page := d.doc.GetPage(pageIdx)
+
+		var pageAnnots []AnnotJSON
+		for _, annot := range page.GetAnnots() {
+			if annot.Type() != poppler.AnnotHighlight {
+				continue
+			}
+			pageAnnots = append(pageAnnots, annotToJson(*annot))
+		}
+		page.Close()
+
+		if len(pageAnnots) != 0 {
+			byPage[pageIdx] = pageAnnots
+		}
+	}
+	return byPage
+}