diff options
author | F.O. <scrotadamus@insiberia.net> | 2025-02-16 17:56:08 +0100 |
---|---|---|
committer | F.O. <scrotadamus@insiberia.net> | 2025-02-16 17:56:57 +0100 |
commit | 17fb6add26291b31f7020e3551a7c8487130a747 (patch) | |
tree | d4559a7339ed181393ff921909e6ce05b7c2cf18 /go-poppler |
genesi
Diffstat (limited to 'go-poppler')
-rw-r--r-- | go-poppler/LICENSE.md | 363 | ||||
-rw-r--r-- | go-poppler/annot.go | 194 | ||||
-rw-r--r-- | go-poppler/document.go | 161 | ||||
-rw-r--r-- | go-poppler/image.go | 31 | ||||
-rw-r--r-- | go-poppler/page.go | 217 | ||||
-rw-r--r-- | go-poppler/poppler.go | 58 | ||||
-rw-r--r-- | go-poppler/tags.go | 57 | ||||
-rw-r--r-- | go-poppler/text.go | 22 | ||||
-rw-r--r-- | go-poppler/utils.go | 85 |
9 files changed, 1188 insertions, 0 deletions
diff --git a/go-poppler/LICENSE.md b/go-poppler/LICENSE.md new file mode 100644 index 0000000..e96bb63 --- /dev/null +++ b/go-poppler/LICENSE.md @@ -0,0 +1,363 @@ +Copyright (c) 2015-2020 Sergey Cherepanov + +### GNU GENERAL PUBLIC LICENSE + +Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +### Preamble + +The licenses for most software are designed to take away your freedom +to share and change it. By contrast, the GNU General Public License is +intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + +To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if +you distribute copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + +We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + +Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, +we want its recipients to know that what they have is not the +original, so that any problems introduced by others will not reflect +on the original authors' reputations. + +Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at +all. + +The precise terms and conditions for copying, distribution and +modification follow. + +### TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +**0.** This License applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work +based on the Program" means either the Program or any derivative work +under copyright law: that is to say, a work containing the Program or +a portion of it, either verbatim or with modifications and/or +translated into another language. (Hereinafter, translation is +included without limitation in the term "modification".) Each licensee +is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the Program +(independent of having been made by running the Program). Whether that +is true depends on what the Program does. + +**1.** You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a +fee. + +**2.** You may modify your copy or copies of the Program or any +portion of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + +**a)** You must cause the modified files to carry prominent notices +stating that you changed the files and the date of any change. + + +**b)** You must cause any work that you distribute or publish, that in +whole or in part contains or is derived from the Program or any part +thereof, to be licensed as a whole at no charge to all third parties +under the terms of this License. + + +**c)** If the modified program normally reads commands interactively +when run, you must cause it, when started running for such interactive +use in the most ordinary way, to print or display an announcement +including an appropriate copyright notice and a notice that there is +no warranty (or else, saying that you provide a warranty) and that +users may redistribute the program under these conditions, and telling +the user how to view a copy of this License. (Exception: if the +Program itself is interactive but does not normally print such an +announcement, your work based on the Program is not required to print +an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + +**3.** You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + +**a)** Accompany it with the complete corresponding machine-readable +source code, which must be distributed under the terms of Sections 1 +and 2 above on a medium customarily used for software interchange; or, + + +**b)** Accompany it with a written offer, valid for at least three +years, to give any third party, for a charge no more than your cost of +physically performing source distribution, a complete machine-readable +copy of the corresponding source code, to be distributed under the +terms of Sections 1 and 2 above on a medium customarily used for +software interchange; or, + + +**c)** Accompany it with the information you received as to the offer +to distribute corresponding source code. (This alternative is allowed +only for noncommercial distribution and only if you received the +program in object code or executable form with such an offer, in +accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + +**4.** You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt otherwise +to copy, modify, sublicense or distribute the Program is void, and +will automatically terminate your rights under this License. However, +parties who have received copies, or rights, from you under this +License will not have their licenses terminated so long as such +parties remain in full compliance. + +**5.** You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + +**6.** Each time you redistribute the Program (or any work based on +the Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + +**7.** If, as a consequence of a court judgment or allegation of +patent infringement or for any other reason (not limited to patent +issues), conditions are imposed on you (whether by court order, +agreement or otherwise) that contradict the conditions of this +License, they do not excuse you from the conditions of this License. +If you cannot distribute so as to satisfy simultaneously your +obligations under this License and any other pertinent obligations, +then as a consequence you may not distribute the Program at all. For +example, if a patent license would not permit royalty-free +redistribution of the Program by all those who receive copies directly +or indirectly through you, then the only way you could satisfy both it +and this License would be to refrain entirely from distribution of the +Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + +**8.** If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + +**9.** The Free Software Foundation may publish revised and/or new +versions of the General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Program does not specify a +version number of this License, you may choose any version ever +published by the Free Software Foundation. + +**10.** If you wish to incorporate parts of the Program into other +free programs whose distribution conditions are different, write to +the author to ask for permission. For software which is copyrighted by +the Free Software Foundation, write to the Free Software Foundation; +we sometimes make exceptions for this. Our decision will be guided by +the two goals of preserving the free status of all derivatives of our +free software and of promoting the sharing and reuse of software +generally. + +**NO WARRANTY** + +**11.** BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +**12.** IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + +### END OF TERMS AND CONDITIONS + +### How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + one line to give the program's name and an idea of what it does. + Copyright (C) yyyy name of author + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +Also add information on how to contact you by electronic and paper +mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details + type `show w'. This is free software, and you are welcome + to redistribute it under certain conditions; type `show c' + for details. + +The hypothetical commands \`show w' and \`show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than \`show w' and +\`show c'; they could even be mouse-clicks or menu items--whatever +suits your program. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the program, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright + interest in the program `Gnomovision' + (which makes passes at compilers) written + by James Hacker. + + signature of Ty Coon, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, +you may consider it more useful to permit linking proprietary +applications with the library. If this is what you want to do, use the +[GNU Lesser General Public +License](https://www.gnu.org/licenses/lgpl.html) instead of this +License. diff --git a/go-poppler/annot.go b/go-poppler/annot.go new file mode 100644 index 0000000..4527d83 --- /dev/null +++ b/go-poppler/annot.go @@ -0,0 +1,194 @@ +package poppler + +// #cgo pkg-config: poppler-glib +// #include <poppler.h> +// #include <glib.h> +// #include <cairo.h> +// +// /* macro wrappings */ +// gboolean wrap_POPPLER_IS_ANNOT_TEXT_MARKUP(PopplerAnnot *annot){ +// return POPPLER_IS_ANNOT_TEXT_MARKUP(annot); +// } +// PopplerAnnotTextMarkup *wrap_POPPLER_ANNOT_TEXT_MARKUP(PopplerAnnot *annot) { +// return POPPLER_ANNOT_TEXT_MARKUP(annot); +// } +import "C" + +import "unsafe" +//import "github.com/ungerik/go-cairo" + +// DEBUG +//import "fmt" + +type Point struct { + X, Y float64 +} +type Quad struct { + P1, P2, P3, P4 Point +} + +type Annot struct { + am *C.struct__PopplerAnnotMapping +} + +type AnnotType int + +const ( + AnnotUnknown AnnotType = iota + AnnotText + AnnotLink + AnnotFreeText + AnnotLine + AnnotSquare + AnnotCircle + AnnotPolygon + AnnotPolyLine + AnnotHighlight + AnnotUnderline + AnnotSquiggly + AnnotStrikeOut + AnnotStamp + AnnotCaret + AnnotInk + AnnotPopup + AnnotFileAttachment + AnnotSound + AnnotMovie + AnnotWidget + AnnotScreen + AnnotPrinterMark + AnnotTrapNet + AnnotWatermark + Annot3D +) + +type AnnotFlag int + +const AnnotFlagUnknown AnnotFlag = 0 +const ( + AnnotFlagInvisible AnnotFlag = 1 << iota + AnnotFlagHidden + AnnotFlagPrint + AnnotFlagNoZoom + AnnotFlagNoRotate + AnnotFlagNoView + AnnotFlagReadOnly + AnnotFlagLocked + AnnotFlagToggleNoView + AnnotFlagLockedContents +) + +func (a *Annot) Type() AnnotType { + t := C.poppler_annot_get_annot_type(a.am.annot) + return AnnotType(t) +} + +func (a *Annot) Index() int { + i := C.poppler_annot_get_page_index(a.am.annot) + return int(i) +} + +func (a *Annot) Date() string { + cText := C.poppler_annot_get_modified(a.am.annot) + return C.GoString(cText) +} + +func (a *Annot) Rect() Rectangle { + var r C.PopplerRectangle + C.poppler_annot_get_rectangle(a.am.annot, &r) + + rect := Rectangle{ + X1: float64(r.x1), + Y1: float64(r.y1), + X2: float64(r.x2), + Y2: float64(r.y2), + } + + return rect + +} + +func (a *Annot) Color() Color { + c := C.poppler_annot_get_color(a.am.annot) + if c == nil { + return Color{} + } + defer C.poppler_color_free(c) + + color := Color{ + R: int(c.red), + G: int(c.green), + B: int(c.blue), + } + + return color +} + +func (a *Annot) Name() string { + cText := C.poppler_annot_get_name(a.am.annot) + return C.GoString(cText) +} + +func (a *Annot) Contents() string { + //if a.am.annot == nil { + //return "" + //} + cText := C.poppler_annot_get_contents(a.am.annot) + //fmt.Printf("DEBUG poppler_annot_get_contents returned pointer: %v", cText) + if cText == nil { + return "" + } + return C.GoString(cText) +} + +func (a *Annot) Flags() AnnotFlag { + f := C.poppler_annot_get_flags(a.am.annot) + return AnnotFlag(f) +} + +func (a *Annot) Quads() []Quad { + if C.wrap_POPPLER_IS_ANNOT_TEXT_MARKUP(a.am.annot) == C.FALSE { + return nil + } + + + textMarkup := C.wrap_POPPLER_ANNOT_TEXT_MARKUP(a.am.annot) + + q := C.poppler_annot_text_markup_get_quadrilaterals(textMarkup) + + quads := gArrayToQuads(q) + + C.g_array_free(q, 1) + + return quads +} + +func (a *Annot) Close() { + if a.am != nil { + C.poppler_annot_mapping_free(a.am) + a.am = nil + } +} + +func (a *Annot) SetColor(c Color){ + pColor := C.poppler_color_new() + pColor.red = C.ushort(c.R) + pColor.green = C.ushort(c.G) + pColor.blue = C.ushort(c.B) + defer C.poppler_color_free(pColor) + + C.poppler_annot_set_color(a.am.annot, pColor ) +} + +func (a *Annot) SetContents(c string){ + cStr := C.CString(c) + defer C.free(unsafe.Pointer(cStr)) + + C.poppler_annot_set_contents(a.am.annot, cStr) +} + +func (a *Annot) SetFlags(f AnnotFlag){ + pFlags := C.PopplerAnnotFlag(f) + + C.poppler_annot_set_flags(a.am.annot, pFlags) +} diff --git a/go-poppler/document.go b/go-poppler/document.go new file mode 100644 index 0000000..58365ca --- /dev/null +++ b/go-poppler/document.go @@ -0,0 +1,161 @@ +package poppler + +// #cgo pkg-config: poppler-glib +// #include <poppler.h> +// #include <stdlib.h> +// #include <glib.h> +// #include <unistd.h> +import "C" + +import ( + "errors" + "unsafe" + "path/filepath" +) + +type Document struct { + doc poppDoc + openedPages []*Page +} + +type DocumentInfo struct { + PdfVersion string `json:"pdf_version,omitempty"` + Title string `json:"title,omitempty"` + Author string `json:"author,omitempty"` + Subject string `json:"subject,omitempty"` + KeyWords string `json:"keywords,omitempty"` + Creator string `json:"creator,omitempty"` + Producer string `json:"producer,omitempty"` + Metadata string `json:"metadata,omitempty"` + CreationDate int `json:"creation_date,omitempty"` + ModificationDate int `json:"modification_date,omitempty"` + Pages int `json:"pages_number,omitempty"` + IsLinearized bool `json:"is_linearized,omitempty"` +} + +func (d *Document) Info() DocumentInfo { + return DocumentInfo{ + PdfVersion: toString(C.poppler_document_get_pdf_version_string(d.doc)), + Title: toString(C.poppler_document_get_title(d.doc)), + Author: toString(C.poppler_document_get_author(d.doc)), + Subject: toString(C.poppler_document_get_subject(d.doc)), + KeyWords: toString(C.poppler_document_get_keywords(d.doc)), + Creator: toString(C.poppler_document_get_creator(d.doc)), + Producer: toString(C.poppler_document_get_producer(d.doc)), + Metadata: toString(C.poppler_document_get_metadata(d.doc)), + CreationDate: int(C.poppler_document_get_creation_date(d.doc)), + ModificationDate: int(C.poppler_document_get_modification_date(d.doc)), + Pages: int(C.poppler_document_get_n_pages(d.doc)), + IsLinearized: toBool(C.poppler_document_is_linearized(d.doc)), + } +} + +func (d *Document) GetNPages() int { + return int(C.poppler_document_get_n_pages(d.doc)) +} + +func (d *Document) GetPage(i int) (page *Page) { + p := C.poppler_document_get_page(d.doc, C.int(i)) + + page = &Page{ + p: p, + openedAnnots: nil, + } + d.openedPages = append(d.openedPages, page) + + return page +} + +func (d *Document) HasAttachments() bool { + return toBool(C.poppler_document_has_attachments(d.doc)) +} + +func (d *Document) GetNAttachments() int { + return int(C.poppler_document_get_n_attachments(d.doc)) +} + +func (d *Document) Close() { + + for i := 0; i < len(d.openedPages); i++ { + d.openedPages[i].Close() + } + d.openedPages = []*Page{} + + C.g_object_unref(C.gpointer(d.doc)) +} + + +func (d *Document) NewAnnot(t AnnotType, r Rectangle, q []Quad) (Annot, error) { + am := C.poppler_annot_mapping_new(); + + annot := Annot { + am: am, + } + + pRect := rectangleToPopplerRectangle(r) + + pQuad := quadsToGArray(q) + defer C.g_array_free(pQuad, 1) + + + switch (t){ + case AnnotHighlight: + am.annot = C.poppler_annot_text_markup_new_highlight(d.doc, &pRect, pQuad) + case AnnotUnderline: + am.annot = C.poppler_annot_text_markup_new_underline(d.doc, &pRect, pQuad) + case AnnotSquiggly: + am.annot = C.poppler_annot_text_markup_new_squiggly(d.doc, &pRect, pQuad) + case AnnotStrikeOut: + am.annot = C.poppler_annot_text_markup_new_strikeout(d.doc, &pRect, pQuad) + default: + C.poppler_annot_mapping_free(am) + return annot, errors.New("invalid type for new annotation") + } + + + if am.annot == nil { + C.poppler_annot_mapping_free(am) + return annot, errors.New("failed to create annotation") + } + + /* Can't get real annot mapping area as done in + * poppler_page_get_annot_mapping() since page is + * needed for page->page->getCropBox() and + * page->page->getRotate() + * + * as a placeholder we just use the annot rect + */ + annot.am.area = pRect + + return annot, nil +} + +func (d *Document) Save(filename string) (saved bool, err error) { + filename, err = filepath.Abs(filename) + if err != nil { + return false, err + } + + var e *C.GError + cFilename := (*C.gchar)(C.CString(filename)) + defer C.free(unsafe.Pointer(cFilename)) + + cUri := C.g_filename_to_uri(cFilename, nil, nil) + cBool := C.poppler_document_save (d.doc, cUri, &e); + if e != nil { + err = errors.New(C.GoString((*C.char)(e.message))) + return false, err + } + + if cBool == C.TRUE { + return true, nil + } + + return false, nil +} + +/* +func (d *Document) GetAttachments() []Attachment { + return +} +*/ diff --git a/go-poppler/image.go b/go-poppler/image.go new file mode 100644 index 0000000..ba2abc8 --- /dev/null +++ b/go-poppler/image.go @@ -0,0 +1,31 @@ +package poppler + +// #cgo pkg-config: poppler-glib +// #include <poppler.h> +// #include <glib.h> +import "C" +import ( + "unsafe" + + "github.com/ungerik/go-cairo" +) + +// Image + +type Image struct { + Id int + Area Rectangle + p *C.struct__PopplerPage +} + +type Rectangle struct { + X1, Y1, X2, Y2 float64 +} + +func (im *Image) GetSurface() (cs *cairo.Surface) { + ci := C.poppler_page_get_image(im.p, C.gint(im.Id)) + ctx := C.cairo_create(ci) + cip := (cairo.Cairo_surface)(unsafe.Pointer(ci)) + ctxp := (cairo.Cairo_context)(unsafe.Pointer(ctx)) + return cairo.NewSurfaceFromC(cip, ctxp) +} diff --git a/go-poppler/page.go b/go-poppler/page.go new file mode 100644 index 0000000..efd0706 --- /dev/null +++ b/go-poppler/page.go @@ -0,0 +1,217 @@ +package poppler + +// #cgo pkg-config: poppler-glib +// #include <poppler.h> +// #include <glib.h> +// #include <cairo.h> +import "C" +import "unsafe" +import "github.com/ungerik/go-cairo" + +//import "fmt" + +type Page struct { + p *C.struct__PopplerPage + openedAnnots []*Annot +} + +func (p *Page) Text() string { + return C.GoString(C.poppler_page_get_text(p.p)) +} + +func (p *Page) TextAttributes() (results []TextAttributes) { + a := C.poppler_page_get_text_attributes(p.p) + defer C.poppler_page_free_text_attributes(a) + var attr *C.PopplerTextAttributes + results = make([]TextAttributes, 0) + el := C.g_list_first(a) + for el != nil { + attr = (*C.PopplerTextAttributes)(el.data) + fn := attr.font_name + result := TextAttributes{ + FontName: toString(fn), + FontSize: float64(attr.font_size), + IsUnderlined: toBool(attr.is_underlined), + StartIndex: int(attr.start_index), + EndIndex: int(attr.end_index), + Color: Color{ + R: int(attr.color.red), + G: int(attr.color.green), + B: int(attr.color.blue), + }, + } + results = append(results, result) + el = el.next + } + return +} + +func (p *Page) Size() (width, height float64) { + var w, h C.double + C.poppler_page_get_size(p.p, &w, &h) + return float64(w), float64(h) +} + +func (p *Page) Index() int { + return int(C.poppler_page_get_index(p.p)) +} + +func (p *Page) Label() string { + return toString(C.poppler_page_get_label(p.p)) +} + +func (p *Page) Duration() float64 { + return float64(C.poppler_page_get_duration(p.p)) +} + +func (p *Page) Images() (results []Image) { + l := C.poppler_page_get_image_mapping(p.p) + defer C.poppler_page_free_image_mapping(l) + results = make([]Image, 0) + var im *C.PopplerImageMapping + for el := C.g_list_first(l); el != nil; el = el.next { + im = (*C.PopplerImageMapping)(el.data) + result := Image{ + Id: int(im.image_id), + Area: Rectangle{ + X1: float64(im.area.x1), + Y1: float64(im.area.y1), + X2: float64(im.area.x2), + Y2: float64(im.area.y2), + }, + p: p.p, + } + results = append(results, result) + } + return +} + +func (p *Page) TextLayout() (layouts []Rectangle) { + var rect *C.PopplerRectangle + var n C.guint + if toBool(C.poppler_page_get_text_layout(p.p, &rect, &n)) { + defer C.g_free((C.gpointer)(rect)) + layouts = make([]Rectangle, int(n)) + r := (*[1 << 30]C.PopplerRectangle)(unsafe.Pointer(rect))[:n:n] + for i := 0; i < int(n); i++ { + layouts[i] = Rectangle{ + X1: float64(r[i].x1), + Y1: float64(r[i].y1), + X2: float64(r[i].x2), + Y2: float64(r[i].y2), + } + } + } + return +} + +func (p *Page) TextLayoutAndAttrs() (result []TextEl) { + text := p.Text() + attrs := p.TextAttributes() + layout := p.TextLayout() + result = make([]TextEl, len(layout)) + attrsRef := make([]*TextAttributes, len(attrs)) + for i, a := range attrs { + attr := a + attrsRef[i] = &attr + } + i := 0 + for _, t := range text { + var a *TextAttributes + for _, a = range attrsRef { + if i >= a.StartIndex && i <= a.EndIndex { + break + } + } + result[i] = TextEl{ + Text: string(t), + Attrs: a, + Rect: layout[i], + } + i++ + } + return +} + +func (p *Page) Close() { + p.closeAnnotMappings() + + if p.p != nil { + C.g_object_unref(C.gpointer(p.p)) + /* avoid double free */ + p.p = nil + } +} + +// Converts a page into SVG and saves to file. +// Inspired by https://github.com/dawbarton/pdf2svg +func (p *Page) ConvertToSVG(filename string){ + width, height := p.Size() + + // Open the SVG file + surface := cairo.NewSVGSurface( filename, width, height, cairo.SVG_VERSION_1_2 ) + + // TODO Can be improved by using cairo_svg_surface_create_for_stream() instead of + // cairo_svg_surface_create() for stream processing instead of file processing. + // However, this needs to be changed in github.com/ungerik/go-cairo/surface.go + + // Get cairo context pointer + _, drawcontext := surface.Native() + + // Render the PDF file into the SVG file + C.poppler_page_render_for_printing(p.p, (*C.cairo_t)(unsafe.Pointer(drawcontext)) ); + + // Close the SVG file + surface.ShowPage() + surface.Destroy() +} + +func (p *Page) closeAnnotMappings(){ + for i := 0; i < len(p.openedAnnots); i++ { + p.openedAnnots[i].Close() + } + + p.openedAnnots = nil + +} + +func (p *Page) GetAnnots() (Annots []*Annot) { + var annots []*Annot + + annotGlist := C.poppler_page_get_annot_mapping(p.p) + defer C.g_list_free(annotGlist) + + p.closeAnnotMappings() + + for annotGlist != nil { + popplerAnnot := (*C.PopplerAnnotMapping)(annotGlist.data) + + + annot := &Annot{ + am: popplerAnnot, + } + + /* Maybe we can used openedAnnots instead of annots + openedAnnots + */ + + annots = append(annots, annot) + p.openedAnnots = append(p.openedAnnots, annot) + + + annotGlist = annotGlist.next + } + + return annots +} + +func (p *Page) AnnotText(a Annot) string { + cText := C.poppler_page_get_text_for_area(p.p, &a.am.area) + return C.GoString(cText) +} + +func (p *Page) AddAnnot(a Annot) { + C.poppler_page_add_annot(p.p, a.am.annot) +} +func (p *Page) RemoveAnnot(a Annot) { + C.poppler_page_remove_annot(p.p, a.am.annot) +} diff --git a/go-poppler/poppler.go b/go-poppler/poppler.go new file mode 100644 index 0000000..eaa53c4 --- /dev/null +++ b/go-poppler/poppler.go @@ -0,0 +1,58 @@ +package poppler + +// #cgo pkg-config: poppler-glib +// #include <poppler.h> +// #include <stdlib.h> +// #include <glib.h> +// #include <unistd.h> +import "C" + +import ( + "errors" + "path/filepath" + "unsafe" +) + +type poppDoc *C.struct__PopplerDocument + +func Open(filename string) (doc *Document, err error) { + filename, err = filepath.Abs(filename) + if err != nil { + return + } + var e *C.GError + cfilename := (*C.gchar)(C.CString(filename)) + defer C.free(unsafe.Pointer(cfilename)) + fn := C.g_filename_to_uri(cfilename, nil, nil) + var d poppDoc + d = C.poppler_document_new_from_file((*C.char)(fn), nil, &e) + if e != nil { + err = errors.New(C.GoString((*C.char)(e.message))) + } + doc = &Document{ + doc: d, + openedPages: []*Page{}, + } + return +} + +func Load(data []byte) (doc *Document, err error) { + var e *C.GError + var d poppDoc + + b := C.g_bytes_new((C.gconstpointer)(unsafe.Pointer(&data[0])), (C.ulong)(len(data))) + defer C.g_bytes_unref(b) + + d = C.poppler_document_new_from_bytes(b, nil, &e) + if e != nil { + err = errors.New(C.GoString((*C.char)(e.message))) + } + doc = &Document{ + doc: d, + } + return +} + +func Version() string { + return C.GoString(C.poppler_get_version()) +} diff --git a/go-poppler/tags.go b/go-poppler/tags.go new file mode 100644 index 0000000..fd24f70 --- /dev/null +++ b/go-poppler/tags.go @@ -0,0 +1,57 @@ +package poppler + +// #cgo pkg-config: poppler-glib +// #include <poppler.h> +// #include <stdlib.h> +// #include <glib.h> +// #include <unistd.h> +import "C" +import "strings" + + + +var zeroRect = Rectangle{X1: 0, X2: 0, Y1: 0, Y2: 0} + +func (d *Document) Tag(text string){ + am := C.poppler_annot_mapping_new(); + + pRect := rectangleToPopplerRectangle(zeroRect) + + annot := Annot { + am: am, + } + defer annot.Close() + + am.annot = C.poppler_annot_text_new(d.doc, &pRect) + annot.SetContents(text) + annot.SetFlags(AnnotFlagHidden | AnnotFlagInvisible) + + zeroPage := d.GetPage(0) + zeroPage.AddAnnot(annot) + defer zeroPage.Close() +} + + +func (d *Document) GetTags(filter string) []*Annot { + page := d.GetPage(0) + //defer page.Close() + + annots := page.GetAnnots() + var tags []*Annot + for _, a := range(annots) { + if a.Type() == AnnotText && + rectEq(a.Rect(), zeroRect) && + a.Flags() & AnnotFlagHidden != 0 && + a.Flags() & AnnotFlagInvisible != 0 && + strings.HasPrefix(a.Contents(), filter){ + tags = append(tags, a) + } + } + + return tags +} + +func (d *Document) RemoveTags(filter string){ +// d.GetPage(0).AddAnnot(annot) + // TODO +} diff --git a/go-poppler/text.go b/go-poppler/text.go new file mode 100644 index 0000000..5550b64 --- /dev/null +++ b/go-poppler/text.go @@ -0,0 +1,22 @@ +package poppler + +import () + +type TextEl struct { + Text string + Attrs *TextAttributes + Rect Rectangle +} + +type TextAttributes struct { + FontName string + FontSize float64 + IsUnderlined bool + Color Color + StartIndex, EndIndex int +} + +type Color struct { + R, G, B int +} + diff --git a/go-poppler/utils.go b/go-poppler/utils.go new file mode 100644 index 0000000..4bda53a --- /dev/null +++ b/go-poppler/utils.go @@ -0,0 +1,85 @@ +package poppler + +// #cgo pkg-config: poppler-glib +// #include <poppler.h> +// #include <glib.h> +// #include <unistd.h> +// #include <stdlib.h> +import "C" + +import "unsafe" + +func toString(in *C.gchar) string { + return C.GoString((*C.char)(in)) +} + +func toBool(in C.gboolean) bool { + return int(in) > 0 +} + +/* convert a Quad struct to a GArray */ +func quadsToGArray(quads []Quad) *C.GArray { + garray := C.g_array_new(C.FALSE, C.FALSE, C.sizeof_PopplerQuadrilateral) + + for _, quad := range quads { + item := C.PopplerQuadrilateral{ + p1: C.PopplerPoint{ + x: C.double(quad.P1.X), + y: C.double(quad.P1.Y), + }, + p2: C.PopplerPoint{ + x: C.double(quad.P2.X), + y: C.double(quad.P2.Y), + }, + p3: C.PopplerPoint{ + x: C.double(quad.P3.X), + y: C.double(quad.P3.Y), + }, + p4: C.PopplerPoint{ + x: C.double(quad.P4.X), + y: C.double(quad.P4.Y), + }, + } + + C.g_array_append_vals(garray, C.gconstpointer(&item),1) + } + + return garray +} + +/* convert a GArray to a quad */ +func gArrayToQuads(q *C.GArray) []Quad { + length := int(q.len) + + quads := make([]Quad, length) + + for i := 0; i < length; i++ { + item := (*C.PopplerQuadrilateral)(unsafe.Pointer(uintptr(unsafe.Pointer(q.data)) + uintptr(i)*unsafe.Sizeof(C.PopplerQuadrilateral{}))) + quads[i] = Quad{ + P1: Point{X: float64(item.p1.x), Y: float64(item.p1.y)}, + P2: Point{X: float64(item.p2.x), Y: float64(item.p2.y)}, + P3: Point{X: float64(item.p3.x), Y: float64(item.p3.y)}, + P4: Point{X: float64(item.p4.x), Y: float64(item.p4.y)}, + } + } + + return quads +} + +func rectangleToPopplerRectangle (r Rectangle) C.PopplerRectangle { + var pRect C.PopplerRectangle + + pRect.x1 = C.double(r.X1) + pRect.y1 = C.double(r.Y1) + pRect.x2 = C.double(r.X2) + pRect.y2 = C.double(r.Y2) + + return pRect +} + +func rectEq(r1 Rectangle, r2 Rectangle) bool { + return r1.X1 == r2.X1 && + r1.X2 == r2.X2 && + r1.Y1 == r2.Y1 && + r1.Y2 == r2.Y2 +} |