Skip to content

标记内容 (Marked content)

在 PDF 文档中,可以将一部分内容标记为标记内容元素。标记内容功能有助于管理 PDF 文档的逻辑结构信息并且可以用于生成加标记的 PDF (tagged PDF) 。加标记的 PDF 具有标准的结构类型和属性,有助于提取和再利用页面内容。有关标记内容的更多详细信息,请参阅 PDF reference 相关章节。

获取页面中的标记内容以及 tag 名称

c++
#include "include/common/fs_common.h"
#include "include/pdf/fs_pdfdoc.h"
#include "include/pdf/fs_pdfpage.h"
#include "include/common/fs_image.h"
#include "include/pdf/graphics/fs_pdfgraphicsobject.h"
#include "include/pdf/objects/fs_pdfobject.h"

using namespace foxit;
using namespace foxit::common;
using foxit::common::Library;
using namespace pdf;
using namespace graphics;
using namespace objects;

... 
// Assuming PDFPage page has been loaded and parsed.

POSITION position = page.GetFirstGraphicsObjectPosition(GraphicsObject::e_TypeText);
TextObject* text_obj = reinterpret_cast<TextObject*>(page.GetGraphicsObject(position));
MarkedContent* content = text_obj->GetMarkedContent();
int item_count = content->GetItemCount();

// Get marked content property
for (int i = 0; i < item_count; i++) {
    String tag_name = content->GetItemTagName(i);
    int mcid = content->GetItemMCID(i);
}
...
C
#include "include/fs_basictypes_c.h"
#include "include/fs_common_c.h"
#include "include/fs_pdfdoc_c.h"
#include "include/fs_pdfpage_c.h"
#include "include/fs_image_c.h"
#include "include/fs_pdfgraphicsobject_c.h"
#include "include/fs_pdfobject_c.h"

... 
// Assuming FS_PDFPAGE_HANDLE page has been loaded and parsed.

// Locate the first text object on the page.
FS_POSITION position;
FSDK_GraphicsObjects_GetFirstGraphicsObjectPosition(page, e_FSTypeText, &position);

// The object is returned through the out-parameter, so the call is made
// directly; reinterpret_cast is C++-only and must not appear in C code.
FS_TEXTOBJECT_HANDLE text_obj;
FSDK_GraphicsObjects_GetGraphicsObject(page, position, &text_obj);

// Fetch the marked content attached to the text object and count its items.
FS_MARKEDCONTENT_HANDLE content;
FSDK_GraphicsObject_GetMarkedContent(text_obj, &content);
int item_count = 0;
FSDK_MarkedContent_GetItemCount(content, &item_count);

// Get marked content property
for (int i = 0; i < item_count; i++) {
    FS_BSTR tag_name;
    FSDK_MarkedContent_GetItemTagName(content, i, &tag_name);
    int mcid;
    FSDK_MarkedContent_GetItemMCID(content, i, &mcid);
}
...
java
import com.foxit.sdk.pdf.graphics.GraphicsObject;
import com.foxit.sdk.pdf.graphics.MarkedContent;
...

// Locate the first text object on the page. The type constant is qualified
// with its class because only GraphicsObject and MarkedContent are imported.
long position = page.getFirstGraphicsObjectPosition(GraphicsObject.e_TypeText);
GraphicsObject text_obj = page.getGraphicsObject(position);
MarkedContent content = text_obj.getMarkedContent();

int nCount = content.getItemCount();
// Get marked content property
for (int i = 0; i < nCount; i++) {
     String tag_name = content.getItemTagName(i);
     int mcid = content.getItemMCID(i);
}
...
py
import sys
import site

# True when the interpreter's major version is 2.
_PYTHON2_ = sys.version_info.major == 2

if not _PYTHON2_:
    from FoxitPDFSDKPython3 import *
else:
    #replace with the python2 lib path
    site.addsitedir('../../../')
    from FoxitPDFSDKPython2 import *
... 
# Assuming PDFPage page has been loaded and parsed.

# Locate the first text object on the page and fetch its marked content.
position = page.GetFirstGraphicsObjectPosition(GraphicsObject.e_TypeText)
text_obj = page.GetGraphicsObject(position)
content = text_obj.GetMarkedContent()
item_count = content.GetItemCount()

# Read the tag name and MCID of every marked-content item.
for index in range(item_count):
    tag_name = content.GetItemTagName(index)
    mcid = content.GetItemMCID(index)
...
objc
#include "FSPDFObjC.h"
...

// Locate the first text object on the page and fetch its marked content.
long position = [page getFirstGraphicsObjectPosition:FSGraphicsObjectTypeText];
FSTextObject *text_obj = [[page getGraphicsObject:position] getTextObject];
FSMarkedContent *content = [text_obj getMarkedContent];

// Read the tag name and MCID of every marked-content item.
int item_count = [content getItemCount];
for (int index = 0; index < item_count; ++index) {
    NSString *tag_name = [content getItemTagName:index];
    int mcid = [content getItemMCID:index];
}
...
js
const FSDK = require("@foxitsoftware/foxit-pdf-sdk-node");
... 
// Assuming PDFPage page has been loaded and parsed.

// Locate the first text object on the page and fetch its marked content.
let position = page.GetFirstGraphicsObjectPosition(FSDK.GraphicsObject.e_TypeText);
let text_obj = page.GetGraphicsObject(position).GetTextObject();
let content = text_obj.GetMarkedContent();
let item_count = content.GetItemCount();

// Get marked content property.
for (let i = 0; i < item_count; i++) {
  let tag_name = content.GetItemTagName(i);
  let mcid = content.GetItemMCID(i);
}
...
csharp
using foxit;
using foxit.common;
using foxit.common.fxcrt;
using foxit.pdf;
using foxit.pdf.graphics;
using foxit.pdf.objects;

... 
// Assuming PDFPage page has been loaded and parsed.

// Locate the first text object on the page and fetch its marked content.
long position = page.GetFirstGraphicsObjectPosition(GraphicsObject.Type.e_TypeText);
GraphicsObject text_obj = page.GetGraphicsObject(position);
MarkedContent content = text_obj.GetMarkedContent();

// Read the tag name and MCID of every marked-content item.
int nCount = content.GetItemCount();
for (int index = 0; index < nCount; ++index)
{
     String tag_name = content.GetItemTagName(index);
     int mcid = content.GetItemMCID(index);
}
...