文本搜索 (Text Search)
福昕 PDF SDK 提供 APIs 来搜索 PDF 文档、XFA 文档、文本页面或者 PDF 注释中的文本。它提供了文本搜索和获取搜索结果的函数:(以 Java 开发语言为例)
- 指定搜索模式和选项,使用 TextSearch.setPattern、TextSearch.setStartPage(仅对 PDF 文档中的文本搜索有用)、TextSearch.setEndPage(仅对 PDF 文档中的文本搜索有用)和 TextSearch.setSearchFlags 接口。
- 进行搜索,使用 TextSearch.findNext 和 TextSearch.findPrev 接口。
- 获取搜索结果,使用 TextSearch.getMatchXXX() 接口。
如何在 PDF 文档中搜索指定的文本
c++
#include "include/common/fs_common.h"
#include "include/pdf/fs_pdfdoc.h"
#include "include/pdf/fs_pdfpage.h"
#include "include/pdf/fs_search.h"
using namespace foxit;
using namespace foxit::common;
using foxit::common::Library;
using namespace pdf;
...
// Assuming PDFDoc doc has been loaded.
// Search for all pages of doc.
TextSearch search(doc, NULL);
int start_index = 0, end_index = doc.GetPageCount() - 1;
search.SetStartPage(start_index);
search.SetEndPage(end_index);
WString pattern = L"Foxit";
search.SetPattern(pattern);
foxit::uint32 flags = TextSearch::e_SearchNormal;
search.SetSearchFlags(flags);
...
int match_count = 0;
while (search.FindNext()) {
RectFArray rect_array = search.GetMatchRects();
match_count ++;
}
...
C
#include "include/fs_basictypes_c.h"
#include "include/fs_common_c.h"
#include "include/fs_pdfdoc_c.h"
#include "include/fs_pdfpage_c.h"
#include "include/fs_search_c.h"
...
// Assuming FS_PDFDOC_HANDLE doc has been loaded.
// Search for all pages of doc.
FS_TEXTSEARCH_HANDLE search;
FSDK_TextSearch_Create(doc, NULL, e_FSTextParseFlagsParseTextNormal, &search);
int start_index = 0;
int end_index = 0;
int index = 0;  // Receives the page count of doc.
FSDK_PDFDoc_GetPageCount(doc, &index);
end_index = index - 1;
FS_BOOL return_value1 = false;
FSDK_TextSearch_SetStartPage(search, start_index, &return_value1);
FS_BOOL return_value2 = false;
FSDK_TextSearch_SetEndPage(search, end_index, &return_value2);
const wchar_t* pattern = L"Foxit";
FS_BOOL return_value3 = false;
FSDK_TextSearch_SetPattern(search, pattern, &return_value3);
FS_UINT32 flags = e_FSSearchFlagsSearchNormal;
FS_BOOL return_value4 = false;
FSDK_TextSearch_SetSearchFlags(search, flags, &return_value4);
...
int match_count = 0;
// Initialised so the loop condition is well defined even if FindNext does not write it.
FS_BOOL return_value5 = false;
FSDK_TextSearch_FindNext(search, &return_value5);
while (return_value5) {
  // First call passes a NULL buffer; the count comes back in return_array_length
  // (two-call pattern as used by the original sample -- confirm against the API reference).
  FSRectF *rect_array = NULL;
  FS_UINT32 return_array_length = 0;
  FSDK_TextSearch_GetMatchRects(search, rect_array, &return_array_length);
  match_count ++;
  if (return_array_length > 0) {
    // Size the buffer from the reported count, then fetch the rectangles.
    rect_array = (FSRectF*)malloc(return_array_length * sizeof(FSRectF));
    if (rect_array != NULL) {
      FSDK_TextSearch_GetMatchRects(search, rect_array, &return_array_length);
      free(rect_array);
    }
  }
  // Advance to the next match; the loop ends when none is found.
  FSDK_TextSearch_FindNext(search, &return_value5);
}
...
java
import com.foxit.sdk.common.fxcrt.RectF;
import com.foxit.sdk.common.fxcrt.RectFArray;
import com.foxit.sdk.pdf.PDFDoc;
import com.foxit.sdk.pdf.TextSearch;
import com.foxit.sdk.pdf.TextPage;
...
// Assuming PDFDoc doc has been loaded.
// Search all pages of doc.
TextSearch search = new TextSearch(doc, null, TextPage.e_ParseTextNormal);
int start_index = 0, end_index = doc.getPageCount() - 1;
search.setStartPage(start_index);
search.setEndPage(end_index);
String pattern = "Foxit";
search.setPattern(pattern);
// The search-flag constants are defined on TextSearch, so they must be qualified.
int flags = TextSearch.e_SearchNormal;
// To refine the search, OR additional flags in, for example:
// flags |= TextSearch.e_SearchMatchCase;
// flags |= TextSearch.e_SearchMatchWholeWord;
// flags |= TextSearch.e_SearchConsecutive;
search.setSearchFlags(flags);
// Visit every match, fetching its rectangles and counting the hit.
int match_count = 0;
while (search.findNext()) {
    RectFArray rect_array = search.getMatchRects();
    match_count++;
}
...
py
import sys
import site
# Detect the interpreter's major version so the matching SDK binding is imported.
_PYTHON2_ = sys.version_info.major == 2
if _PYTHON2_:
    # replace with python2 lib path
    site.addsitedir('../../../')
    from FoxitPDFSDKPython2 import *
else:
    from FoxitPDFSDKPython3 import *
...
# Assuming PDFDoc doc has been loaded.
# Search for all pages of doc.
search = TextSearch(doc, None)
start_index = 0
end_index = doc.GetPageCount() - 1
search.SetStartPage(start_index)
search.SetEndPage(end_index)
pattern = "Foxit"
search.SetPattern(pattern)
flags = TextSearch.e_SearchNormal
search.SetSearchFlags(flags)
...
# Visit every match, fetching its rectangles and counting the hit.
match_count = 0
while search.FindNext():
    rect_array = search.GetMatchRects()
    match_count += 1
...
objc
#include "FSPDFObjC.h"
...
// Precondition: an FSPDFDoc object named doc is already loaded.
...
FSTextSearch *search = [[FSTextSearch alloc] initWithDocument:doc
                                                       cancel:nil
                                                        flags:(int)FSTextPageParseTextNormal];
// Page range: from the first page through the last page of doc.
int firstPage = 0;
int lastPage = [doc getPageCount] - 1;
[search setStartPage:firstPage];
[search setEndPage:lastPage];
// Look for the text "Foxit" using the normal search flag.
[search setPattern:@"Foxit"];
[search setSearchFlags:(unsigned int)FSTextSearchSearchNormal];
// Step through the matches: fetch each match's rectangles and count the hit.
int match_count = 0;
for (; [search findNext]; match_count++) {
    FSRectFArray *rects = [search getMatchRects];
}
...
js
const FSDK = require("@foxitsoftware/foxit-pdf-sdk-node");
...
// Assuming PDFDoc doc has been loaded.
// Search for all pages of doc.
let search = new FSDK.TextSearch(doc, null, FSDK.TextPage.e_ParseTextNormal);
let start_index = 0;
let end_index = doc.GetPageCount() - 1;
search.SetStartPage(start_index);
search.SetEndPage(end_index - 1);
let pattern = "Foxit";
search.SetPattern(pattern);
let flags = FSDK.TextSearch.e_SearchNormal;
search.SetSearchFlags(flags);
...
let match_count = 0;
while (search.FindNext()) {
let rect_array = search.GetMatchRects();
OutputMatchedInfo(text_out, search, match_count);
match_count ++;
}
...
csharp
using foxit.common;
using foxit.pdf;
...
// Assuming PDFDoc doc has been loaded.
// Search all pages of doc; the using statement disposes the TextSearch at the end of the block.
using (TextSearch search = new TextSearch(doc, null, (int)TextPage.TextParseFlags.e_ParseTextNormal))
{
    int start_index = 0;
    int end_index = doc.GetPageCount() - 1;
    search.SetStartPage(start_index);
    search.SetEndPage(end_index);
    string pattern = "Foxit";
    search.SetPattern(pattern);
    int flags = (int)TextSearch.SearchFlags.e_SearchNormal;
    search.SetSearchFlags(flags);
    // Visit every match, fetching its rectangles and counting the hit.
    int match_count = 0;
    while (search.FindNext())
    {
        RectFArray rect_array = search.GetMatchRects();
        match_count++;
    }
}
...