Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion packages/global/common/system/types/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,21 @@ export type SystemEnvType = {
oneapiUrl?: string;
chatApiKey?: string;

customPdfParse?: customPdfParseType;
customPdfParse?: SystemEnvCustomPdfParseType;
};

/**
 * One configured file parser, as stored in `systemEnv.customPdfParse`.
 */
export type customPdfParseType = {
// Identifier of the parser; a parser is selected by matching this value against the requested name.
name: string;
// Description text for the parser (presumably shown to users — confirm against the UI).
desc: string;
// Endpoint of a custom parsing service. Checked before `doc2xKey` when choosing a backend.
url?: string;
// Token read alongside `url` by the custom-service backend.
key?: string;
// API key handed to the Doc2x client; only consulted when `url` is not set.
doc2xKey?: string;
// Unit price per parsed page: usage points are `pages * price`, and a missing price counts as 0.
price?: number;
// Comma-separated list of file extensions this parser handles.
extension?: string;
};

/** Shape of `systemEnv.customPdfParse`: a list of parser entries that consumers look up by `name`. */
export type SystemEnvCustomPdfParseType = customPdfParseType[];

export type LicenseDataType = {
startTime: string;
expiredTime: string;
Expand Down
3 changes: 2 additions & 1 deletion packages/global/core/app/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ export const defaultChatInputGuideConfig = {
/**
 * Default file-selection config for an app: file and image selection disabled,
 * at most 10 files, and no custom parser selected.
 */
export const defaultAppSelectFileConfig: AppFileSelectConfigType = {
  canSelectFile: false,
  canSelectImg: false,
  maxFiles: 10,
  // Empty string means "no custom parser selected"; readers treat any falsy value that way.
  customPdfParse: ''
};

export enum AppTemplateTypeEnum {
Expand Down
2 changes: 1 addition & 1 deletion packages/global/core/app/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ export type AppAutoExecuteConfigType = {
// File
/** File-selection settings of an app (read from `chatConfig.fileSelectConfig`). */
export type AppFileSelectConfigType = {
  canSelectFile: boolean;
  // Name of the parser to use for selected files; empty or undefined falls back to the built-in parser.
  customPdfParse?: string;
  canSelectImg: boolean;
  maxFiles: number;
};
Expand Down
2 changes: 1 addition & 1 deletion packages/global/core/dataset/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type DatasetCollectionStoreDataType = ChunkSettingsType & {
parentId?: string;
metadata?: Record<string, any>;

customPdfParse?: boolean;
customPdfParse?: string;
};

// create collection params
Expand Down
2 changes: 1 addition & 1 deletion packages/global/core/dataset/type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ export type DatasetCollectionSchemaType = ChunkSettingsType & {
};

// Parse settings
customPdfParse?: boolean;
customPdfParse?: string;
trainingType: DatasetCollectionDataProcessModeEnum;
};

Expand Down
4 changes: 2 additions & 2 deletions packages/service/common/file/gridfs/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,14 @@ export const readFileContentFromMongo = async ({
tmbId,
bucketName,
fileId,
customPdfParse = false,
customPdfParse,
getFormatText
}: {
teamId: string;
tmbId: string;
bucketName: `${BucketNameEnum}`;
fileId: string;
customPdfParse?: boolean;
customPdfParse?: string;
getFormatText?: boolean; // 数据类型都尽可能转化成 markdown 格式
}): Promise<{
rawText: string;
Expand Down
40 changes: 27 additions & 13 deletions packages/service/common/file/read/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export type readRawTextByLocalFileParams = {
tmbId: string;
path: string;
encoding: string;
customPdfParse?: boolean;
customPdfParse?: string;
getFormatText?: boolean;
metadata?: Record<string, any>;
};
Expand Down Expand Up @@ -46,7 +46,7 @@ export const readRawContentByFileBuffer = async ({
buffer,
encoding,
metadata,
customPdfParse = false,
customPdfParse,
getFormatText = true
}: {
teamId: string;
Expand All @@ -57,7 +57,7 @@ export const readRawContentByFileBuffer = async ({
encoding: string;
metadata?: Record<string, any>;

customPdfParse?: boolean;
customPdfParse?: string;
getFormatText?: boolean;
}): Promise<{
rawText: string;
Expand All @@ -68,9 +68,9 @@ export const readRawContentByFileBuffer = async ({
encoding,
buffer
});
const parsePdfFromCustomService = async (): Promise<ReadFileResponse> => {
const url = global.systemEnv.customPdfParse?.url;
const token = global.systemEnv.customPdfParse?.key;
const parsePdfFromCustomService = async (parser: any): Promise<ReadFileResponse> => {
const url = parser.url;
const token = parser.key;
if (!url) return systemParse();

const start = Date.now();
Expand Down Expand Up @@ -104,7 +104,8 @@ export const readRawContentByFileBuffer = async ({
createPdfParseUsage({
teamId,
tmbId,
pages: response.pages
pages: response.pages,
parserName: customPdfParse
});

return {
Expand All @@ -114,16 +115,17 @@ export const readRawContentByFileBuffer = async ({
};
};
// Doc2x api
const parsePdfFromDoc2x = async (): Promise<ReadFileResponse> => {
const doc2xKey = global.systemEnv.customPdfParse?.doc2xKey;
const parsePdfFromDoc2x = async (parser: any): Promise<ReadFileResponse> => {
const doc2xKey = parser.doc2xKey;
if (!doc2xKey) return systemParse();

const { pages, text, imageList } = await useDoc2xServer({ apiKey: doc2xKey }).parsePDF(buffer);

createPdfParseUsage({
teamId,
tmbId,
pages
pages,
parserName: customPdfParse
});

return {
Expand All @@ -135,8 +137,14 @@ export const readRawContentByFileBuffer = async ({
// Custom read file service
// Resolves the parser named by `customPdfParse` and dispatches to its backend.
// Falls back to the built-in parser when nothing is selected, the name is
// unknown, or the matching entry has neither `url` nor `doc2xKey`.
const pdfParseFn = async (): Promise<ReadFileResponse> => {
  if (!customPdfParse) return systemParse();

  // Guard against a non-array value (presumably a config still written as a
  // single object) so `.find` is never called on something that lacks it.
  const configured = global.systemEnv.customPdfParse;
  const parsers = Array.isArray(configured) ? configured : [];
  const selectedParser = parsers.find((parser) => parser.name === customPdfParse);

  if (!selectedParser) return systemParse();

  // A custom service URL takes precedence over a Doc2x key.
  if (selectedParser.url) return parsePdfFromCustomService(selectedParser);
  if (selectedParser.doc2xKey) return parsePdfFromDoc2x(selectedParser);

  return systemParse();
};
Expand All @@ -145,9 +153,15 @@ export const readRawContentByFileBuffer = async ({
addLog.debug(`Start parse file`, { extension });

// Route the file to the selected custom parser only when that parser declares
// support for this extension; everything else goes through the built-in parser.
let { rawText, formatText, imageList } = await (async () => {
  // Guard against a non-array value (presumably a config still written as a
  // single object) so `.find` is never called on something that lacks it.
  const configured = global.systemEnv.customPdfParse;
  const parsers = Array.isArray(configured) ? configured : [];
  const selectedParser = parsers.find((parser) => parser.name === customPdfParse);

  if (selectedParser) {
    // `extension` is a comma-separated list. Entries are trimmed and lower-cased so
    // "pdf, docx" matches "docx". `||` (not `??`) makes an empty string default to
    // "pdf" as well, so a parser that declares nothing still handles PDFs.
    const supportedExtensions = (selectedParser.extension || 'pdf')
      .split(',')
      .map((item) => item.trim().toLowerCase())
      .filter(Boolean);

    if (supportedExtensions.includes(extension)) {
      return await pdfParseFn();
    }
  }

  return await systemParse();
})();

Expand Down
5 changes: 2 additions & 3 deletions packages/service/common/system/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ export const initFastGPTConfig = (config?: FastGPTConfigFileType) => {
if (!config) return;

// Special config computed
config.feConfigs.showCustomPdfParse =
!!config.systemEnv.customPdfParse?.url || !!config.systemEnv.customPdfParse?.doc2xKey;
config.feConfigs.customPdfParsePrice = config.systemEnv.customPdfParse?.price || 0;
const parsers = config.systemEnv.customPdfParse || [];
config.feConfigs.showCustomPdfParse = parsers.length > 0;

global.feConfigs = config.feConfigs;
global.systemEnv = config.systemEnv;
Expand Down
2 changes: 1 addition & 1 deletion packages/service/core/dataset/apiDataset/custom/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
teamId: string;
tmbId: string;
apiFileId: string;
customPdfParse?: boolean;
customPdfParse?: string;
}): Promise<ApiFileReadContentResponse> => {
const data = await request<
{
Expand Down
2 changes: 1 addition & 1 deletion packages/service/core/dataset/collection/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ const DatasetCollectionSchema = new Schema({
forbid: Boolean,

// Parse settings
customPdfParse: Boolean,
customPdfParse: String,
apiFileParentId: String,

// Chunk settings
Expand Down
6 changes: 3 additions & 3 deletions packages/service/core/dataset/read.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export const readFileRawTextByUrl = async ({
teamId: string;
tmbId: string;
url: string;
customPdfParse?: boolean;
customPdfParse?: string;
getFormatText?: boolean;
relatedId: string; // externalFileId / apiFileId
maxFileSize?: number;
Expand Down Expand Up @@ -161,7 +161,7 @@ export const readDatasetSourceRawText = async ({
tmbId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
customPdfParse?: boolean;
customPdfParse?: string;
getFormatText?: boolean;

selector?: string; // link selector
Expand Down Expand Up @@ -241,7 +241,7 @@ export const readApiServerFileContent = async ({
apiFileId: string;
teamId: string;
tmbId: string;
customPdfParse?: boolean;
customPdfParse?: string;
}): Promise<{
title?: string;
rawText: string;
Expand Down
2 changes: 1 addition & 1 deletion packages/service/core/workflow/dispatch/ai/agent/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ const getMultiInput = async ({
fileLinks?: string[];
requestOrigin?: string;
maxFiles: number;
customPdfParse?: boolean;
customPdfParse?: string;
inputFiles: UserChatItemValueItemType['file'][];
hasReadFilesTool: boolean;
}) => {
Expand Down
2 changes: 1 addition & 1 deletion packages/service/core/workflow/dispatch/ai/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ async function getMultiInput({
stringQuoteText?: string; // file quote
requestOrigin?: string;
maxFiles: number;
customPdfParse?: boolean;
customPdfParse?: string;
runningUserInfo: ChatDispatchProps['runningUserInfo'];
}) {
// 旧版本适配====>
Expand Down
4 changes: 2 additions & 2 deletions packages/service/core/workflow/dispatch/tools/readFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
params: { fileUrlList = [] }
} = props;
const maxFiles = chatConfig?.fileSelectConfig?.maxFiles || 20;
const customPdfParse = chatConfig?.fileSelectConfig?.customPdfParse || false;
const customPdfParse = chatConfig?.fileSelectConfig?.customPdfParse;

// Get files from histories
const filesFromHistories = version !== '489' ? [] : getHistoryFileLinks(histories);
Expand Down Expand Up @@ -126,7 +126,7 @@ export const getFileContentFromLinks = async ({
maxFiles: number;
teamId: string;
tmbId: string;
customPdfParse?: boolean;
customPdfParse?: string;
}) => {
const parseUrlList = urls
// Remove invalid urls
Expand Down
8 changes: 6 additions & 2 deletions packages/service/support/wallet/usage/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,17 @@ export const createTrainingUsage = async ({
export const createPdfParseUsage = async ({
teamId,
tmbId,
pages
pages,
parserName
}: {
teamId: string;
tmbId: string;
pages: number;
parserName?: string;
}) => {
const unitPrice = global.systemEnv?.customPdfParse?.price || 0;
const parsers = global.systemEnv?.customPdfParse || [];
const selectedParser = parserName ? parsers.find((p) => p.name === parserName) : parsers[0];
const unitPrice = selectedParser?.price || 0;
const totalPoints = pages * unitPrice;

createUsage({
Expand Down
3 changes: 3 additions & 0 deletions packages/web/i18n/en/app.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@
"pdf_enhance_parse": "PDF enhancement analysis",
"pdf_enhance_parse_price": "{{price}} points/page",
"pdf_enhance_parse_tips": "Parses the file with a PDF recognition model: the document is converted to Markdown with its images retained, and scanned documents can be recognized as well. Recognition takes longer than standard parsing.",
"select_pdf_parser": "Select PDF Parser",
"system_default_parser": "System Default Parser",
"system_default_parser_desc": "Use system built-in PDF parser",
"permission.des.manage": "Based on write permissions, you can configure publishing channels, view conversation logs, and assign permissions to the application.",
"permission.des.read": "Use the app to have conversations",
"permission.des.write": "Can view and edit apps",
Expand Down
3 changes: 3 additions & 0 deletions packages/web/i18n/en/dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@
"pdf_enhance_parse": "PDF enhancement analysis",
"pdf_enhance_parse_price": "{{price}} points/page",
"pdf_enhance_parse_tips": "Parses the file with a PDF recognition model: the document is converted to Markdown with its images retained, and scanned documents can be recognized as well. Recognition takes longer than standard parsing.",
"select_pdf_parser": "Select PDF Parser",
"system_default_parser": "System Default Parser",
"system_default_parser_desc": "Use system built-in PDF parser",
"permission.des.manage": "Can manage the entire knowledge base data and information",
"permission.des.read": "View knowledge base content",
"permission.des.write": "Ability to add and change knowledge base content",
Expand Down
3 changes: 3 additions & 0 deletions packages/web/i18n/zh-CN/app.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@
"pdf_enhance_parse": "PDF增强解析",
"pdf_enhance_parse_price": "{{price}}积分/页",
"pdf_enhance_parse_tips": "调用 PDF 识别模型进行解析,可以将其转换成 Markdown 并保留文档中的图片,同时也可以对扫描件进行识别,识别时间较长。",
"select_pdf_parser": "选择PDF解析器",
"system_default_parser": "系统默认解析器",
"system_default_parser_desc": "使用系统内置的PDF解析器",
"permission.des.manage": "写权限基础上,可配置发布渠道、查看对话日志、分配该应用权限",
"permission.des.read": "可使用该应用进行对话",
"permission.des.write": "可查看和编辑应用",
Expand Down
3 changes: 3 additions & 0 deletions packages/web/i18n/zh-CN/dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@
"pdf_enhance_parse": "PDF增强解析",
"pdf_enhance_parse_price": "{{price}}积分/页",
"pdf_enhance_parse_tips": "调用 PDF 识别模型进行解析,可以将其转换成 Markdown 并保留文档中的图片,同时也可以对扫描件进行识别,识别时间较长。",
"select_pdf_parser": "选择PDF解析器",
"system_default_parser": "系统默认解析器",
"system_default_parser_desc": "使用系统内置的PDF解析器",
"permission.des.manage": "可管理整个知识库数据和信息",
"permission.des.read": "可查看知识库内容",
"permission.des.write": "可增加和变更知识库内容",
Expand Down
3 changes: 3 additions & 0 deletions packages/web/i18n/zh-Hant/app.json
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@
"pdf_enhance_parse": "PDF 增強解析",
"pdf_enhance_parse_price": "{{price}}積分/頁",
"pdf_enhance_parse_tips": "呼叫 PDF 識別模型進行解析,可以將其轉換成 Markdown 並保留文件中的圖片,同時也可以對掃描件進行識別,識別時間較長。",
"select_pdf_parser": "選擇PDF解析器",
"system_default_parser": "系統預設解析器",
"system_default_parser_desc": "使用系統內建的PDF解析器",
"permission.des.manage": "在寫入權限基礎上,可以設定發布通道、檢視對話紀錄、分配這個應用程式的權限",
"permission.des.read": "可以使用這個應用程式進行對話",
"permission.des.write": "可以檢視和編輯應用程式",
Expand Down
3 changes: 3 additions & 0 deletions packages/web/i18n/zh-Hant/dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@
"pdf_enhance_parse": "PDF 增強解析",
"pdf_enhance_parse_price": "{{price}}積分/頁",
"pdf_enhance_parse_tips": "呼叫 PDF 識別模型進行解析,可以將其轉換成 Markdown 並保留文件中的圖片,同時也可以對掃描件進行識別,識別時間較長。",
"select_pdf_parser": "選擇PDF解析器",
"system_default_parser": "系統預設解析器",
"system_default_parser_desc": "使用系統內建的PDF解析器",
"permission.des.manage": "可管理整個資料集的資料和資訊",
"permission.des.read": "可檢視資料集內容",
"permission.des.write": "可新增和變更資料集內容",
Expand Down
Loading
Loading