Skip to content

Commit 422e150

Browse files
authored
Merge pull request #26 from DocumindHQ/gemini-fix
Replaced ai-sdk/google
2 parents d596b39 + b484a1e commit 422e150

11 files changed

Lines changed: 93 additions & 38 deletions

File tree

core/dist/types.d.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ export declare enum OpenAIModels {
33
GPT_4O_MINI = "gpt-4o-mini"
44
}
55
export declare enum LocalModels {
6-
LLAVA = "llava",
76
LLAMA3_2_VISION = "llama3.2-vision"
87
}
98
export declare enum GoogleModels {
109
GEMINI_2_FLASH = "gemini-2.0-flash-001",
11-
GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview-02-05"
10+
GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview-02-05",
11+
GEMINI_1_5_FLASH = "gemini-1.5-flash",
12+
GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b",
13+
GEMINI_1_5_PRO = "gemini-1.5-pro"
1214
}
1315
export type ModelOptions = OpenAIModels | GoogleModels | LocalModels;
1416
export interface DocumindArgs {

core/dist/types.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ var OpenAIModels;
88
})(OpenAIModels || (exports.OpenAIModels = OpenAIModels = {}));
99
var LocalModels;
1010
(function (LocalModels) {
11-
LocalModels["LLAVA"] = "llava";
11+
//LLAVA = "llava",
1212
LocalModels["LLAMA3_2_VISION"] = "llama3.2-vision";
1313
})(LocalModels || (exports.LocalModels = LocalModels = {}));
1414
var GoogleModels;
1515
(function (GoogleModels) {
1616
GoogleModels["GEMINI_2_FLASH"] = "gemini-2.0-flash-001";
1717
GoogleModels["GEMINI_2_FLASH_LITE"] = "gemini-2.0-flash-lite-preview-02-05";
18+
GoogleModels["GEMINI_1_5_FLASH"] = "gemini-1.5-flash";
19+
GoogleModels["GEMINI_1_5_FLASH_8B"] = "gemini-1.5-flash-8b";
20+
GoogleModels["GEMINI_1_5_PRO"] = "gemini-1.5-pro";
1821
})(GoogleModels || (exports.GoogleModels = GoogleModels = {}));

core/src/types.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@ export enum OpenAIModels {
44
}
55

66
export enum LocalModels {
7-
LLAVA = "llava",
7+
//LLAVA = "llava",
88
LLAMA3_2_VISION = "llama3.2-vision",
99
}
1010

1111
export enum GoogleModels {
1212
GEMINI_2_FLASH = "gemini-2.0-flash-001",
1313
GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview-02-05",
14+
GEMINI_1_5_FLASH = "gemini-1.5-flash",
15+
GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b",
16+
GEMINI_1_5_PRO = "gemini-1.5-pro",
1417
}
1518

1619
export type ModelOptions = OpenAIModels | GoogleModels | LocalModels;

extractor/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,16 @@
99
},
1010
"dependencies": {
1111
"@ai-sdk/google": "^1.1.14",
12+
"@google/generative-ai": "^0.22.0",
1213
"ai": "^4.1.41",
1314
"axios": "^1.7.7",
1415
"core": "*",
1516
"dotenv": "^16.4.5",
1617
"openai": "^4.68.4",
1718
"pdf-lib": "^1.17.1",
1819
"uuid": "^11.0.2",
19-
"zod": "^3.23.8"
20+
"zod": "^3.23.8",
21+
"zod-to-json-schema": "^3.24.2"
2022
},
2123
"devDependencies": {
2224
"nodemon": "^3.1.7"

extractor/src/autoschema/autogenerateSchema.js

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ async function blanketSchema(markdown, model) {
4242
const result = await extraction({
4343
markdown,
4444
zodSchema: schemaToUse,
45-
prompt: AUTO_SCHEMA_PROMPT(markdown),
45+
prompt: AUTO_SCHEMA_PROMPT(markdown),
46+
model: model,
4647
});
4748

4849
if (!result || !result.fields) {
@@ -67,7 +68,8 @@ async function instructionBasedSchema(markdown, model, instructions) {
6768
const instructionFields = await extraction({
6869
markdown: instructions,
6970
zodSchema: instructionsZod,
70-
prompt: instructionPrompt
71+
prompt: instructionPrompt,
72+
model: model,
7173
});
7274

7375
if (!instructionFields || !instructionFields.fields) {
@@ -81,7 +83,8 @@ async function instructionBasedSchema(markdown, model, instructions) {
8183
const result = await extraction({
8284
markdown,
8385
zodSchema: schemaToUse,
84-
prompt: INSTRUCTIONS_SCHEMA_PROMPT(markdown, data),
86+
prompt: INSTRUCTIONS_SCHEMA_PROMPT(markdown, data),
87+
model: model,
8588
});
8689

8790
if (!result || !result.fields) {

extractor/src/extractors/google.js

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,49 @@
1-
import { createGoogleGenerativeAI } from '@ai-sdk/google';
2-
import { generateObject } from 'ai';
1+
import { GoogleGenerativeAI } from "@google/generative-ai";
2+
import { zodToJsonSchema } from "zod-to-json-schema";
33

4-
export const googleExtractor = async ({ markdown, zodSchema, prompt }) => {
4+
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
5+
6+
export const googleExtractor = async ({ markdown, zodSchema, prompt, model }) => {
57
if (!process.env.GEMINI_API_KEY) {
68
throw new Error("Missing GEMINI_API_KEY");
79
}
810

9-
const google = createGoogleGenerativeAI({
10-
apiKey: process.env.GEMINI_API_KEY,
11-
});
12-
const googleModel = "gemini-2.0-flash-001"
13-
14-
const completion = await generateObject({
15-
model: google(googleModel, {
16-
structuredOutputs: false,
17-
}),
18-
schema: zodSchema,
19-
prompt: markdown,
20-
system: prompt,
21-
});
22-
23-
const event = completion.object;
24-
return event;
25-
}
11+
const googleModel = model
12+
13+
// Convert Zod schema to JSON schema
14+
let jsonSchema = zodToJsonSchema(zodSchema);
15+
16+
// Remove additionalProperties and $schema keys
17+
const removeKeys = (obj) => {
18+
if (Array.isArray(obj)) {
19+
return obj.map(removeKeys);
20+
} else if (typeof obj === "object" && obj !== null) {
21+
return Object.fromEntries(
22+
Object.entries(obj)
23+
.filter(([key]) => key !== "additionalProperties" && key !== "$schema")
24+
.map(([key, value]) => [key, removeKeys(value)])
25+
);
26+
}
27+
return obj;
28+
};
29+
30+
jsonSchema = removeKeys(jsonSchema);
31+
32+
const modelToUse = genAI.getGenerativeModel({
33+
model: googleModel,
34+
systemInstruction: prompt,
35+
generationConfig: {
36+
responseMimeType: "application/json",
37+
responseSchema: jsonSchema,
38+
},
39+
});
40+
41+
const result = await modelToUse.generateContent(
42+
markdown,
43+
);
44+
45+
//console.log(result.response.text());
46+
const event = result.response.text();
47+
return event;
48+
}
49+

extractor/src/extractors/index.js

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,14 @@ import { openAIExtractor } from "./openAI.js";
33
import { googleExtractor } from "./google.js";
44

55
export const OpenAIModels = ["gpt-4o", "gpt-4o-mini"];
6-
export const LocalModels = ["llava", "llama3.2-vision"];
7-
export const GoogleModels = ["gemini-2.0-flash-001", "gemini-2.0-flash-lite-preview-02-05"]
6+
export const LocalModels = ["llama3.2-vision"];
7+
export const GoogleModels = [
8+
"gemini-2.0-flash-001",
9+
"gemini-2.0-flash-lite-preview-02-05",
10+
"gemini-1.5-flash",
11+
"gemini-1.5-flash-8b",
12+
"gemini-1.5-pro"
13+
];
814

915
export function getExtractor(model) {
1016
if (OpenAIModels.includes(model)) {

extractor/src/extractors/ollama.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import OpenAI from "openai";
22
import { zodResponseFormat } from "openai/helpers/zod";
33

4-
export const ollamaExtractor = async ({ markdown, zodSchema, prompt }) => {
4+
export const ollamaExtractor = async ({ markdown, zodSchema, prompt, model }) => {
55
if (!process.env.BASE_URL) {
66
throw new Error("Missing BASE_URL");
77
}
@@ -11,7 +11,7 @@ export const ollamaExtractor = async ({ markdown, zodSchema, prompt }) => {
1111
apiKey: 'ollama'
1212
});
1313

14-
const ollamaModel = "llama3.1";
14+
const ollamaModel = model;
1515

1616
const completion = await openai.beta.chat.completions.parse({
1717
model: ollamaModel,

extractor/src/extractors/openAI.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import OpenAI from "openai";
22
import { zodResponseFormat } from "openai/helpers/zod";
33

4-
export const openAIExtractor = async ({ markdown, zodSchema, prompt }) => {
4+
export const openAIExtractor = async ({ markdown, zodSchema, prompt, model }) => {
55
if (!process.env.OPENAI_API_KEY) {
66
throw new Error("Missing OPENAI_API_KEY");
77
}
88

99
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
10-
const openAiModel = "gpt-4o-2024-08-06";
10+
const openAiModel = model;
1111

1212
const completion = await openai.beta.chat.completions.parse({
1313
model: openAiModel,

extractor/src/services/extract.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ export async function extract({ file, schema, template, model, autoSchema }) {
5959
markdown,
6060
zodSchema: dynamicZodSchema,
6161
prompt: BASE_EXTRACTION_PROMPT,
62+
model: defaultModel,
6263
});
6364

6465
return {

0 commit comments

Comments (0)