diff --git a/.gitignore b/.gitignore index f0caab40..59ad9f3a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ # Output files output.md +downloaded-urls # Dependencies node_modules diff --git a/package-lock.json b/package-lock.json index 02b619d3..ecf95897 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,12 +15,13 @@ "js-tiktoken": "^1.0.17", "lodash-es": "^4.17.21", "p-limit": "^6.2.0", + "sanitize-filename": "^1.6.3", "zod": "^3.24.1" }, "devDependencies": { "@ianvs/prettier-plugin-sort-imports": "^4.4.1", "@types/lodash-es": "^4.17.12", - "@types/node": "^22.13.0", + "@types/node": "^22.13.4", "prettier": "^3.4.2", "tsx": "^4.19.2", "typescript": "^5.7.3" @@ -775,9 +776,9 @@ } }, "node_modules/@types/node": { - "version": "22.13.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.0.tgz", - "integrity": "sha512-ClIbNe36lawluuvq3+YYhnIN2CELi+6q8NpnM7PYp4hBn/TatfboPgVSm2rwKRfnV2M+Ty9GWDFI64KEe+kysA==", + "version": "22.13.4", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.4.tgz", + "integrity": "sha512-ywP2X0DYtX3y08eFVx5fNIw7/uIv8hYUKgXoK8oayJlLnKcRfEYCxWMVE1XagUdVtCJlZT1AU4LXEABW+L1Peg==", "dev": true, "license": "MIT", "dependencies": { @@ -1220,6 +1221,15 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/sanitize-filename": { + "version": "1.6.3", + "resolved": "https://registry.npmjs.org/sanitize-filename/-/sanitize-filename-1.6.3.tgz", + "integrity": "sha512-y/52Mcy7aw3gRm7IrcGDFx/bCk4AhRh2eI9luHOQM86nZsqwiRkkq2GekHXBBD+SmPidc8i2PqtYZl+pWJ8Oeg==", + "license": "WTFPL OR ISC", + "dependencies": { + "truncate-utf8-bytes": "^1.0.0" + } + }, "node_modules/secure-json-parse": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", @@ -1264,6 +1274,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/truncate-utf8-bytes": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz", + "integrity": "sha512-95Pu1QXQvruGEhv62XCMO3Mm90GscOCClvrIUwCM0PYOXK3kaF3l3sIHxx71ThJfcbM2O5Au6SO3AWCSEfW4mQ==", + "license": "WTFPL", + "dependencies": { + "utf8-byte-length": "^1.0.1" + } + }, "node_modules/tsx": { "version": "4.19.2", "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.2.tgz", @@ -1320,6 +1339,12 @@ "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, + "node_modules/utf8-byte-length": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.5.tgz", + "integrity": "sha512-Xn0w3MtiQ6zoz2vFyUVruaCL53O/DwUvkEeOvj+uulMm0BkUGYWmBYVyElqZaSLhY6ZD0ulfU3aBra2aVT4xfA==", + "license": "(WTFPL OR MIT)" + }, "node_modules/ws": { "version": "8.18.0", "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz", diff --git a/package.json b/package.json index 456ec7c4..ea5d3052 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "devDependencies": { "@ianvs/prettier-plugin-sort-imports": "^4.4.1", "@types/lodash-es": "^4.17.12", - "@types/node": "^22.13.0", + "@types/node": "^22.13.4", "prettier": "^3.4.2", "tsx": "^4.19.2", "typescript": "^5.7.3" @@ -27,6 +27,7 @@ "js-tiktoken": "^1.0.17", "lodash-es": "^4.17.21", "p-limit": "^6.2.0", + "sanitize-filename": "^1.6.3", "zod": "^3.24.1" }, "engines": { diff --git a/src/deep-research.ts b/src/deep-research.ts index 1505c4af..ad45a03d 100644 --- a/src/deep-research.ts +++ b/src/deep-research.ts @@ -3,6 +3,7 @@ import { generateObject } from 'ai'; import { compact } from 'lodash-es'; import pLimit from 'p-limit'; import { z } from 'zod'; +import * as fs from 'fs/promises'; import { o3MiniModel, trimPrompt } from './ai/providers'; import { systemPrompt } from './prompt'; @@ -86,6 +87,12 @@ async function generateSerpQueries({ return res.object.queries.slice(0, numQueries); } +import sanitize from 'sanitize-filename'; +import path from 'path'; +export function urlToFilepath(url: string): string { + return path.join('downloaded-urls', `${sanitize(url, { replacement: '-' })}.md`); +} + async function processSerpResult({ query, result, @@ -97,6 +104,25 @@ async function processSerpResult({ numLearnings?: number; numFollowUpQuestions?: number; }) { + // Create downloaded-urls directory if it doesn't exist + await fs.mkdir('downloaded-urls', { recursive: true }); + + // Save each document + for (const doc of result.data) { + if (doc.markdown && doc.url) { + const content = [ + doc.title ? `Title: ${doc.title}` : '', + doc.description ? `Description: ${doc.description}` : '', + `URL: ${doc.url}`, + `Accessed at: ${Date()}`, + '', + doc.markdown + ].filter(Boolean).join('\n'); + + await fs.writeFile(urlToFilepath(doc.url), content, 'utf-8'); + } + } + const contents = compact(result.data.map(item => item.markdown)).map( content => trimPrompt(content, 25_000), ); @@ -156,7 +182,7 @@ export async function writeFinalReport({ }); // Append the visited URLs section to the report - const urlsSection = `\n\n## Sources\n\n${visitedUrls.map(url => `- ${url}`).join('\n')}`; + const urlsSection = `\n\n## Sources\n\n${visitedUrls.map(url => `- ${url}, saved at ${urlToFilepath(url)}`).join('\n')}`; return res.object.reportMarkdown + urlsSection; } diff --git a/src/run.ts b/src/run.ts index 87c6073c..2f3b311e 100644 --- a/src/run.ts +++ b/src/run.ts @@ -30,7 +30,7 @@ function askQuestion(query: string): Promise { async function run() { // Get initial query const initialQuery = await askQuestion('What would you like to research? '); - + const outputFile = 'output.md'; // Get breath and depth parameters const breadth = parseInt( @@ -45,7 +45,10 @@ async function run() { 10, ) || 2; - log(`Creating research plan...`); + const inittext = `Research starting at ${new Date().toISOString()} with breadth ${breadth} and depth ${depth}.`; + + await fs.writeFile(outputFile, inittext, 'utf-8'); + log(`${inittext}\nCreating research plan...`); // Generate follow-up questions const followUpQuestions = await generateFeedback({ @@ -69,6 +72,7 @@ Initial Query: ${initialQuery} Follow-up Questions and Answers: ${followUpQuestions.map((q: string, i: number) => `Q: ${q}\nA: ${answers[i]}`).join('\n')} `; + await fs.appendFile(outputFile, combinedQuery, 'utf-8'); log('\nResearching your topic...'); @@ -83,23 +87,23 @@ ${followUpQuestions.map((q: string, i: number) => `Q: ${q}\nA: ${answers[i]}`).j }, }); - log(`\n\nLearnings:\n\n${learnings.join('\n')}`); - log( - `\n\nVisited URLs (${visitedUrls.length}):\n\n${visitedUrls.join('\n')}`, - ); + const URLsandLearnings = `\n\nLearnings:\n\n${learnings.join('\n')}\n\nVisited URLs (${visitedUrls.length}):\n\n${visitedUrls.join('\n')}\n`; + await fs.appendFile(outputFile, URLsandLearnings, 'utf-8'); + + log(URLsandLearnings); log('Writing final report...'); - const report = await writeFinalReport({ + const report = `\n\nFinal Report:\n\n${await writeFinalReport({ prompt: combinedQuery, learnings, visitedUrls, - }); + })}` // Save report to file - await fs.writeFile('output.md', report, 'utf-8'); + await fs.appendFile(outputFile, report, 'utf-8'); - console.log(`\n\nFinal Report:\n\n${report}`); - console.log('\nReport has been saved to output.md'); + console.log(report); + console.log(`\nReport has been saved to ${outputFile}`); rl.close(); }