This is my LangChain code that extracts the text of a PDF and answers a question about it, but I also need the page number: on which page (or in which area of the page) OpenAI found the answer.
I need your help with how to get the page number, or the area of the page, from the PDF.
const pdf = require("pdf-parse");
const fs = require("fs");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { OpenAIEmbeddings } = require("langchain/embeddings");
const { HNSWLib } = require("langchain/vectorstores");
const { OpenAI } = require("langchain/llms");
const { loadQAChain, RetrievalQAChain } = require("langchain/chains");
/**
 * Extracts the text of one PDF page.
 *
 * Text items are concatenated in the order returned by `getTextContent`; a
 * newline is inserted whenever the vertical coordinate (`transform[5]`) of an
 * item differs from that of the previous item.
 *
 * NOTE(review): this is meant to produce the same text as pdf-parse's built-in
 * page renderer, which is bypassed once a custom `pagerender` is supplied —
 * confirm against the installed pdf-parse version.
 *
 * @param {object} pageData - Page object handed to the `pagerender` callback.
 * @returns {Promise<string>} The text of the page.
 */
const renderPageText = async (pageData) => {
  const textContent = await pageData.getTextContent({
    normalizeWhitespace: false,
    disableCombineTextItems: false,
  });

  let lastY;
  let pageText = "";
  for (const item of textContent.items) {
    const y = item.transform[5];
    if (lastY === undefined || lastY === y) {
      pageText += item.str;
    } else {
      pageText += `\n${item.str}`;
    }
    lastY = y;
  }
  return pageText;
};

/**
 * Parses a PDF and returns its text split by page.
 *
 * @param {Buffer} dataBuffer - Raw bytes of the PDF file.
 * @returns {Promise<Array<{ pageNumber: number, text: string }>>} One entry per
 *   rendered page, with 1-based page numbers.
 */
const readPdfPages = async (dataBuffer) => {
  const pages = [];
  await pdf(dataBuffer, {
    pagerender: async (pageData) => {
      const pageText = await renderPageText(pageData);
      // Assumes pdf-parse renders pages one at a time, in document order, so
      // the running count equals the page number — TODO confirm.
      pages.push({ pageNumber: pages.length + 1, text: pageText });
      return pageText;
    },
  });
  return pages;
};

/**
 * Indexes `./pdf/sample.pdf` in an HNSWLib vector store, asks a fixed question
 * through a RetrievalQAChain, and logs the answer together with the page
 * numbers of the chunks that were retrieved for it.
 *
 * Each page is split on its own, so every chunk belongs to exactly one page
 * and carries `{ pageNumber }` in its metadata. The reported pages are those
 * of the retrieved chunks passed to the model as context; they indicate where
 * the answer most likely came from, not the exact passage the model used.
 *
 * Errors are caught and logged; nothing is rethrown.
 *
 * @returns {Promise<void>}
 */
const aiFunction = async () => {
  try {
    const fileName = "sample";
    const VECTOR_STORE_PATH = `${fileName}.index`;
    const question =
      "How does HTML contribute to the structure and content of a web page?";

    const dataBuffer = fs.readFileSync(`./pdf/${fileName}.pdf`);
    const pages = await readPdfPages(dataBuffer);
    // Pages without text (blank or image-only pages) yield no chunks.
    const pagesWithText = pages.filter((page) => page.text.trim() !== "");

    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize: 1000,
      chunkOverlap: 200,
    });
    // One text per page plus a parallel metadata array: every chunk produced
    // from a page inherits that page's metadata.
    const chunks = await textSplitter.createDocuments(
      pagesWithText.map((page) => page.text),
      pagesWithText.map((page) => ({ pageNumber: page.pageNumber }))
    );
    if (chunks.length === 0) {
      throw new Error(`No text could be extracted from ./pdf/${fileName}.pdf`);
    }

    const embeddings = new OpenAIEmbeddings();
    const modelName = embeddings.modelName;
    console.log({ modelName });

    const vectorstore = await HNSWLib.fromDocuments(chunks, embeddings);
    await vectorstore.save(VECTOR_STORE_PATH);
    console.log(vectorstore);

    //method 1. Accept user questions/query
    const model = new OpenAI();
    // returnSourceDocuments makes the chain include the retrieved chunks (and
    // their metadata) in the result alongside the answer text.
    const chain = RetrievalQAChain.fromLLM(model, vectorstore.asRetriever(), {
      returnSourceDocuments: true,
    });
    const res = await chain.call({
      query: question,
    });
    console.log(vectorstore);
    console.log(res);

    const sourceDocuments = res.sourceDocuments || [];
    const sourcePages = [
      ...new Set(sourceDocuments.map((doc) => doc.metadata.pageNumber)),
    ].sort((a, b) => a - b);
    console.log({ sourcePages });
  } catch (error) {
    console.log(error);
  }
};

aiFunction();