Upload localKB_construct.py
Browse files- localKB_construct.py +18 -8
localKB_construct.py
CHANGED
@@ -2,10 +2,12 @@
|
|
2 |
1.更新了llama-index的库。对应的函数名和用法都有所改变。
|
3 |
'''
|
4 |
|
|
|
5 |
import openai
|
6 |
import requests
|
7 |
import csv
|
8 |
from llama_index import PromptHelper
|
|
|
9 |
from llama_index import LLMPredictor
|
10 |
from llama_index import ServiceContext
|
11 |
from langchain.chat_models import ChatOpenAI
|
@@ -18,7 +20,6 @@ import math
|
|
18 |
import pandas as pd
|
19 |
import numpy as np
|
20 |
import PyPDF2
|
21 |
-
# from llama_index import SimpleDirectoryReader, GPTListIndex, readers, GPTSimpleVectorIndex, LLMPredictor, PromptHelper #* working in the previous version.
|
22 |
|
23 |
##* in the latest version: GPTSimpleVectorIndex was renamed to GPTVectorStoreIndex, try removing it from the end of your imports
|
24 |
from llama_index import SimpleDirectoryReader, GPTListIndex, readers, GPTVectorStoreIndex, LLMPredictor, PromptHelper
|
@@ -28,7 +29,7 @@ from llama_index import download_loader
|
|
28 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
29 |
import sys
|
30 |
import os
|
31 |
-
|
32 |
|
33 |
## environment settings.
|
34 |
os.environ["OPENAI_API_KEY"] = "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"
|
@@ -72,27 +73,36 @@ def construct_index(directory_path):
|
|
72 |
|
73 |
## 如果是PDF文件,那么需要用如下命令。注意与txt文件的区别。且需要from llama_index import download_loader。
|
74 |
#NOTE: 这里可以问:give me an example of GPT-4 solving math problem. 会回答关于这个PDF中的内容,所以可以确认这个程序调用了in-context learning的功能。
|
75 |
-
CJKPDFReader = download_loader("CJKPDFReader")
|
76 |
-
loader = CJKPDFReader()
|
|
|
|
|
77 |
# documents = loader.load_data(file=directory_path) #! 注意这里是指向文件本身,而不同于txt文件的指文件夹。
|
|
|
|
|
|
|
78 |
documents = loader.load_data(file=directory_path) #! 注意这里是指向文件本身,而不同于txt文件的指文件夹。
|
|
|
79 |
# index = GPTSimpleVectorIndex(
|
80 |
# documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper
|
81 |
# )
|
82 |
|
83 |
# index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context) ## original version, working.
|
|
|
84 |
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context) #* the funciton renamed.
|
|
|
|
|
85 |
# index.save_to_disk('/Users/yunshi/Downloads/txt_dir/index.json') ## in the latest version, this function is not working.
|
86 |
|
87 |
return index, service_context
|
88 |
|
89 |
-
def process_file():
|
90 |
print('process_file starts')
|
91 |
-
file_path = "/Users/yunshi/Downloads/txt_dir/Sparks_of_AGI.pdf"
|
92 |
#! 第一次运行时需要开启这个function。如果测试通过index,就不需要再运行了。记得上传PDF和JSON文件到云服务器上。
|
93 |
index, service_context = construct_index(file_path)
|
94 |
# index.storage_context.persist(persist_dir="/Users/yunshi/Downloads/txt_dir/") #* 存储到本地,为以后调用。
|
95 |
-
index.storage_context.persist(persist_dir=f"./") #* 存储到本地,为以后调用。
|
96 |
print(index)
|
97 |
|
98 |
-
process_file()
|
|
|
2 |
1.更新了llama-index的库。对应的函数名和用法都有所改变。
|
3 |
'''
|
4 |
|
5 |
+
# import gradio as gr
|
6 |
import openai
|
7 |
import requests
|
8 |
import csv
|
9 |
from llama_index import PromptHelper
|
10 |
+
# from llama_index import GPTSimpleVectorIndex ## renamed in the latest version.
|
11 |
from llama_index import LLMPredictor
|
12 |
from llama_index import ServiceContext
|
13 |
from langchain.chat_models import ChatOpenAI
|
|
|
20 |
import pandas as pd
|
21 |
import numpy as np
|
22 |
import PyPDF2
|
|
|
23 |
|
24 |
##* in the latest version: GPTSimpleVectorIndex was renamed to GPTVectorStoreIndex, try removing it from the end of your imports
|
25 |
from llama_index import SimpleDirectoryReader, GPTListIndex, readers, GPTVectorStoreIndex, LLMPredictor, PromptHelper
|
|
|
29 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
30 |
import sys
|
31 |
import os
|
32 |
+
from rich import print
|
33 |
|
34 |
## environment settings.
|
35 |
os.environ["OPENAI_API_KEY"] = "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"
|
|
|
73 |
|
74 |
## 如果是PDF文件,那么需要用如下命令。注意与txt文件的区别。且需要from llama_index import download_loader。
|
75 |
#NOTE: 这里可以问:give me an example of GPT-4 solving math problem. 会回答关于这个PDF中的内容,所以可以确认这个程序调用了in-context learning的功能。
|
76 |
+
# CJKPDFReader = download_loader("CJKPDFReader") ## 最新的版本好像不行了,需要用下面的命令。
|
77 |
+
# loader = CJKPDFReader()
|
78 |
+
PDFReader = download_loader("PDFReader") # working。
|
79 |
+
loader = PDFReader()
|
80 |
# documents = loader.load_data(file=directory_path) #! 注意这里是指向文件本身,而不同于txt文件的指文件夹。
|
81 |
+
print('directory_path now:', directory_path)
|
82 |
+
# print('111')
|
83 |
+
# documents = loader.load_data(file="/Users/yunshi/Downloads/txt_dir/Sparks_of_AGI.pdf") #! 注意这里是指向文件本身,而不同于txt文件的指文件夹。
|
84 |
documents = loader.load_data(file=directory_path) #! 注意这里是指向文件本身,而不同于txt文件的指文件夹。
|
85 |
+
print('222')
|
86 |
# index = GPTSimpleVectorIndex(
|
87 |
# documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper
|
88 |
# )
|
89 |
|
90 |
# index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context) ## original version, working.
|
91 |
+
# print('documents:', documents)
|
92 |
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context) #* the funciton renamed.
|
93 |
+
print('333')
|
94 |
+
|
95 |
# index.save_to_disk('/Users/yunshi/Downloads/txt_dir/index.json') ## in the latest version, this function is not working.
|
96 |
|
97 |
return index, service_context
|
98 |
|
99 |
+
def process_file(file_path,username):
|
100 |
print('process_file starts')
|
101 |
+
# file_path = "/Users/yunshi/Downloads/txt_dir/Sparks_of_AGI.pdf"
|
102 |
#! 第一次运行时需要开启这个function。如果测试通过index,就不需要再运行了。记得上传PDF和JSON文件到云服务器上。
|
103 |
index, service_context = construct_index(file_path)
|
104 |
# index.storage_context.persist(persist_dir="/Users/yunshi/Downloads/txt_dir/") #* 存储到本地,为以后调用。
|
105 |
+
index.storage_context.persist(persist_dir=f"./{username}/") #* 存储到本地,为以后调用。
|
106 |
print(index)
|
107 |
|
108 |
+
# process_file(file_path)
|