gpt-academic

Sleeping

App Files Files Community

3v324v23 committed on Mar 26, 2023

Commit

f04d975

•

1 Parent(s): 6505fea

add comments

Browse files

Files changed (6) hide show

README.md +23 -7
crazy_functions/解析项目源代码.py +6 -6
crazy_functions/读文章写摘要.py +3 -3
main.py +10 -7
predict.py +25 -1
toolbox.py +6 -1

README.md CHANGED Viewed

@@ -60,7 +60,7 @@ chat分析报告生成 | [实验性功能] 运行后自动生成总结汇报
 ## 直接运行 (Windows or Linux or MacOS)
-```
 # 下载项目
 git clone https://github.com/binary-husky/chatgpt_academic.git
 cd chatgpt_academic
@@ -73,9 +73,16 @@ python -m pip install -r requirements.txt
 python main.py
 # 测试实验性功能
-input区域 输入 ./crazy_functions/test_project/cpp/libJPG ， 然后点击 解析整个C++项目的头文件
-input区域 输入 ./crazy_functions/test_project/latex/attention ， 然后点击 解读latex论文写摘要
-input区域 输入 ./crazy_functions/test_project/python/dqn ， 然后点击 解析整个Python项目
 ```
@@ -93,9 +100,18 @@ docker build -t gpt-academic .
 docker run --rm -it --net=host gpt-academic
 # 测试实验性功能
-input区域 输入 ./crazy_functions/test_project/cpp/libJPG ， 然后点击 解析整个C++项目的头文件
-input区域 输入 ./crazy_functions/test_project/latex/attention ， 然后点击 解读latex论文写摘要
-input区域 输入 ./crazy_functions/test_project/python/dqn ， 然后点击 解析整个Python项目
 ```

 ## 直接运行 (Windows or Linux or MacOS)
+``` sh
 # 下载项目
 git clone https://github.com/binary-husky/chatgpt_academic.git
 cd chatgpt_academic
 python main.py
 # 测试实验性功能
+## 测试C++项目头文件分析
+input区域 输入 ./crazy_functions/test_project/cpp/libJPG ， 然后点击 "[实验] 解析整个C++项目（input输入项目根路径）"
+## 测试给Latex项目写摘要
+input区域 输入 ./crazy_functions/test_project/latex/attention ， 然后点击 "[实验] 读tex论文写摘要（input输入项目根路径）"
+## 测试Python项目分析
+input区域 输入 ./crazy_functions/test_project/python/dqn ， 然后点击 "[实验] 解析整个py项目（input输入项目根路径）"
+## 测试自我代码解读
+点击 "[实验] 请解析并解构此项目本身"
+## 测试实验功能模板函数（要求gpt回答几个数的平方是什么），您可以根据此函数为模板，实现更复杂的功能
+点击 "[实验] 实验功能函数模板"
 ```
 docker run --rm -it --net=host gpt-academic
 # 测试实验性功能
+## 测试自我代码解读
+点击 "[实验] 请解析并解构此项目本身"
+## 测试实验功能模板函数（要求gpt回答几个数的平方是什么），您可以根据此函数为模板，实现更复杂的功能
+点击 "[实验] 实验功能函数模板"
+##（请注意在docker中运行时，需要额外注意程序的文件访问权限问题）
+## 测试C++项目头文件分析
+input区域 输入 ./crazy_functions/test_project/cpp/libJPG ， 然后点击 "[实验] 解析整个C++项目（input输入项目根路径）"
+## 测试给Latex项目写摘要
+input区域 输入 ./crazy_functions/test_project/latex/attention ， 然后点击 "[实验] 读tex论文写摘要（input输入项目根路径）"
+## 测试Python项目分析
+input区域 输入 ./crazy_functions/test_project/python/dqn ， 然后点击 "[实验] 解析整个py项目（input输入项目根路径）"
 ```

crazy_functions/解析项目源代码.py CHANGED Viewed

@@ -9,9 +9,9 @@ def 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot,
         with open(fp, 'r', encoding='utf-8') as f:
             file_content = f.read()
-        前言 = "接下来请你逐文件分析下面的工程" if index==0 else ""
-        i_say = 前言 + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)}，文件代码是 ```{file_content}```'
-        i_say_show_user = 前言 + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
         yield chatbot, history, '正常'
@@ -56,9 +56,9 @@ def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTx
         with open(fp, 'r', encoding='utf-8') as f:
             file_content = f.read()
-        前言 = "接下来请你分析自己的程序构成，别紧张，" if index==0 else ""
-        i_say = 前言 + f'请对下面的程序文件做一个概述文件名是{fp}，文件代码是 ```{file_content}```'
-        i_say_show_user = 前言 + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
         yield chatbot, history, '正常'

         with open(fp, 'r', encoding='utf-8') as f:
             file_content = f.read()
+        prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
+        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)}，文件代码是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
         yield chatbot, history, '正常'
         with open(fp, 'r', encoding='utf-8') as f:
             file_content = f.read()
+        prefix = "接下来请你分析自己的程序构成，别紧张，" if index==0 else ""
+        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{fp}，文件代码是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
         yield chatbot, history, '正常'

crazy_functions/读文章写摘要.py CHANGED Viewed

@@ -10,9 +10,9 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
         with open(fp, 'r', encoding='utf-8') as f:
             file_content = f.read()
-        前言 = "接下来请你逐文件分析下面的论文文件，概括其内容" if index==0 else ""
-        i_say = 前言 + f'请对下面的文章片段用中文做一个概述，文件名是{os.path.relpath(fp, project_folder)}，文章内容是 ```{file_content}```'
-        i_say_show_user = 前言 + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
         print('[1] yield chatbot, history')
         yield chatbot, history, '正常'

         with open(fp, 'r', encoding='utf-8') as f:
             file_content = f.read()
+        prefix = "接下来请你逐文件分析下面的论文文件，概括其内容" if index==0 else ""
+        i_say = prefix + f'请对下面的文章片段用中文做一个概述，文件名是{os.path.relpath(fp, project_folder)}，文章内容是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
         print('[1] yield chatbot, history')
         yield chatbot, history, '正常'

main.py CHANGED Viewed

@@ -1,11 +1,13 @@
-import os; os.environ['no_proxy'] = '*'
 import gradio as gr
 from predict import predict
 from toolbox import format_io, find_free_port
-try: from config_private import proxies, WEB_PORT # 放自己的秘密如API和代理网址 os.path.exists('config_private.py')
 except: from config import proxies, WEB_PORT
 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
 initial_prompt = "Serve me as a writing and programming assistant."
@@ -13,20 +15,21 @@ title_html = """<h1 align="center">ChatGPT 学术优化</h1>"""
 import logging
 os.makedirs('gpt_log', exist_ok=True)
-logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO, encoding='utf-8')
 print('所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log，请注意自我隐私保护哦！')
-# 一些普通功能
 from functional import get_functionals
 functional = get_functionals()
-# 对一些丧心病狂的实验性功能进行测试
 from functional_crazy import get_crazy_functionals
 crazy_functional = get_crazy_functionals()
 gr.Chatbot.postprocess = format_io
-with gr.Blocks() as demo:
     gr.HTML(title_html)
     with gr.Row():
         with gr.Column(scale=2):
@@ -66,7 +69,7 @@ with gr.Blocks() as demo:
         crazy_functional[k]["Button"].click(crazy_functional[k]["Function"],
             [txt, top_p, temperature, chatbot, history, systemPromptTxt, gr.State(PORT)], [chatbot, history, statusDisplay])
 def auto_opentab_delay():
     import threading, webbrowser, time
     print(f"URL http://localhost:{PORT}")

+import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
 import gradio as gr
 from predict import predict
 from toolbox import format_io, find_free_port
+# 建议您复制一个config_private.py放自己的秘密，如API和代理网址，避免不小心传github被别人看到
+try: from config_private import proxies, WEB_PORT
 except: from config import proxies, WEB_PORT
+# 如果WEB_PORT是-1，则随机选取WEB端口
 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
 initial_prompt = "Serve me as a writing and programming assistant."
 import logging
 os.makedirs('gpt_log', exist_ok=True)
+logging.basicConfig(filename='gpt_log/chat_secrets.log', level=logging.INFO, encoding='utf-8') # python 版本建议3.9+（越新越好）
 print('所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log，请注意自我隐私保护哦！')
+# 一些普通功能模块
 from functional import get_functionals
 functional = get_functionals()
+# 对一些丧心病狂的实验性功能模块进行测试
 from functional_crazy import get_crazy_functionals
 crazy_functional = get_crazy_functionals()
+# 处理markdown文本格式的转变
 gr.Chatbot.postprocess = format_io
+with gr.Blocks() as demo:   # 借助gradio框架，实现webUI
     gr.HTML(title_html)
     with gr.Row():
         with gr.Column(scale=2):
         crazy_functional[k]["Button"].click(crazy_functional[k]["Function"],
             [txt, top_p, temperature, chatbot, history, systemPromptTxt, gr.State(PORT)], [chatbot, history, statusDisplay])
+# 延迟函数，做一些准备工作，最后尝试打开浏览器
 def auto_opentab_delay():
     import threading, webbrowser, time
     print(f"URL http://localhost:{PORT}")

predict.py CHANGED Viewed

@@ -15,6 +15,9 @@ except: from config import proxies, API_URL, API_KEY, TIMEOUT_SECONDS, MAX_RETRY
 timeout_bot_msg = '[local] Request timeout, network error. please check proxy settings in config.py.'
 def get_full_error(chunk, stream_response):
     while True:
         try:
             chunk += next(stream_response)
@@ -23,6 +26,16 @@ def get_full_error(chunk, stream_response):
     return chunk
 def predict_no_ui(inputs, top_p, temperature, history=[]):
     headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt="", stream=False)
     retry = 0
@@ -47,7 +60,15 @@ def predict_no_ui(inputs, top_p, temperature, history=[]):
 def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt='',
             stream = True, additional_fn=None):
     if additional_fn is not None:
         import functional
         importlib.reload(functional)
@@ -115,6 +136,9 @@ def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt=''
                     return
 def generate_payload(inputs, top_p, temperature, history, system_prompt, stream):
     headers = {
         "Content-Type": "application/json",
         "Authorization": f"Bearer {API_KEY}"

 timeout_bot_msg = '[local] Request timeout, network error. please check proxy settings in config.py.'
 def get_full_error(chunk, stream_response):
+    """
+        获取完整的从Openai返回的报错
+    """
     while True:
         try:
             chunk += next(stream_response)
     return chunk
 def predict_no_ui(inputs, top_p, temperature, history=[]):
+    """
+        发送至chatGPT，等待回复，一次性完成，不显示中间过程。
+        predict函数的简化版。
+        用于payload比较大的情况，或者用于实现多线程、带嵌套的复杂功能。
+        inputs 是本次问询的输入
+        top_p, temperature是chatGPT的内部调优参数
+        history 是之前的对话列表
+        （注意无论是inputs还是history，内容太长了都会触发token数量溢出的错误，然后raise ConnectionAbortedError）
+    """
     headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt="", stream=False)
     retry = 0
 def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt='',
             stream = True, additional_fn=None):
+    """
+        发送至chatGPT，流式获取输出。
+        用于基础的对话功能。
+        inputs 是本次问询的输入
+        top_p, temperature是chatGPT的内部调优参数
+        history 是之前的对话列表（注意无论是inputs还是history，内容太长了都会触发token数量溢出的错误）
+        chatbot 为WebUI中显示的对话列表，修改它，然后yield出去，可以直接修改对话界面内容
+        additional_fn代表点击的哪个按钮，按钮见functional.py
+    """
     if additional_fn is not None:
         import functional
         importlib.reload(functional)
                     return
 def generate_payload(inputs, top_p, temperature, history, system_prompt, stream):
+    """
+        整合所有信息，选择LLM模型，生成http请求，为发送请求做准备
+    """
     headers = {
         "Content-Type": "application/json",
         "Authorization": f"Bearer {API_KEY}"

toolbox.py CHANGED Viewed

@@ -10,7 +10,10 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
     try: from config_private import TIMEOUT_SECONDS, MAX_RETRY
     except: from config import TIMEOUT_SECONDS, MAX_RETRY
     from predict import predict_no_ui
     mutable = [None, '']
     def mt(i_say, history):
         while True:
             try:
@@ -25,14 +28,16 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
                     mutable[1] = 'Warning! Input file is too long, cut into half. '
             except TimeoutError as e:
                 mutable[0] = '[Local Message] Failed with timeout'
     thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
     cnt = 0
     while thread_name.is_alive():
         cnt += 1
         chatbot[-1] = (i_say_show_user, f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt%4)))
         yield chatbot, history, '正常'
         time.sleep(1)
     gpt_say = mutable[0]
     return gpt_say

     try: from config_private import TIMEOUT_SECONDS, MAX_RETRY
     except: from config import TIMEOUT_SECONDS, MAX_RETRY
     from predict import predict_no_ui
+    # 多线程的时候，需要一个mutable结构在不同线程之间传递信息
+    # list就是最简单的mutable结构，我们第一个位置放gpt输出，第二个位置传递报错信息
     mutable = [None, '']
+    # multi-threading worker
     def mt(i_say, history):
         while True:
             try:
                     mutable[1] = 'Warning! Input file is too long, cut into half. '
             except TimeoutError as e:
                 mutable[0] = '[Local Message] Failed with timeout'
+    # 创建新线程发出http请求
     thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
+    # 原来的线程则负责持续更新UI，实现一个超时倒计时，并等待新线程的任务完成
     cnt = 0
     while thread_name.is_alive():
         cnt += 1
         chatbot[-1] = (i_say_show_user, f"[Local Message] {mutable[1]}waiting gpt response {cnt}/{TIMEOUT_SECONDS*2*(MAX_RETRY+1)}"+''.join(['.']*(cnt%4)))
         yield chatbot, history, '正常'
         time.sleep(1)
+    # 把gpt的输出从mutable中取出来
     gpt_say = mutable[0]
     return gpt_say