Spaces:

Tonic
/

scitonic

Runtime error

File size: 5,627 Bytes

59c3706

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 19,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9CrIbR0AK3d1",
        "outputId": "8624a380-d370-43b0-969c-1b21e275b322"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: typing_extensions in /usr/local/lib/python3.10/dist-packages (4.9.0)\n"
          ]
        }
      ],
      "source": [
        "# Install the third-party packages the import cell below needs. A fresh Colab\n",
        "# runtime does not ship openai or langchain, so Restart & Run All would fail at\n",
        "# the imports without this. openai>=1.0 is required for the\n",
        "# openai.chat.completions API used later; pypdf is the backend PyPDFLoader uses.\n",
        "# %pip (not !pip) installs into the environment of the running kernel.\n",
        "%pip install -q \"openai>=1.0\" langchain langchain-community sentence-transformers pypdf typing_extensions\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "import openai\n",
        "from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader, DirectoryLoader\n",
        "from transformers import AutoModel\n",
        "from langchain_community.embeddings.sentence_transformer import (\n",
        "    SentenceTransformerEmbeddings,\n",
        ")\n",
        "from langchain_community.vectorstores import Chroma\n",
        "import torch\n",
        "import json"
      ],
      "metadata": {
        "id": "xOFM83MoLQ-B"
      },
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import drive\n",
        "\n",
        "# Mount at an absolute path so the location does not depend on the kernel's\n",
        "# current working directory; the articles are later read from\n",
        "# /content/new_articles/MyDrive/new_articles.\n",
        "drive.mount('/content/new_articles')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WMvNDl83M7Xb",
        "outputId": "d59ab804-42ce-4b10-fee6-f01f19d60b38"
      },
      "execution_count": 53,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Drive already mounted at new_articles; to attempt to forcibly remount, call drive.mount(\"new_articles\", force_remount=True).\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def document_loader(directory):\n",
        "  \"\"\"Load every supported file in `directory` with its LangChain loader.\n",
        "\n",
        "  Files are dispatched on their (case-insensitive) extension: .csv goes to\n",
        "  CSVLoader, .pdf to PyPDFLoader and .txt to TextLoader. Every other entry\n",
        "  is skipped. The scan is not recursive.\n",
        "\n",
        "  Args:\n",
        "    directory: Path of the folder to scan.\n",
        "\n",
        "  Returns:\n",
        "    dict mapping each loaded filename to whatever that loader's load()\n",
        "    call returned, inserted in sorted filename order.\n",
        "  \"\"\"\n",
        "  documents = {}\n",
        "  # Sorted so the processing order does not depend on the filesystem listing.\n",
        "  for filename in sorted(os.listdir(directory)):\n",
        "    file_path = os.path.join(directory, filename)\n",
        "    extension_key = filename.lower()\n",
        "    if extension_key.endswith(\".csv\"):\n",
        "      loader = CSVLoader(file_path)\n",
        "    elif extension_key.endswith(\".pdf\"):\n",
        "      loader = PyPDFLoader(file_path)\n",
        "    elif extension_key.endswith(\".txt\"):\n",
        "      loader = TextLoader(file_path)\n",
        "    else:\n",
        "      # Skip only this entry. A `break` here would abandon every file listed\n",
        "      # after the first unsupported one.\n",
        "      continue\n",
        "\n",
        "    documents[filename] = loader.load()\n",
        "  return documents\n"
      ],
      "metadata": {
        "id": "QxVY8IyNL3Zp"
      },
      "execution_count": 54,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def _parse_qa_response(text):\n",
        "    \"\"\"Pull the four labelled fields out of the model's reply.\n",
        "\n",
        "    Each line of the form `Label: value` is matched on its label, ignoring\n",
        "    case, surrounding `*`/`#` markup and space-vs-underscore differences, so\n",
        "    `Reference_Article:` (the spelling the prompt asks for) is recognised as\n",
        "    well as `Reference_article:`. Line position does not matter.\n",
        "\n",
        "    Args:\n",
        "        text: Raw reply text from the chat completion.\n",
        "\n",
        "    Returns:\n",
        "        dict with keys Question, Answer, Reference_article and Reference_text;\n",
        "        a field the reply does not contain is set to \"Not provided\".\n",
        "    \"\"\"\n",
        "    fields = {\n",
        "        \"question\": \"Question\",\n",
        "        \"answer\": \"Answer\",\n",
        "        \"reference_article\": \"Reference_article\",\n",
        "        \"reference_text\": \"Reference_text\",\n",
        "    }\n",
        "    parsed = dict.fromkeys(fields.values(), \"Not provided\")\n",
        "    for line in text.splitlines():\n",
        "        label, separator, value = line.partition(\":\")\n",
        "        key = fields.get(label.strip(\" *#\\t\").lower().replace(\" \", \"_\"))\n",
        "        value = value.strip(\" *\\t\")\n",
        "        # Keep the first non-empty value seen for each label.\n",
        "        if separator and key and value and parsed[key] == \"Not provided\":\n",
        "            parsed[key] = value\n",
        "    return parsed\n",
        "\n",
        "\n",
        "# Read the key from the environment instead of storing it in the notebook.\n",
        "# NOTE(review): the key that was previously hardcoded on this line has been\n",
        "# published with the notebook and should be revoked.\n",
        "api_key = os.environ.get(\"OPENAI_API_KEY\")\n",
        "if not api_key:\n",
        "    raise RuntimeError(\"Set the OPENAI_API_KEY environment variable before running this cell.\")\n",
        "openai.api_key = api_key\n",
        "\n",
        "JSON_DATA = []\n",
        "directory = \"/content/new_articles/MyDrive/new_articles\"\n",
        "documents = document_loader(directory)\n",
        "for filename, document in documents.items():\n",
        "    if not document:\n",
        "        # The loader returned nothing for this file, so there is no text to send.\n",
        "        continue\n",
        "    # Only the first element returned by the loader is sent to the model.\n",
        "    doc = document[0].page_content\n",
        "    response = openai.chat.completions.create(\n",
        "        model=\"gpt-3.5-turbo\",\n",
        "        messages=[\n",
        "            {\"role\": \"system\", \"content\": f\"Generate one Question, Answer,Reference_Article:(use {filename}), Reference_Text from(use block of text which you've used to generate answer {doc})\"},\n",
        "        ],\n",
        "        temperature=0.3,\n",
        "    )\n",
        "    # message.content may be None; treat that as an empty reply.\n",
        "    reply_text = response.choices[0].message.content or \"\"\n",
        "    JSON_DATA.append(_parse_qa_response(reply_text))\n",
        "\n",
        "with open('question_and_answer_list.json', 'w') as json_file:\n",
        "    json.dump(JSON_DATA, json_file, indent=2)\n",
        "\n",
        "print(\"JSON data saved to question_and_answer_list.json\")\n",
        "\n",
        "print(JSON_DATA)\n"
      ],
      "metadata": {
        "id": "LO9imR5SMA1u"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "eOAr3cy6iA9J"
      },
      "execution_count": 46,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "E86P5xBqizsG"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}