{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyOM1MK/wsnFQZ9IDhe5wqdw",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "\"Open"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Extract PDF page text to CSV\n",
        "\n",
        "Reads a range of pages from `puzzles.pdf` with PyPDF2 and writes the extracted text of each page to `questions.csv`, one row per page (`page_number`, `questions`).\n",
        "\n",
        "## Setup"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "3W6PqL7fnKoE"
      },
      "outputs": [],
      "source": [
        "# Pinned so a re-run installs the same release; %pip targets the kernel's environment.\n",
        "%pip install -q PyPDF2==3.0.1"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import csv\n",
        "\n",
        "from PyPDF2 import PdfReader"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Configuration"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "GowGZUpWrUDo"
      },
      "outputs": [],
      "source": [
        "# Input PDF and output CSV locations.\n",
        "PDF_PATH = 'puzzles.pdf'\n",
        "CSV_PATH = 'questions.csv'\n",
        "\n",
        "# Page range as 0-based indices: start_page is included, end_page is excluded.\n",
        "# The CSV stores 1-based page numbers, so these values cover pages 13 through 94.\n",
        "start_page = 12\n",
        "end_page = 94"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Extract pages"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "def extract_pages_to_csv(pdf_path, csv_path, start_page, end_page):\n",
        "    \"\"\"Write the text of a range of PDF pages to a CSV file, one row per page.\n",
        "\n",
        "    Args:\n",
        "        pdf_path: Path of the PDF to read.\n",
        "        csv_path: Path of the CSV to create (overwritten if it exists).\n",
        "        start_page: 0-based index of the first page to extract (inclusive).\n",
        "        end_page: 0-based index at which extraction stops (exclusive).\n",
        "\n",
        "    Returns:\n",
        "        The number of page rows written, not counting the header row.\n",
        "\n",
        "    Raises:\n",
        "        IndexError: If start_page is negative or end_page exceeds the\n",
        "            number of pages in the document.\n",
        "    \"\"\"\n",
        "    reader = PdfReader(pdf_path)\n",
        "    total_pages = len(reader.pages)\n",
        "\n",
        "    # Validate before opening the output so a bad range cannot leave a\n",
        "    # half-written CSV behind. A negative index would otherwise wrap around\n",
        "    # to the end of the document and produce mislabelled rows.\n",
        "    if start_page < 0 or end_page > total_pages:\n",
        "        raise IndexError(\n",
        "            f'page range [{start_page}, {end_page}) is outside the document, '\n",
        "            f'which has {total_pages} pages'\n",
        "        )\n",
        "\n",
        "    page_indices = range(start_page, end_page)\n",
        "\n",
        "    # newline='' lets the csv module control line endings itself.\n",
        "    with open(csv_path, 'w', newline='', encoding='utf-8') as file:\n",
        "        writer = csv.writer(file)\n",
        "        writer.writerow(['page_number', 'questions'])\n",
        "\n",
        "        for page_number in page_indices:\n",
        "            text = reader.pages[page_number].extract_text()\n",
        "            # Indices are 0-based; the CSV stores 1-based page numbers.\n",
        "            writer.writerow([page_number + 1, text])\n",
        "\n",
        "    return len(page_indices)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "6G7OW0_qnaTT"
      },
      "outputs": [],
      "source": [
        "rows_written = extract_pages_to_csv(PDF_PATH, CSV_PATH, start_page, end_page)\n",
        "print(f'Wrote {rows_written} pages to {CSV_PATH}')"
      ]
    }
  ]
}