Error in a Python summarization program: Resource punkt not found

Asked 2 years ago, Updated 2 years ago, 207 views

I'm stuck because I don't know how to deal with this error.
Please let me know how to fix it.

[Error]

  0%|          | 0/2 [00:00<?, ?it/s]0
  0%|          | 0/2 [00:06<?, ?it/s]
---------------------------------------------------------------------------
LookupError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/sumy/nlp/tokenizers.py in _get_sentence_tokenizer(self, language)
    126             path = to_string("tokenizers/punkt/%s.pickle") % to_string(language)
--> 127             return nltk.data.load(path)
    128         except (LookupError, zipfile.BadZipfile) as e:

5 frames
LookupError: 
**********************************************************************
  Resource punkt not found.
  Please use the NLTK Downloader to obtain the resource:

  >>> import nltk
  >>> nltk.download('punkt')
  
  Searched in:
    - '/root/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
    - '/usr/nltk_data'
    - '/usr/lib/nltk_data'
    - ''
**********************************************************************


During handling of the above exception, another exception occurred:

LookupError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/sumy/nlp/tokenizers.py in _get_sentence_tokenizer(self, language)
    130                 "NLTK tokenizers are missing or the language is not supported.\n"
    131                 """Download them by following command: python -c "import nltk; nltk.download('punkt')"\n"""
--> 132                 "Original error was:\n" + str(e)
    133             )
    134 

LookupError: NLTK tokenizers are missing or the language is not supported.
Download them by following command: python -c "import nltk; nltk.download('punkt')"
Original error was:

**********************************************************************
  Resource punkt not found.
  Please use the NLTK Downloader to obtain the resource:

  >>> import nltk
  >>> nltk.download('punkt')
  
  Searched in:
    - '/root/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
    - '/usr/nltk_data'
    - '/usr/lib/nltk_data'
    - ''
**********************************************************************

[Code]

#@title
import requests
import json
import csv
import pytz
import datetime
import tqdm
import numpy as np

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from time import sleep

from google.colab import files

##############
### DEFINE ###
##############
keyword = "Welfare"
sleep_time = 0.5

url = 'https://api.jgrants-portal.go.jp/exp/v1/public/subsidies?keyword=' + keyword + '&sort=created_date&order=DESC&acceptance=1'
req = requests.get(url)
reqJSON = json.loads(req.text)

# Loop over the result data
csvList = []
ID = []
for i in tqdm.tqdm(range(len(reqJSON["result"]))):
  # Dummy work so the tqdm progress bar has something to time
  np.pi * np.pi
  if i % 1e6 == 0:
    print(i)

  # Add one result's fields to the csv write list
  resultData = reqJSON["result"][i]
  csvRow = []
  csvRow.append(resultData["title"])
  csvRow.append(resultData["id"])
  csvRow.append(resultData["acceptance_start_datetime"])
  csvRow.append(resultData["acceptance_end_datetime"])
  csvRow.append(resultData["subsidy_max_limit"])
  csvRow.append(resultData["target_area_search"])
  csvRow.append(resultData["target_number_of_employees"])

  # selenium: define a headless Chrome driver
  options = webdriver.ChromeOptions()
  options.add_argument('--headless')
  options.add_argument('--no-sandbox')
  options.add_argument('--disable-dev-shm-usage')
  wd = webdriver.Chrome('chromedriver', options=options)

  # Open the page for this ID and scrape the detail table
  tURL = "https://www.jgrants-portal.go.jp/subsidy/" + resultData["id"]
  wd.get(tURL)
  sleep(sleep_time)
  detail = wd.find_element(by=By.TAG_NAME, value="table").text

  # Load packages
  from sumy.parsers.plaintext import PlaintextParser
  from sumy.nlp.tokenizers import Tokenizer
  from sumy.summarizers.text_rank import TextRankSummarizer

  # Parse the scraped string (the Tokenizer here is what loads NLTK's punkt data)
  parser = PlaintextParser.from_string(detail, Tokenizer("english"))

  # Summarize with sumy's TextRank, keeping 2 sentences
  summarizer = TextRankSummarizer()
  summary = summarizer(parser.document, 2)
  text_summary = ""
  for sentence in summary:
    text_summary += str(sentence)
  print(text_summary)

  # Add details to csv write list
  csvRow.append(detail)
  csvRow.append(text_summary)

  # Add tURL
  csvRow.append(tURL)

  # Save one row as one element in an array
  csvList.append(csvRow)

wd.close()

# Create a new CSV file
csv_date = datetime.datetime.now(pytz.timezone('Asia/Tokyo')).strftime("%Y%m%d")
csv_file_name = keyword + "jGrants" + csv_date + ".csv"
f = open(csv_file_name, "w", encoding="shift_jis", errors="ignore")  # Shift-JIS for Windows

# Write to the csv file
writer = csv.writer(f, lineterminator="\n")
csv_header = ["Title", "ID", "Start Date", "End Date", "Amount Limit", "Region Coverage", "Number of Employees Coverage", "Details", "Summary", "URL"]
writer.writerow(csv_header)
for csvData in csvList:
  writer.writerow(csvData)
f.close()

# csv file output
files.download(csv_file_name)

python

2022-09-30 22:01

1 Answer

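As the traceback itself says, the NLTK "punkt" sentence-tokenizer data is not installed in the runtime, so sumy's Tokenizer("english") cannot load tokenizers/punkt/english.pickle. Downloading the resource once before the Tokenizer is constructed should resolve the error. A minimal sketch, assuming the script runs in the same Colab runtime (the download lands in /root/nltk_data, the first path the error message searched):

import nltk
nltk.download('punkt')  # fetch the punkt sentence tokenizer data once

Put these two lines near the top of the script, before PlaintextParser.from_string(detail, Tokenizer("english")) is first reached, or run them once in a separate cell. Equivalently, run the shell command the error message suggests: python -c "import nltk; nltk.download('punkt')"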

