편집 기록

편집 기록
  • 프로필 nowp님의 편집
    날짜2019.10.25

    크롤링 코드 you attempted to use functionality that needed an active HTTP request 에러 뜨는데 해결방법좀 부탁드립니다.


    중고나라에서 상품가격을 크롤링하는 코딩입니다.

    import urllib.request
    import urllib.parse
    import re
    from bs4 import BeautifulSoup
    import numpy as np
    import pandas as pd
    import sys
    from flask import Flask, render_template, request
    import tablib
    import os
    
    app = Flask(__name__)
    @app.route("/")
    def student(plusurl=None):
      return render_template('student.html',plusurl='Name')
    
    def func_clawler(plusurl):
      if request.method == 'POST':
      #plusurl = input("검색어를 입력하세요:")
        page=3
        pageNum=1
        lastPage=int(page)*10-9
        num=1
        list1=list()
        while pageNum < lastPage +1:
    
          baseurl = "https://search.naver.com/search.naver?where=article&sm=tab_jum&query="
          suburl ="%20중고나라%20판매중%20"
          addurl ="&prdtype=0&t=0&st=rel&date_option=0&date_from=&date_to=&srchby=text&dup_remove=1&cafe_url=&without_cafe_url=&board=&sm=tab_pge&start="
          url =  baseurl+urllib.parse.quote_plus(suburl)+urllib.parse.quote_plus(plusurl)+addurl+str(pageNum)
          html = urllib.request.urlopen(url).read()
          soup = BeautifulSoup(html, "html.parser")
          my_price = soup.find_all(class_="cafe_item_price")
    
          for title in my_price:
            numbers = re.findall("\d+",title.text.replace(',',''))
            index=[]
            new_numbers=np.delete(numbers, index)
            real_numbers=int(new_numbers)
            num +=1
            list1.append(real_numbers)
            list1.sort()
    
          pageNum += 10
        for i in list1:
           print(int(i),end="원 ")
        articles = pd.DataFrame(list1, columns=['가격'])
        articles.to_csv('c:\project\priceextract\priceresults.csv',encoding='cp949')
    
        dataset = tablib.Dataset()
        with open(os.path.join(os.path.dirname(__file__),'c:\project\priceextract\priceresults.csv')) as f:
             dataset.csv = f.read()
        @app.route("/result" )
        def index():    
            return dataset.html  
    
        if __name__== "__main__":
           app.run()  
    
     func_clawler('Name')
    

    위 코드를 실행하면 아래와 같은 결과로 막히네요.

    This typically means that you attempted to use functionality that needed
    an active HTTP request.  Consult the documentation on testing for
    information about how to avoid this problem.
    

    혹시 이 문제를 해결하는 방법을 아시면 공유 부탁드립니다.

  • 프로필 알 수 없는 사용자님의 편집
    날짜2019.10.25

    크롤링 코드 you attempted to use functionality that needed an active HTTP request 에러 뜨는데 해결방법좀 부탁드립니다.


    중고나라에서 상품가격을 크롤링하는 코딩입니다. import urllib.request import urllib.parse import re from bs4 import BeautifulSoup import numpy as np import pandas as pd import sys from flask import Flask, render_template, request import tablib import os

    app = Flask(name) @app.route("/") def student(plusurl=None): return render_template('student.html',plusurl='Name')

    def func_clawler(plusurl): if request.method == 'POST': #plusurl = input("검색어를 입력하세요:") page=3 pageNum=1 lastPage=int(page)*10-9 num=1 list1=list() while pageNum < lastPage +1:

      baseurl = "https://search.naver.com/search.naver?where=article&sm=tab_jum&query="
      suburl ="%20중고나라%20판매중%20"
      addurl ="&prdtype=0&t=0&st=rel&date_option=0&date_from=&date_to=&srchby=text&dup_remove=1&cafe_url=&without_cafe_url=&board=&sm=tab_pge&start="
      url =  baseurl+urllib.parse.quote_plus(suburl)+urllib.parse.quote_plus(plusurl)+addurl+str(pageNum)
      html = urllib.request.urlopen(url).read()
      soup = BeautifulSoup(html, "html.parser")
      my_price = soup.find_all(class_="cafe_item_price")
    
      for title in my_price:
        numbers = re.findall("\d+",title.text.replace(',',''))
        index=[]
        new_numbers=np.delete(numbers, index)
        real_numbers=int(new_numbers)
        num +=1
        list1.append(real_numbers)
        list1.sort()
    
      pageNum += 10
    for i in list1:
       print(int(i),end="원 ")
    articles = pd.DataFrame(list1, columns=['가격'])
    articles.to_csv('c:\project\priceextract\priceresults.csv',encoding='cp949')
    
    dataset = tablib.Dataset()
    with open(os.path.join(os.path.dirname(__file__),'c:\project\priceextract\priceresults.csv')) as f:
         dataset.csv = f.read()
    @app.route("/result" )
    def index():    
        return dataset.html  
    
    if __name__== "__main__":
       app.run()  
    

    func_clawler('Name')

    위 코드를 실행하면 아래와 같은 결과로 막히네요. This typically means that you attempted to use functionality that needed an active HTTP request. Consult the documentation on testing for information about how to avoid this problem.

    혹시 이 문제를 해결하는 방법을 아시면 공유 부탁드립니다.