多线程爬取糗事网python3

1.导入模块:

多线程爬取糗事网python3

这里使用了多线程，并通过 queue（队列）在各线程之间进行数据交互。

2.创建爬取页面的类

多线程爬取糗事网python3

多线程爬取糗事网python3

3.创建处理数据的类

多线程爬取糗事网python3

多线程爬取糗事网python3多线程爬取糗事网python3

4.创建调用函数

 

# Module-level shutdown flags, both initially False.
# main() flips CRAM_EXIT to True once the page queue has been drained and
# PARSE_EXIT to True once the data queue has been drained.
# NOTE(review): presumably polled by the crawl/parse thread classes, which
# are defined outside this view -- confirm.
CRAM_EXIT, PARSE_EXIT = False, False


def main():
    """Run the crawl/parse pipeline for pages 1-20 and shut it down cleanly.

    Fills a page queue with the page numbers 1..20, starts three
    ``ThreadCrawl`` workers (page queue -> data queue) and three
    ``ThreadParse`` workers (data queue -> output file), waits for both
    queues to drain, flips the module-level exit flags, joins every worker
    and finally closes the output file.

    Side effects: sets the globals ``CRAM_EXIT`` and ``PARSE_EXIT`` to True,
    appends to ``E://file/qiushi2.json`` and prints progress to stdout.
    """
    global CRAM_EXIT, PARSE_EXIT
    # Local import: only needed to pace the polling loops below.
    import time

    # Seconds to sleep between queue checks. The waits used to be
    # ``while ...: pass`` loops, which pin a core and fight the worker
    # threads for the GIL.
    poll_interval = 0.05

    # Page numbers to fetch.
    pageQueue = Queue(20)
    for i in range(1, 21):
        pageQueue.put(i)
    # Crawl results, handed from the crawl threads to the parse threads.
    dataQueue = Queue()
    # Lock passed to the parse threads together with the output file.
    lock = threading.Lock()

    filename = open("E://file/qiushi2.json", "a")
    try:
        # Names of the three crawl threads.
        crawList = ['线程1号', '线程2号', '线程3号']
        # Keeps the started crawl threads so they can be joined later.
        threadcrawl = []
        for threadName in crawList:
            thread = ThreadCrawl(threadName, pageQueue, dataQueue)
            thread.start()
            threadcrawl.append(thread)

        # Names of the three parse threads.
        parseList = ["解析线程1号", "解析线程2号", "解析线程3号"]
        # Keeps the started parse threads so they can be joined later.
        threadparse = []
        for threadName in parseList:
            thread = ThreadParse(threadName, dataQueue, filename, lock)
            thread.start()
            threadparse.append(thread)

        # Wait until every page number has been taken off the queue.
        while not pageQueue.empty():
            time.sleep(poll_interval)

        CRAM_EXIT = True

        print('pageQueue为空')

        for thread in threadcrawl:
            thread.join()
            print('1')

        # Wait until every crawl result has been taken off the queue.
        while not dataQueue.empty():
            time.sleep(poll_interval)

        PARSE_EXIT = True

        for thread in threadparse:
            thread.join()
            print('2')
    finally:
        # On the normal path both flags are already True. On an error or
        # Ctrl-C this asks the workers to stop instead of leaving them
        # running -- assumes the worker classes poll these flags; confirm
        # against ThreadCrawl / ThreadParse.
        CRAM_EXIT = True
        PARSE_EXIT = True
        # Close under the lock so the close cannot interleave with a write
        # made while holding the same lock.
        with lock:
            filename.close()
    print("谢谢使用!")


# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()