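# NOTE(review): the three headings below appear to be page-extraction residue,
# not part of the program; kept as comments so the module can be parsed.
#   Documentos de Académico
#   Documentos de Profesional
#   Documentos de Cultura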
import asyncio
import sqlite3
import sys
import urllib.robotparser

import aiohttp
import celery
import redis
from bs4 import BeautifulSoup
from celery import Celery
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize
from PyQt5.QtWidgets import (
    QApplication,
    QLabel,
    QLineEdit,
    QMainWindow,
    QPushButton,
)
# Celery application named 'web_crawler'; its broker is Redis database 0 on
# localhost:6379.
app = Celery(
    'web_crawler',
    broker='redis://localhost:6379/0',
)

# Redis client bound to database 1 on the same localhost:6379 server.
redis_client = redis.Redis(
    host='localhost',
    port=6379,
    db=1,
)
class MainWindow(QMainWindow):
    """Main window of the crawler GUI, holding the keyword input field.

    Attributes are documented inline in ``__init__``.

    NOTE(review): nothing in the visible source connects ``start_crawler`` to
    a widget signal (no button is created here) -- confirm how it is meant to
    be triggered.
    """

    def __init__(self):
        """Set the window title and geometry and create the keyword field."""
        super().__init__()
        self.setWindowTitle("Web Crawler")
        self.setGeometry(100, 100, 400, 200)

        # Text field whose content is read as the crawl keyword.
        self.line_edit_keyword = QLineEdit(self)
        self.line_edit_keyword.setGeometry(150, 50, 150, 30)

    def start_crawler(self):
        """Read the keyword from the input field and start the crawl.

        The text is passed unchanged to ``start_web_crawler``.
        """
        keyword = self.line_edit_keyword.text()
        # Start the crawl with the keyword that was entered. (The original
        # comment was wrapped onto a second line, leaving a bare ``ingresada``
        # name that raised NameError before the crawl could start.)
        start_web_crawler(keyword)
def start_web_crawler(keyword):
    """Run the asynchronous crawler to completion for ``keyword``.

    Blocks the calling thread until the ``run_crawler`` coroutine finishes.

    Args:
        keyword: Search keyword, forwarded unchanged to ``run_crawler``.

    Raises:
        RuntimeError: If called while an asyncio event loop is already
            running in the current thread.
    """
    # asyncio.run() creates a fresh event loop, drives the coroutine to
    # completion and closes the loop afterwards. It replaces
    # asyncio.get_event_loop() + run_until_complete(), which is deprecated
    # when no loop is running and fails outright in non-main threads.
    asyncio.run(run_crawler(keyword))
# Start the crawl at the seed URL.
# NOTE(review): this `await` is not inside any visible `async def`, and
# `session`, `robot_parser`, `cursor` and `stop_words` are not defined anywhere
# in the visible source. Presumably this is the tail of the `run_crawler`
# coroutine whose header and setup code are missing here -- confirm against
# the complete file before relying on it.
await crawl_page('https://www.example.com', keyword, session, robot_parser,
cursor, stop_words)
def _run_celery_entrypoints():
    """Call ``app.start()`` and then ``app.worker_main()`` on the Celery app.

    NOTE(review): the visible source never instantiates ``QApplication`` or
    ``MainWindow`` here -- confirm whether the GUI launch is expected
    elsewhere.
    """
    app.start()
    app.worker_main()


# Script entry point: only runs when the module is executed directly.
if __name__ == '__main__':
    _run_celery_entrypoints()