Proxy Scraper C#

AngelOfLove

ОПЫТНЫЙ USER
Регистрация
16 Июл 2021
Сообщения
218
Реакции
65
[HIDE]
Код:
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
 
namespace w4zt3d_ProxyGRB
{
    /// <summary>
    /// Console tool that runs a fixed list of Bing searches, downloads every
    /// result link found on the result page and extracts everything that looks
    /// like an "ip:port" proxy. Each unique hit is printed to the console and
    /// appended to <c>scraped_proxylist.txt</c> in the working directory.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point. Truncates the output file, scrapes all queries
        /// sequentially and then keeps the console window open.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string[] querys = { "fresh proxies", "pastebin proxies", "grayhatforum proxylist", "pastebin proxylist", ".txt proxylist", "pastebin proxy ip port" };

            Console.Title = "ProxyGRB by w4zt3d for GrayHatForum.org";
            int count = 15; // results requested per Bing query (single page, no paging loop)
            Regex linkRegex = new Regex("<a href=\"(.*?)\" h=");
            Regex proxyRegex = new Regex(@"\d{1,3}(\.\d{1,3}){3}:\d{1,5}");
            Regex proxyRegex2 = new Regex(@"\d{1,3}(\.\d{1,3}){3}</td><td>\d{1,5}");
            // HashSet gives O(1) duplicate checks; the list-based lookup was O(n) per proxy.
            HashSet<string> alreadyFound = new HashSet<string>();
            string filename = "scraped_proxylist.txt";
            File.WriteAllText(filename, ""); // start every run with an empty output file
            int counter = 0;

            // WebClient is IDisposable; release it once scraping is finished.
            using (WebClient webClient = new WebClient())
            {
                foreach (string tofind in querys)
                {
                    // Percent-encode the query so spaces and special characters survive
                    // the trip; the stray trailing quote of the old URL is dropped.
                    string url = "https://www.bing.com/search?q=" + Uri.EscapeDataString(tofind) + "&count=" + count;
                    try
                    {
                        string code = webClient.DownloadString(url);
                        foreach (Match match in linkRegex.Matches(code))
                        {
                            string s = match.Groups[1].Value;

                            // Skip relative links and Bing/Microsoft-internal links.
                            // NOTE(review): "amp" is a plain substring test, so it also
                            // rejects URLs such as ".../example/..." - confirm this is intended.
                            if (!s.StartsWith("http", StringComparison.Ordinal) || s.Contains("amp") || s.Contains("microsoft"))
                            {
                                continue;
                            }

                            try
                            {
                                string html = webClient.DownloadString(s);

                                // Plain "ip:port" occurrences.
                                foreach (Match m in proxyRegex.Matches(html))
                                {
                                    if (TryRecordProxy(m.Groups[0].Value, alreadyFound, filename))
                                    {
                                        counter++;
                                    }
                                }

                                // HTML tables with ip and port in adjacent cells.
                                foreach (Match m in proxyRegex2.Matches(html))
                                {
                                    string proxy = m.Groups[0].Value.Replace("</td><td>", ":");
                                    if (TryRecordProxy(proxy, alreadyFound, filename))
                                    {
                                        counter++;
                                    }
                                }

                                Console.Title = "ProxyGRB " + "\"" + tofind + "\" Proxys = " + counter + " Url: " + s;
                            }
                            catch (Exception ex)
                            {
                                // Best effort: one unreachable or broken page must not stop
                                // the run, but the failure is reported instead of hidden.
                                Console.Error.WriteLine("Skipped " + s + ": " + ex.Message);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: continue with the next query if the search itself fails.
                        Console.Error.WriteLine("Search failed for \"" + tofind + "\": " + ex.Message);
                    }
                }
            }

            Console.Beep();
            Console.WriteLine("Done!");
            Console.Title = "ProxyGRB by w4zt3d " + counter + " proxies found.";
            // Keep the console window open without spinning a CPU core at 100%.
            System.Threading.Thread.Sleep(System.Threading.Timeout.Infinite);
        }

        /// <summary>
        /// Prints and persists a proxy if it has not been seen before.
        /// </summary>
        /// <param name="proxy">Proxy in "ip:port" form.</param>
        /// <param name="alreadyFound">Set of proxies recorded so far; updated on success.</param>
        /// <param name="filename">File the proxy is appended to, one entry per line.</param>
        /// <returns><c>true</c> if the proxy was new and has been recorded; otherwise <c>false</c>.</returns>
        private static bool TryRecordProxy(string proxy, HashSet<string> alreadyFound, string filename)
        {
            if (!alreadyFound.Add(proxy))
            {
                return false;
            }

            Console.WriteLine(proxy);
            File.AppendAllText(filename, proxy + Environment.NewLine);
            return true;
        }
    }
}
[/HIDE]
 
Спасибо, есть чему поучиться. А возможно ли такое сделать на обычном Си?
 
Назад
Сверху