Commit 435575fe authored by whzecomjm's avatar whzecomjm
Browse files

remove useless .py files

parent 6c1fb468
Loading
Loading
Loading
Loading
+0 −0
Original line number Diff line number Diff line
+0 −36
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""LectureNoteDownload.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1aE4KuvnzlfpXyd1fefTsj_jqoHnNkqxy
"""

from urllib import request
from bs4 import BeautifulSoup
import re

# Index page that lists the lecture notes, and the base URL that the links
# found on that page are appended to.
url=r'http://u.math.biu.ac.il/~solomyb/TEACH/18/GMT/index.html'
link=r'http://u.math.biu.ac.il/~solomyb/TEACH/18/GMT/'

# proxy={'http':'http://localhost:80'}
# Send a browser-like User-Agent with every request.
headers = ("User-Agent"," Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36")
opener = request.build_opener()
opener.addheaders = [headers]
# Installing the opener globally makes urlretrieve send the same header.
request.install_opener(opener)

# Read the index page and close the HTTP response as soon as it is consumed.
with request.urlopen(url) as response:
    contents = response.read().decode()
soup = BeautifulSoup(contents,"html.parser")

for tag in soup.find_all('a'):
    pdf = tag.get('href')
    if not pdf:
        # Anchors without an href (or with an empty one) have nothing to
        # download; concatenating None below would raise TypeError.
        continue
    pdfurl = link+pdf
    print(pdfurl+"\n")
    # Save the file on the desktop under the same relative name as the link.
    pdfdir = 'C:/Users/whzec/Desktop/'+pdf
    request.urlretrieve(pdfurl,pdfdir)

# urlretrieve saves a URL to a local file; it lives in urllib.request on
# Python 3 and in urllib on Python 2.

Douban/douban.py

deleted100644 → 0
+0 −92
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""douban.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1qg0hwEsZ4IjUxPMAlKT0DwKyUgnjc6Zx

# 提取豆瓣电影Top250
"""

#coding:utf-8  
# 文件导出版本
''''' 
@author: jsjxy 
@origin: https://www.cnblogs.com/xisheng/p/9130156.html
'''  
from urllib import request 
from bs4 import BeautifulSoup  
import re
#from distutils.filelist import findall   

def doubantop(N):
    """Export the first N entries of the Douban movie Top list to a text file.

    The file ``./douban-top-<N>.txt`` is (re)created. It starts with a title
    line and a column-header line, followed by one tab-separated line per
    movie: title, rating, number of ratings (digits only) and the link found
    in the entry.

    Args:
        N: Number of entries to export. The listing is requested in pages at
            offsets 0, 25, 50, ...; enough pages are requested to cover N and
            the output is cut off after N entries, so N does not have to be
            a multiple of 25.
    """
    page_size = 25
    # Ceiling division: a partially needed last page must still be fetched.
    page_count = -(-N // page_size)
    written = 0
    # Explicit UTF-8 so that writing the Chinese text does not depend on the
    # platform's default encoding.
    with open('./douban-top-%s.txt'%str(N),'w',encoding='utf-8') as f:
        f.write("豆瓣电影TOP"+str(N)+'\n')
        f.write('%-13s%-5s%-15s%-40s\n'%('影片名','评分','评价人数','链接'))
        for page_index in range(page_count):
            url = 'https://movie.douban.com/top250?start='+str(page_size*page_index)+'&filter='
            # Close the HTTP response as soon as the page has been read.
            with request.urlopen(url) as page:
                contents = page.read()
            soup = BeautifulSoup(contents,"html.parser")

            for tag in soup.find_all('div', class_='info'):
                if written >= N:
                    # The last page may hold more entries than were asked for.
                    break
                m_name = tag.find('span', class_='title').get_text()
                m_rating_score = float(tag.find('span',class_='rating_num').get_text())
                m_people = tag.find('div',class_="star")
                m_span = m_people.find_all('span')
                # Drop everything but the digits of the rating-count text
                # (removes the trailing "人评价" wording).
                m_peoplecount = re.sub(r'[^0-9]','',m_span[3].contents[0])
                m_url=tag.find('a').get('href')
                f.write('%s\t%s\t%s\t%s\n'% (m_name,str(m_rating_score), m_peoplecount ,m_url) )
                written += 1
    print('请查看文件douban-top-%s!' %str(N))

# Ask on the console how many entries to export, convert the answer to an
# integer and run the export with it.
num = int(input("你想要看豆瓣电影TOP榜单的前几名(小于等于250且最好是25的倍数):"))
doubantop(num)

# print 查看版本
#coding:utf-8  
''''' 
@author: jsjxy 
@origin: https://www.cnblogs.com/xisheng/p/9130156.html
'''  
from urllib import request 
from bs4 import BeautifulSoup  
import re
#from distutils.filelist import findall   

# Scrape the Douban Top 250 listing (10 requests at offsets 0, 25, ..., 225)
# and keep one formatted, tab-separated line per movie in all_list.
all_list = []
for n in range(10):
    url = 'https://movie.douban.com/top250?start='+str(25*n)+'&filter='
    # Close the HTTP response as soon as the page has been read.
    with request.urlopen(url) as page:
        contents = page.read()
    soup = BeautifulSoup(contents,"html.parser")
    for tag in soup.find_all('div', class_='info'):
        m_name = tag.find('span', class_='title').get_text()
        m_rating_score = float(tag.find('span',class_='rating_num').get_text())
        m_people = tag.find('div',class_="star")
        m_span = m_people.find_all('span')
        # Drop everything but the digits of the rating-count text
        # (removes the trailing "人评价" wording).
        m_peoplecount = re.sub(r'[^0-9]','',m_span[3].contents[0])
        m_url=tag.find('a').get('href')
        all_list.append('%s\t%s\t%s\t%s\n'% (m_name,str(m_rating_score), m_peoplecount ,m_url))



def doubantop(N,n=1):
    """Print entries n through N (1-based, inclusive) of ``all_list``.

    A title line naming the range and a column-header line are printed
    first, then each selected entry with its own ``print`` call.

    Args:
        N: Position of the last entry to print.
        n: Position of the first entry to print; defaults to 1.
    """
    title = "豆瓣电影TOP "+str(n)+' - '+str(N)
    columns = '%-13s%-5s%-15s%-40s\n'%('影片名','评分','评价人数','链接')
    print(title+'\n'+columns)
    # Positions are 1-based, list indices 0-based, hence the n-1.
    selected = all_list[n-1:N]
    for entry in selected:
        print(entry)
        


# Interactive selection of the range is disabled (kept below for reference):
#num = input("你想要看豆瓣电影TOP250榜单的范围")
#num = int(num)
# Print the first 30 entries of the scraped list.
doubantop(30)

Filesoperation/replacetxt.py

deleted100644 → 0
+0 −24
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""replacetxt.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1eXL6qy3DWAte-dKIzPmKEaKpBviXpLTK

# 文件的读取与写入

首先我们给出一个非常有用的例子, 即文件的替换.
"""

# replace some words by python
import re
# Replace every match of the pattern 'List' with 'cctvcctv' in the file,
# rewriting it in place.
# The path is a raw string so that its backslashes are taken literally;
# the alternatives are forward slashes or doubled backslashes.
path=r'C:\Users\whzec\Desktop\1.md'

# Read the whole file first; the handle is closed before the file is
# reopened for writing (which truncates it).
with open(path,'r') as f:
    alllines=f.readlines()

# The context manager closes the file even if a write fails.
with open(path,'w') as f:
    f.writelines(re.sub('List','cctvcctv',eachline) for eachline in alllines)

SS.today/sstoday.py

deleted100644 → 0
+0 −35
Original line number Diff line number Diff line
#!/usr/bin/env python
# coding: utf-8

from urllib import request
from bs4 import BeautifulSoup
import re

# Page whose 'shot-item' blocks link to the images to download; the links
# found there are appended to this URL.
url='https://ss.freeshadowsocks.biz/'

# proxy={'http':'http://localhost:80'}
# Send a browser-like User-Agent with every request.
headers = ("User-Agent"," Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36")
opener = request.build_opener()
opener.addheaders = [headers]
# Installing the opener globally makes urlretrieve send the same header.
request.install_opener(opener)

# Read the page and close the HTTP response as soon as it is consumed.
with request.urlopen(url) as response:
    contents = response.read().decode()
soup = BeautifulSoup(contents,"html.parser")

# n numbers the images that are actually downloaded, starting at 1.
n=1
for tag in soup.find_all('div',class_='shot-item'):
    anchor = tag.find('a')
    img = anchor.get('href') if anchor is not None else None
    if not img:
        # No link in this item: skip it instead of failing on None.
        continue
    imgurl = url+img
    print(imgurl+"\n")
    # The first three images are saved as ss-1..ss-3, all later ones as
    # ssr-1, ssr-2, ...
    if n<=3:
        imgdir = 'D:/Desktop/ss-%d.png'% n
    else:
        imgdir = 'D:/Desktop/ssr-%d.png'% (n-3)
    request.urlretrieve(imgurl,imgdir)
    n=n+1

# urlretrieve saves a URL to a local file; it lives in urllib.request on
# Python 3 and in urllib on Python 2.



Loading