remove useless .py files (435575fe) · Commits · Wenchao Zhang / Python-Instances

Batch download/LectureNoteDownload.ipynb

+0 −0

Original line number	Diff line number	Diff line

Batch download/LectureNoteDownload.py

deleted100644 → 0

+0 −36

Original line number	Diff line number	Diff line
		# -*- coding: utf-8 -*-
		"""LectureNoteDownload.ipynb

		Automatically generated by Colaboratory.

		Original file is located at
		https://colab.research.google.com/drive/1aE4KuvnzlfpXyd1fefTsj_jqoHnNkqxy
		"""

		import re
		from urllib import request
		from urllib.parse import urljoin

		from bs4 import BeautifulSoup

		# Index page listing the lecture notes, and the base URL that the
		# (relative) links on that page are resolved against.
		url = r'http://u.math.biu.ac.il/~solomyb/TEACH/18/GMT/index.html'
		link = r'http://u.math.biu.ac.il/~solomyb/TEACH/18/GMT/'

		# proxy={'http':'http://localhost:80'}
		# Present a browser-like User-Agent.
		headers = ("User-Agent"," Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36")
		opener = request.build_opener()
		opener.addheaders = [headers]
		# Installing the opener globally makes urlretrieve() send the header too.
		request.install_opener(opener)

		contents = request.urlopen(url).read().decode()
		soup = BeautifulSoup(contents, "html.parser")
		for tag in soup.find_all('a'):
		    pdf = tag.get('href')
		    if not pdf:
		        # <a> without an href (e.g. a named anchor): nothing to download,
		        # and concatenating None below would raise TypeError.
		        continue
		    # urljoin also copes with absolute and root-relative hrefs, which
		    # plain string concatenation would turn into a broken URL.
		    pdfurl = urljoin(link, pdf)
		    print(pdfurl + "\n")
		    pdfdir = 'C:/Users/whzec/Desktop/' + pdf
		    request.urlretrieve(pdfurl, pdfdir)

		# urlretrieve saves a URL to a local file; it lives in urllib.request on
		# Python 3 and in urllib on Python 2.

Douban/douban.py

deleted100644 → 0

+0 −92

Original line number	Diff line number	Diff line
		# -*- coding: utf-8 -*-
		"""douban.ipynb

		Automatically generated by Colaboratory.

		Original file is located at
		https://colab.research.google.com/drive/1qg0hwEsZ4IjUxPMAlKT0DwKyUgnjc6Zx

		# 提取豆瓣电影Top250
		"""

		#coding:utf-8
		# 文件导出版本
		'''''
		@author: jsjxy
		@origin: https://www.cnblogs.com/xisheng/p/9130156.html
		'''
		from urllib import request
		from bs4 import BeautifulSoup
		import re
		#from distutils.filelist import findall

		def doubantop(N):
		    """Save the first N entries of the Douban Top 250 chart to a text file.

		    The chart is fetched 25 entries at a time (the ``start`` query
		    parameter advances in steps of 25). One tab-separated line per movie
		    -- title, rating, number of ratings, detail-page URL -- is written to
		    ``./douban-top-<N>.txt`` below a title line and a column-header line.

		    Args:
		        N: Number of movies to export. It does not have to be a multiple
		            of 25: the last, partial page is fetched as well and output
		            stops after N movies.

		    Raises:
		        urllib.error.URLError: If a chart page cannot be fetched.
		    """
		    page_size = 25
		    written = 0
		    # Explicit UTF-8: the file contains Chinese text, which the platform
		    # default encoding cannot always represent.
		    with open('./douban-top-%s.txt'%str(N), 'w', encoding='utf-8') as f:
		        f.write("豆瓣电影TOP"+str(N)+'\n')
		        f.write('%-13s%-5s%-15s%-40s\n'%('影片名','评分','评价人数','链接'))
		        # Round the page count up so a trailing partial page is not dropped.
		        for n in range((N + page_size - 1) // page_size):
		            url = 'https://movie.douban.com/top250?start='+str(25*n)+'&filter='
		            # Close the HTTP response as soon as the body has been read.
		            with request.urlopen(url) as page:
		                contents = page.read()
		            soup = BeautifulSoup(contents, "html.parser")

		            for tag in soup.find_all('div', class_='info'):
		                if written >= N:
		                    break
		                m_name = tag.find('span', class_='title').get_text()
		                m_rating_score = float(tag.find('span', class_='rating_num').get_text())
		                m_people = tag.find('div', class_="star")
		                m_span = m_people.find_all('span')
		                # The fourth <span> carries the rating count followed by a
		                # text suffix; keep only the digits.
		                m_peoplecount = re.sub(r'[^0-9]', '', m_span[3].contents[0])
		                m_url = tag.find('a').get('href')
		                f.write('%s\t%s\t%s\t%s\n'% (m_name, str(m_rating_score), m_peoplecount, m_url))
		                written += 1
		    print('请查看文件douban-top-%s!' %str(N))

		# Ask how many chart entries to export (the prompt asks for at most 250,
		# ideally a multiple of 25); int() raises ValueError on non-numeric input.
		num = int(input("你想要看豆瓣电影TOP榜单的前几名(小于等于250且最好是25的倍数):"))
		doubantop(num)

		# print 查看版本
		#coding:utf-8
		'''''
		@author: jsjxy
		@origin: https://www.cnblogs.com/xisheng/p/9130156.html
		'''
		from urllib import request
		from bs4 import BeautifulSoup
		import re
		#from distutils.filelist import findall

		# Collect one formatted, tab-separated line per movie for all ten chart
		# pages (25 entries per request, per the `start` offset).
		all_list = []
		for page_index in range(10):
		    page_url = 'https://movie.douban.com/top250?start=' + str(25 * page_index) + '&filter='
		    html = request.urlopen(page_url).read()
		    soup = BeautifulSoup(html, "html.parser")
		    for info in soup.find_all('div', class_='info'):
		        title = info.find('span', class_='title').get_text()
		        score = float(info.find('span', class_='rating_num').get_text())
		        star_spans = info.find('div', class_="star").findAll('span')
		        # Strip the text suffix from the rating count, keeping only digits.
		        votes = re.sub(r'[^0-9]', '', star_spans[3].contents[0])
		        detail_url = info.find('a').get('href')
		        row = '%s\t%s\t%s\t%s\n' % (title, str(score), votes, detail_url)
		        all_list.append(row)



		def doubantop(N,n=1):
		    """Print entries n through N (1-based, inclusive) of ``all_list``.

		    A title line and a column-header line are printed first, followed by
		    one already-formatted movie line per entry.

		    Args:
		        N: Rank of the last entry to print.
		        n: Rank of the first entry to print; defaults to 1.
		    """
		    title_line = "豆瓣电影TOP "+str(n)+' - '+str(N)
		    column_line = '%-13s%-5s%-15s%-40s\n'%('影片名','评分','评价人数','链接')
		    print(title_line + '\n' + column_line)
		    selected = all_list[n-1:N]
		    for entry in selected:
		        print(entry)



		# Disabled interactive variant: it would prompt for the range of the
		# Top-250 chart to display and convert the answer to int.
		#num = input("你想要看豆瓣电影TOP250榜单的范围")
		#num = int(num)
		# Print a fixed range instead: with the default n=1 this shows ranks 1-30.
		doubantop(30)

Filesoperation/replacetxt.py

deleted100644 → 0

+0 −24

Original line number	Diff line number	Diff line
		# -*- coding: utf-8 -*-
		"""replacetxt.ipynb

		Automatically generated by Colaboratory.

		Original file is located at
		https://colab.research.google.com/drive/1eXL6qy3DWAte-dKIzPmKEaKpBviXpLTK

		# 文件的读取与写入

		首先我们给出一个非常有用的例子, 即文件的替换.
		"""

		# Replace every occurrence of 'List' with 'cctvcctv' in a text file, in place.
		import re

		# Raw string so the backslashes of the Windows path are taken literally;
		# forward slashes or doubled backslashes would work as well.
		path = r'C:\Users\whzec\Desktop\1.md'

		# Read the whole file first: opening it for writing truncates it.
		# NOTE(review): the platform default encoding is used for both read and
		# write, as in the original -- confirm it matches the file's encoding.
		with open(path, 'r') as f:
		    alllines = f.readlines()

		# `with` closes the handle even if a write fails part-way through.
		with open(path, 'w') as f:
		    f.writelines(re.sub('List', 'cctvcctv', eachline) for eachline in alllines)

SS.today/sstoday.py

deleted100644 → 0

+0 −35

Original line number	Diff line number	Diff line
		#!/usr/bin/env python
		# coding: utf-8

		import re
		from urllib import request
		from urllib.parse import urljoin

		from bs4 import BeautifulSoup

		url = 'https://ss.freeshadowsocks.biz/'

		# proxy={'http':'http://localhost:80'}
		# Present a browser-like User-Agent.
		headers = ("User-Agent"," Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36")
		opener = request.build_opener()
		opener.addheaders = [headers]
		# Installing the opener globally makes urlretrieve() send the header too.
		request.install_opener(opener)

		contents = request.urlopen(url).read().decode()
		soup = BeautifulSoup(contents, "html.parser")
		# n is the 1-based position of the item on the page; it decides the file
		# name, so it advances even for items that end up being skipped.
		for n, tag in enumerate(soup.find_all('div', class_='shot-item'), start=1):
		    anchor = tag.find('a')
		    img = anchor.get('href') if anchor is not None else None
		    if not img:
		        # Item without a link: nothing to download.
		        continue
		    # urljoin also handles absolute and root-relative hrefs correctly.
		    imgurl = urljoin(url, img)
		    print(imgurl + "\n")
		    # The first three items are saved as ss-1..3, the rest as ssr-1, ssr-2, ...
		    if n <= 3:
		        imgdir = 'D:/Desktop/' + 'ss-%d' % n + '.png'
		    else:
		        imgdir = 'D:/Desktop/' + 'ssr-%d' % (n - 3) + '.png'
		    request.urlretrieve(imgurl, imgdir)

		# urlretrieve saves a URL to a local file; it lives in urllib.request on
		# Python 3 and in urllib on Python 2.

Admin message