Commit e9da1fd9 (verified), authored by 施乐存
Browse files

添加UP主视频数量分布及累计分布图的生成代码和相关PNG文件



Signed-off-by: szdytom <szdytom@qq.com>
parent aa668d1c
Loading
Loading
Loading
Loading
+79 −0
Original line number Diff line number Diff line
(* ::Package:: *)

(* Plots, for uploaders ("UP") ranked by their number of rows in the `videos`
   table, how many videos the top-x uploaders account for in total, and
   exports the figure as up_rank_cumulative.png next to this file.
   Input: the database file ../data.db relative to this file. *)

(* Directory of this file. NotebookDirectory[] requires a front end, so prefer
   $InputFileName, which is non-empty while the file is being read by Get or
   run as a script, and fall back to NotebookDirectory[] for interactive
   evaluation inside the front end. *)
baseDir = If[StringQ[$InputFileName] && $InputFileName =!= "",
   DirectoryName[ExpandFileName[$InputFileName]],
   NotebookDirectory[]];

(* Fetch the cumulative ranking data from the database:
   x = rank of the uploader (by video count, ties broken by author_id),
   y = running total of video counts up to and including that rank. *)
data = Normal[ExternalEvaluate[
  DatabaseReference[
   File[FileNameJoin[{baseDir, "..", "data.db"}]]],
   "WITH up_video_counts AS (
    SELECT 
        author_id,
        COUNT(*) as video_count
    FROM videos
    GROUP BY author_id
),
ranked_up AS (
    SELECT 
        author_id,
        video_count,
        ROW_NUMBER() OVER (ORDER BY video_count DESC, author_id) as rank_num
    FROM up_video_counts
),
cumulative_videos AS (
    SELECT 
        rank_num,
        SUM(video_count) OVER (ORDER BY rank_num) as cumulative_videos
    FROM ranked_up
)
SELECT 
    rank_num as x,
    cumulative_videos as y
FROM cumulative_videos
ORDER BY rank_num;"]];

(* Only build the figure when the query produced at least one row; a failed
   query or an empty table would otherwise surface as Last/Part errors below
   and a meaningless PNG would be exported. *)
If[MatchQ[data, {__Association}],
 (* Convert the rows to a list of {x, y} coordinate points *)
 points = {#["x"], #["y"]} & /@ data;

 (* The last cumulative value is the total number of videos *)
 totalVideos = Last[points][[2]];

 (* Largest rank, i.e. the right edge of the plot; computed once and reused *)
 maxRank = Max[points[[All, 1]]];

 (* Create the figure *)
 plot = ListPlot[points,
   Joined -> True,
   PlotStyle -> Directive[RGBColor[0.368, 0.507, 0.71], Thickness[0.004]],
   (* 5% head room above the total so the reference line is not clipped *)
   PlotRange -> {{0, maxRank}, {0, totalVideos*1.05}},
   Frame -> True,
   FrameLabel -> {"UP\:4e3b\:6392\:540d\:ff08\:6309\:4e0a\:699c\:89c6\:9891\:6570\:91cf\:ff09", "\:524dx\:540dUP\:4e3b\:4ea7\:51fa\:7684\:89c6\:9891\:603b\:6570"},
   FrameStyle -> Directive[Black, 14],
   GridLines -> Automatic,
   GridLinesStyle -> Directive[Gray, Dotted],
   ImageSize -> 800,
   PlotLabel -> Style["UP\:4e3b\:4e0a\:699c\:89c6\:9891\:6570\:91cf\:7d2f\:8ba1\:5206\:5e03", 16, Bold],
   LabelStyle -> Directive[Black, 12],
   Epilog -> {
     (* Horizontal reference line at the total number of videos *)
     {Dashed, Gray,
      Line[{{0, totalVideos}, {maxRank, totalVideos}}]},
     (* Reference lines at 20% of the uploaders and 80% of the videos *)
     {Dashed, RGBColor[0.88, 0.4, 0.4],
      Line[{{0.2*maxRank, 0},
            {0.2*maxRank, totalVideos}}]},
     {Dashed, RGBColor[0.88, 0.4, 0.4],
      Line[{{0, 0.8*totalVideos},
            {maxRank, 0.8*totalVideos}}]},
     (* Text annotations, positioned in scaled plot coordinates *)
     Text[Style[Row[{"\:603b\:89c6\:9891\:6570: ", totalVideos}], 12, Gray],
      Scaled[{0.8, 0.95}]],
     Text[Style["20% UP\:4e3b", 11, RGBColor[0.88, 0.4, 0.4]],
      Scaled[{0.21, 0.05}]],
     Text[Style["\:8d21\:732e80%\:89c6\:9891", 11, RGBColor[0.88, 0.4, 0.4]],
      Scaled[{0.05, 0.82}]]
   }
 ],
 (* No usable rows: report what came back and mark the plot as failed *)
 Print["up_rank_cumulative: query returned no usable rows: ", Short[data]];
 plot = $Failed
];

plot

(* Export as a PNG file; skipped when no plot could be built *)
If[plot =!= $Failed,
 Export[FileNameJoin[{baseDir, "up_rank_cumulative.png"}], plot]]


+65 −0
Original line number Diff line number Diff line
(* ::Package:: *)

(* Plots, on log-log axes, how many uploaders ("UP") have exactly x rows in the
   `videos` table, overlays a power-law fit y = a * x^b, and exports the figure
   as up_video_distribution.png next to this file.
   Input: the database file ../data.db relative to this file. *)

(* Directory of this file. NotebookDirectory[] requires a front end, so prefer
   $InputFileName, which is non-empty while the file is being read by Get or
   run as a script, and fall back to NotebookDirectory[] for interactive
   evaluation inside the front end. *)
baseDir = If[StringQ[$InputFileName] && $InputFileName =!= "",
   DirectoryName[ExpandFileName[$InputFileName]],
   NotebookDirectory[]];

(* Fetch the data from the database (path relative to this file):
   x = number of videos per uploader, y = number of uploaders with that count *)
data = Normal[ExternalEvaluate[
  DatabaseReference[
   File[FileNameJoin[{baseDir, "..", "data.db"}]]],
   "WITH up_video_counts AS (
    SELECT 
        author_id,
        COUNT(*) as video_count
    FROM videos
    GROUP BY author_id
)
SELECT 
    video_count as x,
    COUNT(author_id) as y
FROM up_video_counts
GROUP BY video_count
ORDER BY video_count;"]];

(* Convert the rows to {x, y} points. Anything other than a non-empty list of
   row associations (failed query, empty table) yields no points. Both
   coordinates are logged below, so keep only strictly positive pairs; the
   query should not produce others, this is a safety net. *)
points = If[MatchQ[data, {__Association}],
   Select[{#["x"], #["y"]} & /@ data, #[[1]] > 0 && #[[2]] > 0 &],
   {}];

(* Fitted power law; a and b are assigned by the fit below *)
fittedFunc[x_] := a * x^b;

(* A straight-line fit needs at least two points *)
If[Length[points] >= 2,
 (* Power-law fit y = a * x^b.
    Taking logarithms gives Log[y] = Log[a] + b * Log[x], a linear model. *)
 logPoints = N[Log[points]];

 (* Linear fit in log space; the protected formal symbol cannot carry a value,
    unlike a global x that may have been assigned earlier in the session *)
 fit = LinearModelFit[logPoints, \[FormalX], \[FormalX]];
 params = fit["BestFitParameters"];
 (* Extract the parameters: Log[a] = params[[1]], b = params[[2]] *)
 a = Exp[params[[1]]];
 b = params[[2]];

 (* Draw the distribution together with the fitted line *)
 plot = Show[
   ListLogLogPlot[points,
    PlotStyle -> Directive[PointSize[0.02], RGBColor[0.368, 0.507, 0.71]],
    PlotMarkers -> {"\[FilledCircle]", 12},
    PlotLabel -> Style["UP\:4e3b\:4e0a\:699c\:9891\:6b21\:5206\:5e03\:4e0e\:62df\:5408", 16, Bold],
    Frame -> True,
    FrameLabel -> {"\:4e0a\:699c\:89c6\:9891\:6570\:91cf\:ff08\:5bf9\:6570\:ff09", "UP\:4e3b\:6570\:91cf\:ff08\:5bf9\:6570\:ff09"},
    FrameStyle -> Directive[Black, 14],
    GridLines -> Automatic,
    GridLinesStyle -> Directive[Gray, Dotted],
    ImageSize -> 600,
    LabelStyle -> Directive[Black, 12]],
   LogLogPlot[fittedFunc[x], {x, Min[points[[All, 1]]], Max[points[[All, 1]]]},
    PlotStyle -> Directive[RGBColor[0.88, 0.4, 0.4], Thickness[0.005]],
    PlotLegends ->
     Placed[LineLegend[{RGBColor[0.88, 0.4, 0.4]},
        {Row[{"\:62df\:5408: y = ", NumberForm[a, 3], " \[Times] x^",
           NumberForm[b, 3]}]}], {0.7, 0.8}]
   ]],
 (* Not enough data: report what came back and mark the plot as failed *)
 Print["up_video_distribution: need at least two data points for the fit, got: ",
  Short[data]];
 plot = $Failed
];

plot

(* Export as a PNG file; skipped when no plot could be built *)
If[plot =!= $Failed,
 Export[FileNameJoin[{baseDir, "up_video_distribution.png"}], plot]]


+118 KiB
Loading image diff...
+91.5 KiB
Loading image diff...