1 | let writer accum data =
|
---|
2 | Buffer.add_string accum data;
|
---|
3 | String.length data
|
---|
4 |
|
---|
5 | let getContent connection url =
|
---|
6 | Curl.set_url connection url;
|
---|
7 | Curl.perform connection
|
---|
8 |
|
---|
9 | let curl_pull url =
|
---|
10 | let result = Buffer.create 4069
|
---|
11 | and errorBuffer = ref "" in
|
---|
12 | let connection = Curl.init () in
|
---|
13 | try
|
---|
14 | Curl.set_errorbuffer connection errorBuffer;
|
---|
15 | Curl.set_writefunction connection (writer result);
|
---|
16 | Curl.set_followlocation connection true;
|
---|
17 | Curl.set_url connection url;
|
---|
18 | Curl.perform connection;
|
---|
19 | Curl.cleanup connection;
|
---|
20 | Ok result
|
---|
21 | with
|
---|
22 | | Curl.CurlException (_reason, _code, _str) ->
|
---|
23 | Curl.cleanup connection;
|
---|
24 | Error (Printf.sprintf "Error: %s %s" url !errorBuffer)
|
---|
25 | | Failure s ->
|
---|
26 | Curl.cleanup connection;
|
---|
27 | Error (Printf.sprintf "Caught exception: %s" s)
|
---|
28 |
|
---|
29 | let newer time id dir =
|
---|
30 | match Kosuzu.File_store.to_text @@ Filename.(concat dir (Kosuzu.Id.short id) ^ ".txt") with
|
---|
31 | | Error x -> prerr_endline x; true
|
---|
32 | | Ok txt -> time > (Kosuzu.(Header_pack.date (Date.listing txt.date)))
|
---|
33 | | exception (Sys_error _) -> true
|
---|
34 |
|
---|
35 | let print_peers p =
|
---|
36 | let open Kosuzu.Header_pack in
|
---|
37 | match Msgpck.to_list p.peers with [] -> ()
|
---|
38 | | ps -> print_endline @@
|
---|
39 | List.fold_left (fun a x -> Printf.sprintf "%s %s" a (Msgpck.to_string x)) "peers: " ps
|
---|
40 |
|
---|
41 | type filter_t = { authors: Kosuzu.Person.Set.t; topics: Kosuzu.String_set.t }
|
---|
42 |
|
---|
43 | let print_pull_start width total title dir =
|
---|
44 | Printf.printf "%*d/%s %s => %s %!" width 0 total title dir
|
---|
45 |
|
---|
46 | let print_pull width total i =
|
---|
47 | Printf.printf "\r%*d/%s %!" width (i+1) total
|
---|
48 |
|
---|
49 | let printers total title dir =
|
---|
50 | let width = String.length total in
|
---|
51 | print_pull_start width total title dir;
|
---|
52 | print_pull width total
|
---|
53 |
|
---|
54 | let fname dir text = Filename.concat dir (Kosuzu.Text.short_id text ^ ".txt")
|
---|
55 |
|
---|
56 | let pull_text url dir id =
|
---|
57 | let u = Filename.concat url ((Kosuzu.Id.short id) ^ ".txt") in
|
---|
58 | match curl_pull u with
|
---|
59 | | Error msg -> Printf.eprintf "Failed getting %s: %s" u msg
|
---|
60 | | Ok txt -> let txt = Buffer.contents txt in
|
---|
61 | match Kosuzu.Text.of_string txt with
|
---|
62 | | Error s -> prerr_endline s
|
---|
63 | | Ok text ->
|
---|
64 | let file = open_out_gen [Open_creat; Open_trunc; Open_wronly] 0o640 (fname dir text) in
|
---|
65 | output_string file txt; close_out file
|
---|
66 |
|
---|
67 | let per_text url dir filter print i id time title authors topics _refs _reps = match id with
|
---|
68 | | "" -> Printf.eprintf "\nInvalid id for %s\n" title
|
---|
69 | | id -> let open Kosuzu in
|
---|
70 | print i;
|
---|
71 | if newer time id dir
|
---|
72 | && (String_set.empty = filter.topics
|
---|
73 | || String_set.exists (fun t -> List.mem t topics) filter.topics)
|
---|
74 | && (Person.Set.empty = filter.authors
|
---|
75 | || Person.Set.exists (fun t -> List.mem (Person.to_string t) authors) filter.authors)
|
---|
76 | then pull_text url dir id
|
---|
77 |
|
---|
78 | let pull_index url authors_opt topics_opt =
|
---|
79 | let index_url = Filename.concat url "index.pck" in
|
---|
80 | match curl_pull index_url with
|
---|
81 | | Error s -> prerr_endline s; false
|
---|
82 | | Ok body ->
|
---|
83 | match Kosuzu.Header_pack.of_string (Buffer.contents body) with
|
---|
84 | | Error s -> Printf.printf "Error with %s: %s\n" url s; false
|
---|
85 | | Ok pk when pk.info.id = "" ->
|
---|
86 | Printf.printf "Empty ID index.pck, skipping %s\n" url; false
|
---|
87 | | Ok pk when not (Kosuzu.Validate.validate_id_length pk.info.id) ->
|
---|
88 | Printf.printf "Index pack ID longer than 32 characters, skipping %s\n" url; false
|
---|
89 | | Ok pk when not (Kosuzu.Validate.validate_id_chars pk.info.id) ->
|
---|
90 | Printf.printf "Index pack contains invalid ID characters, skipping %s\n" url; false
|
---|
91 | | Ok pk ->
|
---|
92 | let dir = Filename.concat Kosuzu.Peers.text_dir pk.info.id in
|
---|
93 | Kosuzu.File_store.with_dir dir;
|
---|
94 | let file = open_out_gen [Open_creat; Open_trunc; Open_wronly] 0o640
|
---|
95 | (Filename.concat dir "index.pck") in
|
---|
96 | output_string file ( Kosuzu.Header_pack.string {
|
---|
97 | pk with info = { pk.info with locations = url::pk.info.locations }});
|
---|
98 | close_out file;
|
---|
99 | let filter = let open Kosuzu in {
|
---|
100 | authors = (match authors_opt with Some s -> Person.Set.of_string s | None -> Person.Set.empty);
|
---|
101 | topics =( match topics_opt with Some s -> String_set.of_string s | None -> String_set.empty);
|
---|
102 | } in
|
---|
103 | let name = match pk.info.title with "" -> url | title -> title in
|
---|
104 | let print = printers (string_of_int @@ Kosuzu.Header_pack.numof_texts pk) name dir in
|
---|
105 | try Kosuzu.Header_pack.iteri (per_text url dir filter print) pk; print_newline (); true
|
---|
106 | with Invalid_argument msg -> Printf.printf "\nFailed to parse %s: %s\n%!" url msg; false
|
---|
107 |
|
---|
108 | let pull_list auths topics =
|
---|
109 | Curl.global_init Curl.CURLINIT_GLOBALALL;
|
---|
110 | let pull got_one peer_url = if got_one then got_one else
|
---|
111 | (pull_index peer_url auths topics) in
|
---|
112 | let open Kosuzu in
|
---|
113 | let fold_locations init peer =
|
---|
114 | ignore @@ List.fold_left pull init peer.Peers.pack.Header_pack.info.locations;
|
---|
115 | false
|
---|
116 | in
|
---|
117 | ignore @@ Peers.fold fold_locations false;
|
---|
118 | Curl.global_cleanup ()
|
---|
119 |
|
---|
120 | let pull url auths topics = match url with
|
---|
121 | | "" -> pull_list auths topics | x -> ignore (pull_index x auths topics)
|
---|
122 |
|
---|
123 | open Cmdliner
|
---|
124 | let authors = Arg.(value & opt (some string) None & info ["a"; "authors"] ~docv:"Comma-separated names" ~doc:"Filter by authors")
|
---|
125 | let topics = Arg.(value & opt (some string) None & info ["t"; "topics"] ~docv:"Comma-separated topics" ~doc:"Filter by topics")
|
---|
126 | let url = Arg.(value & pos 0 string "" & info [] ~docv:"URL" ~doc:"Repository location")
|
---|
127 |
|
---|
128 | let pull_t = Term.(const pull $ url $ authors $ topics)
|
---|
129 |
|
---|
130 | let cmd =
|
---|
131 | let doc = "Pull listed texts" in
|
---|
132 | let man = [
|
---|
133 | `S Manpage.s_description;
|
---|
134 | `P "Pull texts from known repositories." ]
|
---|
135 | in
|
---|
136 | let info = Cmd.info "pull" ~doc ~man in
|
---|
137 | Cmd.v info pull_t
|
---|