source: code/trunk/cli/pull.ml@ 3

Last change on this file since 3 was 3, checked in by fox, 3 years ago
  • Removed 'txt init'

Format

  • New B32 ID

Index

  • New option: txt index --print
  • Move scheme to peers
  • Replace peers.*.conf files with locations packed in the index. Instead of adding a URL to a peers.*.conf file, run txt pull <url> (see the example below)
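
For example, to start tracking a peer and then inspect the local index (the URL is a placeholder, and --print is assumed to print the resulting index):

    txt pull https://texts.example.org
    txt index --print

The pull command fetches the peer's index.pck into the local store, as described in the txt pull man text further down; --print is the new printing option mentioned above.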

Conversion

  • Rewritten converters
  • txt-convert looks for a .convert.conf containing key: value lines.
  • Topic roots can be specified in .convert.conf.
  • Added Topics: key, with comma-separated topics.

If set, only those topics appear in the main index and are used as topic roots.
Other topics still get generated sub-indices, but are not listed in the main index (a sample .convert.conf follows this list).

  • HTML converter header & footer options
  • HTML-index renamed to HTM-index
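
As a sketch of the key: value format, a minimal .convert.conf limiting the main index to a few topic roots might look like this (the topic names are purely illustrative; Topics: is the only key documented above):

    Topics: computing, culture, poetry

Texts under other topics would still receive sub-indices, but only these three topics would be listed in the main index.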

Internal

  • Change types: uuid:Uuid -> id:string
  • File_store merges identical texts
  • Use peer ID for store path; store peers' texts in .local/share/texts (see the sketch after this list)
  • Simple URN resolution for converter
  • Continue to the next feed if parsing one fails
  • Phasing-out Archive, replaced by improved packs
  • Eliminate Bos, Cohttp, lwt, uri, tls, Re, Ptime dependencies
  • Lock version for Cmdliner, fix dune-project
  • Optional recursive store
  • Improve header_pack
  • Fix recursive mkdir
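
The store-path change above is visible in pull_index in the source below, where a peer's directory is keyed by its ID under Logarion.Peers.text_dir (i.e. .local/share/texts). A minimal sketch of the resulting layout, using a hypothetical helper that is not part of the file:

    (* Hypothetical helper: where a pulled text lands on disk.
       Assumes Peers.text_dir resolves to ~/.local/share/texts. *)
    let peer_text_path peer_id short_id =
      Filename.concat (Filename.concat Logarion.Peers.text_dir peer_id) (short_id ^ ".txt")
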
File size: 6.1 KB
(* libcurl write callback: append the received chunk to [accum] and
   report how many bytes were consumed *)
let writer accum data =
  Buffer.add_string accum data;
  String.length data

let showContent content =
  Printf.printf "%s" (Buffer.contents content);
  flush stdout

let showInfo connection =
  Printf.printf "Time: %f for: %s\n"
    (Curl.get_totaltime connection)
    (Curl.get_effectiveurl connection)

let getContent connection url =
  Curl.set_url connection url;
  Curl.perform connection

(* Fetch [url] with libcurl, following redirects; return the body in a
   buffer, or an error message *)
let curl_pull url =
  let result = Buffer.create 4069
  and errorBuffer = ref "" in
  let connection = Curl.init () in
  try
    Curl.set_errorbuffer connection errorBuffer;
    Curl.set_writefunction connection (writer result);
    Curl.set_followlocation connection true;
    Curl.set_url connection url;
    Curl.perform connection;
(* showContent result;*)
(* showInfo connection;*)
    Curl.cleanup connection;
    Ok result
  with
  | Curl.CurlException (_reason, _code, _str) ->
    Curl.cleanup connection;
    Error (Printf.sprintf "Error: %s %s" url !errorBuffer)
  | Failure s ->
    Curl.cleanup connection;
    Error (Printf.sprintf "Caught exception: %s" s)

(* true when [time] is newer than the locally stored copy of [id],
   or when no local copy can be read *)
let newer time id dir =
  match Logarion.File_store.to_text @@ Filename.(concat dir (Logarion.Id.short id) ^ ".txt") with
  | Error x -> prerr_endline x; true
  | Ok txt -> time > (Logarion.(Header_pack.date (Date.listing txt.date)))
  | exception (Sys_error _) -> true

let print_peers p =
  let open Logarion.Header_pack in
  match Msgpck.to_list p.peers with [] -> ()
  | ps -> print_endline @@
    List.fold_left (fun a x -> Printf.sprintf "%s %s" a (Msgpck.to_string x)) "peers: " ps

(* Walk the text records of header pack [p], calling [fn url dir id]
   for every record that is missing or stale locally *)
let parse_index _is_selected fn url dir p =
  let open Logarion.Header_pack in
  match Msgpck.to_list p.texts with
  | [] -> Printf.printf "%s => %s, has empty index\n" p.info.title dir; false
  | texts ->
    let numof_texts = string_of_int @@ List.length texts in
    let text_num_len = String.length numof_texts in
    Printf.printf "%*d/%s %s => %s\r" text_num_len 0 numof_texts p.info.title dir;
    let of_pck i x =
      Printf.printf "\r%*d/%s %!" text_num_len (i+1) numof_texts;
      match x with
      | Msgpck.List (id::time::title::_authors::_topics) ->
        (match Logarion.Header_pack.to_id id with
        | "" -> Printf.eprintf "Invalid id for%s " (Msgpck.to_string title)
        | id ->
          let t = match time with Msgpck.Int i -> Int32.of_int i | Msgpck.Uint32 i -> i | x -> Msgpck.to_uint32 x in
          if newer t id dir then fn url dir id)
      | _ -> prerr_endline ("Invalid record structure") in
    List.iteri of_pck texts;
    print_newline ();
    true

let fname dir text = Filename.concat dir (Logarion.Text.short_id text ^ ".txt")

(* Download a single text from [url] and write it under [dir] *)
let pull_text url dir id =
  let u = Filename.concat url ((Logarion.Id.short id) ^ ".txt") in
  match curl_pull u with
  | Error msg -> Printf.eprintf "Failed getting %s: %s" u msg
  | Ok txt ->
    let txt = Buffer.contents txt in
    match Logarion.Text.of_string txt with
    | Error s -> prerr_endline s
    | Ok text ->
      let file = open_out_gen [Open_creat; Open_trunc; Open_wronly] 0o640 (fname dir text) in
      output_string file txt; close_out file

(* Fetch a peer's index.pck, record [url] among its locations,
   then pull any texts that are new or updated *)
let pull_index url _authors _topics =
  let index_url = url ^ "/index.pck" in
  match curl_pull index_url with
  | Error s -> prerr_endline s; false
  | Ok body ->
    match Logarion.Header_pack.of_string (Buffer.contents body) with
    | Error s -> Printf.printf "Error with %s: %s\n" url s; false
    | Ok pk ->
      let dir = Filename.concat Logarion.Peers.text_dir pk.info.id in
      Logarion.File_store.with_dir dir;
      let file = open_out_gen [Open_creat; Open_trunc; Open_wronly] 0o640 (Filename.concat dir "index.pck") in
      output_string file (Logarion.Header_pack.string {
        pk with info = { pk.info with locations = url::pk.info.locations }});
      close_out file;
(*    let predicates = A.predicate A.authored authors_opt*)
(*      @ A.predicate A.topics topics_opt in*)
      let is_selected text = List.fold_left (fun a e -> a && e text) true [](*predicates*) in
      try parse_index is_selected pull_text url dir pk with
        Invalid_argument msg -> Printf.eprintf "Failed to parse: %s\n%!" msg; false

let pull_list auths topics =
  Curl.global_init Curl.CURLINIT_GLOBALALL;
  let pull got_one peer_url = if got_one then got_one else
    (pull_index peer_url auths topics) in
  Logarion.Peers.fold pull false;
  Curl.global_cleanup ()

let pull url auths topics = match url with
  | "" -> pull_list auths topics | x -> ignore (pull_index x auths topics)

open Cmdliner
let term =
  let authors = Arg.(value & opt (some string) None & info ["a"; "authors"]
    ~docv:"comma-separated names" ~doc:"filter by authors") in
  let topics = Arg.(value & opt (some string) None & info ["t"; "topics"]
    ~docv:"comma-separated topics" ~doc:"filter by topics") in
  let url = Arg.(value & pos 0 string "" & info [] ~docv:"URL"
    ~doc:"Repository location") in
  Term.(const pull $ url $ authors $ topics),
  Term.info "pull" ~doc:"pull listed texts" ~man:[ `S "DESCRIPTION";
    `P "Pull texts from known repositories. To add a new repository use:";
    `P "txt pull [url]";
    `P ("This creates a directory in " ^ Logarion.Peers.text_dir
      ^ " and downloads the text index.pck file in it")]

(*module Msg = struct*)
(*  type t = string * string*)
(*  let compare (x0,y0) (x1,y1) =*)
(*    match compare x1 x0 with 0 -> String.compare y0 y1 | c -> c*)
(*end*)
(*module MsgSet = Set.Make(Msg)*)
(*let pull_msgs url _authors _topics =*)
(*  match http_apply response url with*)
(*  | Error msg ->*)
(*    Printf.eprintf "Failed index request for %s %s" url msg*)
(*  | Ok body ->*)
(*    let rec fold_msgs s a fn =*)
(*      let t, msg = Scanf.bscanf s "%s %s@\n" (fun t m -> t, m) in*)
(*      if t <> "" then fold_msgs s (fn a t msg) fn else a*)
(*    in*)
(*    let s = Scanf.Scanning.from_string body in*)
(*    let msgs = MsgSet.empty in*)
(*    let date_string t = Ptime.to_date t |>*)
(*      fun (y, m, d) -> Printf.sprintf "%04d-%02d-%02d" y m d in*)
(*    let msgs = fold_msgs s msgs*)
(*      (fun msgs t m -> match Ptime.of_rfc3339 t with*)
(*      | Ok (v,_,_) -> let open MsgSet in*)
(*        let msgs = if cardinal msgs > 1 then remove (max_elt msgs) msgs else msgs in*)
(*        add (v,m) msgs*)
(*      | _ -> msgs) in*)
(*    let msg_string = MsgSet.fold*)
(*      (fun (t,m) a -> a ^ Printf.sprintf " %s 𐄁 %s\n" (date_string t) m)*)
(*      msgs "" in*)
(*    Printf.printf "┌───{ %s }───┐\n%s" url msg_string*)